AB#2191 Evaluate K-Bench benchmarks in CI

Install Python for K-Bench evaluation. Add scripts to evaluate the K-Bench results in CI. Attach graphs to the workflow results in GitHub Actions.
2025-08-02 03:56:07 -04:00 · 2022-11-01 11:24:29 +00:00 · 2022-11-01 11:24:29 +00:00 · 1952eb5721
commit 1952eb5721
parent f4ff473677
8 changed files with 424 additions and 0 deletions
--- a/.github/actions/k-bench/action.yml
+++ b/.github/actions/k-bench/action.yml
@ -12,6 +12,15 @@ inputs:
 runs:
  using: "composite"
  steps:
+    # The evaluation scripts under evaluate/ are run with this interpreter.
+    - name: Setup python
+      uses: actions/setup-python@b55428b1882923874294fa556849718a1d7f2ca5 # tag=v4.2.0
+      with:
+        python-version: "3.10"
+
+    # Install the packages pinned in evaluate/requirements.txt.
+    - name: Install evaluation dependencies
+      shell: bash
+      run: pip install -r .github/actions/k-bench/evaluate/requirements.txt
+
    - name: Checkout patched K-Bench
      uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # tag=v3.1.0
      with:
@ -104,3 +113,19 @@ runs:
      with:
        path: "k-bench/out/kbench-constellation-${{ inputs.cloudProvider }}"
        name: "k-bench-constellation-${{ inputs.cloudProvider }}"
+
+    # Runs evaluate/main.py. The script reads the K-Bench logs found below
+    # KBENCH_RESULTS for the provider named in CSP and writes
+    # kbench_results.json plus the *_perf.png charts using relative paths,
+    # i.e. into the working directory of this step.
+    - name: Parse test results and create diagrams
+      shell: bash
+      run: python .github/actions/k-bench/evaluate/main.py
+      env:
+        KBENCH_RESULTS: ${{ github.workspace }}/k-bench/out/
+        CSP: ${{ inputs.cloudProvider }}
+
+    # Publishes the files written by the evaluation step as one artifact.
+    # NOTE(review): skipped when env.ACT is set - presumably for local runs
+    # with nektos/act; confirm.
+    - name: Upload benchmark results
+      uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8 # tag=v3.1.0
+      if: ${{ !env.ACT }}
+      with:
+        path: |
+          *_perf.png
+          kbench_results.json
+        name: "benchmark_results"
--- a/.github/actions/k-bench/evaluate/evaluators/__init__.py
+++ b/.github/actions/k-bench/evaluate/evaluators/__init__.py
--- a/.github/actions/k-bench/evaluate/evaluators/default.py
+++ b/.github/actions/k-bench/evaluate/evaluators/default.py
@ -0,0 +1,71 @@
+"""Evaluator for the K-Bench default test."""
+import os
+import re
+from collections import defaultdict
+from typing import Dict
+
+# Each table maps a result key to the text that identifies the matching
+# latency line in kbench.log. The order of the operations is the order in
+# which the keys end up in the result row.
+pod_latencies = {
+    f'pod_{op}': f'{op} pod latency:'
+    for op in ('create', 'list', 'get', 'update', 'delete')
+}
+
+deployment_latencies = {
+    f'depl_{op}': f'{op} deployment latency:'
+    for op in ('create', 'list', 'update', 'scale', 'delete')
+}
+
+service_latencies = {
+    f'svc_{op}': f'{op} service latency:'
+    for op in ('create', 'list', 'get', 'update', 'delete')
+}
+
+
+def eval(tests: Dict[str, str]) -> Dict[str, Dict[str, float]]:
+    """Collect the API call latencies of the default K-Bench test.
+
+    For every entry in ``tests`` the file ``<path>/default/kbench.log`` is
+    read. For each needle in the pod, service and deployment tables the one
+    matching log line is looked up and its value stored as a float.
+
+    Args:
+        tests: Maps a test name to the directory holding its K-Bench output.
+
+    Returns:
+        A dictionary mapping each test name to its row of latency values.
+
+    Raises:
+        Exception: If kbench.log is empty.
+    """
+    latency_tables = (pod_latencies, service_latencies, deployment_latencies)
+    result = {}
+    for name, path in tests.items():
+        log_file = os.path.join(path, 'default', 'kbench.log')
+        with open(log_file, 'r') as f:
+            log_lines = f.readlines()
+
+        if not log_lines:
+            raise Exception("Empty kbench.log")
+
+        row = defaultdict(float)
+        for table in latency_tables:
+            # One log line per API call; its first decimal number is the value.
+            for key, needle in table.items():
+                matching = get_line_containing_needle(
+                    lines=log_lines, needle=needle)
+                row[key] = float(get_median_from_line(line=matching))
+
+        result[name] = row
+    return result
+
+
+def get_median_from_line(line):
+    """Extract the value (median) from the line.
+
+    The value is the first decimal number (digits, a dot, digits) that is
+    preceded by whitespace and followed by at least one more character.
+    NOTE(review): presumably K-Bench prints the median first - confirm.
+
+    Returns the number as a string.
+
+    Raises Exception if the line contains no such number.
+    """
+    match = re.search(r'\s(\d+\.\d+)(.+)', line)
+    if not match:
+        # Fail with a readable message instead of an AttributeError on None.
+        raise Exception(f"Could not extract median from line: {line!r}")
+    return match.group(1)
+
+
+def get_line_containing_needle(lines, needle):
+    """Find the single line that contains ``needle``.
+
+    Args:
+        lines: The lines to search.
+        needle: The substring to look for.
+
+    Returns the matching line.
+
+    Raises Exception if ``needle`` occurs in no line or in more than one.
+    """
+    matches = [line for line in lines if needle in line]
+    if not matches:
+        # Without this check the lookup below fails with a bare IndexError.
+        raise Exception(f"'{needle}' not found in any line..")
+    if len(matches) > 1:
+        raise Exception(f"'{needle}' matched multiple times..")
+    return matches[0]
--- a/.github/actions/k-bench/evaluate/evaluators/fio.py
+++ b/.github/actions/k-bench/evaluate/evaluators/fio.py
@ -0,0 +1,81 @@
+"""Parse the fio logs.
+
+Extracts the bandwidth for I/O,
+from various fio benchmarks.
+
+Example log file (extracting read and write bandwidth):
+...
+Run status group 0 (all jobs):
+   READ: bw=5311KiB/s (5438kB/s), 5311KiB/s-5311KiB/s (5438kB/s-5438kB/s), io=311MiB (327MB), run=60058-60058msec
+  WRITE: bw=2289KiB/s (2343kB/s), 2289KiB/s-2289KiB/s (2343kB/s-2343kB/s), io=134MiB (141MB), run=60058-60058msec
+...
+"""
+
+
+import os
+import re
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict
+
+# get different mixes of read/write IO as subtests
+# (result key prefix -> name of the fio log file to search for)
+subtests = {
+    'fio_root_async_R70W30':    'fio_async_randR70W30.out',
+    'fio_root_async_R100W0':    'fio_async_randR100W0.out',
+    'fio_root_async_R0W100':    'fio_async_randR0W100.out',
+}
+
+
+def eval(tests: Dict[str, str]) -> Dict[str, Dict[str, float]]:
+    """Read the results of the fio tests.
+
+    For every test, each log file named in ``subtests`` is searched
+    recursively below ``<test path>/dp_fio``. Lines containing "READ" or
+    "WRITE" are parsed with ``get_io_bw_from_line`` and stored under
+    ``<subtest>_R`` / ``<subtest>_W``. If several lines match, the last one
+    wins.
+
+    Args:
+        tests: Maps a test name to the directory holding its K-Bench output.
+
+    Returns:
+        A dictionary mapping each test name to its row of bandwidth values.
+
+    Raises:
+        Exception: If a log file is missing or empty.
+    """
+    result = {}
+    for t, test_path in tests.items():
+        base_path = os.path.join(test_path, 'dp_fio')
+        row = defaultdict(str)
+        for subtest, log_name in subtests.items():
+            try:
+                log_path = next(Path(base_path).rglob(log_name))
+            except StopIteration:
+                # Name the fio log that is actually missing.
+                raise Exception(
+                    f"Error: No {log_name} found for fio test {subtest} in {base_path}"
+                )
+
+            with open(log_path) as f:
+                fio = f.readlines()
+            if not fio:
+                raise Exception(f"Empty fio log {subtest}?")
+
+            for line in fio:
+                if "READ" in line:
+                    row[subtest + '_R'] = get_io_bw_from_line(line)
+                elif "WRITE" in line:
+                    row[subtest + '_W'] = get_io_bw_from_line(line)
+        result[t] = row
+    return result
+
+
+# Dictionary to convert units (factor from the given unit to MiB)
+units = {
+    'B': 1/1024/1024,
+    'KiB': 1/1024,
+    'MiB': 1,
+    'GiB': 1024,
+    'TiB': 1024*1024,
+}
+
+
+def get_io_bw_from_line(line) -> float:
+    """Get the IO bandwidth from line and convert to MiB/s.
+
+    The first ``bw=<number><unit>`` occurrence of the line is used, where
+    the number may be an integer or a decimal of any length and the unit is
+    one of the keys of ``units``.
+
+    Return the IO bandwidth in MiB/s
+
+    Raises Exception if the line contains no such bandwidth.
+    """
+    #    READ: bw=32.5MiB/s (34.1MB/s), 32.5MiB/s-32.5MiB/s (34.1MB/s-34.1MB/s), io=1954MiB (2048MB), run=60022-60022msec
+    # The fraction is optional as a whole, so single-digit values also match.
+    match = re.search(r'bw=(\d+(?:\.\d+)?)(B|KiB|MiB|GiB|TiB)', line)
+    if not match:
+        raise Exception("Could not extract bw from fio line.")
+    # return in MiB/s
+    return float(match.group(1)) * units[match.group(2)]
--- a/.github/actions/k-bench/evaluate/evaluators/network.py
+++ b/.github/actions/k-bench/evaluate/evaluators/network.py
@ -0,0 +1,83 @@
+"""Parse the iperf logs.
+
+Extracts the bandwidth for sending and receiving,
+from intranode and internode network benchmarks.
+
+Example log file (extract the bitrate for sending and receiving):
+...
+s1:  - - - - - - - - - - - - - - - - - - - - - - - - -
+s1:  [ ID] Interval           Transfer     Bitrate         Retr
+s1:  [  5]   0.00-90.00  sec  11.0 GBytes  1.05 Gbits/sec  509             sender
+s1:  [  5]   0.00-90.05  sec  11.1 GBytes  1.05 Gbits/sec                  receiver
+s1:
+s1:  iperf Done.
+"""
+import os
+import re
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict
+
+# result key prefix -> directory (below the test path) holding the iperf logs
+subtests = {
+    'net_internode':    'dp_network_internode',
+    'net_intranode':    'dp_network_intranode',
+}
+
+
+def eval(tests: Dict[str, str]) -> Dict[str, Dict[str, float]]:
+    """Read the results of the network tests.
+
+    For every test and subtest, the first ``iperfclient.out`` found
+    recursively below the subtest directory is read. The first line
+    containing "sender" and the first line containing "receiver" are parsed
+    with ``get_speed_from_line`` and stored under ``<subtest>_snd`` and
+    ``<subtest>_rcv``.
+
+    Args:
+        tests: Maps a test name to the directory holding its K-Bench output.
+
+    Returns:
+        A dictionary mapping each test name to its row of throughput values.
+
+    Raises:
+        Exception: If an iperfclient.out is missing or empty.
+    """
+    result = {}
+    for t, test_path in tests.items():
+        row = defaultdict(str)
+        for subtest, log_dir in subtests.items():
+            base_path = os.path.join(test_path, log_dir)
+            try:
+                log_path = next(Path(base_path).rglob('iperfclient.out'))
+            except StopIteration:
+                raise Exception(
+                    f"Error: No iperfclient.out found for network test {subtest} in {base_path}"
+                )
+
+            with open(log_path) as f:
+                iperfclient = f.readlines()
+
+            if not iperfclient:
+                raise Exception("Empty iperfclient?")
+
+            snd_key = subtest + '_snd'
+            rcv_key = subtest + '_rcv'
+            for line in iperfclient:
+                # Keep the first line of each direction. Stopping after the
+                # very first hit would never reach the receiver line, which
+                # follows the sender line in the iperf summary.
+                if "sender" in line:
+                    if snd_key not in row:
+                        row[snd_key] = get_speed_from_line(line)
+                elif "receiver" in line:
+                    if rcv_key not in row:
+                        row[rcv_key] = get_speed_from_line(line)
+                if snd_key in row and rcv_key in row:
+                    break
+        result[t] = row
+    return result
+
+
+# Dictionary to convert units (factor from the given unit to Mbit)
+units = {
+    'bits': 1e-6,
+    'Kbits': 1e-3,
+    'Mbits': 1,
+    'Gbits': 1000,
+    'Tbits': 1e6,
+}
+
+
+def get_speed_from_line(line) -> float:
+    """Extract the network throughput from the line.
+
+    The line has to end with ``<number> <unit>/sec``, optional digits and
+    whitespace, and the word "sender" or "receiver". The number may be an
+    integer or a decimal of any length; the unit is one of the keys of
+    ``units``.
+
+    Returns the throughput as Mbit/s.
+
+    Raises Exception if the line does not have that form.
+    """
+    # The fraction is optional as a whole, so single-digit values also match.
+    match = re.search(
+        r'(\d+(?:\.\d+)?)\s(bits|Kbits|Mbits|Gbits|Tbits)\/sec[\s\d]+(sender|receiver)$',
+        line)
+    if not match:
+        raise Exception("Could not extract speed from iperf line.")
+    # return in Mbit/s
+    return float(match.group(1)) * units[match.group(2)]
--- a/.github/actions/k-bench/evaluate/main.py
+++ b/.github/actions/k-bench/evaluate/main.py
@ -0,0 +1,159 @@
+"""Parse logs of K-Bench tests and generate performance graphs."""
+import json
+import os
+from collections import defaultdict
+
+import numpy as np
+from evaluators import default, fio, network
+from matplotlib import pyplot as plt
+
+# Fill color of all bars.
+BAR_COLOR = '#90FF99'  # Mint Green
+
+# Rotate bar labels by X degrees
+LABEL_ROTATE_BY = 30
+# Font size of the x-axis tick labels.
+LABEL_FONTSIZE = 9
+
+# Lookup dictionaries for the x axis: each maps a key of the combined result
+# row to the label shown below its bar. The dictionary order is the order of
+# the bars in the chart.
+
+# Unit label shown on the API latency charts.
+api_suffix = 'ms'
+pod_key2header = {
+    'pod_create':   'Pod Create',
+    'pod_list':     'Pod List',
+    'pod_get':      'Pod Get',
+    'pod_update':   'Pod Update',
+    'pod_delete':   'Pod Delete',
+}
+svc_key2header = {
+    'svc_create':   'Service Create',
+    'svc_list':     'Service List',
+    'svc_update':   'Service Update',
+    'svc_delete':   'Service Delete',
+    'svc_get':      'Service Get',
+}
+depl_key2header = {
+    'depl_create':  'Deployment Create',
+    'depl_list':    'Deployment List',
+    'depl_update':  'Deployment Update',
+    'depl_scale':   'Deployment Scale',
+    'depl_delete':  'Deployment Delete',
+}
+
+# Unit label shown on the storage chart.
+fio_suffix = 'MiB/s'
+# NOTE(review): the labels say "seq." while the fio log files read by the fio
+# evaluator are named "...rand..." - confirm which access pattern is measured.
+fio_key2header = {
+    'fio_root_async_R70W30_R':   'async_R70W30 mix,\n seq. reads',
+    'fio_root_async_R70W30_W':   'async_R70W30 mix,\n seq. writes',
+    'fio_root_async_R100W0_R':   'async_R100W0 mix,\n seq. reads',
+    'fio_root_async_R0W100_W':   'async_R0W100 mix,\n seq. writes',
+}
+
+# Unit label shown on the network chart. It is also embedded in the labels
+# below, so it appears both in the chart title and below each bar.
+net_suffix = 'Mbit/s'
+net_key2header = {
+    'net_internode_snd':            f'iperf internode \n send ({net_suffix})',
+    'net_intranode_snd':            f'iperf intranode \n send ({net_suffix})',
+}
+
+
+def configure() -> dict:
+    """Read the evaluation settings from the environment.
+
+    KBENCH_RESULTS has to name an existing directory (the K-Bench results
+    root folder) and CSP has to name the cloud service provider.
+
+    Raises Exception if either variable is unset or empty, or if
+    KBENCH_RESULTS is not a directory.
+
+    Returns a config dict with the BASE_PATH to the tests
+    and the cloud service provider CSP.
+    """
+    results_dir = os.getenv('KBENCH_RESULTS')
+    if not (results_dir and os.path.isdir(results_dir)):
+        raise Exception(
+            "Environment variable 'KBENCH_RESULTS' "
+            "needs to point to the K-Bench results root folder"
+        )
+
+    provider = os.getenv('CSP')
+    if not provider:
+        raise Exception(
+            "Environment variable 'CSP' "
+            "needs to name the cloud service provider."
+        )
+
+    return {'BASE_PATH': results_dir, 'CSP': provider}
+
+
+def bar_chart(data, headers, title='', suffix='', val_label=True, y_log=False):
+    """Generate a bar chart from data.
+
+    Args:
+        data (list): List of value points.
+        headers (iterable): Headers (x-axis), one per value point. Any
+            iterable is accepted, e.g. a dict view; it is copied into a list.
+        title (str, optional): The title for the chart. Defaults to "".
+        suffix (str, optional): The suffix for values e.g. "MiB/s". Defaults to "".
+        val_label (bool, optional): Put a label of the value over the bar chart. Defaults to True.
+        y_log (bool, optional): Set the y-axis to a logarithmic scale. Defaults to False.
+    Returns:
+        fig (matplotlib.figure.Figure): The new figure. It is neither saved
+            nor closed here; that is left to the caller.
+    """
+    # Materialize once so the labels are a plain sequence, whatever kind of
+    # iterable the caller passed.
+    headers = list(headers)
+
+    fig, ax = plt.subplots(figsize=(8, 5))
+    fig.patch.set_facecolor('white')
+    ax.set_xticks(np.arange(len(headers)))
+    ax.set_xticklabels(headers)
+    if y_log:
+        ax.set_yscale('log')
+    bars = ax.bar(headers, data, color=BAR_COLOR, edgecolor='black')
+    if val_label:
+        # '%g' is filled in by matplotlib with the height of each bar.
+        ax.bar_label(bars, fmt=f'%g {suffix}')
+    plt.setp(ax.get_xticklabels(), fontsize=LABEL_FONTSIZE, rotation=LABEL_ROTATE_BY)
+    ax.set_title(f'{title} ({suffix})')
+    fig.tight_layout()
+    return fig
+
+
+def main() -> None:
+    """Read, parse and evaluate the K-Bench tests.
+
+    Writes the combined results to kbench_results.json and one bar chart
+    per API group (api_<i>_perf.png), for the network (net_perf.png) and
+    for the storage (storage_perf.png). All files are written with relative
+    paths, i.e. into the current working directory.
+
+    Raises Exception if the configuration is incomplete or the benchmark
+    directory for the configured provider does not exist.
+    """
+    config = configure()
+
+    benchmark_path = os.path.join(
+        config['BASE_PATH'],
+        "kbench-constellation-" + config['CSP'],
+    )
+    if not os.path.exists(benchmark_path):
+        raise Exception(f'Path to benchmarks {benchmark_path} does not exist.')
+
+    tests = {f"constellation-{config['CSP']}": benchmark_path}
+
+    # Execute tests
+    default_results = default.eval(tests=tests)
+    network_results = network.eval(tests=tests)
+    fio_results = fio.eval(tests=tests)
+
+    combined_results = defaultdict(dict)
+    for test in tests:
+        for partial in (default_results, network_results, fio_results):
+            combined_results[test].update(partial[test])
+
+    # Write the compact results.
+    with open('kbench_results.json', 'w') as w:
+        json.dump(combined_results, fp=w, sort_keys=False, indent=2)
+
+    # Generate graphs for the first (and currently only) test.
+    subject = next(iter(combined_results))
+    data = combined_results[subject]
+
+    # (file name, key -> header table, chart title, unit suffix) per chart.
+    # The Kubernetes API benchmarks get one chart per resource kind.
+    charts = [
+        (f'api_{i}_perf.png', api, "API Latency", api_suffix)
+        for i, api in enumerate([pod_key2header, svc_key2header, depl_key2header])
+    ]
+    charts.append(('net_perf.png', net_key2header, "Network Throughput", net_suffix))
+    charts.append(('storage_perf.png', fio_key2header, "Storage Throughput", fio_suffix))
+
+    for filename, key2header, title, suffix in charts:
+        fig = bar_chart(
+            data=[data[key] for key in key2header],
+            headers=list(key2header.values()),
+            title=title,
+            suffix=suffix,
+        )
+        # Save through the returned figure rather than the implicit current
+        # figure, and close it so the figures do not pile up in pyplot.
+        fig.savefig(filename, bbox_inches="tight")
+        plt.close(fig)
+
+
+if __name__ == "__main__":
+    main()
--- a/.github/actions/k-bench/evaluate/requirements.txt
+++ b/.github/actions/k-bench/evaluate/requirements.txt
@ -0,0 +1,2 @@
+matplotlib==3.6.0
+numpy==1.23.4
--- a/.gitignore
+++ b/.gitignore
@ -45,3 +45,6 @@ image/config.mk

 # macOS
 .DS_Store
+
+# Python
+__pycache__/