AB#2191 Evaluate K-Bench benchmarks in CI
Install Python for the K-Bench evaluation. Add scripts to evaluate the K-Bench results in CI. Attach graphs to the workflow results in GitHub Actions.
This commit is contained in:
  parent f4ff473677
  commit 1952eb5721
.github/actions/k-bench/action.yml (vendored): 25 lines changed

@@ -12,6 +12,15 @@ inputs:
 runs:
   using: "composite"
   steps:
+    - name: Setup python
+      uses: actions/setup-python@b55428b1882923874294fa556849718a1d7f2ca5 # tag=v4.2.0
+      with:
+        python-version: "3.10"
+
+    - name: Install evaluation dependencies
+      shell: bash
+      run: pip install -r .github/actions/k-bench/evaluate/requirements.txt
+
     - name: Checkout patched K-Bench
       uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # tag=v3.1.0
       with:
@@ -104,3 +113,19 @@ runs:
       with:
         path: "k-bench/out/kbench-constellation-${{ inputs.cloudProvider }}"
         name: "k-bench-constellation-${{ inputs.cloudProvider }}"
+
+    - name: Parse test results and create diagrams
+      shell: bash
+      run: python .github/actions/k-bench/evaluate/main.py
+      env:
+        KBENCH_RESULTS: ${{ github.workspace }}/k-bench/out/
+        CSP: ${{ inputs.cloudProvider }}
+
+    - name: Upload benchmark results
+      uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8 # tag=v3.1.0
+      if: ${{ !env.ACT }}
+      with:
+        path: |
+          *_perf.png
+          kbench_results.json
+        name: "benchmark_results"
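Outside of GitHub Actions, the two new evaluation steps amount to installing the pinned dependencies and invoking the evaluation script with the two environment variables set. A minimal local-run sketch (the CSP value 'azure' is a placeholder for whatever inputs.cloudProvider carries):

import os
import subprocess

# Equivalent of the "Install evaluation dependencies" step.
subprocess.run(
    ['pip', 'install', '-r', '.github/actions/k-bench/evaluate/requirements.txt'],
    check=True)

# Equivalent of the "Parse test results and create diagrams" step.
env = dict(os.environ,
           KBENCH_RESULTS=os.path.join(os.getcwd(), 'k-bench', 'out', ''),
           CSP='azure')  # placeholder value
subprocess.run(['python', '.github/actions/k-bench/evaluate/main.py'],
               env=env, check=True)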
.github/actions/k-bench/evaluate/evaluators/__init__.py (vendored, new file): 0 lines

.github/actions/k-bench/evaluate/evaluators/default.py (vendored, new file): 71 lines

@@ -0,0 +1,71 @@
+"""Evaluator for the K-Bench default test."""
+import os
+import re
+from collections import defaultdict
+from typing import Dict
+
+pod_latencies = {
+    'pod_create': 'create pod latency:',
+    'pod_list': 'list pod latency:',
+    'pod_get': 'get pod latency:',
+    'pod_update': 'update pod latency:',
+    'pod_delete': 'delete pod latency:',
+}
+
+deployment_latencies = {
+    'depl_create': 'create deployment latency:',
+    'depl_list': 'list deployment latency:',
+    'depl_update': 'update deployment latency:',
+    'depl_scale': 'scale deployment latency:',
+    'depl_delete': 'delete deployment latency:',
+}
+
+service_latencies = {
+    'svc_create': 'create service latency:',
+    'svc_list': 'list service latency:',
+    'svc_get': 'get service latency:',
+    'svc_update': 'update service latency:',
+    'svc_delete': 'delete service latency:',
+}
+
+
+def eval(tests: Dict[str, str]) -> Dict[str, Dict[str, float]]:
+    """Read the results of the default tests.
+
+    Return a result dictionary.
+    """
+    result = {}
+    for t in tests:
+        row = defaultdict(float)
+        # read the default result file
+        kbench = []
+        with open(os.path.join(tests[t], 'default', 'kbench.log'), 'r') as f:
+            kbench = f.readlines()
+
+        if not kbench:
+            raise Exception("Empty kbench.log")
+
+        subtests = [pod_latencies, service_latencies, deployment_latencies]
+        for latency_dict in subtests:
+            # Get the API Call Latencies (median)
+            for key in latency_dict:
+                line = get_line_containing_needle(
+                    lines=kbench, needle=latency_dict[key])
+                median = get_median_from_line(line=line)
+                row[key] = float(median)
+
+        result[t] = row
+    return result
+
+
+def get_median_from_line(line):
+    """Extract the value (median) from the line."""
+    return re.search(r'\s(\d+\.\d+)(.+)', line).group(1)
+
+
+def get_line_containing_needle(lines, needle):
+    """Find matching line from list of lines."""
+    matches = list(filter(lambda l: needle in l, lines))
+    if len(matches) > 1:
+        raise Exception(f"'{needle}' matched multiple times.")
+    return matches[0]
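The two helpers above drive all of the latency parsing; a self-contained spot check with a hypothetical kbench.log line (illustrative text, not real output):

import re

line = 'create pod latency: 12.34 ms'  # hypothetical log line
median = re.search(r'\s(\d+\.\d+)(.+)', line).group(1)
print(median)  # prints '12.34'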
.github/actions/k-bench/evaluate/evaluators/fio.py (vendored, new file): 81 lines

@@ -0,0 +1,81 @@
+"""Parse the fio logs.
+
+Extracts the bandwidth for I/O
+from various fio benchmarks.
+
+Example log file (extracting read and write bandwidth):
+...
+Run status group 0 (all jobs):
+  READ: bw=5311KiB/s (5438kB/s), 5311KiB/s-5311KiB/s (5438kB/s-5438kB/s), io=311MiB (327MB), run=60058-60058msec
+  WRITE: bw=2289KiB/s (2343kB/s), 2289KiB/s-2289KiB/s (2343kB/s-2343kB/s), io=134MiB (141MB), run=60058-60058msec
+...
+"""
+
+
+import os
+import re
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict
+
+# get different mixes of read/write IO as subtests
+subtests = {
+    'fio_root_async_R70W30': 'fio_async_randR70W30.out',
+    'fio_root_async_R100W0': 'fio_async_randR100W0.out',
+    'fio_root_async_R0W100': 'fio_async_randR0W100.out',
+}
+
+
+def eval(tests: Dict[str, str]) -> Dict[str, Dict[str, float]]:
+    """Read the results of the fio tests.
+    Return a result dictionary.
+    """
+    result = {}
+    for t in tests:
+        base_path = os.path.join(tests[t], 'dp_fio')
+        row = defaultdict(str)
+        for subtest in subtests:
+            try:
+                log_path = next(Path(base_path).rglob(subtests[subtest]))
+            except StopIteration:
+                raise Exception(
+                    f"Error: No fio log found for test {subtest} in {base_path}"
+                )
+
+            with open(log_path) as f:
+                fio = f.readlines()
+            if not fio:
+                raise Exception(f"Empty fio log {subtest}?")
+
+            for line in fio:
+                if "READ" in line:
+                    speed = get_io_bw_from_line(line)
+                    row[subtest + '_R'] = speed
+                elif "WRITE" in line:
+                    speed = get_io_bw_from_line(line)
+                    row[subtest + '_W'] = speed
+        result[t] = row
+    return result
+
+
+# Dictionary to convert units
+units = {
+    'KiB': 1/1024,
+    'MiB': 1,
+    'GiB': 1024,
+}
+
+
+def get_io_bw_from_line(line) -> float:
+    """Get the IO bandwidth from line and convert to MiB/s.
+
+    Return the IO bandwidth in MiB/s
+    """
+    # READ: bw=32.5MiB/s (34.1MB/s), 32.5MiB/s-32.5MiB/s (34.1MB/s-34.1MB/s), io=1954MiB (2048MB), run=60022-60022msec
+    match = re.search(r'bw=(\d+\.?\d+)(MiB|KiB|GiB)', line)
+    if not match:
+        raise Exception("Could not extract bw from fio line.")
+    num = float(match.group(1))
+    num = num * units[match.group(2)]
+    # return in MiB/s
+    return num
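A quick sanity check of get_io_bw_from_line's regex and unit table, using the READ line from the docstring example (5311 KiB/s is about 5.19 MiB/s):

import re

units = {'KiB': 1 / 1024, 'MiB': 1, 'GiB': 1024}
line = 'READ: bw=5311KiB/s (5438kB/s), io=311MiB (327MB), run=60058-60058msec'
match = re.search(r'bw=(\d+\.?\d+)(MiB|KiB|GiB)', line)
print(float(match.group(1)) * units[match.group(2)])  # ~5.19 (MiB/s)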
.github/actions/k-bench/evaluate/evaluators/network.py (vendored, new file): 83 lines

@@ -0,0 +1,83 @@
+"""Parse the iperf logs.
+
+Extracts the bandwidth for sending and receiving
+from intranode and internode network benchmarks.
+
+Example log file (extract the bitrate for sending and receiving):
+...
+s1: - - - - - - - - - - - - - - - - - - - - - - - - -
+s1: [ ID] Interval           Transfer     Bitrate         Retr
+s1: [  5]   0.00-90.00  sec  11.0 GBytes  1.05 Gbits/sec  509   sender
+s1: [  5]   0.00-90.05  sec  11.1 GBytes  1.05 Gbits/sec         receiver
+s1:
+s1: iperf Done.
+"""
+import os
+import re
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict
+
+subtests = {
+    'net_internode': 'dp_network_internode',
+    'net_intranode': 'dp_network_intranode',
+}
+
+
+def eval(tests: Dict[str, str]) -> Dict[str, Dict[str, float]]:
+    """Read the results of the network tests.
+    Return a result dictionary.
+    """
+    result = {}
+    for t in tests:
+        row = defaultdict(str)
+        for subtest in subtests:
+            base_path = os.path.join(tests[t], subtests[subtest])
+            try:
+                log_path = next(Path(base_path).rglob('iperfclient.out'))
+            except StopIteration:
+                raise Exception(
+                    f"Error: No iperfclient.out found for network test {subtest} in {base_path}"
+                )
+
+            with open(log_path) as f:
+                iperfclient = f.readlines()
+
+            if not iperfclient:
+                raise Exception("Empty iperfclient?")
+
+            for line in iperfclient:
+                if "sender" in line:
+                    speed = get_speed_from_line(line)
+                    row[subtest + '_snd'] = speed
+                    break
+                elif "receiver" in line:
+                    speed = get_speed_from_line(line)
+                    row[subtest + '_rcv'] = speed
+                    break
+        result[t] = row
+    return result
+
+
+# Dictionary to convert units
+units = {
+    'bits': 1e-6,
+    'Mbits': 1,
+    'Gbits': 1000,
+}
+
+
+def get_speed_from_line(line) -> float:
+    """Extract the network throughput from the line.
+
+    Returns the throughput as Mbit/s.
+    """
+    match = re.search(
+        r'(\d+\.?\d+)\s(bits|Mbits|Gbits)\/sec[\s\d]+(sender|receiver)$', line)
+    if not match:
+        raise Exception("Could not extract speed from iperf line.")
+    num = float(match.group(1))
+    num = num * units[match.group(2)]
+    # return in Mbit/s
+    return float(num)
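The same kind of spot check works for get_speed_from_line, using the sender line from the iperf example in the docstring (1.05 Gbits/sec converts to 1050 Mbit/s):

import re

units = {'bits': 1e-6, 'Mbits': 1, 'Gbits': 1000}
line = 's1: [  5]   0.00-90.00  sec  11.0 GBytes  1.05 Gbits/sec  509   sender'
match = re.search(
    r'(\d+\.?\d+)\s(bits|Mbits|Gbits)\/sec[\s\d]+(sender|receiver)$', line)
print(float(match.group(1)) * units[match.group(2)])  # 1050.0 (Mbit/s)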
.github/actions/k-bench/evaluate/main.py (vendored, new file): 159 lines

@@ -0,0 +1,159 @@
+"""Parse logs of K-Bench tests and generate performance graphs."""
+import json
+import os
+from collections import defaultdict
+
+import numpy as np
+from evaluators import default, fio, network
+from matplotlib import pyplot as plt
+
+BAR_COLOR = '#90FF99'  # Mint Green
+
+# Rotate bar labels by X degrees
+LABEL_ROTATE_BY = 30
+LABEL_FONTSIZE = 9
+
+# Some lookup dictionaries for the x-axis
+api_suffix = 'ms'
+pod_key2header = {
+    'pod_create': 'Pod Create',
+    'pod_list': 'Pod List',
+    'pod_get': 'Pod Get',
+    'pod_update': 'Pod Update',
+    'pod_delete': 'Pod Delete',
+}
+svc_key2header = {
+    'svc_create': 'Service Create',
+    'svc_list': 'Service List',
+    'svc_update': 'Service Update',
+    'svc_delete': 'Service Delete',
+    'svc_get': 'Service Get',
+}
+depl_key2header = {
+    'depl_create': 'Deployment Create',
+    'depl_list': 'Deployment List',
+    'depl_update': 'Deployment Update',
+    'depl_scale': 'Deployment Scale',
+    'depl_delete': 'Deployment Delete',
+}
+
+fio_suffix = 'MiB/s'
+fio_key2header = {
+    'fio_root_async_R70W30_R': 'async_R70W30 mix,\n seq. reads',
+    'fio_root_async_R70W30_W': 'async_R70W30 mix,\n seq. writes',
+    'fio_root_async_R100W0_R': 'async_R100W0 mix,\n seq. reads',
+    'fio_root_async_R0W100_W': 'async_R0W100 mix,\n seq. writes',
+}
+
+net_suffix = 'Mbit/s'
+net_key2header = {
+    'net_internode_snd': f'iperf internode \n send ({net_suffix})',
+    'net_intranode_snd': f'iperf intranode \n send ({net_suffix})',
+}
+
+
+def configure() -> dict:
+    """Set the config.
+
+    Raises an Exception if KBENCH_RESULTS or CSP is missing.
+
+    Returns a config dict with the BASE_PATH to the tests
+    and the cloud service provider CSP.
+    """
+    base_path = os.getenv('KBENCH_RESULTS', None)
+    if not base_path or not os.path.isdir(base_path):
+        raise Exception("Environment variable 'KBENCH_RESULTS' \
+needs to point to the K-Bench results root folder")
+
+    csp = os.getenv('CSP', None)
+    if not csp:
+        raise Exception("Environment variable 'CSP' \
+needs to name the cloud service provider.")
+    return {'BASE_PATH': base_path, 'CSP': csp}
+
+
+def bar_chart(data, headers, title='', suffix='', val_label=True, y_log=False):
+    """Generate a bar chart from data.
+
+    Args:
+        data (list): List of value points.
+        headers (list): List of headers (x-axis).
+        title (str, optional): The title for the chart. Defaults to "".
+        suffix (str, optional): The suffix for values e.g. "MiB/s". Defaults to "".
+        val_label (bool, optional): Put a label of the value over the bar chart. Defaults to True.
+        y_log (bool, optional): Set the y-axis to a logarithmic scale. Defaults to False.
+
+    Returns:
+        fig (matplotlib.pyplot.figure): The pyplot figure.
+    """
+    fig, ax = plt.subplots(figsize=(8, 5))
+    fig.patch.set_facecolor('white')
+    ax.set_xticks(np.arange(len(headers)))
+    ax.set_xticklabels(headers)
+    if y_log:
+        ax.set_yscale('log')
+    bars = ax.bar(headers, data, color=BAR_COLOR, edgecolor='black')
+    if val_label:
+        ax.bar_label(bars, fmt='%g {suffix}'.format(suffix=suffix))
+    plt.setp(ax.get_xticklabels(), fontsize=LABEL_FONTSIZE, rotation=LABEL_ROTATE_BY)
+    plt.title(f'{title} ({suffix})')
+    plt.tight_layout()
+    return fig
+
+
+def main() -> None:
+    """Read, parse and evaluate the K-Bench tests.
+
+    Generate a human-readable table and diagrams.
+    """
+    config = configure()
+
+    benchmark_path = os.path.join(
+        config['BASE_PATH'],
+        "kbench-constellation-" + config['CSP'],
+    )
+    if not os.path.exists(benchmark_path):
+        raise Exception(f'Path to benchmarks {benchmark_path} does not exist.')
+
+    tests = {f"constellation-{config['CSP']}": benchmark_path}
+
+    # Evaluate the test logs
+    default_results = default.eval(tests=tests)
+    network_results = network.eval(tests=tests)
+    fio_results = fio.eval(tests=tests)
+
+    combined_results = defaultdict(dict)
+    for test in tests:
+        combined_results[test].update(default_results[test])
+        combined_results[test].update(network_results[test])
+        combined_results[test].update(fio_results[test])
+
+    # Write the compact results.
+    with open('kbench_results.json', 'w') as w:
+        json.dump(combined_results, fp=w, sort_keys=False, indent=2)
+
+    # Generate graphs.
+    subject = list(combined_results.keys())[0]
+    data = combined_results[subject]
+
+    # Combine the evaluation of the Kubernetes API benchmarks
+    for i, api in enumerate([pod_key2header, svc_key2header, depl_key2header]):
+        api_data = [data[h] for h in api]
+        hdrs = api.values()
+        bar_chart(data=api_data, headers=hdrs, title="API Latency", suffix=api_suffix)
+        plt.savefig(f'api_{i}_perf.png', bbox_inches="tight")
+
+    # Network chart
+    net_data = [data[h] for h in net_key2header]
+    hdrs = net_key2header.values()
+    bar_chart(data=net_data, headers=hdrs, title="Network Throughput", suffix=net_suffix)
+    plt.savefig('net_perf.png', bbox_inches="tight")
+
+    # fio chart
+    fio_data = [data[h] for h in fio_key2header]
+    hdrs = fio_key2header.values()
+    bar_chart(data=fio_data, headers=hdrs, title="Storage Throughput", suffix=fio_suffix)
+    plt.savefig('storage_perf.png', bbox_inches="tight")
+
+
+if __name__ == "__main__":
+    main()
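bar_chart can also be previewed in isolation while tuning colors or label rotation; a sketch with placeholder numbers (not real benchmark results), assuming it runs from the evaluate/ directory so the imports resolve:

from main import bar_chart  # assumes cwd is .github/actions/k-bench/evaluate/

fig = bar_chart(
    data=[12.3, 4.5, 6.7],  # placeholder latencies
    headers=['Pod Create', 'Pod List', 'Pod Get'],
    title='API Latency',
    suffix='ms',
)
fig.savefig('preview.png', bbox_inches='tight')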
.github/actions/k-bench/evaluate/requirements.txt (vendored, new file): 2 lines

@@ -0,0 +1,2 @@
+matplotlib==3.6.0
+numpy==1.23.4
.gitignore (vendored): 3 lines changed

@@ -45,3 +45,6 @@ image/config.mk
 
 # macOS
 .DS_Store
+
+# Python
+__pycache__/