Skip to content

Commit

Permalink
Merge pull request #129 from cevich/bench_stuff
Browse files Browse the repository at this point in the history
[WIP] Add tool for handling podman benchmark data
  • Loading branch information
cevich authored Mar 8, 2023
2 parents 63703d3 + aa4ccb1 commit bbd4a0a
Show file tree
Hide file tree
Showing 12 changed files with 782 additions and 0 deletions.
43 changes: 43 additions & 0 deletions bench_stuff/.install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash

# Installs bench_stuff.py and a python virtual environment
# to execute with. NOT intended to be used directly
# by humans, should only be used indirectly by running
# ../bin/install_automation.sh <ver> bench_stuff

set -eo pipefail

source "$AUTOMATION_LIB_PATH/anchors.sh"
source "$AUTOMATION_LIB_PATH/console_output.sh"

INSTALL_PREFIX=$(realpath "$AUTOMATION_LIB_PATH/../")
# Assume the directory this script is in, represents what is being installed
INSTALL_NAME=$(basename "$(dirname "${BASH_SOURCE[0]}")")
AUTOMATION_VERSION=$(automation_version)
[[ -n "$AUTOMATION_VERSION" ]] || \
    die "Could not determine version of common automation libs, was 'install_automation.sh' successful?"

[[ -n "$(type -P virtualenv)" ]] || \
    die "$INSTALL_NAME requires python3-virtualenv"

echo "Installing $INSTALL_NAME version $AUTOMATION_VERSION into $INSTALL_PREFIX"

# Use an array so the (possibly empty) ownership arguments expand to
# zero words when unset, instead of one empty-string argument.
declare -a inst_perm_arg=()
if [[ $UID -eq 0 ]]; then
    inst_perm_arg=(-o root -g root)
fi

cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
virtualenv --clear --download \
    "$AUTOMATION_LIB_PATH/bs.venv"
(
    source "$AUTOMATION_LIB_PATH/bs.venv/bin/activate"
    pip3 install --requirement ./requirements.txt
    deactivate
)
# Non-executable (0644): the wrapper runs this via 'python3 .../bench_stuff.py'
install -v "${inst_perm_arg[@]}" -m '0644' -D -t "$INSTALL_PREFIX/lib/bs.venv/bin" \
    ./bench_stuff.py
# Bug fix: install the './bench_stuff' wrapper shipped in this directory.
# The original referenced './cirrus-ci_artifacts', a copy-paste remnant from
# another tool; no such file exists here and the install would always fail.
install -v "${inst_perm_arg[@]}" -D -t "$INSTALL_PREFIX/bin" ./bench_stuff

# Needed for installer testing
echo "Successfully installed $INSTALL_NAME"
8 changes: 8 additions & 0 deletions bench_stuff/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
### Performance metrics stuffer

Python script which digests a `benchmarks.env` and `benchmarks.csv` file
into a meaningful JSON document-set, then uploads it to google firebase.
It's intended to be run from inside a container, in a podman CI environment.
Besides the two benchmark-related files, it requires that the env. var.
`$GOOGLE_APPLICATION_CREDENTIALS` be set to the path of a file containing
JSON-encoded credentials with access to Firebase.
24 changes: 24 additions & 0 deletions bench_stuff/bench_stuff
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash

# This script wraps bench_stuff inside a python
# virtual environment setup at install time. It should not
# be executed prior to installation.

set -e

# This is a convenience for callers that don't separately source this first
# in their automation setup.
if [[ -z "$AUTOMATION_LIB_PATH" ]] && [[ -r /etc/automation_environment ]]; then
    source /etc/automation_environment
fi

if [[ -z "$AUTOMATION_LIB_PATH" ]]; then
    (
        echo "ERROR: Expecting \$AUTOMATION_LIB_PATH to be defined with the"
        echo "       installation directory of automation tooling."
    ) >&2   # '>&2' is portable; '> /dev/stderr' breaks under some redirections
    exit 1
fi

# Activate the venv created by .install.sh, then replace this shell with
# the python tool so its exit status propagates unchanged to the caller.
source "$AUTOMATION_LIB_PATH/bs.venv/bin/activate"
exec python3 "$AUTOMATION_LIB_PATH/bs.venv/bin/bench_stuff.py" "$@"
186 changes: 186 additions & 0 deletions bench_stuff/bench_stuff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#!/usr/bin/env python3

"""
Digest `benchmarks.env` and `benchmarks.csv`, uploads to google firebase.
Expects to be called with $GOOGLE_APPLICATION_CREDENTIALS env. var. value
pointing at a JSON service account key file, with access to write firestore
data.
"""

import csv
import datetime
import os
import sys
from argparse import ArgumentParser
from math import ceil
from pathlib import Path
from pprint import pformat

# Ref: https://pypi.org/project/binary/
from binary import BinaryUnits, DecimalUnits, convert_units

# Ref: https://github.com/rconradharris/envparse
from envparse import env

# Ref: https://cloud.google.com/firestore/docs/create-database-server-client-library
from google.cloud import firestore

# Set True when --verbose flag is set
VERBOSE = False

# Set True when --dry-run flag is set
DRYRUN = False


def v(msg):
    """Emit msg to stdout, but only when verbose mode (--verbose) is active."""
    if not VERBOSE:
        return
    print(msg)


def die(msg, code=1):
    """Write an ERROR-prefixed message to stderr, then terminate with code."""
    print(f"ERROR: {msg}", file=sys.stderr)
    sys.exit(code)


# Ref: https://docs.python.org/3.10/library/argparse.html
def get_args(argv):
"""Return parsed argument namespace object."""
parser = ArgumentParser(prog="bench_stuff", description=__doc__)
parser.add_argument('-v', '--verbose',
dest='verbose', action='store_true', default=False,
help='Show internal state/status while processing input/output.')
parser.add_argument('-d', '--dry-run',
dest='dryrun', action='store_true', default=False,
help="Process benchmark data but don't try to store anything.")
parser.add_argument('bench_dir', metavar='<benchmarks dirpath>', type=Path,
help=("Path to subdirectory containing benchmarks.env"
" and benchmarks.csv files."))
parsed = parser.parse_args(args=argv[1:])

# Ref: https://docs.python.org/3.10/library/pathlib.html#operators
env_path = parsed.bench_dir / "benchmarks.env"
csv_path = parsed.bench_dir / "benchmarks.csv"
f_err_fmt = "Expecting a path to a directory containing an {0} file, got '{1}' instead."
for file_path in (env_path, csv_path):
if not file_path.exists() or not file_path.is_file():
parser.error(f_err_fmt.format(file_path.name, str(file_path.parent)))

gac = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
if gac is None or gac.strip() == "":
parser.error("Expecting $GOOGLE_APPLICATION_CREDENTIALS to be defined/non-empty")
# Google's firestore module will ultimately consume this, do some
# basic checks up-front to provide a quick error message if possible.
gac_path = Path(gac)
if not gac_path.exists() or not gac_path.is_file():
parser.error(f"Expecting $GOOGLE_APPLICATION_CREDENTIALS value '{gac_path}'"
f" to be an existing file.")

return (parsed.verbose, parsed.dryrun, env_path, csv_path)


def handle_units(row):
    """
    Convert each value of a CSV row dict into a plain numeric value.

    The end-goal is to do calculations on this data and present it
    to humans.  Converting all units into fundamental / numeric values before
    storage scales much better than burdening a script during the final
    human-presentation step where it may need to traverse hundreds of records.

    Dies (exit code 3) on any value whose unit-suffix is unrecognized.
    """
    result = {}
    for key, value in row.items():
        # Normalize so suffix checks below only need upper-case forms.
        value = value.upper()
        if value.endswith('S'):
            # Seconds.  NB: rstrip() takes a character *set* — this strips
            # any trailing 'S' and space characters, not the literal " S".
            result[key] = float(value.rstrip(' S'))
        elif value.endswith('%'):
            # Percentage; stored as a bare float (e.g. "42 %" -> 42.0).
            result[key] = float(value.rstrip(' %'))
        elif value.endswith('KB'):
            # strip() also takes a character set: removes 'K'/'B'/spaces
            # from both ends, leaving only the numeric text.
            raw = float(value.strip(' KB'))
            # First element is value, second is unit-string. Only numeric value is needed
            # NOTE(review): BinaryUnits.KB presumably treats input as KiB — confirm
            # against the 'binary' package docs if exactness matters.
            float_bytes = convert_units(raw, BinaryUnits.KB, DecimalUnits.B)[0]
            # Don't try to store partial-bytes, always round-up.
            result[key] = int(ceil(float_bytes))
        elif value.endswith('MB'):
            raw = float(value.strip(' MB'))
            float_bytes = convert_units(raw, BinaryUnits.MB, DecimalUnits.B)[0]
            result[key] = int(ceil(float_bytes))
        else:
            # Don't store "bad" data in database, bail out so somebody can fix this script.
            die(f"Can't parse units from '{key}' value '{value}'", code=3)
        v(f" Converted '{value}' -> {result[key]}")
    return result


def insert_data(bench_basis, meta_data, bench_data):
    """
    Store bench_data and meta_data in an orderly fashion within GCP firestore.

    Layout: benchmarks/<instance-type> documents hold the basis details;
    each data-point lands in a 'data' sub-collection keyed by Cirrus task ID.
    """
    # Client() authenticates via $GOOGLE_APPLICATION_CREDENTIALS
    # (existence validated earlier in get_args()).
    db = firestore.Client()
    batch = db.batch()  # Ensure data addition happens atomically
    # Categorize all benchmarks based on the instance-type they ran on.
    doc_ref = db.collection('benchmarks').document(bench_basis['type'])
    # Sub-collections must be anchored by a document, include all benchmark basis-details.
    batch.set(doc_ref, bench_basis, merge=True)  # Document likely to already exist
    v(f"Reticulating {bench_basis['type']} document for task {meta_data['task']}")
    # Data points and metadata stored in a sub-collection of basis-document
    data_ref = doc_ref.collection('data').document(str(meta_data['task']))
    # Having data-point and meta-data nested in a document makes indexing simpler
    item = {
        'meta': meta_data,
        'point': bench_data
    }
    batch.set(data_ref, item)
    batch.commit()
    v("Data point and environment details commited to database")


def main(env_path, csv_path):
    """
    Load environment basis, load and convert csv data into a nosql database.

    env_path: Path to the benchmarks.env file (envparse format).
    csv_path: Path to the benchmarks.csv file (one row per test).
    Dies unless $BENCH_ENV_VER == 1; skips the firestore upload when the
    module-level DRYRUN flag is set.
    """
    v(f"Processing environment '{env_path}' and benchmarks '{csv_path}'")
    env.read_envfile(env_path)

    if env.int('BENCH_ENV_VER') != 1:
        die("Only version 1 of $BENCH_ENV_VER is supported")

    # Hardware/instance details the benchmark numbers depend upon.
    bench_basis = {
        'cpu': env.int('CPUTOTAL'),
        # Total memory normalized to bytes, rounded up to a whole byte.
        'mem': int(ceil(convert_units(env.int('MEMTOTALKB'), BinaryUnits.KB, DecimalUnits.B)[0])),
        'arch': env.str('UNAME_M'),
        'type': env.str('INST_TYPE'),
    }
    v(f"Basis: {pformat(bench_basis)}")

    meta_data = {
        'ver': env.int('BENCH_ENV_VER'),  # identifies this metadata schema
        # NOTE(review): utcnow() returns a naive datetime and is deprecated
        # in newer Pythons — confirm firestore stores it as intended (UTC).
        'stamp': datetime.datetime.utcnow(),
        'build': env.int('CIRRUS_BUILD_ID'),
        'task': env.int('CIRRUS_TASK_ID'),  # collection-key
        # Will be pull/# for PRs; branch-name for branches
        'branch': env.str('CIRRUS_BRANCH'),
        'dist': env.str('DISTRO_NV'),
        'kern': env.str('UNAME_R'),
    }
    bench_data = {}

    # Each CSV row is keyed by its "Test Name" column; all remaining
    # columns are unit-converted into plain numerics by handle_units().
    with open(csv_path) as csv_file:
        reader = csv.DictReader(csv_file, dialect='unix', skipinitialspace=True)
        for row in reader:
            test_name = row.pop("Test Name")
            bench_data[test_name] = handle_units(row)
    v(f"Data: {pformat(bench_data)}")

    if not DRYRUN:
        insert_data(bench_basis, meta_data, bench_data)
        v(f"Added benchmark data for task {meta_data['task']}")


if __name__ == "__main__":
    # get_args() returns (verbose, dryrun, env_path, csv_path)
    args = get_args(sys.argv)
    if args[0]:
        VERBOSE = True
        v("Verbose-mode enabled")
    if args[1]:
        DRYRUN = True
        v("Dry-run: Will not send data to firebase")
    # Bug fix: main() accepts exactly (env_path, csv_path).  The original
    # called main(*args[1:]), which also passed the dry-run flag as a third
    # positional argument and raised TypeError on every invocation.
    main(*args[2:])
22 changes: 22 additions & 0 deletions bench_stuff/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
binary~=1.0
cachetools~=5.3
certifi~=2022.12
charset-normalizer~=3.1
envparse~=0.2
google-api-core~=2.11
google-auth~=2.16
google-cloud~=0.34
google-cloud-core~=2.3
google-cloud-firestore~=2.10
googleapis-common-protos~=1.58
grpcio~=1.51
grpcio-status~=1.51
idna~=3.4
proto-plus~=1.22
protobuf~=4.22
pyasn1~=0.4
pyasn1-modules~=0.2
requests~=2.28
rsa~=4.9
six~=1.16
urllib3~=1.26
1 change: 1 addition & 0 deletions bench_stuff/test/bench_stuff.py
29 changes: 29 additions & 0 deletions bench_stuff/test/run_all_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash

set -e

TESTDIR=$(dirname "${BASH_SOURCE[0]}")

if [[ "$GITHUB_ACTIONS" == "true" ]]; then
    echo "Lint/Style checking not supported under github actions: Skipping"
    exit 0
fi

# Bug fix: the original '[[ -x $(type -P flake8-3) ]]' collapses to the
# one-argument test '[[ -x ]]' when flake8-3 is absent (empty, unquoted
# expansion), which is always TRUE — the "not found" branch never ran.
if [[ -n "$(type -P flake8-3)" ]]; then
    cd "$TESTDIR"
    # NOTE(review): 'set -a' auto-exports every variable assigned below;
    # presumably intended for the venv/test subprocesses — confirm.
    set -a
    virtualenv testvenv
    source testvenv/bin/activate
    testvenv/bin/python -m pip install --upgrade pip
    pip3 install --requirement ../requirements.txt
    set +a

    ./test_bench_stuff.py -v

    cd ..
    flake8-3 --max-line-length=100 ./bench_stuff.py
    flake8-3 --max-line-length=100 --extend-ignore=D101,D102,D103,D105 test/test_bench_stuff.py
else
    # Fixed binary name in the message (was 'flake-8-3'); diagnostics go to stderr.
    echo "Can't find flake8-3 binary, is script executing inside CI container?" >&2
    exit 1
fi
Loading

0 comments on commit bbd4a0a

Please sign in to comment.