Skip to content

Commit

Permalink
Merge pull request #129 from cevich/bench_stuff
Browse files Browse the repository at this point in the history
[WIP] Add tool for handling podman benchmark data
  • Loading branch information
cevich authored Mar 8, 2023
2 parents 63703d3 + aa4ccb1 commit bbd4a0a
Show file tree
Hide file tree
Showing 12 changed files with 782 additions and 0 deletions.
43 changes: 43 additions & 0 deletions bench_stuff/.install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash

# Installs bench_stuff.py and a python virtual environment
# to execute with. NOT intended to be used directly
# by humans, should only be used indirectly by running
# ../bin/install_automation.sh <ver> bench_stuff

set -eo pipefail

source "$AUTOMATION_LIB_PATH/anchors.sh"
source "$AUTOMATION_LIB_PATH/console_output.sh"

INSTALL_PREFIX=$(realpath "$AUTOMATION_LIB_PATH/../")
# Assume the directory this script is in, represents what is being installed
INSTALL_NAME=$(basename "$(dirname "${BASH_SOURCE[0]}")")
AUTOMATION_VERSION=$(automation_version)
[[ -n "$AUTOMATION_VERSION" ]] || \
    die "Could not determine version of common automation libs, was 'install_automation.sh' successful?"

[[ -n "$(type -P virtualenv)" ]] || \
    die "$INSTALL_NAME requires python3-virtualenv"

echo "Installing $INSTALL_NAME version $AUTOMATION_VERSION into $INSTALL_PREFIX"

# Use an array so the (possibly empty) ownership arguments expand to
# zero words when unset, instead of one empty-string argument.
declare -a inst_perm_arg=()
if [[ $UID -eq 0 ]]; then
    inst_perm_arg=(-o root -g root)
fi

cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
virtualenv --clear --download \
    "$AUTOMATION_LIB_PATH/bs.venv"
(
    source "$AUTOMATION_LIB_PATH/bs.venv/bin/activate"
    pip3 install --requirement ./requirements.txt
    deactivate
)
# Non-executable (0644): the wrapper runs this via 'python3 .../bench_stuff.py'
install -v "${inst_perm_arg[@]}" -m '0644' -D -t "$INSTALL_PREFIX/lib/bs.venv/bin" \
    ./bench_stuff.py
# Bug fix: install the './bench_stuff' wrapper shipped in this directory.
# The original referenced './cirrus-ci_artifacts', a copy-paste remnant from
# another tool; no such file exists here and the install would always fail.
install -v "${inst_perm_arg[@]}" -D -t "$INSTALL_PREFIX/bin" ./bench_stuff

# Needed for installer testing
echo "Successfully installed $INSTALL_NAME"
8 changes: 8 additions & 0 deletions bench_stuff/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
### Performance metrics stuffer

Python script which digests a `benchmarks.env` and `benchmarks.csv` file
into a meaningful JSON document-set, then uploads it to google firebase.
It's intended to be run from inside a container, in a podman CI environment.
Besides the two benchmark-related files, it requires that the env. var.
`$GOOGLE_APPLICATION_CREDENTIALS` be set to the path of a file containing
JSON-encoded credentials with access to Firebase.
24 changes: 24 additions & 0 deletions bench_stuff/bench_stuff
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash

# This script wraps bench_stuff inside a python
# virtual environment setup at install time. It should not
# be executed prior to installation.

set -e

# This is a convenience for callers that don't separately source this first
# in their automation setup.
if [[ -z "$AUTOMATION_LIB_PATH" ]] && [[ -r /etc/automation_environment ]]; then
    source /etc/automation_environment
fi

if [[ -z "$AUTOMATION_LIB_PATH" ]]; then
    (
        echo "ERROR: Expecting \$AUTOMATION_LIB_PATH to be defined with the"
        echo "       installation directory of automation tooling."
    ) >&2   # '>&2' is portable; '> /dev/stderr' breaks under some redirections
    exit 1
fi

# Activate the venv created by .install.sh, then replace this shell with
# the python tool so its exit status propagates unchanged to the caller.
source "$AUTOMATION_LIB_PATH/bs.venv/bin/activate"
exec python3 "$AUTOMATION_LIB_PATH/bs.venv/bin/bench_stuff.py" "$@"
186 changes: 186 additions & 0 deletions bench_stuff/bench_stuff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#!/usr/bin/env python3

"""
Digest `benchmarks.env` and `benchmarks.csv`, uploads to google firebase.
Expects to be called with $GOOGLE_APPLICATION_CREDENTIALS env. var. value
pointing at a JSON service account key file, with access to write firestore
data.
"""

import csv
import datetime
import os
import sys
from argparse import ArgumentParser
from math import ceil
from pathlib import Path
from pprint import pformat

# Ref: https://pypi.org/project/binary/
from binary import BinaryUnits, DecimalUnits, convert_units

# Ref: https://github.com/rconradharris/envparse
from envparse import env

# Ref: https://cloud.google.com/firestore/docs/create-database-server-client-library
from google.cloud import firestore

# Set True when --verbose flag is set
VERBOSE = False

# Set True when --dry-run flag is set
DRYRUN = False


def v(msg):
    """Emit msg to stdout, but only when verbose mode (--verbose) is active."""
    if not VERBOSE:
        return
    print(msg)


def die(msg, code=1):
    """Write an ERROR-prefixed message to stderr, then terminate with code."""
    print(f"ERROR: {msg}", file=sys.stderr)
    sys.exit(code)


# Ref: https://docs.python.org/3.10/library/argparse.html
def get_args(argv):
"""Return parsed argument namespace object."""
parser = ArgumentParser(prog="bench_stuff", description=__doc__)
parser.add_argument('-v', '--verbose',
dest='verbose', action='store_true', default=False,
help='Show internal state/status while processing input/output.')
parser.add_argument('-d', '--dry-run',
dest='dryrun', action='store_true', default=False,
help="Process benchmark data but don't try to store anything.")
parser.add_argument('bench_dir', metavar='<benchmarks dirpath>', type=Path,
help=("Path to subdirectory containing benchmarks.env"
" and benchmarks.csv files."))
parsed = parser.parse_args(args=argv[1:])

# Ref: https://docs.python.org/3.10/library/pathlib.html#operators
env_path = parsed.bench_dir / "benchmarks.env"
csv_path = parsed.bench_dir / "benchmarks.csv"
f_err_fmt = "Expecting a path to a directory containing an {0} file, got '{1}' instead."
for file_path in (env_path, csv_path):
if not file_path.exists() or not file_path.is_file():
parser.error(f_err_fmt.format(file_path.name, str(file_path.parent)))

gac = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
if gac is None or gac.strip() == "":
parser.error("Expecting $GOOGLE_APPLICATION_CREDENTIALS to be defined/non-empty")
# Google's firestore module will ultimately consume this, do some
# basic checks up-front to provide a quick error message if possible.
gac_path = Path(gac)
if not gac_path.exists() or not gac_path.is_file():
parser.error(f"Expecting $GOOGLE_APPLICATION_CREDENTIALS value '{gac_path}'"
f" to be an existing file.")

return (parsed.verbose, parsed.dryrun, env_path, csv_path)


def handle_units(row):
    """
    Convert each value of a CSV row dict into a plain numeric value.

    The end-goal is to do calculations on this data and present it
    to humans.  Converting all units into fundamental / numeric values before
    storage scales much better than burdening a script during the final
    human-presentation step where it may need to traverse hundreds of records.

    Dies (exit code 3) on any value whose unit-suffix is unrecognized.
    """
    result = {}
    for key, value in row.items():
        # Normalize so suffix checks below only need upper-case forms.
        value = value.upper()
        if value.endswith('S'):
            # Seconds.  NB: rstrip() takes a character *set* — this strips
            # any trailing 'S' and space characters, not the literal " S".
            result[key] = float(value.rstrip(' S'))
        elif value.endswith('%'):
            # Percentage; stored as a bare float (e.g. "42 %" -> 42.0).
            result[key] = float(value.rstrip(' %'))
        elif value.endswith('KB'):
            # strip() also takes a character set: removes 'K'/'B'/spaces
            # from both ends, leaving only the numeric text.
            raw = float(value.strip(' KB'))
            # First element is value, second is unit-string. Only numeric value is needed
            # NOTE(review): BinaryUnits.KB presumably treats input as KiB — confirm
            # against the 'binary' package docs if exactness matters.
            float_bytes = convert_units(raw, BinaryUnits.KB, DecimalUnits.B)[0]
            # Don't try to store partial-bytes, always round-up.
            result[key] = int(ceil(float_bytes))
        elif value.endswith('MB'):
            raw = float(value.strip(' MB'))
            float_bytes = convert_units(raw, BinaryUnits.MB, DecimalUnits.B)[0]
            result[key] = int(ceil(float_bytes))
        else:
            # Don't store "bad" data in database, bail out so somebody can fix this script.
            die(f"Can't parse units from '{key}' value '{value}'", code=3)
        v(f" Converted '{value}' -> {result[key]}")
    return result


def insert_data(bench_basis, meta_data, bench_data):
    """
    Store bench_data and meta_data in an orderly fashion within GCP firestore.

    Layout: benchmarks/<instance-type> documents hold the basis details;
    each data-point lands in a 'data' sub-collection keyed by Cirrus task ID.
    """
    # Client() authenticates via $GOOGLE_APPLICATION_CREDENTIALS
    # (existence validated earlier in get_args()).
    db = firestore.Client()
    batch = db.batch()  # Ensure data addition happens atomically
    # Categorize all benchmarks based on the instance-type they ran on.
    doc_ref = db.collection('benchmarks').document(bench_basis['type'])
    # Sub-collections must be anchored by a document, include all benchmark basis-details.
    batch.set(doc_ref, bench_basis, merge=True)  # Document likely to already exist
    v(f"Reticulating {bench_basis['type']} document for task {meta_data['task']}")
    # Data points and metadata stored in a sub-collection of basis-document
    data_ref = doc_ref.collection('data').document(str(meta_data['task']))
    # Having data-point and meta-data nested in a document makes indexing simpler
    item = {
        'meta': meta_data,
        'point': bench_data
    }
    batch.set(data_ref, item)
    batch.commit()
    v("Data point and environment details commited to database")


def main(env_path, csv_path):
    """
    Load environment basis, load and convert csv data into a nosql database.

    env_path: Path to the benchmarks.env file (envparse format).
    csv_path: Path to the benchmarks.csv file (one row per test).
    Dies unless $BENCH_ENV_VER == 1; skips the firestore upload when the
    module-level DRYRUN flag is set.
    """
    v(f"Processing environment '{env_path}' and benchmarks '{csv_path}'")
    env.read_envfile(env_path)

    if env.int('BENCH_ENV_VER') != 1:
        die("Only version 1 of $BENCH_ENV_VER is supported")

    # Hardware/instance details the benchmark numbers depend upon.
    bench_basis = {
        'cpu': env.int('CPUTOTAL'),
        # Total memory normalized to bytes, rounded up to a whole byte.
        'mem': int(ceil(convert_units(env.int('MEMTOTALKB'), BinaryUnits.KB, DecimalUnits.B)[0])),
        'arch': env.str('UNAME_M'),
        'type': env.str('INST_TYPE'),
    }
    v(f"Basis: {pformat(bench_basis)}")

    meta_data = {
        'ver': env.int('BENCH_ENV_VER'),  # identifies this metadata schema
        # NOTE(review): utcnow() returns a naive datetime and is deprecated
        # in newer Pythons — confirm firestore stores it as intended (UTC).
        'stamp': datetime.datetime.utcnow(),
        'build': env.int('CIRRUS_BUILD_ID'),
        'task': env.int('CIRRUS_TASK_ID'),  # collection-key
        # Will be pull/# for PRs; branch-name for branches
        'branch': env.str('CIRRUS_BRANCH'),
        'dist': env.str('DISTRO_NV'),
        'kern': env.str('UNAME_R'),
    }
    bench_data = {}

    # Each CSV row is keyed by its "Test Name" column; all remaining
    # columns are unit-converted into plain numerics by handle_units().
    with open(csv_path) as csv_file:
        reader = csv.DictReader(csv_file, dialect='unix', skipinitialspace=True)
        for row in reader:
            test_name = row.pop("Test Name")
            bench_data[test_name] = handle_units(row)
    v(f"Data: {pformat(bench_data)}")

    if not DRYRUN:
        insert_data(bench_basis, meta_data, bench_data)
        v(f"Added benchmark data for task {meta_data['task']}")


if __name__ == "__main__":
    # get_args() returns (verbose, dryrun, env_path, csv_path)
    args = get_args(sys.argv)
    if args[0]:
        VERBOSE = True
        v("Verbose-mode enabled")
    if args[1]:
        DRYRUN = True
        v("Dry-run: Will not send data to firebase")
    # Bug fix: main() accepts exactly (env_path, csv_path).  The original
    # called main(*args[1:]), which also passed the dry-run flag as a third
    # positional argument and raised TypeError on every invocation.
    main(*args[2:])
22 changes: 22 additions & 0 deletions bench_stuff/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
binary~=1.0
cachetools~=5.3
certifi~=2022.12
charset-normalizer~=3.1
envparse~=0.2
google-api-core~=2.11
google-auth~=2.16
google-cloud~=0.34
google-cloud-core~=2.3
google-cloud-firestore~=2.10
googleapis-common-protos~=1.58
grpcio~=1.51
grpcio-status~=1.51
idna~=3.4
proto-plus~=1.22
protobuf~=4.22
pyasn1~=0.4
pyasn1-modules~=0.2
requests~=2.28
rsa~=4.9
six~=1.16
urllib3~=1.26
1 change: 1 addition & 0 deletions bench_stuff/test/bench_stuff.py
29 changes: 29 additions & 0 deletions bench_stuff/test/run_all_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash

set -e

TESTDIR=$(dirname "${BASH_SOURCE[0]}")

if [[ "$GITHUB_ACTIONS" == "true" ]]; then
    echo "Lint/Style checking not supported under github actions: Skipping"
    exit 0
fi

# Bug fix: the original '[[ -x $(type -P flake8-3) ]]' collapses to the
# one-argument test '[[ -x ]]' when flake8-3 is absent (empty, unquoted
# expansion), which is always TRUE — the "not found" branch never ran.
if [[ -n "$(type -P flake8-3)" ]]; then
    cd "$TESTDIR"
    # NOTE(review): 'set -a' auto-exports every variable assigned below;
    # presumably intended for the venv/test subprocesses — confirm.
    set -a
    virtualenv testvenv
    source testvenv/bin/activate
    testvenv/bin/python -m pip install --upgrade pip
    pip3 install --requirement ../requirements.txt
    set +a

    ./test_bench_stuff.py -v

    cd ..
    flake8-3 --max-line-length=100 ./bench_stuff.py
    flake8-3 --max-line-length=100 --extend-ignore=D101,D102,D103,D105 test/test_bench_stuff.py
else
    # Fixed binary name in the message (was 'flake-8-3'); diagnostics go to stderr.
    echo "Can't find flake8-3 binary, is script executing inside CI container?" >&2
    exit 1
fi
Loading

0 comments on commit bbd4a0a

Please sign in to comment.