-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
31 lines (28 loc) · 1.33 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Default goal: print the name of every other target in this Makefile, one per
# line. Technique from https://stackoverflow.com/a/26339924:
#   1. Re-invoke make on this file with -p (dump the rule database), -q (do not
#      run any recipe) and -Rr (no built-in rules or variables), asking for the
#      dummy goal ":".
#   2. awk reads the dump in paragraph mode and, between the "# File" and
#      "# Finished Make data base" markers, prints the first ':'-separated
#      field of each record unless it starts with "#" or ".".
#   3. grep drops names that begin with a non-alphanumeric character and the
#      name of this target itself ($@).
# The awk program is deliberately kept on a single line: a backslash-newline
# inside the single quotes would be handed to awk verbatim, and not every awk
# accepts a line continuation in the middle of a regex.
.PHONY: help
help:
	@$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null \
	  | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' \
	  | sort \
	  | grep -E -v -e '^[^[:alnum:]]' -e '^$@$$'
# Create the data directories, download the 2022, 2018 and 2014 UKHRA public
# spreadsheets from hrcsonline.net into data/raw, then run
# src/data_processing.py.
# NOTE(review): the script presumably reads data/raw and writes data/clean --
# confirm against src/data_processing.py.
.PHONY: build_dataset
build_dataset: raw_dir := data/raw
build_dataset:
	mkdir -p data $(raw_dir) data/clean
	wget -O $(raw_dir)/ukhra2022.xlsx https://hrcsonline.net/wp-content/uploads/2024/01/UKHRA2022_HRCS_public_dataset_v1-2_30Jan2024.xlsx
	wget -O $(raw_dir)/ukhra2018.xlsx https://hrcsonline.net/wp-content/uploads/2020/01/UKHRA2018_HRCS_public_dataset_v1_27Jan2020.xlsx
	wget -O $(raw_dir)/ukhra2014.xlsx https://hrcsonline.net/wp-content/uploads/2018/01/UK_Health_Research_Analysis_Data_2014_public_v1_27Oct2015.xlsx
	python src/data_processing.py
# Run src/preprocess.py, passing it the training config, the clean-data parquet
# path and the output directory. The arguments live in a target-specific
# variable so the recipe itself stays a single short command.
.PHONY: preprocess
preprocess: preprocess_args := \
    --config "config/train_config.yaml" \
    --clean-data "data/clean/ukhra_clean.parquet" \
    --output-dir "data/preprocessed"
preprocess:
	python src/preprocess.py $(preprocess_args)
# Run src/train.py, passing it the training config, the train and test parquet
# files, the value-counts JSON, the label-names JSONL and the model directory.
# NOTE(review): data/model/ is presumably where the trained model is written --
# confirm against src/train.py.
.PHONY: train
train: train_args := \
    --config-path "config/train_config.yaml" \
    --train-path "data/preprocessed/train.parquet" \
    --test-path "data/preprocessed/test.parquet" \
    --value-counts-path "data/preprocessed/value_counts.json" \
    --label-names-path "data/label_names/ukhra_ra.jsonl" \
    --model-dir "data/model/"
train:
	python src/train.py $(train_args)