From acb6106471a4b3069fe90952cc60c712365ae899 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Thu, 25 Jan 2024 13:50:17 +0000 Subject: [PATCH] Add changes for f50f04e367755c4de76433395e83173944f48c28 --- searchindex.js | 2 +- tutorials/01_basic.html | 24 +++---- tutorials/02_advanced.html | 45 ++++++------ tutorials/03_text.html | 138 ++++++++++++++++++------------------- 4 files changed, 103 insertions(+), 106 deletions(-) diff --git a/searchindex.js b/searchindex.js index 706f720b..e11ef721 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["faq", "feature_specifications", "index", "installation", "timeseriesflattener", "tutorials", "tutorials/01_basic", "tutorials/02_advanced", "tutorials/03_text"], "filenames": ["faq.rst", "feature_specifications.rst", "index.rst", "installation.rst", "timeseriesflattener.rst", "tutorials.rst", "tutorials/01_basic.ipynb", "tutorials/02_advanced.ipynb", "tutorials/03_text.ipynb"], "titles": ["Frequently Asked Questions", "Feature specifications", "timeseriesflattener", "Installation", "Timeseriesflattener", "Tutorials", "Introductory Tutorial", "Advanced Tutorial", "Adding text features"], "terms": {"If": [0, 6, 7, 8], "you": [0, 1, 4, 5, 6, 7, 8], "wish": 0, "us": [0, 1, 2, 3, 4, 5, 6, 7, 8], "librari": 0, "your": [0, 3, 6, 7, 8], "research": 0, "pleas": [0, 2], "joss": 0, "paper": 0, "articl": 0, "bernstorff2023timeseriesflatten": 0, "titl": 0, "timeseriesflatten": [0, 3, 6, 7, 8], "A": [0, 2, 4, 6, 7], "python": [0, 2], "summar": 0, "featur": [0, 2, 5, 6], "from": [0, 1, 2, 4, 5, 6, 7], "medic": [0, 2, 6, 8], "time": [0, 1, 2, 4, 5, 7, 8], "seri": [0, 1, 2, 4, 6], "author": 0, "bernstorff": 0, "martin": 0, "enevoldsen": 0, "kenneth": 0, "damgaard": 0, "jakob": 0, "danielsen": 0, "andrea": 0, "hansen": 0, "lass": 0, "journal": 0, "open": 0, "sourc": [0, 1, 4], "softwar": 0, "volum": 0, "8": [0, 6, 7], "number": [0, 2, 6, 7, 8], "83": 0, "page": [0, 2], "5197": 0, "year": [0, 6, 7], "2023": 0, "Or": [0, 7], "prefer": 0, "apa": 0, "m": 0, "k": 0, "j": 0, "l": 0, "come": [0, 7], "an": [0, 1, 2, 6, 7, 8], "extens": 0, "In": [0, 1, 6, 7], "order": [0, 5], "ll": [0, 6, 7], "usual": 0, "want": [0, 6, 7, 8], "clone": 0, "repositori": 0, "build": 0, "also": [0, 5, 6], "instal": [0, 6, 7], "requir": [0, 1, 2, 4, 6, 7], "develop": 0, "depend": 0, "util": 0, "defin": [0, 1, 4, 8], "pyproject": 0, "toml": 0, "pip": [0, 3], "e": [0, 1, 2, 6, 7, 8], "dev": 0, "pytest": 0, "which": [0, 1, 2, 4, 5, 6, 7], "all": [0, 6, 7, 8], "folder": 0, "specif": [0, 4, 5, 7, 8], "can": [0, 1, 2, 5, 6, 7, 8], "desired_test": 0, "py": [0, 6, 7], "sphinx": 0, "It": [0, 6], "furo": 0, "theme": 0, "custom": 0, "style": [0, 6, 7], "To": [0, 2, 3, 5, 6, 7, 8], "make": [0, 2, 6, 7, 8], "doc": [0, 6], "text": [0, 5], "html": 0, "c": [0, 2, 7], "class": [1, 4, 6, 7], "coercedfloat": 1, "lookperiod": [1, 7], "fallback": [1, 6, 7, 8], "union": [1, 4], "float": 1, "int": [1, 4], "base": [1, 4], "object": [1, 4, 6, 7], "min_dai": [1, 6, 7], "max_dai": [1, 6, 7], "outcomespec": [1, 4, 6], "timeseries_df": [1, 6], "datafram": [1, 2, 4, 6, 7, 8], "feature_base_nam": [1, 6, 7], "str": [1, 4, 8], "lookahead_dai": [1, 6], "tupl": [1, 6], "aggregation_fn": [1, 6, 7, 8], "callabl": 1, "dataframegroupbi": 1, "incid": [1, 6], "bool": [1, 4], "prefix": [1, 4, 6], "outc": [1, 4, 6], "basemodel": [1, 4], "outcom": [1, 2, 5, 7], "paramet": [1, 4, 6], "valu": [1, 2, 4, 6, 7, 8], "should": [1, 4, 6, 8], "contain": [1, 4, 6, 8], "column": [1, 2, 4, 6, 7, 8], "entity_id": [1, 4, 6, 7, 8], "id": [1, 2, 6, 8], "entiti": [1, 6], "each": [1, 2, 5, 6, 8], "belong": 1, "The": [1, 2, 5, 6, 7], "timeseri": [1, 4, 6], "timestamp": [1, 4, 6, 7, 8], "datetim": [1, 6, 7], "note": [1, 6, 7, 8], "name": [1, 4, 6, 7, 8], "overridden": 1, "when": [1, 2, 6, 7], "initialis": 1, "gener": [1, 2, 5, 6, 7], "g": [1, 2, 6, 7, 8], "_": [1, 6, 7], "feature_baase_nam": 1, "metadata": [1, 4, 6, 8], "interv": [1, 6], "predict": [1, 2, 4, 5, 7, 8], "look": [1, 2, 6, 7], "two": [1, 4, 6, 7], "specifi": [1, 2, 5, 7], "resolv": 1, "0": [1, 6, 7, 8], "how": [1, 2, 5, 7, 8], "aggreg": [1, 2, 6], "multipl": [1, 2, 6, 7, 8], "within": [1, 2, 6, 8], "lookahead": [1, 2, 6], "dai": [1, 6, 8], "take": [1, 4, 6, 7, 8], "group": [1, 7, 8], "input": 1, "return": [1, 4, 6, 7, 8], "singl": [1, 2, 6], "i": [1, 2, 3, 6, 7, 8], "found": [1, 6], "window": [1, 2, 6, 7], "whether": [1, 6], "type": [1, 2, 4, 6, 7], "2": [1, 6, 7, 8], "diabet": [1, 6], "becaus": [1, 4, 6, 7], "onli": [1, 2, 6, 7, 8], "experi": [1, 6], "onc": [1, 6], "handl": [1, 6], "vectoris": 1, "wai": 1, "dure": 1, "resolut": 1, "faster": [1, 6, 7, 8], "than": [1, 2, 6], "non": 1, "occur": [1, 2, 6], "feature_nam": [1, 7], "default": [1, 4, 6], "pred": [1, 4, 6], "get_output_col_nam": 1, "get": [1, 3, 4], "output": [1, 4, 7, 8], "is_dichotom": 1, "check": [1, 6, 7, 8], "dichotom": 1, "properti": [1, 6, 7], "lookahead_period": 1, "model_config": [1, 4], "classvar": [1, 4], "configdict": [1, 4], "arbitrary_types_allow": [1, 4], "true": [1, 2, 4, 6, 7], "extra": [1, 6, 7], "forbid": 1, "frozen": 1, "configur": [1, 4], "model": [1, 2, 4, 6, 8], "dictionari": [1, 4], "conform": [1, 4], "pydant": [1, 4], "config": [1, 4], "model_field": [1, 4], "dict": [1, 4], "fieldinfo": [1, 4], "annot": [1, 4], "list": [1, 4, 5, 6, 7, 8], "fals": [1, 4, 6, 8], "about": [1, 4, 6], "field": [1, 4], "map": [1, 4], "thi": [1, 2, 4, 6, 7, 8], "replac": [1, 4], "__fields__": [1, 4], "v1": [1, 4], "predictorspec": [1, 4, 6], "lookbehind_dai": [1, 6, 7, 8], "predictor": [1, 2, 4, 5, 7], "lookbehind": [1, 2, 6, 7, 8], "lookbehind_period": [1, 7], "staticspec": [1, 4, 6], "static": [1, 5, 7], "can_be_coerced_losslessly_to_int": 1, "coerce_float": 1, "get_temporal_col_nam": 1, "tempor": [1, 5, 7, 8], "packag": [2, 5, 6, 7], "data": [2, 4, 5, 7, 8], "machin": 2, "learn": 2, "implement": [2, 7], "method": [2, 4, 7], "includ": 2, "convert": [2, 8], "ani": [2, 4, 6, 7, 8], "irregular": [2, 6], "row": [2, 6, 7, 8], "desir": 2, "construct": 2, "raw": 2, "ar": [2, 4, 6, 7, 8], "allow": [2, 4, 7], "patient": [2, 6, 8], "independ": 2, "set": [2, 4, 6], "particular": 2, "sever": [2, 8], "choic": 2, "one": [2, 4, 6, 7, 8], "need": [2, 6, 7, 8], "issu": [2, 6], "everi": [2, 6, 7], "physic": 2, "visit": 2, "morn": 2, "anoth": [2, 6], "clinic": [2, 8], "meaning": 2, "far": [2, 6, 8], "back": [2, 6], "ahead": [2, 6], "exist": 2, "point": [2, 6], "abov": [2, 6, 7, 8], "figur": 2, "graphic": 2, "repres": [2, 6], "terminologi": [2, 6], "determin": [2, 6], "wherea": 2, "futur": [2, 6], "refer": [2, 6], "b": 2, "label": [2, 6], "neg": 2, "never": [2, 6], "happen": [2, 6], "outsid": [2, 6], "posit": [2, 6], "insid": [2, 6], "exampl": [2, 6, 7, 8], "mean": [2, 6, 7, 8], "shown": [2, 6], "max": [2, 6], "min": [2, 6], "etc": [2, 6], "d": 2, "drop": [2, 6, 7, 8], "extend": [2, 6], "further": [2, 4, 6], "start": [2, 3, 6, 7, 8], "dataset": [2, 4, 5, 6, 7], "end": [2, 6, 7], "behaviour": 2, "option": [2, 4], "obtain": 2, "rich": 2, "represent": 2, "see": [2, 4, 6], "tutori": [2, 4, 8], "placehold": 2, "case": [2, 6], "report": 2, "request": 2, "github": [2, 3], "tracker": 2, "otherwis": 2, "discuss": [2, 6], "forum": 2, "bug": 2, "idea": 2, "usag": 2, "index": 2, "run": [3, 5], "follow": [3, 6], "line": [3, 6, 7], "termin": 3, "There": [3, 6, 7, 8], "discrep": 3, "between": 3, "latest": 3, "version": [3, 6, 7], "flatten": [4, 5, 8], "describ": [4, 6, 8], "speccollect": 4, "outcome_spec": [4, 6], "predictor_spec": 4, "static_spec": 4, "collect": 4, "spec": [4, 6, 7, 8], "prediction_times_df": [4, 6, 7, 8], "drop_pred_times_with_insufficient_look_dist": [4, 6, 7, 8], "cach": [4, 5], "featurecach": [4, 7], "none": [4, 6, 7], "entity_id_col_nam": [4, 6, 7, 8], "timestamp_col_nam": [4, 6, 7, 8], "predictor_col_name_prefix": 4, "outcome_col_name_prefix": 4, "n_worker": [4, 6, 7, 8], "60": [4, 7], "log_to_stdout": 4, "turn": [4, 8], "tabular": [4, 8], "add_ag": 4, "date_of_birth_df": 4, "date_of_birth_col_nam": 4, "date_of_birth": 4, "output_prefix": 4, "add": [4, 6, 7], "ag": 4, "ha": [4, 6, 7, 8], "its": [4, 6], "own": [4, 7], "function": [4, 6, 8], "veri": 4, "frequent": [4, 6], "match": 4, "self": [4, 6, 7], "add_spec": [4, 6, 7, 8], "sequenc": [4, 7], "queue": 4, "unprocess": [4, 6, 7, 8], "process": [4, 6, 7, 8], "until": 4, "call": [4, 6, 7], "comput": [4, 6, 7, 8], "get_df": [4, 6, 7, 8], "u": 4, "more": [4, 6, 7], "effecti": 4, "parallelis": 4, "most": [4, 6, 7], "complex": 4, "li": 4, "For": [4, 6, 7, 8], "document": 4, "those": 4, "present": [4, 6], "we": [5, 6, 7, 8], "recommend": 5, "go": [5, 6], "through": 5, "below": 5, "jupyt": 5, "notebook": 5, "download": 5, "local": [5, 6, 7], "introductori": 5, "load": [5, 7, 8], "advanc": [5, 6], "creat": [5, 6, 8], "combin": 5, "ad": [5, 6], "embed": 5, "especi": 6, "help": 6, "have": [6, 7, 8], "complic": 6, "train": 6, "simpl": 6, "explain": 6, "appli": 6, "consist": 6, "3": [6, 7, 8], "step": 6, "": [6, 7, 8], "simplest": 6, "first": [6, 7, 8], "predictin": 6, "element": 6, "context": 6, "skimpi": [6, 7], "import": [6, 7, 8], "skim": [6, 7], "test": [6, 7, 8], "load_synth_data": [6, 7, 8], "load_synth_prediction_tim": [6, 7, 8], "df_prediction_tim": 6, "sort_valu": 6, "summari": [6, 7], "count": [6, 7], "10000": [6, 7], "int64": [6, 7], "1": [6, 7, 8], "datetime64": [6, 7], "column_nam": [6, 7], "na": [6, 7, 8], "sd": [6, 7], "p0": [6, 7], "p25": [6, 7], "p50": [6, 7], "p75": [6, 7], "p100": [6, 7], "hist": [6, 7], "5000": [6, 7], "2900": [6, 7], "2500": 6, "4900": [6, 7], "7400": [6, 7], "last": [6, 7, 8], "frequenc": [6, 7], "1965": [6, 8], "01": [6, 7, 8], "02": [6, 7, 8], "09": [6, 8], "35": 6, "00": [6, 7, 8], "1969": [6, 7, 8], "12": [6, 7, 8], "31": [6, 7, 8], "21": [6, 7, 8], "42": [6, 7], "628": 6, "11": [6, 7, 8], "55": 6, "2005": 6, "03": [6, 8], "15": [6, 8], "07": [6, 8], "16": [6, 8], "4370": 6, "13": [6, 7, 8], "23": [6, 7, 8], "18": [6, 7, 8], "6152": 6, "1968": [6, 7, 8], "04": [6, 8], "6873": 6, "4": [6, 7, 8], "28": [6, 8], "33": 6, "9688": 6, "9996": 6, "17": [6, 7, 8], "1463": 6, "30": [6, 7, 8], "19": [6, 8], "3952": 6, "9997": 6, "1967": [6, 8], "06": [6, 8], "08": [6, 8], "52": [6, 8], "7926": 6, "9999": 6, "22": [6, 7, 8], "24": 6, "5720": 6, "14": [6, 8], "59": [6, 7], "here": 6, "Then": [6, 7], "our": [6, 7, 8], "differ": [6, 7], "timepoint": 6, "load_synth_predictor_float": [6, 7], "df_synth_predictor": 6, "100000": 6, "float64": [6, 7], "7500": 6, "5": [6, 7, 8], "9": [6, 7], "00015": 6, "7": [6, 7, 8], "10": [6, 7, 8], "37": 6, "95792": 6, "29": [6, 7], "799246": 6, "82592": 6, "05": [6, 7, 8], "6": [6, 7], "630007": 6, "1377": 6, "174793": 6, "28579": 6, "26": [6, 8], "981185": 6, "81247": 6, "44": [6, 7], "970382": 6, "10277": 6, "20": [6, 8], "304568": 6, "74701": 6, "671907": 6, "69566": 6, "41": [6, 8], "250538": 6, "40901": 6, "1966": [6, 8], "924175": 6, "96881": 6, "501553": 6, "again": 6, "could": 6, "sex": 6, "doesn": 6, "t": [6, 7], "chang": 6, "over": 6, "let": [6, 7, 8], "load_synth_sex": 6, "df_synth_sex": 6, "femal": 6, "9994": 6, "9995": 6, "9998": 6, "As": [6, 8], "And": 6, "lastli": 6, "ve": 6, "chosen": 6, "binari": 6, "store": 6, "infer": 6, "do": 6, "sinc": 6, "thei": [6, 7, 8], "section": 6, "load_synth_outcom": 6, "df_synth_outcom": 6, "3103": 6, "5100": 6, "7600": 6, "50": 6, "46": [6, 7], "6253": 6, "9964": 6, "6255": 6, "9966": 6, "6256": 6, "9968": 6, "6257": 6, "9970": 6, "6269": 6, "9992": 6, "53": [6, 7], "per": [6, 7], "now": [6, 7, 8], "recip": 6, "finish": 6, "firstli": 6, "main": 6, "decis": 6, "size": [6, 7], "given": 6, "indic": 6, "code": [6, 7], "feature_spec": [6, 7, 8], "single_spec": 6, "maximum": [6, 7], "panda": [6, 7, 8], "pd": [6, 8], "test_df": 6, "365": [6, 7, 8], "outcome_nam": 6, "argument": 6, "values_df": 6, "decid": 6, "least": 6, "correspond": [6, 8], "both": 6, "accomplish": 6, "dw_ek_borg": 6, "wa": [6, 8], "mark": 6, "after": 6, "where": 6, "event": 6, "perman": 6, "specifii": 6, "forward": 6, "search": 6, "certain": 6, "period": [6, 8], "befor": [6, 8], "instead": 6, "almost": 6, "entir": 6, "ident": 6, "except": 6, "past": 6, "numpi": [6, 7, 8], "np": [6, 7, 8], "temporal_predictor_spec": 6, "730": [6, 7, 8], "nan": [6, 7, 8], "predictor_nam": 6, "rang": 6, "similar": 6, "instanc": [6, 7], "might": [6, 7, 8], "182": 6, "easili": 6, "pass": [6, 8], "temporal_interval_predictor_spec": 6, "90": 6, "predictor_interval_nam": 6, "slightli": 6, "previou": 6, "provid": 6, "howev": [6, 7, 8], "By": 6, "filter": 6, "easi": 6, "manual": [6, 7], "sex_predictor_spec": 6, "input_col_name_overrid": 6, "df": [6, 7, 8], "tsflatten": 6, "re": [6, 8], "readi": 6, "instanti": 6, "along": 6, "add_": 6, "parallel": [6, 7, 8], "oper": 6, "across": 6, "core": [6, 7], "ts_flatten": [6, 7, 8], "applic": 6, "sai": [6, 7], "month": [6, 7, 8], "would": [6, 8], "compromis": 6, "generalis": 6, "some": [6, 7, 8], "edg": 6, "brief": 6, "2024": [6, 7, 8], "25": [6, 7, 8], "32": [6, 7, 8], "info": [6, 7, 8], "were": [6, 7, 8], "_drop_pred_time_if_insufficient_look_dist": [6, 7], "5999": 6, "99": 6, "worker": [6, 7, 8], "chunksiz": [6, 7, 8], "mai": [6, 7, 8], "progress": [6, 7, 8], "bar": [6, 7, 8], "move": [6, 7, 8], "batch": [6, 7, 8], "much": [6, 7, 8], "total": [6, 7, 8], "perform": [6, 7, 8], "100": [6, 7, 8], "39": [6, 7], "05it": 6, "align": [6, 7, 8], "littl": [6, 7, 8], "while": [6, 7, 8], "minut": [6, 7, 8], "000": [6, 7, 8], "concaten": [6, 7, 8], "Will": [6, 7, 8], "system": [6, 7, 8], "2_000_000": [6, 7, 8], "normal": [6, 7, 8], "took": [6, 7, 8], "004": 6, "second": [6, 7, 8], "merg": [6, 7, 8], "origin": [6, 7, 8], "4001": 6, "string": [6, 7], "2600": [6, 7], "outc_outcome_name_withi": 6, "064": 6, "n_365_days_maximum_fal": 6, "back_0_dichotom": 6, "pred_predictor_interv": 6, "2877": 6, "71": 6, "91": 6, "_name_within_30_to_90_d": 6, "ays_mean_fallback_nan": [6, 7], "pred_predictor_name_wit": 6, "72": 6, "097": 6, "hin_730_days_mean_fallb": 6, "ack_nan": 6, "pred_femal": 6, "49": 6, "word": [6, 7, 8], "prediction_time_uuid": [6, 7, 8], "outc_outcome_name_within_365_days_maximum_fallback_0_dichotom": 6, "pred_predictor_interval_name_within_30_to_90_days_mean_fallback_nan": 6, "pred_predictor_name_within_730_days_mean_fallback_nan": 6, "display": [6, 7], "shorten": [6, 7], "col": [6, 7], "shortened_pr": 6, "pred_x": 6, "shortened_pred_interv": 6, "pred_x_30_to_90": 6, "shortened_outcom": 6, "outc_i": 6, "renam": [6, 7], "pred_predictor_name_within_0_to_730_days_mean_fallback_nan": 6, "outc_outcome_name_within_0_to_365_days_maximum_fallback_0_dichotom": 6, "axi": [6, 7, 8], "set_table_attribut": [6, 7], "font": [6, 7], "14px": [6, 7], "importerror": [6, 7], "traceback": [6, 7], "recent": [6, 7], "cell": [6, 7], "file": [6, 7], "lib": [6, 7], "python3": [6, 7], "site": [6, 7], "frame": [6, 7], "1338": [6, 7], "1318": [6, 7], "1319": [6, 7, 8], "def": [6, 7, 8], "styler": [6, 7], "1320": [6, 7], "1321": [6, 7], "1322": [6, 7], "1336": [6, 7], "tabl": [6, 7], "visual": [6, 7], "user_guid": [6, 7], "ipynb": [6, 7], "1337": [6, 7], "io": [6, 7], "format": [6, 7, 8], "1340": [6, 7], "40": [6, 7], "shared_doc": [6, 7], "_shared_doc": [6, 7], "save_to_buff": [6, 7], "jinja2": [6, 7], "import_optional_depend": [6, 7], "style_rend": [6, 7], "47": [6, 7], "cssproperti": [6, 7], "48": [6, 7], "cssstyle": [6, 7], "56": [6, 7], "refactor_level": [6, 7], "57": [6, 7], "type_check": [6, 7], "compat": [6, 7], "_option": [6, 7], "161": [6, 7], "error": [6, 7], "min_vers": [6, 7], "159": [6, 7], "160": [6, 7], "elif": [6, 7], "rais": [6, 7], "msg": [6, 7], "163": [6, 7], "modul": [6, 7], "newer": [6, 7], "current": [6, 7], "classif": 6, "citizen": 6, "uniqu": 6, "identifi": 6, "prediciton": 6, "pred_": [6, 7], "outc_": 6, "basic": 7, "cover": [7, 8], "expand": 7, "effect": 7, "mani": 7, "so": [7, 8], "iter": 7, "without": 7, "complet": 7, "full": 7, "hand": 7, "rather": 7, "straightforward": 7, "what": 7, "hundr": 7, "amount": 7, "write": 7, "grow": 7, "quit": 7, "substanti": 7, "becom": 7, "consum": 7, "hard": 7, "navig": 7, "solv": 7, "problem": 7, "combinatori": 7, "group_spec": [7, 8], "predictorgroupspec": [7, 8], "nameddatafram": 7, "pprint": 7, "pred_spec_batch": 7, "named_datafram": [7, 8], "synth_predictor_float": 7, "1095": 7, "create_combin": [7, 8], "attribut": 7, "easier": 7, "namedatafram": 7, "exactli": 7, "load_synth_predictor_flaot": 7, "pred_synth_predictor_float_": 7, "result": [7, 8], "good": 7, "small": [7, 8], "highlight": 7, "pred_spec_batch_summari": 7, "pred_spec": 7, "__name__": 7, "print": [7, 8], "f": 7, "len": [7, 8], "know": 7, "bunch": 7, "quickli": 7, "But": 7, "next": 7, "ship": 7, "disk": 7, "feature_cach": 7, "cache_to_disk": 7, "diskcach": 7, "flattened_dataset": 7, "pathlib": 7, "path": 7, "feature_cache_dir": 7, "tmp": 7, "directori": 7, "save": 7, "just": 7, "them": 7, "won": 7, "alreadi": [7, 8], "new": 7, "abstract": 7, "redi": 7, "sql": 7, "everyth": 7, "work": 7, "6053": 7, "73it": 7, "92it": 7, "007": 7, "3947": 7, "pred_synth_predictor_fl": 7, "506": 7, "82": 7, "024": 7, "oat_within_365_to_730_d": 7, "ays_maximum_fallback_na": 7, "n": 7, "oat_within_1095_days_ma": 7, "ximum_fallback_nan": 7, "533": 7, "0084": 7, "oat_within_365_days_max": 7, "imum_fallback_nan": 7, "oat_within_365_days_mea": 7, "n_fallback_nan": 7, "oat_within_1095_days_m": 7, "an_fallback_nan": 7, "pred_synth_predictor_float_within_365_to_730_days_maximum_fallback_nan": 7, "pred_synth_predictor_float_within_1095_days_maximum_fallback_nan": 7, "pred_synth_predictor_float_within_365_days_maximum_fallback_nan": 7, "pred_synth_predictor_float_within_365_to_730_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_365_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_1095_days_mean_fallback_nan": 7, "pred_col": 7, "startswith": 7, "rename_dict": 7, "enumer": 7, "df_renam": 7, "base_col": 7, "renamed_col": 7, "dealt": 8, "show": 8, "out": 8, "synthet": 8, "other": 8, "load_synth_text": 8, "synth_text": 8, "head": 8, "4647": 8, "went": 8, "induc": 8, "coma": 8, "2007": 8, "taken": 8, "emerg": 8, "departm": 8, "5799": 8, "old": 8, "son": 8, "wh": 8, "had": 8, "been": 8, "left": 8, "bed": 8, "minu": 8, "4234": 8, "allergi": 8, "often": 8, "advantag": 8, "emb": 8, "speed": 8, "up": 8, "block": 8, "tf": 8, "idf": 8, "form": 8, "constraint": 8, "entitiy_id_col": 8, "timestamp_col": 8, "value_col": 8, "purpos": 8, "demonstr": 8, "fit": 8, "captur": 8, "sklearn": 8, "feature_extract": 8, "tfidfvector": 8, "embed_text_to_df": 8, "tfidf_model": 8, "max_featur": 8, "fit_transform": 8, "toarrai": 8, "get_feature_names_out": 8, "embedded_text": 8, "tolist": 8, "metadata_onli": 8, "embedded_text_with_metadata": 8, "concat": 8, "175872": 8, "182066": 8, "249848": 8, "158430": 8, "000000": 8, "023042": 8, "311389": 8, "529966": 8, "490203": 8, "479312": 8, "244870": 8, "135282": 8, "064337": 8, "465084": 8, "336859": 8, "151743": 8, "729861": 8, "179161": 8, "192367": 8, "232332": 8, "283402": 8, "336952": 8, "176422": 8, "238416": 8, "646879": 8, "250217": 8, "382277": 8, "165635": 8, "200046": 8, "183015": 8, "261115": 8, "125837": 8, "151906": 8, "205285": 8, "759528": 8, "403961": 8, "098747": 8, "493461": 8, "119196": 8, "272619": 8, "207444": 8, "045256": 8, "183475": 8, "588324": 8, "433253": 8, "235349": 8, "df_with_multiple_values_to_named_datafram": 8, "readili": 8, "suppli": 8, "df_transform": 8, "split": 8, "embedded_df": 8, "name_prefix": 8, "tfidf_": 8, "accord": 8, "inform": 8, "bow": 8, "kept": 8, "tfidf_and": 8, "emb_spec_batch": 8, "64it": 8, "68it": 8, "029": 8, "sake": 8, "dropna": 8, "pred_tfidf_was_within_365_days_mean_fallback_nan": 8, "pred_tfidf_and_within_365_days_mean_fallback_nan": 8, "pred_tfidf_or_within_365_days_mean_fallback_nan": 8, "pred_tfidf_in_within_730_days_mean_fallback_nan": 8, "pred_tfidf_of_within_730_days_mean_fallback_nan": 8, "pred_tfidf_the_within_730_days_mean_fallback_nan": 8, "pred_tfidf_to_within_730_days_mean_fallback_nan": 8, "pred_tfidf_was_within_730_days_mean_fallback_nan": 8, "pred_tfidf_for_within_365_days_mean_fallback_nan": 8, "pred_tfidf_and_within_730_days_mean_fallback_nan": 8, "pred_tfidf_of_within_365_days_mean_fallback_nan": 8, "pred_tfidf_that_within_365_days_mean_fallback_nan": 8, "pred_tfidf_the_within_365_days_mean_fallback_nan": 8, "pred_tfidf_patient_within_730_days_mean_fallback_nan": 8, "pred_tfidf_that_within_730_days_mean_fallback_nan": 8, "pred_tfidf_in_within_365_days_mean_fallback_nan": 8, "pred_tfidf_for_within_730_days_mean_fallback_nan": 8, "1917": 8, "4977": 8, "086927": 8, "145809": 8, "221549": 8, "483324": 8, "536339": 8, "534890": 8, "284485": 8, "088050": 8, "090356": 8, "133722": 8, "2463": 8, "6840": 8, "092896": 8, "155821": 8, "355142": 8, "258256": 8, "573168": 8, "285810": 8, "456030": 8, "376386": 8, "096561": 8, "071452": 8, "2580": 8, "260680": 8, "601521": 8, "639848": 8, "401014": 8, "2741": 8, "9832": 8, "36": 8, "335410": 8, "225044": 8, "128228": 8, "186493": 8, "236513": 8, "825558": 8, "164655": 8, "101924": 8, "103195": 8, "2931": 8, "7281": 8, "388547": 8, "289663": 8, "385111": 8, "280049": 8, "304425": 8, "464891": 8, "211934": 8, "043730": 8, "269251": 8, "332065": 8}, "objects": {"timeseriesflattener.feature_specs": [[1, 0, 0, "-", "single_specs"]], "timeseriesflattener.feature_specs.single_specs": [[1, 1, 1, "", "CoercedFloats"], [1, 1, 1, "", "LookPeriod"], [1, 1, 1, "", "OutcomeSpec"], [1, 1, 1, "", "PredictorSpec"], [1, 1, 1, "", "StaticSpec"], [1, 5, 1, "", "can_be_coerced_losslessly_to_int"], [1, 5, 1, "", "coerce_floats"], [1, 5, 1, "", "get_temporal_col_name"]], "timeseriesflattener.feature_specs.single_specs.CoercedFloats": [[1, 2, 1, "", "fallback"], [1, 2, 1, "", "lookperiod"]], "timeseriesflattener.feature_specs.single_specs.LookPeriod": [[1, 2, 1, "", "max_days"], [1, 2, 1, "", "min_days"]], "timeseriesflattener.feature_specs.single_specs.OutcomeSpec": [[1, 2, 1, "", "aggregation_fn"], [1, 2, 1, "", "fallback"], [1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "incident"], [1, 3, 1, "", "is_dichotomous"], [1, 2, 1, "", "lookahead_days"], [1, 4, 1, "", "lookahead_period"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener.feature_specs.single_specs.PredictorSpec": [[1, 2, 1, "", "aggregation_fn"], [1, 2, 1, "", "fallback"], [1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "lookbehind_days"], [1, 4, 1, "", "lookbehind_period"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener.feature_specs.single_specs.StaticSpec": [[1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener": [[4, 0, 0, "-", "flattened_dataset"]], "timeseriesflattener.flattened_dataset": [[4, 1, 1, "", "SpecCollection"], [4, 1, 1, "", "TimeseriesFlattener"]], "timeseriesflattener.flattened_dataset.SpecCollection": [[4, 2, 1, "", "model_config"], [4, 2, 1, "", "model_fields"], [4, 2, 1, "", "outcome_specs"], [4, 2, 1, "", "predictor_specs"], [4, 2, 1, "", "static_specs"]], "timeseriesflattener.flattened_dataset.TimeseriesFlattener": [[4, 3, 1, "", "add_age"], [4, 3, 1, "", "add_spec"], [4, 3, 1, "", "compute"], [4, 3, 1, "", "get_df"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:attribute", "3": "py:method", "4": "py:property", "5": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "method", "Python method"], "4": ["py", "property", "Python property"], "5": ["py", "function", "Python function"]}, "titleterms": {"frequent": 0, "ask": [0, 2], "question": [0, 2], "cite": 0, "thi": 0, "packag": 0, "how": [0, 6], "do": 0, "i": 0, "test": 0, "code": 0, "run": 0, "suit": 0, "document": 0, "gener": [0, 8], "featur": [1, 7, 8], "specif": [1, 6], "timeseriesflatten": [1, 2, 4], "feature_spec": 1, "single_spec": 1, "function": 2, "where": 2, "indic": 2, "search": 2, "instal": 3, "flattened_dataset": 4, "tutori": [5, 6, 7], "get": 5, "start": 5, "introductori": 6, "load": 6, "data": 6, "predict": 6, "time": 6, "tempor": 6, "predictor": [6, 8], "static": 6, "outcom": 6, "specifi": 6, "flatten": 6, "advanc": 7, "creat": 7, "combin": 7, "cach": 7, "ad": 8, "text": 8, "The": 8, "dataset": 8, "from": 8, "embed": 8}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Frequently Asked Questions": [[0, "frequently-asked-questions"]], "Citing this package": [[0, "citing-this-package"]], "How do I test the code and run the test suite?": [[0, "how-do-i-test-the-code-and-run-the-test-suite"]], "How is the documentation generated?": [[0, "how-is-the-documentation-generated"]], "Feature specifications": [[1, "feature-specifications"]], "timeseriesflattener.feature_specs.single_specs": [[1, "module-timeseriesflattener.feature_specs.single_specs"]], "timeseriesflattener": [[2, "timeseriesflattener"]], "Functionality": [[2, "functionality"]], "Where to ask questions?": [[2, "where-to-ask-questions"]], "Indices and search": [[2, "indices-and-search"]], "Installation": [[3, "installation"]], "Timeseriesflattener": [[4, "timeseriesflattener"]], "timeseriesflattener.flattened_dataset": [[4, "module-timeseriesflattener.flattened_dataset"]], "Tutorials": [[5, "tutorials"]], "Getting started": [[5, null]], "Introductory Tutorial": [[6, "introductory-tutorial"]], "Loading data": [[6, "loading-data"]], "Loading prediction times": [[6, "loading-prediction-times"]], "Loading a temporal predictor": [[6, "loading-a-temporal-predictor"]], "Loading a static predictor": [[6, "loading-a-static-predictor"]], "Loading a temporal outcome": [[6, "loading-a-temporal-outcome"]], "Specifying how to flatten the data": [[6, "specifying-how-to-flatten-the-data"]], "Temporal outcome specification": [[6, "temporal-outcome-specification"]], "Temporal predictor specification": [[6, "temporal-predictor-specification"]], "Static predictor specification": [[6, "static-predictor-specification"]], "Flattening": [[6, "flattening"]], "Advanced Tutorial": [[7, "advanced-tutorial"]], "Creating feature combinations": [[7, "creating-feature-combinations"]], "Caching": [[7, "caching"]], "Adding text features": [[8, "adding-text-features"]], "The dataset": [[8, "the-dataset"]], "Generating predictors from embedded text": [[8, "generating-predictors-from-embedded-text"]]}, "indexentries": {"coercedfloats (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats"]], "lookperiod (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod"]], "outcomespec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec"]], "predictorspec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec"]], "staticspec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec"]], "aggregation_fn (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.aggregation_fn"]], "aggregation_fn (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.aggregation_fn"]], "can_be_coerced_losslessly_to_int() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.can_be_coerced_losslessly_to_int"]], "coerce_floats() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.coerce_floats"]], "fallback (coercedfloats attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats.fallback"]], "fallback (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.fallback"]], "fallback (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.fallback"]], "feature_base_name (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.feature_base_name"]], "feature_base_name (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.feature_base_name"]], "feature_base_name (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.feature_base_name"]], "get_output_col_name() (outcomespec method)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.get_output_col_name"]], "get_output_col_name() (predictorspec method)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.get_output_col_name"]], "get_output_col_name() (staticspec method)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.get_output_col_name"]], "get_temporal_col_name() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.get_temporal_col_name"]], "incident (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.incident"]], "is_dichotomous() (outcomespec method)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.is_dichotomous"]], "lookahead_days (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.lookahead_days"]], "lookahead_period (outcomespec property)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.lookahead_period"]], "lookbehind_days (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.lookbehind_days"]], "lookbehind_period (predictorspec property)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.lookbehind_period"]], "lookperiod (coercedfloats attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats.lookperiod"]], "max_days (lookperiod attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod.max_days"]], "min_days (lookperiod attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod.min_days"]], "model_config (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_config"]], "model_config (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_config"]], "model_config (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_config"]], "model_fields (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_fields"]], "model_fields (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_fields"]], "model_fields (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_fields"]], "module": [[1, "module-timeseriesflattener.feature_specs.single_specs"], [4, "module-timeseriesflattener.flattened_dataset"]], "prefix (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.prefix"]], "prefix (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.prefix"]], "prefix (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.prefix"]], "timeseries_df (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.timeseries_df"]], "timeseries_df (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.timeseries_df"]], "timeseries_df (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.timeseries_df"]], "timeseriesflattener.feature_specs.single_specs": [[1, "module-timeseriesflattener.feature_specs.single_specs"]], "speccollection (class in timeseriesflattener.flattened_dataset)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection"]], "timeseriesflattener (class in timeseriesflattener.flattened_dataset)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener"]], "add_age() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.add_age"]], "add_spec() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.add_spec"]], "compute() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.compute"]], "get_df() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.get_df"]], "model_config (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_config"]], "model_fields (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_fields"]], "outcome_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.outcome_specs"]], "predictor_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.predictor_specs"]], "static_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.static_specs"]], "timeseriesflattener.flattened_dataset": [[4, "module-timeseriesflattener.flattened_dataset"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["faq", "feature_specifications", "index", "installation", "timeseriesflattener", "tutorials", "tutorials/01_basic", "tutorials/02_advanced", "tutorials/03_text"], "filenames": ["faq.rst", "feature_specifications.rst", "index.rst", "installation.rst", "timeseriesflattener.rst", "tutorials.rst", "tutorials/01_basic.ipynb", "tutorials/02_advanced.ipynb", "tutorials/03_text.ipynb"], "titles": ["Frequently Asked Questions", "Feature specifications", "timeseriesflattener", "Installation", "Timeseriesflattener", "Tutorials", "Introductory Tutorial", "Advanced Tutorial", "Adding text features"], "terms": {"If": [0, 6, 7, 8], "you": [0, 1, 4, 5, 6, 7, 8], "wish": 0, "us": [0, 1, 2, 3, 4, 5, 6, 7, 8], "librari": 0, "your": [0, 3, 6, 7, 8], "research": 0, "pleas": [0, 2], "joss": 0, "paper": 0, "articl": 0, "bernstorff2023timeseriesflatten": 0, "titl": 0, "timeseriesflatten": [0, 3, 6, 7, 8], "A": [0, 2, 4, 6, 7], "python": [0, 2], "summar": 0, "featur": [0, 2, 5, 6], "from": [0, 1, 2, 4, 5, 6, 7], "medic": [0, 2, 6, 8], "time": [0, 1, 2, 4, 5, 7, 8], "seri": [0, 1, 2, 4, 6], "author": 0, "bernstorff": 0, "martin": 0, "enevoldsen": 0, "kenneth": 0, "damgaard": 0, "jakob": 0, "danielsen": 0, "andrea": 0, "hansen": 0, "lass": 0, "journal": 0, "open": 0, "sourc": [0, 1, 4], "softwar": 0, "volum": 0, "8": [0, 6, 7], "number": [0, 2, 6, 7, 8], "83": 0, "page": [0, 2], "5197": 0, "year": [0, 6, 7], "2023": 0, "Or": [0, 7], "prefer": 0, "apa": 0, "m": 0, "k": 0, "j": 0, "l": 0, "come": [0, 7], "an": [0, 1, 2, 6, 7, 8], "extens": 0, "In": [0, 1, 6, 7], "order": [0, 5], "ll": [0, 6, 7], "usual": 0, "want": [0, 6, 7, 8], "clone": 0, "repositori": 0, "build": 0, "also": [0, 5, 6], "instal": [0, 6, 7], "requir": [0, 1, 2, 4, 6, 7], "develop": 0, "depend": 0, "util": 0, "defin": [0, 1, 4, 8], "pyproject": 0, "toml": 0, "pip": [0, 3], "e": [0, 1, 2, 6, 7, 8], "dev": 0, "pytest": 0, "which": [0, 1, 2, 4, 5, 6, 7], "all": [0, 6, 7, 8], "folder": 0, "specif": [0, 4, 5, 7, 8], "can": [0, 1, 2, 5, 6, 7, 8], "desired_test": 0, "py": [0, 6, 7], "sphinx": 0, "It": [0, 6], "furo": 0, "theme": 0, "custom": 0, "style": [0, 6, 7], "To": [0, 2, 3, 5, 6, 7, 8], "make": [0, 2, 6, 7, 8], "doc": [0, 6], "text": [0, 5], "html": 0, "c": [0, 2, 7], "class": [1, 4, 6, 7], "coercedfloat": 1, "lookperiod": [1, 7], "fallback": [1, 6, 7, 8], "union": [1, 4], "float": 1, "int": [1, 4], "base": [1, 4], "object": [1, 4, 6, 7], "min_dai": [1, 6, 7], "max_dai": [1, 6, 7], "outcomespec": [1, 4, 6], "timeseries_df": [1, 6], "datafram": [1, 2, 4, 6, 7, 8], "feature_base_nam": [1, 6, 7], "str": [1, 4, 8], "lookahead_dai": [1, 6], "tupl": [1, 6], "aggregation_fn": [1, 6, 7, 8], "callabl": 1, "dataframegroupbi": 1, "incid": [1, 6], "bool": [1, 4], "prefix": [1, 4, 6], "outc": [1, 4, 6], "basemodel": [1, 4], "outcom": [1, 2, 5, 7], "paramet": [1, 4, 6], "valu": [1, 2, 4, 6, 7, 8], "should": [1, 4, 6, 8], "contain": [1, 4, 6, 8], "column": [1, 2, 4, 6, 7, 8], "entity_id": [1, 4, 6, 7, 8], "id": [1, 2, 6, 8], "entiti": [1, 6], "each": [1, 2, 5, 6, 8], "belong": 1, "The": [1, 2, 5, 6, 7], "timeseri": [1, 4, 6], "timestamp": [1, 4, 6, 7, 8], "datetim": [1, 6, 7], "note": [1, 6, 7, 8], "name": [1, 4, 6, 7, 8], "overridden": 1, "when": [1, 2, 6, 7], "initialis": 1, "gener": [1, 2, 5, 6, 7], "g": [1, 2, 6, 7, 8], "_": [1, 6, 7], "feature_baase_nam": 1, "metadata": [1, 4, 6, 8], "interv": [1, 6], "predict": [1, 2, 4, 5, 7, 8], "look": [1, 2, 6, 7], "two": [1, 4, 6, 7], "specifi": [1, 2, 5, 7], "resolv": 1, "0": [1, 6, 7, 8], "how": [1, 2, 5, 7, 8], "aggreg": [1, 2, 6], "multipl": [1, 2, 6, 7, 8], "within": [1, 2, 6, 8], "lookahead": [1, 2, 6], "dai": [1, 6, 8], "take": [1, 4, 6, 7, 8], "group": [1, 7, 8], "input": 1, "return": [1, 4, 6, 7, 8], "singl": [1, 2, 6], "i": [1, 2, 3, 6, 7, 8], "found": [1, 6], "window": [1, 2, 6, 7], "whether": [1, 6], "type": [1, 2, 4, 6, 7], "2": [1, 6, 7, 8], "diabet": [1, 6], "becaus": [1, 4, 6, 7], "onli": [1, 2, 6, 7, 8], "experi": [1, 6], "onc": [1, 6], "handl": [1, 6], "vectoris": 1, "wai": 1, "dure": 1, "resolut": 1, "faster": [1, 6, 7, 8], "than": [1, 2, 6], "non": 1, "occur": [1, 2, 6], "feature_nam": [1, 7], "default": [1, 4, 6], "pred": [1, 4, 6], "get_output_col_nam": 1, "get": [1, 3, 4], "output": [1, 4, 7, 8], "is_dichotom": 1, "check": [1, 6, 7, 8], "dichotom": 1, "properti": [1, 6, 7], "lookahead_period": 1, "model_config": [1, 4], "classvar": [1, 4], "configdict": [1, 4], "arbitrary_types_allow": [1, 4], "true": [1, 2, 4, 6, 7], "extra": [1, 6, 7], "forbid": 1, "frozen": 1, "configur": [1, 4], "model": [1, 2, 4, 6, 8], "dictionari": [1, 4], "conform": [1, 4], "pydant": [1, 4], "config": [1, 4], "model_field": [1, 4], "dict": [1, 4], "fieldinfo": [1, 4], "annot": [1, 4], "list": [1, 4, 5, 6, 7, 8], "fals": [1, 4, 6, 8], "about": [1, 4, 6], "field": [1, 4], "map": [1, 4], "thi": [1, 2, 4, 6, 7, 8], "replac": [1, 4], "__fields__": [1, 4], "v1": [1, 4], "predictorspec": [1, 4, 6], "lookbehind_dai": [1, 6, 7, 8], "predictor": [1, 2, 4, 5, 7], "lookbehind": [1, 2, 6, 7, 8], "lookbehind_period": [1, 7], "staticspec": [1, 4, 6], "static": [1, 5, 7], "can_be_coerced_losslessly_to_int": 1, "coerce_float": 1, "get_temporal_col_nam": 1, "tempor": [1, 5, 7, 8], "packag": [2, 5, 6, 7], "data": [2, 4, 5, 7, 8], "machin": 2, "learn": 2, "implement": [2, 7], "method": [2, 4, 7], "includ": 2, "convert": [2, 8], "ani": [2, 4, 6, 7, 8], "irregular": [2, 6], "row": [2, 6, 7, 8], "desir": 2, "construct": 2, "raw": 2, "ar": [2, 4, 6, 7, 8], "allow": [2, 4, 7], "patient": [2, 6, 8], "independ": 2, "set": [2, 4, 6], "particular": 2, "sever": [2, 8], "choic": 2, "one": [2, 4, 6, 7, 8], "need": [2, 6, 7, 8], "issu": [2, 6], "everi": [2, 6, 7], "physic": 2, "visit": 2, "morn": 2, "anoth": [2, 6], "clinic": [2, 8], "meaning": 2, "far": [2, 6, 8], "back": [2, 6], "ahead": [2, 6], "exist": 2, "point": [2, 6], "abov": [2, 6, 7, 8], "figur": 2, "graphic": 2, "repres": [2, 6], "terminologi": [2, 6], "determin": [2, 6], "wherea": 2, "futur": [2, 6], "refer": [2, 6], "b": 2, "label": [2, 6], "neg": 2, "never": [2, 6], "happen": [2, 6], "outsid": [2, 6], "posit": [2, 6], "insid": [2, 6], "exampl": [2, 6, 7, 8], "mean": [2, 6, 7, 8], "shown": [2, 6], "max": [2, 6], "min": [2, 6], "etc": [2, 6], "d": 2, "drop": [2, 6, 7, 8], "extend": [2, 6], "further": [2, 4, 6], "start": [2, 3, 6, 7, 8], "dataset": [2, 4, 5, 6, 7], "end": [2, 6, 7], "behaviour": 2, "option": [2, 4], "obtain": 2, "rich": 2, "represent": 2, "see": [2, 4, 6], "tutori": [2, 4, 8], "placehold": 2, "case": [2, 6], "report": 2, "request": 2, "github": [2, 3], "tracker": 2, "otherwis": 2, "discuss": [2, 6], "forum": 2, "bug": 2, "idea": 2, "usag": 2, "index": 2, "run": [3, 5], "follow": [3, 6], "line": [3, 6, 7], "termin": 3, "There": [3, 6, 7, 8], "discrep": 3, "between": 3, "latest": 3, "version": [3, 6, 7], "flatten": [4, 5, 8], "describ": [4, 6, 8], "speccollect": 4, "outcome_spec": [4, 6], "predictor_spec": 4, "static_spec": 4, "collect": 4, "spec": [4, 6, 7, 8], "prediction_times_df": [4, 6, 7, 8], "drop_pred_times_with_insufficient_look_dist": [4, 6, 7, 8], "cach": [4, 5], "featurecach": [4, 7], "none": [4, 6, 7], "entity_id_col_nam": [4, 6, 7, 8], "timestamp_col_nam": [4, 6, 7, 8], "predictor_col_name_prefix": 4, "outcome_col_name_prefix": 4, "n_worker": [4, 6, 7, 8], "60": [4, 7], "log_to_stdout": 4, "turn": [4, 8], "tabular": [4, 8], "add_ag": 4, "date_of_birth_df": 4, "date_of_birth_col_nam": 4, "date_of_birth": 4, "output_prefix": 4, "add": [4, 6, 7], "ag": 4, "ha": [4, 6, 7, 8], "its": [4, 6], "own": [4, 7], "function": [4, 6, 8], "veri": 4, "frequent": [4, 6], "match": 4, "self": [4, 6, 7], "add_spec": [4, 6, 7, 8], "sequenc": [4, 7], "queue": 4, "unprocess": [4, 6, 7, 8], "process": [4, 6, 7, 8], "until": 4, "call": [4, 6, 7], "comput": [4, 6, 7, 8], "get_df": [4, 6, 7, 8], "u": 4, "more": [4, 6, 7], "effecti": 4, "parallelis": 4, "most": [4, 6, 7], "complex": 4, "li": 4, "For": [4, 6, 7, 8], "document": 4, "those": 4, "present": [4, 6], "we": [5, 6, 7, 8], "recommend": 5, "go": [5, 6], "through": 5, "below": 5, "jupyt": 5, "notebook": 5, "download": 5, "local": [5, 6, 7], "introductori": 5, "load": [5, 7, 8], "advanc": [5, 6], "creat": [5, 6, 8], "combin": 5, "ad": [5, 6], "embed": 5, "especi": 6, "help": 6, "have": [6, 7, 8], "complic": 6, "train": 6, "simpl": 6, "explain": 6, "appli": 6, "consist": 6, "3": [6, 7, 8], "step": 6, "": [6, 7, 8], "simplest": 6, "first": [6, 7, 8], "predictin": 6, "element": 6, "context": 6, "skimpi": [6, 7], "import": [6, 7, 8], "skim": [6, 7], "test": [6, 7, 8], "load_synth_data": [6, 7, 8], "load_synth_prediction_tim": [6, 7, 8], "df_prediction_tim": 6, "sort_valu": 6, "summari": [6, 7], "count": [6, 7], "10000": [6, 7], "int64": [6, 7], "1": [6, 7, 8], "datetime64": [6, 7], "column_nam": [6, 7], "na": [6, 7, 8], "sd": [6, 7], "p0": [6, 7], "p25": [6, 7], "p50": [6, 7], "p75": [6, 7], "p100": [6, 7], "hist": [6, 7], "5000": [6, 7], "2900": [6, 7], "2500": 6, "4900": [6, 7], "7400": [6, 7], "last": [6, 7, 8], "frequenc": [6, 7], "1965": [6, 8], "01": [6, 7, 8], "02": [6, 7, 8], "09": [6, 8], "35": 6, "00": [6, 7, 8], "1969": [6, 7, 8], "12": [6, 7, 8], "31": [6, 7, 8], "21": [6, 7, 8], "42": [6, 7], "628": 6, "11": [6, 7, 8], "55": 6, "2005": 6, "03": [6, 8], "15": [6, 8], "07": [6, 8], "16": [6, 8], "4370": 6, "13": [6, 7, 8], "23": [6, 8], "18": [6, 7, 8], "6152": 6, "1968": [6, 7, 8], "04": [6, 8], "6873": 6, "4": [6, 7, 8], "28": [6, 8], "33": 6, "9688": 6, "9996": 6, "17": [6, 7, 8], "1463": 6, "30": [6, 7, 8], "19": [6, 8], "3952": 6, "9997": 6, "1967": [6, 8], "06": [6, 8], "08": [6, 8], "52": 6, "7926": 6, "9999": 6, "22": [6, 8], "24": 6, "5720": 6, "14": [6, 8], "59": [6, 7], "here": 6, "Then": [6, 7], "our": [6, 7, 8], "differ": [6, 7], "timepoint": 6, "load_synth_predictor_float": [6, 7], "df_synth_predictor": 6, "100000": 6, "float64": [6, 7], "7500": 6, "5": [6, 7, 8], "9": [6, 7], "00015": 6, "7": [6, 7, 8], "10": [6, 7, 8], "37": 6, "95792": 6, "29": [6, 7], "799246": 6, "82592": 6, "05": [6, 7, 8], "6": [6, 7], "630007": 6, "1377": 6, "174793": 6, "28579": 6, "26": [6, 8], "981185": 6, "81247": 6, "44": [6, 7], "970382": 6, "10277": 6, "20": [6, 8], "304568": 6, "74701": 6, "671907": 6, "69566": 6, "41": [6, 8], "250538": 6, "40901": 6, "1966": [6, 8], "924175": 6, "96881": 6, "501553": 6, "again": 6, "could": 6, "sex": 6, "doesn": 6, "t": [6, 7], "chang": 6, "over": 6, "let": [6, 7, 8], "load_synth_sex": 6, "df_synth_sex": 6, "femal": 6, "9994": 6, "9995": 6, "9998": 6, "As": [6, 8], "And": 6, "lastli": 6, "ve": 6, "chosen": 6, "binari": 6, "store": 6, "infer": 6, "do": 6, "sinc": 6, "thei": [6, 7, 8], "section": 6, "load_synth_outcom": 6, "df_synth_outcom": 6, "3103": 6, "5100": 6, "7600": 6, "50": [6, 7, 8], "46": [6, 7], "6253": 6, "9964": 6, "6255": 6, "9966": 6, "6256": 6, "9968": 6, "6257": 6, "9970": 6, "6269": 6, "9992": 6, "53": [6, 7], "per": [6, 7], "now": [6, 7, 8], "recip": 6, "finish": 6, "firstli": 6, "main": 6, "decis": 6, "size": [6, 7], "given": 6, "indic": 6, "code": [6, 7], "feature_spec": [6, 7, 8], "single_spec": 6, "maximum": [6, 7], "panda": [6, 7, 8], "pd": [6, 8], "test_df": 6, "365": [6, 7, 8], "outcome_nam": 6, "argument": 6, "values_df": 6, "decid": 6, "least": 6, "correspond": [6, 8], "both": 6, "accomplish": 6, "dw_ek_borg": 6, "wa": [6, 8], "mark": 6, "after": 6, "where": 6, "event": 6, "perman": 6, "specifii": 6, "forward": 6, "search": 6, "certain": 6, "period": [6, 8], "befor": [6, 8], "instead": 6, "almost": 6, "entir": 6, "ident": 6, "except": 6, "past": 6, "numpi": [6, 7, 8], "np": [6, 7, 8], "temporal_predictor_spec": 6, "730": [6, 7, 8], "nan": [6, 7, 8], "predictor_nam": 6, "rang": 6, "similar": 6, "instanc": [6, 7], "might": [6, 7, 8], "182": 6, "easili": 6, "pass": [6, 8], "temporal_interval_predictor_spec": 6, "90": 6, "predictor_interval_nam": 6, "slightli": 6, "previou": 6, "provid": 6, "howev": [6, 7, 8], "By": 6, "filter": 6, "easi": 6, "manual": [6, 7], "sex_predictor_spec": 6, "input_col_name_overrid": 6, "df": [6, 7, 8], "tsflatten": 6, "re": [6, 8], "readi": 6, "instanti": 6, "along": 6, "add_": 6, "parallel": [6, 7, 8], "oper": 6, "across": 6, "core": [6, 7], "ts_flatten": [6, 7, 8], "applic": 6, "sai": [6, 7], "month": [6, 7, 8], "would": [6, 8], "compromis": 6, "generalis": 6, "some": [6, 7, 8], "edg": 6, "brief": 6, "2024": [6, 7, 8], "25": [6, 7, 8], "info": [6, 7, 8], "were": [6, 7, 8], "_drop_pred_time_if_insufficient_look_dist": [6, 7], "5999": 6, "99": 6, "worker": [6, 7, 8], "chunksiz": [6, 7, 8], "mai": [6, 7, 8], "progress": [6, 7, 8], "bar": [6, 7, 8], "move": [6, 7, 8], "batch": [6, 7, 8], "much": [6, 7, 8], "total": [6, 7, 8], "perform": [6, 7, 8], "100": [6, 7, 8], "40": [6, 7], "31it": 6, "align": [6, 7, 8], "littl": [6, 7, 8], "while": [6, 7, 8], "minut": [6, 7, 8], "000": [6, 7, 8], "concaten": [6, 7, 8], "Will": [6, 7, 8], "system": [6, 7, 8], "2_000_000": [6, 7, 8], "normal": [6, 7, 8], "took": [6, 7, 8], "004": 6, "second": [6, 7, 8], "merg": [6, 7, 8], "origin": [6, 7, 8], "4001": 6, "string": [6, 7], "2600": [6, 7], "outc_outcome_name_withi": 6, "064": 6, "n_365_days_maximum_fal": 6, "back_0_dichotom": 6, "pred_predictor_name_wit": 6, "72": 6, "097": 6, "hin_730_days_mean_fallb": 6, "ack_nan": 6, "pred_predictor_interv": 6, "2877": 6, "71": 6, "91": 6, "_name_within_30_to_90_d": 6, "ays_mean_fallback_nan": [6, 7], "pred_femal": 6, "49": 6, "39": 6, "word": [6, 7, 8], "prediction_time_uuid": [6, 7, 8], "outc_outcome_name_within_365_days_maximum_fallback_0_dichotom": 6, "pred_predictor_name_within_730_days_mean_fallback_nan": 6, "pred_predictor_interval_name_within_30_to_90_days_mean_fallback_nan": 6, "display": [6, 7], "shorten": [6, 7], "col": [6, 7], "shortened_pr": 6, "pred_x": 6, "shortened_pred_interv": 6, "pred_x_30_to_90": 6, "shortened_outcom": 6, "outc_i": 6, "renam": [6, 7], "pred_predictor_name_within_0_to_730_days_mean_fallback_nan": 6, "outc_outcome_name_within_0_to_365_days_maximum_fallback_0_dichotom": 6, "axi": [6, 7, 8], "set_table_attribut": [6, 7], "font": [6, 7], "14px": [6, 7], "importerror": [6, 7], "traceback": [6, 7], "recent": [6, 7], "cell": [6, 7], "file": [6, 7], "lib": [6, 7], "python3": [6, 7], "site": [6, 7], "frame": [6, 7], "1338": [6, 7], "1318": [6, 7], "1319": [6, 7, 8], "def": [6, 7, 8], "styler": [6, 7], "1320": [6, 7], "1321": [6, 7], "1322": [6, 7], "1336": [6, 7], "tabl": [6, 7], "visual": [6, 7], "user_guid": [6, 7], "ipynb": [6, 7], "1337": [6, 7], "io": [6, 7], "format": [6, 7, 8], "1340": [6, 7], "shared_doc": [6, 7], "_shared_doc": [6, 7], "save_to_buff": [6, 7], "jinja2": [6, 7], "import_optional_depend": [6, 7], "style_rend": [6, 7], "47": [6, 7, 8], "cssproperti": [6, 7], "48": [6, 7], "cssstyle": [6, 7], "56": [6, 7], "refactor_level": [6, 7], "57": [6, 7], "type_check": [6, 7], "compat": [6, 7], "_option": [6, 7], "161": [6, 7], "error": [6, 7], "min_vers": [6, 7], "159": [6, 7], "160": [6, 7], "elif": [6, 7], "rais": [6, 7], "msg": [6, 7], "163": [6, 7], "modul": [6, 7], "newer": [6, 7], "current": [6, 7], "classif": 6, "citizen": 6, "uniqu": 6, "identifi": 6, "prediciton": 6, "pred_": [6, 7], "outc_": 6, "basic": 7, "cover": [7, 8], "expand": 7, "effect": 7, "mani": 7, "so": [7, 8], "iter": 7, "without": 7, "complet": 7, "full": 7, "hand": 7, "rather": 7, "straightforward": 7, "what": 7, "hundr": 7, "amount": 7, "write": 7, "grow": 7, "quit": 7, "substanti": 7, "becom": 7, "consum": 7, "hard": 7, "navig": 7, "solv": 7, "problem": 7, "combinatori": 7, "group_spec": [7, 8], "predictorgroupspec": [7, 8], "nameddatafram": 7, "pprint": 7, "pred_spec_batch": 7, "named_datafram": [7, 8], "synth_predictor_float": 7, "1095": 7, "create_combin": [7, 8], "attribut": 7, "easier": 7, "namedatafram": 7, "exactli": 7, "load_synth_predictor_flaot": 7, "pred_synth_predictor_float_": 7, "result": [7, 8], "good": 7, "small": [7, 8], "highlight": 7, "pred_spec_batch_summari": 7, "pred_spec": 7, "__name__": 7, "print": [7, 8], "f": 7, "len": [7, 8], "know": 7, "bunch": 7, "quickli": 7, "But": 7, "next": 7, "ship": 7, "disk": 7, "feature_cach": 7, "cache_to_disk": 7, "diskcach": 7, "flattened_dataset": 7, "pathlib": 7, "path": 7, "feature_cache_dir": 7, "tmp": 7, "directori": 7, "save": 7, "just": 7, "them": 7, "won": 7, "alreadi": [7, 8], "new": 7, "abstract": 7, "redi": 7, "sql": 7, "everyth": 7, "work": 7, "6053": 7, "63it": 7, "45": 7, "85it": 7, "006": 7, "3947": 7, "pred_synth_predictor_fl": 7, "oat_within_1095_days_m": 7, "an_fallback_nan": 7, "506": 7, "82": 7, "024": 7, "oat_within_365_to_730_d": 7, "oat_within_1095_days_ma": 7, "ximum_fallback_nan": 7, "533": 7, "0084": 7, "oat_within_365_days_max": 7, "imum_fallback_nan": 7, "oat_within_365_days_mea": 7, "n_fallback_nan": 7, "ays_maximum_fallback_na": 7, "n": 7, "pred_synth_predictor_float_within_1095_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_365_to_730_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_1095_days_maximum_fallback_nan": 7, "pred_synth_predictor_float_within_365_days_maximum_fallback_nan": 7, "pred_synth_predictor_float_within_365_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_365_to_730_days_maximum_fallback_nan": 7, "pred_col": 7, "startswith": 7, "rename_dict": 7, "enumer": 7, "df_renam": 7, "base_col": 7, "renamed_col": 7, "dealt": 8, "show": 8, "out": 8, "synthet": 8, "other": 8, "load_synth_text": 8, "synth_text": 8, "head": 8, "4647": 8, "went": 8, "induc": 8, "coma": 8, "2007": 8, "taken": 8, "emerg": 8, "departm": 8, "5799": 8, "old": 8, "son": 8, "wh": 8, "had": 8, "been": 8, "left": 8, "bed": 8, "minu": 8, "4234": 8, "allergi": 8, "often": 8, "advantag": 8, "emb": 8, "speed": 8, "up": 8, "block": 8, "tf": 8, "idf": 8, "form": 8, "constraint": 8, "entitiy_id_col": 8, "timestamp_col": 8, "value_col": 8, "purpos": 8, "demonstr": 8, "fit": 8, "captur": 8, "sklearn": 8, "feature_extract": 8, "tfidfvector": 8, "embed_text_to_df": 8, "tfidf_model": 8, "max_featur": 8, "fit_transform": 8, "toarrai": 8, "get_feature_names_out": 8, "embedded_text": 8, "tolist": 8, "metadata_onli": 8, "embedded_text_with_metadata": 8, "concat": 8, "175872": 8, "182066": 8, "249848": 8, "158430": 8, "000000": 8, "023042": 8, "311389": 8, "529966": 8, "490203": 8, "479312": 8, "244870": 8, "135282": 8, "064337": 8, "465084": 8, "336859": 8, "151743": 8, "729861": 8, "179161": 8, "192367": 8, "232332": 8, "283402": 8, "336952": 8, "176422": 8, "238416": 8, "646879": 8, "250217": 8, "382277": 8, "165635": 8, "200046": 8, "183015": 8, "261115": 8, "125837": 8, "151906": 8, "205285": 8, "759528": 8, "403961": 8, "098747": 8, "493461": 8, "119196": 8, "272619": 8, "207444": 8, "045256": 8, "183475": 8, "588324": 8, "433253": 8, "235349": 8, "df_with_multiple_values_to_named_datafram": 8, "readili": 8, "suppli": 8, "df_transform": 8, "split": 8, "embedded_df": 8, "name_prefix": 8, "tfidf_": 8, "accord": 8, "inform": 8, "bow": 8, "kept": 8, "tfidf_and": 8, "emb_spec_batch": 8, "36it": 8, "10it": 8, "028": 8, "sake": 8, "dropna": 8, "pred_tfidf_to_within_730_days_mean_fallback_nan": 8, "pred_tfidf_or_within_730_days_mean_fallback_nan": 8, "pred_tfidf_that_within_365_days_mean_fallback_nan": 8, "pred_tfidf_and_within_730_days_mean_fallback_nan": 8, "pred_tfidf_was_within_730_days_mean_fallback_nan": 8, "pred_tfidf_in_within_730_days_mean_fallback_nan": 8, "pred_tfidf_in_within_365_days_mean_fallback_nan": 8, "pred_tfidf_and_within_365_days_mean_fallback_nan": 8, "pred_tfidf_the_within_730_days_mean_fallback_nan": 8, "pred_tfidf_for_within_730_days_mean_fallback_nan": 8, "pred_tfidf_to_within_365_days_mean_fallback_nan": 8, "pred_tfidf_was_within_365_days_mean_fallback_nan": 8, "pred_tfidf_of_within_365_days_mean_fallback_nan": 8, "pred_tfidf_patient_within_730_days_mean_fallback_nan": 8, "pred_tfidf_the_within_365_days_mean_fallback_nan": 8, "pred_tfidf_for_within_365_days_mean_fallback_nan": 8, "pred_tfidf_or_within_365_days_mean_fallback_nan": 8, "1917": 8, "4977": 8, "284485": 8, "221549": 8, "090356": 8, "145809": 8, "086927": 8, "483324": 8, "534890": 8, "088050": 8, "536339": 8, "133722": 8, "2463": 8, "6840": 8, "456030": 8, "355142": 8, "096561": 8, "155821": 8, "092896": 8, "258256": 8, "285810": 8, "376386": 8, "573168": 8, "071452": 8, "2580": 8, "639848": 8, "260680": 8, "601521": 8, "401014": 8, "2741": 8, "9832": 8, "36": 8, "164655": 8, "128228": 8, "225044": 8, "335410": 8, "186493": 8, "825558": 8, "101924": 8, "236513": 8, "103195": 8, "2931": 8, "7281": 8, "211934": 8, "385111": 8, "269251": 8, "289663": 8, "388547": 8, "280049": 8, "464891": 8, "043730": 8, "304425": 8, "332065": 8}, "objects": {"timeseriesflattener.feature_specs": [[1, 0, 0, "-", "single_specs"]], "timeseriesflattener.feature_specs.single_specs": [[1, 1, 1, "", "CoercedFloats"], [1, 1, 1, "", "LookPeriod"], [1, 1, 1, "", "OutcomeSpec"], [1, 1, 1, "", "PredictorSpec"], [1, 1, 1, "", "StaticSpec"], [1, 5, 1, "", "can_be_coerced_losslessly_to_int"], [1, 5, 1, "", "coerce_floats"], [1, 5, 1, "", "get_temporal_col_name"]], "timeseriesflattener.feature_specs.single_specs.CoercedFloats": [[1, 2, 1, "", "fallback"], [1, 2, 1, "", "lookperiod"]], "timeseriesflattener.feature_specs.single_specs.LookPeriod": [[1, 2, 1, "", "max_days"], [1, 2, 1, "", "min_days"]], "timeseriesflattener.feature_specs.single_specs.OutcomeSpec": [[1, 2, 1, "", "aggregation_fn"], [1, 2, 1, "", "fallback"], [1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "incident"], [1, 3, 1, "", "is_dichotomous"], [1, 2, 1, "", "lookahead_days"], [1, 4, 1, "", "lookahead_period"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener.feature_specs.single_specs.PredictorSpec": [[1, 2, 1, "", "aggregation_fn"], [1, 2, 1, "", "fallback"], [1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "lookbehind_days"], [1, 4, 1, "", "lookbehind_period"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener.feature_specs.single_specs.StaticSpec": [[1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener": [[4, 0, 0, "-", "flattened_dataset"]], "timeseriesflattener.flattened_dataset": [[4, 1, 1, "", "SpecCollection"], [4, 1, 1, "", "TimeseriesFlattener"]], "timeseriesflattener.flattened_dataset.SpecCollection": [[4, 2, 1, "", "model_config"], [4, 2, 1, "", "model_fields"], [4, 2, 1, "", "outcome_specs"], [4, 2, 1, "", "predictor_specs"], [4, 2, 1, "", "static_specs"]], "timeseriesflattener.flattened_dataset.TimeseriesFlattener": [[4, 3, 1, "", "add_age"], [4, 3, 1, "", "add_spec"], [4, 3, 1, "", "compute"], [4, 3, 1, "", "get_df"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:attribute", "3": "py:method", "4": "py:property", "5": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "method", "Python method"], "4": ["py", "property", "Python property"], "5": ["py", "function", "Python function"]}, "titleterms": {"frequent": 0, "ask": [0, 2], "question": [0, 2], "cite": 0, "thi": 0, "packag": 0, "how": [0, 6], "do": 0, "i": 0, "test": 0, "code": 0, "run": 0, "suit": 0, "document": 0, "gener": [0, 8], "featur": [1, 7, 8], "specif": [1, 6], "timeseriesflatten": [1, 2, 4], "feature_spec": 1, "single_spec": 1, "function": 2, "where": 2, "indic": 2, "search": 2, "instal": 3, "flattened_dataset": 4, "tutori": [5, 6, 7], "get": 5, "start": 5, "introductori": 6, "load": 6, "data": 6, "predict": 6, "time": 6, "tempor": 6, "predictor": [6, 8], "static": 6, "outcom": 6, "specifi": 6, "flatten": 6, "advanc": 7, "creat": 7, "combin": 7, "cach": 7, "ad": 8, "text": 8, "The": 8, "dataset": 8, "from": 8, "embed": 8}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Frequently Asked Questions": [[0, "frequently-asked-questions"]], "Citing this package": [[0, "citing-this-package"]], "How do I test the code and run the test suite?": [[0, "how-do-i-test-the-code-and-run-the-test-suite"]], "How is the documentation generated?": [[0, "how-is-the-documentation-generated"]], "Feature specifications": [[1, "feature-specifications"]], "timeseriesflattener.feature_specs.single_specs": [[1, "module-timeseriesflattener.feature_specs.single_specs"]], "timeseriesflattener": [[2, "timeseriesflattener"]], "Functionality": [[2, "functionality"]], "Where to ask questions?": [[2, "where-to-ask-questions"]], "Indices and search": [[2, "indices-and-search"]], "Installation": [[3, "installation"]], "Timeseriesflattener": [[4, "timeseriesflattener"]], "timeseriesflattener.flattened_dataset": [[4, "module-timeseriesflattener.flattened_dataset"]], "Tutorials": [[5, "tutorials"]], "Getting started": [[5, null]], "Introductory Tutorial": [[6, "introductory-tutorial"]], "Loading data": [[6, "loading-data"]], "Loading prediction times": [[6, "loading-prediction-times"]], "Loading a temporal predictor": [[6, "loading-a-temporal-predictor"]], "Loading a static predictor": [[6, "loading-a-static-predictor"]], "Loading a temporal outcome": [[6, "loading-a-temporal-outcome"]], "Specifying how to flatten the data": [[6, "specifying-how-to-flatten-the-data"]], "Temporal outcome specification": [[6, "temporal-outcome-specification"]], "Temporal predictor specification": [[6, "temporal-predictor-specification"]], "Static predictor specification": [[6, "static-predictor-specification"]], "Flattening": [[6, "flattening"]], "Advanced Tutorial": [[7, "advanced-tutorial"]], "Creating feature combinations": [[7, "creating-feature-combinations"]], "Caching": [[7, "caching"]], "Adding text features": [[8, "adding-text-features"]], "The dataset": [[8, "the-dataset"]], "Generating predictors from embedded text": [[8, "generating-predictors-from-embedded-text"]]}, "indexentries": {"coercedfloats (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats"]], "lookperiod (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod"]], "outcomespec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec"]], "predictorspec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec"]], "staticspec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec"]], "aggregation_fn (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.aggregation_fn"]], "aggregation_fn (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.aggregation_fn"]], "can_be_coerced_losslessly_to_int() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.can_be_coerced_losslessly_to_int"]], "coerce_floats() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.coerce_floats"]], "fallback (coercedfloats attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats.fallback"]], "fallback (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.fallback"]], "fallback (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.fallback"]], "feature_base_name (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.feature_base_name"]], "feature_base_name (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.feature_base_name"]], "feature_base_name (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.feature_base_name"]], "get_output_col_name() (outcomespec method)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.get_output_col_name"]], "get_output_col_name() (predictorspec method)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.get_output_col_name"]], "get_output_col_name() (staticspec method)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.get_output_col_name"]], "get_temporal_col_name() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.get_temporal_col_name"]], "incident (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.incident"]], "is_dichotomous() (outcomespec method)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.is_dichotomous"]], "lookahead_days (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.lookahead_days"]], "lookahead_period (outcomespec property)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.lookahead_period"]], "lookbehind_days (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.lookbehind_days"]], "lookbehind_period (predictorspec property)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.lookbehind_period"]], "lookperiod (coercedfloats attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats.lookperiod"]], "max_days (lookperiod attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod.max_days"]], "min_days (lookperiod attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod.min_days"]], "model_config (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_config"]], "model_config (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_config"]], "model_config (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_config"]], "model_fields (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_fields"]], "model_fields (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_fields"]], "model_fields (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_fields"]], "module": [[1, "module-timeseriesflattener.feature_specs.single_specs"], [4, "module-timeseriesflattener.flattened_dataset"]], "prefix (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.prefix"]], "prefix (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.prefix"]], "prefix (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.prefix"]], "timeseries_df (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.timeseries_df"]], "timeseries_df (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.timeseries_df"]], "timeseries_df (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.timeseries_df"]], "timeseriesflattener.feature_specs.single_specs": [[1, "module-timeseriesflattener.feature_specs.single_specs"]], "speccollection (class in timeseriesflattener.flattened_dataset)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection"]], "timeseriesflattener (class in timeseriesflattener.flattened_dataset)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener"]], "add_age() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.add_age"]], "add_spec() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.add_spec"]], "compute() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.compute"]], "get_df() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.get_df"]], "model_config (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_config"]], "model_fields (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_fields"]], "outcome_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.outcome_specs"]], "predictor_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.predictor_specs"]], "static_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.static_specs"]], "timeseriesflattener.flattened_dataset": [[4, "module-timeseriesflattener.flattened_dataset"]]}}) \ No newline at end of file diff --git a/tutorials/01_basic.html b/tutorials/01_basic.html index a6be672a..d77a3e95 100644 --- a/tutorials/01_basic.html +++ b/tutorials/01_basic.html @@ -991,31 +991,31 @@

Flattening -
2024-01-25 13:32:19 [INFO] There were unprocessed specs, computing...
+
2024-01-25 13:50:07 [INFO] There were unprocessed specs, computing...
 
-
2024-01-25 13:32:19 [INFO] _drop_pred_time_if_insufficient_look_distance: Dropped 5999 (59.99%) rows
+
2024-01-25 13:50:07 [INFO] _drop_pred_time_if_insufficient_look_distance: Dropped 5999 (59.99%) rows
 
-
2024-01-25 13:32:19 [INFO] Processing 3 temporal features in parallel with 1 workers. Chunksize is 3. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
+
2024-01-25 13:50:07 [INFO] Processing 3 temporal features in parallel with 1 workers. Chunksize is 3. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
 
  0%|          | 0/3 [00:00<?, ?it/s]
 
-
100%|██████████| 3/3 [00:00<00:00, 39.05it/s]
+
100%|██████████| 3/3 [00:00<00:00, 40.31it/s]
 
-
2024-01-25 13:32:19 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
+
2024-01-25 13:50:07 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
 
-
2024-01-25 13:32:19 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
+
2024-01-25 13:50:07 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
 
-
2024-01-25 13:32:19 [INFO] Concatenation took 0.004 seconds
+
2024-01-25 13:50:07 [INFO] Concatenation took 0.004 seconds
 
-
2024-01-25 13:32:19 [INFO] Merging with original df
+
2024-01-25 13:50:07 [INFO] Merging with original df
 
╭──────────────────────────────────────────────── skimpy summary ─────────────────────────────────────────────────╮
@@ -1036,12 +1036,12 @@ 

Flatteningoutc_outcome_name_withi │ 0 0 0.064 0.25 0 0 0 0 1▇ ▁ │ │ │ │ n_365_days_maximum_fall │ │ │ │ │ │ │ │ │ │ │ │ │ │ back_0_dichotomous │ │ │ │ │ │ │ │ │ │ │ │ -│ │ pred_predictor_interval 2877 71.91 5 2.8 0.02 2.6 5.1 7.4 10▇▇▇▇▇▇ │ │ -│ │ _name_within_30_to_90_d │ │ │ │ │ │ │ │ │ │ │ │ -│ │ ays_mean_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ │ │ pred_predictor_name_wit 72 1.8 5 1.6 0.097 3.9 5 6 9.9▁▃▇▇▃▁ │ │ │ │ hin_730_days_mean_fallb │ │ │ │ │ │ │ │ │ │ │ │ │ │ ack_nan │ │ │ │ │ │ │ │ │ │ │ │ +│ │ pred_predictor_interval 2877 71.91 5 2.8 0.02 2.6 5.1 7.4 10▇▇▇▇▇▇ │ │ +│ │ _name_within_30_to_90_d │ │ │ │ │ │ │ │ │ │ │ │ +│ │ ays_mean_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ │ │ pred_female 0 0 0.49 0.5 0 0 0 1 1▇ ▇ │ │ │ └─────────────────────────┴───────┴────────┴────────┴───────┴────────┴───────┴───────┴──────┴───────┴────────┘ │ │ datetime │ @@ -1062,8 +1062,8 @@

Flattening -
2024-01-25 13:32:22 [INFO] There were unprocessed specs, computing...
+
2024-01-25 13:50:10 [INFO] There were unprocessed specs, computing...
 
-
2024-01-25 13:32:22 [INFO] _drop_pred_time_if_insufficient_look_distance: Dropped 6053 (60.53%) rows
+
2024-01-25 13:50:10 [INFO] _drop_pred_time_if_insufficient_look_distance: Dropped 6053 (60.53%) rows
 
-
2024-01-25 13:32:22 [INFO] Processing 6 temporal features in parallel with 4 workers. Chunksize is 2. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
+
2024-01-25 13:50:10 [INFO] Processing 6 temporal features in parallel with 4 workers. Chunksize is 2. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
 
  0%|          | 0/6 [00:00<?, ?it/s]
 
-
 17%|█▋        | 1/6 [00:00<00:00,  9.73it/s]
+
 17%|█▋        | 1/6 [00:00<00:00,  8.63it/s]
 
-
100%|██████████| 6/6 [00:00<00:00, 39.92it/s]
+
100%|██████████| 6/6 [00:00<00:00, 45.85it/s]
 
-

+
2024-01-25 13:50:10 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
 
-
2024-01-25 13:32:23 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
+
2024-01-25 13:50:10 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
 
-
2024-01-25 13:32:23 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
+
2024-01-25 13:50:10 [INFO] Concatenation took 0.006 seconds
 
-
2024-01-25 13:32:23 [INFO] Concatenation took 0.007 seconds
-
-
-
2024-01-25 13:32:23 [INFO] Merging with original df
+
2024-01-25 13:50:10 [INFO] Merging with original df
 
@@ -438,25 +435,25 @@

Caching column_name ┃ NA NA % mean sd p0 p25 p50 p75 p100 hist ┃ │ │ ┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━┩ │ │ │ entity_id 0 0 5000 2900 0 2600 4900 740010000▇▇▇▇▇▇ │ │ -│ │ pred_synth_predictor_fl 506 12.82 6.6 2.6 0.024 4.8 7.3 8.8 10▂▂▃▃▆▇ │ │ +│ │ pred_synth_predictor_fl 7 0.18 5 1.3 0.29 4.1 5 5.8 9.9 ▂▇▇▁ │ │ +│ │ oat_within_1095_days_me │ │ │ │ │ │ │ │ │ │ │ │ +│ │ an_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ +│ │ pred_synth_predictor_fl 506 12.82 5.1 2.2 0.024 3.6 5 6.5 10▂▅▇▇▅▂ │ │ │ │ oat_within_365_to_730_d │ │ │ │ │ │ │ │ │ │ │ │ -│ │ ays_maximum_fallback_na │ │ │ │ │ │ │ │ │ │ │ │ -│ │ n │ │ │ │ │ │ │ │ │ │ │ │ +│ │ ays_mean_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ │ │ pred_synth_predictor_fl 7 0.18 8.4 1.5 0.29 7.8 8.9 9.5 10 ▁▃▇ │ │ │ │ oat_within_1095_days_ma │ │ │ │ │ │ │ │ │ │ │ │ │ │ ximum_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ │ │ pred_synth_predictor_fl 533 13.5 6.6 2.6 0.0084 4.8 7.3 8.8 10▁▂▃▃▆▇ │ │ │ │ oat_within_365_days_max │ │ │ │ │ │ │ │ │ │ │ │ │ │ imum_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ -│ │ pred_synth_predictor_fl 506 12.82 5.1 2.2 0.024 3.6 5 6.5 10▂▅▇▇▅▂ │ │ -│ │ oat_within_365_to_730_d │ │ │ │ │ │ │ │ │ │ │ │ -│ │ ays_mean_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ │ │ pred_synth_predictor_fl 533 13.5 5 2.1 0.0084 3.6 5 6.4 9.9▂▅▇▇▅▂ │ │ │ │ oat_within_365_days_mea │ │ │ │ │ │ │ │ │ │ │ │ │ │ n_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ -│ │ pred_synth_predictor_fl 7 0.18 5 1.3 0.29 4.1 5 5.8 9.9 ▂▇▇▁ │ │ -│ │ oat_within_1095_days_me │ │ │ │ │ │ │ │ │ │ │ │ -│ │ an_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ +│ │ pred_synth_predictor_fl 506 12.82 6.6 2.6 0.024 4.8 7.3 8.8 10▂▂▃▃▆▇ │ │ +│ │ oat_within_365_to_730_d │ │ │ │ │ │ │ │ │ │ │ │ +│ │ ays_maximum_fallback_na │ │ │ │ │ │ │ │ │ │ │ │ +│ │ n │ │ │ │ │ │ │ │ │ │ │ │ │ └─────────────────────────┴──────┴────────┴───────┴───────┴─────────┴───────┴───────┴───────┴───────┴────────┘ │ │ datetime │ │ ┏━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ │ @@ -475,12 +472,12 @@

Caching
['entity_id',
  'timestamp',
  'prediction_time_uuid',
- 'pred_synth_predictor_float_within_365_to_730_days_maximum_fallback_nan',
+ 'pred_synth_predictor_float_within_1095_days_mean_fallback_nan',
+ 'pred_synth_predictor_float_within_365_to_730_days_mean_fallback_nan',
  'pred_synth_predictor_float_within_1095_days_maximum_fallback_nan',
  'pred_synth_predictor_float_within_365_days_maximum_fallback_nan',
- 'pred_synth_predictor_float_within_365_to_730_days_mean_fallback_nan',
  'pred_synth_predictor_float_within_365_days_mean_fallback_nan',
- 'pred_synth_predictor_float_within_1095_days_mean_fallback_nan']
+ 'pred_synth_predictor_float_within_365_to_730_days_maximum_fallback_nan']
 

diff --git a/tutorials/03_text.html b/tutorials/03_text.html index 896fdc3e..05dde920 100644 --- a/tutorials/03_text.html +++ b/tutorials/03_text.html @@ -623,31 +623,31 @@

Generating predictors from embedded text -
2024-01-25 13:32:26 [INFO] There were unprocessed specs, computing...
+
2024-01-25 13:50:13 [INFO] There were unprocessed specs, computing...
 
-
2024-01-25 13:32:26 [INFO] Processing 20 temporal features in parallel with 1 workers. Chunksize is 20. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
+
2024-01-25 13:50:13 [INFO] Processing 20 temporal features in parallel with 1 workers. Chunksize is 20. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
 
  0%|          | 0/20 [00:00<?, ?it/s]
 
-
  5%|▌         | 1/20 [00:00<00:07,  2.64it/s]
+
  5%|▌         | 1/20 [00:00<00:08,  2.36it/s]
 
-
100%|██████████| 20/20 [00:00<00:00, 52.68it/s]
+
100%|██████████| 20/20 [00:00<00:00, 47.10it/s]
 
-
2024-01-25 13:32:26 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
+
2024-01-25 13:50:13 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
 
-
2024-01-25 13:32:26 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
+
2024-01-25 13:50:13 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
 
-
2024-01-25 13:32:26 [INFO] Concatenation took 0.029 seconds
+
2024-01-25 13:50:13 [INFO] Concatenation took 0.028 seconds
 
-
2024-01-25 13:32:26 [INFO] Merging with original df
+
2024-01-25 13:50:13 [INFO] Merging with original df
 
@@ -682,24 +682,24 @@

Generating predictors from embedded text