From cdda9f6e00098f8ae643fd035e18b8b31cd51ada Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Tue, 13 Feb 2024 09:19:24 +0000 Subject: [PATCH] Add changes for a5d7e48ec1dc647ef743999e09e425f7fe7842e1 --- _static/documentation_options.js | 2 +- searchindex.js | 2 +- tutorials/01_basic.html | 24 ++--- tutorials/02_advanced.html | 52 +++++------ tutorials/03_text.html | 150 +++++++++++++++---------------- 5 files changed, 115 insertions(+), 115 deletions(-) diff --git a/_static/documentation_options.js b/_static/documentation_options.js index a68064db..d4feb60a 100644 --- a/_static/documentation_options.js +++ b/_static/documentation_options.js @@ -1,6 +1,6 @@ var DOCUMENTATION_OPTIONS = { URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), - VERSION: '1.16.0', + VERSION: '1.17.0', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/searchindex.js b/searchindex.js index e233247e..8a04154a 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["faq", "feature_specifications", "index", "installation", "timeseriesflattener", "tutorials", "tutorials/01_basic", "tutorials/02_advanced", "tutorials/03_text"], "filenames": ["faq.rst", "feature_specifications.rst", "index.rst", "installation.rst", "timeseriesflattener.rst", "tutorials.rst", "tutorials/01_basic.ipynb", "tutorials/02_advanced.ipynb", "tutorials/03_text.ipynb"], "titles": ["Frequently Asked Questions", "Feature specifications", "timeseriesflattener", "Installation", "Timeseriesflattener", "Tutorials", "Introductory Tutorial", "Advanced Tutorial", "Adding text features"], "terms": {"If": [0, 6, 7, 8], "you": [0, 1, 4, 5, 6, 7, 8], "wish": 0, "us": [0, 1, 2, 3, 4, 5, 6, 7, 8], "librari": 0, "your": [0, 3, 6, 7, 8], "research": 0, "pleas": [0, 2], "joss": 0, "paper": 0, "articl": 0, "bernstorff2023timeseriesflatten": 0, "titl": 0, "timeseriesflatten": [0, 3, 6, 7, 8], "A": [0, 1, 2, 4, 6, 7], "python": [0, 2], "summar": 0, "featur": [0, 2, 5, 6], "from": [0, 1, 2, 4, 5, 6, 7], "medic": [0, 2, 6, 8], "time": [0, 1, 2, 4, 5, 7, 8], "seri": [0, 1, 2, 4, 6], "author": 0, "bernstorff": 0, "martin": 0, "enevoldsen": 0, "kenneth": 0, "damgaard": 0, "jakob": 0, "danielsen": 0, "andrea": 0, "hansen": 0, "lass": 0, "journal": 0, "open": 0, "sourc": [0, 1, 4], "softwar": 0, "volum": 0, "8": [0, 6, 7], "number": [0, 2, 6, 7, 8], "83": 0, "page": [0, 2], "5197": 0, "year": [0, 6, 7], "2023": 0, "Or": [0, 7], "prefer": 0, "apa": 0, "m": 0, "k": 0, "j": 0, "l": 0, "come": [0, 7], "an": [0, 1, 2, 6, 7, 8], "extens": 0, "In": [0, 1, 6, 7], "order": [0, 5], "ll": [0, 6, 7], "usual": 0, "want": [0, 6, 7, 8], "clone": 0, "repositori": 0, "build": 0, "also": [0, 5, 6], "instal": [0, 6, 7], "requir": [0, 1, 2, 4, 6, 7], "develop": 0, "depend": 0, "util": 0, "defin": [0, 1, 4, 8], "pyproject": 0, "toml": 0, "pip": [0, 3], "e": [0, 1, 2, 6, 7, 8], "dev": 0, "pytest": 0, "which": [0, 1, 2, 4, 5, 6, 7], "all": [0, 6, 7, 8], "folder": 0, "specif": [0, 4, 5, 7, 8], "can": [0, 1, 2, 5, 6, 7, 8], "desired_test": 0, "py": [0, 6, 7], "sphinx": 0, "It": [0, 6], "furo": 0, "theme": 0, "custom": 0, "style": [0, 6, 7], "To": [0, 2, 3, 5, 6, 7, 8], "make": [0, 2, 6, 7, 8], "doc": [0, 6], "text": [0, 5], "html": 0, "c": [0, 2, 7], "class": [1, 4, 6, 7], "coercedfloat": 1, "lookperiod": [1, 7], "fallback": [1, 6, 7, 8], "union": [1, 4], "float": 1, "int": [1, 4], "base": [1, 4], "object": [1, 4, 6, 7], "min_dai": [1, 6, 7], "max_dai": [1, 6, 7], "outcomespec": [1, 4, 6], "timeseries_df": [1, 6], "datafram": [1, 2, 4, 6, 7, 8], "feature_base_nam": [1, 6, 7], "str": [1, 4, 8], "lookahead_dai": [1, 6], "tupl": [1, 6], "aggregation_fn": [1, 6, 7, 8], "callabl": 1, "dataframegroupbi": 1, "incid": [1, 6], "bool": [1, 4], "prefix": [1, 4, 6], "outc": [1, 4, 6], "basemodel": [1, 4], "outcom": [1, 2, 5, 7], "paramet": [1, 4, 6], "valu": [1, 2, 4, 6, 7, 8], "should": [1, 4, 6, 8], "contain": [1, 4, 6, 8], "column": [1, 2, 4, 6, 7, 8], "entity_id": [1, 4, 6, 7, 8], "id": [1, 2, 6, 8], "entiti": [1, 6], "each": [1, 2, 5, 6, 8], "belong": 1, "The": [1, 2, 5, 6, 7], "timeseri": [1, 4, 6], "timestamp": [1, 4, 6, 7, 8], "datetim": [1, 6, 7], "note": [1, 6, 7, 8], "name": [1, 4, 6, 7, 8], "overridden": 1, "when": [1, 2, 6, 7], "initialis": 1, "gener": [1, 2, 5, 6, 7], "g": [1, 2, 6, 7, 8], "_": [1, 6, 7], "feature_baase_nam": 1, "metadata": [1, 4, 6, 8], "interv": [1, 6], "predict": [1, 2, 4, 5, 7, 8], "look": [1, 2, 6, 7], "two": [1, 4, 6, 7], "specifi": [1, 2, 5, 7], "resolv": 1, "0": [1, 6, 7, 8], "how": [1, 2, 5, 7, 8], "aggreg": [1, 2, 6], "multipl": [1, 2, 6, 7, 8], "within": [1, 2, 6, 8], "lookahead": [1, 2, 6], "dai": [1, 6, 8], "take": [1, 4, 6, 7, 8], "group": [1, 7, 8], "input": 1, "return": [1, 4, 6, 7, 8], "singl": [1, 2, 6], "i": [1, 2, 3, 6, 7, 8], "found": [1, 6], "window": [1, 2, 6, 7], "whether": [1, 6], "type": [1, 2, 4, 6, 7, 8], "2": [1, 6, 7, 8], "diabet": [1, 6], "becaus": [1, 4, 6, 7], "onli": [1, 2, 6, 7, 8], "experi": [1, 6], "onc": [1, 6], "handl": [1, 6], "vectoris": 1, "wai": 1, "dure": 1, "resolut": 1, "faster": [1, 6, 7, 8], "than": [1, 2, 6], "non": 1, "occur": [1, 2, 6], "feature_nam": [1, 7], "default": [1, 4, 6], "pred": [1, 4, 6], "get_output_col_nam": 1, "get": [1, 3, 4], "output": [1, 4, 7, 8], "is_dichotom": 1, "check": [1, 6, 7, 8], "dichotom": 1, "properti": [1, 6, 7], "lookahead_period": 1, "model_computed_field": [1, 4], "classvar": [1, 4], "dict": [1, 4], "computedfieldinfo": [1, 4], "dictionari": [1, 4], "comput": [1, 4, 6, 7, 8], "field": [1, 4], "correspond": [1, 4, 6, 8], "model_config": [1, 4], "configdict": [1, 4], "arbitrary_types_allow": [1, 4], "true": [1, 2, 4, 6, 7], "extra": [1, 6, 7], "forbid": 1, "frozen": 1, "configur": [1, 4], "model": [1, 2, 4, 6, 8], "conform": [1, 4], "pydant": [1, 4], "config": [1, 4], "model_field": [1, 4], "fieldinfo": [1, 4], "annot": [1, 4], "list": [1, 4, 5, 6, 7, 8], "fals": [1, 4, 6, 8], "about": [1, 4, 6], "map": [1, 4], "thi": [1, 2, 4, 6, 7, 8], "replac": [1, 4], "__fields__": [1, 4], "v1": [1, 4], "predictorspec": [1, 4, 6], "lookbehind_dai": [1, 6, 7, 8], "predictor": [1, 2, 4, 5, 7], "lookbehind": [1, 2, 6, 7, 8], "lookbehind_period": [1, 7], "staticspec": [1, 4, 6], "static": [1, 5, 7], "can_be_coerced_losslessly_to_int": 1, "coerce_float": 1, "get_temporal_col_nam": 1, "tempor": [1, 5, 7, 8], "packag": [2, 5, 6, 7], "data": [2, 4, 5, 7, 8], "machin": 2, "learn": 2, "implement": [2, 7], "method": [2, 4, 7], "includ": 2, "convert": [2, 8], "ani": [2, 4, 6, 7, 8], "irregular": [2, 6], "row": [2, 6, 7, 8], "desir": 2, "construct": 2, "raw": 2, "ar": [2, 4, 6, 7, 8], "allow": [2, 4, 7], "patient": [2, 6, 8], "independ": 2, "set": [2, 4, 6], "particular": 2, "sever": [2, 8], "choic": 2, "one": [2, 4, 6, 7, 8], "need": [2, 6, 7, 8], "issu": [2, 6], "everi": [2, 6, 7], "physic": 2, "visit": 2, "morn": 2, "anoth": [2, 6], "clinic": [2, 8], "meaning": 2, "far": [2, 6, 8], "back": [2, 6], "ahead": [2, 6], "exist": 2, "point": [2, 6], "abov": [2, 6, 7, 8], "figur": 2, "graphic": 2, "repres": [2, 6], "terminologi": [2, 6], "determin": [2, 6], "wherea": 2, "futur": [2, 6], "refer": [2, 6], "b": 2, "label": [2, 6], "neg": 2, "never": [2, 6], "happen": [2, 6], "outsid": [2, 6], "posit": [2, 6], "insid": [2, 6], "exampl": [2, 6, 7, 8], "mean": [2, 6, 7, 8], "shown": [2, 6], "max": [2, 6], "min": [2, 6], "etc": [2, 6], "d": 2, "drop": [2, 6, 7, 8], "extend": [2, 6], "further": [2, 4, 6], "start": [2, 3, 6, 7, 8], "dataset": [2, 4, 5, 6, 7], "end": [2, 6, 7], "behaviour": 2, "option": [2, 4], "obtain": 2, "rich": 2, "represent": 2, "see": [2, 4, 6], "tutori": [2, 4, 8], "placehold": 2, "case": [2, 6], "report": 2, "request": 2, "github": [2, 3], "tracker": 2, "otherwis": 2, "discuss": [2, 6], "forum": 2, "bug": 2, "idea": 2, "usag": 2, "index": 2, "run": [3, 5], "follow": [3, 6], "line": [3, 6, 7], "termin": 3, "There": [3, 6, 7, 8], "discrep": 3, "between": 3, "latest": 3, "version": [3, 6, 7], "flatten": [4, 5, 8], "describ": [4, 6, 8], "speccollect": 4, "outcome_spec": [4, 6], "predictor_spec": 4, "static_spec": 4, "collect": 4, "spec": [4, 6, 7, 8], "prediction_times_df": [4, 6, 7, 8], "drop_pred_times_with_insufficient_look_dist": [4, 6, 7, 8], "cach": [4, 5], "featurecach": [4, 7], "none": [4, 6, 7], "entity_id_col_nam": [4, 6, 7, 8], "timestamp_col_nam": [4, 6, 7, 8], "predictor_col_name_prefix": 4, "outcome_col_name_prefix": 4, "n_worker": [4, 6, 7, 8], "60": [4, 7], "log_to_stdout": 4, "turn": [4, 8], "tabular": [4, 8], "add_ag": 4, "date_of_birth_df": 4, "date_of_birth_col_nam": 4, "date_of_birth": 4, "output_prefix": 4, "add": [4, 6, 7], "ag": 4, "ha": [4, 6, 7, 8], "its": [4, 6], "own": [4, 7], "function": [4, 6, 8], "veri": 4, "frequent": [4, 6], "match": 4, "self": [4, 6, 7], "add_spec": [4, 6, 7, 8], "sequenc": [4, 7], "queue": 4, "unprocess": [4, 6, 7, 8], "process": [4, 6, 7, 8], "until": 4, "call": [4, 6, 7], "get_df": [4, 6, 7, 8], "u": 4, "more": [4, 6, 7], "effecti": 4, "parallelis": 4, "most": [4, 6, 7], "complex": 4, "li": 4, "For": [4, 6, 7, 8], "document": 4, "those": 4, "present": [4, 6], "we": [5, 6, 7, 8], "recommend": 5, "go": [5, 6], "through": 5, "below": 5, "jupyt": 5, "notebook": 5, "download": 5, "local": [5, 6, 7], "introductori": 5, "load": [5, 7, 8], "advanc": [5, 6], "creat": [5, 6, 8], "combin": 5, "ad": [5, 6], "embed": 5, "especi": 6, "help": 6, "have": [6, 7, 8], "complic": 6, "train": 6, "simpl": 6, "explain": 6, "appli": 6, "consist": 6, "3": [6, 7, 8], "step": 6, "": [6, 7, 8], "simplest": 6, "first": [6, 7, 8], "predictin": 6, "element": 6, "context": 6, "skimpi": [6, 7], "import": [6, 7, 8], "skim": [6, 7], "test": [6, 7, 8], "load_synth_data": [6, 7, 8], "load_synth_prediction_tim": [6, 7, 8], "df_prediction_tim": 6, "sort_valu": 6, "summari": [6, 7], "count": [6, 7], "10000": [6, 7], "int64": [6, 7], "1": [6, 7, 8], "datetime64": [6, 7], "column_nam": [6, 7], "na": [6, 7, 8], "sd": [6, 7], "p0": [6, 7], "p25": [6, 7], "p50": [6, 7], "p75": [6, 7], "p100": [6, 7], "hist": [6, 7], "5000": [6, 7], "2900": [6, 7], "2500": 6, "4900": [6, 7], "7400": [6, 7], "last": [6, 7, 8], "frequenc": [6, 7], "1965": [6, 8], "01": [6, 7, 8], "02": [6, 7, 8], "09": [6, 7, 8], "35": 6, "00": [6, 7, 8], "1969": [6, 7, 8], "12": [6, 7, 8], "31": [6, 7, 8], "21": [6, 7, 8], "42": [6, 7], "628": 6, "11": [6, 8], "55": 6, "2005": 6, "03": [6, 8], "15": [6, 7, 8], "07": [6, 8], "16": [6, 8], "4370": 6, "13": [6, 7, 8], "23": [6, 8], "18": [6, 7, 8], "6152": 6, "1968": [6, 7, 8], "04": [6, 8], "6873": 6, "4": [6, 7, 8], "28": [6, 8], "33": 6, "9688": 6, "9996": 6, "17": [6, 8], "1463": 6, "30": [6, 7, 8], "19": [6, 8], "3952": 6, "9997": 6, "1967": [6, 8], "06": [6, 8], "08": [6, 8], "52": 6, "7926": 6, "9999": 6, "22": [6, 8], "24": 6, "5720": 6, "14": [6, 8], "59": [6, 7], "here": 6, "Then": [6, 7], "our": [6, 7, 8], "differ": [6, 7], "timepoint": 6, "load_synth_predictor_float": [6, 7], "df_synth_predictor": 6, "100000": 6, "float64": [6, 7], "7500": 6, "5": [6, 7, 8], "9": [6, 7], "00015": 6, "7": [6, 7, 8], "10": [6, 7, 8], "37": 6, "95792": 6, "29": [6, 7], "799246": 6, "82592": 6, "05": [6, 7, 8], "6": [6, 7], "630007": 6, "1377": 6, "174793": 6, "28579": 6, "26": [6, 8], "981185": 6, "81247": 6, "44": [6, 7], "970382": 6, "10277": 6, "20": [6, 8], "304568": 6, "74701": 6, "671907": 6, "69566": 6, "41": [6, 8], "250538": 6, "40901": 6, "1966": [6, 8], "924175": 6, "96881": 6, "501553": 6, "again": 6, "could": 6, "sex": 6, "doesn": 6, "t": [6, 7], "chang": 6, "over": 6, "let": [6, 7, 8], "load_synth_sex": 6, "df_synth_sex": 6, "femal": 6, "9994": 6, "9995": 6, "9998": 6, "As": [6, 8], "And": 6, "lastli": 6, "ve": 6, "chosen": 6, "binari": 6, "store": 6, "infer": 6, "do": 6, "sinc": 6, "thei": [6, 7, 8], "section": 6, "load_synth_outcom": 6, "df_synth_outcom": 6, "3103": 6, "5100": 6, "7600": 6, "50": [6, 7, 8], "46": [6, 7], "6253": 6, "9964": 6, "6255": 6, "9966": 6, "6256": 6, "9968": 6, "6257": 6, "9970": 6, "6269": 6, "9992": 6, "53": [6, 7], "per": [6, 7], "now": [6, 7, 8], "recip": 6, "finish": 6, "firstli": 6, "main": 6, "decis": 6, "size": [6, 7], "given": 6, "indic": 6, "code": [6, 7], "panda": [6, 7, 8], "pd": [6, 8], "maximum": [6, 7], "feature_spec": [6, 7, 8], "single_spec": 6, "test_df": 6, "365": [6, 7, 8], "outcome_nam": 6, "argument": 6, "values_df": 6, "decid": 6, "least": 6, "both": 6, "accomplish": 6, "dw_ek_borg": 6, "wa": [6, 8], "mark": 6, "after": 6, "where": 6, "event": 6, "perman": 6, "specifii": 6, "forward": 6, "search": 6, "certain": 6, "period": [6, 8], "befor": [6, 8], "instead": 6, "almost": 6, "entir": 6, "ident": 6, "except": 6, "past": 6, "numpi": [6, 7, 8], "np": [6, 7, 8], "temporal_predictor_spec": 6, "730": [6, 7, 8], "nan": [6, 7, 8], "predictor_nam": 6, "rang": 6, "similar": 6, "instanc": [6, 7], "might": [6, 7, 8], "182": 6, "easili": 6, "pass": [6, 8], "temporal_interval_predictor_spec": 6, "90": 6, "predictor_interval_nam": 6, "slightli": 6, "previou": 6, "provid": 6, "howev": [6, 7, 8], "By": 6, "filter": 6, "easi": 6, "manual": [6, 7], "sex_predictor_spec": 6, "input_col_name_overrid": 6, "df": [6, 7, 8], "tsflatten": 6, "re": [6, 8], "readi": 6, "instanti": 6, "along": 6, "add_": 6, "parallel": [6, 7, 8], "oper": 6, "across": 6, "core": [6, 7], "ts_flatten": [6, 7, 8], "applic": 6, "sai": [6, 7], "month": [6, 7, 8], "would": [6, 8], "compromis": 6, "generalis": 6, "some": [6, 7, 8], "edg": 6, "brief": 6, "2024": [6, 7, 8], "54": 6, "info": [6, 7, 8], "were": [6, 7, 8], "_drop_pred_time_if_insufficient_look_dist": [6, 7], "5999": 6, "99": 6, "worker": [6, 7, 8], "chunksiz": [6, 7, 8], "mai": [6, 7, 8], "progress": [6, 7, 8], "bar": [6, 7, 8], "move": [6, 7, 8], "batch": [6, 7, 8], "much": [6, 7, 8], "total": [6, 7, 8], "perform": [6, 7, 8], "100": [6, 7, 8], "36": [6, 8], "77it": 6, "align": [6, 7, 8], "littl": [6, 7, 8], "while": [6, 7, 8], "minut": [6, 7, 8], "000": [6, 7, 8], "concaten": [6, 7, 8], "Will": [6, 7, 8], "system": [6, 7, 8], "2_000_000": [6, 7, 8], "normal": [6, 7, 8], "took": [6, 7, 8], "004": 6, "second": [6, 7, 8], "merg": [6, 7, 8], "origin": [6, 7, 8], "4001": 6, "string": [6, 7], "2600": [6, 7], "outc_outcome_name_withi": 6, "064": 6, "25": [6, 8], "n_365_days_maximum_fal": 6, "back_0_dichotom": 6, "pred_predictor_interv": 6, "2877": 6, "71": 6, "91": 6, "_name_within_30_to_90_d": 6, "ays_mean_fallback_nan": [6, 7], "pred_predictor_name_wit": 6, "72": 6, "097": 6, "hin_730_days_mean_fallb": 6, "ack_nan": 6, "pred_femal": 6, "49": 6, "39": 6, "word": [6, 7, 8], "prediction_time_uuid": [6, 7, 8], "outc_outcome_name_within_365_days_maximum_fallback_0_dichotom": 6, "pred_predictor_interval_name_within_30_to_90_days_mean_fallback_nan": 6, "pred_predictor_name_within_730_days_mean_fallback_nan": 6, "display": [6, 7], "shorten": [6, 7], "col": [6, 7], "shortened_pr": 6, "pred_x": 6, "shortened_pred_interv": 6, "pred_x_30_to_90": 6, "shortened_outcom": 6, "outc_i": 6, "renam": [6, 7], "pred_predictor_name_within_0_to_730_days_mean_fallback_nan": 6, "outc_outcome_name_within_0_to_365_days_maximum_fallback_0_dichotom": 6, "axi": [6, 7, 8], "set_table_attribut": [6, 7], "font": [6, 7], "14px": [6, 7], "importerror": [6, 7], "traceback": [6, 7], "recent": [6, 7], "cell": [6, 7], "file": [6, 7], "lib": [6, 7], "python3": [6, 7], "site": [6, 7], "frame": [6, 7], "1338": [6, 7], "1318": [6, 7], "1319": [6, 7, 8], "def": [6, 7, 8], "styler": [6, 7], "1320": [6, 7], "1321": [6, 7], "1322": [6, 7], "1336": [6, 7], "tabl": [6, 7], "visual": [6, 7], "user_guid": [6, 7], "ipynb": [6, 7], "1337": [6, 7], "io": [6, 7], "format": [6, 7, 8], "1340": [6, 7], "40": [6, 7], "shared_doc": [6, 7], "_shared_doc": [6, 7], "save_to_buff": [6, 7], "jinja2": [6, 7], "import_optional_depend": [6, 7], "style_rend": [6, 7], "47": [6, 7], "cssproperti": [6, 7], "48": [6, 7], "cssstyle": [6, 7], "56": [6, 7], "refactor_level": [6, 7], "57": [6, 7], "type_check": [6, 7], "compat": [6, 7], "_option": [6, 7], "161": [6, 7], "error": [6, 7], "min_vers": [6, 7], "159": [6, 7], "160": [6, 7], "elif": [6, 7], "rais": [6, 7], "msg": [6, 7], "163": [6, 7], "modul": [6, 7], "newer": [6, 7], "current": [6, 7], "classif": 6, "citizen": 6, "uniqu": 6, "identifi": 6, "prediciton": 6, "pred_": [6, 7], "outc_": 6, "basic": 7, "cover": [7, 8], "expand": 7, "effect": 7, "mani": 7, "so": [7, 8], "iter": 7, "without": 7, "complet": 7, "full": 7, "hand": 7, "rather": 7, "straightforward": 7, "what": 7, "hundr": 7, "amount": 7, "write": 7, "grow": 7, "quit": 7, "substanti": 7, "becom": 7, "consum": 7, "hard": 7, "navig": 7, "solv": 7, "problem": 7, "combinatori": 7, "pprint": 7, "group_spec": [7, 8], "nameddatafram": 7, "predictorgroupspec": [7, 8], "pred_spec_batch": 7, "named_datafram": [7, 8], "synth_predictor_float": 7, "1095": 7, "create_combin": [7, 8], "attribut": 7, "easier": 7, "namedatafram": 7, "exactli": 7, "load_synth_predictor_flaot": 7, "pred_synth_predictor_float_": 7, "result": [7, 8], "good": 7, "small": [7, 8], "highlight": 7, "pred_spec_batch_summari": 7, "pred_spec": 7, "__name__": 7, "print": [7, 8], "f": 7, "len": [7, 8], "know": 7, "bunch": 7, "quickli": 7, "But": 7, "next": 7, "ship": 7, "disk": 7, "pathlib": 7, "path": 7, "feature_cach": 7, "cache_to_disk": 7, "diskcach": 7, "flattened_dataset": 7, "feature_cache_dir": 7, "tmp": 7, "directori": 7, "save": 7, "just": 7, "them": 7, "won": 7, "alreadi": [7, 8], "new": 7, "abstract": 7, "redi": 7, "sql": 7, "everyth": 7, "work": 7, "6053": 7, "72it": 7, "17it": 7, "006": 7, "3947": 7, "pred_synth_predictor_fl": 7, "533": 7, "0084": 7, "oat_within_365_days_max": 7, "imum_fallback_nan": 7, "506": 7, "82": 7, "024": 7, "oat_within_365_to_730_d": 7, "ays_maximum_fallback_na": 7, "n": 7, "oat_within_365_days_mea": 7, "n_fallback_nan": 7, "oat_within_1095_days_ma": 7, "ximum_fallback_nan": 7, "oat_within_1095_days_m": 7, "an_fallback_nan": 7, "pred_synth_predictor_float_within_365_days_maximum_fallback_nan": 7, "pred_synth_predictor_float_within_365_to_730_days_maximum_fallback_nan": 7, "pred_synth_predictor_float_within_365_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_1095_days_maximum_fallback_nan": 7, "pred_synth_predictor_float_within_365_to_730_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_1095_days_mean_fallback_nan": 7, "pred_col": 7, "startswith": 7, "rename_dict": 7, "enumer": 7, "df_renam": 7, "base_col": 7, "renamed_col": 7, "dealt": 8, "show": 8, "out": 8, "synthet": 8, "other": 8, "load_synth_text": 8, "synth_text": 8, "head": 8, "4647": 8, "went": 8, "induc": 8, "coma": 8, "2007": 8, "taken": 8, "emerg": 8, "departm": 8, "5799": 8, "old": 8, "son": 8, "wh": 8, "had": 8, "been": 8, "left": 8, "bed": 8, "minu": 8, "4234": 8, "allergi": 8, "often": 8, "advantag": 8, "emb": 8, "speed": 8, "up": 8, "block": 8, "tf": 8, "idf": 8, "form": 8, "constraint": 8, "entitiy_id_col": 8, "timestamp_col": 8, "value_col": 8, "purpos": 8, "demonstr": 8, "fit": 8, "captur": 8, "sklearn": 8, "feature_extract": 8, "tfidfvector": 8, "embed_text_to_df": 8, "tfidf_model": 8, "max_featur": 8, "fit_transform": 8, "toarrai": 8, "get_feature_names_out": 8, "embedded_text": 8, "tolist": 8, "metadata_onli": 8, "embedded_text_with_metadata": 8, "concat": 8, "ignor": 8, "175872": 8, "182066": 8, "249848": 8, "158430": 8, "000000": 8, "023042": 8, "311389": 8, "529966": 8, "490203": 8, "479312": 8, "244870": 8, "135282": 8, "064337": 8, "465084": 8, "336859": 8, "151743": 8, "729861": 8, "179161": 8, "192367": 8, "232332": 8, "283402": 8, "336952": 8, "176422": 8, "238416": 8, "646879": 8, "250217": 8, "382277": 8, "165635": 8, "200046": 8, "183015": 8, "261115": 8, "125837": 8, "151906": 8, "205285": 8, "759528": 8, "403961": 8, "098747": 8, "493461": 8, "119196": 8, "272619": 8, "207444": 8, "045256": 8, "183475": 8, "588324": 8, "433253": 8, "235349": 8, "df_with_multiple_values_to_named_datafram": 8, "readili": 8, "suppli": 8, "df_transform": 8, "split": 8, "embedded_df": 8, "name_prefix": 8, "tfidf_": 8, "accord": 8, "inform": 8, "bow": 8, "kept": 8, "tfidf_and": 8, "emb_spec_batch": 8, "54it": 8, "69it": 8, "sake": 8, "dropna": 8, "pred_tfidf_or_within_730_days_mean_fallback_nan": 8, "pred_tfidf_was_within_365_days_mean_fallback_nan": 8, "pred_tfidf_patient_within_365_days_mean_fallback_nan": 8, "pred_tfidf_that_within_730_days_mean_fallback_nan": 8, "pred_tfidf_in_within_730_days_mean_fallback_nan": 8, "pred_tfidf_patient_within_730_days_mean_fallback_nan": 8, "pred_tfidf_of_within_365_days_mean_fallback_nan": 8, "pred_tfidf_of_within_730_days_mean_fallback_nan": 8, "pred_tfidf_for_within_365_days_mean_fallback_nan": 8, "pred_tfidf_in_within_365_days_mean_fallback_nan": 8, "pred_tfidf_or_within_365_days_mean_fallback_nan": 8, "pred_tfidf_for_within_730_days_mean_fallback_nan": 8, "pred_tfidf_to_within_730_days_mean_fallback_nan": 8, "pred_tfidf_that_within_365_days_mean_fallback_nan": 8, "pred_tfidf_and_within_365_days_mean_fallback_nan": 8, "pred_tfidf_was_within_730_days_mean_fallback_nan": 8, "pred_tfidf_and_within_730_days_mean_fallback_nan": 8, "1917": 8, "4977": 8, "221549": 8, "086927": 8, "133722": 8, "090356": 8, "483324": 8, "536339": 8, "088050": 8, "284485": 8, "145809": 8, "2463": 8, "6840": 8, "355142": 8, "092896": 8, "071452": 8, "096561": 8, "258256": 8, "573168": 8, "376386": 8, "456030": 8, "155821": 8, "2580": 8, "260680": 8, "401014": 8, "639848": 8, "2741": 8, "9832": 8, "128228": 8, "335410": 8, "103195": 8, "186493": 8, "236513": 8, "101924": 8, "164655": 8, "225044": 8, "2931": 8, "7281": 8, "385111": 8, "388547": 8, "332065": 8, "269251": 8, "280049": 8, "304425": 8, "043730": 8, "211934": 8, "289663": 8}, "objects": {"timeseriesflattener.feature_specs": [[1, 0, 0, "-", "single_specs"]], "timeseriesflattener.feature_specs.single_specs": [[1, 1, 1, "", "CoercedFloats"], [1, 1, 1, "", "LookPeriod"], [1, 1, 1, "", "OutcomeSpec"], [1, 1, 1, "", "PredictorSpec"], [1, 1, 1, "", "StaticSpec"], [1, 5, 1, "", "can_be_coerced_losslessly_to_int"], [1, 5, 1, "", "coerce_floats"], [1, 5, 1, "", "get_temporal_col_name"]], "timeseriesflattener.feature_specs.single_specs.CoercedFloats": [[1, 2, 1, "", "fallback"], [1, 2, 1, "", "lookperiod"]], "timeseriesflattener.feature_specs.single_specs.LookPeriod": [[1, 2, 1, "", "max_days"], [1, 2, 1, "", "min_days"]], "timeseriesflattener.feature_specs.single_specs.OutcomeSpec": [[1, 2, 1, "", "aggregation_fn"], [1, 2, 1, "", "fallback"], [1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "incident"], [1, 3, 1, "", "is_dichotomous"], [1, 2, 1, "", "lookahead_days"], [1, 4, 1, "", "lookahead_period"], [1, 2, 1, "", "model_computed_fields"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener.feature_specs.single_specs.PredictorSpec": [[1, 2, 1, "", "aggregation_fn"], [1, 2, 1, "", "fallback"], [1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "lookbehind_days"], [1, 4, 1, "", "lookbehind_period"], [1, 2, 1, "", "model_computed_fields"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener.feature_specs.single_specs.StaticSpec": [[1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "model_computed_fields"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener": [[4, 0, 0, "-", "flattened_dataset"]], "timeseriesflattener.flattened_dataset": [[4, 1, 1, "", "SpecCollection"], [4, 1, 1, "", "TimeseriesFlattener"]], "timeseriesflattener.flattened_dataset.SpecCollection": [[4, 2, 1, "", "model_computed_fields"], [4, 2, 1, "", "model_config"], [4, 2, 1, "", "model_fields"], [4, 2, 1, "", "outcome_specs"], [4, 2, 1, "", "predictor_specs"], [4, 2, 1, "", "static_specs"]], "timeseriesflattener.flattened_dataset.TimeseriesFlattener": [[4, 3, 1, "", "add_age"], [4, 3, 1, "", "add_spec"], [4, 3, 1, "", "compute"], [4, 3, 1, "", "get_df"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:attribute", "3": "py:method", "4": "py:property", "5": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "method", "Python method"], "4": ["py", "property", "Python property"], "5": ["py", "function", "Python function"]}, "titleterms": {"frequent": 0, "ask": [0, 2], "question": [0, 2], "cite": 0, "thi": 0, "packag": 0, "how": [0, 6], "do": 0, "i": 0, "test": 0, "code": 0, "run": 0, "suit": 0, "document": 0, "gener": [0, 8], "featur": [1, 7, 8], "specif": [1, 6], "timeseriesflatten": [1, 2, 4], "feature_spec": 1, "single_spec": 1, "function": 2, "where": 2, "indic": 2, "search": 2, "instal": 3, "flattened_dataset": 4, "tutori": [5, 6, 7], "get": 5, "start": 5, "introductori": 6, "load": 6, "data": 6, "predict": 6, "time": 6, "tempor": 6, "predictor": [6, 8], "static": 6, "outcom": 6, "specifi": 6, "flatten": 6, "advanc": 7, "creat": 7, "combin": 7, "cach": 7, "ad": 8, "text": 8, "The": 8, "dataset": 8, "from": 8, "embed": 8}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Frequently Asked Questions": [[0, "frequently-asked-questions"]], "Citing this package": [[0, "citing-this-package"]], "How do I test the code and run the test suite?": [[0, "how-do-i-test-the-code-and-run-the-test-suite"]], "How is the documentation generated?": [[0, "how-is-the-documentation-generated"]], "Feature specifications": [[1, "feature-specifications"]], "timeseriesflattener.feature_specs.single_specs": [[1, "module-timeseriesflattener.feature_specs.single_specs"]], "timeseriesflattener": [[2, "timeseriesflattener"]], "Functionality": [[2, "functionality"]], "Where to ask questions?": [[2, "where-to-ask-questions"]], "Indices and search": [[2, "indices-and-search"]], "Installation": [[3, "installation"]], "Timeseriesflattener": [[4, "timeseriesflattener"]], "timeseriesflattener.flattened_dataset": [[4, "module-timeseriesflattener.flattened_dataset"]], "Tutorials": [[5, "tutorials"]], "Getting started": [[5, null]], "Introductory Tutorial": [[6, "introductory-tutorial"]], "Loading data": [[6, "loading-data"]], "Loading prediction times": [[6, "loading-prediction-times"]], "Loading a temporal predictor": [[6, "loading-a-temporal-predictor"]], "Loading a static predictor": [[6, "loading-a-static-predictor"]], "Loading a temporal outcome": [[6, "loading-a-temporal-outcome"]], "Specifying how to flatten the data": [[6, "specifying-how-to-flatten-the-data"]], "Temporal outcome specification": [[6, "temporal-outcome-specification"]], "Temporal predictor specification": [[6, "temporal-predictor-specification"]], "Static predictor specification": [[6, "static-predictor-specification"]], "Flattening": [[6, "flattening"]], "Advanced Tutorial": [[7, "advanced-tutorial"]], "Creating feature combinations": [[7, "creating-feature-combinations"]], "Caching": [[7, "caching"]], "Adding text features": [[8, "adding-text-features"]], "The dataset": [[8, "the-dataset"]], "Generating predictors from embedded text": [[8, "generating-predictors-from-embedded-text"]]}, "indexentries": {"coercedfloats (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats"]], "lookperiod (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod"]], "outcomespec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec"]], "predictorspec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec"]], "staticspec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec"]], "aggregation_fn (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.aggregation_fn"]], "aggregation_fn (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.aggregation_fn"]], "can_be_coerced_losslessly_to_int() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.can_be_coerced_losslessly_to_int"]], "coerce_floats() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.coerce_floats"]], "fallback (coercedfloats attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats.fallback"]], "fallback (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.fallback"]], "fallback (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.fallback"]], "feature_base_name (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.feature_base_name"]], "feature_base_name (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.feature_base_name"]], "feature_base_name (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.feature_base_name"]], "get_output_col_name() (outcomespec method)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.get_output_col_name"]], "get_output_col_name() (predictorspec method)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.get_output_col_name"]], "get_output_col_name() (staticspec method)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.get_output_col_name"]], "get_temporal_col_name() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.get_temporal_col_name"]], "incident (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.incident"]], "is_dichotomous() (outcomespec method)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.is_dichotomous"]], "lookahead_days (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.lookahead_days"]], "lookahead_period (outcomespec property)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.lookahead_period"]], "lookbehind_days (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.lookbehind_days"]], "lookbehind_period (predictorspec property)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.lookbehind_period"]], "lookperiod (coercedfloats attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats.lookperiod"]], "max_days (lookperiod attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod.max_days"]], "min_days (lookperiod attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod.min_days"]], "model_computed_fields (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_computed_fields"]], "model_computed_fields (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_computed_fields"]], "model_computed_fields (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_computed_fields"]], "model_config (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_config"]], "model_config (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_config"]], "model_config (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_config"]], "model_fields (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_fields"]], "model_fields (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_fields"]], "model_fields (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_fields"]], "module": [[1, "module-timeseriesflattener.feature_specs.single_specs"], [4, "module-timeseriesflattener.flattened_dataset"]], "prefix (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.prefix"]], "prefix (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.prefix"]], "prefix (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.prefix"]], "timeseries_df (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.timeseries_df"]], "timeseries_df (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.timeseries_df"]], "timeseries_df (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.timeseries_df"]], "timeseriesflattener.feature_specs.single_specs": [[1, "module-timeseriesflattener.feature_specs.single_specs"]], "speccollection (class in timeseriesflattener.flattened_dataset)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection"]], "timeseriesflattener (class in timeseriesflattener.flattened_dataset)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener"]], "add_age() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.add_age"]], "add_spec() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.add_spec"]], "compute() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.compute"]], "get_df() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.get_df"]], "model_computed_fields (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_computed_fields"]], "model_config (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_config"]], "model_fields (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_fields"]], "outcome_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.outcome_specs"]], "predictor_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.predictor_specs"]], "static_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.static_specs"]], "timeseriesflattener.flattened_dataset": [[4, "module-timeseriesflattener.flattened_dataset"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["faq", "feature_specifications", "index", "installation", "timeseriesflattener", "tutorials", "tutorials/01_basic", "tutorials/02_advanced", "tutorials/03_text"], "filenames": ["faq.rst", "feature_specifications.rst", "index.rst", "installation.rst", "timeseriesflattener.rst", "tutorials.rst", "tutorials/01_basic.ipynb", "tutorials/02_advanced.ipynb", "tutorials/03_text.ipynb"], "titles": ["Frequently Asked Questions", "Feature specifications", "timeseriesflattener", "Installation", "Timeseriesflattener", "Tutorials", "Introductory Tutorial", "Advanced Tutorial", "Adding text features"], "terms": {"If": [0, 6, 7, 8], "you": [0, 1, 4, 5, 6, 7, 8], "wish": 0, "us": [0, 1, 2, 3, 4, 5, 6, 7, 8], "librari": 0, "your": [0, 3, 6, 7, 8], "research": 0, "pleas": [0, 2], "joss": 0, "paper": 0, "articl": 0, "bernstorff2023timeseriesflatten": 0, "titl": 0, "timeseriesflatten": [0, 3, 6, 7, 8], "A": [0, 1, 2, 4, 6, 7], "python": [0, 2], "summar": 0, "featur": [0, 2, 5, 6], "from": [0, 1, 2, 4, 5, 6, 7], "medic": [0, 2, 6, 8], "time": [0, 1, 2, 4, 5, 7, 8], "seri": [0, 1, 2, 4, 6], "author": 0, "bernstorff": 0, "martin": 0, "enevoldsen": 0, "kenneth": 0, "damgaard": 0, "jakob": 0, "danielsen": 0, "andrea": 0, "hansen": 0, "lass": 0, "journal": 0, "open": 0, "sourc": [0, 1, 4], "softwar": 0, "volum": 0, "8": [0, 6, 7], "number": [0, 2, 6, 7, 8], "83": 0, "page": [0, 2], "5197": 0, "year": [0, 6, 7], "2023": 0, "Or": [0, 7], "prefer": 0, "apa": 0, "m": 0, "k": 0, "j": 0, "l": 0, "come": [0, 7], "an": [0, 1, 2, 6, 7, 8], "extens": 0, "In": [0, 1, 6, 7], "order": [0, 5], "ll": [0, 6, 7], "usual": 0, "want": [0, 6, 7, 8], "clone": 0, "repositori": 0, "build": 0, "also": [0, 5, 6], "instal": [0, 6, 7], "requir": [0, 1, 2, 4, 6, 7], "develop": 0, "depend": 0, "util": 0, "defin": [0, 1, 4, 8], "pyproject": 0, "toml": 0, "pip": [0, 3], "e": [0, 1, 2, 6, 7, 8], "dev": 0, "pytest": 0, "which": [0, 1, 2, 4, 5, 6, 7], "all": [0, 6, 7, 8], "folder": 0, "specif": [0, 4, 5, 7, 8], "can": [0, 1, 2, 5, 6, 7, 8], "desired_test": 0, "py": [0, 6, 7], "sphinx": 0, "It": [0, 6], "furo": 0, "theme": 0, "custom": 0, "style": [0, 6, 7], "To": [0, 2, 3, 5, 6, 7, 8], "make": [0, 2, 6, 7, 8], "doc": [0, 6], "text": [0, 5], "html": 0, "c": [0, 2, 7], "class": [1, 4, 6, 7], "coercedfloat": 1, "lookperiod": [1, 7], "fallback": [1, 6, 7, 8], "union": [1, 4], "float": 1, "int": [1, 4], "base": [1, 4], "object": [1, 4, 6, 7], "min_dai": [1, 6, 7], "max_dai": [1, 6, 7], "outcomespec": [1, 4, 6], "timeseries_df": [1, 6], "datafram": [1, 2, 4, 6, 7, 8], "feature_base_nam": [1, 6, 7], "str": [1, 4, 8], "lookahead_dai": [1, 6], "tupl": [1, 6], "aggregation_fn": [1, 6, 7, 8], "callabl": 1, "dataframegroupbi": 1, "incid": [1, 6], "bool": [1, 4], "prefix": [1, 4, 6], "outc": [1, 4, 6], "basemodel": [1, 4], "outcom": [1, 2, 5, 7], "paramet": [1, 4, 6], "valu": [1, 2, 4, 6, 7, 8], "should": [1, 4, 6, 8], "contain": [1, 4, 6, 8], "column": [1, 2, 4, 6, 7, 8], "entity_id": [1, 4, 6, 7, 8], "id": [1, 2, 6, 8], "entiti": [1, 6], "each": [1, 2, 5, 6, 8], "belong": 1, "The": [1, 2, 5, 6, 7], "timeseri": [1, 4, 6], "timestamp": [1, 4, 6, 7, 8], "datetim": [1, 6, 7], "note": [1, 6, 7, 8], "name": [1, 4, 6, 7, 8], "overridden": 1, "when": [1, 2, 6, 7], "initialis": 1, "gener": [1, 2, 5, 6, 7], "g": [1, 2, 6, 7, 8], "_": [1, 6, 7], "feature_baase_nam": 1, "metadata": [1, 4, 6, 8], "interv": [1, 6], "predict": [1, 2, 4, 5, 7, 8], "look": [1, 2, 6, 7], "two": [1, 4, 6, 7], "specifi": [1, 2, 5, 7], "resolv": 1, "0": [1, 6, 7, 8], "how": [1, 2, 5, 7, 8], "aggreg": [1, 2, 6], "multipl": [1, 2, 6, 7, 8], "within": [1, 2, 6, 8], "lookahead": [1, 2, 6], "dai": [1, 6, 8], "take": [1, 4, 6, 7, 8], "group": [1, 7, 8], "input": 1, "return": [1, 4, 6, 7, 8], "singl": [1, 2, 6], "i": [1, 2, 3, 6, 7, 8], "found": [1, 6], "window": [1, 2, 6, 7], "whether": [1, 6], "type": [1, 2, 4, 6, 7, 8], "2": [1, 6, 7, 8], "diabet": [1, 6], "becaus": [1, 4, 6, 7], "onli": [1, 2, 6, 7, 8], "experi": [1, 6], "onc": [1, 6], "handl": [1, 6], "vectoris": 1, "wai": 1, "dure": 1, "resolut": 1, "faster": [1, 6, 7, 8], "than": [1, 2, 6], "non": 1, "occur": [1, 2, 6], "feature_nam": [1, 7], "default": [1, 4, 6], "pred": [1, 4, 6], "get_output_col_nam": 1, "get": [1, 3, 4], "output": [1, 4, 7, 8], "is_dichotom": 1, "check": [1, 6, 7, 8], "dichotom": 1, "properti": [1, 6, 7], "lookahead_period": 1, "model_computed_field": [1, 4], "classvar": [1, 4], "dict": [1, 4], "computedfieldinfo": [1, 4], "dictionari": [1, 4], "comput": [1, 4, 6, 7, 8], "field": [1, 4], "correspond": [1, 4, 6, 8], "model_config": [1, 4], "configdict": [1, 4], "arbitrary_types_allow": [1, 4], "true": [1, 2, 4, 6, 7], "extra": [1, 6, 7], "forbid": 1, "frozen": 1, "configur": [1, 4], "model": [1, 2, 4, 6, 8], "conform": [1, 4], "pydant": [1, 4], "config": [1, 4], "model_field": [1, 4], "fieldinfo": [1, 4], "annot": [1, 4], "list": [1, 4, 5, 6, 7, 8], "fals": [1, 4, 6, 8], "about": [1, 4, 6], "map": [1, 4], "thi": [1, 2, 4, 6, 7, 8], "replac": [1, 4], "__fields__": [1, 4], "v1": [1, 4], "predictorspec": [1, 4, 6], "lookbehind_dai": [1, 6, 7, 8], "predictor": [1, 2, 4, 5, 7], "lookbehind": [1, 2, 6, 7, 8], "lookbehind_period": [1, 7], "staticspec": [1, 4, 6], "static": [1, 5, 7], "can_be_coerced_losslessly_to_int": 1, "coerce_float": 1, "get_temporal_col_nam": 1, "tempor": [1, 5, 7, 8], "packag": [2, 5, 6, 7], "data": [2, 4, 5, 7, 8], "machin": 2, "learn": 2, "implement": [2, 7], "method": [2, 4, 7], "includ": 2, "convert": [2, 8], "ani": [2, 4, 6, 7, 8], "irregular": [2, 6], "row": [2, 6, 7, 8], "desir": 2, "construct": 2, "raw": 2, "ar": [2, 4, 6, 7, 8], "allow": [2, 4, 7], "patient": [2, 6, 8], "independ": 2, "set": [2, 4, 6], "particular": 2, "sever": [2, 8], "choic": 2, "one": [2, 4, 6, 7, 8], "need": [2, 6, 7, 8], "issu": [2, 6], "everi": [2, 6, 7], "physic": 2, "visit": 2, "morn": 2, "anoth": [2, 6], "clinic": [2, 8], "meaning": 2, "far": [2, 6, 8], "back": [2, 6], "ahead": [2, 6], "exist": 2, "point": [2, 6], "abov": [2, 6, 7, 8], "figur": 2, "graphic": 2, "repres": [2, 6], "terminologi": [2, 6], "determin": [2, 6], "wherea": 2, "futur": [2, 6], "refer": [2, 6], "b": 2, "label": [2, 6], "neg": 2, "never": [2, 6], "happen": [2, 6], "outsid": [2, 6], "posit": [2, 6], "insid": [2, 6], "exampl": [2, 6, 7, 8], "mean": [2, 6, 7, 8], "shown": [2, 6], "max": [2, 6], "min": [2, 6], "etc": [2, 6], "d": 2, "drop": [2, 6, 7, 8], "extend": [2, 6], "further": [2, 4, 6], "start": [2, 3, 6, 7, 8], "dataset": [2, 4, 5, 6, 7], "end": [2, 6, 7], "behaviour": 2, "option": [2, 4], "obtain": 2, "rich": 2, "represent": 2, "see": [2, 4, 6], "tutori": [2, 4, 8], "placehold": 2, "case": [2, 6], "report": 2, "request": 2, "github": [2, 3], "tracker": 2, "otherwis": 2, "discuss": [2, 6], "forum": 2, "bug": 2, "idea": 2, "usag": 2, "index": 2, "run": [3, 5], "follow": [3, 6], "line": [3, 6, 7], "termin": 3, "There": [3, 6, 7, 8], "discrep": 3, "between": 3, "latest": 3, "version": [3, 6, 7], "flatten": [4, 5, 8], "describ": [4, 6, 8], "speccollect": 4, "outcome_spec": [4, 6], "predictor_spec": 4, "static_spec": 4, "collect": 4, "spec": [4, 6, 7, 8], "prediction_times_df": [4, 6, 7, 8], "drop_pred_times_with_insufficient_look_dist": [4, 6, 7, 8], "cach": [4, 5], "featurecach": [4, 7], "none": [4, 6, 7], "entity_id_col_nam": [4, 6, 7, 8], "timestamp_col_nam": [4, 6, 7, 8], "predictor_col_name_prefix": 4, "outcome_col_name_prefix": 4, "n_worker": [4, 6, 7, 8], "60": [4, 7], "log_to_stdout": 4, "turn": [4, 8], "tabular": [4, 8], "add_ag": 4, "date_of_birth_df": 4, "date_of_birth_col_nam": 4, "date_of_birth": 4, "output_prefix": 4, "add": [4, 6, 7], "ag": 4, "ha": [4, 6, 7, 8], "its": [4, 6], "own": [4, 7], "function": [4, 6, 8], "veri": 4, "frequent": [4, 6], "match": 4, "self": [4, 6, 7], "add_spec": [4, 6, 7, 8], "sequenc": [4, 7], "queue": 4, "unprocess": [4, 6, 7, 8], "process": [4, 6, 7, 8], "until": 4, "call": [4, 6, 7], "get_df": [4, 6, 7, 8], "u": 4, "more": [4, 6, 7], "effecti": 4, "parallelis": 4, "most": [4, 6, 7], "complex": 4, "li": 4, "For": [4, 6, 7, 8], "document": 4, "those": 4, "present": [4, 6], "we": [5, 6, 7, 8], "recommend": 5, "go": [5, 6], "through": 5, "below": 5, "jupyt": 5, "notebook": 5, "download": 5, "local": [5, 6, 7], "introductori": 5, "load": [5, 7, 8], "advanc": [5, 6], "creat": [5, 6, 8], "combin": 5, "ad": [5, 6], "embed": 5, "especi": 6, "help": 6, "have": [6, 7, 8], "complic": 6, "train": 6, "simpl": 6, "explain": 6, "appli": 6, "consist": 6, "3": [6, 7, 8], "step": 6, "": [6, 7, 8], "simplest": 6, "first": [6, 7, 8], "predictin": 6, "element": 6, "context": 6, "skimpi": [6, 7], "import": [6, 7, 8], "skim": [6, 7], "test": [6, 7, 8], "load_synth_data": [6, 7, 8], "load_synth_prediction_tim": [6, 7, 8], "df_prediction_tim": 6, "sort_valu": 6, "summari": [6, 7], "count": [6, 7], "10000": [6, 7], "int64": [6, 7], "1": [6, 7, 8], "datetime64": [6, 7], "column_nam": [6, 7], "na": [6, 7, 8], "sd": [6, 7], "p0": [6, 7], "p25": [6, 7], "p50": [6, 7], "p75": [6, 7], "p100": [6, 7], "hist": [6, 7], "5000": [6, 7], "2900": [6, 7], "2500": 6, "4900": [6, 7], "7400": [6, 7], "last": [6, 7, 8], "frequenc": [6, 7], "1965": [6, 8], "01": [6, 7], "02": [6, 7, 8], "09": [6, 7, 8], "35": 6, "00": [6, 7, 8], "1969": [6, 7, 8], "12": [6, 7, 8], "31": [6, 7, 8], "21": [6, 7, 8], "42": [6, 7], "628": 6, "11": [6, 8], "55": 6, "2005": 6, "03": [6, 8], "15": [6, 8], "07": [6, 8], "16": [6, 8], "4370": 6, "13": [6, 7, 8], "23": [6, 8], "18": [6, 7, 8], "6152": 6, "1968": [6, 7, 8], "04": [6, 8], "6873": 6, "4": [6, 7, 8], "28": [6, 8], "33": 6, "9688": 6, "9996": 6, "17": [6, 7, 8], "1463": 6, "30": [6, 7, 8], "19": [6, 7, 8], "3952": 6, "9997": 6, "1967": [6, 8], "06": [6, 8], "08": [6, 8], "52": [6, 8], "7926": 6, "9999": 6, "22": [6, 8], "24": 6, "5720": 6, "14": [6, 8], "59": [6, 7], "here": 6, "Then": [6, 7], "our": [6, 7, 8], "differ": [6, 7], "timepoint": 6, "load_synth_predictor_float": [6, 7], "df_synth_predictor": 6, "100000": 6, "float64": [6, 7], "7500": 6, "5": [6, 7, 8], "9": [6, 7], "00015": 6, "7": [6, 7, 8], "10": [6, 7, 8], "37": 6, "95792": 6, "29": [6, 7], "799246": 6, "82592": 6, "05": [6, 7, 8], "6": [6, 7], "630007": 6, "1377": 6, "174793": 6, "28579": 6, "26": [6, 8], "981185": 6, "81247": 6, "44": [6, 7], "970382": 6, "10277": 6, "20": [6, 8], "304568": 6, "74701": 6, "671907": 6, "69566": 6, "41": [6, 8], "250538": 6, "40901": 6, "1966": [6, 8], "924175": 6, "96881": 6, "501553": 6, "again": 6, "could": 6, "sex": 6, "doesn": 6, "t": [6, 7], "chang": 6, "over": 6, "let": [6, 7, 8], "load_synth_sex": 6, "df_synth_sex": 6, "femal": 6, "9994": 6, "9995": 6, "9998": 6, "As": [6, 8], "And": 6, "lastli": 6, "ve": 6, "chosen": 6, "binari": 6, "store": 6, "infer": 6, "do": 6, "sinc": 6, "thei": [6, 7, 8], "section": 6, "load_synth_outcom": 6, "df_synth_outcom": 6, "3103": 6, "5100": 6, "7600": 6, "50": 6, "46": [6, 7], "6253": 6, "9964": 6, "6255": 6, "9966": 6, "6256": 6, "9968": 6, "6257": 6, "9970": 6, "6269": 6, "9992": 6, "53": [6, 7], "per": [6, 7], "now": [6, 7, 8], "recip": 6, "finish": 6, "firstli": 6, "main": 6, "decis": 6, "size": [6, 7], "given": 6, "indic": 6, "code": [6, 7], "panda": [6, 7, 8], "pd": [6, 8], "maximum": [6, 7], "feature_spec": [6, 7, 8], "single_spec": 6, "test_df": 6, "365": [6, 7, 8], "outcome_nam": 6, "argument": 6, "values_df": 6, "decid": 6, "least": 6, "both": 6, "accomplish": 6, "dw_ek_borg": 6, "wa": [6, 8], "mark": 6, "after": 6, "where": 6, "event": 6, "perman": 6, "specifii": 6, "forward": 6, "search": 6, "certain": 6, "period": [6, 8], "befor": [6, 8], "instead": 6, "almost": 6, "entir": 6, "ident": 6, "except": 6, "past": 6, "numpi": [6, 7, 8], "np": [6, 7, 8], "temporal_predictor_spec": 6, "730": [6, 7, 8], "nan": [6, 7, 8], "predictor_nam": 6, "rang": 6, "similar": 6, "instanc": [6, 7], "might": [6, 7, 8], "182": 6, "easili": 6, "pass": [6, 8], "temporal_interval_predictor_spec": 6, "90": 6, "predictor_interval_nam": 6, "slightli": 6, "previou": 6, "provid": 6, "howev": [6, 7, 8], "By": 6, "filter": 6, "easi": 6, "manual": [6, 7], "sex_predictor_spec": 6, "input_col_name_overrid": 6, "df": [6, 7, 8], "tsflatten": 6, "re": [6, 8], "readi": 6, "instanti": 6, "along": 6, "add_": 6, "parallel": [6, 7, 8], "oper": 6, "across": 6, "core": [6, 7], "ts_flatten": [6, 7, 8], "applic": 6, "sai": [6, 7], "month": [6, 7, 8], "would": [6, 8], "compromis": 6, "generalis": 6, "some": [6, 7, 8], "edg": 6, "brief": 6, "2024": [6, 7, 8], "info": [6, 7, 8], "were": [6, 7, 8], "_drop_pred_time_if_insufficient_look_dist": [6, 7], "5999": 6, "99": 6, "worker": [6, 7, 8], "chunksiz": [6, 7, 8], "mai": [6, 7, 8], "progress": [6, 7, 8], "bar": [6, 7, 8], "move": [6, 7, 8], "batch": [6, 7, 8], "much": [6, 7, 8], "total": [6, 7, 8], "perform": [6, 7, 8], "100": [6, 7, 8], "38": 6, "31it": 6, "align": [6, 7, 8], "littl": [6, 7, 8], "while": [6, 7, 8], "minut": [6, 7, 8], "000": [6, 7, 8], "concaten": [6, 7, 8], "Will": [6, 7, 8], "system": [6, 7, 8], "2_000_000": [6, 7, 8], "normal": [6, 7, 8], "took": [6, 7, 8], "004": 6, "second": [6, 7, 8], "merg": [6, 7, 8], "origin": [6, 7, 8], "4001": 6, "string": [6, 7], "2600": [6, 7], "pred_predictor_interv": 6, "2877": 6, "71": 6, "91": 6, "_name_within_30_to_90_d": 6, "ays_mean_fallback_nan": [6, 7], "pred_predictor_name_wit": 6, "72": 6, "097": 6, "hin_730_days_mean_fallb": 6, "ack_nan": 6, "outc_outcome_name_withi": 6, "064": 6, "25": [6, 8], "n_365_days_maximum_fal": 6, "back_0_dichotom": 6, "pred_femal": 6, "49": 6, "39": 6, "word": [6, 7, 8], "prediction_time_uuid": [6, 7, 8], "pred_predictor_interval_name_within_30_to_90_days_mean_fallback_nan": 6, "pred_predictor_name_within_730_days_mean_fallback_nan": 6, "outc_outcome_name_within_365_days_maximum_fallback_0_dichotom": 6, "display": [6, 7], "shorten": [6, 7], "col": [6, 7], "shortened_pr": 6, "pred_x": 6, "shortened_pred_interv": 6, "pred_x_30_to_90": 6, "shortened_outcom": 6, "outc_i": 6, "renam": [6, 7], "pred_predictor_name_within_0_to_730_days_mean_fallback_nan": 6, "outc_outcome_name_within_0_to_365_days_maximum_fallback_0_dichotom": 6, "axi": [6, 7, 8], "set_table_attribut": [6, 7], "font": [6, 7], "14px": [6, 7], "importerror": [6, 7], "traceback": [6, 7], "recent": [6, 7], "cell": [6, 7], "file": [6, 7], "lib": [6, 7], "python3": [6, 7], "site": [6, 7], "frame": [6, 7], "1338": [6, 7], "1318": [6, 7], "1319": [6, 7, 8], "def": [6, 7, 8], "styler": [6, 7], "1320": [6, 7], "1321": [6, 7], "1322": [6, 7], "1336": [6, 7], "tabl": [6, 7], "visual": [6, 7], "user_guid": [6, 7], "ipynb": [6, 7], "1337": [6, 7], "io": [6, 7], "format": [6, 7, 8], "1340": [6, 7], "40": [6, 7], "shared_doc": [6, 7], "_shared_doc": [6, 7], "save_to_buff": [6, 7], "jinja2": [6, 7], "import_optional_depend": [6, 7], "style_rend": [6, 7], "47": [6, 7], "cssproperti": [6, 7], "48": [6, 7], "cssstyle": [6, 7], "56": [6, 7], "refactor_level": [6, 7], "57": [6, 7], "type_check": [6, 7], "compat": [6, 7], "_option": [6, 7], "161": [6, 7], "error": [6, 7], "min_vers": [6, 7], "159": [6, 7], "160": [6, 7], "elif": [6, 7], "rais": [6, 7], "msg": [6, 7], "163": [6, 7], "modul": [6, 7], "newer": [6, 7], "current": [6, 7], "classif": 6, "citizen": 6, "uniqu": 6, "identifi": 6, "prediciton": 6, "pred_": [6, 7], "outc_": 6, "basic": 7, "cover": [7, 8], "expand": 7, "effect": 7, "mani": 7, "so": [7, 8], "iter": 7, "without": 7, "complet": 7, "full": 7, "hand": 7, "rather": 7, "straightforward": 7, "what": 7, "hundr": 7, "amount": 7, "write": 7, "grow": 7, "quit": 7, "substanti": 7, "becom": 7, "consum": 7, "hard": 7, "navig": 7, "solv": 7, "problem": 7, "combinatori": 7, "pprint": 7, "group_spec": [7, 8], "nameddatafram": 7, "predictorgroupspec": [7, 8], "pred_spec_batch": 7, "named_datafram": [7, 8], "synth_predictor_float": 7, "1095": 7, "create_combin": [7, 8], "attribut": 7, "easier": 7, "namedatafram": 7, "exactli": 7, "load_synth_predictor_flaot": 7, "pred_synth_predictor_float_": 7, "result": [7, 8], "good": 7, "small": [7, 8], "highlight": 7, "pred_spec_batch_summari": 7, "pred_spec": 7, "__name__": 7, "print": [7, 8], "f": 7, "len": [7, 8], "know": 7, "bunch": 7, "quickli": 7, "But": 7, "next": 7, "ship": 7, "disk": 7, "pathlib": 7, "path": 7, "feature_cach": 7, "cache_to_disk": 7, "diskcach": 7, "flattened_dataset": 7, "feature_cache_dir": 7, "tmp": 7, "directori": 7, "save": 7, "just": 7, "them": 7, "won": 7, "alreadi": [7, 8], "new": 7, "abstract": 7, "redi": 7, "sql": 7, "everyth": 7, "work": 7, "6053": 7, "64it": 7, "48it": 7, "006": 7, "3947": 7, "pred_synth_predictor_fl": 7, "506": 7, "82": 7, "024": 7, "oat_within_365_to_730_d": 7, "oat_within_1095_days_m": 7, "an_fallback_nan": 7, "oat_within_1095_days_ma": 7, "ximum_fallback_nan": 7, "533": 7, "0084": 7, "oat_within_365_days_mea": 7, "n_fallback_nan": 7, "oat_within_365_days_max": 7, "imum_fallback_nan": 7, "ays_maximum_fallback_na": 7, "n": 7, "pred_synth_predictor_float_within_365_to_730_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_1095_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_1095_days_maximum_fallback_nan": 7, "pred_synth_predictor_float_within_365_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_365_days_maximum_fallback_nan": 7, "pred_synth_predictor_float_within_365_to_730_days_maximum_fallback_nan": 7, "pred_col": 7, "startswith": 7, "rename_dict": 7, "enumer": 7, "df_renam": 7, "base_col": 7, "renamed_col": 7, "dealt": 8, "show": 8, "out": 8, "synthet": 8, "other": 8, "load_synth_text": 8, "synth_text": 8, "head": 8, "4647": 8, "went": 8, "induc": 8, "coma": 8, "2007": 8, "taken": 8, "emerg": 8, "departm": 8, "5799": 8, "old": 8, "son": 8, "wh": 8, "had": 8, "been": 8, "left": 8, "bed": 8, "minu": 8, "4234": 8, "allergi": 8, "often": 8, "advantag": 8, "emb": 8, "speed": 8, "up": 8, "block": 8, "tf": 8, "idf": 8, "form": 8, "constraint": 8, "entitiy_id_col": 8, "timestamp_col": 8, "value_col": 8, "purpos": 8, "demonstr": 8, "fit": 8, "captur": 8, "sklearn": 8, "feature_extract": 8, "tfidfvector": 8, "embed_text_to_df": 8, "tfidf_model": 8, "max_featur": 8, "fit_transform": 8, "toarrai": 8, "get_feature_names_out": 8, "embedded_text": 8, "tolist": 8, "metadata_onli": 8, "embedded_text_with_metadata": 8, "concat": 8, "ignor": 8, "175872": 8, "182066": 8, "249848": 8, "158430": 8, "000000": 8, "023042": 8, "311389": 8, "529966": 8, "490203": 8, "479312": 8, "244870": 8, "135282": 8, "064337": 8, "465084": 8, "336859": 8, "151743": 8, "729861": 8, "179161": 8, "192367": 8, "232332": 8, "283402": 8, "336952": 8, "176422": 8, "238416": 8, "646879": 8, "250217": 8, "382277": 8, "165635": 8, "200046": 8, "183015": 8, "261115": 8, "125837": 8, "151906": 8, "205285": 8, "759528": 8, "403961": 8, "098747": 8, "493461": 8, "119196": 8, "272619": 8, "207444": 8, "045256": 8, "183475": 8, "588324": 8, "433253": 8, "235349": 8, "df_with_multiple_values_to_named_datafram": 8, "readili": 8, "suppli": 8, "df_transform": 8, "split": 8, "embedded_df": 8, "name_prefix": 8, "tfidf_": 8, "accord": 8, "inform": 8, "bow": 8, "kept": 8, "tfidf_and": 8, "emb_spec_batch": 8, "62it": 8, "38it": 8, "sake": 8, "dropna": 8, "pred_tfidf_to_within_365_days_mean_fallback_nan": 8, "pred_tfidf_in_within_730_days_mean_fallback_nan": 8, "pred_tfidf_in_within_365_days_mean_fallback_nan": 8, "pred_tfidf_the_within_365_days_mean_fallback_nan": 8, "pred_tfidf_was_within_365_days_mean_fallback_nan": 8, "pred_tfidf_was_within_730_days_mean_fallback_nan": 8, "pred_tfidf_of_within_365_days_mean_fallback_nan": 8, "pred_tfidf_patient_within_730_days_mean_fallback_nan": 8, "pred_tfidf_or_within_365_days_mean_fallback_nan": 8, "pred_tfidf_that_within_730_days_mean_fallback_nan": 8, "pred_tfidf_or_within_730_days_mean_fallback_nan": 8, "pred_tfidf_that_within_365_days_mean_fallback_nan": 8, "pred_tfidf_and_within_365_days_mean_fallback_nan": 8, "pred_tfidf_the_within_730_days_mean_fallback_nan": 8, "pred_tfidf_to_within_730_days_mean_fallback_nan": 8, "pred_tfidf_patient_within_365_days_mean_fallback_nan": 8, "pred_tfidf_of_within_730_days_mean_fallback_nan": 8, "1917": 8, "4977": 8, "284485": 8, "483324": 8, "534890": 8, "086927": 8, "536339": 8, "133722": 8, "221549": 8, "090356": 8, "145809": 8, "2463": 8, "6840": 8, "456030": 8, "258256": 8, "285810": 8, "092896": 8, "573168": 8, "071452": 8, "355142": 8, "096561": 8, "155821": 8, "2580": 8, "639848": 8, "601521": 8, "260680": 8, "401014": 8, "2741": 8, "9832": 8, "36": 8, "164655": 8, "186493": 8, "825558": 8, "335410": 8, "236513": 8, "103195": 8, "128228": 8, "225044": 8, "2931": 8, "7281": 8, "211934": 8, "280049": 8, "464891": 8, "388547": 8, "304425": 8, "332065": 8, "385111": 8, "269251": 8, "289663": 8}, "objects": {"timeseriesflattener.feature_specs": [[1, 0, 0, "-", "single_specs"]], "timeseriesflattener.feature_specs.single_specs": [[1, 1, 1, "", "CoercedFloats"], [1, 1, 1, "", "LookPeriod"], [1, 1, 1, "", "OutcomeSpec"], [1, 1, 1, "", "PredictorSpec"], [1, 1, 1, "", "StaticSpec"], [1, 5, 1, "", "can_be_coerced_losslessly_to_int"], [1, 5, 1, "", "coerce_floats"], [1, 5, 1, "", "get_temporal_col_name"]], "timeseriesflattener.feature_specs.single_specs.CoercedFloats": [[1, 2, 1, "", "fallback"], [1, 2, 1, "", "lookperiod"]], "timeseriesflattener.feature_specs.single_specs.LookPeriod": [[1, 2, 1, "", "max_days"], [1, 2, 1, "", "min_days"]], "timeseriesflattener.feature_specs.single_specs.OutcomeSpec": [[1, 2, 1, "", "aggregation_fn"], [1, 2, 1, "", "fallback"], [1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "incident"], [1, 3, 1, "", "is_dichotomous"], [1, 2, 1, "", "lookahead_days"], [1, 4, 1, "", "lookahead_period"], [1, 2, 1, "", "model_computed_fields"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener.feature_specs.single_specs.PredictorSpec": [[1, 2, 1, "", "aggregation_fn"], [1, 2, 1, "", "fallback"], [1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "lookbehind_days"], [1, 4, 1, "", "lookbehind_period"], [1, 2, 1, "", "model_computed_fields"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener.feature_specs.single_specs.StaticSpec": [[1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "model_computed_fields"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener": [[4, 0, 0, "-", "flattened_dataset"]], "timeseriesflattener.flattened_dataset": [[4, 1, 1, "", "SpecCollection"], [4, 1, 1, "", "TimeseriesFlattener"]], "timeseriesflattener.flattened_dataset.SpecCollection": [[4, 2, 1, "", "model_computed_fields"], [4, 2, 1, "", "model_config"], [4, 2, 1, "", "model_fields"], [4, 2, 1, "", "outcome_specs"], [4, 2, 1, "", "predictor_specs"], [4, 2, 1, "", "static_specs"]], "timeseriesflattener.flattened_dataset.TimeseriesFlattener": [[4, 3, 1, "", "add_age"], [4, 3, 1, "", "add_spec"], [4, 3, 1, "", "compute"], [4, 3, 1, "", "get_df"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:attribute", "3": "py:method", "4": "py:property", "5": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "method", "Python method"], "4": ["py", "property", "Python property"], "5": ["py", "function", "Python function"]}, "titleterms": {"frequent": 0, "ask": [0, 2], "question": [0, 2], "cite": 0, "thi": 0, "packag": 0, "how": [0, 6], "do": 0, "i": 0, "test": 0, "code": 0, "run": 0, "suit": 0, "document": 0, "gener": [0, 8], "featur": [1, 7, 8], "specif": [1, 6], "timeseriesflatten": [1, 2, 4], "feature_spec": 1, "single_spec": 1, "function": 2, "where": 2, "indic": 2, "search": 2, "instal": 3, "flattened_dataset": 4, "tutori": [5, 6, 7], "get": 5, "start": 5, "introductori": 6, "load": 6, "data": 6, "predict": 6, "time": 6, "tempor": 6, "predictor": [6, 8], "static": 6, "outcom": 6, "specifi": 6, "flatten": 6, "advanc": 7, "creat": 7, "combin": 7, "cach": 7, "ad": 8, "text": 8, "The": 8, "dataset": 8, "from": 8, "embed": 8}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Frequently Asked Questions": [[0, "frequently-asked-questions"]], "Citing this package": [[0, "citing-this-package"]], "How do I test the code and run the test suite?": [[0, "how-do-i-test-the-code-and-run-the-test-suite"]], "How is the documentation generated?": [[0, "how-is-the-documentation-generated"]], "Feature specifications": [[1, "feature-specifications"]], "timeseriesflattener.feature_specs.single_specs": [[1, "module-timeseriesflattener.feature_specs.single_specs"]], "timeseriesflattener": [[2, "timeseriesflattener"]], "Functionality": [[2, "functionality"]], "Where to ask questions?": [[2, "where-to-ask-questions"]], "Indices and search": [[2, "indices-and-search"]], "Installation": [[3, "installation"]], "Timeseriesflattener": [[4, "timeseriesflattener"]], "timeseriesflattener.flattened_dataset": [[4, "module-timeseriesflattener.flattened_dataset"]], "Tutorials": [[5, "tutorials"]], "Getting started": [[5, null]], "Introductory Tutorial": [[6, "introductory-tutorial"]], "Loading data": [[6, "loading-data"]], "Loading prediction times": [[6, "loading-prediction-times"]], "Loading a temporal predictor": [[6, "loading-a-temporal-predictor"]], "Loading a static predictor": [[6, "loading-a-static-predictor"]], "Loading a temporal outcome": [[6, "loading-a-temporal-outcome"]], "Specifying how to flatten the data": [[6, "specifying-how-to-flatten-the-data"]], "Temporal outcome specification": [[6, "temporal-outcome-specification"]], "Temporal predictor specification": [[6, "temporal-predictor-specification"]], "Static predictor specification": [[6, "static-predictor-specification"]], "Flattening": [[6, "flattening"]], "Advanced Tutorial": [[7, "advanced-tutorial"]], "Creating feature combinations": [[7, "creating-feature-combinations"]], "Caching": [[7, "caching"]], "Adding text features": [[8, "adding-text-features"]], "The dataset": [[8, "the-dataset"]], "Generating predictors from embedded text": [[8, "generating-predictors-from-embedded-text"]]}, "indexentries": {"coercedfloats (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats"]], "lookperiod (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod"]], "outcomespec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec"]], "predictorspec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec"]], "staticspec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec"]], "aggregation_fn (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.aggregation_fn"]], "aggregation_fn (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.aggregation_fn"]], "can_be_coerced_losslessly_to_int() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.can_be_coerced_losslessly_to_int"]], "coerce_floats() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.coerce_floats"]], "fallback (coercedfloats attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats.fallback"]], "fallback (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.fallback"]], "fallback (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.fallback"]], "feature_base_name (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.feature_base_name"]], "feature_base_name (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.feature_base_name"]], "feature_base_name (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.feature_base_name"]], "get_output_col_name() (outcomespec method)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.get_output_col_name"]], "get_output_col_name() (predictorspec method)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.get_output_col_name"]], "get_output_col_name() (staticspec method)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.get_output_col_name"]], "get_temporal_col_name() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.get_temporal_col_name"]], "incident (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.incident"]], "is_dichotomous() (outcomespec method)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.is_dichotomous"]], "lookahead_days (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.lookahead_days"]], "lookahead_period (outcomespec property)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.lookahead_period"]], "lookbehind_days (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.lookbehind_days"]], "lookbehind_period (predictorspec property)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.lookbehind_period"]], "lookperiod (coercedfloats attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats.lookperiod"]], "max_days (lookperiod attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod.max_days"]], "min_days (lookperiod attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod.min_days"]], "model_computed_fields (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_computed_fields"]], "model_computed_fields (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_computed_fields"]], "model_computed_fields (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_computed_fields"]], "model_config (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_config"]], "model_config (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_config"]], "model_config (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_config"]], "model_fields (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_fields"]], "model_fields (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_fields"]], "model_fields (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_fields"]], "module": [[1, "module-timeseriesflattener.feature_specs.single_specs"], [4, "module-timeseriesflattener.flattened_dataset"]], "prefix (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.prefix"]], "prefix (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.prefix"]], "prefix (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.prefix"]], "timeseries_df (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.timeseries_df"]], "timeseries_df (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.timeseries_df"]], "timeseries_df (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.timeseries_df"]], "timeseriesflattener.feature_specs.single_specs": [[1, "module-timeseriesflattener.feature_specs.single_specs"]], "speccollection (class in timeseriesflattener.flattened_dataset)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection"]], "timeseriesflattener (class in timeseriesflattener.flattened_dataset)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener"]], "add_age() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.add_age"]], "add_spec() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.add_spec"]], "compute() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.compute"]], "get_df() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.get_df"]], "model_computed_fields (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_computed_fields"]], "model_config (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_config"]], "model_fields (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_fields"]], "outcome_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.outcome_specs"]], "predictor_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.predictor_specs"]], "static_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.static_specs"]], "timeseriesflattener.flattened_dataset": [[4, "module-timeseriesflattener.flattened_dataset"]]}}) \ No newline at end of file diff --git a/tutorials/01_basic.html b/tutorials/01_basic.html index d279f3b9..7db14a61 100644 --- a/tutorials/01_basic.html +++ b/tutorials/01_basic.html @@ -991,31 +991,31 @@

Flattening -
2024-02-13 09:15:54 [INFO] There were unprocessed specs, computing...
+
2024-02-13 09:19:14 [INFO] There were unprocessed specs, computing...
 
-
2024-02-13 09:15:54 [INFO] _drop_pred_time_if_insufficient_look_distance: Dropped 5999 (59.99%) rows
+
2024-02-13 09:19:14 [INFO] _drop_pred_time_if_insufficient_look_distance: Dropped 5999 (59.99%) rows
 
-
2024-02-13 09:15:54 [INFO] Processing 3 temporal features in parallel with 1 workers. Chunksize is 3. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
+
2024-02-13 09:19:14 [INFO] Processing 3 temporal features in parallel with 1 workers. Chunksize is 3. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
 
  0%|          | 0/3 [00:00<?, ?it/s]
 
-
100%|██████████| 3/3 [00:00<00:00, 36.77it/s]
+
100%|██████████| 3/3 [00:00<00:00, 38.31it/s]
 
-
2024-02-13 09:15:54 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
+
2024-02-13 09:19:14 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
 
-
2024-02-13 09:15:54 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
+
2024-02-13 09:19:14 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
 
-
2024-02-13 09:15:54 [INFO] Concatenation took 0.004 seconds
+
2024-02-13 09:19:14 [INFO] Concatenation took 0.004 seconds
 
-
2024-02-13 09:15:54 [INFO] Merging with original df
+
2024-02-13 09:19:14 [INFO] Merging with original df
 
╭──────────────────────────────────────────────── skimpy summary ─────────────────────────────────────────────────╮
@@ -1033,15 +1033,15 @@ 

Flattening column_name ┃ NA NA % mean sd p0 p25 p50 p75 p100 hist ┃ │ │ ┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━╇━━━━━━━╇━━━━━━━━┩ │ │ │ entity_id 0 0 5000 2900 3 2600 5000750010000▇▇▇▇▇▇ │ │ -│ │ outc_outcome_name_withi 0 0 0.064 0.25 0 0 0 0 1▇ ▁ │ │ -│ │ n_365_days_maximum_fall │ │ │ │ │ │ │ │ │ │ │ │ -│ │ back_0_dichotomous │ │ │ │ │ │ │ │ │ │ │ │ │ │ pred_predictor_interval 2877 71.91 5 2.8 0.02 2.6 5.1 7.4 10▇▇▇▇▇▇ │ │ │ │ _name_within_30_to_90_d │ │ │ │ │ │ │ │ │ │ │ │ │ │ ays_mean_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ │ │ pred_predictor_name_wit 72 1.8 5 1.6 0.097 3.9 5 6 9.9▁▃▇▇▃▁ │ │ │ │ hin_730_days_mean_fallb │ │ │ │ │ │ │ │ │ │ │ │ │ │ ack_nan │ │ │ │ │ │ │ │ │ │ │ │ +│ │ outc_outcome_name_withi 0 0 0.064 0.25 0 0 0 0 1▇ ▁ │ │ +│ │ n_365_days_maximum_fall │ │ │ │ │ │ │ │ │ │ │ │ +│ │ back_0_dichotomous │ │ │ │ │ │ │ │ │ │ │ │ │ │ pred_female 0 0 0.49 0.5 0 0 0 1 1▇ ▇ │ │ │ └─────────────────────────┴───────┴────────┴────────┴───────┴────────┴───────┴───────┴──────┴───────┴────────┘ │ │ datetime │ @@ -1061,9 +1061,9 @@

Flattening
['entity_id',
  'timestamp',
  'prediction_time_uuid',
- 'outc_outcome_name_within_365_days_maximum_fallback_0_dichotomous',
  'pred_predictor_interval_name_within_30_to_90_days_mean_fallback_nan',
  'pred_predictor_name_within_730_days_mean_fallback_nan',
+ 'outc_outcome_name_within_365_days_maximum_fallback_0_dichotomous',
  'pred_female']
 

diff --git a/tutorials/02_advanced.html b/tutorials/02_advanced.html index 4f18e972..1aba8d65 100644 --- a/tutorials/02_advanced.html +++ b/tutorials/02_advanced.html @@ -375,37 +375,37 @@

Caching -
2024-02-13 09:15:57 [INFO] There were unprocessed specs, computing...
+
2024-02-13 09:19:17 [INFO] There were unprocessed specs, computing...
 
-
2024-02-13 09:15:57 [INFO] _drop_pred_time_if_insufficient_look_distance: Dropped 6053 (60.53%) rows
+
2024-02-13 09:19:17 [INFO] _drop_pred_time_if_insufficient_look_distance: Dropped 6053 (60.53%) rows
 
-
2024-02-13 09:15:57 [INFO] Processing 6 temporal features in parallel with 4 workers. Chunksize is 2. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
+
2024-02-13 09:19:17 [INFO] Processing 6 temporal features in parallel with 4 workers. Chunksize is 2. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
 
  0%|          | 0/6 [00:00<?, ?it/s]
 
-
 50%|█████     | 3/6 [00:00<00:00, 21.72it/s]
+
 17%|█▋        | 1/6 [00:00<00:00,  9.64it/s]
 
-
100%|██████████| 6/6 [00:00<00:00, 40.17it/s]
+
100%|██████████| 6/6 [00:00<00:00, 42.48it/s]
 

 
-
2024-02-13 09:15:57 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
+
2024-02-13 09:19:17 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
 
-
2024-02-13 09:15:57 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
+
2024-02-13 09:19:17 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
 
-
2024-02-13 09:15:57 [INFO] Concatenation took 0.006 seconds
+
2024-02-13 09:19:17 [INFO] Concatenation took 0.006 seconds
 
-
2024-02-13 09:15:57 [INFO] Merging with original df
+
2024-02-13 09:19:17 [INFO] Merging with original df
 
@@ -434,6 +434,18 @@

Caching column_name ┃ NA NA % mean sd p0 p25 p50 p75 p100 hist ┃ │ │ ┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━┩ │ │ │ entity_id 0 0 5000 2900 0 2600 4900 740010000▇▇▇▇▇▇ │ │ +│ │ pred_synth_predictor_fl 506 12.82 5.1 2.2 0.024 3.6 5 6.5 10▂▅▇▇▅▂ │ │ +│ │ oat_within_365_to_730_d │ │ │ │ │ │ │ │ │ │ │ │ +│ │ ays_mean_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ +│ │ pred_synth_predictor_fl 7 0.18 5 1.3 0.29 4.1 5 5.8 9.9 ▂▇▇▁ │ │ +│ │ oat_within_1095_days_me │ │ │ │ │ │ │ │ │ │ │ │ +│ │ an_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ +│ │ pred_synth_predictor_fl 7 0.18 8.4 1.5 0.29 7.8 8.9 9.5 10 ▁▃▇ │ │ +│ │ oat_within_1095_days_ma │ │ │ │ │ │ │ │ │ │ │ │ +│ │ ximum_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ +│ │ pred_synth_predictor_fl 533 13.5 5 2.1 0.0084 3.6 5 6.4 9.9▂▅▇▇▅▂ │ │ +│ │ oat_within_365_days_mea │ │ │ │ │ │ │ │ │ │ │ │ +│ │ n_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ │ │ pred_synth_predictor_fl 533 13.5 6.6 2.6 0.0084 4.8 7.3 8.8 10▁▂▃▃▆▇ │ │ │ │ oat_within_365_days_max │ │ │ │ │ │ │ │ │ │ │ │ │ │ imum_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ @@ -441,18 +453,6 @@

Cachingoat_within_365_to_730_d │ │ │ │ │ │ │ │ │ │ │ │ │ │ ays_maximum_fallback_na │ │ │ │ │ │ │ │ │ │ │ │ │ │ n │ │ │ │ │ │ │ │ │ │ │ │ -│ │ pred_synth_predictor_fl 533 13.5 5 2.1 0.0084 3.6 5 6.4 9.9▂▅▇▇▅▂ │ │ -│ │ oat_within_365_days_mea │ │ │ │ │ │ │ │ │ │ │ │ -│ │ n_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ -│ │ pred_synth_predictor_fl 7 0.18 8.4 1.5 0.29 7.8 8.9 9.5 10 ▁▃▇ │ │ -│ │ oat_within_1095_days_ma │ │ │ │ │ │ │ │ │ │ │ │ -│ │ ximum_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ -│ │ pred_synth_predictor_fl 506 12.82 5.1 2.2 0.024 3.6 5 6.5 10▂▅▇▇▅▂ │ │ -│ │ oat_within_365_to_730_d │ │ │ │ │ │ │ │ │ │ │ │ -│ │ ays_mean_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ -│ │ pred_synth_predictor_fl 7 0.18 5 1.3 0.29 4.1 5 5.8 9.9 ▂▇▇▁ │ │ -│ │ oat_within_1095_days_me │ │ │ │ │ │ │ │ │ │ │ │ -│ │ an_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ │ └─────────────────────────┴──────┴────────┴───────┴───────┴─────────┴───────┴───────┴───────┴───────┴────────┘ │ │ datetime │ │ ┏━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ │ @@ -471,12 +471,12 @@

Caching
['entity_id',
  'timestamp',
  'prediction_time_uuid',
- 'pred_synth_predictor_float_within_365_days_maximum_fallback_nan',
- 'pred_synth_predictor_float_within_365_to_730_days_maximum_fallback_nan',
- 'pred_synth_predictor_float_within_365_days_mean_fallback_nan',
- 'pred_synth_predictor_float_within_1095_days_maximum_fallback_nan',
  'pred_synth_predictor_float_within_365_to_730_days_mean_fallback_nan',
- 'pred_synth_predictor_float_within_1095_days_mean_fallback_nan']
+ 'pred_synth_predictor_float_within_1095_days_mean_fallback_nan',
+ 'pred_synth_predictor_float_within_1095_days_maximum_fallback_nan',
+ 'pred_synth_predictor_float_within_365_days_mean_fallback_nan',
+ 'pred_synth_predictor_float_within_365_days_maximum_fallback_nan',
+ 'pred_synth_predictor_float_within_365_to_730_days_maximum_fallback_nan']
 

diff --git a/tutorials/03_text.html b/tutorials/03_text.html index 57895867..9af0b276 100644 --- a/tutorials/03_text.html +++ b/tutorials/03_text.html @@ -622,31 +622,31 @@

Generating predictors from embedded text -
2024-02-13 09:16:00 [INFO] There were unprocessed specs, computing...
+
2024-02-13 09:19:20 [INFO] There were unprocessed specs, computing...
 
-
2024-02-13 09:16:00 [INFO] Processing 20 temporal features in parallel with 1 workers. Chunksize is 20. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
+
2024-02-13 09:19:20 [INFO] Processing 20 temporal features in parallel with 1 workers. Chunksize is 20. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
 
  0%|          | 0/20 [00:00<?, ?it/s]
 
-
  5%|▌         | 1/20 [00:00<00:07,  2.54it/s]
+
  5%|▌         | 1/20 [00:00<00:07,  2.62it/s]
 
-
100%|██████████| 20/20 [00:00<00:00, 50.69it/s]
+
100%|██████████| 20/20 [00:00<00:00, 52.38it/s]
 
-
2024-02-13 09:16:01 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
+
2024-02-13 09:19:21 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
 
-
2024-02-13 09:16:01 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
+
2024-02-13 09:19:21 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
 
-
2024-02-13 09:16:01 [INFO] Concatenation took 0.03 seconds
+
2024-02-13 09:19:21 [INFO] Concatenation took 0.03 seconds
 
-
2024-02-13 09:16:01 [INFO] Merging with original df
+
2024-02-13 09:19:21 [INFO] Merging with original df
 
@@ -681,24 +681,24 @@

Generating predictors from embedded text