diff --git a/searchindex.js b/searchindex.js index c47d94c5..53a1ab80 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["faq", "feature_specifications", "index", "installation", "timeseriesflattener", "tutorials", "tutorials/01_basic", "tutorials/02_advanced", "tutorials/03_text"], "filenames": ["faq.rst", "feature_specifications.rst", "index.rst", "installation.rst", "timeseriesflattener.rst", "tutorials.rst", "tutorials/01_basic.ipynb", "tutorials/02_advanced.ipynb", "tutorials/03_text.ipynb"], "titles": ["Frequently Asked Questions", "Feature specifications", "timeseriesflattener", "Installation", "Timeseriesflattener", "Tutorials", "Introductory Tutorial", "Advanced Tutorial", "Adding text features"], "terms": {"If": [0, 6, 7, 8], "you": [0, 1, 4, 5, 6, 7, 8], "wish": 0, "us": [0, 1, 2, 3, 4, 5, 6, 7, 8], "librari": 0, "your": [0, 3, 6, 7, 8], "research": 0, "pleas": [0, 2], "joss": 0, "paper": 0, "articl": 0, "bernstorff2023timeseriesflatten": 0, "titl": 0, "timeseriesflatten": [0, 3, 6, 7, 8], "A": [0, 1, 2, 4, 6, 7], "python": [0, 2], "summar": 0, "featur": [0, 2, 5, 6], "from": [0, 1, 2, 4, 5, 6, 7], "medic": [0, 2, 6, 8], "time": [0, 1, 2, 4, 5, 7, 8], "seri": [0, 1, 2, 4, 6], "author": 0, "bernstorff": 0, "martin": 0, "enevoldsen": 0, "kenneth": 0, "damgaard": 0, "jakob": 0, "danielsen": 0, "andrea": 0, "hansen": 0, "lass": 0, "journal": 0, "open": 0, "sourc": [0, 1, 4], "softwar": 0, "volum": 0, "8": [0, 6, 7], "number": [0, 2, 6, 7, 8], "83": 0, "page": [0, 2], "5197": 0, "year": [0, 6, 7], "2023": 0, "Or": [0, 7], "prefer": 0, "apa": 0, "m": 0, "k": 0, "j": 0, "l": 0, "come": [0, 7], "an": [0, 1, 2, 6, 7, 8], "extens": 0, "In": [0, 1, 6, 7], "order": [0, 5], "ll": [0, 6, 7], "usual": 0, "want": [0, 6, 7, 8], "clone": 0, "repositori": 0, "build": 0, "also": [0, 5, 6], "instal": [0, 6, 7], "requir": [0, 1, 2, 4, 6, 7], "develop": 0, "depend": 0, "util": 0, "defin": [0, 1, 4, 8], "pyproject": 0, "toml": 0, "pip": [0, 3], "e": [0, 1, 2, 6, 7, 8], "dev": 0, "pytest": 0, "which": [0, 1, 2, 4, 5, 6, 7], "all": [0, 6, 7, 8], "folder": 0, "specif": [0, 4, 5, 7, 8], "can": [0, 1, 2, 5, 6, 7, 8], "desired_test": 0, "py": [0, 6, 7], "sphinx": 0, "It": [0, 6], "furo": 0, "theme": 0, "custom": 0, "style": [0, 6, 7], "To": [0, 2, 3, 5, 6, 7, 8], "make": [0, 2, 6, 7, 8], "doc": [0, 6], "text": [0, 5], "html": 0, "c": [0, 2, 7], "class": [1, 4, 6, 7], "coercedfloat": 1, "lookperiod": [1, 7], "fallback": [1, 6, 7, 8], "union": [1, 4], "float": 1, "int": [1, 4], "base": [1, 4], "object": [1, 4, 6, 7], "min_dai": [1, 6, 7], "max_dai": [1, 6, 7], "outcomespec": [1, 4, 6], "timeseries_df": [1, 6], "datafram": [1, 2, 4, 6, 7, 8], "feature_base_nam": [1, 6, 7], "str": [1, 4, 8], "lookahead_dai": [1, 6], "tupl": [1, 6], "aggregation_fn": [1, 6, 7, 8], "callabl": 1, "dataframegroupbi": 1, "incid": [1, 6], "bool": [1, 4], "prefix": [1, 4, 6], "outc": [1, 4, 6], "basemodel": [1, 4], "outcom": [1, 2, 5, 7], "paramet": [1, 4, 6], "valu": [1, 2, 4, 6, 7, 8], "should": [1, 4, 6, 8], "contain": [1, 4, 6, 8], "column": [1, 2, 4, 6, 7, 8], "entity_id": [1, 4, 6, 7, 8], "id": [1, 2, 6, 8], "entiti": [1, 6], "each": [1, 2, 5, 6, 8], "belong": 1, "The": [1, 2, 5, 6, 7], "timeseri": [1, 4, 6], "timestamp": [1, 4, 6, 7, 8], "datetim": [1, 6, 7], "note": [1, 6, 7, 8], "name": [1, 4, 6, 7, 8], "overridden": 1, "when": [1, 2, 6, 7], "initialis": 1, "gener": [1, 2, 5, 6, 7], "g": [1, 2, 6, 7, 8], "_": [1, 6, 7], "feature_baase_nam": 1, "metadata": [1, 4, 6, 8], "interv": [1, 6], "predict": [1, 2, 4, 5, 7, 8], "look": [1, 2, 6, 7], "two": [1, 4, 6, 7], "specifi": [1, 2, 5, 7], "resolv": 1, "0": [1, 6, 7, 8], "how": [1, 2, 5, 7, 8], "aggreg": [1, 2, 6], "multipl": [1, 2, 6, 7, 8], "within": [1, 2, 6, 8], "lookahead": [1, 2, 6], "dai": [1, 6, 8], "take": [1, 4, 6, 7, 8], "group": [1, 7, 8], "input": 1, "return": [1, 4, 6, 7, 8], "singl": [1, 2, 6], "i": [1, 2, 3, 6, 7, 8], "found": [1, 6], "window": [1, 2, 6, 7], "whether": [1, 6], "type": [1, 2, 4, 6, 7, 8], "2": [1, 6, 7, 8], "diabet": [1, 6], "becaus": [1, 4, 6, 7], "onli": [1, 2, 6, 7, 8], "experi": [1, 6], "onc": [1, 6], "handl": [1, 6], "vectoris": 1, "wai": 1, "dure": 1, "resolut": 1, "faster": [1, 6, 7, 8], "than": [1, 2, 6], "non": 1, "occur": [1, 2, 6], "feature_nam": [1, 7], "default": [1, 4, 6], "pred": [1, 4, 6], "get_output_col_nam": 1, "get": [1, 3, 4], "output": [1, 4, 7, 8], "is_dichotom": 1, "check": [1, 6, 7, 8], "dichotom": 1, "properti": [1, 6, 7], "lookahead_period": 1, "model_computed_field": [1, 4], "classvar": [1, 4], "dict": [1, 4], "computedfieldinfo": [1, 4], "dictionari": [1, 4], "comput": [1, 4, 6, 7, 8], "field": [1, 4], "correspond": [1, 4, 6, 8], "model_config": [1, 4], "configdict": [1, 4], "arbitrary_types_allow": [1, 4], "true": [1, 2, 4, 6, 7], "extra": [1, 6, 7], "forbid": 1, "frozen": 1, "configur": [1, 4], "model": [1, 2, 4, 6, 8], "conform": [1, 4], "pydant": [1, 4], "config": [1, 4], "model_field": [1, 4], "fieldinfo": [1, 4], "annot": [1, 4], "list": [1, 4, 5, 6, 7, 8], "fals": [1, 4, 6, 8], "about": [1, 4, 6], "map": [1, 4], "thi": [1, 2, 4, 6, 7, 8], "replac": [1, 4], "__fields__": [1, 4], "v1": [1, 4], "predictorspec": [1, 4, 6], "lookbehind_dai": [1, 6, 7, 8], "predictor": [1, 2, 4, 5, 7], "lookbehind": [1, 2, 6, 7, 8], "lookbehind_period": [1, 7], "staticspec": [1, 4, 6], "static": [1, 5, 7], "can_be_coerced_losslessly_to_int": 1, "coerce_float": 1, "get_temporal_col_nam": 1, "tempor": [1, 5, 7, 8], "packag": [2, 5, 6, 7], "data": [2, 4, 5, 7, 8], "machin": 2, "learn": 2, "implement": [2, 7], "method": [2, 4, 7], "includ": 2, "convert": [2, 8], "ani": [2, 4, 6, 7, 8], "irregular": [2, 6], "row": [2, 6, 7, 8], "desir": 2, "construct": 2, "raw": 2, "ar": [2, 4, 6, 7, 8], "allow": [2, 4, 7], "patient": [2, 6, 8], "independ": 2, "set": [2, 4, 6], "particular": 2, "sever": [2, 8], "choic": 2, "one": [2, 4, 6, 7, 8], "need": [2, 6, 7, 8], "issu": [2, 6], "everi": [2, 6, 7], "physic": 2, "visit": 2, "morn": 2, "anoth": [2, 6], "clinic": [2, 8], "meaning": 2, "far": [2, 6, 8], "back": [2, 6], "ahead": [2, 6], "exist": 2, "point": [2, 6], "abov": [2, 6, 7, 8], "figur": 2, "graphic": 2, "repres": [2, 6], "terminologi": [2, 6], "determin": [2, 6], "wherea": 2, "futur": [2, 6], "refer": [2, 6], "b": 2, "label": [2, 6], "neg": 2, "never": [2, 6], "happen": [2, 6], "outsid": [2, 6], "posit": [2, 6], "insid": [2, 6], "exampl": [2, 6, 7, 8], "mean": [2, 6, 7, 8], "shown": [2, 6], "max": [2, 6], "min": [2, 6], "etc": [2, 6], "d": 2, "drop": [2, 6, 7, 8], "extend": [2, 6], "further": [2, 4, 6], "start": [2, 3, 6, 7, 8], "dataset": [2, 4, 5, 6, 7], "end": [2, 6, 7], "behaviour": 2, "option": [2, 4], "obtain": 2, "rich": 2, "represent": 2, "see": [2, 4, 6], "tutori": [2, 4, 8], "placehold": 2, "case": [2, 6], "report": 2, "request": 2, "github": [2, 3], "tracker": 2, "otherwis": 2, "discuss": [2, 6], "forum": 2, "bug": 2, "idea": 2, "usag": 2, "index": 2, "run": [3, 5], "follow": [3, 6], "line": [3, 6, 7], "termin": 3, "There": [3, 6, 7, 8], "discrep": 3, "between": 3, "latest": 3, "version": [3, 6, 7], "flatten": [4, 5, 8], "describ": [4, 6, 8], "speccollect": 4, "outcome_spec": [4, 6], "predictor_spec": 4, "static_spec": 4, "collect": 4, "spec": [4, 6, 7, 8], "prediction_times_df": [4, 6, 7, 8], "drop_pred_times_with_insufficient_look_dist": [4, 6, 7, 8], "cach": [4, 5], "featurecach": [4, 7], "none": [4, 6, 7], "entity_id_col_nam": [4, 6, 7, 8], "timestamp_col_nam": [4, 6, 7, 8], "predictor_col_name_prefix": 4, "outcome_col_name_prefix": 4, "n_worker": [4, 6, 7, 8], "60": [4, 7], "log_to_stdout": 4, "turn": [4, 8], "tabular": [4, 8], "add_ag": 4, "date_of_birth_df": 4, "date_of_birth_col_nam": 4, "date_of_birth": 4, "output_prefix": 4, "add": [4, 6, 7], "ag": 4, "ha": [4, 6, 7, 8], "its": [4, 6], "own": [4, 7], "function": [4, 6, 8], "veri": 4, "frequent": [4, 6], "match": 4, "self": [4, 6, 7], "add_spec": [4, 6, 7, 8], "sequenc": [4, 7], "queue": 4, "unprocess": [4, 6, 7, 8], "process": [4, 6, 7, 8], "until": 4, "call": [4, 6, 7], "get_df": [4, 6, 7, 8], "u": 4, "more": [4, 6, 7], "effecti": 4, "parallelis": 4, "most": [4, 6, 7], "complex": 4, "li": 4, "For": [4, 6, 7, 8], "document": 4, "those": 4, "present": [4, 6], "we": [5, 6, 7, 8], "recommend": 5, "go": [5, 6], "through": 5, "below": 5, "jupyt": 5, "notebook": 5, "download": 5, "local": [5, 6, 7], "introductori": 5, "load": [5, 7, 8], "advanc": [5, 6], "creat": [5, 6, 8], "combin": 5, "ad": [5, 6], "embed": 5, "especi": 6, "help": 6, "have": [6, 7, 8], "complic": 6, "train": 6, "simpl": 6, "explain": 6, "appli": 6, "consist": 6, "3": [6, 7, 8], "step": 6, "": [6, 7, 8], "simplest": 6, "first": [6, 7, 8], "predictin": 6, "element": 6, "context": 6, "skimpi": [6, 7], "import": [6, 7, 8], "skim": [6, 7], "test": [6, 7, 8], "load_synth_data": [6, 7, 8], "load_synth_prediction_tim": [6, 7, 8], "df_prediction_tim": 6, "sort_valu": 6, "summari": [6, 7], "count": [6, 7], "10000": [6, 7], "int64": [6, 7], "1": [6, 7, 8], "datetime64": [6, 7], "column_nam": [6, 7], "na": [6, 7, 8], "sd": [6, 7], "p0": [6, 7], "p25": [6, 7], "p50": [6, 7], "p75": [6, 7], "p100": [6, 7], "hist": [6, 7], "5000": [6, 7], "2900": [6, 7], "2500": 6, "4900": [6, 7], "7400": [6, 7], "last": [6, 7, 8], "frequenc": [6, 7], "1965": [6, 8], "01": [6, 7], "02": [6, 7, 8], "09": [6, 8], "35": 6, "00": [6, 7, 8], "1969": [6, 7, 8], "12": [6, 7, 8], "31": [6, 7, 8], "21": [6, 7, 8], "42": [6, 7], "628": 6, "11": [6, 7, 8], "55": 6, "2005": 6, "03": [6, 8], "15": [6, 8], "07": [6, 8], "16": [6, 8], "4370": 6, "13": [6, 7], "23": [6, 8], "18": [6, 7, 8], "6152": 6, "1968": [6, 7, 8], "04": [6, 8], "6873": 6, "4": [6, 7, 8], "28": [6, 8], "33": [6, 8], "9688": 6, "9996": 6, "17": [6, 7, 8], "1463": 6, "30": [6, 7, 8], "19": [6, 8], "3952": 6, "9997": 6, "1967": [6, 8], "06": [6, 8], "08": [6, 8], "52": 6, "7926": 6, "9999": 6, "22": [6, 8], "24": 6, "5720": 6, "14": [6, 8], "59": [6, 7], "here": 6, "Then": [6, 7], "our": [6, 7, 8], "differ": [6, 7], "timepoint": 6, "load_synth_predictor_float": [6, 7], "df_synth_predictor": 6, "100000": 6, "float64": [6, 7], "7500": 6, "5": [6, 7, 8], "9": [6, 7], "00015": 6, "7": [6, 7, 8], "10": [6, 7, 8], "37": 6, "95792": 6, "29": [6, 7], "799246": 6, "82592": 6, "05": [6, 7, 8], "6": [6, 7], "630007": 6, "1377": 6, "174793": 6, "28579": 6, "26": [6, 8], "981185": 6, "81247": 6, "44": [6, 7], "970382": 6, "10277": 6, "20": [6, 8], "304568": 6, "74701": 6, "671907": 6, "69566": 6, "41": [6, 8], "250538": 6, "40901": 6, "1966": [6, 8], "924175": 6, "96881": 6, "501553": 6, "again": 6, "could": 6, "sex": 6, "doesn": 6, "t": [6, 7], "chang": 6, "over": 6, "let": [6, 7, 8], "load_synth_sex": 6, "df_synth_sex": 6, "femal": 6, "9994": 6, "9995": 6, "9998": 6, "As": [6, 8], "And": 6, "lastli": 6, "ve": 6, "chosen": 6, "binari": 6, "store": 6, "infer": 6, "do": 6, "sinc": 6, "thei": [6, 7, 8], "section": 6, "load_synth_outcom": 6, "df_synth_outcom": 6, "3103": 6, "5100": 6, "7600": 6, "50": 6, "46": [6, 7], "6253": 6, "9964": 6, "6255": 6, "9966": 6, "6256": 6, "9968": 6, "6257": 6, "9970": 6, "6269": 6, "9992": 6, "53": [6, 7], "per": [6, 7], "now": [6, 7, 8], "recip": 6, "finish": 6, "firstli": 6, "main": 6, "decis": 6, "size": [6, 7], "given": 6, "indic": 6, "code": [6, 7], "panda": [6, 7, 8], "pd": [6, 8], "maximum": [6, 7], "feature_spec": [6, 7, 8], "single_spec": 6, "test_df": 6, "365": [6, 7, 8], "outcome_nam": 6, "argument": 6, "values_df": 6, "decid": 6, "least": 6, "both": 6, "accomplish": 6, "dw_ek_borg": 6, "wa": [6, 8], "mark": 6, "after": 6, "where": 6, "event": 6, "perman": 6, "specifii": 6, "forward": 6, "search": 6, "certain": 6, "period": [6, 8], "befor": [6, 8], "instead": 6, "almost": 6, "entir": 6, "ident": 6, "except": 6, "past": 6, "numpi": [6, 7, 8], "np": [6, 7, 8], "temporal_predictor_spec": 6, "730": [6, 7, 8], "nan": [6, 7, 8], "predictor_nam": 6, "rang": 6, "similar": 6, "instanc": [6, 7], "might": [6, 7, 8], "182": 6, "easili": 6, "pass": [6, 8], "temporal_interval_predictor_spec": 6, "90": 6, "predictor_interval_nam": 6, "slightli": 6, "previou": 6, "provid": 6, "howev": [6, 7, 8], "By": 6, "filter": 6, "easi": 6, "manual": [6, 7], "sex_predictor_spec": 6, "input_col_name_overrid": 6, "df": [6, 7, 8], "tsflatten": 6, "re": [6, 8], "readi": 6, "instanti": 6, "along": 6, "add_": 6, "parallel": [6, 7, 8], "oper": 6, "across": 6, "core": [6, 7], "ts_flatten": [6, 7, 8], "applic": 6, "sai": [6, 7], "month": [6, 7, 8], "would": [6, 8], "compromis": 6, "generalis": 6, "some": [6, 7, 8], "edg": 6, "brief": 6, "2024": [6, 7, 8], "48": [6, 7, 8], "info": [6, 7, 8], "were": [6, 7, 8], "_drop_pred_time_if_insufficient_look_dist": [6, 7], "5999": 6, "99": 6, "worker": [6, 7, 8], "chunksiz": [6, 7, 8], "mai": [6, 7, 8], "progress": [6, 7, 8], "bar": [6, 7, 8], "move": [6, 7, 8], "batch": [6, 7, 8], "much": [6, 7, 8], "total": [6, 7, 8], "perform": [6, 7, 8], "100": [6, 7, 8], "40": [6, 7], "71it": [6, 8], "align": [6, 7, 8], "littl": [6, 7, 8], "while": [6, 7, 8], "minut": [6, 7, 8], "000": [6, 7, 8], "concaten": [6, 7, 8], "Will": [6, 7, 8], "system": [6, 7, 8], "2_000_000": [6, 7, 8], "normal": [6, 7, 8], "took": [6, 7, 8], "004": 6, "second": [6, 7, 8], "merg": [6, 7, 8], "origin": [6, 7, 8], "4001": 6, "string": [6, 7], "2600": [6, 7], "pred_predictor_name_wit": 6, "72": 6, "097": 6, "hin_730_days_mean_fallb": 6, "ack_nan": 6, "outc_outcome_name_withi": 6, "064": 6, "25": [6, 8], "n_365_days_maximum_fal": 6, "back_0_dichotom": 6, "pred_predictor_interv": 6, "2877": 6, "71": 6, "91": 6, "_name_within_30_to_90_d": 6, "ays_mean_fallback_nan": [6, 7], "pred_femal": 6, "49": 6, "39": 6, "word": [6, 7, 8], "prediction_time_uuid": [6, 7, 8], "pred_predictor_name_within_730_days_mean_fallback_nan": 6, "outc_outcome_name_within_365_days_maximum_fallback_0_dichotom": 6, "pred_predictor_interval_name_within_30_to_90_days_mean_fallback_nan": 6, "display": [6, 7], "shorten": [6, 7], "col": [6, 7], "shortened_pr": 6, "pred_x": 6, "shortened_pred_interv": 6, "pred_x_30_to_90": 6, "shortened_outcom": 6, "outc_i": 6, "renam": [6, 7], "pred_predictor_name_within_0_to_730_days_mean_fallback_nan": 6, "outc_outcome_name_within_0_to_365_days_maximum_fallback_0_dichotom": 6, "axi": [6, 7, 8], "set_table_attribut": [6, 7], "font": [6, 7], "14px": [6, 7], "importerror": [6, 7], "traceback": [6, 7], "recent": [6, 7], "cell": [6, 7], "file": [6, 7], "lib": [6, 7], "python3": [6, 7], "site": [6, 7], "frame": [6, 7], "1338": [6, 7], "1318": [6, 7], "1319": [6, 7, 8], "def": [6, 7, 8], "styler": [6, 7], "1320": [6, 7], "1321": [6, 7], "1322": [6, 7], "1336": [6, 7], "tabl": [6, 7], "visual": [6, 7], "user_guid": [6, 7], "ipynb": [6, 7], "1337": [6, 7], "io": [6, 7], "format": [6, 7, 8], "1340": [6, 7], "shared_doc": [6, 7], "_shared_doc": [6, 7], "save_to_buff": [6, 7], "jinja2": [6, 7], "import_optional_depend": [6, 7], "style_rend": [6, 7], "47": [6, 7], "cssproperti": [6, 7], "cssstyle": [6, 7], "56": [6, 7], "refactor_level": [6, 7], "57": [6, 7], "type_check": [6, 7], "compat": [6, 7], "_option": [6, 7], "161": [6, 7], "error": [6, 7], "min_vers": [6, 7], "159": [6, 7], "160": [6, 7], "elif": [6, 7], "rais": [6, 7], "msg": [6, 7], "163": [6, 7], "modul": [6, 7], "newer": [6, 7], "current": [6, 7], "classif": 6, "citizen": 6, "uniqu": 6, "identifi": 6, "prediciton": 6, "pred_": [6, 7], "outc_": 6, "basic": 7, "cover": [7, 8], "expand": 7, "effect": 7, "mani": 7, "so": [7, 8], "iter": 7, "without": 7, "complet": 7, "full": 7, "hand": 7, "rather": 7, "straightforward": 7, "what": 7, "hundr": 7, "amount": 7, "write": 7, "grow": 7, "quit": 7, "substanti": 7, "becom": 7, "consum": 7, "hard": 7, "navig": 7, "solv": 7, "problem": 7, "combinatori": 7, "pprint": 7, "group_spec": [7, 8], "nameddatafram": 7, "predictorgroupspec": [7, 8], "pred_spec_batch": 7, "named_datafram": [7, 8], "synth_predictor_float": 7, "1095": 7, "create_combin": [7, 8], "attribut": 7, "easier": 7, "namedatafram": 7, "exactli": 7, "load_synth_predictor_flaot": 7, "pred_synth_predictor_float_": 7, "result": [7, 8], "good": 7, "small": [7, 8], "highlight": 7, "pred_spec_batch_summari": 7, "pred_spec": 7, "__name__": 7, "print": [7, 8], "f": 7, "len": [7, 8], "know": 7, "bunch": 7, "quickli": 7, "But": 7, "next": 7, "ship": 7, "disk": 7, "pathlib": 7, "path": 7, "feature_cach": 7, "cache_to_disk": 7, "diskcach": 7, "flattened_dataset": 7, "feature_cache_dir": 7, "tmp": 7, "directori": 7, "save": 7, "just": 7, "them": 7, "won": 7, "alreadi": [7, 8], "new": 7, "abstract": 7, "redi": 7, "sql": 7, "everyth": 7, "work": 7, "6053": 7, "96it": 7, "04it": 7, "006": 7, "3947": 7, "pred_synth_predictor_fl": 7, "506": 7, "82": 7, "024": 7, "oat_within_365_to_730_d": 7, "oat_within_1095_days_m": 7, "an_fallback_nan": 7, "533": 7, "0084": 7, "oat_within_365_days_max": 7, "imum_fallback_nan": 7, "oat_within_365_days_mea": 7, "n_fallback_nan": 7, "ays_maximum_fallback_na": 7, "n": 7, "oat_within_1095_days_ma": 7, "ximum_fallback_nan": 7, "pred_synth_predictor_float_within_365_to_730_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_1095_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_365_days_maximum_fallback_nan": 7, "pred_synth_predictor_float_within_365_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_365_to_730_days_maximum_fallback_nan": 7, "pred_synth_predictor_float_within_1095_days_maximum_fallback_nan": 7, "pred_col": 7, "startswith": 7, "rename_dict": 7, "enumer": 7, "df_renam": 7, "base_col": 7, "renamed_col": 7, "dealt": 8, "show": 8, "out": 8, "synthet": 8, "other": 8, "load_synth_text": 8, "synth_text": 8, "head": 8, "4647": 8, "went": 8, "induc": 8, "coma": 8, "2007": 8, "taken": 8, "emerg": 8, "departm": 8, "5799": 8, "old": 8, "son": 8, "wh": 8, "had": 8, "been": 8, "left": 8, "bed": 8, "minu": 8, "4234": 8, "allergi": 8, "often": 8, "advantag": 8, "emb": 8, "speed": 8, "up": 8, "block": 8, "tf": 8, "idf": 8, "form": 8, "constraint": 8, "entitiy_id_col": 8, "timestamp_col": 8, "value_col": 8, "purpos": 8, "demonstr": 8, "fit": 8, "captur": 8, "sklearn": 8, "feature_extract": 8, "tfidfvector": 8, "embed_text_to_df": 8, "tfidf_model": 8, "max_featur": 8, "fit_transform": 8, "toarrai": 8, "get_feature_names_out": 8, "embedded_text": 8, "tolist": 8, "metadata_onli": 8, "embedded_text_with_metadata": 8, "concat": 8, "ignor": 8, "175872": 8, "182066": 8, "249848": 8, "158430": 8, "000000": 8, "023042": 8, "311389": 8, "529966": 8, "490203": 8, "479312": 8, "244870": 8, "135282": 8, "064337": 8, "465084": 8, "336859": 8, "151743": 8, "729861": 8, "179161": 8, "192367": 8, "232332": 8, "283402": 8, "336952": 8, "176422": 8, "238416": 8, "646879": 8, "250217": 8, "382277": 8, "165635": 8, "200046": 8, "183015": 8, "261115": 8, "125837": 8, "151906": 8, "205285": 8, "759528": 8, "403961": 8, "098747": 8, "493461": 8, "119196": 8, "272619": 8, "207444": 8, "045256": 8, "183475": 8, "588324": 8, "433253": 8, "235349": 8, "df_with_multiple_values_to_named_datafram": 8, "readili": 8, "suppli": 8, "df_transform": 8, "split": 8, "embedded_df": 8, "name_prefix": 8, "tfidf_": 8, "accord": 8, "inform": 8, "bow": 8, "kept": 8, "tfidf_and": 8, "emb_spec_batch": 8, "32": 8, "54": 8, "13it": 8, "029": 8, "sake": 8, "dropna": 8, "pred_tfidf_and_within_365_days_mean_fallback_nan": 8, "pred_tfidf_in_within_365_days_mean_fallback_nan": 8, "pred_tfidf_was_within_365_days_mean_fallback_nan": 8, "pred_tfidf_in_within_730_days_mean_fallback_nan": 8, "pred_tfidf_or_within_365_days_mean_fallback_nan": 8, "pred_tfidf_or_within_730_days_mean_fallback_nan": 8, "pred_tfidf_that_within_730_days_mean_fallback_nan": 8, "pred_tfidf_patient_within_365_days_mean_fallback_nan": 8, "pred_tfidf_of_within_730_days_mean_fallback_nan": 8, "pred_tfidf_for_within_730_days_mean_fallback_nan": 8, "pred_tfidf_to_within_730_days_mean_fallback_nan": 8, "pred_tfidf_the_within_730_days_mean_fallback_nan": 8, "pred_tfidf_and_within_730_days_mean_fallback_nan": 8, "pred_tfidf_of_within_365_days_mean_fallback_nan": 8, "pred_tfidf_the_within_365_days_mean_fallback_nan": 8, "pred_tfidf_that_within_365_days_mean_fallback_nan": 8, "pred_tfidf_was_within_730_days_mean_fallback_nan": 8, "1917": 8, "4977": 8, "145809": 8, "483324": 8, "086927": 8, "221549": 8, "090356": 8, "133722": 8, "536339": 8, "088050": 8, "284485": 8, "534890": 8, "2463": 8, "6840": 8, "155821": 8, "258256": 8, "092896": 8, "355142": 8, "096561": 8, "071452": 8, "573168": 8, "376386": 8, "456030": 8, "285810": 8, "2580": 8, "260680": 8, "401014": 8, "639848": 8, "601521": 8, "2741": 8, "9832": 8, "36": 8, "225044": 8, "186493": 8, "335410": 8, "128228": 8, "103195": 8, "236513": 8, "101924": 8, "164655": 8, "825558": 8, "2931": 8, "7281": 8, "289663": 8, "280049": 8, "388547": 8, "385111": 8, "269251": 8, "332065": 8, "304425": 8, "043730": 8, "211934": 8, "464891": 8}, "objects": {"timeseriesflattener.feature_specs": [[1, 0, 0, "-", "single_specs"]], "timeseriesflattener.feature_specs.single_specs": [[1, 1, 1, "", "CoercedFloats"], [1, 1, 1, "", "LookPeriod"], [1, 1, 1, "", "OutcomeSpec"], [1, 1, 1, "", "PredictorSpec"], [1, 1, 1, "", "StaticSpec"], [1, 5, 1, "", "can_be_coerced_losslessly_to_int"], [1, 5, 1, "", "coerce_floats"], [1, 5, 1, "", "get_temporal_col_name"]], "timeseriesflattener.feature_specs.single_specs.CoercedFloats": [[1, 2, 1, "", "fallback"], [1, 2, 1, "", "lookperiod"]], "timeseriesflattener.feature_specs.single_specs.LookPeriod": [[1, 2, 1, "", "max_days"], [1, 2, 1, "", "min_days"]], "timeseriesflattener.feature_specs.single_specs.OutcomeSpec": [[1, 2, 1, "", "aggregation_fn"], [1, 2, 1, "", "fallback"], [1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "incident"], [1, 3, 1, "", "is_dichotomous"], [1, 2, 1, "", "lookahead_days"], [1, 4, 1, "", "lookahead_period"], [1, 2, 1, "", "model_computed_fields"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener.feature_specs.single_specs.PredictorSpec": [[1, 2, 1, "", "aggregation_fn"], [1, 2, 1, "", "fallback"], [1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "lookbehind_days"], [1, 4, 1, "", "lookbehind_period"], [1, 2, 1, "", "model_computed_fields"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener.feature_specs.single_specs.StaticSpec": [[1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "model_computed_fields"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener": [[4, 0, 0, "-", "flattened_dataset"]], "timeseriesflattener.flattened_dataset": [[4, 1, 1, "", "SpecCollection"], [4, 1, 1, "", "TimeseriesFlattener"]], "timeseriesflattener.flattened_dataset.SpecCollection": [[4, 2, 1, "", "model_computed_fields"], [4, 2, 1, "", "model_config"], [4, 2, 1, "", "model_fields"], [4, 2, 1, "", "outcome_specs"], [4, 2, 1, "", "predictor_specs"], [4, 2, 1, "", "static_specs"]], "timeseriesflattener.flattened_dataset.TimeseriesFlattener": [[4, 3, 1, "", "add_age"], [4, 3, 1, "", "add_spec"], [4, 3, 1, "", "compute"], [4, 3, 1, "", "get_df"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:attribute", "3": "py:method", "4": "py:property", "5": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "method", "Python method"], "4": ["py", "property", "Python property"], "5": ["py", "function", "Python function"]}, "titleterms": {"frequent": 0, "ask": [0, 2], "question": [0, 2], "cite": 0, "thi": 0, "packag": 0, "how": [0, 6], "do": 0, "i": 0, "test": 0, "code": 0, "run": 0, "suit": 0, "document": 0, "gener": [0, 8], "featur": [1, 7, 8], "specif": [1, 6], "timeseriesflatten": [1, 2, 4], "feature_spec": 1, "single_spec": 1, "function": 2, "where": 2, "indic": 2, "search": 2, "instal": 3, "flattened_dataset": 4, "tutori": [5, 6, 7], "get": 5, "start": 5, "introductori": 6, "load": 6, "data": 6, "predict": 6, "time": 6, "tempor": 6, "predictor": [6, 8], "static": 6, "outcom": 6, "specifi": 6, "flatten": 6, "advanc": 7, "creat": 7, "combin": 7, "cach": 7, "ad": 8, "text": 8, "The": 8, "dataset": 8, "from": 8, "embed": 8}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Frequently Asked Questions": [[0, "frequently-asked-questions"]], "Citing this package": [[0, "citing-this-package"]], "How do I test the code and run the test suite?": [[0, "how-do-i-test-the-code-and-run-the-test-suite"]], "How is the documentation generated?": [[0, "how-is-the-documentation-generated"]], "Feature specifications": [[1, "feature-specifications"]], "timeseriesflattener.feature_specs.single_specs": [[1, "module-timeseriesflattener.feature_specs.single_specs"]], "timeseriesflattener": [[2, "timeseriesflattener"]], "Functionality": [[2, "functionality"]], "Where to ask questions?": [[2, "where-to-ask-questions"]], "Indices and search": [[2, "indices-and-search"]], "Installation": [[3, "installation"]], "Timeseriesflattener": [[4, "timeseriesflattener"]], "timeseriesflattener.flattened_dataset": [[4, "module-timeseriesflattener.flattened_dataset"]], "Tutorials": [[5, "tutorials"]], "Getting started": [[5, null]], "Introductory Tutorial": [[6, "introductory-tutorial"]], "Loading data": [[6, "loading-data"]], "Loading prediction times": [[6, "loading-prediction-times"]], "Loading a temporal predictor": [[6, "loading-a-temporal-predictor"]], "Loading a static predictor": [[6, "loading-a-static-predictor"]], "Loading a temporal outcome": [[6, "loading-a-temporal-outcome"]], "Specifying how to flatten the data": [[6, "specifying-how-to-flatten-the-data"]], "Temporal outcome specification": [[6, "temporal-outcome-specification"]], "Temporal predictor specification": [[6, "temporal-predictor-specification"]], "Static predictor specification": [[6, "static-predictor-specification"]], "Flattening": [[6, "flattening"]], "Advanced Tutorial": [[7, "advanced-tutorial"]], "Creating feature combinations": [[7, "creating-feature-combinations"]], "Caching": [[7, "caching"]], "Adding text features": [[8, "adding-text-features"]], "The dataset": [[8, "the-dataset"]], "Generating predictors from embedded text": [[8, "generating-predictors-from-embedded-text"]]}, "indexentries": {"coercedfloats (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats"]], "lookperiod (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod"]], "outcomespec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec"]], "predictorspec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec"]], "staticspec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec"]], "aggregation_fn (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.aggregation_fn"]], "aggregation_fn (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.aggregation_fn"]], "can_be_coerced_losslessly_to_int() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.can_be_coerced_losslessly_to_int"]], "coerce_floats() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.coerce_floats"]], "fallback (coercedfloats attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats.fallback"]], "fallback (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.fallback"]], "fallback (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.fallback"]], "feature_base_name (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.feature_base_name"]], "feature_base_name (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.feature_base_name"]], "feature_base_name (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.feature_base_name"]], "get_output_col_name() (outcomespec method)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.get_output_col_name"]], "get_output_col_name() (predictorspec method)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.get_output_col_name"]], "get_output_col_name() (staticspec method)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.get_output_col_name"]], "get_temporal_col_name() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.get_temporal_col_name"]], "incident (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.incident"]], "is_dichotomous() (outcomespec method)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.is_dichotomous"]], "lookahead_days (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.lookahead_days"]], "lookahead_period (outcomespec property)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.lookahead_period"]], "lookbehind_days (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.lookbehind_days"]], "lookbehind_period (predictorspec property)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.lookbehind_period"]], "lookperiod (coercedfloats attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats.lookperiod"]], "max_days (lookperiod attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod.max_days"]], "min_days (lookperiod attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod.min_days"]], "model_computed_fields (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_computed_fields"]], "model_computed_fields (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_computed_fields"]], "model_computed_fields (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_computed_fields"]], "model_config (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_config"]], "model_config (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_config"]], "model_config (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_config"]], "model_fields (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_fields"]], "model_fields (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_fields"]], "model_fields (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_fields"]], "module": [[1, "module-timeseriesflattener.feature_specs.single_specs"], [4, "module-timeseriesflattener.flattened_dataset"]], "prefix (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.prefix"]], "prefix (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.prefix"]], "prefix (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.prefix"]], "timeseries_df (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.timeseries_df"]], "timeseries_df (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.timeseries_df"]], "timeseries_df (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.timeseries_df"]], "timeseriesflattener.feature_specs.single_specs": [[1, "module-timeseriesflattener.feature_specs.single_specs"]], "speccollection (class in timeseriesflattener.flattened_dataset)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection"]], "timeseriesflattener (class in timeseriesflattener.flattened_dataset)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener"]], "add_age() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.add_age"]], "add_spec() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.add_spec"]], "compute() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.compute"]], "get_df() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.get_df"]], "model_computed_fields (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_computed_fields"]], "model_config (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_config"]], "model_fields (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_fields"]], "outcome_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.outcome_specs"]], "predictor_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.predictor_specs"]], "static_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.static_specs"]], "timeseriesflattener.flattened_dataset": [[4, "module-timeseriesflattener.flattened_dataset"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["faq", "feature_specifications", "index", "installation", "timeseriesflattener", "tutorials", "tutorials/01_basic", "tutorials/02_advanced", "tutorials/03_text"], "filenames": ["faq.rst", "feature_specifications.rst", "index.rst", "installation.rst", "timeseriesflattener.rst", "tutorials.rst", "tutorials/01_basic.ipynb", "tutorials/02_advanced.ipynb", "tutorials/03_text.ipynb"], "titles": ["Frequently Asked Questions", "Feature specifications", "timeseriesflattener", "Installation", "Timeseriesflattener", "Tutorials", "Introductory Tutorial", "Advanced Tutorial", "Adding text features"], "terms": {"If": [0, 6, 7, 8], "you": [0, 1, 4, 5, 6, 7, 8], "wish": 0, "us": [0, 1, 2, 3, 4, 5, 6, 7, 8], "librari": 0, "your": [0, 3, 6, 7, 8], "research": 0, "pleas": [0, 2], "joss": 0, "paper": 0, "articl": 0, "bernstorff2023timeseriesflatten": 0, "titl": 0, "timeseriesflatten": [0, 3, 6, 7, 8], "A": [0, 1, 2, 4, 6, 7], "python": [0, 2], "summar": 0, "featur": [0, 2, 5, 6], "from": [0, 1, 2, 4, 5, 6, 7], "medic": [0, 2, 6, 8], "time": [0, 1, 2, 4, 5, 7, 8], "seri": [0, 1, 2, 4, 6], "author": 0, "bernstorff": 0, "martin": 0, "enevoldsen": 0, "kenneth": 0, "damgaard": 0, "jakob": 0, "danielsen": 0, "andrea": 0, "hansen": 0, "lass": 0, "journal": 0, "open": 0, "sourc": [0, 1, 4], "softwar": 0, "volum": 0, "8": [0, 6, 7], "number": [0, 2, 6, 7, 8], "83": 0, "page": [0, 2], "5197": 0, "year": [0, 6, 7], "2023": 0, "Or": [0, 7], "prefer": 0, "apa": 0, "m": 0, "k": 0, "j": 0, "l": 0, "come": [0, 7], "an": [0, 1, 2, 6, 7, 8], "extens": 0, "In": [0, 1, 6, 7], "order": [0, 5], "ll": [0, 6, 7], "usual": 0, "want": [0, 6, 7, 8], "clone": 0, "repositori": 0, "build": 0, "also": [0, 5, 6], "instal": [0, 6, 7], "requir": [0, 1, 2, 4, 6, 7], "develop": 0, "depend": 0, "util": 0, "defin": [0, 1, 4, 8], "pyproject": 0, "toml": 0, "pip": [0, 3], "e": [0, 1, 2, 6, 7, 8], "dev": 0, "pytest": 0, "which": [0, 1, 2, 4, 5, 6, 7], "all": [0, 6, 7, 8], "folder": 0, "specif": [0, 4, 5, 7, 8], "can": [0, 1, 2, 5, 6, 7, 8], "desired_test": 0, "py": [0, 6, 7], "sphinx": 0, "It": [0, 6], "furo": 0, "theme": 0, "custom": 0, "style": [0, 6, 7], "To": [0, 2, 3, 5, 6, 7, 8], "make": [0, 2, 6, 7, 8], "doc": [0, 6], "text": [0, 5], "html": 0, "c": [0, 2, 7], "class": [1, 4, 6, 7], "coercedfloat": 1, "lookperiod": [1, 7], "fallback": [1, 6, 7, 8], "union": [1, 4], "float": 1, "int": [1, 4], "base": [1, 4], "object": [1, 4, 6, 7], "min_dai": [1, 6, 7], "max_dai": [1, 6, 7], "outcomespec": [1, 4, 6], "timeseries_df": [1, 6], "datafram": [1, 2, 4, 6, 7, 8], "feature_base_nam": [1, 6, 7], "str": [1, 4, 8], "lookahead_dai": [1, 6], "tupl": [1, 6], "aggregation_fn": [1, 6, 7, 8], "callabl": 1, "dataframegroupbi": 1, "incid": [1, 6], "bool": [1, 4], "prefix": [1, 4, 6], "outc": [1, 4, 6], "basemodel": [1, 4], "outcom": [1, 2, 5, 7], "paramet": [1, 4, 6], "valu": [1, 2, 4, 6, 7, 8], "should": [1, 4, 6, 8], "contain": [1, 4, 6, 8], "column": [1, 2, 4, 6, 7, 8], "entity_id": [1, 4, 6, 7, 8], "id": [1, 2, 6, 8], "entiti": [1, 6], "each": [1, 2, 5, 6, 8], "belong": 1, "The": [1, 2, 5, 6, 7], "timeseri": [1, 4, 6], "timestamp": [1, 4, 6, 7, 8], "datetim": [1, 6, 7], "note": [1, 6, 7, 8], "name": [1, 4, 6, 7, 8], "overridden": 1, "when": [1, 2, 6, 7], "initialis": 1, "gener": [1, 2, 5, 6, 7], "g": [1, 2, 6, 7, 8], "_": [1, 6, 7], "feature_baase_nam": 1, "metadata": [1, 4, 6, 8], "interv": [1, 6], "predict": [1, 2, 4, 5, 7, 8], "look": [1, 2, 6, 7], "two": [1, 4, 6, 7], "specifi": [1, 2, 5, 7], "resolv": 1, "0": [1, 6, 7, 8], "how": [1, 2, 5, 7, 8], "aggreg": [1, 2, 6], "multipl": [1, 2, 6, 7, 8], "within": [1, 2, 6, 8], "lookahead": [1, 2, 6], "dai": [1, 6, 8], "take": [1, 4, 6, 7, 8], "group": [1, 7, 8], "input": 1, "return": [1, 4, 6, 7, 8], "singl": [1, 2, 6], "i": [1, 2, 3, 6, 7, 8], "found": [1, 6], "window": [1, 2, 6, 7], "whether": [1, 6], "type": [1, 2, 4, 6, 7, 8], "2": [1, 6, 7, 8], "diabet": [1, 6], "becaus": [1, 4, 6, 7], "onli": [1, 2, 6, 7, 8], "experi": [1, 6], "onc": [1, 6], "handl": [1, 6], "vectoris": 1, "wai": 1, "dure": 1, "resolut": 1, "faster": [1, 6, 7, 8], "than": [1, 2, 6], "non": 1, "occur": [1, 2, 6], "feature_nam": [1, 7], "default": [1, 4, 6], "pred": [1, 4, 6], "get_output_col_nam": 1, "get": [1, 3, 4], "output": [1, 4, 7, 8], "is_dichotom": 1, "check": [1, 6, 7, 8], "dichotom": 1, "properti": [1, 6, 7], "lookahead_period": 1, "model_computed_field": [1, 4], "classvar": [1, 4], "dict": [1, 4], "computedfieldinfo": [1, 4], "dictionari": [1, 4], "comput": [1, 4, 6, 7, 8], "field": [1, 4], "correspond": [1, 4, 6, 8], "model_config": [1, 4], "configdict": [1, 4], "arbitrary_types_allow": [1, 4], "true": [1, 2, 4, 6, 7], "extra": [1, 6, 7], "forbid": 1, "frozen": 1, "configur": [1, 4], "model": [1, 2, 4, 6, 8], "conform": [1, 4], "pydant": [1, 4], "config": [1, 4], "model_field": [1, 4], "fieldinfo": [1, 4], "annot": [1, 4], "list": [1, 4, 5, 6, 7, 8], "fals": [1, 4, 6, 8], "about": [1, 4, 6], "map": [1, 4], "thi": [1, 2, 4, 6, 7, 8], "replac": [1, 4], "__fields__": [1, 4], "v1": [1, 4], "predictorspec": [1, 4, 6], "lookbehind_dai": [1, 6, 7, 8], "predictor": [1, 2, 4, 5, 7], "lookbehind": [1, 2, 6, 7, 8], "lookbehind_period": [1, 7], "staticspec": [1, 4, 6], "static": [1, 5, 7], "can_be_coerced_losslessly_to_int": 1, "coerce_float": 1, "get_temporal_col_nam": 1, "tempor": [1, 5, 7, 8], "packag": [2, 5, 6, 7], "data": [2, 4, 5, 7, 8], "machin": 2, "learn": 2, "implement": [2, 7], "method": [2, 4, 7], "includ": 2, "convert": [2, 8], "ani": [2, 4, 6, 7, 8], "irregular": [2, 6], "row": [2, 6, 7, 8], "desir": 2, "construct": 2, "raw": 2, "ar": [2, 4, 6, 7, 8], "allow": [2, 4, 7], "patient": [2, 6, 8], "independ": 2, "set": [2, 4, 6], "particular": 2, "sever": [2, 8], "choic": 2, "one": [2, 4, 6, 7, 8], "need": [2, 6, 7, 8], "issu": [2, 6], "everi": [2, 6, 7], "physic": 2, "visit": 2, "morn": 2, "anoth": [2, 6], "clinic": [2, 8], "meaning": 2, "far": [2, 6, 8], "back": [2, 6], "ahead": [2, 6], "exist": 2, "point": [2, 6], "abov": [2, 6, 7, 8], "figur": 2, "graphic": 2, "repres": [2, 6], "terminologi": [2, 6], "determin": [2, 6], "wherea": 2, "futur": [2, 6], "refer": [2, 6], "b": 2, "label": [2, 6], "neg": 2, "never": [2, 6], "happen": [2, 6], "outsid": [2, 6], "posit": [2, 6], "insid": [2, 6], "exampl": [2, 6, 7, 8], "mean": [2, 6, 7, 8], "shown": [2, 6], "max": [2, 6], "min": [2, 6], "etc": [2, 6], "d": 2, "drop": [2, 6, 7, 8], "extend": [2, 6], "further": [2, 4, 6], "start": [2, 3, 6, 7, 8], "dataset": [2, 4, 5, 6, 7], "end": [2, 6, 7], "behaviour": 2, "option": [2, 4], "obtain": 2, "rich": 2, "represent": 2, "see": [2, 4, 6], "tutori": [2, 4, 8], "placehold": 2, "case": [2, 6], "report": 2, "request": 2, "github": [2, 3], "tracker": 2, "otherwis": 2, "discuss": [2, 6], "forum": 2, "bug": 2, "idea": 2, "usag": 2, "index": 2, "run": [3, 5], "follow": [3, 6], "line": [3, 6, 7], "termin": 3, "There": [3, 6, 7, 8], "discrep": 3, "between": 3, "latest": 3, "version": [3, 6, 7], "flatten": [4, 5, 8], "describ": [4, 6, 8], "speccollect": 4, "outcome_spec": [4, 6], "predictor_spec": 4, "static_spec": 4, "collect": 4, "spec": [4, 6, 7, 8], "prediction_times_df": [4, 6, 7, 8], "drop_pred_times_with_insufficient_look_dist": [4, 6, 7, 8], "cach": [4, 5], "featurecach": [4, 7], "none": [4, 6, 7], "entity_id_col_nam": [4, 6, 7, 8], "timestamp_col_nam": [4, 6, 7, 8], "predictor_col_name_prefix": 4, "outcome_col_name_prefix": 4, "n_worker": [4, 6, 7, 8], "60": [4, 7], "log_to_stdout": 4, "turn": [4, 8], "tabular": [4, 8], "add_ag": 4, "date_of_birth_df": 4, "date_of_birth_col_nam": 4, "date_of_birth": 4, "output_prefix": 4, "add": [4, 6, 7], "ag": 4, "ha": [4, 6, 7, 8], "its": [4, 6], "own": [4, 7], "function": [4, 6, 8], "veri": 4, "frequent": [4, 6], "match": 4, "self": [4, 6, 7], "add_spec": [4, 6, 7, 8], "sequenc": [4, 7], "queue": 4, "unprocess": [4, 6, 7, 8], "process": [4, 6, 7, 8], "until": 4, "call": [4, 6, 7], "get_df": [4, 6, 7, 8], "u": 4, "more": [4, 6, 7], "effecti": 4, "parallelis": 4, "most": [4, 6, 7], "complex": 4, "li": 4, "For": [4, 6, 7, 8], "document": 4, "those": 4, "present": [4, 6], "we": [5, 6, 7, 8], "recommend": 5, "go": [5, 6], "through": 5, "below": 5, "jupyt": 5, "notebook": 5, "download": 5, "local": [5, 6, 7], "introductori": 5, "load": [5, 7, 8], "advanc": [5, 6], "creat": [5, 6, 8], "combin": 5, "ad": [5, 6], "embed": 5, "especi": 6, "help": 6, "have": [6, 7, 8], "complic": 6, "train": 6, "simpl": 6, "explain": 6, "appli": 6, "consist": 6, "3": [6, 7, 8], "step": 6, "": [6, 7, 8], "simplest": 6, "first": [6, 7, 8], "predictin": 6, "element": 6, "context": 6, "skimpi": [6, 7], "import": [6, 7, 8], "skim": [6, 7], "test": [6, 7, 8], "load_synth_data": [6, 7, 8], "load_synth_prediction_tim": [6, 7, 8], "df_prediction_tim": 6, "sort_valu": 6, "summari": [6, 7], "count": [6, 7], "10000": [6, 7], "int64": [6, 7], "1": [6, 7, 8], "datetime64": [6, 7], "column_nam": [6, 7], "na": [6, 7, 8], "sd": [6, 7], "p0": [6, 7], "p25": [6, 7], "p50": [6, 7], "p75": [6, 7], "p100": [6, 7], "hist": [6, 7], "5000": [6, 7], "2900": [6, 7], "2500": 6, "4900": [6, 7], "7400": [6, 7], "last": [6, 7, 8], "frequenc": [6, 7], "1965": [6, 8], "01": [6, 7], "02": [6, 7, 8], "09": [6, 8], "35": 6, "00": [6, 7, 8], "1969": [6, 7, 8], "12": [6, 7, 8], "31": [6, 7, 8], "21": [6, 7, 8], "42": [6, 7], "628": 6, "11": [6, 8], "55": 6, "2005": 6, "03": [6, 8], "15": [6, 8], "07": [6, 8], "16": [6, 7, 8], "4370": 6, "13": [6, 7], "23": [6, 8], "18": [6, 7, 8], "6152": 6, "1968": [6, 7, 8], "04": [6, 8], "6873": 6, "4": [6, 7, 8], "28": [6, 8], "33": 6, "9688": 6, "9996": 6, "17": [6, 7, 8], "1463": 6, "30": [6, 7, 8], "19": [6, 8], "3952": 6, "9997": 6, "1967": [6, 8], "06": [6, 8], "08": [6, 7, 8], "52": 6, "7926": 6, "9999": 6, "22": [6, 8], "24": 6, "5720": 6, "14": [6, 8], "59": [6, 7], "here": 6, "Then": [6, 7], "our": [6, 7, 8], "differ": [6, 7], "timepoint": 6, "load_synth_predictor_float": [6, 7], "df_synth_predictor": 6, "100000": 6, "float64": [6, 7], "7500": 6, "5": [6, 7, 8], "9": [6, 7], "00015": 6, "7": [6, 7, 8], "10": [6, 7, 8], "37": 6, "95792": 6, "29": [6, 7], "799246": 6, "82592": 6, "05": [6, 7, 8], "6": [6, 7], "630007": 6, "1377": 6, "174793": 6, "28579": 6, "26": [6, 8], "981185": 6, "81247": 6, "44": [6, 7], "970382": 6, "10277": 6, "20": [6, 8], "304568": 6, "74701": 6, "671907": 6, "69566": 6, "41": [6, 8], "250538": 6, "40901": 6, "1966": [6, 8], "924175": 6, "96881": 6, "501553": 6, "again": 6, "could": 6, "sex": 6, "doesn": 6, "t": [6, 7], "chang": 6, "over": 6, "let": [6, 7, 8], "load_synth_sex": 6, "df_synth_sex": 6, "femal": 6, "9994": 6, "9995": 6, "9998": 6, "As": [6, 8], "And": 6, "lastli": 6, "ve": 6, "chosen": 6, "binari": 6, "store": 6, "infer": 6, "do": 6, "sinc": 6, "thei": [6, 7, 8], "section": 6, "load_synth_outcom": 6, "df_synth_outcom": 6, "3103": 6, "5100": 6, "7600": 6, "50": 6, "46": [6, 7], "6253": 6, "9964": 6, "6255": 6, "9966": 6, "6256": 6, "9968": 6, "6257": 6, "9970": 6, "6269": 6, "9992": 6, "53": [6, 7, 8], "per": [6, 7], "now": [6, 7, 8], "recip": 6, "finish": 6, "firstli": 6, "main": 6, "decis": 6, "size": [6, 7], "given": 6, "indic": 6, "code": [6, 7], "panda": [6, 7, 8], "pd": [6, 8], "maximum": [6, 7], "feature_spec": [6, 7, 8], "single_spec": 6, "test_df": 6, "365": [6, 7, 8], "outcome_nam": 6, "argument": 6, "values_df": 6, "decid": 6, "least": 6, "both": 6, "accomplish": 6, "dw_ek_borg": 6, "wa": [6, 8], "mark": 6, "after": 6, "where": 6, "event": 6, "perman": 6, "specifii": 6, "forward": 6, "search": 6, "certain": 6, "period": [6, 8], "befor": [6, 8], "instead": 6, "almost": 6, "entir": 6, "ident": 6, "except": 6, "past": 6, "numpi": [6, 7, 8], "np": [6, 7, 8], "temporal_predictor_spec": 6, "730": [6, 7, 8], "nan": [6, 7, 8], "predictor_nam": 6, "rang": 6, "similar": 6, "instanc": [6, 7], "might": [6, 7, 8], "182": 6, "easili": 6, "pass": [6, 8], "temporal_interval_predictor_spec": 6, "90": 6, "predictor_interval_nam": 6, "slightli": 6, "previou": 6, "provid": 6, "howev": [6, 7, 8], "By": 6, "filter": 6, "easi": 6, "manual": [6, 7], "sex_predictor_spec": 6, "input_col_name_overrid": 6, "df": [6, 7, 8], "tsflatten": 6, "re": [6, 8], "readi": 6, "instanti": 6, "along": 6, "add_": 6, "parallel": [6, 7, 8], "oper": 6, "across": 6, "core": [6, 7], "ts_flatten": [6, 7, 8], "applic": 6, "sai": [6, 7], "month": [6, 7, 8], "would": [6, 8], "compromis": 6, "generalis": 6, "some": [6, 7, 8], "edg": 6, "brief": 6, "2024": [6, 7, 8], "info": [6, 7, 8], "were": [6, 7, 8], "_drop_pred_time_if_insufficient_look_dist": [6, 7], "5999": 6, "99": 6, "worker": [6, 7, 8], "chunksiz": [6, 7, 8], "mai": [6, 7, 8], "progress": [6, 7, 8], "bar": [6, 7, 8], "move": [6, 7, 8], "batch": [6, 7, 8], "much": [6, 7, 8], "total": [6, 7, 8], "perform": [6, 7, 8], "100": [6, 7, 8], "39": 6, "91it": 6, "align": [6, 7, 8], "littl": [6, 7, 8], "while": [6, 7, 8], "minut": [6, 7, 8], "000": [6, 7, 8], "concaten": [6, 7, 8], "Will": [6, 7, 8], "system": [6, 7, 8], "2_000_000": [6, 7, 8], "normal": [6, 7, 8], "took": [6, 7, 8], "004": 6, "second": [6, 7, 8], "merg": [6, 7, 8], "origin": [6, 7, 8], "4001": 6, "string": [6, 7], "2600": [6, 7], "pred_predictor_interv": 6, "2877": 6, "71": 6, "91": 6, "_name_within_30_to_90_d": 6, "ays_mean_fallback_nan": [6, 7], "pred_predictor_name_wit": 6, "72": 6, "097": 6, "hin_730_days_mean_fallb": 6, "ack_nan": 6, "outc_outcome_name_withi": 6, "064": 6, "25": [6, 8], "n_365_days_maximum_fal": 6, "back_0_dichotom": 6, "pred_femal": 6, "49": 6, "word": [6, 7, 8], "prediction_time_uuid": [6, 7, 8], "pred_predictor_interval_name_within_30_to_90_days_mean_fallback_nan": 6, "pred_predictor_name_within_730_days_mean_fallback_nan": 6, "outc_outcome_name_within_365_days_maximum_fallback_0_dichotom": 6, "display": [6, 7], "shorten": [6, 7], "col": [6, 7], "shortened_pr": 6, "pred_x": 6, "shortened_pred_interv": 6, "pred_x_30_to_90": 6, "shortened_outcom": 6, "outc_i": 6, "renam": [6, 7], "pred_predictor_name_within_0_to_730_days_mean_fallback_nan": 6, "outc_outcome_name_within_0_to_365_days_maximum_fallback_0_dichotom": 6, "axi": [6, 7, 8], "set_table_attribut": [6, 7], "font": [6, 7], "14px": [6, 7], "importerror": [6, 7], "traceback": [6, 7], "recent": [6, 7], "cell": [6, 7], "file": [6, 7], "lib": [6, 7], "python3": [6, 7], "site": [6, 7], "frame": [6, 7], "1338": [6, 7], "1318": [6, 7], "1319": [6, 7, 8], "def": [6, 7, 8], "styler": [6, 7], "1320": [6, 7], "1321": [6, 7], "1322": [6, 7], "1336": [6, 7], "tabl": [6, 7], "visual": [6, 7], "user_guid": [6, 7], "ipynb": [6, 7], "1337": [6, 7], "io": [6, 7], "format": [6, 7, 8], "1340": [6, 7], "40": [6, 7], "shared_doc": [6, 7], "_shared_doc": [6, 7], "save_to_buff": [6, 7], "jinja2": [6, 7], "import_optional_depend": [6, 7], "style_rend": [6, 7], "47": [6, 7], "cssproperti": [6, 7], "48": [6, 7], "cssstyle": [6, 7], "56": [6, 7], "refactor_level": [6, 7], "57": [6, 7], "type_check": [6, 7], "compat": [6, 7], "_option": [6, 7], "161": [6, 7], "error": [6, 7], "min_vers": [6, 7], "159": [6, 7], "160": [6, 7], "elif": [6, 7], "rais": [6, 7], "msg": [6, 7], "163": [6, 7], "modul": [6, 7], "newer": [6, 7], "current": [6, 7], "classif": 6, "citizen": 6, "uniqu": 6, "identifi": 6, "prediciton": 6, "pred_": [6, 7], "outc_": 6, "basic": 7, "cover": [7, 8], "expand": 7, "effect": 7, "mani": 7, "so": [7, 8], "iter": 7, "without": 7, "complet": 7, "full": 7, "hand": 7, "rather": 7, "straightforward": 7, "what": 7, "hundr": 7, "amount": 7, "write": 7, "grow": 7, "quit": 7, "substanti": 7, "becom": 7, "consum": 7, "hard": 7, "navig": 7, "solv": 7, "problem": 7, "combinatori": 7, "pprint": 7, "group_spec": [7, 8], "nameddatafram": 7, "predictorgroupspec": [7, 8], "pred_spec_batch": 7, "named_datafram": [7, 8], "synth_predictor_float": 7, "1095": 7, "create_combin": [7, 8], "attribut": 7, "easier": 7, "namedatafram": 7, "exactli": 7, "load_synth_predictor_flaot": 7, "pred_synth_predictor_float_": 7, "result": [7, 8], "good": 7, "small": [7, 8], "highlight": 7, "pred_spec_batch_summari": 7, "pred_spec": 7, "__name__": 7, "print": [7, 8], "f": 7, "len": [7, 8], "know": 7, "bunch": 7, "quickli": 7, "But": 7, "next": 7, "ship": 7, "disk": 7, "pathlib": 7, "path": 7, "feature_cach": 7, "cache_to_disk": 7, "diskcach": 7, "flattened_dataset": 7, "feature_cache_dir": 7, "tmp": 7, "directori": 7, "save": 7, "just": 7, "them": 7, "won": 7, "alreadi": [7, 8], "new": 7, "abstract": 7, "redi": 7, "sql": 7, "everyth": 7, "work": 7, "6053": 7, "02it": 7, "43": 7, "99it": 7, "006": 7, "3947": 7, "pred_synth_predictor_fl": 7, "533": 7, "0084": 7, "oat_within_365_days_mea": 7, "n_fallback_nan": 7, "506": 7, "82": 7, "024": 7, "oat_within_365_to_730_d": 7, "oat_within_1095_days_ma": 7, "ximum_fallback_nan": 7, "ays_maximum_fallback_na": 7, "n": 7, "oat_within_1095_days_m": 7, "an_fallback_nan": 7, "oat_within_365_days_max": 7, "imum_fallback_nan": 7, "pred_synth_predictor_float_within_365_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_365_to_730_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_1095_days_maximum_fallback_nan": 7, "pred_synth_predictor_float_within_365_to_730_days_maximum_fallback_nan": 7, "pred_synth_predictor_float_within_1095_days_mean_fallback_nan": 7, "pred_synth_predictor_float_within_365_days_maximum_fallback_nan": 7, "pred_col": 7, "startswith": 7, "rename_dict": 7, "enumer": 7, "df_renam": 7, "base_col": 7, "renamed_col": 7, "dealt": 8, "show": 8, "out": 8, "synthet": 8, "other": 8, "load_synth_text": 8, "synth_text": 8, "head": 8, "4647": 8, "went": 8, "induc": 8, "coma": 8, "2007": 8, "taken": 8, "emerg": 8, "departm": 8, "5799": 8, "old": 8, "son": 8, "wh": 8, "had": 8, "been": 8, "left": 8, "bed": 8, "minu": 8, "4234": 8, "allergi": 8, "often": 8, "advantag": 8, "emb": 8, "speed": 8, "up": 8, "block": 8, "tf": 8, "idf": 8, "form": 8, "constraint": 8, "entitiy_id_col": 8, "timestamp_col": 8, "value_col": 8, "purpos": 8, "demonstr": 8, "fit": 8, "captur": 8, "sklearn": 8, "feature_extract": 8, "tfidfvector": 8, "embed_text_to_df": 8, "tfidf_model": 8, "max_featur": 8, "fit_transform": 8, "toarrai": 8, "get_feature_names_out": 8, "embedded_text": 8, "tolist": 8, "metadata_onli": 8, "embedded_text_with_metadata": 8, "concat": 8, "ignor": 8, "175872": 8, "182066": 8, "249848": 8, "158430": 8, "000000": 8, "023042": 8, "311389": 8, "529966": 8, "490203": 8, "479312": 8, "244870": 8, "135282": 8, "064337": 8, "465084": 8, "336859": 8, "151743": 8, "729861": 8, "179161": 8, "192367": 8, "232332": 8, "283402": 8, "336952": 8, "176422": 8, "238416": 8, "646879": 8, "250217": 8, "382277": 8, "165635": 8, "200046": 8, "183015": 8, "261115": 8, "125837": 8, "151906": 8, "205285": 8, "759528": 8, "403961": 8, "098747": 8, "493461": 8, "119196": 8, "272619": 8, "207444": 8, "045256": 8, "183475": 8, "588324": 8, "433253": 8, "235349": 8, "df_with_multiple_values_to_named_datafram": 8, "readili": 8, "suppli": 8, "df_transform": 8, "split": 8, "embedded_df": 8, "name_prefix": 8, "tfidf_": 8, "accord": 8, "inform": 8, "bow": 8, "kept": 8, "tfidf_and": 8, "emb_spec_batch": 8, "68it": 8, "46it": 8, "029": 8, "sake": 8, "dropna": 8, "pred_tfidf_the_within_365_days_mean_fallback_nan": 8, "pred_tfidf_or_within_365_days_mean_fallback_nan": 8, "pred_tfidf_for_within_730_days_mean_fallback_nan": 8, "pred_tfidf_patient_within_365_days_mean_fallback_nan": 8, "pred_tfidf_that_within_730_days_mean_fallback_nan": 8, "pred_tfidf_and_within_365_days_mean_fallback_nan": 8, "pred_tfidf_or_within_730_days_mean_fallback_nan": 8, "pred_tfidf_the_within_730_days_mean_fallback_nan": 8, "pred_tfidf_to_within_365_days_mean_fallback_nan": 8, "pred_tfidf_that_within_365_days_mean_fallback_nan": 8, "pred_tfidf_patient_within_730_days_mean_fallback_nan": 8, "pred_tfidf_to_within_730_days_mean_fallback_nan": 8, "pred_tfidf_was_within_365_days_mean_fallback_nan": 8, "pred_tfidf_was_within_730_days_mean_fallback_nan": 8, "pred_tfidf_in_within_365_days_mean_fallback_nan": 8, "pred_tfidf_and_within_730_days_mean_fallback_nan": 8, "pred_tfidf_in_within_730_days_mean_fallback_nan": 8, "1917": 8, "4977": 8, "534890": 8, "221549": 8, "088050": 8, "133722": 8, "090356": 8, "145809": 8, "284485": 8, "086927": 8, "483324": 8, "2463": 8, "6840": 8, "285810": 8, "355142": 8, "376386": 8, "071452": 8, "096561": 8, "155821": 8, "456030": 8, "092896": 8, "258256": 8, "2580": 8, "601521": 8, "401014": 8, "639848": 8, "260680": 8, "2741": 8, "9832": 8, "36": 8, "825558": 8, "128228": 8, "101924": 8, "103195": 8, "225044": 8, "164655": 8, "335410": 8, "186493": 8, "2931": 8, "7281": 8, "464891": 8, "385111": 8, "043730": 8, "332065": 8, "269251": 8, "289663": 8, "211934": 8, "388547": 8, "280049": 8}, "objects": {"timeseriesflattener.feature_specs": [[1, 0, 0, "-", "single_specs"]], "timeseriesflattener.feature_specs.single_specs": [[1, 1, 1, "", "CoercedFloats"], [1, 1, 1, "", "LookPeriod"], [1, 1, 1, "", "OutcomeSpec"], [1, 1, 1, "", "PredictorSpec"], [1, 1, 1, "", "StaticSpec"], [1, 5, 1, "", "can_be_coerced_losslessly_to_int"], [1, 5, 1, "", "coerce_floats"], [1, 5, 1, "", "get_temporal_col_name"]], "timeseriesflattener.feature_specs.single_specs.CoercedFloats": [[1, 2, 1, "", "fallback"], [1, 2, 1, "", "lookperiod"]], "timeseriesflattener.feature_specs.single_specs.LookPeriod": [[1, 2, 1, "", "max_days"], [1, 2, 1, "", "min_days"]], "timeseriesflattener.feature_specs.single_specs.OutcomeSpec": [[1, 2, 1, "", "aggregation_fn"], [1, 2, 1, "", "fallback"], [1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "incident"], [1, 3, 1, "", "is_dichotomous"], [1, 2, 1, "", "lookahead_days"], [1, 4, 1, "", "lookahead_period"], [1, 2, 1, "", "model_computed_fields"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener.feature_specs.single_specs.PredictorSpec": [[1, 2, 1, "", "aggregation_fn"], [1, 2, 1, "", "fallback"], [1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "lookbehind_days"], [1, 4, 1, "", "lookbehind_period"], [1, 2, 1, "", "model_computed_fields"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener.feature_specs.single_specs.StaticSpec": [[1, 2, 1, "", "feature_base_name"], [1, 3, 1, "", "get_output_col_name"], [1, 2, 1, "", "model_computed_fields"], [1, 2, 1, "", "model_config"], [1, 2, 1, "", "model_fields"], [1, 2, 1, "", "prefix"], [1, 2, 1, "", "timeseries_df"]], "timeseriesflattener": [[4, 0, 0, "-", "flattened_dataset"]], "timeseriesflattener.flattened_dataset": [[4, 1, 1, "", "SpecCollection"], [4, 1, 1, "", "TimeseriesFlattener"]], "timeseriesflattener.flattened_dataset.SpecCollection": [[4, 2, 1, "", "model_computed_fields"], [4, 2, 1, "", "model_config"], [4, 2, 1, "", "model_fields"], [4, 2, 1, "", "outcome_specs"], [4, 2, 1, "", "predictor_specs"], [4, 2, 1, "", "static_specs"]], "timeseriesflattener.flattened_dataset.TimeseriesFlattener": [[4, 3, 1, "", "add_age"], [4, 3, 1, "", "add_spec"], [4, 3, 1, "", "compute"], [4, 3, 1, "", "get_df"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:attribute", "3": "py:method", "4": "py:property", "5": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "method", "Python method"], "4": ["py", "property", "Python property"], "5": ["py", "function", "Python function"]}, "titleterms": {"frequent": 0, "ask": [0, 2], "question": [0, 2], "cite": 0, "thi": 0, "packag": 0, "how": [0, 6], "do": 0, "i": 0, "test": 0, "code": 0, "run": 0, "suit": 0, "document": 0, "gener": [0, 8], "featur": [1, 7, 8], "specif": [1, 6], "timeseriesflatten": [1, 2, 4], "feature_spec": 1, "single_spec": 1, "function": 2, "where": 2, "indic": 2, "search": 2, "instal": 3, "flattened_dataset": 4, "tutori": [5, 6, 7], "get": 5, "start": 5, "introductori": 6, "load": 6, "data": 6, "predict": 6, "time": 6, "tempor": 6, "predictor": [6, 8], "static": 6, "outcom": 6, "specifi": 6, "flatten": 6, "advanc": 7, "creat": 7, "combin": 7, "cach": 7, "ad": 8, "text": 8, "The": 8, "dataset": 8, "from": 8, "embed": 8}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Frequently Asked Questions": [[0, "frequently-asked-questions"]], "Citing this package": [[0, "citing-this-package"]], "How do I test the code and run the test suite?": [[0, "how-do-i-test-the-code-and-run-the-test-suite"]], "How is the documentation generated?": [[0, "how-is-the-documentation-generated"]], "Feature specifications": [[1, "feature-specifications"]], "timeseriesflattener.feature_specs.single_specs": [[1, "module-timeseriesflattener.feature_specs.single_specs"]], "timeseriesflattener": [[2, "timeseriesflattener"]], "Functionality": [[2, "functionality"]], "Where to ask questions?": [[2, "where-to-ask-questions"]], "Indices and search": [[2, "indices-and-search"]], "Installation": [[3, "installation"]], "Timeseriesflattener": [[4, "timeseriesflattener"]], "timeseriesflattener.flattened_dataset": [[4, "module-timeseriesflattener.flattened_dataset"]], "Tutorials": [[5, "tutorials"]], "Getting started": [[5, null]], "Introductory Tutorial": [[6, "introductory-tutorial"]], "Loading data": [[6, "loading-data"]], "Loading prediction times": [[6, "loading-prediction-times"]], "Loading a temporal predictor": [[6, "loading-a-temporal-predictor"]], "Loading a static predictor": [[6, "loading-a-static-predictor"]], "Loading a temporal outcome": [[6, "loading-a-temporal-outcome"]], "Specifying how to flatten the data": [[6, "specifying-how-to-flatten-the-data"]], "Temporal outcome specification": [[6, "temporal-outcome-specification"]], "Temporal predictor specification": [[6, "temporal-predictor-specification"]], "Static predictor specification": [[6, "static-predictor-specification"]], "Flattening": [[6, "flattening"]], "Advanced Tutorial": [[7, "advanced-tutorial"]], "Creating feature combinations": [[7, "creating-feature-combinations"]], "Caching": [[7, "caching"]], "Adding text features": [[8, "adding-text-features"]], "The dataset": [[8, "the-dataset"]], "Generating predictors from embedded text": [[8, "generating-predictors-from-embedded-text"]]}, "indexentries": {"coercedfloats (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats"]], "lookperiod (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod"]], "outcomespec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec"]], "predictorspec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec"]], "staticspec (class in timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec"]], "aggregation_fn (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.aggregation_fn"]], "aggregation_fn (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.aggregation_fn"]], "can_be_coerced_losslessly_to_int() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.can_be_coerced_losslessly_to_int"]], "coerce_floats() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.coerce_floats"]], "fallback (coercedfloats attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats.fallback"]], "fallback (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.fallback"]], "fallback (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.fallback"]], "feature_base_name (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.feature_base_name"]], "feature_base_name (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.feature_base_name"]], "feature_base_name (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.feature_base_name"]], "get_output_col_name() (outcomespec method)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.get_output_col_name"]], "get_output_col_name() (predictorspec method)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.get_output_col_name"]], "get_output_col_name() (staticspec method)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.get_output_col_name"]], "get_temporal_col_name() (in module timeseriesflattener.feature_specs.single_specs)": [[1, "timeseriesflattener.feature_specs.single_specs.get_temporal_col_name"]], "incident (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.incident"]], "is_dichotomous() (outcomespec method)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.is_dichotomous"]], "lookahead_days (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.lookahead_days"]], "lookahead_period (outcomespec property)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.lookahead_period"]], "lookbehind_days (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.lookbehind_days"]], "lookbehind_period (predictorspec property)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.lookbehind_period"]], "lookperiod (coercedfloats attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.CoercedFloats.lookperiod"]], "max_days (lookperiod attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod.max_days"]], "min_days (lookperiod attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.LookPeriod.min_days"]], "model_computed_fields (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_computed_fields"]], "model_computed_fields (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_computed_fields"]], "model_computed_fields (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_computed_fields"]], "model_config (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_config"]], "model_config (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_config"]], "model_config (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_config"]], "model_fields (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.model_fields"]], "model_fields (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.model_fields"]], "model_fields (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.model_fields"]], "module": [[1, "module-timeseriesflattener.feature_specs.single_specs"], [4, "module-timeseriesflattener.flattened_dataset"]], "prefix (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.prefix"]], "prefix (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.prefix"]], "prefix (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.prefix"]], "timeseries_df (outcomespec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.OutcomeSpec.timeseries_df"]], "timeseries_df (predictorspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.PredictorSpec.timeseries_df"]], "timeseries_df (staticspec attribute)": [[1, "timeseriesflattener.feature_specs.single_specs.StaticSpec.timeseries_df"]], "timeseriesflattener.feature_specs.single_specs": [[1, "module-timeseriesflattener.feature_specs.single_specs"]], "speccollection (class in timeseriesflattener.flattened_dataset)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection"]], "timeseriesflattener (class in timeseriesflattener.flattened_dataset)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener"]], "add_age() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.add_age"]], "add_spec() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.add_spec"]], "compute() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.compute"]], "get_df() (timeseriesflattener method)": [[4, "timeseriesflattener.flattened_dataset.TimeseriesFlattener.get_df"]], "model_computed_fields (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_computed_fields"]], "model_config (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_config"]], "model_fields (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.model_fields"]], "outcome_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.outcome_specs"]], "predictor_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.predictor_specs"]], "static_specs (speccollection attribute)": [[4, "timeseriesflattener.flattened_dataset.SpecCollection.static_specs"]], "timeseriesflattener.flattened_dataset": [[4, "module-timeseriesflattener.flattened_dataset"]]}}) \ No newline at end of file diff --git a/tutorials/01_basic.html b/tutorials/01_basic.html index 567242c8..2b64e4a6 100644 --- a/tutorials/01_basic.html +++ b/tutorials/01_basic.html @@ -991,31 +991,31 @@

Flattening -
2024-02-12 11:48:26 [INFO] There were unprocessed specs, computing...
+
2024-02-12 12:08:12 [INFO] There were unprocessed specs, computing...
 
-
2024-02-12 11:48:26 [INFO] _drop_pred_time_if_insufficient_look_distance: Dropped 5999 (59.99%) rows
+
2024-02-12 12:08:12 [INFO] _drop_pred_time_if_insufficient_look_distance: Dropped 5999 (59.99%) rows
 
-
2024-02-12 11:48:26 [INFO] Processing 3 temporal features in parallel with 1 workers. Chunksize is 3. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
+
2024-02-12 12:08:12 [INFO] Processing 3 temporal features in parallel with 1 workers. Chunksize is 3. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
 
  0%|          | 0/3 [00:00<?, ?it/s]
 
-
100%|██████████| 3/3 [00:00<00:00, 40.71it/s]
+
100%|██████████| 3/3 [00:00<00:00, 39.91it/s]
 
-
2024-02-12 11:48:26 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
+
2024-02-12 12:08:12 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
 
-
2024-02-12 11:48:26 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
+
2024-02-12 12:08:12 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
 
-
2024-02-12 11:48:26 [INFO] Concatenation took 0.004 seconds
+
2024-02-12 12:08:12 [INFO] Concatenation took 0.004 seconds
 
-
2024-02-12 11:48:26 [INFO] Merging with original df
+
2024-02-12 12:08:12 [INFO] Merging with original df
 
╭──────────────────────────────────────────────── skimpy summary ─────────────────────────────────────────────────╮
@@ -1033,15 +1033,15 @@ 

Flattening column_name ┃ NA NA % mean sd p0 p25 p50 p75 p100 hist ┃ │ │ ┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━╇━━━━━━━╇━━━━━━━━┩ │ │ │ entity_id 0 0 5000 2900 3 2600 5000750010000▇▇▇▇▇▇ │ │ +│ │ pred_predictor_interval 2877 71.91 5 2.8 0.02 2.6 5.1 7.4 10▇▇▇▇▇▇ │ │ +│ │ _name_within_30_to_90_d │ │ │ │ │ │ │ │ │ │ │ │ +│ │ ays_mean_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ │ │ pred_predictor_name_wit 72 1.8 5 1.6 0.097 3.9 5 6 9.9▁▃▇▇▃▁ │ │ │ │ hin_730_days_mean_fallb │ │ │ │ │ │ │ │ │ │ │ │ │ │ ack_nan │ │ │ │ │ │ │ │ │ │ │ │ │ │ outc_outcome_name_withi 0 0 0.064 0.25 0 0 0 0 1▇ ▁ │ │ │ │ n_365_days_maximum_fall │ │ │ │ │ │ │ │ │ │ │ │ │ │ back_0_dichotomous │ │ │ │ │ │ │ │ │ │ │ │ -│ │ pred_predictor_interval 2877 71.91 5 2.8 0.02 2.6 5.1 7.4 10▇▇▇▇▇▇ │ │ -│ │ _name_within_30_to_90_d │ │ │ │ │ │ │ │ │ │ │ │ -│ │ ays_mean_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ │ │ pred_female 0 0 0.49 0.5 0 0 0 1 1▇ ▇ │ │ │ └─────────────────────────┴───────┴────────┴────────┴───────┴────────┴───────┴───────┴──────┴───────┴────────┘ │ │ datetime │ @@ -1061,9 +1061,9 @@

Flattening
['entity_id',
  'timestamp',
  'prediction_time_uuid',
+ 'pred_predictor_interval_name_within_30_to_90_days_mean_fallback_nan',
  'pred_predictor_name_within_730_days_mean_fallback_nan',
  'outc_outcome_name_within_365_days_maximum_fallback_0_dichotomous',
- 'pred_predictor_interval_name_within_30_to_90_days_mean_fallback_nan',
  'pred_female']
 

diff --git a/tutorials/02_advanced.html b/tutorials/02_advanced.html index 11f5ed4d..e86a1d30 100644 --- a/tutorials/02_advanced.html +++ b/tutorials/02_advanced.html @@ -375,34 +375,34 @@

Caching -
2024-02-12 11:48:29 [INFO] There were unprocessed specs, computing...
+
2024-02-12 12:08:16 [INFO] There were unprocessed specs, computing...
 
-
2024-02-12 11:48:29 [INFO] _drop_pred_time_if_insufficient_look_distance: Dropped 6053 (60.53%) rows
+
2024-02-12 12:08:16 [INFO] _drop_pred_time_if_insufficient_look_distance: Dropped 6053 (60.53%) rows
 
-
2024-02-12 11:48:29 [INFO] Processing 6 temporal features in parallel with 4 workers. Chunksize is 2. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
+
2024-02-12 12:08:16 [INFO] Processing 6 temporal features in parallel with 4 workers. Chunksize is 2. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
 
  0%|          | 0/6 [00:00<?, ?it/s]
 
-
 17%|█▋        | 1/6 [00:00<00:00,  7.96it/s]
+
 17%|█▋        | 1/6 [00:00<00:00,  8.02it/s]
 
-
100%|██████████| 6/6 [00:00<00:00, 46.04it/s]
+
100%|██████████| 6/6 [00:00<00:00, 43.99it/s]
 
-
2024-02-12 11:48:29 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
+
2024-02-12 12:08:16 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
 
-
2024-02-12 11:48:29 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
+
2024-02-12 12:08:16 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
 
-
2024-02-12 11:48:29 [INFO] Concatenation took 0.006 seconds
+
2024-02-12 12:08:16 [INFO] Concatenation took 0.006 seconds
 
-
2024-02-12 11:48:29 [INFO] Merging with original df
+
2024-02-12 12:08:16 [INFO] Merging with original df
 
@@ -431,25 +431,25 @@

Caching column_name ┃ NA NA % mean sd p0 p25 p50 p75 p100 hist ┃ │ │ ┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━┩ │ │ │ entity_id 0 0 5000 2900 0 2600 4900 740010000▇▇▇▇▇▇ │ │ +│ │ pred_synth_predictor_fl 533 13.5 5 2.1 0.0084 3.6 5 6.4 9.9▂▅▇▇▅▂ │ │ +│ │ oat_within_365_days_mea │ │ │ │ │ │ │ │ │ │ │ │ +│ │ n_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ │ │ pred_synth_predictor_fl 506 12.82 5.1 2.2 0.024 3.6 5 6.5 10▂▅▇▇▅▂ │ │ │ │ oat_within_365_to_730_d │ │ │ │ │ │ │ │ │ │ │ │ │ │ ays_mean_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ +│ │ pred_synth_predictor_fl 7 0.18 8.4 1.5 0.29 7.8 8.9 9.5 10 ▁▃▇ │ │ +│ │ oat_within_1095_days_ma │ │ │ │ │ │ │ │ │ │ │ │ +│ │ ximum_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ +│ │ pred_synth_predictor_fl 506 12.82 6.6 2.6 0.024 4.8 7.3 8.8 10▂▂▃▃▆▇ │ │ +│ │ oat_within_365_to_730_d │ │ │ │ │ │ │ │ │ │ │ │ +│ │ ays_maximum_fallback_na │ │ │ │ │ │ │ │ │ │ │ │ +│ │ n │ │ │ │ │ │ │ │ │ │ │ │ │ │ pred_synth_predictor_fl 7 0.18 5 1.3 0.29 4.1 5 5.8 9.9 ▂▇▇▁ │ │ │ │ oat_within_1095_days_me │ │ │ │ │ │ │ │ │ │ │ │ │ │ an_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ │ │ pred_synth_predictor_fl 533 13.5 6.6 2.6 0.0084 4.8 7.3 8.8 10▁▂▃▃▆▇ │ │ │ │ oat_within_365_days_max │ │ │ │ │ │ │ │ │ │ │ │ │ │ imum_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ -│ │ pred_synth_predictor_fl 533 13.5 5 2.1 0.0084 3.6 5 6.4 9.9▂▅▇▇▅▂ │ │ -│ │ oat_within_365_days_mea │ │ │ │ │ │ │ │ │ │ │ │ -│ │ n_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ -│ │ pred_synth_predictor_fl 506 12.82 6.6 2.6 0.024 4.8 7.3 8.8 10▂▂▃▃▆▇ │ │ -│ │ oat_within_365_to_730_d │ │ │ │ │ │ │ │ │ │ │ │ -│ │ ays_maximum_fallback_na │ │ │ │ │ │ │ │ │ │ │ │ -│ │ n │ │ │ │ │ │ │ │ │ │ │ │ -│ │ pred_synth_predictor_fl 7 0.18 8.4 1.5 0.29 7.8 8.9 9.5 10 ▁▃▇ │ │ -│ │ oat_within_1095_days_ma │ │ │ │ │ │ │ │ │ │ │ │ -│ │ ximum_fallback_nan │ │ │ │ │ │ │ │ │ │ │ │ │ └─────────────────────────┴──────┴────────┴───────┴───────┴─────────┴───────┴───────┴───────┴───────┴────────┘ │ │ datetime │ │ ┏━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ │ @@ -468,12 +468,12 @@

Caching
['entity_id',
  'timestamp',
  'prediction_time_uuid',
- 'pred_synth_predictor_float_within_365_to_730_days_mean_fallback_nan',
- 'pred_synth_predictor_float_within_1095_days_mean_fallback_nan',
- 'pred_synth_predictor_float_within_365_days_maximum_fallback_nan',
  'pred_synth_predictor_float_within_365_days_mean_fallback_nan',
+ 'pred_synth_predictor_float_within_365_to_730_days_mean_fallback_nan',
+ 'pred_synth_predictor_float_within_1095_days_maximum_fallback_nan',
  'pred_synth_predictor_float_within_365_to_730_days_maximum_fallback_nan',
- 'pred_synth_predictor_float_within_1095_days_maximum_fallback_nan']
+ 'pred_synth_predictor_float_within_1095_days_mean_fallback_nan',
+ 'pred_synth_predictor_float_within_365_days_maximum_fallback_nan']
 

diff --git a/tutorials/03_text.html b/tutorials/03_text.html index 02a7b312..9c925de6 100644 --- a/tutorials/03_text.html +++ b/tutorials/03_text.html @@ -622,31 +622,31 @@

Generating predictors from embedded text -
2024-02-12 11:48:32 [INFO] There were unprocessed specs, computing...
+
2024-02-12 12:08:19 [INFO] There were unprocessed specs, computing...
 
-
2024-02-12 11:48:32 [INFO] Processing 20 temporal features in parallel with 1 workers. Chunksize is 20. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
+
2024-02-12 12:08:19 [INFO] Processing 20 temporal features in parallel with 1 workers. Chunksize is 20. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.
 
  0%|          | 0/20 [00:00<?, ?it/s]
 
-
  5%|▌         | 1/20 [00:00<00:07,  2.71it/s]
+
  5%|▌         | 1/20 [00:00<00:07,  2.68it/s]
 
-
100%|██████████| 20/20 [00:00<00:00, 54.13it/s]
+
100%|██████████| 20/20 [00:00<00:00, 53.46it/s]
 
-
2024-02-12 11:48:33 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
+
2024-02-12 12:08:19 [INFO] Checking alignment of dataframes - this might take a little while (~2 minutes for 1.000 dataframes with 2.000.000 rows).
 
-
2024-02-12 11:48:33 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
+
2024-02-12 12:08:19 [INFO] Starting concatenation. Will take some time on performant systems, e.g. 30s for 100 features and 2_000_000 prediction times. This is normal.
 
-
2024-02-12 11:48:33 [INFO] Concatenation took 0.029 seconds
+
2024-02-12 12:08:19 [INFO] Concatenation took 0.029 seconds
 
-
2024-02-12 11:48:33 [INFO] Merging with original df
+
2024-02-12 12:08:19 [INFO] Merging with original df
 
@@ -681,24 +681,24 @@

Generating predictors from embedded text