From 0a8387dba65e5141dce278ab0d4c1f99b50c1a30 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Wed, 22 May 2024 07:28:44 +0000 Subject: [PATCH] Add changes for 0106a848d9143fc8e2b151642a0eb7213c80fdf1 --- .../feature_specs/meta.html | 34 +++++--------- .../feature_specs/outcome.html | 13 +++--- .../feature_specs/prediction_times.html | 20 ++++----- .../feature_specs/predictor.html | 11 +++-- .../feature_specs/static.html | 14 +++--- .../feature_specs/timedelta.html | 4 +- .../feature_specs/timestamp_frame.html | 12 ++--- feature_specifications.html | 44 +++++++++---------- searchindex.js | 2 +- tutorials/01_basic.html | 6 +-- tutorials/02_advanced.html | 10 ++--- tutorials/03_text.html | 4 +- 12 files changed, 77 insertions(+), 97 deletions(-) diff --git a/_modules/timeseriesflattener/feature_specs/meta.html b/_modules/timeseriesflattener/feature_specs/meta.html index 1daf8cae..a4f0d0ee 100644 --- a/_modules/timeseriesflattener/feature_specs/meta.html +++ b/_modules/timeseriesflattener/feature_specs/meta.html @@ -228,31 +228,15 @@

Source code for timeseriesflattener.feature_specs.meta

from __future__ import annotations import datetime as dt -from collections.abc import Sequence from dataclasses import InitVar, dataclass -from typing import TYPE_CHECKING, Literal, Union +from typing import Literal import pandas as pd import polars as pl -from timeseriesflattener.feature_specs.default_column_names import default_entity_id_col_name - from .._frame_validator import _validate_col_name_columns_exist from ..frame_utilities.anyframe_to_lazyframe import _anyframe_to_lazyframe -if TYPE_CHECKING: - from typing_extensions import TypeAlias - - -ValueType = Union[int, float, str, None] -InitDF_T = Union[pl.LazyFrame, pl.DataFrame, pd.DataFrame] - - -LookDistance = dt.timedelta - - -LookDistances: TypeAlias = Sequence[Union[LookDistance, tuple[LookDistance, LookDistance]]] -
[docs]@dataclass class ValueFrame: @@ -264,12 +248,14 @@

Source code for timeseriesflattener.feature_specs.meta

Additional columns containing the values of the time series. The name of the columns will be used for feature naming. """ - init_df: InitVar[InitDF_T] - entity_id_col_name: str = default_entity_id_col_name + init_df: InitVar[pl.LazyFrame | pl.DataFrame | pd.DataFrame] + entity_id_col_name: str = "entity_id" value_timestamp_col_name: str = "timestamp" coerce_to_lazy: InitVar[bool] = True - def __post_init__(self, init_df: InitDF_T, coerce_to_lazy: bool): + def __post_init__( + self, init_df: pl.LazyFrame | pl.DataFrame | pd.DataFrame, coerce_to_lazy: bool + ): if coerce_to_lazy: self.df = _anyframe_to_lazyframe(init_df) else: @@ -290,8 +276,8 @@

Source code for timeseriesflattener.feature_specs.meta

[docs]@dataclass(frozen=True) class LookPeriod: - first: LookDistance - last: LookDistance + first: dt.timedelta + last: dt.timedelta def __post_init__(self): if self.first >= self.last: @@ -301,11 +287,11 @@

Source code for timeseriesflattener.feature_specs.meta

def _lookdistance_to_normalised_lookperiod( - lookdistance: LookDistance | tuple[LookDistance, LookDistance], + lookdistance: dt.timedelta | tuple[dt.timedelta, dt.timedelta], direction: Literal["ahead", "behind"], ) -> LookPeriod: is_ahead = direction == "ahead" - if isinstance(lookdistance, LookDistance): + if isinstance(lookdistance, dt.timedelta): return LookPeriod( first=dt.timedelta(days=0) if is_ahead else -lookdistance, last=lookdistance if is_ahead else dt.timedelta(0), diff --git a/_modules/timeseriesflattener/feature_specs/outcome.html b/_modules/timeseriesflattener/feature_specs/outcome.html index acfa2c24..f3f804a3 100644 --- a/_modules/timeseriesflattener/feature_specs/outcome.html +++ b/_modules/timeseriesflattener/feature_specs/outcome.html @@ -227,13 +227,14 @@

Source code for timeseriesflattener.feature_specs.outcome

 from __future__ import annotations
 
+import datetime as dt
 from dataclasses import InitVar, dataclass
 from typing import TYPE_CHECKING
 
 import polars as pl
 
 from .._frame_validator import _validate_col_name_columns_exist
-from .meta import LookDistances, ValueFrame, ValueType, _lookdistance_to_normalised_lookperiod
+from .meta import ValueFrame, _lookdistance_to_normalised_lookperiod
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -247,12 +248,14 @@ 

Source code for timeseriesflattener.feature_specs.outcome

"""Specification for an outcome. If your outcome is binary/boolean, you can use BooleanOutcomeSpec instead.""" value_frame: ValueFrame - lookahead_distances: InitVar[LookDistances] + lookahead_distances: InitVar[Sequence[dt.timedelta | tuple[dt.timedelta, dt.timedelta]]] aggregators: Sequence[Aggregator] - fallback: ValueType + fallback: int | float | str | None column_prefix: str = "outc" - def __post_init__(self, lookahead_distances: LookDistances): + def __post_init__( + self, lookahead_distances: Sequence[dt.timedelta | tuple[dt.timedelta, dt.timedelta]] + ): self.normalised_lookperiod = [ _lookdistance_to_normalised_lookperiod(lookdistance=lookdistance, direction="ahead") for lookdistance in lookahead_distances @@ -274,7 +277,7 @@

Source code for timeseriesflattener.feature_specs.outcome

""" init_frame: InitVar[TimestampValueFrame] - lookahead_distances: LookDistances + lookahead_distances: Sequence[dt.timedelta | tuple[dt.timedelta, dt.timedelta]] aggregators: Sequence[Aggregator] output_name: str column_prefix: str = "outc" diff --git a/_modules/timeseriesflattener/feature_specs/prediction_times.html b/_modules/timeseriesflattener/feature_specs/prediction_times.html index 2aaf1500..c7716399 100644 --- a/_modules/timeseriesflattener/feature_specs/prediction_times.html +++ b/_modules/timeseriesflattener/feature_specs/prediction_times.html @@ -230,21 +230,15 @@

Source code for timeseriesflattener.feature_specs.prediction_times

from dataclasses import InitVar, dataclass from typing import TYPE_CHECKING +import pandas as pd import polars as pl from .._frame_validator import _validate_col_name_columns_exist from ..frame_utilities.anyframe_to_lazyframe import _anyframe_to_lazyframe -from .default_column_names import ( - default_entity_id_col_name, - default_pred_time_col_name, - default_prediction_time_uuid_col_name, -) if TYPE_CHECKING: from collections.abc import Sequence - from .meta import InitDF_T -
[docs]@dataclass class PredictionTimeFrame: @@ -255,13 +249,15 @@

Source code for timeseriesflattener.feature_specs.prediction_times

timestamp_col_name: The name of the column containing the timestamps for when to make a prediction. """ - init_df: InitVar[InitDF_T] - entity_id_col_name: str = default_entity_id_col_name - timestamp_col_name: str = default_pred_time_col_name - prediction_time_uuid_col_name: str = default_prediction_time_uuid_col_name + init_df: InitVar[pl.LazyFrame | pl.DataFrame | pd.DataFrame] + entity_id_col_name: str = "entity_id" + timestamp_col_name: str = "pred_timestamp" + prediction_time_uuid_col_name: str = "prediction_time_uuid" coerce_to_lazy: InitVar[bool] = True - def __post_init__(self, init_df: InitDF_T, coerce_to_lazy: bool): + def __post_init__( + self, init_df: pl.LazyFrame | pl.DataFrame | pd.DataFrame, coerce_to_lazy: bool + ): if coerce_to_lazy: self.df = _anyframe_to_lazyframe(init_df) else: diff --git a/_modules/timeseriesflattener/feature_specs/predictor.html b/_modules/timeseriesflattener/feature_specs/predictor.html index c6e4ba23..06e646e8 100644 --- a/_modules/timeseriesflattener/feature_specs/predictor.html +++ b/_modules/timeseriesflattener/feature_specs/predictor.html @@ -227,11 +227,12 @@

Source code for timeseriesflattener.feature_specs.predictor

 from __future__ import annotations
 
+import datetime as dt
 from dataclasses import InitVar, dataclass
 from typing import TYPE_CHECKING
 
 from .._frame_validator import _validate_col_name_columns_exist
-from .meta import LookDistances, ValueFrame, ValueType, _lookdistance_to_normalised_lookperiod
+from .meta import ValueFrame, _lookdistance_to_normalised_lookperiod
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -252,12 +253,14 @@ 

Source code for timeseriesflattener.feature_specs.predictor

""" value_frame: ValueFrame - lookbehind_distances: InitVar[LookDistances] + lookbehind_distances: InitVar[Sequence[dt.timedelta | tuple[dt.timedelta, dt.timedelta]]] aggregators: Sequence[Aggregator] - fallback: ValueType + fallback: int | float | str | None column_prefix: str = "pred" - def __post_init__(self, lookbehind_distances: LookDistances): + def __post_init__( + self, lookbehind_distances: Sequence[dt.timedelta | tuple[dt.timedelta, dt.timedelta]] + ): self.normalised_lookperiod = [ _lookdistance_to_normalised_lookperiod(lookdistance=lookdistance, direction="behind") for lookdistance in lookbehind_distances diff --git a/_modules/timeseriesflattener/feature_specs/static.html b/_modules/timeseriesflattener/feature_specs/static.html index b5a3533d..d148e7ff 100644 --- a/_modules/timeseriesflattener/feature_specs/static.html +++ b/_modules/timeseriesflattener/feature_specs/static.html @@ -228,25 +228,21 @@

Source code for timeseriesflattener.feature_specs.static

from __future__ import annotations from dataclasses import InitVar, dataclass -from typing import TYPE_CHECKING +import pandas as pd import polars as pl from .._frame_validator import _validate_col_name_columns_exist from ..frame_utilities.anyframe_to_lazyframe import _anyframe_to_lazyframe -from .default_column_names import default_entity_id_col_name - -if TYPE_CHECKING: - from .meta import InitDF_T, ValueType
[docs]@dataclass class StaticFrame: - init_df: InitVar[InitDF_T] + init_df: InitVar[pl.LazyFrame | pl.DataFrame | pd.DataFrame] - entity_id_col_name: str = default_entity_id_col_name + entity_id_col_name: str = "entity_id" - def __post_init__(self, init_df: InitDF_T): + def __post_init__(self, init_df: pl.LazyFrame | pl.DataFrame | pd.DataFrame): self.df = _anyframe_to_lazyframe(init_df) _validate_col_name_columns_exist(obj=self) self.value_col_names = [col for col in self.df.columns if col != self.entity_id_col_name] @@ -268,7 +264,7 @@

Source code for timeseriesflattener.feature_specs.static

value_frame: StaticFrame column_prefix: str - fallback: ValueType
+ fallback: int | float | str | None
diff --git a/_modules/timeseriesflattener/feature_specs/timedelta.html b/_modules/timeseriesflattener/feature_specs/timedelta.html index da6252a1..503cb1ee 100644 --- a/_modules/timeseriesflattener/feature_specs/timedelta.html +++ b/_modules/timeseriesflattener/feature_specs/timedelta.html @@ -231,7 +231,7 @@

Source code for timeseriesflattener.feature_specs.timedelta

from typing import TYPE_CHECKING, Literal from .._frame_validator import _validate_col_name_columns_exist -from .meta import ValueFrame, ValueType +from .meta import ValueFrame if TYPE_CHECKING: import polars as pl @@ -242,7 +242,7 @@

Source code for timeseriesflattener.feature_specs.timedelta

[docs]@dataclass class TimeDeltaSpec: init_frame: TimestampValueFrame - fallback: ValueType + fallback: int | float | str | None output_name: str column_prefix: str = "pred" time_format: Literal["seconds", "minutes", "hours", "days", "years"] = "days" diff --git a/_modules/timeseriesflattener/feature_specs/timestamp_frame.html b/_modules/timeseriesflattener/feature_specs/timestamp_frame.html index 6d203047..05e005d4 100644 --- a/_modules/timeseriesflattener/feature_specs/timestamp_frame.html +++ b/_modules/timeseriesflattener/feature_specs/timestamp_frame.html @@ -228,16 +228,12 @@

Source code for timeseriesflattener.feature_specs.timestamp_frame

from __future__ import annotations from dataclasses import InitVar, dataclass -from typing import TYPE_CHECKING +import pandas as pd import polars as pl from .._frame_validator import _validate_col_name_columns_exist from ..frame_utilities.anyframe_to_lazyframe import _anyframe_to_lazyframe -from .default_column_names import default_entity_id_col_name - -if TYPE_CHECKING: - from .meta import InitDF_T
[docs]@dataclass @@ -249,11 +245,11 @@

Source code for timeseriesflattener.feature_specs.timestamp_frame

value_timestamp_col_name: The name of the column containing the timestamps. Must be a string, and the column's values must be datetimes. """ - init_df: InitVar[InitDF_T] - entity_id_col_name: str = default_entity_id_col_name + init_df: InitVar[pl.LazyFrame | pl.DataFrame | pd.DataFrame] + entity_id_col_name: str = "entity_id" value_timestamp_col_name: str = "timestamp" - def __post_init__(self, init_df: InitDF_T): + def __post_init__(self, init_df: pl.LazyFrame | pl.DataFrame | pd.DataFrame): self.df = _anyframe_to_lazyframe(init_df) _validate_col_name_columns_exist(obj=self) diff --git a/feature_specifications.html b/feature_specifications.html index 8a92dfba..7dbf8696 100644 --- a/feature_specifications.html +++ b/feature_specifications.html @@ -251,7 +251,7 @@

Feature specifications

timeseriesflattener.feature_specs#

-class PredictorSpec(value_frame: ValueFrame, lookbehind_distances: InitVar[LookDistances], aggregators: Sequence[Aggregator], fallback: ValueType, column_prefix: str = 'pred')[source]#
+class PredictorSpec(value_frame: ValueFrame, lookbehind_distances: InitVar[Sequence[dt.timedelta | tuple[dt.timedelta, dt.timedelta]]], aggregators: Sequence[Aggregator], fallback: int | float | str | None, column_prefix: str = 'pred')[source]#

Bases: object

Specification for a predictor.

@@ -277,12 +277,12 @@

Feature specifications
-fallback: ValueType#
+fallback: int | float | str | None#

-lookbehind_distances: InitVar[LookDistances]#
+lookbehind_distances: InitVar[Sequence[dt.timedelta | tuple[dt.timedelta, dt.timedelta]]]#
@@ -294,7 +294,7 @@

Feature specifications
-class PredictionTimeFrame(init_df: InitVar[InitDF_T], entity_id_col_name: str = 'entity_id', timestamp_col_name: str = 'pred_timestamp', prediction_time_uuid_col_name: str = 'prediction_time_uuid', coerce_to_lazy: InitVar[bool] = True)[source]#
+class PredictionTimeFrame(init_df: dataclasses.InitVar[polars.lazyframe.frame.LazyFrame | polars.dataframe.frame.DataFrame | pandas.core.frame.DataFrame], entity_id_col_name: str = 'entity_id', timestamp_col_name: str = 'pred_timestamp', prediction_time_uuid_col_name: str = 'prediction_time_uuid', coerce_to_lazy: dataclasses.InitVar[bool] = True)[source]#

Bases: object

Specification for prediction times, i.e. the times for which predictions are made.

@@ -304,7 +304,7 @@

Feature specifications
-coerce_to_lazy: InitVar[bool] = True#
+coerce_to_lazy: dataclasses.InitVar[bool] = True#
@@ -319,7 +319,7 @@

Feature specifications
-init_df: InitVar[InitDF_T]#
+init_df: dataclasses.InitVar[polars.lazyframe.frame.LazyFrame | polars.dataframe.frame.DataFrame | pandas.core.frame.DataFrame]#

@@ -341,7 +341,7 @@

Feature specifications
-class LookPeriod(first: 'LookDistance', last: 'LookDistance')[source]#
+class LookPeriod(first: 'dt.timedelta', last: 'dt.timedelta')[source]#

Bases: object

@@ -357,7 +357,7 @@

Feature specifications
-class ValueFrame(init_df: dataclasses.InitVar[Union[polars.lazyframe.frame.LazyFrame, polars.dataframe.frame.DataFrame, pandas.core.frame.DataFrame]], entity_id_col_name: str = 'entity_id', value_timestamp_col_name: str = 'timestamp', coerce_to_lazy: dataclasses.InitVar[bool] = True)[source]#
+class ValueFrame(init_df: dataclasses.InitVar[polars.lazyframe.frame.LazyFrame | polars.dataframe.frame.DataFrame | pandas.core.frame.DataFrame], entity_id_col_name: str = 'entity_id', value_timestamp_col_name: str = 'timestamp', coerce_to_lazy: dataclasses.InitVar[bool] = True)[source]#

Bases: object

A frame that contains the values of a time series.

@@ -383,7 +383,7 @@

Feature specifications
-init_df: dataclasses.InitVar[Union[polars.lazyframe.frame.LazyFrame, polars.dataframe.frame.DataFrame, pandas.core.frame.DataFrame]]#
+init_df: dataclasses.InitVar[polars.lazyframe.frame.LazyFrame | polars.dataframe.frame.DataFrame | pandas.core.frame.DataFrame]#

@@ -395,7 +395,7 @@

Feature specifications
-class BooleanOutcomeSpec(init_frame: InitVar[TimestampValueFrame], lookahead_distances: LookDistances, aggregators: Sequence[Aggregator], output_name: str, column_prefix: str = 'outc')[source]#
+class BooleanOutcomeSpec(init_frame: InitVar[TimestampValueFrame], lookahead_distances: Sequence[dt.timedelta | tuple[dt.timedelta, dt.timedelta]], aggregators: Sequence[Aggregator], output_name: str, column_prefix: str = 'outc')[source]#

Bases: object

Specification for a boolean outcome, e.g. whether a patient received a treatment or not.

@@ -425,7 +425,7 @@

Feature specifications
-lookahead_distances: LookDistances#
+lookahead_distances: Sequence[dt.timedelta | tuple[dt.timedelta, dt.timedelta]]#

@@ -437,7 +437,7 @@

Feature specifications
-class OutcomeSpec(value_frame: ValueFrame, lookahead_distances: InitVar[LookDistances], aggregators: Sequence[Aggregator], fallback: ValueType, column_prefix: str = 'outc')[source]#
+class OutcomeSpec(value_frame: ValueFrame, lookahead_distances: InitVar[Sequence[dt.timedelta | tuple[dt.timedelta, dt.timedelta]]], aggregators: Sequence[Aggregator], fallback: int | float | str | None, column_prefix: str = 'outc')[source]#

Bases: object

Specification for an outcome. If your outcome is binary/boolean, you can use BooleanOutcomeSpec instead.

@@ -457,12 +457,12 @@

Feature specifications
-fallback: ValueType#
+fallback: int | float | str | None#

-lookahead_distances: InitVar[LookDistances]#
+lookahead_distances: InitVar[Sequence[dt.timedelta | tuple[dt.timedelta, dt.timedelta]]]#
@@ -474,7 +474,7 @@

Feature specifications
-class StaticFrame(init_df: 'InitVar[InitDF_T]', entity_id_col_name: 'str' = 'entity_id')[source]#
+class StaticFrame(init_df: 'InitVar[pl.LazyFrame | pl.DataFrame | pd.DataFrame]', entity_id_col_name: 'str' = 'entity_id')[source]#

Bases: object

@@ -488,14 +488,14 @@

Feature specifications
-init_df: InitVar[InitDF_T]#
+init_df: dataclasses.InitVar[polars.lazyframe.frame.LazyFrame | polars.dataframe.frame.DataFrame | pandas.core.frame.DataFrame]#

-class StaticSpec(value_frame: StaticFrame, column_prefix: str, fallback: ValueType)[source]#
+class StaticSpec(value_frame: StaticFrame, column_prefix: str, fallback: int | float | str | None)[source]#

Bases: object

Specification for a static feature, e.g. the sex of a person.

@@ -510,7 +510,7 @@

Feature specifications
-fallback: ValueType#
+fallback: int | float | str | None#

@@ -522,7 +522,7 @@

Feature specifications
-class TimeDeltaSpec(init_frame: 'TimestampValueFrame', fallback: 'ValueType', output_name: 'str', column_prefix: 'str' = 'pred', time_format: "Literal['seconds', 'minutes', 'hours', 'days', 'years']" = 'days')[source]#
+class TimeDeltaSpec(init_frame: 'TimestampValueFrame', fallback: 'int | float | str | None', output_name: 'str', column_prefix: 'str' = 'pred', time_format: "Literal['seconds', 'minutes', 'hours', 'days', 'years']" = 'days')[source]#

Bases: object

@@ -536,7 +536,7 @@

Feature specifications
-fallback: ValueType#
+fallback: int | float | str | None#

@@ -567,7 +567,7 @@

Feature specifications
-class TimestampValueFrame(init_df: InitVar[InitDF_T], entity_id_col_name: str = 'entity_id', value_timestamp_col_name: str = 'timestamp')[source]#
+class TimestampValueFrame(init_df: dataclasses.InitVar[polars.lazyframe.frame.LazyFrame | polars.dataframe.frame.DataFrame | pandas.core.frame.DataFrame], entity_id_col_name: str = 'entity_id', value_timestamp_col_name: str = 'timestamp')[source]#

Bases: object

Timestamps, useful for computing e.g. age.

@@ -587,7 +587,7 @@

Feature specifications
-init_df: InitVar[InitDF_T]#
+init_df: dataclasses.InitVar[polars.lazyframe.frame.LazyFrame | polars.dataframe.frame.DataFrame | pandas.core.frame.DataFrame]#

diff --git a/searchindex.js b/searchindex.js index 059ec243..f5b2fb19 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["aggregators", "faq", "feature_specifications", "flattener", "index", "installation", "tutorials", "tutorials/01_basic", "tutorials/02_advanced", "tutorials/03_text", "tutorials/04_from_legacy"], "filenames": ["aggregators.rst", "faq.rst", "feature_specifications.rst", "flattener.rst", "index.rst", "installation.rst", "tutorials.rst", "tutorials/01_basic.ipynb", "tutorials/02_advanced.ipynb", "tutorials/03_text.ipynb", "tutorials/04_from_legacy.ipynb"], "titles": ["Aggregators", "Frequently Asked Questions", "Feature specifications", "Timeseriesflattener", "timeseriesflattener", "Installation", "Tutorials", "Introductory Tutorial", "Doing multiple things at once and time deltas", "Adding text features", "Creating features from legacy feature specifications"], "terms": {"class": [0, 2, 3, 7], "sourc": [0, 1, 2, 3], "base": [0, 2, 3, 8], "abc": 0, "name": [0, 2, 7, 8, 10], "str": [0, 2, 3, 9], "new_col_nam": 0, "previous_col_nam": 0, "countaggreg": 0, "return": [0, 8, 9], "count": [0, 7, 10], "non": 0, "null": 0, "valu": [0, 2, 4, 6, 7, 9, 10], "look": [0, 4, 7], "window": [0, 4, 7], "earliestaggreg": 0, "timestamp_col_nam": [0, 2, 7, 8, 9], "earliest": [0, 10], "hasvaluesaggreg": 0, "examin": 0, "whether": [0, 2], "ani": [0, 4, 7, 9], "exist": [0, 4], "column": [0, 2, 4, 7, 8, 9], "If": [0, 1, 2, 3, 7, 10], "so": [0, 8, 9, 10], "1": [0, 7, 8, 9, 10], "els": 0, "0": [0, 7, 8, 9, 10], "bool": [0, 2, 3], "latestaggreg": [0, 8], "latest": [0, 5, 10], "maxaggreg": [0, 7], "maximum": [0, 10], "max": [0, 4, 7], "meanaggreg": [0, 7, 8, 9], "mean": [0, 4, 7, 9, 10], "minaggreg": [0, 8], "minimum": [0, 10], "min": [0, 4, 7], "slopeaggreg": [0, 8], "slope": 0, "i": [0, 2, 4, 5, 7, 8, 9, 10], "e": [0, 1, 2, 4, 7], "correl": 0, "between": [0, 2, 5, 8], "timestamp": [0, 2, 7, 8, 9, 10], "sumaggreg": 0, "sum": [0, 10], "all": [0, 1, 7, 8, 9, 10], "varianceaggreg": 0, "varianc": [0, 10], "var": 0, "you": [1, 2, 3, 6, 7, 8, 9, 10], "wish": [1, 8], "us": [1, 2, 3, 4, 5, 6, 7, 8, 9], "librari": 1, "your": [1, 2, 5, 7], "research": 1, "pleas": [1, 4], "joss": 1, "paper": 1, "articl": 1, "bernstorff2023timeseriesflatten": 1, "titl": 1, "timeseriesflatten": [1, 5, 7, 8, 9, 10], "A": [1, 2, 3, 4, 7], "python": [1, 4], "summar": 1, "featur": [1, 3, 4, 6, 7], "from": [1, 4, 6, 7], "medic": [1, 4, 7], "time": [1, 2, 3, 4, 6], "seri": [1, 2, 3, 4, 7, 8], "author": 1, "bernstorff": 1, "martin": 1, "enevoldsen": 1, "kenneth": 1, "damgaard": 1, "jakob": 1, "danielsen": 1, "andrea": 1, "hansen": 1, "lass": 1, "journal": 1, "open": 1, "softwar": 1, "volum": 1, "8": 1, "number": [1, 3, 4, 7, 9], "83": 1, "page": [1, 4], "5197": 1, "year": [1, 2, 8], "2023": 1, "Or": 1, "prefer": 1, "apa": 1, "m": 1, "k": 1, "j": 1, "l": 1, "come": 1, "an": [1, 2, 4, 7, 8, 9], "extens": 1, "In": [1, 7, 8], "order": [1, 6], "ll": [1, 7], "usual": 1, "want": [1, 7, 8, 9, 10], "clone": 1, "repositori": 1, "build": 1, "also": [1, 6, 7], "instal": 1, "requir": [1, 4, 7, 8], "develop": 1, "depend": 1, "util": 1, "defin": [1, 8, 9, 10], "pyproject": 1, "toml": 1, "pip": [1, 5], "dev": 1, "pytest": 1, "which": [1, 2, 4, 6, 7, 8], "folder": 1, "specif": [1, 3, 6, 9], "can": [1, 2, 4, 6, 7, 8, 9], "desired_test": 1, "py": 1, "sphinx": 1, "It": [1, 7, 10], "furo": 1, "theme": 1, "custom": 1, "style": 1, "To": [1, 4, 5, 6, 7, 8, 9], "make": [1, 2, 4, 7, 8, 9], "doc": [1, 7], "text": [1, 6], "html": 1, "c": [1, 4, 9], "predictorspec": [2, 7, 8, 9, 10], "value_fram": [2, 7, 8], "valuefram": [2, 7, 8, 9], "lookbehind_dist": [2, 7, 8, 9], "initvar": 2, "lookdist": [2, 8], "aggreg": [2, 3, 4, 6, 7, 9, 10], "sequenc": [2, 3], "fallback": [2, 7, 8, 9, 10], "valuetyp": 2, "column_prefix": [2, 7, 9], "pred": [2, 7], "object": [2, 3, 7], "predictor": [2, 4, 6, 8], "The": [2, 3, 4, 6, 7, 8, 10], "must": [2, 7], "contain": [2, 3, 7, 8, 9], "entity_id_col_nam": [2, 7, 8, 9], "entiti": [2, 7, 8], "id": [2, 4, 7, 8, 9], "value_timestamp_col_nam": [2, 7, 8, 9], "each": [2, 4, 6, 7, 8, 9], "addit": 2, "properti": 2, "df": [2, 7, 8, 9, 10], "pl": [2, 8, 9], "lazyfram": 2, "predictiontimefram": [2, 3, 7, 8, 9], "init_df": [2, 7, 8, 9], "initdf_t": 2, "entity_id": [2, 7, 8, 9], "pred_timestamp": 2, "prediction_time_uuid_col_nam": 2, "prediction_time_uuid": [2, 7], "coerce_to_lazi": 2, "true": [2, 3, 4, 10], "predict": [2, 3, 4, 6, 8, 9], "ar": [2, 4, 7, 8, 9, 10], "made": 2, "datafram": [2, 4, 6, 7, 9, 10], "panda": [2, 7, 10], "polar": [2, 3, 8, 9], "when": [2, 4, 7], "collect": [2, 7, 8, 9], "required_column": [2, 3], "lookperiod": [2, 6], "first": [2, 7, 8, 9], "last": [2, 7], "timedelta": [2, 3, 6, 7, 9], "dataclass": 2, "union": 2, "frame": [2, 3], "core": [2, 7], "string": [2, 7], "": [2, 7, 8, 9, 10], "uniqu": [2, 7], "datetim": [2, 7, 8, 9], "booleanoutcomespec": [2, 7], "init_fram": [2, 7, 8], "timestampvaluefram": [2, 7, 8], "lookahead_dist": [2, 7], "output_nam": [2, 7, 8], "outc": [2, 7], "boolean": [2, 10], "outcom": [2, 4, 6], "g": [2, 4], "patient": [2, 4, 7, 9], "receiv": 2, "treatment": 2, "event": 2, "occur": [2, 4, 7], "outcomespec": [2, 7], "binari": [2, 7], "instead": [2, 7], "staticfram": [2, 7], "staticspec": [2, 7, 8], "static": [2, 3, 6], "sex": [2, 7], "person": 2, "timedeltaspec": [2, 8], "time_format": [2, 8], "liter": 2, "second": [2, 8], "minut": [2, 8], "hour": [2, 8], "dai": [2, 7, 8, 9], "delta": [2, 6], "calcul": [2, 8], "ag": [2, 8], "sinc": [2, 7, 10], "certain": [2, 7], "desir": [2, 4], "comput": [2, 3, 9], "predictiontime_fram": [3, 7, 8, 9], "compute_lazili": 3, "fals": 3, "n_worker": [3, 7], "int": [3, 8], "none": [3, 7], "aggregate_timeseri": [3, 7, 8, 9], "spec": [3, 7, 8, 9, 10], "valuespecif": 3, "step_siz": 3, "dt": [3, 7, 8, 9], "aggregatedfram": 3, "perform": 3, "paramet": [3, 7], "creat": [3, 6, 7, 8, 9], "step": [3, 7], "size": [3, 7], "chunk": 3, "reduc": 3, "encount": 3, "memori": 3, "issu": [3, 4, 7], "multipl": [3, 4, 6, 7, 9], "irregular": [3, 4, 7], "set": [3, 4, 7, 8], "done": 3, "lazili": 3, "worker": 3, "multiprocess": 3, "handl": [3, 7, 8], "entir": [3, 7], "otherwis": [3, 4], "specifi": [3, 4, 6, 8], "joblib": 3, "except": [3, 7], "missingcolumnnameerror": 3, "descript": 3, "specerror": 3, "specrequirementpair": 3, "missing_column": 3, "iter": 3, "packag": [4, 6], "gener": [4, 6, 7, 10], "data": [4, 6, 8, 9], "machin": 4, "learn": 4, "model": [4, 7, 9], "implement": 4, "method": 4, "includ": [4, 9], "convert": 4, "singl": [4, 7, 8, 9], "row": [4, 7, 9], "construct": 4, "raw": 4, "allow": 4, "independ": 4, "particular": 4, "sever": [4, 9], "choic": 4, "one": [4, 7], "need": [4, 7, 9], "everi": [4, 7], "physic": 4, "visit": 4, "morn": 4, "anoth": [4, 7], "clinic": [4, 9], "meaning": 4, "how": [4, 6, 9], "far": [4, 7, 8, 9, 10], "back": 4, "ahead": [4, 7], "lookbehind": [4, 7, 8, 9], "lookahead": [4, 7], "point": [4, 7], "abov": [4, 8, 10], "figur": 4, "graphic": 4, "repres": [4, 7], "terminologi": [4, 7], "determin": [4, 7, 8], "wherea": 4, "futur": [4, 7], "refer": [4, 7], "b": 4, "label": [4, 7], "neg": 4, "never": [4, 7], "happen": [4, 7], "outsid": [4, 7], "onli": [4, 7, 9], "posit": [4, 7], "insid": [4, 7], "within": [4, 7, 9], "exampl": [4, 7, 8, 9], "shown": [4, 7], "thi": [4, 7, 8, 9, 10], "etc": [4, 7], "obtain": 4, "rich": 4, "represent": 4, "see": [4, 7, 8], "tutori": [4, 8, 9], "case": [4, 7], "report": 4, "request": 4, "github": [4, 5], "tracker": 4, "discuss": 4, "forum": 4, "type": [4, 7], "bug": 4, "idea": 4, "usag": 4, "index": 4, "get": [5, 8, 10], "start": [5, 8, 9], "run": [5, 6], "follow": [5, 7], "line": 5, "termin": 5, "There": 5, "discrep": 5, "version": [5, 10], "we": [6, 7, 8, 9, 10], "recommend": 6, "go": [6, 7], "through": 6, "list": [6, 7, 8, 9], "below": [6, 10], "jupyt": 6, "notebook": 6, "download": 6, "local": 6, "introductori": 6, "load": [6, 8, 9], "tempor": [6, 8], "flatten": [6, 8, 9], "do": [6, 7, 10], "thing": 6, "onc": [6, 7], "function": [6, 7, 9], "same": [6, 7], "ad": [6, 7], "dataset": [6, 7, 8], "embed": 6, "legaci": 6, "timeseri": 7, "especi": 7, "help": 7, "have": [7, 8, 9, 10], "complic": 7, "train": 7, "simpl": [7, 8, 10], "explain": 7, "appli": [7, 8], "consist": 7, "3": [7, 8, 9, 10], "simplest": 7, "predictin": 7, "two": [7, 8], "element": 7, "about": 7, "context": 7, "frequent": 7, "__future__": [7, 8, 9, 10], "import": [7, 8, 9, 10], "annot": [7, 8, 9, 10], "skimpi": 7, "skim": 7, "test": [7, 8, 9, 10], "load_synth_data": [7, 8, 9], "load_synth_prediction_tim": [7, 8, 9], "df_prediction_tim": [7, 8], "sort": [7, 8], "summari": 7, "10000": 7, "int64": 7, "2": [7, 8, 9, 10], "datetime64": 7, "column_nam": 7, "na": [7, 9], "sd": 7, "p0": 7, "p25": 7, "p50": 7, "p75": 7, "p100": 7, "hist": 7, "4959": 7, "2886": 7, "2485": 7, "4922": 7, "7443": 7, "9999": 7, "frequenc": 7, "1965": [7, 8, 9], "01": [7, 8, 9, 10], "02": [7, 8, 9], "09": [7, 8, 9], "35": [7, 8], "00": [7, 8, 9], "1969": 7, "12": [7, 8, 9], "31": [7, 8, 9], "21": [7, 8, 9], "42": 7, "end": 7, "shape": [7, 8, 9], "10_000": 7, "entity_idtimestampi64datetim": 7, "\u03bc": [7, 8, 9], "01969": 7, "11": [7, 8, 9], "55": [7, 8], "0011965": 7, "03": [7, 8], "15": [7, 8, 9], "07": [7, 8, 9], "16": [7, 8, 9], "0021969": 7, "13": [7, 9], "23": [7, 9], "18": [7, 8], "0031968": 7, "04": [7, 8, 9], "0041965": 7, "28": 7, "33": [7, 8], "hellip": 7, "99961965": 7, "30": [7, 8], "17": [7, 8, 9], "19": [7, 8, 9], "0099961965": 7, "0099971967": 7, "06": [7, 8, 9], "08": [7, 8], "52": 7, "0099991965": 7, "14": [7, 8, 9], "59": 7, "0099991968": 7, "22": [7, 8, 9], "24": [7, 8], "here": 7, "note": [7, 8, 9], "Then": 7, "our": [7, 8, 9], "differ": [7, 8], "timepoint": 7, "load_synth_predictor_float": [7, 8], "df_synth_predictor": [7, 8], "100000": 7, "float64": 7, "4994": 7, "2887": 7, "2486": 7, "4996": 7, "7487": 7, "4": [7, 8, 10], "983": 7, "885": 7, "0001514": 7, "483": 7, "975": 7, "7": [7, 9], "486": 7, "10": [7, 8, 9], "37": 7, "100_000": 7, "entity_idtimestampvaluei64datetim": [7, 8, 9], "f6401967": 7, "000": [7, 8, 9], "17479301968": 7, "45": 7, "003": [7, 8], "07229301968": 7, "05": [7, 8, 9], "001": 7, "31575401969": 7, "20": [7, 8, 9], "002": [7, 8], "81248101967": 7, "26": 7, "981185": 7, "99991968": 7, "67190799991966": 7, "34": 7, "004": [7, 8], "15879699991966": 7, "27": [7, 8], "41445599991968": 7, "58": 7, "55249199991969": 7, "501553": 7, "again": 7, "more": [7, 8], "describ": [7, 9], "could": 7, "doesn": 7, "t": 7, "chang": [7, 10], "over": 7, "let": [7, 8, 9], "load_synth_sex": 7, "df_synth_sex": 7, "4999": 7, "2500": 7, "7500": 7, "femal": 7, "4984": 7, "5": [7, 8, 9], "9_999": 7, "entity_idfemalei64i640011213140": 7, "9995099960999719998199990": 7, "As": [7, 9], "should": [7, 8, 9, 10], "And": [7, 8, 9, 10], "lastli": 7, "ve": [7, 8], "chosen": 7, "store": 7, "experi": 7, "infer": 7, "thei": [7, 8], "section": 7, "load_synth_outcom": 7, "df_synth_outcom": 7, "3103": 7, "5032": 7, "2900": 7, "2499": 7, "5109": 7, "7555": 7, "9992": 7, "50": 7, "3_103": 7, "i648041967": 7, "51": [7, 8], "00164881965": 7, "0015301966": 7, "00198461969": 7, "00118581968": 7, "45221969": 7, "32": 7, "00193451965": 7, "47": 7, "00146531968": 7, "00171781966": 7, "00114141967": 7, "most": [7, 10], "per": [7, 8], "now": [7, 8, 9], "recip": 7, "finish": 7, "firstli": 7, "main": 7, "decis": 7, "given": 7, "indic": 7, "code": 7, "pd": [7, 10], "test_df": 7, "2020": 7, "outcome_spec": 7, "365": [7, 8, 9], "present": 7, "argument": 7, "default": 7, "found": 7, "values_df": 7, "For": [7, 9], "hardcod": 7, "decid": 7, "least": 7, "correspond": 7, "both": 7, "accomplish": 7, "specifii": 7, "forward": 7, "search": 7, "period": [7, 9], "befor": [7, 9], "interv": [7, 8], "min_dai": 7, "max_dai": 7, "tupl": [7, 8], "its": 7, "almost": 7, "ident": 7, "past": 7, "numpi": [7, 8, 9], "np": [7, 8, 9], "temporal_predictor_spec": 7, "renam": 7, "value_1": 7, "nan": [7, 8, 9], "730": [7, 8, 9], "output": [7, 8, 9, 10], "after": 7, "avoid": 7, "input": 7, "rang": 7, "similar": [7, 8], "instanc": 7, "might": [7, 8], "where": [7, 8], "182": 7, "easili": 7, "pass": [7, 8, 9], "lookbehind_dai": [7, 10], "temporal_interval_predictor_spec": 7, "value_2": 7, "slightli": [7, 8], "previou": 7, "provid": 7, "howev": 7, "add": [7, 8], "prefix": 7, "By": 7, "filter": [7, 8, 9], "easi": 7, "manual": 7, "sex_predictor_spec": 7, "don": 7, "re": [7, 9], "readi": [7, 9, 10], "instanti": 7, "along": 7, "metadata": [7, 9], "call": 7, "add_": 7, "parallel": 7, "oper": 7, "across": [7, 8], "process": [7, 8, 9], "pred_female_fallback_": 7, "4931": 7, "pred_value_1_within_0": 7, "1072": 7, "72": 7, "842": 7, "01491": 7, "851": 7, "023": 7, "6": [7, 8], "178": 7, "9": [7, 8], "946": 7, "_to_730_days_mean_f": 7, "lback_nan": 7, "pred_value_2_within_1": 7, "2060": 7, "008": 7, "222": 7, "0003901": 7, "014": 7, "56": 7, "997": 7, "0_to_365_days_mean_fa": 7, "llback_nan": 7, "outc_outcome_within_0": 7, "_to_365_days_max_fal": 7, "back_0": 7, "word": 7, "total": 7, "20000": 7, "pred_female_fallback_nan": 7, "pred_value_1_within_0_to_730_days_mean_fallback_nan": 7, "pred_value_2_within_10_to_365_days_mean_fallback_nan": 7, "outc_outcome_within_0_to_365_days_max_fallback_0": 7, "display": 7, "shorten": 7, "col": [7, 8], "shortened_pr": 7, "predx": 7, "shortened_predinterv": 7, "predx_30_to_90": 7, "shortened_outcom": 7, "outc_i": 7, "display_df": 7, "entity_idtimestampprediction_time_uuidpred_female_fallback_nanpredxpredx_30_to_90outc_yi64datetim": 7, "stri64f64f64i3298521965": 7, "quot": [7, 8, 9], "9852": [7, 8], "00000": [7, 8, 9], "1nannan014671965": 7, "1467": [7, 8], "0nannan011251965": 7, "1125": [7, 8], "0nannan06491965": 7, "649": [7, 8], "000000": [7, 8], "0nannan020701965": 7, "2070": [7, 8], "1nannan0": 7, "3341969": 7, "334": 7, "252526nan033631969": 7, "3363": 7, "6796672": 7, "409664079291969": 7, "7929": 7, "9435857": 7, "475979060021969": 7, "6002": 7, "5935837": 7, "07659808641969": 7, "864": 7, "520416nan0": 7, "classif": 7, "citizen": 7, "identifi": 7, "prediciton": 7, "pred_": 7, "outc_": 7, "realiti": 8, "d": 8, "like": [8, 9, 10], "addition": 8, "some": [8, 9], "common": 8, "current": 8, "than": 8, "what": 8, "cover": [8, 9], "luckili": 8, "extrem": 8, "combin": 8, "just": [8, 9], "suppli": 8, "off": 8, "interfac": 8, "featuer": 8, "work": [8, 10], "exactli": 8, "wai": 8, "illustr": 8, "head": [8, 9], "f6494761969": 8, "81699546311967": 8, "48": 8, "81807438901969": 8, "50378910981965": 8, "53": 8, "51504116261966": 8, "353115": 8, "helper": 8, "def": [8, 9], "make_timedelta_interv": 8, "start_dai": 8, "end_dai": 8, "predictor_spec": 8, "break": 8, "down": 8, "distanc": 8, "therefor": 8, "expect": 8, "n_aggreg": 8, "n_lookbehind_dist": 8, "entity_idtimestampprediction_time_uuidpred_value_within_0_to_30_days_mean_fallback_nanpred_value_within_0_to_30_days_latest_fallback_nanpred_value_within_30_to_365_days_mean_fallback_nanpred_value_within_30_to_365_days_latest_fallback_nanpred_value_within_365_to_730_days_mean_fallback_nanpred_value_within_365_to_730_days_latest_fallback_nani64datetim": 8, "strf64f64f64f64f64f6498521965": 8, "nannannannannannan14671965": 8, "nannannannannannan11251965": 8, "nannannannannannan6491965": 8, "nannannannannannan20701965": 8, "nannannannannannan": 8, "sometim": 8, "measur": 8, "manner": 8, "simpli": [8, 9], "new": 8, "simul": 8, "with_column": 8, "new_predictor": 8, "random": 8, "rand": 8, "entity_idtimestampvaluenew_predictori64datetim": 8, "f64f6494761969": 8, "8169950": 8, "14395546311967": 8, "8180740": 8, "81765938901969": 8, "5037890": 8, "96800310981965": 8, "5150410": 8, "47938416261966": 8, "3531150": 8, "485022": 8, "try": 8, "allgreg": 8, "n_predictor": 8, "entity_idtimestampprediction_time_uuidpred_value_within_0_to_30_days_min_fallback_nanpred_new_predictor_within_0_to_30_days_min_fallback_nanpred_value_within_0_to_30_days_slope_fallback_nanpred_new_predictor_within_0_to_30_days_slope_fallback_nanpred_value_within_30_to_365_days_min_fallback_nanpred_new_predictor_within_30_to_365_days_min_fallback_nanpred_value_within_30_to_365_days_slope_fallback_nanpred_new_predictor_within_30_to_365_days_slope_fallback_nanpred_value_within_365_to_730_days_min_fallback_nanpred_new_predictor_within_365_to_730_days_min_fallback_nanpred_value_within_365_to_730_days_slope_fallback_nanpred_new_predictor_within_365_to_730_days_slope_fallback_nani64datetim": 8, "strf64f64f64f64f64f64f64f64f64f64f64f6498521965": 8, "nannannannannannannannannannannannan14671965": 8, "nannannannannannannannannannannannan11251965": 8, "nannannannannannannannannannannannan6491965": 8, "nannannannannannannannannannannannan20701965": 8, "nannannannannannannannannannannannan": 8, "commonli": 8, "seen": 8, "u": 8, "birthdat": 8, "date": 8, "birth": 8, "load_synth_birthdai": 8, "df_birthdai": 8, "entity_idbirthdayi64datetim": 8, "90451932": 8, "0055321920": 8, "41": [8, 9], "0022421917": 8, "007891930": 8, "0097151926": 8, "age_spec": 8, "birthdai": 8, "without": 8, "entri": 8, "format": [8, 9], "take": [8, 9], "entity_idtimestampprediction_time_uuidpred_age_years_fallback_nani64datetim": 8, "strf6498521965": 8, "90965114671965": 8, "929511251965": 8, "5701576491965": 8, "09377120701965": 8, "49": 8, "886379": 8, "sure": 8, "9903": 8, "strf6499031965": 8, "36": 8, "67077399031968": 8, "1968": 8, "39": 8, "154004": 8, "dealt": 9, "tabular": 9, "show": 9, "out": 9, "alreadi": 9, "synthet": 9, "other": 9, "load_synth_text": 9, "synth_text": 9, "str46471967": 9, "went": 9, "medica": 9, "20071966": 9, "25": 9, "taken": 9, "em": 9, "57991967": 9, "13191969": 9, "had": [9, 10], "been": 9, "left": 9, "42341966": 9, "often": 9, "while": 9, "advantag": 9, "emb": 9, "speed": 9, "up": 9, "block": 9, "tf": 9, "idf": 9, "form": 9, "constraint": 9, "result": 9, "entity_id_col": 9, "timestamp_col": 9, "purpos": 9, "demonstr": 9, "fit": 9, "small": 9, "captur": 9, "sklearn": 9, "feature_extract": 9, "tfidfvector": 9, "embed_text_to_df": 9, "tfidf_model": 9, "max_featur": 9, "fit_transform": 9, "toarrai": 9, "schema": 9, "get_feature_names_out": 9, "tolist": 9, "embedded_text": 9, "to_list": 9, "drop": 9, "origin": 9, "metadata_onli": 9, "concaten": 9, "embedded_text_with_metadata": 9, "concat": 9, "horizont": 9, "entity_idtimestampandforinoforpatientthatthetowasi64datetim": 9, "f64f64f64f64f64f64f64f64f64f6446471967": 9, "1758720": 9, "1820660": 9, "2498480": 9, "158430": 9, "0230420": 9, "3113890": 9, "5299660": 9, "4902030": 9, "47931220071966": 9, "244870": 9, "1352820": 9, "0643370": 9, "4650840": 9, "3368590": 9, "1517430": 9, "7298610": 9, "1791610": 9, "057991967": 9, "1923670": 9, "2323320": 9, "2834020": 9, "3369520": 9, "1764220": 9, "2384160": 9, "6468790": 9, "2502170": 9, "38227713191969": 9, "1656350": 9, "2000460": 9, "1830150": 9, "2611150": 9, "1258370": 9, "1519060": 9, "2052850": 9, "7595280": 9, "4039610": 9, "09874742341966": 9, "4934610": 9, "1191960": 9, "2726190": 9, "2074440": 9, "0452560": 9, "1834750": 9, "5883240": 9, "4332530": 9, "235349": 9, "text_spec": 9, "pred_tfidf": 9, "would": 9, "normal": 9, "wa": 9, "check": 9, "selector": 9, "float": 9, "sake": 9, "all_horizont": 9, "is_not_nan": 9, "entity_idtimestampprediction_time_uuidpred_tfidf_and_within_0_to_365_days_mean_fallback_nanpred_tfidf_for_within_0_to_365_days_mean_fallback_nanpred_tfidf_in_within_0_to_365_days_mean_fallback_nanpred_tfidf_of_within_0_to_365_days_mean_fallback_nanpred_tfidf_or_within_0_to_365_days_mean_fallback_nanpred_tfidf_patient_within_0_to_365_days_mean_fallback_nanpred_tfidf_that_within_0_to_365_days_mean_fallback_nanpred_tfidf_the_within_0_to_365_days_mean_fallback_nanpred_tfidf_to_within_0_to_365_days_mean_fallback_nanpred_tfidf_was_within_0_to_365_days_mean_fallback_nanpred_tfidf_and_within_0_to_730_days_mean_fallback_nanpred_tfidf_for_within_0_to_730_days_mean_fallback_nanpred_tfidf_in_within_0_to_730_days_mean_fallback_nanpred_tfidf_of_within_0_to_730_days_mean_fallback_nanpred_tfidf_or_within_0_to_730_days_mean_fallback_nanpred_tfidf_patient_within_0_to_730_days_mean_fallback_nanpred_tfidf_that_within_0_to_730_days_mean_fallback_nanpred_tfidf_the_within_0_to_730_days_mean_fallback_nanpred_tfidf_to_within_0_to_730_days_mean_fallback_nanpred_tfidf_was_within_0_to_730_days_mean_fallback_nani64datetim": 9, "strf64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f6468401965": 9, "6840": 9, "1558210": 9, "3763860": 9, "2582560": 9, "5731680": 9, "3551420": 9, "0714520": 9, "0965610": 9, "285810": 9, "456030": 9, "0928960": 9, "09289620391966": 9, "2039": 9, "1966": 9, "1080150": 9, "5967440": 9, "113520": 9, "0990620": 9, "1338720": 9, "6934310": 9, "2107470": 9, "2575810": 9, "25758194961966": 9, "44": 9, "9496": 9, "2799550": 9, "309330": 9, "2942220": 9, "2567490": 9, "5134980": 9, "5462160": 9, "33380": 9, "333872811967": 9, "7281": 9, "1967": 9, "2896630": 9, "043730": 9, "2800490": 9, "3044250": 9, "3851110": 9, "3320650": 9, "2692510": 9, "4648910": 9, "2119340": 9, "3885470": 9, "38854774241967": 9, "7424": 9, "1539070": 9, "0929410": 9, "1700560": 9, "1078340": 9, "3897560": 9, "2822990": 9, "0635830": 9, "6822220": 9, "4754520": 9, "user": 10, "rewrit": 10, "written": 10, "api": 10, "predictorgroupspec": 10, "those": 10, "were": 10, "ones": 10, "sai": 10, "v1": 10, "aggregation_fn": 10, "change_per_dai": 10, "feature_spec": 10, "group_spec": 10, "nameddatafram": 10, "single_spec": 10, "version1predictorspec": 10, "legacy_spec": 10, "named_datafram": 10, "2013": 10, "dw_ek_borg": 10, "test2": 10, "create_combin": 10, "print": 10, "f": 10, "isinst": 10, "compat": 10, "replac": 10, "version2predictorspec": 10, "noqa": 10, "era001": 10, "from_legaci": 10, "new_spec": 10}, "objects": {"timeseriesflattener": [[0, 0, 0, "-", "aggregators"], [3, 0, 0, "-", "flattener"]], "timeseriesflattener.aggregators": [[0, 1, 1, "", "Aggregator"], [0, 1, 1, "", "CountAggregator"], [0, 1, 1, "", "EarliestAggregator"], [0, 1, 1, "", "HasValuesAggregator"], [0, 1, 1, "", "LatestAggregator"], [0, 1, 1, "", "MaxAggregator"], [0, 1, 1, "", "MeanAggregator"], [0, 1, 1, "", "MinAggregator"], [0, 1, 1, "", "SlopeAggregator"], [0, 1, 1, "", "SumAggregator"], [0, 1, 1, "", "VarianceAggregator"]], "timeseriesflattener.aggregators.Aggregator": [[0, 2, 1, "", "name"], [0, 3, 1, "", "new_col_name"]], "timeseriesflattener.aggregators.CountAggregator": [[0, 2, 1, "", "name"]], "timeseriesflattener.aggregators.EarliestAggregator": [[0, 2, 1, "", "name"], [0, 2, 1, "", "timestamp_col_name"]], "timeseriesflattener.aggregators.HasValuesAggregator": [[0, 2, 1, "", "name"]], "timeseriesflattener.aggregators.LatestAggregator": [[0, 2, 1, "", "name"], [0, 2, 1, "", "timestamp_col_name"]], "timeseriesflattener.aggregators.MaxAggregator": [[0, 2, 1, "", "name"]], "timeseriesflattener.aggregators.MeanAggregator": [[0, 2, 1, "", "name"]], "timeseriesflattener.aggregators.MinAggregator": [[0, 2, 1, "", "name"]], "timeseriesflattener.aggregators.SlopeAggregator": [[0, 2, 1, "", "name"], [0, 2, 1, "", "timestamp_col_name"]], "timeseriesflattener.aggregators.SumAggregator": [[0, 2, 1, "", "name"]], "timeseriesflattener.aggregators.VarianceAggregator": [[0, 2, 1, "", "name"]], "timeseriesflattener.feature_specs": [[2, 0, 0, "-", "meta"], [2, 0, 0, "-", "outcome"], [2, 0, 0, "-", "prediction_times"], [2, 0, 0, "-", "predictor"], [2, 0, 0, "-", "static"], [2, 0, 0, "-", "timedelta"], [2, 0, 0, "-", "timestamp_frame"]], "timeseriesflattener.feature_specs.meta": [[2, 1, 1, "", "LookPeriod"], [2, 1, 1, "", "ValueFrame"]], "timeseriesflattener.feature_specs.meta.LookPeriod": [[2, 2, 1, "", "first"], [2, 2, 1, "", "last"]], "timeseriesflattener.feature_specs.meta.ValueFrame": [[2, 2, 1, "", "coerce_to_lazy"], [2, 3, 1, "", "collect"], [2, 2, 1, "", "entity_id_col_name"], [2, 2, 1, "", "init_df"], [2, 2, 1, "", "value_timestamp_col_name"]], "timeseriesflattener.feature_specs.outcome": [[2, 1, 1, "", "BooleanOutcomeSpec"], [2, 1, 1, "", "OutcomeSpec"]], "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec": [[2, 2, 1, "", "aggregators"], [2, 2, 1, "", "column_prefix"], [2, 4, 1, "", "df"], [2, 2, 1, "", "init_frame"], [2, 2, 1, "", "lookahead_distances"], [2, 2, 1, "", "output_name"]], "timeseriesflattener.feature_specs.outcome.OutcomeSpec": [[2, 2, 1, "", "aggregators"], [2, 2, 1, "", "column_prefix"], [2, 4, 1, "", "df"], [2, 2, 1, "", "fallback"], [2, 2, 1, "", "lookahead_distances"], [2, 2, 1, "", "value_frame"]], "timeseriesflattener.feature_specs.prediction_times": [[2, 1, 1, "", "PredictionTimeFrame"]], "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame": [[2, 2, 1, "", "coerce_to_lazy"], [2, 3, 1, "", "collect"], [2, 2, 1, "", "entity_id_col_name"], [2, 2, 1, "", "init_df"], [2, 2, 1, "", "prediction_time_uuid_col_name"], [2, 3, 1, "", "required_columns"], [2, 2, 1, "", "timestamp_col_name"]], "timeseriesflattener.feature_specs.predictor": [[2, 1, 1, "", "PredictorSpec"]], "timeseriesflattener.feature_specs.predictor.PredictorSpec": [[2, 2, 1, "", "aggregators"], [2, 2, 1, "", "column_prefix"], [2, 4, 1, "", "df"], [2, 2, 1, "", "fallback"], [2, 2, 1, "", "lookbehind_distances"], [2, 2, 1, "", "value_frame"]], "timeseriesflattener.feature_specs.static": [[2, 1, 1, "", "StaticFrame"], [2, 1, 1, "", "StaticSpec"]], "timeseriesflattener.feature_specs.static.StaticFrame": [[2, 3, 1, "", "collect"], [2, 2, 1, "", "entity_id_col_name"], [2, 2, 1, "", "init_df"]], "timeseriesflattener.feature_specs.static.StaticSpec": [[2, 2, 1, "", "column_prefix"], [2, 2, 1, "", "fallback"], [2, 2, 1, "", "value_frame"]], "timeseriesflattener.feature_specs.timedelta": [[2, 1, 1, "", "TimeDeltaSpec"]], "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec": [[2, 2, 1, "", "column_prefix"], [2, 4, 1, "", "df"], [2, 2, 1, "", "fallback"], [2, 2, 1, "", "init_frame"], [2, 2, 1, "", "output_name"], [2, 2, 1, "", "time_format"]], "timeseriesflattener.feature_specs.timestamp_frame": [[2, 1, 1, "", "TimestampValueFrame"]], "timeseriesflattener.feature_specs.timestamp_frame.TimestampValueFrame": [[2, 3, 1, "", "collect"], [2, 2, 1, "", "entity_id_col_name"], [2, 2, 1, "", "init_df"], [2, 2, 1, "", "value_timestamp_col_name"]], "timeseriesflattener.flattener": [[3, 1, 1, "", "Flattener"], [3, 5, 1, "", "MissingColumnNameError"], [3, 5, 1, "", "SpecError"], [3, 1, 1, "", "SpecRequirementPair"]], "timeseriesflattener.flattener.Flattener": [[3, 3, 1, "", "aggregate_timeseries"], [3, 2, 1, "", "compute_lazily"], [3, 2, 1, "", "n_workers"], [3, 2, 1, "", "predictiontime_frame"]], "timeseriesflattener.flattener.MissingColumnNameError": [[3, 2, 1, "", "description"]], "timeseriesflattener.flattener.SpecError": [[3, 2, 1, "", "description"]], "timeseriesflattener.flattener.SpecRequirementPair": [[3, 3, 1, "", "missing_columns"], [3, 2, 1, "", "required_columns"], [3, 2, 1, "", "spec"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:attribute", "3": "py:method", "4": "py:property", "5": "py:exception"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "method", "Python method"], "4": ["py", "property", "Python property"], "5": ["py", "exception", "Python exception"]}, "titleterms": {"aggreg": [0, 8], "timeseriesflatten": [0, 2, 3, 4], "frequent": 1, "ask": [1, 4], "question": [1, 4], "cite": 1, "thi": 1, "packag": 1, "how": [1, 7], "do": [1, 8], "i": 1, "test": 1, "code": 1, "run": 1, "suit": 1, "document": 1, "gener": [1, 9], "featur": [2, 8, 9, 10], "specif": [2, 7, 10], "feature_spec": 2, "flatten": [3, 7], "function": [4, 8], "where": 4, "indic": 4, "search": 4, "instal": 5, "tutori": [6, 7], "get": 6, "start": 6, "introductori": 7, "load": 7, "data": 7, "predict": 7, "time": [7, 8], "tempor": 7, "predictor": [7, 9], "static": 7, "outcom": 7, "specifi": 7, "multipl": 8, "thing": 8, "onc": 8, "delta": 8, "lookperiod": 8, "valu": 8, "from": [8, 9, 10], "same": 8, "datafram": 8, "timedelta": 8, "ad": 9, "text": 9, "The": 9, "dataset": 9, "embed": 9, "creat": 10, "legaci": 10}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Aggregators": [[0, "aggregators"]], "timeseriesflattener.aggregators": [[0, "module-timeseriesflattener.aggregators"]], "Frequently Asked Questions": [[1, "frequently-asked-questions"]], "Citing this package": [[1, "citing-this-package"]], "How do I test the code and run the test suite?": [[1, "how-do-i-test-the-code-and-run-the-test-suite"]], "How is the documentation generated?": [[1, "how-is-the-documentation-generated"]], "Feature specifications": [[2, "feature-specifications"]], "timeseriesflattener.feature_specs": [[2, "module-timeseriesflattener.feature_specs.predictor"]], "Timeseriesflattener": [[3, "timeseriesflattener"]], "timeseriesflattener.flattener": [[3, "module-timeseriesflattener.flattener"]], "timeseriesflattener": [[4, "timeseriesflattener"]], "Functionality": [[4, "functionality"]], "Where to ask questions?": [[4, "where-to-ask-questions"]], "Indices and search": [[4, "indices-and-search"]], "Installation": [[5, "installation"]], "Tutorials": [[6, "tutorials"]], "Getting started": [[6, null]], "Introductory Tutorial": [[7, "introductory-tutorial"]], "Loading data": [[7, "loading-data"]], "Loading prediction times": [[7, "loading-prediction-times"]], "Loading a temporal predictor": [[7, "loading-a-temporal-predictor"]], "Loading a static predictor": [[7, "loading-a-static-predictor"]], "Loading a temporal outcome": [[7, "loading-a-temporal-outcome"]], "Specifying how to flatten the data": [[7, "specifying-how-to-flatten-the-data"]], "Temporal outcome specification": [[7, "temporal-outcome-specification"]], "Temporal predictor specification": [[7, "temporal-predictor-specification"]], "Static predictor specification": [[7, "static-predictor-specification"]], "Flattening": [[7, "flattening"]], "Doing multiple things at once and time deltas": [[8, "doing-multiple-things-at-once-and-time-deltas"]], "Multiple aggregation functions and lookperiods": [[8, "multiple-aggregation-functions-and-lookperiods"]], "Multiple values from the same dataframe": [[8, "multiple-values-from-the-same-dataframe"]], "TimeDelta features": [[8, "timedelta-features"]], "Adding text features": [[9, "adding-text-features"]], "The dataset": [[9, "the-dataset"]], "Generating predictors from embedded text": [[9, "generating-predictors-from-embedded-text"]], "Creating features from legacy feature specifications": [[10, "creating-features-from-legacy-feature-specifications"]]}, "indexentries": {"aggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.Aggregator"]], "countaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.CountAggregator"]], "earliestaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.EarliestAggregator"]], "hasvaluesaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.HasValuesAggregator"]], "latestaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.LatestAggregator"]], "maxaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.MaxAggregator"]], "meanaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.MeanAggregator"]], "minaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.MinAggregator"]], "slopeaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.SlopeAggregator"]], "sumaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.SumAggregator"]], "varianceaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.VarianceAggregator"]], "module": [[0, "module-timeseriesflattener.aggregators"], [2, "module-timeseriesflattener.feature_specs.meta"], [2, "module-timeseriesflattener.feature_specs.outcome"], [2, "module-timeseriesflattener.feature_specs.prediction_times"], [2, "module-timeseriesflattener.feature_specs.predictor"], [2, "module-timeseriesflattener.feature_specs.static"], [2, "module-timeseriesflattener.feature_specs.timedelta"], [2, "module-timeseriesflattener.feature_specs.timestamp_frame"], [3, "module-timeseriesflattener.flattener"]], "name (aggregator attribute)": [[0, "timeseriesflattener.aggregators.Aggregator.name"]], "name (countaggregator attribute)": [[0, "timeseriesflattener.aggregators.CountAggregator.name"]], "name (earliestaggregator attribute)": [[0, "timeseriesflattener.aggregators.EarliestAggregator.name"]], "name (hasvaluesaggregator attribute)": [[0, "timeseriesflattener.aggregators.HasValuesAggregator.name"]], "name (latestaggregator attribute)": [[0, "timeseriesflattener.aggregators.LatestAggregator.name"]], "name (maxaggregator attribute)": [[0, "timeseriesflattener.aggregators.MaxAggregator.name"]], "name (meanaggregator attribute)": [[0, "timeseriesflattener.aggregators.MeanAggregator.name"]], "name (minaggregator attribute)": [[0, "timeseriesflattener.aggregators.MinAggregator.name"]], "name (slopeaggregator attribute)": [[0, "timeseriesflattener.aggregators.SlopeAggregator.name"]], "name (sumaggregator attribute)": [[0, "timeseriesflattener.aggregators.SumAggregator.name"]], "name (varianceaggregator attribute)": [[0, "timeseriesflattener.aggregators.VarianceAggregator.name"]], "new_col_name() (aggregator method)": [[0, "timeseriesflattener.aggregators.Aggregator.new_col_name"]], "timeseriesflattener.aggregators": [[0, "module-timeseriesflattener.aggregators"]], "timestamp_col_name (earliestaggregator attribute)": [[0, "timeseriesflattener.aggregators.EarliestAggregator.timestamp_col_name"]], "timestamp_col_name (latestaggregator attribute)": [[0, "timeseriesflattener.aggregators.LatestAggregator.timestamp_col_name"]], "timestamp_col_name (slopeaggregator attribute)": [[0, "timeseriesflattener.aggregators.SlopeAggregator.timestamp_col_name"]], "booleanoutcomespec (class in timeseriesflattener.feature_specs.outcome)": [[2, "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec"]], "lookperiod (class in timeseriesflattener.feature_specs.meta)": [[2, "timeseriesflattener.feature_specs.meta.LookPeriod"]], "outcomespec (class in timeseriesflattener.feature_specs.outcome)": [[2, "timeseriesflattener.feature_specs.outcome.OutcomeSpec"]], "predictiontimeframe (class in timeseriesflattener.feature_specs.prediction_times)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame"]], "predictorspec (class in timeseriesflattener.feature_specs.predictor)": [[2, "timeseriesflattener.feature_specs.predictor.PredictorSpec"]], "staticframe (class in timeseriesflattener.feature_specs.static)": [[2, "timeseriesflattener.feature_specs.static.StaticFrame"]], "staticspec (class in timeseriesflattener.feature_specs.static)": [[2, "timeseriesflattener.feature_specs.static.StaticSpec"]], "timedeltaspec (class in timeseriesflattener.feature_specs.timedelta)": [[2, "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec"]], "timestampvalueframe (class in timeseriesflattener.feature_specs.timestamp_frame)": [[2, "timeseriesflattener.feature_specs.timestamp_frame.TimestampValueFrame"]], "valueframe (class in timeseriesflattener.feature_specs.meta)": [[2, "timeseriesflattener.feature_specs.meta.ValueFrame"]], "aggregators (booleanoutcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec.aggregators"]], "aggregators (outcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.OutcomeSpec.aggregators"]], "aggregators (predictorspec attribute)": [[2, "timeseriesflattener.feature_specs.predictor.PredictorSpec.aggregators"]], "coerce_to_lazy (predictiontimeframe attribute)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame.coerce_to_lazy"]], "coerce_to_lazy (valueframe attribute)": [[2, "timeseriesflattener.feature_specs.meta.ValueFrame.coerce_to_lazy"]], "collect() (predictiontimeframe method)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame.collect"]], "collect() (staticframe method)": [[2, "timeseriesflattener.feature_specs.static.StaticFrame.collect"]], "collect() (timestampvalueframe method)": [[2, "timeseriesflattener.feature_specs.timestamp_frame.TimestampValueFrame.collect"]], "collect() (valueframe method)": [[2, "timeseriesflattener.feature_specs.meta.ValueFrame.collect"]], "column_prefix (booleanoutcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec.column_prefix"]], "column_prefix (outcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.OutcomeSpec.column_prefix"]], "column_prefix (predictorspec attribute)": [[2, "timeseriesflattener.feature_specs.predictor.PredictorSpec.column_prefix"]], "column_prefix (staticspec attribute)": [[2, "timeseriesflattener.feature_specs.static.StaticSpec.column_prefix"]], "column_prefix (timedeltaspec attribute)": [[2, "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec.column_prefix"]], "df (booleanoutcomespec property)": [[2, "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec.df"]], "df (outcomespec property)": [[2, "timeseriesflattener.feature_specs.outcome.OutcomeSpec.df"]], "df (predictorspec property)": [[2, "timeseriesflattener.feature_specs.predictor.PredictorSpec.df"]], "df (timedeltaspec property)": [[2, "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec.df"]], "entity_id_col_name (predictiontimeframe attribute)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame.entity_id_col_name"]], "entity_id_col_name (staticframe attribute)": [[2, "timeseriesflattener.feature_specs.static.StaticFrame.entity_id_col_name"]], "entity_id_col_name (timestampvalueframe attribute)": [[2, "timeseriesflattener.feature_specs.timestamp_frame.TimestampValueFrame.entity_id_col_name"]], "entity_id_col_name (valueframe attribute)": [[2, "timeseriesflattener.feature_specs.meta.ValueFrame.entity_id_col_name"]], "fallback (outcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.OutcomeSpec.fallback"]], "fallback (predictorspec attribute)": [[2, "timeseriesflattener.feature_specs.predictor.PredictorSpec.fallback"]], "fallback (staticspec attribute)": [[2, "timeseriesflattener.feature_specs.static.StaticSpec.fallback"]], "fallback (timedeltaspec attribute)": [[2, "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec.fallback"]], "first (lookperiod attribute)": [[2, "timeseriesflattener.feature_specs.meta.LookPeriod.first"]], "init_df (predictiontimeframe attribute)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame.init_df"]], "init_df (staticframe attribute)": [[2, "timeseriesflattener.feature_specs.static.StaticFrame.init_df"]], "init_df (timestampvalueframe attribute)": [[2, "timeseriesflattener.feature_specs.timestamp_frame.TimestampValueFrame.init_df"]], "init_df (valueframe attribute)": [[2, "timeseriesflattener.feature_specs.meta.ValueFrame.init_df"]], "init_frame (booleanoutcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec.init_frame"]], "init_frame (timedeltaspec attribute)": [[2, "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec.init_frame"]], "last (lookperiod attribute)": [[2, "timeseriesflattener.feature_specs.meta.LookPeriod.last"]], "lookahead_distances (booleanoutcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec.lookahead_distances"]], "lookahead_distances (outcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.OutcomeSpec.lookahead_distances"]], "lookbehind_distances (predictorspec attribute)": [[2, "timeseriesflattener.feature_specs.predictor.PredictorSpec.lookbehind_distances"]], "output_name (booleanoutcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec.output_name"]], "output_name (timedeltaspec attribute)": [[2, "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec.output_name"]], "prediction_time_uuid_col_name (predictiontimeframe attribute)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame.prediction_time_uuid_col_name"]], "required_columns() (predictiontimeframe method)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame.required_columns"]], "time_format (timedeltaspec attribute)": [[2, "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec.time_format"]], "timeseriesflattener.feature_specs.meta": [[2, "module-timeseriesflattener.feature_specs.meta"]], "timeseriesflattener.feature_specs.outcome": [[2, "module-timeseriesflattener.feature_specs.outcome"]], "timeseriesflattener.feature_specs.prediction_times": [[2, "module-timeseriesflattener.feature_specs.prediction_times"]], "timeseriesflattener.feature_specs.predictor": [[2, "module-timeseriesflattener.feature_specs.predictor"]], "timeseriesflattener.feature_specs.static": [[2, "module-timeseriesflattener.feature_specs.static"]], "timeseriesflattener.feature_specs.timedelta": [[2, "module-timeseriesflattener.feature_specs.timedelta"]], "timeseriesflattener.feature_specs.timestamp_frame": [[2, "module-timeseriesflattener.feature_specs.timestamp_frame"]], "timestamp_col_name (predictiontimeframe attribute)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame.timestamp_col_name"]], "value_frame (outcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.OutcomeSpec.value_frame"]], "value_frame (predictorspec attribute)": [[2, "timeseriesflattener.feature_specs.predictor.PredictorSpec.value_frame"]], "value_frame (staticspec attribute)": [[2, "timeseriesflattener.feature_specs.static.StaticSpec.value_frame"]], "value_timestamp_col_name (timestampvalueframe attribute)": [[2, "timeseriesflattener.feature_specs.timestamp_frame.TimestampValueFrame.value_timestamp_col_name"]], "value_timestamp_col_name (valueframe attribute)": [[2, "timeseriesflattener.feature_specs.meta.ValueFrame.value_timestamp_col_name"]], "flattener (class in timeseriesflattener.flattener)": [[3, "timeseriesflattener.flattener.Flattener"]], "missingcolumnnameerror": [[3, "timeseriesflattener.flattener.MissingColumnNameError"]], "specerror": [[3, "timeseriesflattener.flattener.SpecError"]], "specrequirementpair (class in timeseriesflattener.flattener)": [[3, "timeseriesflattener.flattener.SpecRequirementPair"]], "aggregate_timeseries() (flattener method)": [[3, "timeseriesflattener.flattener.Flattener.aggregate_timeseries"]], "compute_lazily (flattener attribute)": [[3, "timeseriesflattener.flattener.Flattener.compute_lazily"]], "description (missingcolumnnameerror attribute)": [[3, "timeseriesflattener.flattener.MissingColumnNameError.description"]], "description (specerror attribute)": [[3, "timeseriesflattener.flattener.SpecError.description"]], "missing_columns() (specrequirementpair method)": [[3, "timeseriesflattener.flattener.SpecRequirementPair.missing_columns"]], "n_workers (flattener attribute)": [[3, "timeseriesflattener.flattener.Flattener.n_workers"]], "predictiontime_frame (flattener attribute)": [[3, "timeseriesflattener.flattener.Flattener.predictiontime_frame"]], "required_columns (specrequirementpair attribute)": [[3, "timeseriesflattener.flattener.SpecRequirementPair.required_columns"]], "spec (specrequirementpair attribute)": [[3, "timeseriesflattener.flattener.SpecRequirementPair.spec"]], "timeseriesflattener.flattener": [[3, "module-timeseriesflattener.flattener"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["aggregators", "faq", "feature_specifications", "flattener", "index", "installation", "tutorials", "tutorials/01_basic", "tutorials/02_advanced", "tutorials/03_text", "tutorials/04_from_legacy"], "filenames": ["aggregators.rst", "faq.rst", "feature_specifications.rst", "flattener.rst", "index.rst", "installation.rst", "tutorials.rst", "tutorials/01_basic.ipynb", "tutorials/02_advanced.ipynb", "tutorials/03_text.ipynb", "tutorials/04_from_legacy.ipynb"], "titles": ["Aggregators", "Frequently Asked Questions", "Feature specifications", "Timeseriesflattener", "timeseriesflattener", "Installation", "Tutorials", "Introductory Tutorial", "Doing multiple things at once and time deltas", "Adding text features", "Creating features from legacy feature specifications"], "terms": {"class": [0, 2, 3, 7], "sourc": [0, 1, 2, 3], "base": [0, 2, 3, 8], "abc": 0, "name": [0, 2, 7, 8, 10], "str": [0, 2, 3, 9], "new_col_nam": 0, "previous_col_nam": 0, "countaggreg": 0, "return": [0, 8, 9], "count": [0, 7, 10], "non": 0, "null": 0, "valu": [0, 2, 4, 6, 7, 9, 10], "look": [0, 4, 7], "window": [0, 4, 7], "earliestaggreg": 0, "timestamp_col_nam": [0, 2, 7, 8, 9], "earliest": [0, 10], "hasvaluesaggreg": 0, "examin": 0, "whether": [0, 2], "ani": [0, 4, 7, 9], "exist": [0, 4], "column": [0, 2, 4, 7, 8, 9], "If": [0, 1, 2, 3, 7, 10], "so": [0, 8, 9, 10], "1": [0, 7, 8, 9, 10], "els": 0, "0": [0, 7, 8, 9, 10], "bool": [0, 2, 3], "latestaggreg": [0, 8], "latest": [0, 5, 10], "maxaggreg": [0, 7], "maximum": [0, 10], "max": [0, 4, 7], "meanaggreg": [0, 7, 8, 9], "mean": [0, 4, 7, 9, 10], "minaggreg": [0, 8], "minimum": [0, 10], "min": [0, 4, 7], "slopeaggreg": [0, 8], "slope": 0, "i": [0, 2, 4, 5, 7, 8, 9, 10], "e": [0, 1, 2, 4, 7], "correl": 0, "between": [0, 2, 5, 8], "timestamp": [0, 2, 7, 8, 9, 10], "sumaggreg": 0, "sum": [0, 10], "all": [0, 1, 7, 8, 9, 10], "varianceaggreg": 0, "varianc": [0, 10], "var": 0, "you": [1, 2, 3, 6, 7, 8, 9, 10], "wish": [1, 8], "us": [1, 2, 3, 4, 5, 6, 7, 8, 9], "librari": 1, "your": [1, 2, 5, 7], "research": 1, "pleas": [1, 4], "joss": 1, "paper": 1, "articl": 1, "bernstorff2023timeseriesflatten": 1, "titl": 1, "timeseriesflatten": [1, 5, 7, 8, 9, 10], "A": [1, 2, 3, 4, 7], "python": [1, 4], "summar": 1, "featur": [1, 3, 4, 6, 7], "from": [1, 4, 6, 7], "medic": [1, 4, 7], "time": [1, 2, 3, 4, 6], "seri": [1, 2, 3, 4, 7, 8], "author": 1, "bernstorff": 1, "martin": 1, "enevoldsen": 1, "kenneth": 1, "damgaard": 1, "jakob": 1, "danielsen": 1, "andrea": 1, "hansen": 1, "lass": 1, "journal": 1, "open": 1, "softwar": 1, "volum": 1, "8": 1, "number": [1, 3, 4, 7, 9], "83": 1, "page": [1, 4], "5197": 1, "year": [1, 2, 8], "2023": 1, "Or": 1, "prefer": 1, "apa": 1, "m": 1, "k": 1, "j": 1, "l": 1, "come": 1, "an": [1, 2, 4, 7, 8, 9], "extens": 1, "In": [1, 7, 8], "order": [1, 6], "ll": [1, 7], "usual": 1, "want": [1, 7, 8, 9, 10], "clone": 1, "repositori": 1, "build": 1, "also": [1, 6, 7], "instal": 1, "requir": [1, 4, 7, 8], "develop": 1, "depend": 1, "util": 1, "defin": [1, 8, 9, 10], "pyproject": 1, "toml": 1, "pip": [1, 5], "dev": 1, "pytest": 1, "which": [1, 2, 4, 6, 7, 8], "folder": 1, "specif": [1, 3, 6, 9], "can": [1, 2, 4, 6, 7, 8, 9], "desired_test": 1, "py": 1, "sphinx": 1, "It": [1, 7, 10], "furo": 1, "theme": 1, "custom": 1, "style": 1, "To": [1, 4, 5, 6, 7, 8, 9], "make": [1, 2, 4, 7, 8, 9], "doc": [1, 7], "text": [1, 6], "html": 1, "c": [1, 4, 9], "predictorspec": [2, 7, 8, 9, 10], "value_fram": [2, 7, 8], "valuefram": [2, 7, 8, 9], "lookbehind_dist": [2, 7, 8, 9], "initvar": 2, "sequenc": [2, 3], "dt": [2, 3, 7, 8, 9], "timedelta": [2, 3, 6, 7, 9], "tupl": [2, 7, 8], "aggreg": [2, 3, 4, 6, 7, 9, 10], "fallback": [2, 7, 8, 9, 10], "int": [2, 3, 8], "float": [2, 9], "none": [2, 3, 7], "column_prefix": [2, 7, 9], "pred": [2, 7], "object": [2, 3, 7], "predictor": [2, 4, 6, 8], "The": [2, 3, 4, 6, 7, 8, 10], "must": [2, 7], "contain": [2, 3, 7, 8, 9], "entity_id_col_nam": [2, 7, 8, 9], "entiti": [2, 7, 8], "id": [2, 4, 7, 8, 9], "value_timestamp_col_nam": [2, 7, 8, 9], "each": [2, 4, 6, 7, 8, 9], "addit": 2, "properti": 2, "df": [2, 7, 8, 9, 10], "pl": [2, 8, 9], "lazyfram": 2, "predictiontimefram": [2, 3, 7, 8, 9], "init_df": [2, 7, 8, 9], "dataclass": 2, "polar": [2, 3, 8, 9], "frame": [2, 3], "datafram": [2, 4, 6, 7, 9, 10], "panda": [2, 7, 10], "core": [2, 7], "entity_id": [2, 7, 8, 9], "pred_timestamp": 2, "prediction_time_uuid_col_nam": 2, "prediction_time_uuid": [2, 7], "coerce_to_lazi": 2, "true": [2, 3, 4, 10], "predict": [2, 3, 4, 6, 8, 9], "ar": [2, 4, 7, 8, 9, 10], "made": 2, "when": [2, 4, 7], "collect": [2, 7, 8, 9], "required_column": [2, 3], "lookperiod": [2, 6], "first": [2, 7, 8, 9], "last": [2, 7], "string": [2, 7], "": [2, 7, 8, 9, 10], "uniqu": [2, 7], "datetim": [2, 7, 8, 9], "booleanoutcomespec": [2, 7], "init_fram": [2, 7, 8], "timestampvaluefram": [2, 7, 8], "lookahead_dist": [2, 7], "output_nam": [2, 7, 8], "outc": [2, 7], "boolean": [2, 10], "outcom": [2, 4, 6], "g": [2, 4], "patient": [2, 4, 7, 9], "receiv": 2, "treatment": 2, "event": 2, "occur": [2, 4, 7], "outcomespec": [2, 7], "binari": [2, 7], "instead": [2, 7], "staticfram": [2, 7], "pd": [2, 7, 10], "staticspec": [2, 7, 8], "static": [2, 3, 6], "sex": [2, 7], "person": 2, "timedeltaspec": [2, 8], "time_format": [2, 8], "liter": 2, "second": [2, 8], "minut": [2, 8], "hour": [2, 8], "dai": [2, 7, 8, 9], "delta": [2, 6], "calcul": [2, 8], "ag": [2, 8], "sinc": [2, 7, 10], "certain": [2, 7], "desir": [2, 4], "comput": [2, 3, 9], "predictiontime_fram": [3, 7, 8, 9], "compute_lazili": 3, "fals": 3, "n_worker": [3, 7], "aggregate_timeseri": [3, 7, 8, 9], "spec": [3, 7, 8, 9, 10], "valuespecif": 3, "step_siz": 3, "aggregatedfram": 3, "perform": 3, "paramet": [3, 7], "creat": [3, 6, 7, 8, 9], "step": [3, 7], "size": [3, 7], "chunk": 3, "reduc": 3, "encount": 3, "memori": 3, "issu": [3, 4, 7], "multipl": [3, 4, 6, 7, 9], "irregular": [3, 4, 7], "set": [3, 4, 7, 8], "done": 3, "lazili": 3, "worker": 3, "multiprocess": 3, "handl": [3, 7, 8], "entir": [3, 7], "otherwis": [3, 4], "specifi": [3, 4, 6, 8], "joblib": 3, "except": [3, 7], "missingcolumnnameerror": 3, "descript": 3, "specerror": 3, "specrequirementpair": 3, "missing_column": 3, "iter": 3, "packag": [4, 6], "gener": [4, 6, 7, 10], "data": [4, 6, 8, 9], "machin": 4, "learn": 4, "model": [4, 7, 9], "implement": 4, "method": 4, "includ": [4, 9], "convert": 4, "singl": [4, 7, 8, 9], "row": [4, 7, 9], "construct": 4, "raw": 4, "allow": 4, "independ": 4, "particular": 4, "sever": [4, 9], "choic": 4, "one": [4, 7], "need": [4, 7, 9], "everi": [4, 7], "physic": 4, "visit": 4, "morn": 4, "anoth": [4, 7], "clinic": [4, 9], "meaning": 4, "how": [4, 6, 9], "far": [4, 7, 8, 9, 10], "back": 4, "ahead": [4, 7], "lookbehind": [4, 7, 8, 9], "lookahead": [4, 7], "point": [4, 7], "abov": [4, 8, 10], "figur": 4, "graphic": 4, "repres": [4, 7], "terminologi": [4, 7], "determin": [4, 7, 8], "wherea": 4, "futur": [4, 7], "refer": [4, 7], "b": 4, "label": [4, 7], "neg": 4, "never": [4, 7], "happen": [4, 7], "outsid": [4, 7], "onli": [4, 7, 9], "posit": [4, 7], "insid": [4, 7], "within": [4, 7, 9], "exampl": [4, 7, 8, 9], "shown": [4, 7], "thi": [4, 7, 8, 9, 10], "etc": [4, 7], "obtain": 4, "rich": 4, "represent": 4, "see": [4, 7, 8], "tutori": [4, 8, 9], "case": [4, 7], "report": 4, "request": 4, "github": [4, 5], "tracker": 4, "discuss": 4, "forum": 4, "type": [4, 7], "bug": 4, "idea": 4, "usag": 4, "index": 4, "get": [5, 8, 10], "start": [5, 8, 9], "run": [5, 6], "follow": [5, 7], "line": 5, "termin": 5, "There": 5, "discrep": 5, "version": [5, 10], "we": [6, 7, 8, 9, 10], "recommend": 6, "go": [6, 7], "through": 6, "list": [6, 7, 8, 9], "below": [6, 10], "jupyt": 6, "notebook": 6, "download": 6, "local": 6, "introductori": 6, "load": [6, 8, 9], "tempor": [6, 8], "flatten": [6, 8, 9], "do": [6, 7, 10], "thing": 6, "onc": [6, 7], "function": [6, 7, 9], "same": [6, 7], "ad": [6, 7], "dataset": [6, 7, 8], "embed": 6, "legaci": 6, "timeseri": 7, "especi": 7, "help": 7, "have": [7, 8, 9, 10], "complic": 7, "train": 7, "simpl": [7, 8, 10], "explain": 7, "appli": [7, 8], "consist": 7, "3": [7, 8, 9, 10], "simplest": 7, "predictin": 7, "two": [7, 8], "element": 7, "about": 7, "context": 7, "frequent": 7, "__future__": [7, 8, 9, 10], "import": [7, 8, 9, 10], "annot": [7, 8, 9, 10], "skimpi": 7, "skim": 7, "test": [7, 8, 9, 10], "load_synth_data": [7, 8, 9], "load_synth_prediction_tim": [7, 8, 9], "df_prediction_tim": [7, 8], "sort": [7, 8], "summari": 7, "10000": 7, "int64": 7, "2": [7, 8, 9, 10], "datetime64": 7, "column_nam": 7, "na": [7, 9], "sd": 7, "p0": 7, "p25": 7, "p50": 7, "p75": 7, "p100": 7, "hist": 7, "4959": 7, "2886": 7, "2485": 7, "4922": 7, "7443": 7, "9999": 7, "frequenc": 7, "1965": [7, 8, 9], "01": [7, 8, 9, 10], "02": [7, 8, 9], "09": [7, 8, 9], "35": [7, 8], "00": [7, 8, 9], "1969": 7, "12": [7, 8, 9], "31": [7, 8, 9], "21": [7, 8, 9], "42": 7, "end": 7, "shape": [7, 8, 9], "10_000": 7, "entity_idtimestampi64datetim": 7, "\u03bc": [7, 8, 9], "01969": 7, "11": [7, 8, 9], "55": [7, 8], "0011965": 7, "03": [7, 8], "15": [7, 8, 9], "07": [7, 8, 9], "16": [7, 8, 9], "0021969": 7, "13": [7, 9], "23": [7, 9], "18": [7, 8], "0031968": 7, "04": [7, 8, 9], "0041965": 7, "28": 7, "33": [7, 8], "hellip": 7, "99961965": 7, "30": [7, 8], "17": [7, 8, 9], "19": [7, 8, 9], "0099961965": 7, "0099971967": 7, "06": [7, 8, 9], "08": [7, 8], "52": 7, "0099991965": 7, "14": [7, 8, 9], "59": 7, "0099991968": 7, "22": [7, 8, 9], "24": [7, 8], "here": 7, "note": [7, 8, 9], "Then": 7, "our": [7, 8, 9], "differ": [7, 8], "timepoint": 7, "load_synth_predictor_float": [7, 8], "df_synth_predictor": [7, 8], "100000": 7, "float64": 7, "4994": 7, "2887": 7, "2486": 7, "4996": 7, "7487": 7, "4": [7, 8, 10], "983": 7, "885": 7, "0001514": 7, "483": 7, "975": 7, "7": [7, 9], "486": 7, "10": [7, 8, 9], "37": 7, "100_000": 7, "entity_idtimestampvaluei64datetim": [7, 8, 9], "f6401967": 7, "000": [7, 8, 9], "17479301968": 7, "45": 7, "003": [7, 8], "07229301968": 7, "05": [7, 8, 9], "001": 7, "31575401969": 7, "20": [7, 8, 9], "002": [7, 8], "81248101967": 7, "26": 7, "981185": 7, "99991968": 7, "67190799991966": 7, "34": 7, "004": [7, 8], "15879699991966": 7, "27": [7, 8], "41445599991968": 7, "58": 7, "55249199991969": 7, "501553": 7, "again": 7, "more": [7, 8], "describ": [7, 9], "could": 7, "doesn": 7, "t": 7, "chang": [7, 10], "over": 7, "let": [7, 8, 9], "load_synth_sex": 7, "df_synth_sex": 7, "4999": 7, "2500": 7, "7500": 7, "femal": 7, "4984": 7, "5": [7, 8, 9], "9_999": 7, "entity_idfemalei64i640011213140": 7, "9995099960999719998199990": 7, "As": [7, 9], "should": [7, 8, 9, 10], "And": [7, 8, 9, 10], "lastli": 7, "ve": [7, 8], "chosen": 7, "store": 7, "experi": 7, "infer": 7, "thei": [7, 8], "section": 7, "load_synth_outcom": 7, "df_synth_outcom": 7, "3103": 7, "5032": 7, "2900": 7, "2499": 7, "5109": 7, "7555": 7, "9992": 7, "50": 7, "3_103": 7, "i6493821965": 7, "49": [7, 8], "00175181967": 7, "00148591969": 7, "00164911966": 7, "00148741969": 7, "76781968": 7, "29": 7, "00141021966": 7, "0012321965": 7, "00152431969": 7, "00125571968": 7, "25": [7, 9], "most": [7, 10], "per": [7, 8], "now": [7, 8, 9], "recip": 7, "finish": 7, "firstli": 7, "main": 7, "decis": 7, "given": 7, "indic": 7, "code": 7, "test_df": 7, "2020": 7, "outcome_spec": 7, "365": [7, 8, 9], "present": 7, "argument": 7, "default": 7, "found": 7, "values_df": 7, "For": [7, 9], "hardcod": 7, "decid": 7, "least": 7, "correspond": 7, "both": 7, "accomplish": 7, "specifii": 7, "forward": 7, "search": 7, "period": [7, 9], "befor": [7, 9], "interv": [7, 8], "min_dai": 7, "max_dai": 7, "its": 7, "almost": 7, "ident": 7, "past": 7, "numpi": [7, 8, 9], "np": [7, 8, 9], "temporal_predictor_spec": 7, "renam": 7, "value_1": 7, "nan": [7, 8, 9], "730": [7, 8, 9], "output": [7, 8, 9, 10], "after": 7, "avoid": 7, "input": 7, "rang": 7, "similar": [7, 8], "instanc": 7, "might": [7, 8], "where": [7, 8], "182": 7, "easili": 7, "pass": [7, 8, 9], "lookbehind_dai": [7, 10], "temporal_interval_predictor_spec": 7, "value_2": 7, "slightli": [7, 8], "previou": 7, "provid": 7, "howev": 7, "add": [7, 8], "prefix": 7, "By": 7, "filter": [7, 8, 9], "easi": 7, "manual": 7, "sex_predictor_spec": 7, "don": 7, "re": [7, 9], "readi": [7, 9, 10], "instanti": 7, "along": 7, "metadata": [7, 9], "call": 7, "add_": 7, "parallel": 7, "oper": 7, "across": [7, 8], "process": [7, 8, 9], "pred_female_fallback_": 7, "4931": 7, "pred_value_1_within_0": 7, "1072": 7, "72": 7, "842": 7, "01491": 7, "851": 7, "023": 7, "6": [7, 8], "178": 7, "9": [7, 8], "946": 7, "_to_730_days_mean_f": 7, "lback_nan": 7, "pred_value_2_within_1": 7, "2060": 7, "008": 7, "222": 7, "0003901": 7, "014": 7, "56": 7, "997": 7, "0_to_365_days_mean_fa": 7, "llback_nan": 7, "outc_outcome_within_0": 7, "_to_365_days_max_fal": 7, "back_0": 7, "word": 7, "total": 7, "20000": 7, "pred_female_fallback_nan": 7, "pred_value_1_within_0_to_730_days_mean_fallback_nan": 7, "pred_value_2_within_10_to_365_days_mean_fallback_nan": 7, "outc_outcome_within_0_to_365_days_max_fallback_0": 7, "display": 7, "shorten": 7, "col": [7, 8], "shortened_pr": 7, "predx": 7, "shortened_predinterv": 7, "predx_30_to_90": 7, "shortened_outcom": 7, "outc_i": 7, "display_df": 7, "entity_idtimestampprediction_time_uuidpred_female_fallback_nanpredxpredx_30_to_90outc_yi64datetim": 7, "stri64f64f64i3298521965": 7, "quot": [7, 8, 9], "9852": [7, 8], "00000": [7, 8, 9], "1nannan014671965": 7, "1467": [7, 8], "0nannan011251965": 7, "1125": [7, 8], "0nannan06491965": 7, "649": [7, 8], "000000": [7, 8], "0nannan020701965": 7, "2070": [7, 8], "1nannan0": 7, "3341969": 7, "32": 7, "334": 7, "252526nan033631969": 7, "3363": 7, "6796672": 7, "409664079291969": 7, "7929": 7, "9435857": 7, "475979060021969": 7, "6002": 7, "5935837": 7, "07659808641969": 7, "864": 7, "520416nan0": 7, "classif": 7, "citizen": 7, "identifi": 7, "prediciton": 7, "pred_": 7, "outc_": 7, "realiti": 8, "d": 8, "like": [8, 9, 10], "addition": 8, "some": [8, 9], "common": 8, "current": 8, "than": 8, "what": 8, "cover": [8, 9], "luckili": 8, "extrem": 8, "lookdist": 8, "combin": 8, "just": [8, 9], "suppli": 8, "off": 8, "interfac": 8, "featuer": 8, "work": [8, 10], "exactli": 8, "wai": 8, "illustr": 8, "head": [8, 9], "f6494761969": 8, "81699546311967": 8, "48": 8, "81807438901969": 8, "50378910981965": 8, "53": 8, "51504116261966": 8, "353115": 8, "helper": 8, "def": [8, 9], "make_timedelta_interv": 8, "start_dai": 8, "end_dai": 8, "predictor_spec": 8, "break": 8, "down": 8, "distanc": 8, "therefor": 8, "expect": 8, "n_aggreg": 8, "n_lookbehind_dist": 8, "entity_idtimestampprediction_time_uuidpred_value_within_0_to_30_days_mean_fallback_nanpred_value_within_0_to_30_days_latest_fallback_nanpred_value_within_30_to_365_days_mean_fallback_nanpred_value_within_30_to_365_days_latest_fallback_nanpred_value_within_365_to_730_days_mean_fallback_nanpred_value_within_365_to_730_days_latest_fallback_nani64datetim": 8, "strf64f64f64f64f64f6498521965": 8, "nannannannannannan14671965": 8, "nannannannannannan11251965": 8, "nannannannannannan6491965": 8, "nannannannannannan20701965": 8, "nannannannannannan": 8, "sometim": 8, "measur": 8, "manner": 8, "simpli": [8, 9], "new": 8, "simul": 8, "with_column": 8, "new_predictor": 8, "random": 8, "rand": 8, "entity_idtimestampvaluenew_predictori64datetim": 8, "f64f6494761969": 8, "8169950": 8, "67355446311967": 8, "8180740": 8, "27790438901969": 8, "5037890": 8, "26975510981965": 8, "5150410": 8, "7209716261966": 8, "3531150": 8, "16028": 8, "try": 8, "allgreg": 8, "n_predictor": 8, "entity_idtimestampprediction_time_uuidpred_value_within_0_to_30_days_min_fallback_nanpred_new_predictor_within_0_to_30_days_min_fallback_nanpred_value_within_0_to_30_days_slope_fallback_nanpred_new_predictor_within_0_to_30_days_slope_fallback_nanpred_value_within_30_to_365_days_min_fallback_nanpred_new_predictor_within_30_to_365_days_min_fallback_nanpred_value_within_30_to_365_days_slope_fallback_nanpred_new_predictor_within_30_to_365_days_slope_fallback_nanpred_value_within_365_to_730_days_min_fallback_nanpred_new_predictor_within_365_to_730_days_min_fallback_nanpred_value_within_365_to_730_days_slope_fallback_nanpred_new_predictor_within_365_to_730_days_slope_fallback_nani64datetim": 8, "strf64f64f64f64f64f64f64f64f64f64f64f6498521965": 8, "nannannannannannannannannannannannan14671965": 8, "nannannannannannannannannannannannan11251965": 8, "nannannannannannannannannannannannan6491965": 8, "nannannannannannannannannannannannan20701965": 8, "nannannannannannannannannannannannan": 8, "commonli": 8, "seen": 8, "u": 8, "birthdat": 8, "date": 8, "birth": 8, "load_synth_birthdai": 8, "df_birthdai": 8, "entity_idbirthdayi64datetim": 8, "90451932": 8, "0055321920": 8, "41": [8, 9], "0022421917": 8, "007891930": 8, "51": 8, "0097151926": 8, "age_spec": 8, "birthdai": 8, "without": 8, "entri": 8, "format": [8, 9], "take": [8, 9], "entity_idtimestampprediction_time_uuidpred_age_years_fallback_nani64datetim": 8, "strf6498521965": 8, "90965114671965": 8, "929511251965": 8, "5701576491965": 8, "09377120701965": 8, "886379": 8, "sure": 8, "9903": 8, "strf6499031965": 8, "36": 8, "67077399031968": 8, "1968": 8, "39": 8, "154004": 8, "dealt": 9, "tabular": 9, "show": 9, "out": 9, "alreadi": 9, "synthet": 9, "other": 9, "load_synth_text": 9, "synth_text": 9, "str46471967": 9, "went": 9, "medica": 9, "20071966": 9, "taken": 9, "em": 9, "57991967": 9, "13191969": 9, "had": [9, 10], "been": 9, "left": 9, "42341966": 9, "often": 9, "while": 9, "advantag": 9, "emb": 9, "speed": 9, "up": 9, "block": 9, "tf": 9, "idf": 9, "form": 9, "constraint": 9, "result": 9, "entity_id_col": 9, "timestamp_col": 9, "purpos": 9, "demonstr": 9, "fit": 9, "small": 9, "captur": 9, "sklearn": 9, "feature_extract": 9, "tfidfvector": 9, "embed_text_to_df": 9, "tfidf_model": 9, "max_featur": 9, "fit_transform": 9, "toarrai": 9, "schema": 9, "get_feature_names_out": 9, "tolist": 9, "embedded_text": 9, "to_list": 9, "drop": 9, "origin": 9, "metadata_onli": 9, "concaten": 9, "embedded_text_with_metadata": 9, "concat": 9, "horizont": 9, "entity_idtimestampandforinoforpatientthatthetowasi64datetim": 9, "f64f64f64f64f64f64f64f64f64f6446471967": 9, "1758720": 9, "1820660": 9, "2498480": 9, "158430": 9, "0230420": 9, "3113890": 9, "5299660": 9, "4902030": 9, "47931220071966": 9, "244870": 9, "1352820": 9, "0643370": 9, "4650840": 9, "3368590": 9, "1517430": 9, "7298610": 9, "1791610": 9, "057991967": 9, "1923670": 9, "2323320": 9, "2834020": 9, "3369520": 9, "1764220": 9, "2384160": 9, "6468790": 9, "2502170": 9, "38227713191969": 9, "1656350": 9, "2000460": 9, "1830150": 9, "2611150": 9, "1258370": 9, "1519060": 9, "2052850": 9, "7595280": 9, "4039610": 9, "09874742341966": 9, "4934610": 9, "1191960": 9, "2726190": 9, "2074440": 9, "0452560": 9, "1834750": 9, "5883240": 9, "4332530": 9, "235349": 9, "text_spec": 9, "pred_tfidf": 9, "would": 9, "normal": 9, "wa": 9, "check": 9, "selector": 9, "sake": 9, "all_horizont": 9, "is_not_nan": 9, "entity_idtimestampprediction_time_uuidpred_tfidf_and_within_0_to_365_days_mean_fallback_nanpred_tfidf_for_within_0_to_365_days_mean_fallback_nanpred_tfidf_in_within_0_to_365_days_mean_fallback_nanpred_tfidf_of_within_0_to_365_days_mean_fallback_nanpred_tfidf_or_within_0_to_365_days_mean_fallback_nanpred_tfidf_patient_within_0_to_365_days_mean_fallback_nanpred_tfidf_that_within_0_to_365_days_mean_fallback_nanpred_tfidf_the_within_0_to_365_days_mean_fallback_nanpred_tfidf_to_within_0_to_365_days_mean_fallback_nanpred_tfidf_was_within_0_to_365_days_mean_fallback_nanpred_tfidf_and_within_0_to_730_days_mean_fallback_nanpred_tfidf_for_within_0_to_730_days_mean_fallback_nanpred_tfidf_in_within_0_to_730_days_mean_fallback_nanpred_tfidf_of_within_0_to_730_days_mean_fallback_nanpred_tfidf_or_within_0_to_730_days_mean_fallback_nanpred_tfidf_patient_within_0_to_730_days_mean_fallback_nanpred_tfidf_that_within_0_to_730_days_mean_fallback_nanpred_tfidf_the_within_0_to_730_days_mean_fallback_nanpred_tfidf_to_within_0_to_730_days_mean_fallback_nanpred_tfidf_was_within_0_to_730_days_mean_fallback_nani64datetim": 9, "strf64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f6468401965": 9, "6840": 9, "1558210": 9, "3763860": 9, "2582560": 9, "5731680": 9, "3551420": 9, "0714520": 9, "0965610": 9, "285810": 9, "456030": 9, "0928960": 9, "09289620391966": 9, "2039": 9, "1966": 9, "1080150": 9, "5967440": 9, "113520": 9, "0990620": 9, "1338720": 9, "6934310": 9, "2107470": 9, "2575810": 9, "25758194961966": 9, "44": 9, "9496": 9, "2799550": 9, "309330": 9, "2942220": 9, "2567490": 9, "5134980": 9, "5462160": 9, "33380": 9, "333872811967": 9, "7281": 9, "1967": 9, "2896630": 9, "043730": 9, "2800490": 9, "3044250": 9, "3851110": 9, "3320650": 9, "2692510": 9, "4648910": 9, "2119340": 9, "3885470": 9, "38854774241967": 9, "7424": 9, "1539070": 9, "0929410": 9, "1700560": 9, "1078340": 9, "3897560": 9, "2822990": 9, "0635830": 9, "6822220": 9, "4754520": 9, "user": 10, "rewrit": 10, "written": 10, "api": 10, "predictorgroupspec": 10, "those": 10, "were": 10, "ones": 10, "sai": 10, "v1": 10, "aggregation_fn": 10, "change_per_dai": 10, "feature_spec": 10, "group_spec": 10, "nameddatafram": 10, "single_spec": 10, "version1predictorspec": 10, "legacy_spec": 10, "named_datafram": 10, "2013": 10, "dw_ek_borg": 10, "test2": 10, "create_combin": 10, "print": 10, "f": 10, "isinst": 10, "compat": 10, "replac": 10, "version2predictorspec": 10, "noqa": 10, "era001": 10, "from_legaci": 10, "new_spec": 10}, "objects": {"timeseriesflattener": [[0, 0, 0, "-", "aggregators"], [3, 0, 0, "-", "flattener"]], "timeseriesflattener.aggregators": [[0, 1, 1, "", "Aggregator"], [0, 1, 1, "", "CountAggregator"], [0, 1, 1, "", "EarliestAggregator"], [0, 1, 1, "", "HasValuesAggregator"], [0, 1, 1, "", "LatestAggregator"], [0, 1, 1, "", "MaxAggregator"], [0, 1, 1, "", "MeanAggregator"], [0, 1, 1, "", "MinAggregator"], [0, 1, 1, "", "SlopeAggregator"], [0, 1, 1, "", "SumAggregator"], [0, 1, 1, "", "VarianceAggregator"]], "timeseriesflattener.aggregators.Aggregator": [[0, 2, 1, "", "name"], [0, 3, 1, "", "new_col_name"]], "timeseriesflattener.aggregators.CountAggregator": [[0, 2, 1, "", "name"]], "timeseriesflattener.aggregators.EarliestAggregator": [[0, 2, 1, "", "name"], [0, 2, 1, "", "timestamp_col_name"]], "timeseriesflattener.aggregators.HasValuesAggregator": [[0, 2, 1, "", "name"]], "timeseriesflattener.aggregators.LatestAggregator": [[0, 2, 1, "", "name"], [0, 2, 1, "", "timestamp_col_name"]], "timeseriesflattener.aggregators.MaxAggregator": [[0, 2, 1, "", "name"]], "timeseriesflattener.aggregators.MeanAggregator": [[0, 2, 1, "", "name"]], "timeseriesflattener.aggregators.MinAggregator": [[0, 2, 1, "", "name"]], "timeseriesflattener.aggregators.SlopeAggregator": [[0, 2, 1, "", "name"], [0, 2, 1, "", "timestamp_col_name"]], "timeseriesflattener.aggregators.SumAggregator": [[0, 2, 1, "", "name"]], "timeseriesflattener.aggregators.VarianceAggregator": [[0, 2, 1, "", "name"]], "timeseriesflattener.feature_specs": [[2, 0, 0, "-", "meta"], [2, 0, 0, "-", "outcome"], [2, 0, 0, "-", "prediction_times"], [2, 0, 0, "-", "predictor"], [2, 0, 0, "-", "static"], [2, 0, 0, "-", "timedelta"], [2, 0, 0, "-", "timestamp_frame"]], "timeseriesflattener.feature_specs.meta": [[2, 1, 1, "", "LookPeriod"], [2, 1, 1, "", "ValueFrame"]], "timeseriesflattener.feature_specs.meta.LookPeriod": [[2, 2, 1, "", "first"], [2, 2, 1, "", "last"]], "timeseriesflattener.feature_specs.meta.ValueFrame": [[2, 2, 1, "", "coerce_to_lazy"], [2, 3, 1, "", "collect"], [2, 2, 1, "", "entity_id_col_name"], [2, 2, 1, "", "init_df"], [2, 2, 1, "", "value_timestamp_col_name"]], "timeseriesflattener.feature_specs.outcome": [[2, 1, 1, "", "BooleanOutcomeSpec"], [2, 1, 1, "", "OutcomeSpec"]], "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec": [[2, 2, 1, "", "aggregators"], [2, 2, 1, "", "column_prefix"], [2, 4, 1, "", "df"], [2, 2, 1, "", "init_frame"], [2, 2, 1, "", "lookahead_distances"], [2, 2, 1, "", "output_name"]], "timeseriesflattener.feature_specs.outcome.OutcomeSpec": [[2, 2, 1, "", "aggregators"], [2, 2, 1, "", "column_prefix"], [2, 4, 1, "", "df"], [2, 2, 1, "", "fallback"], [2, 2, 1, "", "lookahead_distances"], [2, 2, 1, "", "value_frame"]], "timeseriesflattener.feature_specs.prediction_times": [[2, 1, 1, "", "PredictionTimeFrame"]], "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame": [[2, 2, 1, "", "coerce_to_lazy"], [2, 3, 1, "", "collect"], [2, 2, 1, "", "entity_id_col_name"], [2, 2, 1, "", "init_df"], [2, 2, 1, "", "prediction_time_uuid_col_name"], [2, 3, 1, "", "required_columns"], [2, 2, 1, "", "timestamp_col_name"]], "timeseriesflattener.feature_specs.predictor": [[2, 1, 1, "", "PredictorSpec"]], "timeseriesflattener.feature_specs.predictor.PredictorSpec": [[2, 2, 1, "", "aggregators"], [2, 2, 1, "", "column_prefix"], [2, 4, 1, "", "df"], [2, 2, 1, "", "fallback"], [2, 2, 1, "", "lookbehind_distances"], [2, 2, 1, "", "value_frame"]], "timeseriesflattener.feature_specs.static": [[2, 1, 1, "", "StaticFrame"], [2, 1, 1, "", "StaticSpec"]], "timeseriesflattener.feature_specs.static.StaticFrame": [[2, 3, 1, "", "collect"], [2, 2, 1, "", "entity_id_col_name"], [2, 2, 1, "", "init_df"]], "timeseriesflattener.feature_specs.static.StaticSpec": [[2, 2, 1, "", "column_prefix"], [2, 2, 1, "", "fallback"], [2, 2, 1, "", "value_frame"]], "timeseriesflattener.feature_specs.timedelta": [[2, 1, 1, "", "TimeDeltaSpec"]], "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec": [[2, 2, 1, "", "column_prefix"], [2, 4, 1, "", "df"], [2, 2, 1, "", "fallback"], [2, 2, 1, "", "init_frame"], [2, 2, 1, "", "output_name"], [2, 2, 1, "", "time_format"]], "timeseriesflattener.feature_specs.timestamp_frame": [[2, 1, 1, "", "TimestampValueFrame"]], "timeseriesflattener.feature_specs.timestamp_frame.TimestampValueFrame": [[2, 3, 1, "", "collect"], [2, 2, 1, "", "entity_id_col_name"], [2, 2, 1, "", "init_df"], [2, 2, 1, "", "value_timestamp_col_name"]], "timeseriesflattener.flattener": [[3, 1, 1, "", "Flattener"], [3, 5, 1, "", "MissingColumnNameError"], [3, 5, 1, "", "SpecError"], [3, 1, 1, "", "SpecRequirementPair"]], "timeseriesflattener.flattener.Flattener": [[3, 3, 1, "", "aggregate_timeseries"], [3, 2, 1, "", "compute_lazily"], [3, 2, 1, "", "n_workers"], [3, 2, 1, "", "predictiontime_frame"]], "timeseriesflattener.flattener.MissingColumnNameError": [[3, 2, 1, "", "description"]], "timeseriesflattener.flattener.SpecError": [[3, 2, 1, "", "description"]], "timeseriesflattener.flattener.SpecRequirementPair": [[3, 3, 1, "", "missing_columns"], [3, 2, 1, "", "required_columns"], [3, 2, 1, "", "spec"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:attribute", "3": "py:method", "4": "py:property", "5": "py:exception"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "method", "Python method"], "4": ["py", "property", "Python property"], "5": ["py", "exception", "Python exception"]}, "titleterms": {"aggreg": [0, 8], "timeseriesflatten": [0, 2, 3, 4], "frequent": 1, "ask": [1, 4], "question": [1, 4], "cite": 1, "thi": 1, "packag": 1, "how": [1, 7], "do": [1, 8], "i": 1, "test": 1, "code": 1, "run": 1, "suit": 1, "document": 1, "gener": [1, 9], "featur": [2, 8, 9, 10], "specif": [2, 7, 10], "feature_spec": 2, "flatten": [3, 7], "function": [4, 8], "where": 4, "indic": 4, "search": 4, "instal": 5, "tutori": [6, 7], "get": 6, "start": 6, "introductori": 7, "load": 7, "data": 7, "predict": 7, "time": [7, 8], "tempor": 7, "predictor": [7, 9], "static": 7, "outcom": 7, "specifi": 7, "multipl": 8, "thing": 8, "onc": 8, "delta": 8, "lookperiod": 8, "valu": 8, "from": [8, 9, 10], "same": 8, "datafram": 8, "timedelta": 8, "ad": 9, "text": 9, "The": 9, "dataset": 9, "embed": 9, "creat": 10, "legaci": 10}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Aggregators": [[0, "aggregators"]], "timeseriesflattener.aggregators": [[0, "module-timeseriesflattener.aggregators"]], "Frequently Asked Questions": [[1, "frequently-asked-questions"]], "Citing this package": [[1, "citing-this-package"]], "How do I test the code and run the test suite?": [[1, "how-do-i-test-the-code-and-run-the-test-suite"]], "How is the documentation generated?": [[1, "how-is-the-documentation-generated"]], "Feature specifications": [[2, "feature-specifications"]], "timeseriesflattener.feature_specs": [[2, "module-timeseriesflattener.feature_specs.predictor"]], "Timeseriesflattener": [[3, "timeseriesflattener"]], "timeseriesflattener.flattener": [[3, "module-timeseriesflattener.flattener"]], "timeseriesflattener": [[4, "timeseriesflattener"]], "Functionality": [[4, "functionality"]], "Where to ask questions?": [[4, "where-to-ask-questions"]], "Indices and search": [[4, "indices-and-search"]], "Installation": [[5, "installation"]], "Tutorials": [[6, "tutorials"]], "Getting started": [[6, null]], "Introductory Tutorial": [[7, "introductory-tutorial"]], "Loading data": [[7, "loading-data"]], "Loading prediction times": [[7, "loading-prediction-times"]], "Loading a temporal predictor": [[7, "loading-a-temporal-predictor"]], "Loading a static predictor": [[7, "loading-a-static-predictor"]], "Loading a temporal outcome": [[7, "loading-a-temporal-outcome"]], "Specifying how to flatten the data": [[7, "specifying-how-to-flatten-the-data"]], "Temporal outcome specification": [[7, "temporal-outcome-specification"]], "Temporal predictor specification": [[7, "temporal-predictor-specification"]], "Static predictor specification": [[7, "static-predictor-specification"]], "Flattening": [[7, "flattening"]], "Doing multiple things at once and time deltas": [[8, "doing-multiple-things-at-once-and-time-deltas"]], "Multiple aggregation functions and lookperiods": [[8, "multiple-aggregation-functions-and-lookperiods"]], "Multiple values from the same dataframe": [[8, "multiple-values-from-the-same-dataframe"]], "TimeDelta features": [[8, "timedelta-features"]], "Adding text features": [[9, "adding-text-features"]], "The dataset": [[9, "the-dataset"]], "Generating predictors from embedded text": [[9, "generating-predictors-from-embedded-text"]], "Creating features from legacy feature specifications": [[10, "creating-features-from-legacy-feature-specifications"]]}, "indexentries": {"aggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.Aggregator"]], "countaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.CountAggregator"]], "earliestaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.EarliestAggregator"]], "hasvaluesaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.HasValuesAggregator"]], "latestaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.LatestAggregator"]], "maxaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.MaxAggregator"]], "meanaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.MeanAggregator"]], "minaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.MinAggregator"]], "slopeaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.SlopeAggregator"]], "sumaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.SumAggregator"]], "varianceaggregator (class in timeseriesflattener.aggregators)": [[0, "timeseriesflattener.aggregators.VarianceAggregator"]], "module": [[0, "module-timeseriesflattener.aggregators"], [2, "module-timeseriesflattener.feature_specs.meta"], [2, "module-timeseriesflattener.feature_specs.outcome"], [2, "module-timeseriesflattener.feature_specs.prediction_times"], [2, "module-timeseriesflattener.feature_specs.predictor"], [2, "module-timeseriesflattener.feature_specs.static"], [2, "module-timeseriesflattener.feature_specs.timedelta"], [2, "module-timeseriesflattener.feature_specs.timestamp_frame"], [3, "module-timeseriesflattener.flattener"]], "name (aggregator attribute)": [[0, "timeseriesflattener.aggregators.Aggregator.name"]], "name (countaggregator attribute)": [[0, "timeseriesflattener.aggregators.CountAggregator.name"]], "name (earliestaggregator attribute)": [[0, "timeseriesflattener.aggregators.EarliestAggregator.name"]], "name (hasvaluesaggregator attribute)": [[0, "timeseriesflattener.aggregators.HasValuesAggregator.name"]], "name (latestaggregator attribute)": [[0, "timeseriesflattener.aggregators.LatestAggregator.name"]], "name (maxaggregator attribute)": [[0, "timeseriesflattener.aggregators.MaxAggregator.name"]], "name (meanaggregator attribute)": [[0, "timeseriesflattener.aggregators.MeanAggregator.name"]], "name (minaggregator attribute)": [[0, "timeseriesflattener.aggregators.MinAggregator.name"]], "name (slopeaggregator attribute)": [[0, "timeseriesflattener.aggregators.SlopeAggregator.name"]], "name (sumaggregator attribute)": [[0, "timeseriesflattener.aggregators.SumAggregator.name"]], "name (varianceaggregator attribute)": [[0, "timeseriesflattener.aggregators.VarianceAggregator.name"]], "new_col_name() (aggregator method)": [[0, "timeseriesflattener.aggregators.Aggregator.new_col_name"]], "timeseriesflattener.aggregators": [[0, "module-timeseriesflattener.aggregators"]], "timestamp_col_name (earliestaggregator attribute)": [[0, "timeseriesflattener.aggregators.EarliestAggregator.timestamp_col_name"]], "timestamp_col_name (latestaggregator attribute)": [[0, "timeseriesflattener.aggregators.LatestAggregator.timestamp_col_name"]], "timestamp_col_name (slopeaggregator attribute)": [[0, "timeseriesflattener.aggregators.SlopeAggregator.timestamp_col_name"]], "booleanoutcomespec (class in timeseriesflattener.feature_specs.outcome)": [[2, "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec"]], "lookperiod (class in timeseriesflattener.feature_specs.meta)": [[2, "timeseriesflattener.feature_specs.meta.LookPeriod"]], "outcomespec (class in timeseriesflattener.feature_specs.outcome)": [[2, "timeseriesflattener.feature_specs.outcome.OutcomeSpec"]], "predictiontimeframe (class in timeseriesflattener.feature_specs.prediction_times)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame"]], "predictorspec (class in timeseriesflattener.feature_specs.predictor)": [[2, "timeseriesflattener.feature_specs.predictor.PredictorSpec"]], "staticframe (class in timeseriesflattener.feature_specs.static)": [[2, "timeseriesflattener.feature_specs.static.StaticFrame"]], "staticspec (class in timeseriesflattener.feature_specs.static)": [[2, "timeseriesflattener.feature_specs.static.StaticSpec"]], "timedeltaspec (class in timeseriesflattener.feature_specs.timedelta)": [[2, "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec"]], "timestampvalueframe (class in timeseriesflattener.feature_specs.timestamp_frame)": [[2, "timeseriesflattener.feature_specs.timestamp_frame.TimestampValueFrame"]], "valueframe (class in timeseriesflattener.feature_specs.meta)": [[2, "timeseriesflattener.feature_specs.meta.ValueFrame"]], "aggregators (booleanoutcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec.aggregators"]], "aggregators (outcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.OutcomeSpec.aggregators"]], "aggregators (predictorspec attribute)": [[2, "timeseriesflattener.feature_specs.predictor.PredictorSpec.aggregators"]], "coerce_to_lazy (predictiontimeframe attribute)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame.coerce_to_lazy"]], "coerce_to_lazy (valueframe attribute)": [[2, "timeseriesflattener.feature_specs.meta.ValueFrame.coerce_to_lazy"]], "collect() (predictiontimeframe method)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame.collect"]], "collect() (staticframe method)": [[2, "timeseriesflattener.feature_specs.static.StaticFrame.collect"]], "collect() (timestampvalueframe method)": [[2, "timeseriesflattener.feature_specs.timestamp_frame.TimestampValueFrame.collect"]], "collect() (valueframe method)": [[2, "timeseriesflattener.feature_specs.meta.ValueFrame.collect"]], "column_prefix (booleanoutcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec.column_prefix"]], "column_prefix (outcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.OutcomeSpec.column_prefix"]], "column_prefix (predictorspec attribute)": [[2, "timeseriesflattener.feature_specs.predictor.PredictorSpec.column_prefix"]], "column_prefix (staticspec attribute)": [[2, "timeseriesflattener.feature_specs.static.StaticSpec.column_prefix"]], "column_prefix (timedeltaspec attribute)": [[2, "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec.column_prefix"]], "df (booleanoutcomespec property)": [[2, "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec.df"]], "df (outcomespec property)": [[2, "timeseriesflattener.feature_specs.outcome.OutcomeSpec.df"]], "df (predictorspec property)": [[2, "timeseriesflattener.feature_specs.predictor.PredictorSpec.df"]], "df (timedeltaspec property)": [[2, "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec.df"]], "entity_id_col_name (predictiontimeframe attribute)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame.entity_id_col_name"]], "entity_id_col_name (staticframe attribute)": [[2, "timeseriesflattener.feature_specs.static.StaticFrame.entity_id_col_name"]], "entity_id_col_name (timestampvalueframe attribute)": [[2, "timeseriesflattener.feature_specs.timestamp_frame.TimestampValueFrame.entity_id_col_name"]], "entity_id_col_name (valueframe attribute)": [[2, "timeseriesflattener.feature_specs.meta.ValueFrame.entity_id_col_name"]], "fallback (outcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.OutcomeSpec.fallback"]], "fallback (predictorspec attribute)": [[2, "timeseriesflattener.feature_specs.predictor.PredictorSpec.fallback"]], "fallback (staticspec attribute)": [[2, "timeseriesflattener.feature_specs.static.StaticSpec.fallback"]], "fallback (timedeltaspec attribute)": [[2, "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec.fallback"]], "first (lookperiod attribute)": [[2, "timeseriesflattener.feature_specs.meta.LookPeriod.first"]], "init_df (predictiontimeframe attribute)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame.init_df"]], "init_df (staticframe attribute)": [[2, "timeseriesflattener.feature_specs.static.StaticFrame.init_df"]], "init_df (timestampvalueframe attribute)": [[2, "timeseriesflattener.feature_specs.timestamp_frame.TimestampValueFrame.init_df"]], "init_df (valueframe attribute)": [[2, "timeseriesflattener.feature_specs.meta.ValueFrame.init_df"]], "init_frame (booleanoutcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec.init_frame"]], "init_frame (timedeltaspec attribute)": [[2, "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec.init_frame"]], "last (lookperiod attribute)": [[2, "timeseriesflattener.feature_specs.meta.LookPeriod.last"]], "lookahead_distances (booleanoutcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec.lookahead_distances"]], "lookahead_distances (outcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.OutcomeSpec.lookahead_distances"]], "lookbehind_distances (predictorspec attribute)": [[2, "timeseriesflattener.feature_specs.predictor.PredictorSpec.lookbehind_distances"]], "output_name (booleanoutcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.BooleanOutcomeSpec.output_name"]], "output_name (timedeltaspec attribute)": [[2, "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec.output_name"]], "prediction_time_uuid_col_name (predictiontimeframe attribute)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame.prediction_time_uuid_col_name"]], "required_columns() (predictiontimeframe method)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame.required_columns"]], "time_format (timedeltaspec attribute)": [[2, "timeseriesflattener.feature_specs.timedelta.TimeDeltaSpec.time_format"]], "timeseriesflattener.feature_specs.meta": [[2, "module-timeseriesflattener.feature_specs.meta"]], "timeseriesflattener.feature_specs.outcome": [[2, "module-timeseriesflattener.feature_specs.outcome"]], "timeseriesflattener.feature_specs.prediction_times": [[2, "module-timeseriesflattener.feature_specs.prediction_times"]], "timeseriesflattener.feature_specs.predictor": [[2, "module-timeseriesflattener.feature_specs.predictor"]], "timeseriesflattener.feature_specs.static": [[2, "module-timeseriesflattener.feature_specs.static"]], "timeseriesflattener.feature_specs.timedelta": [[2, "module-timeseriesflattener.feature_specs.timedelta"]], "timeseriesflattener.feature_specs.timestamp_frame": [[2, "module-timeseriesflattener.feature_specs.timestamp_frame"]], "timestamp_col_name (predictiontimeframe attribute)": [[2, "timeseriesflattener.feature_specs.prediction_times.PredictionTimeFrame.timestamp_col_name"]], "value_frame (outcomespec attribute)": [[2, "timeseriesflattener.feature_specs.outcome.OutcomeSpec.value_frame"]], "value_frame (predictorspec attribute)": [[2, "timeseriesflattener.feature_specs.predictor.PredictorSpec.value_frame"]], "value_frame (staticspec attribute)": [[2, "timeseriesflattener.feature_specs.static.StaticSpec.value_frame"]], "value_timestamp_col_name (timestampvalueframe attribute)": [[2, "timeseriesflattener.feature_specs.timestamp_frame.TimestampValueFrame.value_timestamp_col_name"]], "value_timestamp_col_name (valueframe attribute)": [[2, "timeseriesflattener.feature_specs.meta.ValueFrame.value_timestamp_col_name"]], "flattener (class in timeseriesflattener.flattener)": [[3, "timeseriesflattener.flattener.Flattener"]], "missingcolumnnameerror": [[3, "timeseriesflattener.flattener.MissingColumnNameError"]], "specerror": [[3, "timeseriesflattener.flattener.SpecError"]], "specrequirementpair (class in timeseriesflattener.flattener)": [[3, "timeseriesflattener.flattener.SpecRequirementPair"]], "aggregate_timeseries() (flattener method)": [[3, "timeseriesflattener.flattener.Flattener.aggregate_timeseries"]], "compute_lazily (flattener attribute)": [[3, "timeseriesflattener.flattener.Flattener.compute_lazily"]], "description (missingcolumnnameerror attribute)": [[3, "timeseriesflattener.flattener.MissingColumnNameError.description"]], "description (specerror attribute)": [[3, "timeseriesflattener.flattener.SpecError.description"]], "missing_columns() (specrequirementpair method)": [[3, "timeseriesflattener.flattener.SpecRequirementPair.missing_columns"]], "n_workers (flattener attribute)": [[3, "timeseriesflattener.flattener.Flattener.n_workers"]], "predictiontime_frame (flattener attribute)": [[3, "timeseriesflattener.flattener.Flattener.predictiontime_frame"]], "required_columns (specrequirementpair attribute)": [[3, "timeseriesflattener.flattener.SpecRequirementPair.required_columns"]], "spec (specrequirementpair attribute)": [[3, "timeseriesflattener.flattener.SpecRequirementPair.spec"]], "timeseriesflattener.flattener": [[3, "module-timeseriesflattener.flattener"]]}}) \ No newline at end of file diff --git a/tutorials/01_basic.html b/tutorials/01_basic.html index 18c604c3..a3710a65 100644 --- a/tutorials/01_basic.html +++ b/tutorials/01_basic.html @@ -452,7 +452,7 @@

Loading a temporal outcomeentity_idtimestampvaluei64datetime[μs]i648041967-03-22 20:51:00164881965-09-10 06:50:0015301966-12-16 04:24:00198461969-03-20 01:20:00118581968-12-28 18:17:001………45221969-07-12 01:32:00193451965-05-24 03:47:00146531968-01-28 14:15:00171781966-11-30 08:13:00114141967-06-07 15:23:001

+shape: (3_103, 3)
entity_idtimestampvalue
i64datetime[μs]i64
93821965-04-24 13:49:001
75181967-05-22 15:26:001
48591969-02-19 10:31:001
64911966-05-26 15:22:001
48741969-10-23 15:07:001
76781968-08-29 11:09:001
41021966-10-08 06:12:001
2321965-07-17 05:04:001
52431969-08-21 11:23:001
25571968-07-22 02:25:001

This dataframe should contain at most 1 row per ID, which is the first time they experience the outcome.

We now have 4 dataframes loaded: df_prediction_times, df_synth_predictors, df_synth_sex and df_synth_outcome.

@@ -607,7 +607,7 @@

Flattening -
Processing spec: ['female']
+
Processing spec: ['female']
 
Processing spec: ['value_1']
 
@@ -807,7 +807,7 @@

Flattening - + diff --git a/tutorials/02_advanced.html b/tutorials/02_advanced.html index 590bb864..26de8ccf 100644 --- a/tutorials/02_advanced.html +++ b/tutorials/02_advanced.html @@ -340,7 +340,7 @@

Multiple aggregation functions and lookperiods -

+shape: (5, 4)
entity_idtimestampvaluenew_predictor
i64datetime[μs]f64f64
94761969-03-05 08:08:000.8169950.673554
46311967-04-10 22:48:004.8180740.277904
38901969-12-15 14:07:002.5037890.269755
10981965-11-19 03:53:003.5150410.72097
16261966-05-03 14:07:004.3531150.16028

We make a PredictorSpec similar to above. Let’s try some new aggregators.

@@ -430,7 +430,7 @@

Multiple values from the same dataframe -
Processing spec: ['value', 'new_predictor']
+
Processing spec: ['value', 'new_predictor']
 

 
@@ -494,7 +494,7 @@ 

TimeDelta features -
Processing spec: ['age']
+
Processing spec: ['age']
 

 
@@ -617,7 +617,7 @@ 

TimeDelta features - + diff --git a/tutorials/03_text.html b/tutorials/03_text.html index d3d4ede8..0be6f9a6 100644 --- a/tutorials/03_text.html +++ b/tutorials/03_text.html @@ -369,7 +369,7 @@

Generating predictors from embedded text -
Processing spec: ['and', 'for', 'in', 'of', 'or', 'patient', 'that', 'the', 'to', 'was']
+
Processing spec: ['and', 'for', 'in', 'of', 'or', 'patient', 'that', 'the', 'to', 'was']
 

 
@@ -487,7 +487,7 @@ 

Generating predictors from embedded text - +