-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcomb_data_rev5.R
80 lines (61 loc) · 2.78 KB
/
comb_data_rev5.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
library(tidyverse)
library(timetk)
# Load data ----
# Three CSV exports are read: production, weather and gross-production
# features.
prod_data <- read_csv(
  "../../../../data/db_pull_production_data_raw_20201208.csv"
)

# Column types are spelled out for the weather file: "T" is a date-time
# column, each "n" a numeric column. Empty rows are skipped while reading.
weather_data <- read_csv(
  "../../../../data/db_pull_weather_data_raw_20201208.csv",
  col_types = "Tnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn",
  skip_empty_rows = TRUE
)

feat_gross_prod_data <- read_csv(
  "../../../../data/db_pull_feature_gross_production_20201209.csv"
)
# Clean data ----
# Every table gets the same treatment: drop incomplete rows, then sort by
# time. arrange() must receive the bare column name: arrange('Time') orders
# by a constant string and leaves the rows in their original order.
prod_data <- prod_data %>%
  drop_na() %>%
  arrange(Time) %>%
  mutate(
    # Negative values in columns whose name contains "Wind" are replaced by
    # NA. This runs after drop_na(), so these NAs stay in prod_data.
    across(
      .cols = contains("Wind"),
      .fns = function(x) ifelse(x < 0, NA, x)
    )
  )

weather_data <- weather_data %>%
  drop_na() %>%
  arrange(Time)

feat_gross_prod_data <- feat_gross_prod_data %>%
  drop_na() %>%
  arrange(Time)

# Discard production rows that precede the weather record. weather_data is
# sorted above, so its first row carries the earliest weather timestamp.
prod_data <- prod_data %>%
  filter_by_time(
    .start_date = weather_data %>% head(1) %>% pull(Time)
  )
# Prepare data ----
# Put the weather and feature tables on a regular 5-minute grid.
# pad_by_time() adds a row for every missing timestamp (new cells are NA) and
# ts_impute_vec(period = 1) then fills those gaps by linear interpolation.
# across() replaces the superseded mutate_at(vars(...)) form.
weather_data_padded <- weather_data %>%
  pad_by_time(.date_var = Time, .by = "5 min") %>%
  mutate(
    across(
      .cols = `0_wind_speed_ms`:`9_temp_c`,
      .fns = function(x) ts_impute_vec(x, period = 1)
    )
  )

feat_gross_prod_data_padded <- feat_gross_prod_data %>%
  pad_by_time(.date_var = Time, .by = "5 min") %>%
  mutate(
    across(
      .cols = `0_wind`:`4_wind`,
      .fns = function(x) ts_impute_vec(x, period = 1)
    )
  )
# Combine production with the padded weather and feature tables on Time.
# matches() ignores case by default, so "(Wind)" also matches lower-case
# "wind" column names.
comb_data <- prod_data %>%
  select(matches("(Time)|(Wind)")) %>%
  left_join(weather_data_padded, by = "Time") %>%
  left_join(feat_gross_prod_data_padded, by = "Time") %>%
  select(matches("(Time)|(Wind)|wind|temp")) %>%
  # Remove rows left incomplete by the joins or by earlier NA replacement.
  drop_na() %>%
  # Sort by the Time column itself; arrange('Time') would order by a constant
  # string and therefore not sort anything.
  arrange(Time) %>%
  # Cube every column whose name contains "_ms".
  # NOTE(review): presumably because wind power scales with the cube of wind
  # speed -- confirm.
  mutate(across(matches("_ms"), function(x) x^3))

# Auto- and cross-correlation diagnostics of Wind, with `1_wind` as the
# cross-correlation variable.
comb_data %>%
  plot_acf_diagnostics(
    Time,
    Wind,
    .ccf_vars = `1_wind`
  )
# Build the final modelling table:
#   1. re-pad the combined data to a 5-minute grid and interpolate the gaps
#      (across() replaces the superseded mutate_at(vars(...)) form);
#   2. add lagged copies of every "_wind" / "temp" column at 1, 9 and 276
#      rows, i.e. 5 min, 45 min and 23 h on the 5-minute grid;
#   3. drop rows that are incomplete, which includes the leading rows that
#      have no lagged value yet;
#   4. keep the columns matching the pattern below.
# matches() ignores case by default, so the "(Time)" and "(Wind)" alternatives
# alone already cover every column whose name contains "time" or "wind"; the
# remaining alternatives are redundant but kept verbatim. Columns matching
# none of them (e.g. the temperature columns) are dropped. Time_week,
# Time_hour, Time_month and Time_quarter are not created by any step visible
# in this script.
comb_data_padded <- comb_data %>%
  pad_by_time(.date_var = Time, .by = "5 min") %>%
  mutate(
    across(
      .cols = `Wind`:`4_wind`,
      .fns = function(x) ts_impute_vec(x, period = 1)
    )
  ) %>%
  tk_augment_lags(
    .value = matches("_wind|temp"),
    .lags = c(1, 9, 276)
  ) %>%
  drop_na() %>%
  select(matches("(Time)|(Wind)|(1_wind_speed_ms_lag9)|(1_wind_lag9)|(1_wind_lag1)|(1_wind_speed_ms)|(1_wind_speed_ms_lag1)|(1_wind)|(3_wind_speed_ms_lag9)|(3_wind_speed_ms_lag1)|(3_wind_speed_ms)|(3_wind_lag9)|(2_wind)|(2_wind_lag9)|(2_wind_speed_ms)|(3_wind_lag1)|(2_wind_speed_ms_lag9)|(3_wind)|(2_wind_speed_ms_lag1)|(2_wind_lag1)|(Time_week)|(4_wind_lag9)|(4_wind_speed_ms_lag9)|(1_wind_speed_ms_lag276)|(4_wind_speed_ms_lag1)|(1_wind_lag276)|(Time_hour)|(2_wind_lag276)|(4_wind_speed_ms)|(Time_month)|(3_wind_lag276)|(2_wind_speed_ms_lag276)|(3_wind_speed_ms_lag276)|(4_wind_lag1)|(Time_quarter)|(4_wind)"
  ))

# Persist the processed table for downstream use.
saveRDS(comb_data_padded, "../../data/processed/comb_data_rev5.rds")