-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1.3_lasso_adoption.r
76 lines (54 loc) · 2.31 KB
/
1.3_lasso_adoption.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
library(glmnet)
library(doParallel)
library(jsonlite)
source("1.0_utilities.r")
#0. LOAD THE DATA####
# Read the imputed dataset. check.names = FALSE keeps the column headers
# exactly as they appear in the CSV instead of rewriting them into
# syntactic R names.
df <- read.csv(
  file = "data/input/imputed.csv",
  check.names = FALSE
)
#1. SET PARAMETERS####
# Outcome column and how it is pre-processed.
dependant <- "chatgpt_weekly_usages"
transformation <- "flat" # the outcome is log-transformed only when this is "log"
family <- "nb"
theta <- NULL # optional; only appears in the output folder name when set
include_zeroes <- "no_zeroes"
# Settings handed to perform_iterations() further down.
nfold <- 5
n_iterations <- 1000
sample_partition <- 0.8
seed <- 2023
n_plots_to_save <- 10
# Half the number of iterations, truncated to an integer.
threshold <- as.integer(n_iterations / 2)
# Output folder:
#   data/LASSOs/<dependant>-<transformation>[-<theta>]-<include_zeroes>-<family>
# The theta segment is inserted only for the "nb" family with a non-NULL theta;
# every other combination uses the same name without it.
theta_tag <- if (family == "nb" && !is.null(theta)) paste0(theta, "-") else ""
dist_folder <- file.path(
  "data/LASSOs",
  paste0(dependant, "-", transformation, "-", theta_tag, include_zeroes, "-", family)
)
# Create the output folder if it does not exist yet. recursive = TRUE also
# creates missing parent folders (e.g. "data/LASSOs" on a fresh checkout);
# without it the creation fails and the write below errors out.
# An existing folder is skipped explicitly, so a genuine creation failure
# still surfaces as a warning instead of being silenced.
if (!dir.exists(dist_folder)) {
  dir.create(dist_folder, recursive = TRUE)
}
# Save the run parameters as JSON (jsonlite) next to the results so the run
# can be identified later.
# NOTE(review): with toJSON() defaults, scalars are written as one-element
# arrays and a NULL theta as {} -- confirm any reader of parameters.json
# expects that before changing the serialisation options.
writeLines(
  toJSON(list(
    dependant = dependant,
    transformation = transformation,
    family = family,
    theta = theta,
    include_zeroes = include_zeroes,
    nfold = nfold,
    n_iterations = n_iterations,
    sample_partition = sample_partition,
    seed = seed,
    n_plots_to_save = n_plots_to_save
  )),
  file.path(dist_folder, "parameters.json")
)
#2 SUBSET THE DATAFRAME####
# filter_df() is not defined in this file (presumably it comes from
# 1.0_utilities.r -- confirm). It receives the data, the path to the list of
# independent variables, the zero-handling option and the outcome name.
df <- filter_df(df, "data/raw/independent_variables.txt", include_zeroes, dependant)
# Print the variance-to-mean ratio of the outcome.
cat("Variance-to-Mean Ratio:", var(df[,dependant]) / mean(df[,dependant]), "\n")
if (transformation == "log") {
  # log() is only finite for strictly positive values: log(0) is -Inf and
  # log() of a negative number is NaN. Fail early with a clear message rather
  # than passing non-finite outcomes on to the ratio and the model fit.
  if (any(df[,dependant] <= 0, na.rm = TRUE)) {
    stop(
      "Cannot log-transform '", dependant,
      "': it contains zero or negative values.",
      call. = FALSE
    )
  }
  # log transform the dependant variable
  df[,dependant] <- log(df[,dependant])
  # Print the ratio again, now on the log scale.
  cat("Variance-to-Mean Ratio (after logged):", var(df[,dependant]) / mean(df[,dependant]), "\n")
}
#3.LASSO ITERATIONS ####
# Run the iterations. perform_iterations() is not defined in this file
# (presumably it comes from 1.0_utilities.r -- confirm); all arguments are
# passed positionally, so their order must match its signature.
results <- perform_iterations(
  df, seed, n_iterations, threshold, sample_partition, dist_folder,
  dependant, family, nfold, n_plots_to_save, theta
)
# The result is indexed like a list: the first element is kept as the
# coefficient summary and the second as the selected features (names taken
# from the variables below; contents depend on perform_iterations()).
df_coefs_summ <- results[[1]]
selected_features <- results[[2]]
print(selected_features)