diff --git a/DESCRIPTION b/DESCRIPTION index 557b6691..8c244f15 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,6 +7,6 @@ Description: Implements the SOFUN modelling framework for simulating terrestrial calibration and benchmarking. License: GPL-3 LazyData: true -Depends: dplyr, purrr, lubridate, tidyr, raster, lubridate, stringi, sp, gensa, stringr, rland, readr +Depends: dplyr, purrr, lubridate, tidyr, raster, stringi, sp, GenSA, stringr, rlang, readr RoxygenNote: 7.1.0 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 5c16501a..09222013 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,17 +2,11 @@ export(calib_sofun) export(collect_drivers_sofun) -export(download_from_remote) +export(collect_obs_eval) export(eval_sofun) export(filter_days) -export(get_obs_bysite_fluxnet2015) -export(get_obs_bysite_gpp_gepisat) -export(get_obs_calib) -export(get_obs_eval) -export(get_obs_eval2) export(get_stats) export(init_dates_dataframe) -export(load_dependencies_rsofun) export(oob_calib_eval_sofun) export(prepare_setup_sofun) export(run_sofun_f_bysite) @@ -30,6 +24,7 @@ import(readr) import(rlang) import(stringr) import(tidyr) +importFrom(GenSA,GenSA) importFrom(grDevices,boxplot.stats) importFrom(grDevices,col2rgb) importFrom(grDevices,colorRampPalette) diff --git a/R/calib_sofun.R b/R/calib_sofun.R index a179587d..1ba3a603 100644 --- a/R/calib_sofun.R +++ b/R/calib_sofun.R @@ -2,68 +2,44 @@ #' #' This is the main function that handles the calibration of SOFUN model parameters. #' -#' @param settings_calib A list containing model calibration settings. See vignette_rsofun.pdf for more information and examples. #' @param df_drivers A nested data frame with one row per site, containing site information and forcing time series, as returned by \code{collect_drivers_sofun()}. -#' @param settings_input A list containing model input settings. See vignette_rsofun.pdf for more information and examples. -#' @param ddf_obs (Optional) A data frame containing observational data used for model calibration. Created by function \code{get_obs()} +#' @param ddf_obs A data frame containing observational data used for model calibration. Created by function \code{get_obs_calib2()} +#' @param settings A list containing model calibration settings. See vignette_rsofun.pdf for more information and examples. #' #' @return The named list of calibration settings, complemented by the optimised parameter values.
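A note on the expected shape of ddf_obs: the function body below unnests a list-column named 'data' and renames 'gpp' to 'gpp_obs', so observations must be passed site-nested. A minimal sketch, assuming a flat daily table df_obs_daily (hypothetical name) with columns sitename, date, gpp and, if available, gpp_unc:

ddf_obs <- df_obs_daily %>%
  dplyr::group_by(sitename) %>%
  tidyr::nest()    # one row per site; daily observations in the 'data' list-column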
#' @export #' -#' @examples settings_calib <- calib_sofun( setup, settings_calib, settings_sims, settings_input ) +#' @examples \dontrun{ calib_sofun( df_drivers = filter(df_drivers, sitename %in% settings$sitenames), ddf_obs = ddf_obs_calib, settings = settings)} #' -calib_sofun <- function( settings_calib, df_drivers, settings_input = NA, ddf_obs = NA ){ +calib_sofun <- function( df_drivers, ddf_obs, settings ){ - targetvars <- paste0( settings_calib$targetvars, "_obs") + targetvars <- paste0( settings$targetvars, "_obs") ## Use only calibsites df_drivers <- df_drivers %>% - dplyr::filter(sitename %in% settings_calib$sitenames) - - ##---------------------------------------------------------------- - ## Collect observational data used as calibration target - ##---------------------------------------------------------------- - if (identical(NA,ddf_obs)){ - print("Collecting observational target data ...") - ddf_obs <- get_obs_calib( - settings_calib, - dplyr::select(df_drivers, sitename, siteinfo) %>% tidyr::unnest(siteinfo), - settings_input - ) - } - - ##---------------------------------------------------------------- - ## Apply filters - ##---------------------------------------------------------------- - ## "filtering" pre-MODIS data - idxs <- which(ddf_obs$date < lubridate::ymd("2000-02-18")) - make_na_premodis <- function(vec, idxs){ - vec[idxs] <- NA - return(vec) - } - ddf_obs <- ddf_obs %>% - dplyr::mutate_at( vars(one_of(targetvars)), ~make_na_premodis(., idxs) ) - + dplyr::filter(sitename %in% settings$sitenames) ## make global - targetvars_with_unc <- c(targetvars, paste0(settings_calib$targetvars, "_unc")) + targetvars_with_unc <- c(targetvars, paste0(settings$targetvars, "_unc")) ddf_obs <- ddf_obs %>% + tidyr::unnest(data) %>% + rename(gpp_obs = gpp) %>% dplyr::select( date, sitename, one_of( targetvars_with_unc ) ) %>% - dplyr::filter( sitename %in% settings_calib$sitenames ) + dplyr::filter( sitename %in% settings$sitenames ) if (nrow(ddf_obs)>0){ ## Example run for getting structure of output file - if (identical(names(settings_calib$par),"kphio")){ + if (identical(names(settings$par), "kphio")){ ## For calibrating quantum yield efficiency only # cost_rmse <- cost_chisquared_kphio cost_rmse <- cost_rmse_kphio - } else if ( "kphio" %in% names(settings_calib$par) && "soilm_par_a" %in% names(settings_calib$par) && "soilm_par_b" %in% names(settings_calib$par) ){ + } else if ( "kphio" %in% names(settings$par) && "soilm_par_a" %in% names(settings$par) && "soilm_par_b" %in% names(settings$par) ){ ## Full stack calibration cost_rmse <- cost_rmse_fullstack - } else if ( "vpdstress_par_a" %in% names(settings_calib$par) && "vpdstress_par_b" %in% names(settings_calib$par) && "vpdstress_par_m" %in% names(settings_calib$par) ){ + } else if ( "vpdstress_par_a" %in% names(settings$par) && "vpdstress_par_b" %in% names(settings$par) && "vpdstress_par_m" %in% names(settings$par) ){ ## Calibration of VPD stress function (P-model runs with soilmstress and tempstress on) # cost_rmse <- cost_chisquared_vpdstress cost_rmse <- cost_rmse_vpdstress @@ -74,21 +50,21 @@ calib_sofun <- function( settings_calib, df_drivers, settings_input = NA, ddf_ob ## Do the calibration ##---------------------------------------------------------------- ## check directory where calibration results are stored - if (!dir.exists(settings_calib$dir_results)) system( paste0( "mkdir -p ", settings_calib$dir_results) ) + if (!dir.exists(settings$dir_results)) system( paste0( "mkdir -p ", settings$dir_results) ) - if 
(settings_calib$method=="gensa"){ + if (settings$method=="gensa"){ ##---------------------------------------------------------------- ## calibrate the model parameters using GenSA (simulated annealing) ##---------------------------------------------------------------- ptm <- proc.time() out_optim <- GenSA( - par = lapply( settings_calib$par, function(x) x$init ) %>% unlist(), # initial parameter value, if NULL will be generated automatically + par = lapply( settings$par, function(x) x$init ) %>% unlist(), # initial parameter value, if NULL will be generated automatically fn = cost_rmse, - lower = lapply( settings_calib$par, function(x) x$lower ) %>% unlist(), - upper = lapply( settings_calib$par, function(x) x$upper ) %>% unlist(), + lower = lapply( settings$par, function(x) x$lower ) %>% unlist(), + upper = lapply( settings$par, function(x) x$upper ) %>% unlist(), control=list( #temperature=4000, - max.call=settings_calib$maxit, + max.call=settings$maxit, trace.mat=TRUE, threshold.stop=1e-4, max.time=300 @@ -99,15 +75,15 @@ calib_sofun <- function( settings_calib, df_drivers, settings_input = NA, ddf_ob out_optim$time_optim <- proc.time() - ptm print(out_optim$par) - filn <- paste0( settings_calib$dir_results, "/out_gensa_", settings_calib$name, ".Rdata") + filn <- paste0( settings$dir_results, "/out_gensa_", settings$name, ".Rdata") print( paste0( "writing output from GenSA function to ", filn ) ) save( out_optim, file = filn ) # ## Test plot with the same SOFUN call - # plot_test_kphio( out_optim$par, subtitle = "", label = paste("CALIB test", settings_calib$name), makepdf = FALSE ) + # plot_test_kphio( out_optim$par, subtitle = "", label = paste("CALIB test", settings$name), makepdf = FALSE ) - } else if (settings_calib$method=="optimr"){ + } else if (settings$method=="optimr"){ ##---------------------------------------------------------------- ## Calibrate the model parameters using R optimr(). Optimr is a ## wrapper that implements different optimization methods. @@ -117,22 +93,22 @@ calib_sofun <- function( settings_calib, df_drivers, settings_input = NA, ddf_ob ##---------------------------------------------------------------- ptm <- proc.time() out_optim <- optimr( - par = lapply( settings_calib$par, function(x) x$init ) %>% unlist(), # initial parameter value, if NULL will be generated automatically + par = lapply( settings$par, function(x) x$init ) %>% unlist(), # initial parameter value, if NULL will be generated automatically fn = cost_rmse, - control = list( maxit = settings_calib$maxit ) + control = list( maxit = settings$maxit ) ) proc.time() - ptm print(out_optim$par) - filn <- paste0( settings_calib$dir_results, "/out_optimr_", settings_calib$name, ".Rdata") + filn <- paste0( settings$dir_results, "/out_optimr_", settings$name, ".Rdata") print( paste0( "writing output from optimr function to ", filn ) ) save( out_optim, file = filn ) # ## Test plot with the same SOFUN call - # plot_test_kphio( out_optim$par, subtitle = "", label = paste("CALIB test", settings_calib$name), makepdf = FALSE ) + # plot_test_kphio( out_optim$par, subtitle = "", label = paste("CALIB test", settings$name), makepdf = FALSE ) - } else if (settings_calib$method=="BayesianTools"){ + } else if (settings$method=="BayesianTools"){ ##---------------------------------------------------------------- ## Calibrate the model parameters using Bayesian calibration ## implemented in 'BayesianTools'. 
I.e., the posterior density is @@ -140,15 +116,15 @@ calib_sofun <- function( settings_calib, df_drivers, settings_input = NA, ddf_ob ##---------------------------------------------------------------- ptm <- proc.time() bt_setup <- createBayesianSetup( cost_rmse( inverse=TRUE ), - lower = apply( settings_calib$par, function(x) x$lower ) %>% unlist(), - upper = lapply( settings_calib$par, function(x) x$upper ) %>% unlist() + lower = lapply( settings$par, function(x) x$lower ) %>% unlist(), + upper = lapply( settings$par, function(x) x$upper ) %>% unlist() ) - bt_settings <- list( iterations = settings_calib$maxit, message = TRUE ) + bt_settings <- list( iterations = settings$maxit, message = TRUE ) out_optim <- runMCMC( bayesianSetup = bt_setup, sampler = "DEzs", settings = bt_settings ) proc.time() - ptm - } else if (settings_calib$method=="linscale"){ + } else if (settings$method=="linscale"){ ##---------------------------------------------------------------- ## Calibrate the quantum yield efficiency parameter ('kphio') ## applied as a linear scalar to simulated GPP. @@ -159,22 +135,22 @@ calib_sofun <- function( settings_calib, df_drivers, settings_input = NA, ddf_ob ptm <- proc.time() out_optim <- optimr( - par = lapply( settings_calib$par, function(x) x$init ) %>% unlist(), # initial parameter value, if NULL will be generated automatically + par = lapply( settings$par, function(x) x$init ) %>% unlist(), # initial parameter value, if NULL will be generated automatically fn = cost_linscale_rmse, - control = list( maxit = settings_calib$maxit ) + control = list( maxit = settings$maxit ) ) proc.time() - ptm print(out_optim$par) - filn <- paste0( settings_calib$dir_results, "/out_linscale_", settings_calib$name, ".Rdata") + filn <- paste0( settings$dir_results, "/out_linscale_", settings$name, ".Rdata") print( paste0( "writing output from linscale function to ", filn ) ) save( out_optim, file = filn ) } # ## xxx Test--------------------------- - # parvals <- seq( from = lapply( settings_calib$par, function(x) x$lower ) %>% unlist(), - # to = lapply( settings_calib$par, function(x) x$upper ) %>% unlist(), + # parvals <- seq( from = lapply( settings$par, function(x) x$lower ) %>% unlist(), + # to = lapply( settings$par, function(x) x$upper ) %>% unlist(), # length.out = 20 ) # cost <- purrr::map( as.list( parvals ), ~cost_rmse( par = list(.) 
) ) %>% unlist() # plot( parvals, cost, pch=16 ) @@ -182,31 +158,31 @@ calib_sofun <- function( settings_calib, df_drivers, settings_input = NA, ddf_ob # ## ----------------------------------- ## save optimised parameters - names(out_optim$par) <- names(settings_calib$par) - for (parnam in names(settings_calib$par)){ - settings_calib$par[[ parnam ]]$opt <- out_optim$par[ parnam ] + names(out_optim$par) <- names(settings$par) + for (parnam in names(settings$par)){ + settings$par[[ parnam ]]$opt <- out_optim$par[ parnam ] } - settings_calib$par_opt <- out_optim$par + settings$par_opt <- out_optim$par } else { print("WARNING: No observational target data left after filtering.") - settings_calib$par$kphio$opt <- NA - settings_calib$par$temp_ramp_edge$opt <- NA - settings_calib$par$soilm_par_a$opt <- NA - settings_calib$par$soilm_par_b$opt <- NA + settings$par$kphio$opt <- NA + settings$par$temp_ramp_edge$opt <- NA + settings$par$soilm_par_a$opt <- NA + settings$par$soilm_par_b$opt <- NA } ## Write calibrated parameters into a CSV file - vec <- unlist( unname( lapply( settings_calib$par, function(x) x$opt )) ) - df <- as_tibble(t(vec)) %>% setNames( names(settings_calib$par) ) - filn <- paste0( settings_calib$dir_results, "/params_opt_", settings_calib$name, ".csv") + vec <- unlist( unname( lapply( settings$par, function(x) x$opt )) ) + df <- as_tibble(t(vec)) %>% setNames( names(settings$par) ) + filn <- paste0( settings$dir_results, "/params_opt_", settings$name, ".csv") print( paste0( "writing calibrated parameters to ", filn ) ) write_csv( df, path = filn ) - return( settings_calib ) + return( settings ) } @@ -226,7 +202,7 @@ cost_rmse_kphio <- function( par, ddf_obs, df_drivers, inverse = FALSE ){ vpdstress_par_b = 0.2, vpdstress_par_m = 5 ) - + df <- runread_sofun_f( df_drivers, params_modl = params_modl, @@ -430,194 +406,4 @@ cost_mae <- function( par ){ return(cost) } -#' Get observational data for calibration -#' -#' Gets observational data for model calibration by looping over sites -#' -#' @param settings_calib A list containing model calibration settings. See vignette_rsofun.pdf for more information and examples. -#' @param settings_sims A list containing model simulation settings from \code{\link{prepare_setup_sofun}}. See vignette_rsofun.pdf for more information and examples. -#' @param settings_input A list containing model input settings. See vignette_rsofun.pdf for more information and examples. -#' -#' @return A data frame (tibble) containing observational data used for model calibration -#' @export -#' -#' @examples ddf_obs <- get_obs_calib( settings_calib, settings_sims, settings_input ) -#' -##------------------------------------------------------------ -## Gets data by looping over sites -##------------------------------------------------------------ -get_obs_calib <- function( settings_calib, settings_sims, settings_input ){ - - - ##------------------------------------------------------------ - ## Read raw observational data from files. - ## This creates a data frame (tibble) that contains - ## a column 'date' and columns for each target variable. The number of rows - ## corresponds to each simulation's length (number of days). 
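For reference, the settings list consumed by calib_sofun() must carry the fields referenced throughout the function above. A minimal sketch, assuming a GenSA calibration of kphio alone; all values and site codes are illustrative:

settings <- list(
  name        = "kphio_calib",              # used to name the output files
  method      = "gensa",                    # "gensa", "optimr", "BayesianTools" or "linscale"
  targetvars  = c("gpp"),                   # matched against "gpp_obs" in ddf_obs
  sitenames   = c("FR-Pue", "DE-Tha"),      # sites used for calibration
  maxit       = 100,                        # optimiser budget (max.call / maxit)
  dir_results = "./calib_results",
  par         = list(kphio = list(init = 0.05, lower = 0.02, upper = 0.12))
)
settings <- calib_sofun(df_drivers, ddf_obs, settings)
settings$par_opt                            # named vector of optimised parameter values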
- ##------------------------------------------------------------ - ## loop over sites to get data frame with all variables - list_bysite <- lapply( settings_calib$sitenames, - function(x) get_obs_bysite(x, - settings_calib = settings_calib, - settings_sims = dplyr::filter(settings_sims, sitename == x), - settings_input = settings_input) %>% - mutate( sitename=x ) - ) - - ## combine dataframes from multiple sites along rows - ddf_obs <- dplyr::bind_rows( list_bysite ) - rm("list_bysite") - - return( ddf_obs ) - -} - - -##------------------------------------------------------------ -## Function returns a data frame (tibble) containing all the observational data -## used as target variables for calibration for a given site, covering specified dates. -##------------------------------------------------------------ -get_obs_bysite <- function( mysitename, settings_calib, settings_sims, settings_input ){ - - print(paste("getting obs for ", mysitename)) - - ## Initialise daily dataframe (WITHOUT LEAP YEARS, SOFUN USES FIXED 365-DAYS YEARS!) - ddf <- init_dates_dataframe( year(settings_sims$date_start), year(settings_sims$date_end), noleap = TRUE ) - - if ("gpp" %in% settings_calib$targetvars){ - - ## Interpret benchmarking data specification - datasource <- stringr::str_split( settings_calib$datasource, "_" ) %>% unlist() - - if ("fluxnet2015" %in% datasource){ - ##------------------------------------------------------------ - ## Get FLUXNET 2015 data - ##------------------------------------------------------------ - ## Make sure data is available for this site - getvars <- c("GPP_NT_VUT_REF", "GPP_DT_VUT_REF", "NEE_VUT_REF_NIGHT_QC", "NEE_VUT_REF_DAY_QC", "GPP_NT_VUT_SE", "GPP_DT_VUT_SE") - - error <- check_download_fluxnet2015( settings_calib$path_fluxnet2015, mysitename ) - - ddf <- get_obs_bysite_fluxnet2015( - mysitename, - path_fluxnet2015 = settings_input$path_fluxnet2015, - timescale = settings_calib$timescale$gpp, - getvars = getvars, - getswc = FALSE, - threshold_GPP = settings_calib$threshold_GPP, - remove_neg = FALSE, - verbose = TRUE - ) %>% - dplyr::mutate( gpp_obs = case_when("NT" %in% datasource & !("DT" %in% datasource) ~ GPP_NT_VUT_REF, - "DT" %in% datasource & !("NT" %in% datasource) ~ GPP_DT_VUT_REF - # "DT" %in% datasource & "NT" %in% datasource ~ (GPP_NT_VUT_REF+GPP_DT_VUT_REF)/2 - ), - gpp_unc = case_when("NT" %in% datasource & !("DT" %in% datasource) ~ GPP_NT_VUT_SE, - "DT" %in% datasource & !("NT" %in% datasource) ~ GPP_DT_VUT_SE )) %>% - dplyr::right_join( ddf, by = "date" ) %>% - dplyr::select(date, gpp_obs, gpp_unc) - - # ## replace observations with NA if uncertainty is very small -- only necessary when using chi-squared calibration - # targetvars_unc <- paste0(settings_calib$targetvars, "_unc") - # replace_zero_with_na <- function(obs, unc){ - # idxs <- which( unc < 0.0001 ) - # obs[idxs] <- NA - # return(obs) - # } - # ddf <- ddf %>% - # dplyr::mutate( gpp_obs = replace_zero_with_na(gpp_obs, gpp_unc) ) - # - - # test <- sum(!is.na(ddf$gpp_obs)) - # if (test<3) rlang::abort(paste0("Too hard filtering for site ", mysitename)) - - } else { - - ddf <- ddf %>% dplyr::mutate( gpp_obs = NA, gpp_unc = NA ) - - } - - if ("Ty" %in% datasource){ - ##------------------------------------------------------------ - ## Get GePiSaT data - ##------------------------------------------------------------ - ## Make sure data is available for this site - error <- check_download_gepisat( settings_calib$path_gepisat, mysitename ) - - ddf_gepisat <- get_obs_bysite_gpp_gepisat( mysitename, 
settings_calib$path_gepisat, settings_calib$timescale ) - - ## add to other data frame and take take weighted average for updated 'gpp_obs' - if (!is.null(ddf_gepisat)){ - - ddf <- ddf_gepisat %>% - ## Some GPP data looks weird when its error in resp. day is zero. Exclude this data. - mutate( gpp_obs = ifelse( gpp_err_obs == 0.0, NA, gpp_obs ) ) %>% - dplyr::rename( gpp_obs_gepisat = gpp_obs ) %>% - right_join( ddf, by = "date" ) - totlen <- length(datasource[ -which( datasource=="fluxnet2015" ) ]) - if (totlen>1){ - ddf$gpp_obs <- sum( c(ddf$gpp_obs * (totlen-1), ddf$gpp_obs_gepisat), na.rm = TRUE ) / totlen - } else { - ddf <- ddf %>% mutate( gpp_obs = gpp_obs_gepisat ) - } - - } else { - ## No GePiSaT data available for this site. Consider all GPP data missing (NA). - ddf <- ddf %>% mutate( gpp_obs = NA ) - } - - } - - } - - if ("latenth" %in% settings_calib$targetvars){ - - ## Interpret benchmarking data specification - datasource <- stringr::str_split( settings_calib$datasource, "_" ) %>% unlist() - - if ("fluxnet2015" %in% datasource){ - ##------------------------------------------------------------ - ## Get FLUXNET 2015 data - ##------------------------------------------------------------ - ## Make sure data is available for this site - error <- check_download_fluxnet2015( settings_calib$path_fluxnet2015, mysitename ) - - ddf <- get_obs_bysite_fluxnet2015( - mysitename, - path_fluxnet2015 = settings_input$path_fluxnet2015, - timescale = settings_calib$timescale$latenth, - getvars = c("LE_F_MDS", "LE_RANDUNC", "LE_F_MDS_QC"), - getswc = FALSE, - threshold_LE = 0.6, - remove_neg = FALSE, - verbose = TRUE - ) %>% - dplyr::right_join( ddf, by = "date" ) %>% - dplyr::rename(latenth_obs = latenth) - - } else { - - ddf <- ddf %>% dplyr::mutate( latenth_obs = NA, latenth_unc = NA ) - - } - - } - - if ("wcont" %in% settings_calib$targetvars){ - - if ("fluxnet2015" %in% settings_calib$datasource$wcont){ - - ## Make sure data is available for this site - error <- check_download_fluxnet2015( settings_calib$path_fluxnet2015, mysitename ) - - ddf <- get_obs_bysite_wcont_fluxnet2015( mysitename, settings_calib$path_fluxnet2015, settings_calib$timescale ) %>% - dplyr::right_join( ddf, by = "date" ) - - } - - } - - return(ddf) - -} diff --git a/R/collect_drivers_sofun.R b/R/collect_drivers_sofun.R index a88820d6..8b31d342 100644 --- a/R/collect_drivers_sofun.R +++ b/R/collect_drivers_sofun.R @@ -2,9 +2,10 @@ #' #' Collect all drivers for site-level simulations into a nested data frame with one row for each site #' -#' @param settings A named list containing the simulation settings (see vignette_rsofun.pdf for more information and examples). +#' @param siteinfo A data frame containing site meta info (rows for sites). Required columns are: \code{"sitename", "date_start", "date_end", "lon", "lat", "elv"}. 
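The reworked collect_drivers_sofun() below expects each forcing data set pre-nested by site into a column named 'data'. A minimal sketch of the expected inputs, assuming flat daily tables df_meteo, df_fapar and df_co2 (hypothetical names); note that the fAPAR table must carry the column modisvar_interpol and the CO2 table a column co2, as selected in the function body:

meteo <- df_meteo %>% dplyr::group_by(sitename) %>% tidyr::nest()  # date, temp, prec, vpd, ppfd, ...
fapar <- df_fapar %>% dplyr::group_by(sitename) %>% tidyr::nest()  # date, modisvar_interpol
co2   <- df_co2   %>% dplyr::group_by(sitename) %>% tidyr::nest()  # date, co2

df_drivers <- collect_drivers_sofun(siteinfo, meteo, fapar, co2, df_soiltexture)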
#' @param meteo A nested data frame with rows for each site and meteo forcing data time series nested inside a column named \code{"data"} #' @param fapar A nested data frame with rows for each site and fAPAR forcing data time series nested inside a column named \code{"data"} +#' @param co2 A nested data frame with rows for each site and CO2 forcing data time series nested inside a column named \code{"data"} #' @param df_soiltexture A data frame describing the soil texture, replicated as-is for each site #' #' @return A nested data frame with one row per site, site meta info nested in column \code{siteinfo} and the combined forcing time series nested in column \code{forcing} @@ -12,11 +13,11 @@ #' #' @examples df_drivers <- collect_drivers_sofun( siteinfo, meteo, fapar, co2, df_soiltexture ) #' -collect_drivers_sofun <- function( settings, meteo, fapar, df_soiltexture ){ +collect_drivers_sofun <- function( siteinfo, meteo, fapar, co2, df_soiltexture ){ ## create mega-df containing all forcing data and parameters that vary by site (not model parameters!) - names_metainfo <- names(settings)[-which(names(settings) %in% c("sitename", "params_siml"))] - df_mega <- settings %>% + names_metainfo <- names(siteinfo)[-which(names(siteinfo) %in% c("sitename", "params_siml"))] + df_mega <- siteinfo %>% tidyr::nest(siteinfo = names_metainfo) %>% left_join( meteo %>% @@ -28,9 +29,36 @@ collect_drivers_sofun <- function( settings, meteo, fapar, df_soiltexture ){ rename(fapar = data), by = "sitename" ) %>% + left_join( + co2 %>% + rename(co2 = data), + by = "sitename" + ) %>% mutate(df_soiltexture = purrr::map(as.list(seq(nrow(.))), ~return(df_soiltexture))) + ## use only interpolated fapar and combine meteo data and fapar into a single nested column 'forcing' + df_mega <- df_mega %>% + mutate(fapar = purrr::map(fapar, ~dplyr::select(., date, fapar = modisvar_interpol))) %>% + mutate(co2 = purrr::map(co2 , ~dplyr::select(., date, co2))) %>% + mutate(forcing = purrr::map2(meteo, fapar, ~left_join( .x, .y, by = "date"))) %>% + mutate(forcing = purrr::map2(forcing, co2, ~left_join( .x, .y, by = "date"))) %>% + dplyr::select(-meteo, -fapar, -co2) + + ## interpolate to fill gaps in forcing time series + myapprox <- function(vec){ approx(vec, xout = 1:length(vec))$y } + fill_na_forcing <- function(df){ vars <- names(df)[-which(names(df)=="date")] df %>% mutate_at(vars, myapprox) } + df_mega <- df_mega %>% + mutate(forcing = purrr::map(forcing, ~fill_na_forcing(.))) + return(df_mega) } + + diff --git a/R/collect_obs_eval.R b/R/collect_obs_eval.R new file mode 100644 index 00000000..a3c4cfc7 --- /dev/null +++ b/R/collect_obs_eval.R @@ -0,0 +1,80 @@ +#' Get observational data for model evaluation +#' +#' Gets observational data for model evaluation, given the benchmarking variable and the data source used +#' for the evaluation. This information is specified in the evaluation settings (argument \code{settings}). +#' +#' @param siteinfo A data frame containing site meta info (rows for sites). Required columns are: \code{"sitename", "date_start", "date_end", "lon", "lat", "elv"}. +#' @param settings A list specifying evaluation settings (see vignette eval_sofun.pdf for more information and examples) +#' @param adf A nested data frame with rows for sites and time series (annual) for each site nested inside the column \code{data}. +#' @param mdf A nested data frame with rows for sites and time series (monthly) for each site nested inside the column \code{data}. +#' @param ddf A nested data frame with rows for sites and time series (daily) for each site nested inside the column \code{data}. +#' @param hrdf A nested data frame with rows for sites and time series (hourly) for each site nested inside the column \code{data}. 
+#' @param hhdf A nested data frame with rows for sites and time series (half-hourly) for each site nested inside the column \code{data}. +#' +#' @return A list containing nested data frames of observed values aggregated to several temporal scales (ddf for daily, xdf for X-daily +#' (determined by \code{settings$agg}), mdf for monthly, adf for annual). +#' @export +#' +#' @examples \dontrun{obs <- collect_obs_eval( siteinfo = siteinfo, settings = settings, adf = adf, mdf = mdf, ddf = ddf )} +#' +collect_obs_eval <- function( siteinfo, settings, adf = NULL, mdf = NULL, ddf = NULL, hrdf = NULL, hhdf = NULL ){ + + if (is.null(adf)){ + rlang::abort("collect_obs_eval(): object provided by argument 'adf' is missing.") + } + + if (is.null(ddf)){ + rlang::abort("collect_obs_eval(): object provided by argument 'ddf' is missing.") + } + + if (is.null(mdf)){ + rlang::abort("collect_obs_eval(): object provided by argument 'mdf' is missing.") + } + + ##------------------------------------------------------------ + ## Aggregate daily data to multi-day periods + ## periods should start with the 1st of January each year, otherwise can't compute mean seasonal cycle + ##------------------------------------------------------------ + ## Generate vector of starting dates of X-day periods, making sure the 1st of Jan is always the start of a new period + calc_xdf <- function(ddf, agg){ + + firstyear <- ddf %>% mutate(year = lubridate::year(date)) %>% pull(year) %>% min() + lastyear <- ddf %>% mutate(year = lubridate::year(date)) %>% pull(year) %>% max() + listyears <- seq( ymd(paste0(firstyear, "-01-01")), ymd(paste0(lastyear, "-01-01")), by = "year" ) + breaks <- purrr::map( as.list(listyears), ~seq( from=., by=paste0( agg, " days"), length.out = ceiling(365 / agg)) ) %>% Reduce(c,.) 
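  # Note on the breaks constructed above: the sequence of period start dates is
  # rebuilt from the 1st of January of every year (ceiling(365 / agg) periods per
  # year, i.e. 46 for agg = 8), so period boundaries fall on the same days of the
  # year in every year; the last bin of each year is simply shorter, ending at the
  # next 1st of January. This alignment is what makes the mean seasonal cycle
  # computable from 'inbin' further down.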
+ + ## take mean across periods + evalvars <- names(settings$benchmark) + ddf %>% + mutate( inbin = cut( date, breaks = breaks, right = FALSE ) ) %>% + tidyr::drop_na(inbin) %>% + group_by( inbin ) %>% + summarise_at( vars(one_of(evalvars)), mean, na.rm=TRUE) + + } + xdf <- ddf %>% + mutate(data = purrr::map(data, ~calc_xdf(., agg = settings$agg))) + + ## complement info required for evaluation + ddf <- siteinfo %>% + dplyr::select(sitename, lon, lat, elv, classid, c4, whc, koeppen_code, igbp_land_use, plant_functional_type) %>% + right_join(ddf, by = "sitename") + + xdf <- siteinfo %>% + dplyr::select(sitename, lon, lat, elv, classid, c4, whc, koeppen_code, igbp_land_use, plant_functional_type) %>% + right_join(xdf, by = "sitename") + + mdf <- siteinfo %>% + dplyr::select(sitename, lon, lat, elv, classid, c4, whc, koeppen_code, igbp_land_use, plant_functional_type) %>% + right_join(mdf, by = "sitename") + + adf <- siteinfo %>% + dplyr::select(sitename, lon, lat, elv, classid, c4, whc, koeppen_code, igbp_land_use, plant_functional_type) %>% + right_join(adf, by = "sitename") + + ## get breaks + breaks <- xdf %>% tidyr::unnest(data) %>% pull(inbin) %>% as.character() %>% lubridate::ymd() + + return( list( ddf = ddf, xdf = xdf, mdf = mdf, adf = adf, breaks = breaks ) ) + +} diff --git a/R/eval_sofun.R b/R/eval_sofun.R index 355475fa..40e81f8b 100644 --- a/R/eval_sofun.R +++ b/R/eval_sofun.R @@ -25,11 +25,22 @@ #' eval_sofun <- function(mod, settings_eval, settings_sims, obs_eval = NA, overwrite = TRUE, doplot = FALSE, light = FALSE){ + ## make model output a long flat table + mod <- mod %>% + dplyr::rename(id = sitename) %>% + tidyr::unnest(out_sofun) + ## Evaluate daily variables - out <- lapply( as.list(names(settings_eval$benchmark)), - function(x) eval_sofun_byvar(x, dplyr::select(mod, sitename, date, mod = eval(x)), settings_eval, settings_sims, obs_eval = obs_eval, overwrite = TRUE, doplot = FALSE, light = light) - ) %>% - setNames(names(settings_eval$benchmark)) + out <- purrr::map( + as.list(names(settings_eval$benchmark)), + ~eval_sofun_byvar(., dplyr::select(mod, sitename, date, mod = {{.}}), settings_eval, settings_sims, obs_eval = obs_eval, overwrite = TRUE, doplot = FALSE, light = light) + ) %>% + setNames(names(settings_eval$benchmark)) + + # out <- lapply( as.list(names(settings_eval$benchmark)), + # function(x) eval_sofun_byvar(x, dplyr::select(mod, sitename, date, mod = eval(x)), settings_eval, settings_sims, obs_eval = obs_eval, overwrite = TRUE, doplot = FALSE, light = light) + # ) %>% + # setNames(names(settings_eval$benchmark)) return(out) } @@ -82,7 +93,7 @@ eval_sofun_byvar <- function(varnam, ddf_mod, settings_eval, settings_sims, obs_ ##------------------------------------------------------------ ## Get observations for evaluation ##------------------------------------------------------------ - if (identical(obs_eval, NA)) obs_eval <- get_obs_eval( settings_eval = settings_eval, settings_sims = settings_sims, overwrite = overwrite ) + if (identical(obs_eval, NA)) rlang::abort("eval_sofun_byvar(): Object provided by argument 'obs_eval' could not be identified.") ## detach if (varnam=="aet"){ @@ -90,10 +101,10 @@ eval_sofun_byvar <- function(varnam, ddf_mod, settings_eval, settings_sims, obs_ } else { varnam_obs <- varnam } - adf <- obs_eval$adf %>% dplyr::select(sitename, date, obs = {{varnam_obs}} ) # eval(varnam_obs)) - mdf <- obs_eval$mdf %>% dplyr::select(sitename, date, obs = {{varnam_obs}}) - ddf <- obs_eval$ddf %>% dplyr::select(sitename, date, obs = 
{{varnam_obs}}) - xdf <- obs_eval$xdf %>% dplyr::select(sitename, inbin, obs = {{varnam_obs}}) + adf <- obs_eval$adf %>% tidyr::unnest(data) %>% dplyr::select(sitename, date, obs = {{varnam_obs}}) + mdf <- obs_eval$mdf %>% tidyr::unnest(data) %>% dplyr::select(sitename, date, obs = {{varnam_obs}}) + ddf <- obs_eval$ddf %>% tidyr::unnest(data) %>% dplyr::select(sitename, date, obs = {{varnam_obs}}, lat, koeppen_code) + xdf <- obs_eval$xdf %>% tidyr::unnest(data) %>% dplyr::select(sitename, inbin, obs = {{varnam_obs}}) # obs_eval$adf <- NULL # obs_eval$mdf <- NULL @@ -130,7 +141,7 @@ eval_sofun_byvar <- function(varnam, ddf_mod, settings_eval, settings_sims, obs_ ## mean across multi-day period xdf <- ddf_mod %>% # mutate( year = year(date), week = week(date) ) %>% - mutate( year = year(date), inbin = cut( date, breaks = obs_eval$breaks_xdf, right = FALSE ) ) %>% + mutate( year = year(date), inbin = cut( date, breaks = obs_eval$breaks, right = FALSE ) ) %>% tidyr::drop_na() %>% group_by( sitename, inbin ) %>% summarise( mod_mean = mean( mod, na.rm = TRUE ), mod_min = min( mod, na.rm = TRUE ), mod_max = max( mod, na.rm = TRUE ), n_mod = sum(!is.na(mod)) ) %>% @@ -305,7 +316,7 @@ eval_sofun_byvar <- function(varnam, ddf_mod, settings_eval, settings_sims, obs_ if (!light){ print("Evaluate mean seasonal cycle by climate zones...") meandoydf_byclim <- ddf %>% mutate( doy = yday(date) ) %>% - left_join( dplyr::select( metainfo_Tier1_sites_kgclimate_fluxnet2015, sitename, lat, koeppen_code ), by = "sitename" ) %>% # 'metainfo_Tier1_sites_kgclimate_fluxnet2015' is lazy-loaded with library(rsofun) + # left_join( dplyr::select( metainfo_Tier1_sites_kgclimate_fluxnet2015, sitename, lat, koeppen_code ), by = "sitename" ) %>% # 'metainfo_Tier1_sites_kgclimate_fluxnet2015' is lazy-loaded with library(rsofun) mutate( hemisphere = ifelse( lat>0, "north", "south" ) ) %>% # mutate( bias = mod - obs ) %>% dplyr::select( -lat ) %>% diff --git a/R/get_obs_bysite_fluxnet2015.R b/R/get_obs_bysite_fluxnet2015.R deleted file mode 100644 index afcf717b..00000000 --- a/R/get_obs_bysite_fluxnet2015.R +++ /dev/null @@ -1,817 +0,0 @@ -#' Get FLUXNET 2015 observational data for one site. -#' -#' Function for reading observational GPP data from FLUXNET 2015 dataset -#' and defining calibration target (which flux decomposition method etc.) -#' -#' @param sitename A character string specifying the site name for which -#' FLUXNET 2015 data is searched (based on the site name appearing as part -#' of the respective file name). Defaults to NA. -#' @param path_fluxnet2015 A character string specifying the local path of -#' FLUXNET 2015 data. -#' @param path_fluxnet2015_hh A character string specifying the local path of -#' half-hourly FLUXNET 2015 data, required to get daytime VPD. Defaults to -#' \code{NULL} (no daytime VPD is calculated). -#' @param timescale A character specifying the time scale of FLUXNET 2015 -#' data. Any of \code{c("d", "w", "m", "y")} for daily, weekly, monthly, -#' or yearly, respectively. -#' @param getvars A vector of character strings corresponding to the -#' FLUXNET 2015 variable names as used in the original data files. See -#' \url{https://fluxnet.fluxdata.org/data/fluxnet2015-dataset/}. If argument -#' \code{getswc==TRUE}, then soil water content data (variables starting with -#' \code{"SWC_}) are read. -#' @param getswc If \code{getswc==TRUE}, then all soil water content data -#' (variables starting with \code{"SWC_}) are read. Defaults to \code{TRUE}. 
-#' @param threshold_GPP A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param threshold_LE A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param threshold_H A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param threshold_SWC A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param threshold_WS A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param threshold_USTAR A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. 
The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param threshold_T A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param threshold_NETRAD A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param return_qc A logical specifying whether quality control variables -#' should be returned. -#' @param remove_neg A logical specifying whether negative GPP values are to -#' be removed (replaces with NA). -#' -#' @return A data frame (tibble) containing cleaned observational data, -#' named and in units corresponding to rsofun standard. 
-#' @export -#' -#' @examples df <- get_obs_bysite_fluxnet2015 -#' -get_obs_bysite_fluxnet2015 <- function( sitename, path_fluxnet2015, path_fluxnet2015_hh=NULL, - timescale, getvars, getswc=TRUE, - threshold_GPP=0.0, threshold_LE=0.0, threshold_H=0.0, threshold_SWC=0.0, - threshold_WS=0.0, threshold_USTAR=0.0, threshold_T=0.0, threshold_NETRAD=0.0, return_qc=FALSE, - remove_neg = FALSE, verbose=TRUE ){ - - if (verbose) print(paste("Getting FLUXNET data for", sitename, "...")) - - ## Take only file for this site - if (timescale=="d"){ - ## Daily - filn <- list.files( path_fluxnet2015, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_DD.*.csv" ), - recursive = TRUE - ) - } else if (timescale=="w"){ - ## Weekly - filn <- list.files( path_fluxnet2015, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_WW.*.csv" ), - recursive = TRUE - ) - } else if (timescale=="m"){ - ## Monthly - filn <- list.files( path_fluxnet2015, - pattern = paste0("FLX_", sitename, ".*_FLUXNET2015_FULLSET_MM.*.csv"), - recursive = TRUE - ) - } else if (timescale=="y"){ - ## Annual - filn <- list.files( path_fluxnet2015, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_YY.*.csv" ), - recursive = TRUE - ) - } else if (timescale=="hh"){ - ## half-hourly - filn <- list.files( path_fluxnet2015, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_HH.*.csv" ), - recursive = TRUE - ) - } - - # ## Use also quality flag data for each variable in 'getvars' - # obsvars <- tibble( getvars = getvars ) %>% - # dplyr::filter(!(stringr::str_detect(., "UNC"))) %>% - # dplyr::pull(getvars) - # uncvars <- tibble( getvars = getvars ) %>% - # dplyr::filter(stringr::str_detect(., "UNC")) %>% - # dplyr::pull(getvars) - # getvars <- c(obsvars, paste0(obsvars, "_QC"), uncvars) - - if (length(filn)==0) rlang::abort(paste0("No files found for timescale ", timescale, " in sub-directories of ", path_fluxnet2015 ) ) - if (length(filn)>1){ - file.info_getsize <- function(filn){ - file.info(filn)$size - } - rlang::warn("Reading only largest daily file available") - path_dd <- paste0(path_fluxnet2015, filn) - size_vec <- purrr::map_dbl(as.list(path_dd), ~file.info_getsize(.)) - path_dd <- path_dd[which.max(size_vec)] - filn <- basename(path_dd) - } - - - # if (length(filn)>1){ - # filn <- filn[which(grepl("3.csv", filn))] - # # rlang::warn(paste0("Multiple files found for timsescale ", timescale, " in sub-directories of ", path_fluxnet2015, ". Taking only ", filn ) ) - # } - - ## This returns a data frame with columns (date, temp, prec, nrad, ppfd, vpd, ccov) - df <- get_obs_fluxnet2015_raw( sitename, - path = paste0(path_fluxnet2015, filn), - freq = timescale - ) - - ## For some sites, the NETRAD column is missing. - if ("NETRAD" %in% getvars && !("NETRAD" %in% names(df))){ - df <- df %>% mutate(NETRAD = NA, NETRAD_QC = 0.0) - } - - ## Get daytime VPD separately - merge_df_vpd_day_dd <- FALSE - if ("VPD_F_DAY" %in% getvars && !(timescale == "hh")){ - - if (is.null(path_fluxnet2015_hh)){ - - rlang::warn("Argument path_fluxnet2015_hh is not provided. 
Daytime VPD could not be calculted.") - - } else { - - ## get half-hourly file name(s) - filn_hh <- list.files( path_fluxnet2015_hh, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_HH.*.csv" ), - recursive = TRUE - ) - if (length(filn_hh)>0){ - use_hh <- TRUE - } else { - ## get hourly file name(s) - filn_hh <- list.files( path_fluxnet2015_hh, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_HR.*.csv" ), - recursive = TRUE - ) - use_hh <- FALSE - - } - - ## get file name(s) of file containing daily daytime VPD derived from half-hourly data - filename_dd_vpd <- list.files( path_fluxnet2015_hh, - pattern = paste0("FLX_", sitename, ".*_VPD_DAY.csv"), - recursive = FALSE) - - # filename_dd_vpd <- filn_hh %>% - # stringr::str_replace("HH", "DD") %>% - # stringr::str_replace(".csv", "_VPD_DAY.csv") - - if (length(filename_dd_vpd)>0){ - - if (length(filename_dd_vpd)>1){ - file.info_getsize <- function(filn){ - file.info(filn)$size - } - rlang::warn("Reading only largest daily VPD file available") - path_dd_vpd <- paste0(path_fluxnet2015_hh, filename_dd_vpd) - size_vec <- purrr::map_dbl(as.list(path_dd_vpd), ~file.info_getsize(.)) - path_dd_vpd <- path_dd_vpd[which.max(size_vec)] - filename_dd_vpd <- basename(path_dd_vpd) - } - - ## read directly - if (verbose) print(paste("Reading daytime VPD directly from:", paste0(path_fluxnet2015_hh, filename_dd_vpd))) - df_vpd_day_dd <- readr::read_csv(paste0(path_fluxnet2015_hh, filename_dd_vpd)) - merge_df_vpd_day_dd <- TRUE - - } else { - - if (length(filn_hh)>0){ - - path_hh <- paste0(path_fluxnet2015_hh, filn_hh) - - if (length(filn_hh)>1){ - file.info_getsize <- function(filn){ - file.info(filn)$size - } - rlang::warn("Reading only largest half-hourly file available") - size_vec <- purrr::map_dbl(as.list(path_hh), ~file.info_getsize(.)) - path_hh <- path_hh[which.max(size_vec)] - } - - df_vpd_day_dd <- get_vpd_day_fluxnet2015_byfile(path_hh, write = TRUE) - merge_df_vpd_day_dd <- TRUE - - } else { - - if (length(filn_hh)==0) rlang::abort("No half-hourly files found (required to calculate VPD_DAY).") - - # if (verbose) rlang::warn("No half-hourly data available. 
Cannot get vpd_day for this site.") - # getvars <- getvars[-which(str_detect(getvars, "VPD_F_DAY"))] - # # getvars <- c(getvars, "VPD_F", "VPD_F_QC") - - } - - } - - if (merge_df_vpd_day_dd){ - - if (timescale=="d"){ - - # daily - df <- df %>% dplyr::left_join(df_vpd_day_dd, by="date") - - } else if (timescale=="w"){ - - # weekly - df <- df_vpd_day_dd %>% - dplyr::mutate(year = lubridate::year(date), - week = lubridate::week(date)) %>% - dplyr::group_by(sitename, year, week) %>% - dplyr::summarise(VPD_F_DAY = mean(VPD_F, na.rm=TRUE), - VPD_F_DAY_QC = mean(VPD_F_QC, na.rm=TRUE), - VPD_F_DAY_MDS = mean(VPD_F_MDS, na.rm=TRUE), - VPD_F_DAY_MDS_QC = mean(VPD_F_MDS_QC, na.rm=TRUE), - VPD_DAY_ERA = mean(VPD_ERA, na.rm=TRUE) ) %>% - dplyr::right_join(df, by="date") - - } else if (timescale=="m"){ - - # monthly - df <- df_vpd_day_dd %>% - dplyr::mutate(year = lubridate::year(date), - moy = lubridate::month(date)) %>% - dplyr::group_by(sitename, year, moy) %>% - dplyr::summarise(VPD_F_DAY = mean(VPD_F, na.rm=TRUE), - VPD_F_DAY_QC = mean(VPD_F_QC, na.rm=TRUE), - VPD_F_DAY_MDS = mean(VPD_F_MDS, na.rm=TRUE), - VPD_F_DAY_MDS_QC = mean(VPD_F_MDS_QC, na.rm=TRUE), - VPD_DAY_ERA = mean(VPD_ERA, na.rm=TRUE) ) %>% - dplyr::right_join(df, by="date") - - } else if (timescale=="y"){ - - # annual - df <- df_vpd_day_dd %>% - dplyr::mutate(year = lubridate::year(date)) %>% - dplyr::group_by(sitename, year) %>% - dplyr::summarise(VPD_F_DAY = mean(VPD_F, na.rm=TRUE), - VPD_F_DAY_QC = mean(VPD_F_QC, na.rm=TRUE), - VPD_F_DAY_MDS = mean(VPD_F_MDS, na.rm=TRUE), - VPD_F_DAY_MDS_QC = mean(VPD_F_MDS_QC, na.rm=TRUE), - VPD_DAY_ERA = mean(VPD_ERA, na.rm=TRUE) ) %>% - dplyr::right_join(df, by="date") - - } - - } - - } - - } - - ## retain all getvars, plus soil moisture if required - if (getswc){ - df <- df %>% - dplyr::select( ., date, one_of(getvars), starts_with("SWC_") ) - # dplyr::mutate_at(., vars(starts_with("SWC_")), list(~as.numeric) ) # this has caused error - } else { - df <- df %>% dplyr::select( ., date, one_of(getvars) ) - } - - # ## convert to numeric (weirdly isn't always) and subset (select) - # df <- df %>% - # dplyr::mutate_at( vars(one_of(getvars)), list(~as.numeric)) - - ## Filter - ## air temperature - TA_vars <- getvars[which(grepl("TA_", getvars))] - TA_vars <- TA_vars[-which(grepl("_QC", TA_vars))] - for (ivar in TA_vars){ - df <- df %>% clean_fluxnet_byvar(ivar, threshold_T) - } - - ## wind speed - WS_vars <- getvars[which(grepl("WS_", getvars))] - WS_vars <- WS_vars[-which(grepl("_QC", WS_vars))] - for (ivar in WS_vars){ - df <- df %>% clean_fluxnet_byvar(ivar, threshold_WS) - } - - ## u-star - USTAR_vars <- getvars[which(grepl("USTAR_", getvars))] - USTAR_vars <- USTAR_vars[-which(grepl("_QC", USTAR_vars))] - for (ivar in USTAR_vars){ - df <- df %>% clean_fluxnet_byvar(ivar, threshold_USTAR) - } - - ## net radiation - NETRAD_vars <- getvars[which(grepl("NETRAD", getvars))] - NETRAD_vars <- NETRAD_vars[-which(grepl("_QC", NETRAD_vars))] - for (ivar in NETRAD_vars){ - df <- df %>% clean_fluxnet_byvar(ivar, threshold_NETRAD) - } - - energyvars <- df %>% - dplyr::select(starts_with("LE_"), starts_with("H_")) %>% - dplyr::select(-ends_with("_QC")) %>% - names() - - df <- df %>% - ## Unit conversion for sensible and latent heat flux: W m-2 -> J m-2 d-1 - dplyr::mutate_at( vars(one_of(energyvars)), convert_energy_fluxnet2015) - - ## clean GPP data - if (any(grepl("GPP_", getvars))){ - - if (any( !(c("GPP_NT_VUT_REF", "GPP_DT_VUT_REF", "NEE_VUT_REF_NIGHT_QC", "NEE_VUT_REF_DAY_QC") %in% getvars) ) || - 
any(!(c("GPP_NT_VUT_REF", "GPP_DT_VUT_REF", "NEE_VUT_REF_NIGHT_QC", "NEE_VUT_REF_DAY_QC") %in% names(df)))){ - rlang::abort("Not all variables read from file that are needed for data cleaning.") - } - - # out_clean <- clean_fluxnet_gpp( - # df$GPP_NT_VUT_REF, - # df$GPP_DT_VUT_REF, - # df$NEE_VUT_REF_NIGHT_QC, - # df$NEE_VUT_REF_DAY_QC, - # threshold=threshold_GPP - # ) - # df$GPP_NT_VUT_REF <- out_clean$gpp_nt - # df$GPP_DT_VUT_REF <- out_clean$gpp_dt - - df <- df %>% - clean_fluxnet_gpp(threshold = threshold_GPP, remove_neg = remove_neg) - } - - ## clean energy data (sensible and latent heat flux) data - often has spuriously equal values - if (any(grepl("LE_", getvars))){ - if (any( !(c("LE_F_MDS", "LE_F_MDS_QC") %in% getvars) )) abort("Not all variables read from file that are needed for data cleaning.") - #df$LE_F_MDS_good <- clean_fluxnet_energy( df$LE_F_MDS, df$LE_F_MDS_QC, threshold_LE=0.0 ) - df$LE_F_MDS <- clean_fluxnet_energy( df$LE_F_MDS, df$LE_F_MDS_QC, threshold=threshold_LE ) - } - if (any(grepl("H_", getvars))){ - if (any( !(c("H_F_MDS", "H_F_MDS_QC") %in% getvars) )) abort("Not all variables read from file that are needed for data cleaning.") - df$H_F_MDS <- clean_fluxnet_energy( df$H_F_MDS, df$H_F_MDS_QC, threshold=threshold_H ) - } - - ## Soil moisture related stuff for daily data - if (timescale=="d" && getswc){ - tmp <- df %>% dplyr::select( starts_with("SWC") ) - if (ncol(tmp)>0){ - swcvars <- tmp %>% dplyr::select( -ends_with("QC") ) %>% names() - swcqcvars <- tmp %>% dplyr::select( ends_with("QC") ) %>% names() - - # map( as.list(seq(length(swcvars))), ~clean_fluxnet_swc( df[[ swcvars[.] ]], df[[ swcqcvars[.] ]]) ) - if (length(swcvars)>0){ - for (ivar in 1:length(swcvars)){ - df[[ swcvars[ivar] ]] <- clean_fluxnet_swc( df[[ swcvars[ivar] ]], df[[ swcqcvars[ivar] ]], frac_data_thresh = threshold_SWC ) - } - } - - df <- df %>% - ## Normalise mean observational soil moisture to within minimum (=0) and maximum (=1), and - dplyr::mutate_at( vars(one_of(swcvars)), list(~norm_to_max(.)) ) %>% - - ## get mean observational soil moisture across different depths (if available) - dplyr::mutate( soilm_obs_mean = apply( dplyr::select( ., one_of(swcvars) ), 1, FUN = mean, na.rm = TRUE ) ) %>% - dplyr::mutate( soilm_obs_mean = ifelse( is.nan(soilm_obs_mean), NA, soilm_obs_mean ) ) - if (verbose) rlang::warn("Converting: soilm_obs_mean = mean across different soil depths (SWC_F_MDS), with na.rm = TRUE" ) - - } - - } - - ## check if anything is missing - if (any(!(getvars %in% names(df)))){ - rlang::abort(paste("Not all getvars were found in file. 
Missing: ", getvars[which(!(getvars %in% names(df)))])) - } - - if (!return_qc){ - df <- df %>% dplyr::select(-ends_with("_QC")) - } - - ## conversion factor from SPLASH: flux to energy conversion, umol/J (Meek et al., 1984) - kfFEC <- 2.04 - - ## Make unit conversions and shorter names - outgetvars <- c() - - ## Rename variables - if ("TA_F_DAY" %in% getvars){ - if (verbose) rlang::warn("Renaming: temp_day = TA_F_DAY \n") - df <- df %>% dplyr::rename_at( vars(starts_with("TA_F_DAY")), list(~stringr::str_replace(., "TA_F_DAY", "temp_day")) ) - } - if ("TA_F" %in% getvars){ - if (verbose) rlang::warn("Renaming: temp = TA_F \n") - df <- df %>% dplyr::rename_at( vars(starts_with("TA_F")), list(~stringr::str_replace(., "TA_F", "temp")) ) - } - if ("P_F" %in% getvars){ - if (verbose) rlang::warn("Renaming: prec = P_F (given and required in mm) \n") - df <- df %>% dplyr::rename_at( vars(starts_with("P_F")), list(~stringr::str_replace(., "P_F", "prec")) ) - } - if ("WS_F" %in% getvars){ - if (verbose) rlang::warn("Renaming: wspeed = WS_F (given and required in m s-1) \n") - df <- df %>% dplyr::rename_at( vars(starts_with("WS_F")), list(~stringr::str_replace(., "WS_F", "wspeed")) ) - } - if ("USTAR" %in% getvars){ - if (verbose) rlang::warn("Renaming: ustar = USTAR (given and required in m s-1) \n") - df <- df %>% dplyr::rename_at( vars(starts_with("USTAR")), list(~stringr::str_replace(., "USTAR", "ustar")) ) - } - if ("LE_F_MDS" %in% getvars){ - if (verbose) rlang::warn("Renaming: latenth = LE_F_MDS \n") - df <- df %>% dplyr::rename_at( vars(starts_with("LE_F_MDS")), list(~stringr::str_replace(., "LE_F_MDS", "latenth")) ) - } - if ("LE_RANDUNC" %in% getvars){ - if (verbose) rlang::warn("Renaming: latenth_unc = LE_RANDUNC \n") - df <- df %>% dplyr::rename_at( vars(starts_with("LE_RANDUNC")), list(~stringr::str_replace(., "LE_RANDUNC", "latenth_unc")) ) - } - if ("H_F_MDS" %in% getvars){ - if (verbose) rlang::warn("Renaming: sensibleh = H_F_MDS \n") - df <- df %>% dplyr::rename_at( vars(starts_with("H_F_MDS")), list(~stringr::str_replace(., "H_F_MDS", "sensibleh" )) ) - } - if ("VPD_F_DAY" %in% getvars){ - if (verbose) rlang::warn("Renaming: vpd_day = VPD_F_DAY \n") - df <- df %>% dplyr::rename_at( vars(starts_with("VPD_F_DAY")), list(~stringr::str_replace(., "VPD_F_DAY", "vpd_day" )) ) - } - if ("VPD_F" %in% getvars){ - if (verbose) rlang::warn("Renaming: vpd = VPD_F \n") - df <- df %>% dplyr::rename_at( vars(starts_with("VPD_F")), list(~stringr::str_replace(., "VPD_F", "vpd" )) ) - } - if ("PA_F" %in% getvars){ - if (verbose) rlang::warn("Renaming: patm = PA_F \n") - df <- df %>% dplyr::rename_at( vars(starts_with("PA_F")), list(~stringr::str_replace(., "PA_F", "patm" )) ) - } - if ("SW_IN_F" %in% getvars){ - if (verbose) rlang::warn("Renaming: swin = SW_IN_F \n") - df <- df %>% dplyr::rename_at( vars(starts_with("SW_IN_F")), list(~stringr::str_replace(., "SW_IN_F", "swin" )) ) - } - if ("NETRAD" %in% getvars){ - if (verbose) rlang::warn("Renaming: netrad = NETRAD \n") - df <- df %>% dplyr::rename_at( vars(starts_with("NETRAD")), list(~stringr::str_replace(., "NETRAD", "netrad" )) ) - } - - ## Convert units - if ("VPD_F_DAY" %in% getvars){ - if (verbose) rlang::warn("Converting: vpd_day = vpd_day * 1e2 (given in hPa, required in Pa) \n") - df <- df %>% dplyr::mutate( vpd_day = vpd_day * 1e2 ) - } - if ("VPD_F" %in% getvars){ - if (verbose) rlang::warn("Converting: vpd = vpd * 1e2 (given in hPa, required in Pa) \n") - df <- df %>% dplyr::mutate( vpd = vpd * 1e2 ) - } - if ("PA_F" %in% getvars){ - 
if (verbose) rlang::warn("Converting: patm = patm * 1e3 (given in kPa, required in Pa) \n") - df <- df %>% dplyr::mutate( patm = patm * 1e3 ) - } - if ("SW_IN_F" %in% getvars){ - if (verbose) rlang::warn("Converting: swin = swin * 60 * 60 * 24 (given in W m-2, required in J m-2 d-1) \n") - df <- df %>% dplyr::mutate( swin = swin * 60 * 60 * 24 ) - if (verbose) rlang::warn("Converting: ppfd = swin * kfFEC * 1.0e-6 (convert from J/m2/d to mol/m2/d; kfFEC = 2.04 is the flux-to-energy conversion, micro-mol/J (Meek et al., 1984)) \n") - df <- df %>% dplyr::mutate( ppfd = swin * kfFEC * 1.0e-6 ) - } - if ("NETRAD" %in% getvars){ - if (verbose) rlang::warn("Converting: netrad = NETRAD * 60 * 60 * 24 (given in W m-2 (avg.), required in J m-2 (daily total)) \n") - df <- df %>% dplyr::mutate( netrad = netrad * 60 * 60 * 24 ) - } - - - # ## GPP: mean over DT and NT - # if ("GPP_NT_VUT_REF" %in% getvars){ - # if ("GPP_DT_VUT_REF" %in% getvars){ - # if (verbose) rlang::warn("Converting: gpp_obs = mean( GPP_NT_VUT_REF, GPP_DT_VUT_REF ) \n") - # df <- df %>% dplyr::mutate( gpp_obs = apply( dplyr::select( df, GPP_NT_VUT_REF, GPP_DT_VUT_REF ), 1, FUN = mean, na.rm = FALSE ) ) - # } else { - # if (verbose) rlang::warn("Converting: gpp_obs = GPP_NT_VUT_REF \n") - # df <- df %>% dplyr::rename( gpp_obs = GPP_NT_VUT_REF ) - # } - # } else { - # if ("GPP_DT_VUT_REF" %in% getvars){ - # if (verbose) rlang::warn("Converting: gpp_obs = GPP_DT_VUT_REF \n") - # df <- df %>% dplyr::rename( gpp_obs = GPP_DT_VUT_REF ) - # } - # } - - - # Crude fix for a crude problem: some FLUXNET2015 files end on Dec 30 in the last year available - # Duplicate last row - lastrow <- df %>% dplyr::slice(nrow(df)) - if (lubridate::month(lastrow$date)==12 && lubridate::mday(lastrow$date)==30){ - lastrow$date <- lastrow$date + lubridate::days(1) - df <- df %>% - bind_rows(lastrow) - } - - # Crude fix for a crude problem: some FLUXNET2015 files have NA in first row for VPD - if ("vdp_day" %in% getvars){ - if (is.na(df$vpd_day[1]) && !is.na(df$vpd_day[2])){ - df$vpd_day[1] <- df$vpd_day[2] - } - } - - return(df) - -} - -clean_fluxnet_byvar <- function(df, varnam, threshold){ - varnam_qc <- paste0(varnam, "_QC") - df[[varnam]][which(df[[varnam_qc]] < threshold)] <- NA - return(df) -} - -##---------------------------------------------------------------------- -## Function for reading observational GPP data from FLUXNET 2015 dataset -##---------------------------------------------------------------------- -get_obs_bysite_wcont_fluxnet2015 <- function( sitename, path_fluxnet2015, timescale ){ - - getvars <- "SWC" - - ## Take only file for this site - if (timescale=="d"){ - ## Daily - filn <- list.files( path_fluxnet2015, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_DD.*.csv" ), - recursive = TRUE - ) - } else if (timescale=="w"){ - ## Weekly - filn <- list.files( path_fluxnet2015, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_WW.*.csv" ), - recursive = TRUE - ) - } else if (timescale=="m"){ - ## Monthly - filn <- list.files( path_fluxnet2015, - pattern = paste0(..., collapse = NULL), - recursive = TRUE - ) - } else if (timescale=="y"){ - ## Annual - filn <- list.files( path_fluxnet2015, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_YY.*.csv" ), - recursive = TRUE - ) - } - - if (length(filn)==0) abort(paste0("No files found for timescale ", timescale, "in sub-directories of ", path_fluxnet2015 ) ) - - ## This returns a data frame with columns (date, temp, prec, nrad, ppfd, vpd, ccov) - ddf <- 
-##----------------------------------------------------------------------
-## Function for reading observational soil water content (SWC) data from the FLUXNET 2015 dataset
-##----------------------------------------------------------------------
-get_obs_bysite_wcont_fluxnet2015 <- function( sitename, path_fluxnet2015, timescale ){
-
-  getvars <- "SWC"
-
-  ## Take only file for this site
-  if (timescale=="d"){
-    ## Daily
-    filn <- list.files( path_fluxnet2015,
-      pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_DD.*.csv" ),
-      recursive = TRUE
-      )
-  } else if (timescale=="w"){
-    ## Weekly
-    filn <- list.files( path_fluxnet2015,
-      pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_WW.*.csv" ),
-      recursive = TRUE
-      )
-  } else if (timescale=="m"){
-    ## Monthly
-    filn <- list.files( path_fluxnet2015,
-      pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_MM.*.csv" ),
-      recursive = TRUE
-      )
-  } else if (timescale=="y"){
-    ## Annual
-    filn <- list.files( path_fluxnet2015,
-      pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_YY.*.csv" ),
-      recursive = TRUE
-      )
-  }
-
-  if (length(filn)==0) abort(paste0("No files found for timescale ", timescale, " in sub-directories of ", path_fluxnet2015 ) )
-
-  ## This returns a data frame with columns (date, temp, prec, nrad, ppfd, vpd, ccov)
-  ddf <- get_obs_fluxnet2015_raw( sitename,
-    path = paste0(path_fluxnet2015, filn),
-    freq = "d"
-    )
-
-  ## convert to numeric (weirdly isn't always) and subset (select)
-  if ( identical( getvars, "SWC" ) ){
-    ddf <- ddf %>% dplyr::mutate_at( vars(starts_with(getvars)), list(~as.numeric(.)) ) %>%
-      dplyr::select( date, starts_with(getvars) )
-  } else {
-    ddf <- ddf %>% dplyr::mutate_at( vars(one_of(getvars)), list(~as.numeric(.)) ) %>%
-      dplyr::select( date, one_of(getvars) )
-  }
-
-  swcvars   <- ddf %>% dplyr::select( starts_with("SWC") & !ends_with("QC") ) %>% names()
-  swcqcvars <- ddf %>% dplyr::select( starts_with("SWC") &  ends_with("QC") ) %>% names()
-
-  # map( as.list(seq(length(swcvars))), ~clean_fluxnet_swc( ddf[[ swcvars[.] ]], ddf[[ swcqcvars[.] ]], frac_data_thresh=0.5 ) )
-
-  if (length(swcvars)>0){
-    for (ivar in 1:length(swcvars)){
-      ddf[[ swcvars[ivar] ]] <- clean_fluxnet_swc( ddf[[ swcvars[ivar] ]], ddf[[ swcqcvars[ivar] ]], frac_data_thresh=0.5 )
-    }
-  }
-
-  return(ddf)
-
-}
-
-
-get_obs_fluxnet2015_raw <- function( sitename, path, freq="d" ){
-  ##--------------------------------------------------------------------
-  ## Function returns a dataframe containing all the data of the FLUXNET
-  ## 2015 data file of respective temporal resolution.
-  ## Returns data in units given in the fluxnet 2015 dataset
-  ##--------------------------------------------------------------------
-  ## get data
-  df <- readr::read_csv( path, na="-9999" ) #, col_types = cols()
-
-  ## get dates, their format differs slightly between temporal resolution
-  if ( freq=="y" ){
-
-    df <- df %>% dplyr::mutate( year = TIMESTAMP ) %>% dplyr::mutate( date = lubridate::ymd( paste0( as.character(year), "-01-01" ) ) )
-
-  } else if ( freq=="w"){
-
-    df <- df %>% dplyr::mutate( date_start = lubridate::ymd(TIMESTAMP_START), date_end = lubridate::ymd(TIMESTAMP_END) ) %>%
-      dplyr::mutate( date = date_start )
-
-  } else if ( freq=="m" ){
-
-    df <- df %>% dplyr::mutate( year = substr( TIMESTAMP, start = 1, stop = 4 ), month = substr( TIMESTAMP, start = 5, stop = 6 ) ) %>%
-      dplyr::mutate( date = lubridate::ymd( paste0( as.character(year), "-", as.character(month), "-01" ) ) )
-
-  } else if ( freq=="d" ){
-
-    df <- df %>% dplyr::mutate( date = lubridate::ymd( TIMESTAMP ) )
-
-  } else if ( freq=="hh" ){
-
-    df <- df %>% dplyr::mutate( date_start = lubridate::ymd_hm( TIMESTAMP_START ),
-                                date_end   = lubridate::ymd_hm( TIMESTAMP_END ) ) %>%
-      dplyr::mutate( date = date_start )
-
-  }
-
-  return( df )
-
-}
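The raw reader above derives a `date` column from FLUXNET's `TIMESTAMP`, whose format depends on temporal resolution. A standalone sketch of the same parsing logic (toy values, not package code):

```r
# FLUXNET 2015 TIMESTAMP formats: daily YYYYMMDD, monthly YYYYMM, annual YYYY.
library(dplyr)
library(lubridate)

daily   <- tibble::tibble(TIMESTAMP = 20150704) %>%
  mutate(date = ymd(TIMESTAMP))
monthly <- tibble::tibble(TIMESTAMP = 201507) %>%
  mutate(date = ymd(paste0(substr(TIMESTAMP, 1, 4), "-", substr(TIMESTAMP, 5, 6), "-01")))
annual  <- tibble::tibble(TIMESTAMP = 2015) %>%
  mutate(date = ymd(paste0(TIMESTAMP, "-01-01")))
```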
-# ## Converts units of GPP variables from FLUXNET 2015 to SOFUN standard
-# convert_gpp_fluxnet2015 <- function( gpp ){
-#   # in FLUXNET 2015 given in umolCO2 m-2 s-1, converted to gC m-2 d-1
-#   c_molmass <- 12.0107  # molar mass of C
-#   gpp_converted <- gpp * 1e-6 * 60 * 60 * 24 * c_molmass
-#   return(gpp_converted)
-
-# }
-
-## Converts units of latent energy (LE) variables from FLUXNET 2015 to SOFUN standard
-convert_energy_fluxnet2015 <- function( le ){
-  ## W m-2 -> J m-2 d-1
-  le_converted <- as.numeric(le) * 60 * 60 * 24
-  return(le_converted)
-}
-
-clean_fluxnet_gpp <- function(df, nam_gpp_nt, nam_gpp_dt, nam_nt_qc, nam_dt_qc, threshold, remove_neg = FALSE){
-  ##--------------------------------------------------------------------
-  ## Cleans daily data using criteria 1-4 as documented in Tramontana et al., 2016
-  ## gpp_nt: based on nighttime flux decomposition ("NT")
-  ## gpp_dt: based on daytime flux decomposition ("DT")
-  ##--------------------------------------------------------------------
-  replace_with_na_qc <- function(gpp, qc, threshold){
-    gpp[which(qc < threshold)] <- NA
-    return(gpp)
-  }
-  replace_with_na_neg <- function(gpp){
-    gpp[which(gpp<0)] <- NA
-    return(gpp)
-  }
-  replace_with_na_res <- function(gpp, res, q025, q975){
-    gpp[ res > q975 | res < q025 ] <- NA
-    return(gpp)
-  }
-
-  df <- df %>%
-    mutate(GPP_NT_VUT_REF = replace_with_na_qc(GPP_NT_VUT_REF, NEE_VUT_REF_NIGHT_QC, threshold),
-           GPP_DT_VUT_REF = replace_with_na_qc(GPP_DT_VUT_REF, NEE_VUT_REF_DAY_QC, threshold))
-
-  # ## Remove data points that are based on too much gap-filled data in the underlying half-hourly data
-  # gpp_nt[ which(qflag_nt < threshold) ] <- NA  ## based on fraction of data based on gap-filled half-hourly
-  # gpp_dt[ which(qflag_dt < threshold) ] <- NA  ## based on fraction of data based on gap-filled half-hourly
-
-  ## Remove data points where the two flux decompositions are inconsistent,
-  ## i.e. where the residual of their regression is above the 97.5% or below the 2.5% quantile.
-  df <- df %>%
-    mutate(res = GPP_NT_VUT_REF - GPP_DT_VUT_REF)
-
-  q025 <- quantile( df$res, probs = 0.025, na.rm=TRUE )
-  q975 <- quantile( df$res, probs = 0.975, na.rm=TRUE )
-
-  df <- df %>%
-
-    ## remove data outside the quantiles of the residuals between the DT and NT estimates
-    mutate(GPP_NT_VUT_REF = replace_with_na_res(GPP_NT_VUT_REF, res, q025, q975),
-           GPP_DT_VUT_REF = replace_with_na_res(GPP_DT_VUT_REF, res, q025, q975)
-           ) %>%
-
-    ## remove outliers
-    mutate(GPP_NT_VUT_REF = remove_outliers(GPP_NT_VUT_REF, coef = 1.5),
-           GPP_DT_VUT_REF = remove_outliers(GPP_DT_VUT_REF, coef = 1.5)
-           )
-
-  ## remove negative GPP
-  if (remove_neg){
-    df <- df %>%
-      mutate(GPP_NT_VUT_REF = replace_with_na_neg(GPP_NT_VUT_REF),
-             GPP_DT_VUT_REF = replace_with_na_neg(GPP_DT_VUT_REF)
-             )
-  }
-
-  return(df)
-}
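The removed `clean_fluxnet_gpp()` masks days where the nighttime (NT) and daytime (DT) flux decompositions disagree. A self-contained sketch of that residual filter on simulated data (all values hypothetical):

```r
# Points whose NT-DT residual falls outside the 2.5%-97.5% quantile range
# are masked in both GPP estimates.
library(dplyr)

set.seed(1)
df <- tibble::tibble(
  GPP_NT_VUT_REF = rnorm(100, mean = 5),
  GPP_DT_VUT_REF = rnorm(100, mean = 5)
) %>%
  mutate(res = GPP_NT_VUT_REF - GPP_DT_VUT_REF)

q025 <- quantile(df$res, probs = 0.025, na.rm = TRUE)
q975 <- quantile(df$res, probs = 0.975, na.rm = TRUE)

df <- df %>%
  mutate(
    GPP_NT_VUT_REF = ifelse(res < q025 | res > q975, NA, GPP_NT_VUT_REF),
    GPP_DT_VUT_REF = ifelse(res < q025 | res > q975, NA, GPP_DT_VUT_REF)
  )
```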
-clean_fluxnet_energy <- function( energyflux, qflag_energyflux, threshold ){
-  ##--------------------------------------------------------------------
-  ##--------------------------------------------------------------------
-  ## Remove data points that are based on too much gap-filled data in the underlying half-hourly data
-  # frac_data_thresh <- 0.2  ## fraction of data based on gap-filled half-hourly
-  energyflux[ qflag_energyflux < threshold ] <- NA
-
-  if ( any(!is.na(qflag_energyflux)) ){ energyflux[ is.na(qflag_energyflux) ] <- NA }
-
-  energyflux <- identify_pattern( energyflux )
-
-  return( energyflux )
-}
-
-
-clean_fluxnet_swc <- function( swc, qflag_swc, frac_data_thresh=1.0 ){
-  ##--------------------------------------------------------------------
-  ## frac_data_thresh: fraction of data based on gap-filled half-hourly
-  ##--------------------------------------------------------------------
-  ## Remove data points that are based on too much gap-filled data in the underlying half-hourly data
-  swc[ which( qflag_swc < frac_data_thresh ) ] <- NA
-  swc <- as.numeric( swc )
-
-  return( swc )
-}
-
-
-norm_to_max <- function( vec ){
-  vec <- ( vec - min( vec, na.rm=TRUE ) ) / ( max( vec, na.rm=TRUE ) - min( vec, na.rm=TRUE ) )
-  return( vec )
-}
-
-
-identify_pattern <- function( vec ){
-
-  eps <- 1e-4
-
-  vec <- as.numeric(as.character(vec))
-
-  ## identify all numbers that appear more than once (already suspicious)
-  counts <- as.data.frame( table( vec ) )
-  counts <- counts[ order(-counts$Freq), ]
-  counts <- counts[ which(counts$Freq>3), ]
-
-  ## convert factors to numeric
-  counts$vec <- as.numeric(levels(counts$vec))[counts$vec]
-
-  for (idx in 1:nrow(counts)){
-
-    ## find where this value appears
-    pos <- which( abs(vec-counts$vec[idx]) < eps )
-
-  if (length(filn)>0){
-
-    ## This returns a data frame with columns (date, temp, prec, nrad, ppfd, vpd, ccov)
-    df <- get_obs_gepisat_raw(
-      sitename = sitename,
-      path = paste0(path_gepisat, filn),
-      freq = timescale
-      ) %>%
-
-      ## Convert units
-      ## given in molCO2 m-2 d-1, converted to gC m-2 d-1
-      mutate_at( vars(starts_with("GPP_")), funs(convert_gpp_gepisat) ) %>%
-
-      ## rename so that it is like in FLUXNET 2015
-      rename( gpp_obs = GPP_mol.m2, gpp_err_obs = GPP_err_mol.m2 ) %>%
-
-      ## some are NaN
-      mutate( gpp_obs = ifelse( is.nan(gpp_obs), NA, gpp_obs ) )
-
-  } else {
-
-    df <- NULL
-
-  }
-
-  return(df)
-
-}
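A short, self-contained sketch of the GePiSaT unit conversion and NaN handling used above; the values are hypothetical, and the package applies the same arithmetic via `convert_gpp_gepisat()`:

```r
# Convert GPP from mol CO2 m-2 d-1 to gC m-2 d-1 and replace NaN by NA.
c_molmass <- 12.0107  # molar mass of carbon, g mol-1

gpp_mol <- c(0.31, NaN, 0.28)   # hypothetical daily GPP
gpp_gc  <- gpp_mol * c_molmass  # gC m-2 d-1
gpp_gc[is.nan(gpp_gc)] <- NA
gpp_gc
```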
-get_obs_gepisat_raw <- function( sitename, path, freq="d" ){
-  ##--------------------------------------------------------------------
-  ## Function returns a dataframe containing all the data of the GePiSaT
-  ## data file of respective temporal resolution.
-  ## Returns data in units given in the fluxnet 2015 dataset
-  ##--------------------------------------------------------------------
-  ## get data
-  df <- readr::read_csv( path, na="-9999", col_types = cols() )
-
-  ## get dates, their format differs slightly between temporal resolution
-  if ( freq=="d" ){
-
-    df <- df %>% mutate( Timestamp = ymd( Timestamp ) ) %>% rename( date = Timestamp )
-
-  } else {
-
-    abort("get_obs_gepisat_raw() implemented only for daily time step.")
-
-  }
-
-  return( df )
-
-}
-
-
-##--------------------------------------------------------------------
-## Converts units of GPP variables from GePiSaT to SOFUN standard
-## in GePiSaT given in molCO2 m-2 d-1, converted to gC m-2 d-1
-##--------------------------------------------------------------------
-convert_gpp_gepisat <- function( gpp ){
-  c_molmass <- 12.0107  # molar mass of C
-  gpp_converted <- gpp * c_molmass
-  return(gpp_converted)
-}
-
-
diff --git a/R/get_obs_eval.R b/R/get_obs_eval.R
deleted file mode 100644
index 80a189d6..00000000
--- a/R/get_obs_eval.R
+++ /dev/null
@@ -1,335 +0,0 @@
-#' Get observational data for model evaluation
-#'
-#' Gets observational data for model evaluation, given the benchmarking variable, and data source used
-#' for the evaluation. This information is specified in the evaluation settings (argument \code{settings_eval}).
-#'
-#' @param settings_eval A list specifying evaluation settings (see vignette eval_sofun.pdf for more information and examples)
-#' @param settings_sims A named list containing the simulation settings (see vignette_rsofun.pdf for more information and examples)
-#' @param overwrite (Optional) A logical specifying whether temporary data stored in \code{./tmpdir} should be overwritten. Defaults to \code{TRUE}.
-#' @param light (Optional) A logical specifying whether reduced data should be saved. Defaults to \code{FALSE}.
-#' @param add_forcing (Optional) A logical specifying whether forcing data is to be added to data frames containing observational data.
-#'
-#' @return A list containing data frames of observed values aggregated to several temporal scales (ddf for daily, xdf for X-daily, mdf for monthly, adf for annual).
-#' @export -#' -#' @examples obs <- get_obs_eval( settings_eval, settings_sims, overwrite = TRUE ) -#' -get_obs_eval <- function( settings_eval, settings_sims, overwrite = TRUE, light = FALSE, add_forcing = FALSE){ - - ## Interpret benchmarking data specification - datasource <- settings_eval$benchmark %>% - unlist() %>% - stringr::str_split( ., "_" ) %>% - unlist() - - ## determine variables (original names in files) to be read from benchmark data - evalvars <- names(settings_eval$benchmark) - getvars <- c() - if ("gpp" %in% names(settings_eval$benchmark)){ - ## Get observational GPP data - getvars <- c(getvars, "GPP_NT_VUT_REF", "GPP_DT_VUT_REF", "NEE_VUT_REF_NIGHT_QC", "NEE_VUT_REF_DAY_QC") - settings_eval$benchmarkvar$gpp <- ifelse(settings_eval$benchmark$gpp == "fluxnet2015_NT", - "GPP_NT_VUT_REF", - ifelse(settings_eval$benchmark$gpp == "fluxnet2015_DT", - "GPP_DT_VUT_REF", - NA)) - } - if ("netrad" %in% names(settings_eval$benchmark)){ - getvars <- c(getvars, "NETRAD") - settings_eval$benchmarkvar$netrad <- "NETRAD" - } - if ("aet" %in% names(settings_eval$benchmark) || "latenth" %in% names(settings_eval$benchmark)){ - getvars <- c(getvars, "LE_F_MDS", "LE_F_MDS_QC", "LE_RANDUNC") - evalvars[which(evalvars=="aet")] <- "latenth" - settings_eval$benchmarkvar$latenth <- "LE_F_MDS" - } - - if ("fluxnet2015" %in% datasource){ - ##------------------------------------------------------------ - ## Read annual observational data from FLUXNET 2015 files (from annual files!). - ##------------------------------------------------------------ - ## loop over sites to get data frame with all variables - if (settings_eval$path_fluxnet2015_y!="" && !("Ty" %in% datasource)){ - print("getting annual FLUXNET-2015_Tier1 data...") - print(settings_eval$path_fluxnet2015_y) - adf <- lapply( as.list(settings_eval$sitenames), - function(x) get_obs_bysite_fluxnet2015( - sitename = x, - path_fluxnet2015 = settings_eval$path_fluxnet2015_y, - path_fluxnet2015_hh = NULL, - timescale = "y", - getvars = getvars, - getswc=!light, - threshold_GPP=0, - remove_neg = TRUE, - verbose=FALSE - ) %>% - mutate( year =lubridate::year(date), - sitename = x )) %>% - bind_rows() - - ## change variable names - if ("gpp" %in% names(settings_eval$benchmark)){ - adf <- adf %>% - change_names("gpp", settings_eval$benchmarkvar) - } - - - } else { - - adf <- lapply( as.list(settings_eval$sitenames), - function(x) init_dates_dataframe( - year(settings_sims$date_start[[x]]), - year(settings_sims$date_end[[x]]), - noleap = TRUE, - freq = "years" - ) %>% - ## Remove outliers, i.e. when data is outside 1.5 times the inter-quartile range - mutate( year =lubridate::year(date), - sitename = x )) %>% - bind_rows() - - } - - ## remove pre-modis data - if (settings_eval$remove_premodis){ - adf <- adf %>% filter( lubridate::year(date) >= 2000 ) - } - - ##------------------------------------------------------------ - ## Read monthly observational data from FLUXNET 2015 files (from monthly files!). 
- ##------------------------------------------------------------ - ## loop over sites to get data frame with all variables - if (settings_eval$path_fluxnet2015_m!="" && !("Ty" %in% datasource)){ - print("getting monthly FLUXNET-2015_Tier1 data...") - mdf <- lapply( as.list(settings_eval$sitenames), - function(x) get_obs_bysite_fluxnet2015( - sitename = x, - path_fluxnet2015 = settings_eval$path_fluxnet2015_m, - path_fluxnet2015_hh = NULL, - timescale = "m", - getvars = getvars, - getswc=!light, - threshold_GPP=0.5, - remove_neg = TRUE, - verbose=FALSE - ) %>% - mutate( year =lubridate::year(date), - sitename = x )) %>% - bind_rows() - - ## change variable names - if ("gpp" %in% names(settings_eval$benchmark)){ - mdf <- mdf %>% - change_names("gpp", settings_eval$benchmarkvar) - } - - } else { - - mdf <- lapply( as.list(settings_eval$sitenames), - function(x) init_dates_dataframe( - year(settings_sims$date_start[[x]]), - year(settings_sims$date_end[[x]]), - noleap = TRUE, - freq = "months" - ) %>% - ## Remove outliers, i.e. when data is outside 1.5 times the inter-quartile range - mutate( year =lubridate::year(date), - sitename = x )) %>% - bind_rows() - } - - ## remove pre-modis data - if (settings_eval$remove_premodis){ - mdf <- mdf %>% filter(lubridate::year(date) >= 2000 ) - } - - ##------------------------------------------------------------ - ## Read daily observational data from FLUXNET 2015 files (from daily files!). - ##------------------------------------------------------------ - if (settings_eval$path_fluxnet2015_d!="" && !("Ty" %in% datasource)){ - print("getting daily FLUXNET-2015_Tier1 data...") - ddf <- lapply( as.list(settings_eval$sitenames), - function(x) get_obs_bysite_fluxnet2015( - sitename = x, - path_fluxnet2015 = settings_eval$path_fluxnet2015_d, - path_fluxnet2015_hh = NULL, - timescale = "d", - getvars = getvars, - getswc=!light, - threshold_GPP=0.5, - remove_neg = FALSE, - verbose=TRUE - ) %>% - mutate( year =lubridate::year(date), - sitename = x )) %>% - bind_rows() - - ## change variable names - if ("gpp" %in% names(settings_eval$benchmark)){ - ddf <- ddf %>% - change_names("gpp", settings_eval$benchmarkvar) - } - - } else { - - ddf <- lapply( as.list(settings_eval$sitenames), - function(x) init_dates_dataframe( - year(settings_sims$date_start[[x]]), - year(settings_sims$date_end[[x]]), - noleap = TRUE, - freq = "days" - ) %>% - ## Remove outliers, i.e. when data is outside 1.5 times the inter-quartile range - mutate( year =lubridate::year(date), - sitename = x )) %>% - bind_rows() - - } - - ## remove pre-modis data - if (settings_eval$remove_premodis){ - ddf <- ddf %>% filter(lubridate::year(date) >= 2000 ) - } - - ##------------------------------------------------------------ - ## Read daily observational data from GEPISAT files (only daily files!). 
-  ##------------------------------------------------------------
-  if ("Ty" %in% datasource){
-    ddf_gepisat <- lapply( as.list(settings_eval$sitenames),
-      function(x) get_obs_bysite_gpp_gepisat( x,
-        settings_eval$path_gepisat_d,
-        timescale = "d" ) )
-    names(ddf_gepisat) <- settings_eval$sitenames
-
-    missing_gepisat <- purrr::map_lgl( ddf_gepisat, ~identical(., NULL ) ) %>% which() %>% names()
-    settings_eval$sitenames_gepisat <- settings_eval$sitenames[which(!(settings_eval$sitenames %in% missing_gepisat))]
-
-    ## Convert to one long data frame and add sitename to data frames inside the list
-    ddf_gepisat <- ddf_gepisat %>%
-      bind_rows( .id = "sitename" ) %>%
-      mutate( gpp_obs = ifelse( date < "2000-02-18", NA, gpp_obs ) ) %>%  # remove pre-modis data
-      dplyr::rename( gpp = gpp_obs ) %>%
-
-      # remove weird data points where error is zero
-      mutate( gpp = ifelse( gpp_err_obs==0.0, NA, gpp ) )
-
-    ## Take weighted average for updated 'obs', aggregate, and add to other data frames (ddf, adf, mdf)
-    if (!is.null(ddf_gepisat)){
-
-      ## daily
-      ddf <- ddf_gepisat %>%
-        right_join( ddf, by = c("sitename", "date") ) %>%
-        dplyr::select(-year, -year_dec)
-
-      ## don't remember why I did this, commenting it out
-      # totlen <- length(datasource[ -which( datasource=="fluxnet2015" ) ])
-      # ddf$gpp <- apply( dplyr::select( ddf, gpp, gpp_gepisat ), 1, stats::weighted.mean, c((totlen-1)/totlen, 1/totlen), na.rm=TRUE )
-
-      # ##------------------------------------------------------------
-      # ## Add forcing data to daily data frame (for evaluation of functional relationships)
-      # ##------------------------------------------------------------
-      # ddf <- lapply( as.list(settings_eval$sitenames), function(x) get_forcing_from_csv( x, settings_sims ) ) %>%
-      #   bind_rows() %>%
-      #   dplyr::select(-year_dec.x, -year_dec.y) %>%
-      #   right_join( ddf, by = c("sitename", "date") )
-
-      ## monthly
-      mdf <- mdf %>% mutate( year=year(date), moy=month(date) )
-      mdf <- ddf %>%
-        mutate( moy=month(date), year=year(date) ) %>%
-        group_by( sitename, year, moy ) %>%
-        summarise( gpp = sum(gpp) ) %>%
-        right_join( mdf, by = c("sitename", "year", "moy") )
-
-      ## annual
-      adf <- ddf %>%
-        mutate( year=year(date) ) %>%
-        group_by( sitename, year ) %>%
-        summarise( gpp = sum(gpp) ) %>%
-        right_join( adf, by = c("sitename", "year") )
-
-    } else {
-      ddf <- ddf %>% mutate( gpp_gepisat = NA )
-    }
-  }
-
-  ##------------------------------------------------------------
-  ## Add forcing data to daily data frame (for evaluation of functional relationships)
-  ##------------------------------------------------------------
-  if (add_forcing){
-    print("adding forcing data...")
-    ddf <- lapply( as.list(settings_eval$sitenames), function(x) get_forcing_from_csv( x, settings_sims ) ) %>%
-      bind_rows() %>%
-      right_join( ddf, by = c("sitename", "date") )
-  }
-
-  ##------------------------------------------------------------
-  ## Aggregate to multi-day periods
-  ## periods should start with the 1st of January each year, otherwise can't compute mean seasonal cycle
-  ##------------------------------------------------------------
-  # ## 8-day periods corresponding to MODIS dates (problem: doesn't start with Jan 1 each year)
-  # breaks <- modisdates <- readr::read_csv( "modisdates.csv" )$date
-
-  # ## aggregate to weeks
-  # xdf <- ddf %>% mutate( inbin = week(date) ) %>%
-  #   group_by( sitename, year, inbin ) %>%
-  #   summarise( gpp = mean( gpp, na.rm=TRUE) )
-
-  ## Generate vector of starting dates of X-day periods, making sure the 1st of Jan is always the start
of a new period - listyears <- seq( ymd("1990-01-01"), ymd("2018-01-01"), by = "year" ) - breaks <- purrr::map( as.list(listyears), ~seq( from=., by=paste0( settings_eval$agg, " days"), length.out = ceiling(365 / settings_eval$agg)) ) %>% Reduce(c,.) - - ## take mean across periods - xdf <- ddf %>% mutate( inbin = cut( date, breaks = breaks, right = FALSE ) ) %>% - group_by( sitename, inbin ) %>% - summarise_at( vars(one_of(evalvars)), mean, na.rm=TRUE) - - } - - return( list( ddf = ddf, xdf = xdf, mdf = mdf, adf = adf, breaks_xdf = breaks ) ) -} - - -## Read forcing data from CSV file prepared for SOFUN input -get_forcing_from_csv <- function( sitename, settings_sims ){ - - ## get climate data - dir <- paste0( settings_sims$path_input, "/sitedata/climate/", sitename ) - csvfiln <- paste0( dir, "/clim_daily_lev2_", sitename, ".csv" ) - if (file.exists(csvfiln)){ - ddf <- readr::read_csv( csvfiln ) - } else { - ddf <- tibble(date=ymd("2001-01-01")) - } - - ## get fapar data - dir <- paste0( settings_sims$path_input, "/sitedata/fapar/", sitename ) - csvfiln <- paste0( dir, "/fapar_daily_", sitename, ".csv" ) - if (file.exists(csvfiln)){ - ddf <- readr::read_csv( csvfiln ) %>% - mutate( fapar = as.numeric(modisvar_interpol)) %>% - dplyr::select(date, fapar) %>% - right_join( ddf, by = "date" ) - } else { - ddf <- tibble(date=ymd("2001-01-01")) - } - - return(ddf) - -} - -change_names <- function(df, varnam, benchmarkvar){ - - df[[varnam]] <- df[[benchmarkvar[[varnam]]]] - df <- df %>% - dplyr::select(-matches(benchmarkvar[[varnam]])) - - if (varnam=="gpp"){ - df <- df %>% - dplyr::select(-starts_with("GPP_")) - } - - return(df) -} diff --git a/R/get_obs_eval2.R b/R/get_obs_eval2.R deleted file mode 100644 index 2a316dd0..00000000 --- a/R/get_obs_eval2.R +++ /dev/null @@ -1,111 +0,0 @@ -#' Get observational data for model evaluation -#' -#' Gets observational data for model evaluation, given the benchmarking variable, and data source used -#' for the evaluation. This information is specified in the evaluation settings (argument \code{settings_eval}). -#' -#' @param settings_eval A list specifying evaluation settings (see vignette eval_sofun.pdf for more information and examples) -#' @param settings_sims A named list containing the simulation settings (see vignette_rsofun.pdf for more information and examples) -#' @param overwrite (Optional) A logical specifying whether temporary data stored in \code{./tmpdir} should be overwritten. Defaults to \code{TRUE}. -#' @param light (Optional) A logical specifying whether reduced data should saved. Defaults to \code{FALSE}. -#' @param add_forcing (Optional) a logical specifying whether forcing data is to be added to data frames containing observational data. -#' -#' @return A list containing data frames of observed values aggregated to several temporal scales (ddf for daily, xdf for X-daily, mdf for monthly, adf for annual). 
-#' @export -#' -#' @examples obs <- get_obs_eval2( siteinfo = NULL, settings_eval = NULL, adf = NULL, mdf = NULL, ddf = NULL, hrdf = NULL, hhdf = NULL, agg = 8 ) -#' -get_obs_eval2 <- function( siteinfo = NULL, settings_eval = NULL, adf = NULL, mdf = NULL, ddf = NULL, hrdf = NULL, hhdf = NULL, agg = 8 ){ - - evalvars <- names(settings_eval$benchmark) - - if (is.null(adf) || is.null(mdf) || is.null(ddf)){ # nothing done with hourly or half-hourly data yet - ##------------------------------------------ - ## Interpret benchmarking data specification - ##------------------------------------------ - datasource <- settings_eval$benchmark %>% - unlist() %>% - stringr::str_split( ., "_" ) %>% - unlist() - - if ("fluxnet2015" %in% datasource){ - - if (is.null(adf) || is.null(mdf) || is.null(ddf)){ - ##------------------------------------------ - ## determine variables (original names in files) to be read from benchmark data - ##------------------------------------------ - getvars <- c() - if ("gpp" %in% names(settings_eval$benchmark)){ - ## Get observational GPP data - getvars <- c(getvars, "GPP_NT_VUT_REF", "GPP_DT_VUT_REF", "NEE_VUT_REF_NIGHT_QC", "NEE_VUT_REF_DAY_QC") - settings_eval$benchmarkvar$gpp <- ifelse(settings_eval$benchmark$gpp == "fluxnet2015_NT", - "GPP_NT_VUT_REF", - ifelse(settings_eval$benchmark$gpp == "fluxnet2015_DT", - "GPP_DT_VUT_REF", - NA)) - } - if ("netrad" %in% names(settings_eval$benchmark)){ - getvars <- c(getvars, "NETRAD") - settings_eval$benchmarkvar$netrad <- "NETRAD" - } - if ("aet" %in% names(settings_eval$benchmark) || "latenth" %in% names(settings_eval$benchmark)){ - getvars <- list(latenth = "LE_F_MDS", latenth_qc = "LE_F_MDS_QC", latenth_unc = "LE_RANDUNC") - getvars <- c(getvars, "LE_F_MDS", "LE_F_MDS_QC", "LE_RANDUNC") - settings_eval$benchmarkvar$latenth <- "LE_F_MDS" - } - } - - ##------------------------------------------ - ## Ingest data - ##------------------------------------------ - if (is.null(adf)){ - adf_eval <- ingest( - siteinfo = siteinfo, - source = "fluxnet2015", - getvars = getvars, - dir = settings_eval$path_fluxnet2015_y, - settings = list(threshold_LE = 0.8, getswc = TRUE), - timescale = "y" - ) - } - - if (is.null(ddf)){ - ddf_eval <- ingest( - siteinfo = siteinfo, - source = "fluxnet2015", - getvars = getvars, - dir = "~/data/FLUXNET-2015_Tier1/20191024/DD/", - settings = list(threshold_LE = 0.8, getswc = TRUE), - timescale = "d" - ) - } - - if (is.null(mdf)){ - mdf_eval <- ingest( - siteinfo = siteinfo, - source = "fluxnet2015", - getvars = getvars, - dir = settings_eval$path_fluxnet2015_m, - settings = list(threshold_LE = 0.8, getswc = TRUE), - timescale = "m" - ) - } - } - } - - - ##------------------------------------------------------------ - ## Aggregate daily data to multi-day periods - ## periods should start with the 1st of January each year, otherwise can't compute mean seasonal cycle - ##------------------------------------------------------------ - ## Generate vector of starting dates of X-day periods, making sure the 1st of Jan is always the start of a new period - listyears <- seq( ymd("1990-01-01"), ymd("2018-01-01"), by = "year" ) - breaks <- purrr::map( as.list(listyears), ~seq( from=., by=paste0( agg, " days"), length.out = ceiling(365 / agg)) ) %>% Reduce(c,.) 
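The X-day aggregation here hinges on `cut()` with break dates that restart every 1 January, so that mean seasonal cycles can be computed. A self-contained sketch with `agg = 8` (site name and GPP values hypothetical):

```r
# Bin daily dates into 8-day periods restarting each 1 January,
# then average per site and bin.
library(dplyr)
library(lubridate)

agg <- 8
listyears <- seq(ymd("1990-01-01"), ymd("2018-01-01"), by = "year")
breaks <- purrr::map(
  as.list(listyears),
  ~seq(from = ., by = paste0(agg, " days"), length.out = ceiling(365 / agg))
) %>%
  Reduce(c, .)

ddf <- tibble::tibble(
  sitename = "XX-Xxx",
  date = seq(ymd("2000-01-01"), ymd("2000-12-31"), by = "day"),
  gpp = rnorm(366)
)

xdf <- ddf %>%
  mutate(inbin = cut(date, breaks = breaks, right = FALSE)) %>%
  group_by(sitename, inbin) %>%
  summarise(gpp = mean(gpp, na.rm = TRUE), .groups = "drop")
```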
- - ## take mean across periods - xdf <- ddf %>% mutate( inbin = cut( date, breaks = breaks, right = FALSE ) ) %>% - group_by( sitename, inbin ) %>% - summarise_at( vars(one_of(evalvars)), mean, na.rm=TRUE) - - return( list( ddf = ddf, xdf = xdf, mdf = mdf, adf = adf, breaks_xdf = breaks ) ) - -} diff --git a/R/run_sofun_f_bysite.R b/R/run_sofun_f_bysite.R index 87e5f16b..0202d0e0 100644 --- a/R/run_sofun_f_bysite.R +++ b/R/run_sofun_f_bysite.R @@ -76,6 +76,7 @@ run_sofun_f_bysite <- function( sitename, params_siml, siteinfo, forcing, df_soi rlang::warn(paste(" Number of years data: ", nrow(forcing)/365)) rlang::warn(paste(" Number of simulation years: ", params_siml$nyeartrend)) rlang::warn(" Returning a dummy data frame.") + do_continue <- FALSE } } diff --git a/README.md b/README.md index 93880faa..253165b6 100644 --- a/README.md +++ b/README.md @@ -5,14 +5,13 @@ Provides a wrapper for the SOFUN model implemented in Fortran. Shared memory (calling Fortran functions from within R) increases speed and facilitates use and installation. The package provides the following functionalities: -- Collecting input data for a large number of site-scale simulations (data ingest) - Calibrating model parameters - Running the model and getting outputs directly back into R ("tidy" data) - Evaluating outputs (benchmarking) -So far, rsofun implements the P-model ([Stocker et al., 2019 GMDD](https://www.geosci-model-dev-discuss.net/gmd-2019-200/)) and was used for simulations presented in [Stocker et al., 2019 GMDD](https://www.geosci-model-dev-discuss.net/gmd-2019-200/). Dataset-specific steps (data ingest, and benchmarking) are so far only implemented for using GPP or ET data from FLUXNET 2015. However, all steps are implemented in a modular and generic way in order to provide an extendible framework. +So far, rsofun implements the P-model ([Stocker et al., 2019 GMDD](https://www.geosci-model-dev-discuss.net/gmd-2019-200/)) and was used for simulations presented in [Stocker et al., 2019 GMDD](https://www.geosci-model-dev-discuss.net/gmd-2019-200/). Input data, used as model forcing, is collected using the [ingestr](https://stineb.github.io/ingestr/) package. -Parallelisation for a large number of site-level simulations is provided using the *multidplyr* R package. Calibration uses the *gensa* R package. +Parallelisation for a large number of site-level simulations is provided using the *multidplyr* R package. Calibration uses the *GenSA* R package. ## Installation @@ -46,313 +45,5 @@ load_dependencies_rsofun() ## Example run -```r -library(dplyr) -library(rsofun) -load_dependencies_rsofun() -``` - -### Simulation settings - -Make sure we have a site meta info file containing information about longitude, latitude, elevation, years of data availability, etc. Use the FLUXNET 2015 meta info data provided with rsofun. - -```r -path_siteinfo <- "~/.siteinfo_example_fortran.csv" -siteinfo <- rsofun::metainfo_Tier1_sites_kgclimate_fluxnet2015 %>% - dplyr::filter(sitename %in% c("FR-Pue", "FR-LBr", "IT-Noe")) %>% - write_csv(path = path_siteinfo) -``` - -Define the simulation parameters. 
-```r -params_siml <- list( - spinup = TRUE, - spinupyears = 10, - recycle = 1, - soilmstress = FALSE, - tempstress = FALSE, - calc_aet_fapar_vpd = FALSE, - in_ppfd = TRUE, - in_netrad = FALSE, - const_clim_year = -9999, - const_lu_year = -9999, - const_co2_year = -9999, - const_ndep_year = -9999, - const_nfert_year = -9999, - outdt = 1, - ltre = FALSE, - ltne = FALSE, - ltrd = FALSE, - ltnd = FALSE, - lgr3 = TRUE, - lgn3 = FALSE, - lgr4 = FALSE - ) -``` - -Run `prepare_setup_sofun()` to define the simulation settings that contain all the information specified by the two steps above (meta info, and simulation parameters). -```r -settings_sims <- prepare_setup_sofun(siteinfo = siteinfo, params_siml = params_siml) -``` - -### Define model parameters - -First, let's do it by hand (calibration of parameters is shown later). -```r -params_modl <- list( - kphio = 0.04997714009213085, - soilm_par_a = 1.0, - soilm_par_b = 0.0, - vpdstress_par_a = 0.2, - vpdstress_par_b = 0.2, - vpdstress_par_m = 5 - ) -``` - -### Define soil parameters - -For now, this is implemented as an illustration. Should be made site-specific. -```r -list_soiltexture <- list( - top = list(fsand = 0.4, fclay = 0.3, forg = 0.1, fgravel = 0.1), - bottom = list(fsand = 0.4, fclay = 0.3, forg = 0.1, fgravel = 0.1) -) -``` - -### Get input - -First, define input settings. -```r -settings_input <- list( - data = NA, - temperature = "fluxnet2015", - precipitation = "fluxnet2015", - vpd = "fluxnet2015", - ppfd = "fluxnet2015", - netrad = "fluxnet2015", # c("fluxnet2015", "watch_wfdei"), - patm = "fluxnet2015", - netrad = NA, - cloudcover = "cru", - path_input = "~/sofun_inputs/example/", - path_watch_wfdei = "~/data/watch_wfdei/", - path_cru = "~/data/cru/ts_4.01/", - path_MODIS_FPAR_MCD15A3H = "~/data/fluxnet_subsets/fapar_MODIS_FPAR_MCD15A3H_gee_MCD15A3H_fluxnet2015_gee_subset/", - path_MODIS_EVI_MOD13Q1 = "~/data/fluxnet_subsets/fapar_MODIS_EVI_MOD13Q1_gee_MOD13Q1_fluxnet2015_gee_subset/", - path_co2 = "~/data/co2/cCO2_rcp85_const850-1765.csv", - path_fluxnet2015 = "~/data/FLUXNET-2015_Tier1/20191024/DD/", - path_fluxnet2015_hh = "~/data/FLUXNET-2015_Tier1/20191024/HH/", - get_from_remote = FALSE, - settings_gee = get_settings_gee( - bundle = "fpar", - python_path = "/Users/benjaminstocker/Library/Enthought/Canopy_64bit/User/bin/python", - gee_path = "~/gee_subset/gee_subset/" - ), - fapar = "MODIS_FPAR_MCD15A3H", - splined_fapar = TRUE - ) -``` - -Then, get the input data. -```r -ddf_input <- prepare_input_sofun( - settings_input = settings_input, - settings_sims = settings_sims, - overwrite_csv_climate_lev1 = FALSE, - overwrite_csv_climate_lev2 = TRUE, - overwrite_csv_climate_lev3 = TRUE, - overwrite_rdata_climate = TRUE, - overwrite_csv_fapar = FALSE, - verbose = FALSE - ) -``` - -### Run the model - -Run the model for all the sites specified in the first step. -```r -df_drivers <- collect_drivers_sofun( - settings = settings_sims, - forcing = ddf_input, - df_soiltexture = df_soiltexture - ) - -## by spelling out arguments -mod <- run_sofun_f_bysite( - df_drivers$sitename[1], - df_drivers$params_siml[[1]], - df_drivers$siteinfo[[1]], - df_drivers$forcing[[1]], - df_drivers$df_soiltexture[[1]], - params_modl = params_modl, - makecheck = TRUE - ) - -## The advantage of using the nested data frame 'df_drivers' is that the -## function 'run_sofun_f_bysite' can be applied to each row using 'pmap' -## Doing it like this, the outputs are stored in a new column 'out_sofun'. 
-## This same code is implemented in 'run_sofun_f' -df_output <- runread_sofun_f( - df_drivers, - params_modl = params_modl, - makecheck = TRUE, - parallel = FALSE - ) - -df_output$out_sofun[[1]] %>% - ggplot(aes(x=date, y=gpp)) + - geom_line() + - labs(title = df_output$sitename[[1]], subtitle = "SOFUN output") -``` - -### Calibrate - -Define calibration settings. -```r -## Define calibration settings common for all setups -settings_calib <- list( - method = "gensa", - targetvars = c("gpp"), - timescale = list( gpp = "d" ), - path_fluxnet2015 = "~/data/FLUXNET-2015_Tier1/20191024/DD/", - path_fluxnet2015_hh = "~/data/FLUXNET-2015_Tier1/20191024/HH/", - threshold_GPP = 0.5, - path_gepisat = "~/data/gepisat/v3_fluxnet2015/daily_gpp/", - maxit = 5, # (5 for gensa) (30 for optimr) # - sitenames = mysites, - filter_temp_max = 35.0, - filter_drought = FALSE, - metric = "rmse", - dir_results = "./", - name = "ORG", - par = list( kphio = list( lower=0.03, upper=0.07, init=0.0496 ) ), - datasource = list( gpp = "fluxnet2015_NT" ), - filter_temp_min = NA, - filter_soilm_min = NA - ) -``` - -Get calibration target data. -```r -ddf_obs_calib <- get_obs_calib( - settings_calib = settings_calib, - dplyr::select(df_drivers, sitename, siteinfo) %>% tidyr::unnest(siteinfo), - settings_input - ) -``` - -Calibrate the model. - -```r -set.seed(1982) -settings_calib <- calib_sofun( - settings_calib, - df_drivers, - ddf_obs = ddf_obs_calib - ) -``` - -The calibrated parameters are returned by `calib_sofun()` as part of the list: -```r -print(settings_calib$par_opt) -``` - - -### Evaluate - -Run the model once again with these parameters and evaluate results. -```r -mylist <- readr::read_csv("~/eval_pmodel/myselect_fluxnet2015.csv") %>% - dplyr::filter( use==1 ) %>% - dplyr::pull( Site ) - -settings_eval <- list( - sitenames = settings_sims$sitename, - sitenames_siteplots = mylist, - agg = 8, - path_fluxnet2015_d = "~/data/FLUXNET-2015_Tier1/20160128/point-scale_none_1d/original/unpacked/", - path_fluxnet2015_w = "~/data/FLUXNET-2015_Tier1/20160128/point-scale_none_7d/original/unpacked/", - path_fluxnet2015_m = "~/data/FLUXNET-2015_Tier1/20160128/point-scale_none_1m/original/unpacked/", - path_fluxnet2015_y = "~/data/FLUXNET-2015_Tier1/20160128/point-scale_none_1y/original/unpacked/", - path_gepisat_d = "~/data/gepisat/v3_fluxnet2015/daily_gpp/", - benchmark = list( gpp = c("fluxnet2015_NT") ), - remove_premodis = TRUE - ) -``` - -Get evaluation data (benchmarking data). -```r -filn <- "./obs_eval.Rdata" -if (file.exists(filn)){ - load(filn) -} else { - obs_eval <- get_obs_eval( - settings_eval = settings_eval, - settings_sims = settings_sims, - overwrite = TRUE, - light = TRUE, - add_forcing = FALSE - ) - save(obs_eval, file = filn) -} -``` - -Now run the model with calibrated parameters. -```r -params_modl <- list( - kphio = settings_calib$par_opt[["kphio"]], - soilm_par_a = 1.0, - soilm_par_b = 0.0, - vpdstress_par_a = 0.2, - vpdstress_par_b = 0.2, - vpdstress_par_m = 5 - ) - -mod <- runread_sofun_f( - df_drivers, - params_modl = params_modl, - makecheck = TRUE - ) %>% - rename(id = sitename) %>% - unnest(out_sofun) -``` - -And finally do the evaluation. -```r -out_eval <- eval_sofun( - mod, - settings_eval, - settings_sims, - obs_eval = obs_eval, - overwrite = TRUE, - light = FALSE - ) -``` - -Print some results. 
-```r -out_eval$gpp$fluxnet2015$metrics$xdaily_pooled -``` - -```r -out_eval$gpp$fluxnet2015$data$xdf %>% rbeni::analyse_modobs2("mod", "obs", type = "heat") -``` - -```r -out_eval$gpp$fluxnet2015$data$ddf %>% - dplyr::filter(sitename=="BE-Vie" & year(date) < 2005) %>% - ggplot(aes(x = date)) + - geom_line(aes(y = obs), col="black") + - geom_line(aes(y = mod), col="red") + - labs(title = "BE-Vie") - -out_eval$gpp$fluxnet2015$data$ddf %>% - dplyr::filter(sitename=="AU-Dry" & year(date) > 2010) %>% - ggplot(aes(x = date)) + - geom_line(aes(y = obs), col="black") + - geom_line(aes(y = mod), col="red") + - labs(title = "AU-Dry") -``` - -## Acknowledgements +See [here](https://rpubs.com/stineb/rsofun) for an example. -The main author (B. Stocker) was funded by Marie Sklodowska-Curie fellowship H2020-MSCA-IF-2015, project FIBER, grant number 701329. diff --git a/_config.yml b/_config.yml deleted file mode 100644 index 2f7efbea..00000000 --- a/_config.yml +++ /dev/null @@ -1 +0,0 @@ -theme: jekyll-theme-minimal \ No newline at end of file diff --git a/docs/site_libs/navigation-1.1/codefolding.js b/docs/site_libs/navigation-1.1/codefolding.js index 305b508d..8cb6311e 100644 --- a/docs/site_libs/navigation-1.1/codefolding.js +++ b/docs/site_libs/navigation-1.1/codefolding.js @@ -17,12 +17,12 @@ window.initializeCodeFolding = function(show) { var currentIndex = 1; // select all R code blocks - var rCodeBlocks = $('pre.r, pre.python, pre.bash, pre.sql, pre.cpp, pre.stan'); + var rCodeBlocks = $('pre.r, pre.python, pre.bash, pre.sql, pre.cpp, pre.stan, pre.julia'); rCodeBlocks.each(function() { // create a collapsable div to wrap the code in var div = $('
<div class="collapse r-code-collapse"></div>');
-    if (show)
+    if (show || $(this)[0].classList.contains('fold-show'))
       div.addClass('in');
     var id = 'rcode-643E0F36' + currentIndex++;
     div.attr('id', id);
diff --git a/docs/site_libs/tocify-1.9.1/jquery.tocify.css b/docs/site_libs/tocify-1.9.1/jquery.tocify.css
index bee8f33a..21021388 100644
--- a/docs/site_libs/tocify-1.9.1/jquery.tocify.css
+++ b/docs/site_libs/tocify-1.9.1/jquery.tocify.css
@@ -11,8 +11,6 @@
     margin-left: 2%;
     position: fixed;
     border: 1px solid #ccc;
-    webkit-border-radius: 6px;
-    moz-border-radius: 6px;
     border-radius: 6px;
 }
@@ -45,11 +43,15 @@
 .tocify-subheader .tocify-subheader {
     text-indent: 30px;
 }
-
-/* Further indents third level subheader elements. You can continue this pattern if you have more nested elements. */
 .tocify-subheader .tocify-subheader .tocify-subheader {
     text-indent: 40px;
 }
+.tocify-subheader .tocify-subheader .tocify-subheader .tocify-subheader {
+    text-indent: 50px;
+}
+.tocify-subheader .tocify-subheader .tocify-subheader .tocify-subheader .tocify-subheader {
+    text-indent: 60px;
+}
 /* Twitter Bootstrap Override Style */
 .tocify .tocify-item > a, .tocify .nav-list .nav-header {
diff --git a/docs/site_libs/tocify-1.9.1/jquery.tocify.js b/docs/site_libs/tocify-1.9.1/jquery.tocify.js
index 259d1935..ba24a7dd 100644
--- a/docs/site_libs/tocify-1.9.1/jquery.tocify.js
+++ b/docs/site_libs/tocify-1.9.1/jquery.tocify.js
@@ -386,13 +386,13 @@
                 item.append($("<a/>", {
-                    "text": self.text()
+                    "html": self.html()
                 }));
             }
             else {
-                item.text(self.text());
+                item.html(self.html());
             }
diff --git a/docs/usage.html b/docs/usage.html
index 2c8749df..95497a77 100644
--- a/docs/usage.html
+++ b/docs/usage.html