diff --git a/DESCRIPTION b/DESCRIPTION index 557b6691..8c244f15 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,6 +7,6 @@ Description: Implements the SOFUN modelling framework for simulating terrestrial calibration and benchmarking. License: GPL-3 LazyData: true -Depends: dplyr, purrr, lubridate, tidyr, raster, lubridate, stringi, sp, gensa, stringr, rland, readr +Depends: dplyr, purrr, lubridate, tidyr, raster, stringi, sp, GenSA, stringr, rlang, readr RoxygenNote: 7.1.0 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 5c16501a..09222013 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,17 +2,11 @@ export(calib_sofun) export(collect_drivers_sofun) -export(download_from_remote) +export(collect_obs_eval) export(eval_sofun) export(filter_days) -export(get_obs_bysite_fluxnet2015) -export(get_obs_bysite_gpp_gepisat) -export(get_obs_calib) -export(get_obs_eval) -export(get_obs_eval2) export(get_stats) export(init_dates_dataframe) -export(load_dependencies_rsofun) export(oob_calib_eval_sofun) export(prepare_setup_sofun) export(run_sofun_f_bysite) @@ -30,6 +24,7 @@ import(readr) import(rlang) import(stringr) import(tidyr) +importFrom(GenSA,GenSA) importFrom(grDevices,boxplot.stats) importFrom(grDevices,col2rgb) importFrom(grDevices,colorRampPalette) diff --git a/R/calib_sofun.R b/R/calib_sofun.R index a179587d..1ba3a603 100644 --- a/R/calib_sofun.R +++ b/R/calib_sofun.R @@ -2,68 +2,44 @@ #' #' This is the main function that handles the calibration of SOFUN model parameters. #' -#' @param settings_calib A list containing model calibration settings. See vignette_rsofun.pdf for more information and examples. #' @param df_drivers A nested data frame with one row per site, containing site information and forcing time series, as returned by \code{collect_drivers_sofun()}. -#' @param settings_input A list containing model input settings. See vignette_rsofun.pdf for more information and examples. -#' @param ddf_obs (Optional) A data frame containing observational data used for model calibration. Created by function \code{get_obs()} +#' @param ddf_obs A data frame containing observational data used for model calibration. Created by function \code{get_obs_calib2()} +#' @param settings A list containing model calibration settings. See vignette_rsofun.pdf for more information and examples. #' #' @return The named list of calibration settings, complemented by the optimised parameter values.
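A note on the expected shape of ddf_obs: the function body below unnests a list-column named 'data' and renames 'gpp' to 'gpp_obs', so observations must be passed site-nested. A minimal sketch, assuming a flat daily table df_obs_daily (hypothetical name) with columns sitename, date, gpp and, if available, gpp_unc:

ddf_obs <- df_obs_daily %>%
  dplyr::group_by(sitename) %>%
  tidyr::nest()    # one row per site; daily observations in the 'data' list-column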
#' @export #' -#' @examples settings_calib <- calib_sofun( setup, settings_calib, settings_sims, settings_input ) +#' @examples \dontrun{ calib_sofun( df_drivers = filter(df_drivers, sitename %in% settings$sitenames), ddf_obs = ddf_obs_calib, settings = settings)} #' -calib_sofun <- function( settings_calib, df_drivers, settings_input = NA, ddf_obs = NA ){ +calib_sofun <- function( df_drivers, ddf_obs, settings ){ - targetvars <- paste0( settings_calib$targetvars, "_obs") + targetvars <- paste0( settings$targetvars, "_obs") ## Use only calibsites df_drivers <- df_drivers %>% - dplyr::filter(sitename %in% settings_calib$sitenames) - - ##---------------------------------------------------------------- - ## Collect observational data used as calibration target - ##---------------------------------------------------------------- - if (identical(NA,ddf_obs)){ - print("Collecting observational target data ...") - ddf_obs <- get_obs_calib( - settings_calib, - dplyr::select(df_drivers, sitename, siteinfo) %>% tidyr::unnest(siteinfo), - settings_input - ) - } - - ##---------------------------------------------------------------- - ## Apply filters - ##---------------------------------------------------------------- - ## "filtering" pre-MODIS data - idxs <- which(ddf_obs$date < lubridate::ymd("2000-02-18")) - make_na_premodis <- function(vec, idxs){ - vec[idxs] <- NA - return(vec) - } - ddf_obs <- ddf_obs %>% - dplyr::mutate_at( vars(one_of(targetvars)), ~make_na_premodis(., idxs) ) - + dplyr::filter(sitename %in% settings$sitenames) ## make global - targetvars_with_unc <- c(targetvars, paste0(settings_calib$targetvars, "_unc")) + targetvars_with_unc <- c(targetvars, paste0(settings$targetvars, "_unc")) ddf_obs <- ddf_obs %>% + tidyr::unnest(data) %>% + rename(gpp_obs = gpp) %>% dplyr::select( date, sitename, one_of( targetvars_with_unc ) ) %>% - dplyr::filter( sitename %in% settings_calib$sitenames ) + dplyr::filter( sitename %in% settings$sitenames ) if (nrow(ddf_obs)>0){ ## Example run for getting structure of output file - if (identical(names(settings_calib$par),"kphio")){ + if (identical(names(settings$par), "kphio")){ ## For calibrating quantum yield efficiency only # cost_rmse <- cost_chisquared_kphio cost_rmse <- cost_rmse_kphio - } else if ( "kphio" %in% names(settings_calib$par) && "soilm_par_a" %in% names(settings_calib$par) && "soilm_par_b" %in% names(settings_calib$par) ){ + } else if ( "kphio" %in% names(settings$par) && "soilm_par_a" %in% names(settings$par) && "soilm_par_b" %in% names(settings$par) ){ ## Full stack calibration cost_rmse <- cost_rmse_fullstack - } else if ( "vpdstress_par_a" %in% names(settings_calib$par) && "vpdstress_par_b" %in% names(settings_calib$par) && "vpdstress_par_m" %in% names(settings_calib$par) ){ + } else if ( "vpdstress_par_a" %in% names(settings$par) && "vpdstress_par_b" %in% names(settings$par) && "vpdstress_par_m" %in% names(settings$par) ){ ## Calibration of VPD stress function (P-model runs with soilmstress and tempstress on) # cost_rmse <- cost_chisquared_vpdstress cost_rmse <- cost_rmse_vpdstress @@ -74,21 +50,21 @@ calib_sofun <- function( settings_calib, df_drivers, settings_input = NA, ddf_ob ## Do the calibration ##---------------------------------------------------------------- ## check directory where calibration results are stored - if (!dir.exists(settings_calib$dir_results)) system( paste0( "mkdir -p ", settings_calib$dir_results) ) + if (!dir.exists(settings$dir_results)) system( paste0( "mkdir -p ", settings$dir_results) ) - if 
(settings_calib$method=="gensa"){ + if (settings$method=="gensa"){ ##---------------------------------------------------------------- ## calibrate the model parameters using GenSA (simulated annealing) ##---------------------------------------------------------------- ptm <- proc.time() out_optim <- GenSA( - par = lapply( settings_calib$par, function(x) x$init ) %>% unlist(), # initial parameter value, if NULL will be generated automatically + par = lapply( settings$par, function(x) x$init ) %>% unlist(), # initial parameter value, if NULL will be generated automatically fn = cost_rmse, - lower = lapply( settings_calib$par, function(x) x$lower ) %>% unlist(), - upper = lapply( settings_calib$par, function(x) x$upper ) %>% unlist(), + lower = lapply( settings$par, function(x) x$lower ) %>% unlist(), + upper = lapply( settings$par, function(x) x$upper ) %>% unlist(), control=list( #temperature=4000, - max.call=settings_calib$maxit, + max.call=settings$maxit, trace.mat=TRUE, threshold.stop=1e-4, max.time=300 @@ -99,15 +75,15 @@ calib_sofun <- function( settings_calib, df_drivers, settings_input = NA, ddf_ob out_optim$time_optim <- proc.time() - ptm print(out_optim$par) - filn <- paste0( settings_calib$dir_results, "/out_gensa_", settings_calib$name, ".Rdata") + filn <- paste0( settings$dir_results, "/out_gensa_", settings$name, ".Rdata") print( paste0( "writing output from GenSA function to ", filn ) ) save( out_optim, file = filn ) # ## Test plot with the same SOFUN call - # plot_test_kphio( out_optim$par, subtitle = "", label = paste("CALIB test", settings_calib$name), makepdf = FALSE ) + # plot_test_kphio( out_optim$par, subtitle = "", label = paste("CALIB test", settings$name), makepdf = FALSE ) - } else if (settings_calib$method=="optimr"){ + } else if (settings$method=="optimr"){ ##---------------------------------------------------------------- ## Calibrate the model parameters using R optimr(). Optimr is a ## wrapper that implements different optimization methods. @@ -117,22 +93,22 @@ calib_sofun <- function( settings_calib, df_drivers, settings_input = NA, ddf_ob ##---------------------------------------------------------------- ptm <- proc.time() out_optim <- optimr( - par = lapply( settings_calib$par, function(x) x$init ) %>% unlist(), # initial parameter value, if NULL will be generated automatically + par = lapply( settings$par, function(x) x$init ) %>% unlist(), # initial parameter value, if NULL will be generated automatically fn = cost_rmse, - control = list( maxit = settings_calib$maxit ) + control = list( maxit = settings$maxit ) ) proc.time() - ptm print(out_optim$par) - filn <- paste0( settings_calib$dir_results, "/out_optimr_", settings_calib$name, ".Rdata") + filn <- paste0( settings$dir_results, "/out_optimr_", settings$name, ".Rdata") print( paste0( "writing output from optimr function to ", filn ) ) save( out_optim, file = filn ) # ## Test plot with the same SOFUN call - # plot_test_kphio( out_optim$par, subtitle = "", label = paste("CALIB test", settings_calib$name), makepdf = FALSE ) + # plot_test_kphio( out_optim$par, subtitle = "", label = paste("CALIB test", settings$name), makepdf = FALSE ) - } else if (settings_calib$method=="BayesianTools"){ + } else if (settings$method=="BayesianTools"){ ##---------------------------------------------------------------- ## Calibrate the model parameters using Bayesian calibration ## implemented in 'BayesianTools'. 
I.e., the posterior density is @@ -140,15 +116,15 @@ calib_sofun <- function( settings_calib, df_drivers, settings_input = NA, ddf_ob ##---------------------------------------------------------------- ptm <- proc.time() bt_setup <- createBayesianSetup( cost_rmse( inverse=TRUE ), - lower = apply( settings_calib$par, function(x) x$lower ) %>% unlist(), - upper = lapply( settings_calib$par, function(x) x$upper ) %>% unlist() + lower = lapply( settings$par, function(x) x$lower ) %>% unlist(), + upper = lapply( settings$par, function(x) x$upper ) %>% unlist() ) - bt_settings <- list( iterations = settings_calib$maxit, message = TRUE ) + bt_settings <- list( iterations = settings$maxit, message = TRUE ) out_optim <- runMCMC( bayesianSetup = bt_setup, sampler = "DEzs", settings = bt_settings ) proc.time() - ptm - } else if (settings_calib$method=="linscale"){ + } else if (settings$method=="linscale"){ ##---------------------------------------------------------------- ## Calibrate the quantum yield efficiency parameter ('kphio') ## applied as a linear scalar to simulated GPP. @@ -159,22 +135,22 @@ calib_sofun <- function( settings_calib, df_drivers, settings_input = NA, ddf_ob ptm <- proc.time() out_optim <- optimr( - par = lapply( settings_calib$par, function(x) x$init ) %>% unlist(), # initial parameter value, if NULL will be generated automatically + par = lapply( settings$par, function(x) x$init ) %>% unlist(), # initial parameter value, if NULL will be generated automatically fn = cost_linscale_rmse, - control = list( maxit = settings_calib$maxit ) + control = list( maxit = settings$maxit ) ) proc.time() - ptm print(out_optim$par) - filn <- paste0( settings_calib$dir_results, "/out_linscale_", settings_calib$name, ".Rdata") + filn <- paste0( settings$dir_results, "/out_linscale_", settings$name, ".Rdata") print( paste0( "writing output from linscale function to ", filn ) ) save( out_optim, file = filn ) } # ## xxx Test--------------------------- - # parvals <- seq( from = lapply( settings_calib$par, function(x) x$lower ) %>% unlist(), - # to = lapply( settings_calib$par, function(x) x$upper ) %>% unlist(), + # parvals <- seq( from = lapply( settings$par, function(x) x$lower ) %>% unlist(), + # to = lapply( settings$par, function(x) x$upper ) %>% unlist(), # length.out = 20 ) # cost <- purrr::map( as.list( parvals ), ~cost_rmse( par = list(.) 
) ) %>% unlist() # plot( parvals, cost, pch=16 ) @@ -182,31 +158,31 @@ calib_sofun <- function( settings_calib, df_drivers, settings_input = NA, ddf_ob # ## ----------------------------------- ## save optimised parameters - names(out_optim$par) <- names(settings_calib$par) - for (parnam in names(settings_calib$par)){ - settings_calib$par[[ parnam ]]$opt <- out_optim$par[ parnam ] + names(out_optim$par) <- names(settings$par) + for (parnam in names(settings$par)){ + settings$par[[ parnam ]]$opt <- out_optim$par[ parnam ] } - settings_calib$par_opt <- out_optim$par + settings$par_opt <- out_optim$par } else { print("WARNING: No observational target data left after filtering.") - settings_calib$par$kphio$opt <- NA - settings_calib$par$temp_ramp_edge$opt <- NA - settings_calib$par$soilm_par_a$opt <- NA - settings_calib$par$soilm_par_b$opt <- NA + settings$par$kphio$opt <- NA + settings$par$temp_ramp_edge$opt <- NA + settings$par$soilm_par_a$opt <- NA + settings$par$soilm_par_b$opt <- NA } ## Write calibrated parameters into a CSV file - vec <- unlist( unname( lapply( settings_calib$par, function(x) x$opt )) ) - df <- as_tibble(t(vec)) %>% setNames( names(settings_calib$par) ) - filn <- paste0( settings_calib$dir_results, "/params_opt_", settings_calib$name, ".csv") + vec <- unlist( unname( lapply( settings$par, function(x) x$opt )) ) + df <- as_tibble(t(vec)) %>% setNames( names(settings$par) ) + filn <- paste0( settings$dir_results, "/params_opt_", settings$name, ".csv") print( paste0( "writing calibrated parameters to ", filn ) ) write_csv( df, path = filn ) - return( settings_calib ) + return( settings ) } @@ -226,7 +202,7 @@ cost_rmse_kphio <- function( par, ddf_obs, df_drivers, inverse = FALSE ){ vpdstress_par_b = 0.2, vpdstress_par_m = 5 ) - + df <- runread_sofun_f( df_drivers, params_modl = params_modl, @@ -430,194 +406,4 @@ cost_mae <- function( par ){ return(cost) } -#' Get observational data for calibration -#' -#' Gets observational data for model calibration by looping over sites -#' -#' @param settings_calib A list containing model calibration settings. See vignette_rsofun.pdf for more information and examples. -#' @param settings_sims A list containing model simulation settings from \code{\link{prepare_setup_sofun}}. See vignette_rsofun.pdf for more information and examples. -#' @param settings_input A list containing model input settings. See vignette_rsofun.pdf for more information and examples. -#' -#' @return A data frame (tibble) containing observational data used for model calibration -#' @export -#' -#' @examples ddf_obs <- get_obs_calib( settings_calib, settings_sims, settings_input ) -#' -##------------------------------------------------------------ -## Gets data by looping over sites -##------------------------------------------------------------ -get_obs_calib <- function( settings_calib, settings_sims, settings_input ){ - - - ##------------------------------------------------------------ - ## Read raw observational data from files. - ## This creates a data frame (tibble) that contains - ## a column 'date' and columns for each target variable. The number of rows - ## corresponds to each simulation's length (number of days). 
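For reference, the settings list consumed by calib_sofun() must carry the fields referenced throughout the function above. A minimal sketch, assuming a GenSA calibration of kphio alone; all values and site codes are illustrative:

settings <- list(
  name        = "kphio_calib",              # used to name the output files
  method      = "gensa",                    # "gensa", "optimr", "BayesianTools" or "linscale"
  targetvars  = c("gpp"),                   # matched against "gpp_obs" in ddf_obs
  sitenames   = c("FR-Pue", "DE-Tha"),      # sites used for calibration
  maxit       = 100,                        # optimiser budget (max.call / maxit)
  dir_results = "./calib_results",
  par         = list(kphio = list(init = 0.05, lower = 0.02, upper = 0.12))
)
settings <- calib_sofun(df_drivers, ddf_obs, settings)
settings$par_opt                            # named vector of optimised parameter values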
- ##------------------------------------------------------------ - ## loop over sites to get data frame with all variables - list_bysite <- lapply( settings_calib$sitenames, - function(x) get_obs_bysite(x, - settings_calib = settings_calib, - settings_sims = dplyr::filter(settings_sims, sitename == x), - settings_input = settings_input) %>% - mutate( sitename=x ) - ) - - ## combine dataframes from multiple sites along rows - ddf_obs <- dplyr::bind_rows( list_bysite ) - rm("list_bysite") - - return( ddf_obs ) - -} - - -##------------------------------------------------------------ -## Function returns a data frame (tibble) containing all the observational data -## used as target variables for calibration for a given site, covering specified dates. -##------------------------------------------------------------ -get_obs_bysite <- function( mysitename, settings_calib, settings_sims, settings_input ){ - - print(paste("getting obs for ", mysitename)) - - ## Initialise daily dataframe (WITHOUT LEAP YEARS, SOFUN USES FIXED 365-DAYS YEARS!) - ddf <- init_dates_dataframe( year(settings_sims$date_start), year(settings_sims$date_end), noleap = TRUE ) - - if ("gpp" %in% settings_calib$targetvars){ - - ## Interpret benchmarking data specification - datasource <- stringr::str_split( settings_calib$datasource, "_" ) %>% unlist() - - if ("fluxnet2015" %in% datasource){ - ##------------------------------------------------------------ - ## Get FLUXNET 2015 data - ##------------------------------------------------------------ - ## Make sure data is available for this site - getvars <- c("GPP_NT_VUT_REF", "GPP_DT_VUT_REF", "NEE_VUT_REF_NIGHT_QC", "NEE_VUT_REF_DAY_QC", "GPP_NT_VUT_SE", "GPP_DT_VUT_SE") - - error <- check_download_fluxnet2015( settings_calib$path_fluxnet2015, mysitename ) - - ddf <- get_obs_bysite_fluxnet2015( - mysitename, - path_fluxnet2015 = settings_input$path_fluxnet2015, - timescale = settings_calib$timescale$gpp, - getvars = getvars, - getswc = FALSE, - threshold_GPP = settings_calib$threshold_GPP, - remove_neg = FALSE, - verbose = TRUE - ) %>% - dplyr::mutate( gpp_obs = case_when("NT" %in% datasource & !("DT" %in% datasource) ~ GPP_NT_VUT_REF, - "DT" %in% datasource & !("NT" %in% datasource) ~ GPP_DT_VUT_REF - # "DT" %in% datasource & "NT" %in% datasource ~ (GPP_NT_VUT_REF+GPP_DT_VUT_REF)/2 - ), - gpp_unc = case_when("NT" %in% datasource & !("DT" %in% datasource) ~ GPP_NT_VUT_SE, - "DT" %in% datasource & !("NT" %in% datasource) ~ GPP_DT_VUT_SE )) %>% - dplyr::right_join( ddf, by = "date" ) %>% - dplyr::select(date, gpp_obs, gpp_unc) - - # ## replace observations with NA if uncertainty is very small -- only necessary when using chi-squared calibration - # targetvars_unc <- paste0(settings_calib$targetvars, "_unc") - # replace_zero_with_na <- function(obs, unc){ - # idxs <- which( unc < 0.0001 ) - # obs[idxs] <- NA - # return(obs) - # } - # ddf <- ddf %>% - # dplyr::mutate( gpp_obs = replace_zero_with_na(gpp_obs, gpp_unc) ) - # - - # test <- sum(!is.na(ddf$gpp_obs)) - # if (test<3) rlang::abort(paste0("Too hard filtering for site ", mysitename)) - - } else { - - ddf <- ddf %>% dplyr::mutate( gpp_obs = NA, gpp_unc = NA ) - - } - - if ("Ty" %in% datasource){ - ##------------------------------------------------------------ - ## Get GePiSaT data - ##------------------------------------------------------------ - ## Make sure data is available for this site - error <- check_download_gepisat( settings_calib$path_gepisat, mysitename ) - - ddf_gepisat <- get_obs_bysite_gpp_gepisat( mysitename, 
settings_calib$path_gepisat, settings_calib$timescale ) - - ## add to other data frame and take take weighted average for updated 'gpp_obs' - if (!is.null(ddf_gepisat)){ - - ddf <- ddf_gepisat %>% - ## Some GPP data looks weird when its error in resp. day is zero. Exclude this data. - mutate( gpp_obs = ifelse( gpp_err_obs == 0.0, NA, gpp_obs ) ) %>% - dplyr::rename( gpp_obs_gepisat = gpp_obs ) %>% - right_join( ddf, by = "date" ) - totlen <- length(datasource[ -which( datasource=="fluxnet2015" ) ]) - if (totlen>1){ - ddf$gpp_obs <- sum( c(ddf$gpp_obs * (totlen-1), ddf$gpp_obs_gepisat), na.rm = TRUE ) / totlen - } else { - ddf <- ddf %>% mutate( gpp_obs = gpp_obs_gepisat ) - } - - } else { - ## No GePiSaT data available for this site. Consider all GPP data missing (NA). - ddf <- ddf %>% mutate( gpp_obs = NA ) - } - - } - - } - - if ("latenth" %in% settings_calib$targetvars){ - - ## Interpret benchmarking data specification - datasource <- stringr::str_split( settings_calib$datasource, "_" ) %>% unlist() - - if ("fluxnet2015" %in% datasource){ - ##------------------------------------------------------------ - ## Get FLUXNET 2015 data - ##------------------------------------------------------------ - ## Make sure data is available for this site - error <- check_download_fluxnet2015( settings_calib$path_fluxnet2015, mysitename ) - - ddf <- get_obs_bysite_fluxnet2015( - mysitename, - path_fluxnet2015 = settings_input$path_fluxnet2015, - timescale = settings_calib$timescale$latenth, - getvars = c("LE_F_MDS", "LE_RANDUNC", "LE_F_MDS_QC"), - getswc = FALSE, - threshold_LE = 0.6, - remove_neg = FALSE, - verbose = TRUE - ) %>% - dplyr::right_join( ddf, by = "date" ) %>% - dplyr::rename(latenth_obs = latenth) - - } else { - - ddf <- ddf %>% dplyr::mutate( latenth_obs = NA, latenth_unc = NA ) - - } - - } - - if ("wcont" %in% settings_calib$targetvars){ - - if ("fluxnet2015" %in% settings_calib$datasource$wcont){ - - ## Make sure data is available for this site - error <- check_download_fluxnet2015( settings_calib$path_fluxnet2015, mysitename ) - - ddf <- get_obs_bysite_wcont_fluxnet2015( mysitename, settings_calib$path_fluxnet2015, settings_calib$timescale ) %>% - dplyr::right_join( ddf, by = "date" ) - - } - - } - - return(ddf) - -} diff --git a/R/collect_drivers_sofun.R b/R/collect_drivers_sofun.R index a88820d6..8b31d342 100644 --- a/R/collect_drivers_sofun.R +++ b/R/collect_drivers_sofun.R @@ -2,9 +2,10 @@ #' #' Collect all drivers for site-level simulations into a nested data frame with one row for each site #' -#' @param settings A named list containing the simulation settings (see vignette_rsofun.pdf for more information and examples). +#' @param siteinfo A data frame containing site meta info (rows for sites). Required columns are: \code{"sitename", "date_start", "date_end", "lon", "lat", "elv"}. 
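The reworked collect_drivers_sofun() below expects each forcing data set pre-nested by site into a column named 'data'. A minimal sketch of the expected inputs, assuming flat daily tables df_meteo, df_fapar and df_co2 (hypothetical names); note that the fAPAR table must carry the column modisvar_interpol and the CO2 table a column co2, as selected in the function body:

meteo <- df_meteo %>% dplyr::group_by(sitename) %>% tidyr::nest()  # date, temp, prec, vpd, ppfd, ...
fapar <- df_fapar %>% dplyr::group_by(sitename) %>% tidyr::nest()  # date, modisvar_interpol
co2   <- df_co2   %>% dplyr::group_by(sitename) %>% tidyr::nest()  # date, co2

df_drivers <- collect_drivers_sofun(siteinfo, meteo, fapar, co2, df_soiltexture)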
#' @param meteo A nested data frame with rows for each site and meteo forcing data time series nested inside a column named \code{"data"} #' @param fapar A nested data frame with rows for each site and fAPAR forcing data time series nested inside a column named \code{"data"} +#' @param co2 A nested data frame with rows for each site and CO2 forcing data time series nested inside a column named \code{"data"} #' @param df_soiltexture A data frame describing the soil texture, replicated as-is for each site #' #' @return A nested data frame with one row per site, site meta info nested in column \code{siteinfo} and the combined forcing time series nested in column \code{forcing} @@ -12,11 +13,11 @@ #' #' @examples df_drivers <- collect_drivers_sofun( siteinfo, meteo, fapar, co2, df_soiltexture ) #' -collect_drivers_sofun <- function( settings, meteo, fapar, df_soiltexture ){ +collect_drivers_sofun <- function( siteinfo, meteo, fapar, co2, df_soiltexture ){ ## create mega-df containing all forcing data and parameters that vary by site (not model parameters!) - names_metainfo <- names(settings)[-which(names(settings) %in% c("sitename", "params_siml"))] - df_mega <- settings %>% + names_metainfo <- names(siteinfo)[-which(names(siteinfo) %in% c("sitename", "params_siml"))] + df_mega <- siteinfo %>% tidyr::nest(siteinfo = names_metainfo) %>% left_join( meteo %>% @@ -28,9 +29,36 @@ collect_drivers_sofun <- function( settings, meteo, fapar, df_soiltexture ){ rename(fapar = data), by = "sitename" ) %>% + left_join( + co2 %>% + rename(co2 = data), + by = "sitename" + ) %>% mutate(df_soiltexture = purrr::map(as.list(seq(nrow(.))), ~return(df_soiltexture))) + ## use only interpolated fapar and combine meteo data and fapar into a single nested column 'forcing' + df_mega <- df_mega %>% + mutate(fapar = purrr::map(fapar, ~dplyr::select(., date, fapar = modisvar_interpol))) %>% + mutate(co2 = purrr::map(co2 , ~dplyr::select(., date, co2))) %>% + mutate(forcing = purrr::map2(meteo, fapar, ~left_join( .x, .y, by = "date"))) %>% + mutate(forcing = purrr::map2(forcing, co2, ~left_join( .x, .y, by = "date"))) %>% + dplyr::select(-meteo, -fapar, -co2) + + ## interpolate to fill gaps in forcing time series + myapprox <- function(vec){ approx(vec, xout = 1:length(vec))$y } + fill_na_forcing <- function(df){ vars <- names(df)[-which(names(df)=="date")] df %>% mutate_at(vars, myapprox) } + df_mega <- df_mega %>% + mutate(forcing = purrr::map(forcing, ~fill_na_forcing(.))) + return(df_mega) } + + diff --git a/R/collect_obs_eval.R b/R/collect_obs_eval.R new file mode 100644 index 00000000..a3c4cfc7 --- /dev/null +++ b/R/collect_obs_eval.R @@ -0,0 +1,80 @@ +#' Get observational data for model evaluation +#' +#' Gets observational data for model evaluation, given the benchmarking variable and the data source used +#' for the evaluation. This information is specified in the evaluation settings (argument \code{settings}). +#' +#' @param siteinfo A data frame containing site meta info (rows for sites). Required columns are: \code{"sitename", "date_start", "date_end", "lon", "lat", "elv"}. +#' @param settings A list specifying evaluation settings (see vignette eval_sofun.pdf for more information and examples) +#' @param adf A nested data frame with rows for sites and time series (annual) for each site nested inside the column \code{data}. +#' @param mdf A nested data frame with rows for sites and time series (monthly) for each site nested inside the column \code{data}. +#' @param ddf A nested data frame with rows for sites and time series (daily) for each site nested inside the column \code{data}. +#' @param hrdf A nested data frame with rows for sites and time series (hourly) for each site nested inside the column \code{data}. 
+#' @param hhdf A nested data frame with rows for sites and time series (half-hourly) for each site nested inside the column \code{data}. +#' +#' @return A list containing nested data frames of observed values aggregated to several temporal scales (ddf for daily, xdf for X-daily +#' (determined by \code{settings$agg}), mdf for monthly, adf for annual). +#' @export +#' +#' @examples \dontrun{obs <- collect_obs_eval( siteinfo = siteinfo, settings = settings, adf = adf, mdf = mdf, ddf = ddf )} +#' +collect_obs_eval <- function( siteinfo, settings, adf = NULL, mdf = NULL, ddf = NULL, hrdf = NULL, hhdf = NULL ){ + + if (is.null(adf)){ + rlang::abort("collect_obs_eval(): object provided by argument 'adf' is missing.") + } + + if (is.null(ddf)){ + rlang::abort("collect_obs_eval(): object provided by argument 'ddf' is missing.") + } + + if (is.null(mdf)){ + rlang::abort("collect_obs_eval(): object provided by argument 'mdf' is missing.") + } + + ##------------------------------------------------------------ + ## Aggregate daily data to multi-day periods + ## periods should start with the 1st of January each year, otherwise can't compute mean seasonal cycle + ##------------------------------------------------------------ + ## Generate vector of starting dates of X-day periods, making sure the 1st of Jan is always the start of a new period + calc_xdf <- function(ddf, agg){ + + firstyear <- ddf %>% mutate(year = lubridate::year(date)) %>% pull(year) %>% min() + lastyear <- ddf %>% mutate(year = lubridate::year(date)) %>% pull(year) %>% max() + listyears <- seq( ymd(paste0(firstyear, "-01-01")), ymd(paste0(lastyear, "-01-01")), by = "year" ) + breaks <- purrr::map( as.list(listyears), ~seq( from=., by=paste0( agg, " days"), length.out = ceiling(365 / agg)) ) %>% Reduce(c,.) 
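  # Note on the breaks constructed above: the sequence of period start dates is
  # rebuilt from the 1st of January of every year (ceiling(365 / agg) periods per
  # year, i.e. 46 for agg = 8), so period boundaries fall on the same days of the
  # year in every year; the last bin of each year is simply shorter, ending at the
  # next 1st of January. This alignment is what makes the mean seasonal cycle
  # computable from 'inbin' further down.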
+ + ## take mean across periods + evalvars <- names(settings$benchmark) + ddf %>% + mutate( inbin = cut( date, breaks = breaks, right = FALSE ) ) %>% + tidyr::drop_na(inbin) %>% + group_by( inbin ) %>% + summarise_at( vars(one_of(evalvars)), mean, na.rm=TRUE) + + } + xdf <- ddf %>% + mutate(data = purrr::map(data, ~calc_xdf(., agg = settings$agg))) + + ## complement info required for evaluation + ddf <- siteinfo %>% + dplyr::select(sitename, lon, lat, elv, classid, c4, whc, koeppen_code, igbp_land_use, plant_functional_type) %>% + right_join(ddf, by = "sitename") + + xdf <- siteinfo %>% + dplyr::select(sitename, lon, lat, elv, classid, c4, whc, koeppen_code, igbp_land_use, plant_functional_type) %>% + right_join(xdf, by = "sitename") + + mdf <- siteinfo %>% + dplyr::select(sitename, lon, lat, elv, classid, c4, whc, koeppen_code, igbp_land_use, plant_functional_type) %>% + right_join(mdf, by = "sitename") + + adf <- siteinfo %>% + dplyr::select(sitename, lon, lat, elv, classid, c4, whc, koeppen_code, igbp_land_use, plant_functional_type) %>% + right_join(adf, by = "sitename") + + ## get breaks + breaks <- xdf %>% tidyr::unnest(data) %>% pull(inbin) %>% as.character() %>% lubridate::ymd() + + return( list( ddf = ddf, xdf = xdf, mdf = mdf, adf = adf, breaks = breaks ) ) + +} diff --git a/R/eval_sofun.R b/R/eval_sofun.R index 355475fa..40e81f8b 100644 --- a/R/eval_sofun.R +++ b/R/eval_sofun.R @@ -25,11 +25,22 @@ #' eval_sofun <- function(mod, settings_eval, settings_sims, obs_eval = NA, overwrite = TRUE, doplot = FALSE, light = FALSE){ + ## make model output a long flat table + mod <- mod %>% + dplyr::rename(id = sitename) %>% + tidyr::unnest(out_sofun) + ## Evaluate daily variables - out <- lapply( as.list(names(settings_eval$benchmark)), - function(x) eval_sofun_byvar(x, dplyr::select(mod, sitename, date, mod = eval(x)), settings_eval, settings_sims, obs_eval = obs_eval, overwrite = TRUE, doplot = FALSE, light = light) - ) %>% - setNames(names(settings_eval$benchmark)) + out <- purrr::map( + as.list(names(settings_eval$benchmark)), + ~eval_sofun_byvar(., dplyr::select(mod, sitename, date, mod = {{.}}), settings_eval, settings_sims, obs_eval = obs_eval, overwrite = TRUE, doplot = FALSE, light = light) + ) %>% + setNames(names(settings_eval$benchmark)) + + # out <- lapply( as.list(names(settings_eval$benchmark)), + # function(x) eval_sofun_byvar(x, dplyr::select(mod, sitename, date, mod = eval(x)), settings_eval, settings_sims, obs_eval = obs_eval, overwrite = TRUE, doplot = FALSE, light = light) + # ) %>% + # setNames(names(settings_eval$benchmark)) return(out) } @@ -82,7 +93,7 @@ eval_sofun_byvar <- function(varnam, ddf_mod, settings_eval, settings_sims, obs_ ##------------------------------------------------------------ ## Get observations for evaluation ##------------------------------------------------------------ - if (identical(obs_eval, NA)) obs_eval <- get_obs_eval( settings_eval = settings_eval, settings_sims = settings_sims, overwrite = overwrite ) + if (identical(obs_eval, NA)) rlang::abort("eval_sofun_byvar(): Object provided by argument 'obs_eval' could not be identified.") ## detach if (varnam=="aet"){ @@ -90,10 +101,10 @@ eval_sofun_byvar <- function(varnam, ddf_mod, settings_eval, settings_sims, obs_ } else { varnam_obs <- varnam } - adf <- obs_eval$adf %>% dplyr::select(sitename, date, obs = {{varnam_obs}} ) # eval(varnam_obs)) - mdf <- obs_eval$mdf %>% dplyr::select(sitename, date, obs = {{varnam_obs}}) - ddf <- obs_eval$ddf %>% dplyr::select(sitename, date, obs = 
{{varnam_obs}}) - xdf <- obs_eval$xdf %>% dplyr::select(sitename, inbin, obs = {{varnam_obs}}) + adf <- obs_eval$adf %>% tidyr::unnest(data) %>% dplyr::select(sitename, date, obs = {{varnam_obs}}) + mdf <- obs_eval$mdf %>% tidyr::unnest(data) %>% dplyr::select(sitename, date, obs = {{varnam_obs}}) + ddf <- obs_eval$ddf %>% tidyr::unnest(data) %>% dplyr::select(sitename, date, obs = {{varnam_obs}}, lat, koeppen_code) + xdf <- obs_eval$xdf %>% tidyr::unnest(data) %>% dplyr::select(sitename, inbin, obs = {{varnam_obs}}) # obs_eval$adf <- NULL # obs_eval$mdf <- NULL @@ -130,7 +141,7 @@ eval_sofun_byvar <- function(varnam, ddf_mod, settings_eval, settings_sims, obs_ ## mean across multi-day period xdf <- ddf_mod %>% # mutate( year = year(date), week = week(date) ) %>% - mutate( year = year(date), inbin = cut( date, breaks = obs_eval$breaks_xdf, right = FALSE ) ) %>% + mutate( year = year(date), inbin = cut( date, breaks = obs_eval$breaks, right = FALSE ) ) %>% tidyr::drop_na() %>% group_by( sitename, inbin ) %>% summarise( mod_mean = mean( mod, na.rm = TRUE ), mod_min = min( mod, na.rm = TRUE ), mod_max = max( mod, na.rm = TRUE ), n_mod = sum(!is.na(mod)) ) %>% @@ -305,7 +316,7 @@ eval_sofun_byvar <- function(varnam, ddf_mod, settings_eval, settings_sims, obs_ if (!light){ print("Evaluate mean seasonal cycle by climate zones...") meandoydf_byclim <- ddf %>% mutate( doy = yday(date) ) %>% - left_join( dplyr::select( metainfo_Tier1_sites_kgclimate_fluxnet2015, sitename, lat, koeppen_code ), by = "sitename" ) %>% # 'metainfo_Tier1_sites_kgclimate_fluxnet2015' is lazy-loaded with library(rsofun) + # left_join( dplyr::select( metainfo_Tier1_sites_kgclimate_fluxnet2015, sitename, lat, koeppen_code ), by = "sitename" ) %>% # 'metainfo_Tier1_sites_kgclimate_fluxnet2015' is lazy-loaded with library(rsofun) mutate( hemisphere = ifelse( lat>0, "north", "south" ) ) %>% # mutate( bias = mod - obs ) %>% dplyr::select( -lat ) %>% diff --git a/R/get_obs_bysite_fluxnet2015.R b/R/get_obs_bysite_fluxnet2015.R deleted file mode 100644 index afcf717b..00000000 --- a/R/get_obs_bysite_fluxnet2015.R +++ /dev/null @@ -1,817 +0,0 @@ -#' Get FLUXNET 2015 observational data for one site. -#' -#' Function for reading observational GPP data from FLUXNET 2015 dataset -#' and defining calibration target (which flux decomposition method etc.) -#' -#' @param sitename A character string specifying the site name for which -#' FLUXNET 2015 data is searched (based on the site name appearing as part -#' of the respective file name). Defaults to NA. -#' @param path_fluxnet2015 A character string specifying the local path of -#' FLUXNET 2015 data. -#' @param path_fluxnet2015_hh A character string specifying the local path of -#' half-hourly FLUXNET 2015 data, required to get daytime VPD. Defaults to -#' \code{NULL} (no daytime VPD is calculated). -#' @param timescale A character specifying the time scale of FLUXNET 2015 -#' data. Any of \code{c("d", "w", "m", "y")} for daily, weekly, monthly, -#' or yearly, respectively. -#' @param getvars A vector of character strings corresponding to the -#' FLUXNET 2015 variable names as used in the original data files. See -#' \url{https://fluxnet.fluxdata.org/data/fluxnet2015-dataset/}. If argument -#' \code{getswc==TRUE}, then soil water content data (variables starting with -#' \code{"SWC_}) are read. -#' @param getswc If \code{getswc==TRUE}, then all soil water content data -#' (variables starting with \code{"SWC_}) are read. Defaults to \code{TRUE}. 
-#' @param threshold_GPP A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param threshold_LE A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param threshold_H A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param threshold_SWC A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param threshold_WS A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param threshold_USTAR A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. 
The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param threshold_T A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param threshold_NETRAD A numeric value (between 0 and 1 for daily, weekly, -#' monthly, and annual data or 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data). The value specifies the -#' threshold for excluding data during data cleaning. The threshold is -#' with respect to the data quality flag in the FLUXNET 2015 data, indicating -#' the fraction of measured and good quality gapfilled data for daily, weekly, -#' monthly, and annual data and 0 [measured], 1 [good quality gapfill], 2 [ -#' medium], 3 [poor] for half-hourly data. Defaults to \code{threshold_GPP=0} -#' meaning no data is excluded. -#' @param return_qc A logical specifying whether quality control variables -#' should be returned. -#' @param remove_neg A logical specifying whether negative GPP values are to -#' be removed (replaces with NA). -#' -#' @return A data frame (tibble) containing cleaned observational data, -#' named and in units corresponding to rsofun standard. 
-#' @export -#' -#' @examples df <- get_obs_bysite_fluxnet2015 -#' -get_obs_bysite_fluxnet2015 <- function( sitename, path_fluxnet2015, path_fluxnet2015_hh=NULL, - timescale, getvars, getswc=TRUE, - threshold_GPP=0.0, threshold_LE=0.0, threshold_H=0.0, threshold_SWC=0.0, - threshold_WS=0.0, threshold_USTAR=0.0, threshold_T=0.0, threshold_NETRAD=0.0, return_qc=FALSE, - remove_neg = FALSE, verbose=TRUE ){ - - if (verbose) print(paste("Getting FLUXNET data for", sitename, "...")) - - ## Take only file for this site - if (timescale=="d"){ - ## Daily - filn <- list.files( path_fluxnet2015, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_DD.*.csv" ), - recursive = TRUE - ) - } else if (timescale=="w"){ - ## Weekly - filn <- list.files( path_fluxnet2015, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_WW.*.csv" ), - recursive = TRUE - ) - } else if (timescale=="m"){ - ## Monthly - filn <- list.files( path_fluxnet2015, - pattern = paste0("FLX_", sitename, ".*_FLUXNET2015_FULLSET_MM.*.csv"), - recursive = TRUE - ) - } else if (timescale=="y"){ - ## Annual - filn <- list.files( path_fluxnet2015, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_YY.*.csv" ), - recursive = TRUE - ) - } else if (timescale=="hh"){ - ## half-hourly - filn <- list.files( path_fluxnet2015, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_HH.*.csv" ), - recursive = TRUE - ) - } - - # ## Use also quality flag data for each variable in 'getvars' - # obsvars <- tibble( getvars = getvars ) %>% - # dplyr::filter(!(stringr::str_detect(., "UNC"))) %>% - # dplyr::pull(getvars) - # uncvars <- tibble( getvars = getvars ) %>% - # dplyr::filter(stringr::str_detect(., "UNC")) %>% - # dplyr::pull(getvars) - # getvars <- c(obsvars, paste0(obsvars, "_QC"), uncvars) - - if (length(filn)==0) rlang::abort(paste0("No files found for timescale ", timescale, " in sub-directories of ", path_fluxnet2015 ) ) - if (length(filn)>1){ - file.info_getsize <- function(filn){ - file.info(filn)$size - } - rlang::warn("Reading only largest daily file available") - path_dd <- paste0(path_fluxnet2015, filn) - size_vec <- purrr::map_dbl(as.list(path_dd), ~file.info_getsize(.)) - path_dd <- path_dd[which.max(size_vec)] - filn <- basename(path_dd) - } - - - # if (length(filn)>1){ - # filn <- filn[which(grepl("3.csv", filn))] - # # rlang::warn(paste0("Multiple files found for timsescale ", timescale, " in sub-directories of ", path_fluxnet2015, ". Taking only ", filn ) ) - # } - - ## This returns a data frame with columns (date, temp, prec, nrad, ppfd, vpd, ccov) - df <- get_obs_fluxnet2015_raw( sitename, - path = paste0(path_fluxnet2015, filn), - freq = timescale - ) - - ## For some sites, the NETRAD column is missing. - if ("NETRAD" %in% getvars && !("NETRAD" %in% names(df))){ - df <- df %>% mutate(NETRAD = NA, NETRAD_QC = 0.0) - } - - ## Get daytime VPD separately - merge_df_vpd_day_dd <- FALSE - if ("VPD_F_DAY" %in% getvars && !(timescale == "hh")){ - - if (is.null(path_fluxnet2015_hh)){ - - rlang::warn("Argument path_fluxnet2015_hh is not provided. 
Daytime VPD could not be calculted.") - - } else { - - ## get half-hourly file name(s) - filn_hh <- list.files( path_fluxnet2015_hh, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_HH.*.csv" ), - recursive = TRUE - ) - if (length(filn_hh)>0){ - use_hh <- TRUE - } else { - ## get hourly file name(s) - filn_hh <- list.files( path_fluxnet2015_hh, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_HR.*.csv" ), - recursive = TRUE - ) - use_hh <- FALSE - - } - - ## get file name(s) of file containing daily daytime VPD derived from half-hourly data - filename_dd_vpd <- list.files( path_fluxnet2015_hh, - pattern = paste0("FLX_", sitename, ".*_VPD_DAY.csv"), - recursive = FALSE) - - # filename_dd_vpd <- filn_hh %>% - # stringr::str_replace("HH", "DD") %>% - # stringr::str_replace(".csv", "_VPD_DAY.csv") - - if (length(filename_dd_vpd)>0){ - - if (length(filename_dd_vpd)>1){ - file.info_getsize <- function(filn){ - file.info(filn)$size - } - rlang::warn("Reading only largest daily VPD file available") - path_dd_vpd <- paste0(path_fluxnet2015_hh, filename_dd_vpd) - size_vec <- purrr::map_dbl(as.list(path_dd_vpd), ~file.info_getsize(.)) - path_dd_vpd <- path_dd_vpd[which.max(size_vec)] - filename_dd_vpd <- basename(path_dd_vpd) - } - - ## read directly - if (verbose) print(paste("Reading daytime VPD directly from:", paste0(path_fluxnet2015_hh, filename_dd_vpd))) - df_vpd_day_dd <- readr::read_csv(paste0(path_fluxnet2015_hh, filename_dd_vpd)) - merge_df_vpd_day_dd <- TRUE - - } else { - - if (length(filn_hh)>0){ - - path_hh <- paste0(path_fluxnet2015_hh, filn_hh) - - if (length(filn_hh)>1){ - file.info_getsize <- function(filn){ - file.info(filn)$size - } - rlang::warn("Reading only largest half-hourly file available") - size_vec <- purrr::map_dbl(as.list(path_hh), ~file.info_getsize(.)) - path_hh <- path_hh[which.max(size_vec)] - } - - df_vpd_day_dd <- get_vpd_day_fluxnet2015_byfile(path_hh, write = TRUE) - merge_df_vpd_day_dd <- TRUE - - } else { - - if (length(filn_hh)==0) rlang::abort("No half-hourly files found (required to calculate VPD_DAY).") - - # if (verbose) rlang::warn("No half-hourly data available. 
Cannot get vpd_day for this site.") - # getvars <- getvars[-which(str_detect(getvars, "VPD_F_DAY"))] - # # getvars <- c(getvars, "VPD_F", "VPD_F_QC") - - } - - } - - if (merge_df_vpd_day_dd){ - - if (timescale=="d"){ - - # daily - df <- df %>% dplyr::left_join(df_vpd_day_dd, by="date") - - } else if (timescale=="w"){ - - # weekly - df <- df_vpd_day_dd %>% - dplyr::mutate(year = lubridate::year(date), - week = lubridate::week(date)) %>% - dplyr::group_by(sitename, year, week) %>% - dplyr::summarise(VPD_F_DAY = mean(VPD_F, na.rm=TRUE), - VPD_F_DAY_QC = mean(VPD_F_QC, na.rm=TRUE), - VPD_F_DAY_MDS = mean(VPD_F_MDS, na.rm=TRUE), - VPD_F_DAY_MDS_QC = mean(VPD_F_MDS_QC, na.rm=TRUE), - VPD_DAY_ERA = mean(VPD_ERA, na.rm=TRUE) ) %>% - dplyr::right_join(df, by="date") - - } else if (timescale=="m"){ - - # monthly - df <- df_vpd_day_dd %>% - dplyr::mutate(year = lubridate::year(date), - moy = lubridate::month(date)) %>% - dplyr::group_by(sitename, year, moy) %>% - dplyr::summarise(VPD_F_DAY = mean(VPD_F, na.rm=TRUE), - VPD_F_DAY_QC = mean(VPD_F_QC, na.rm=TRUE), - VPD_F_DAY_MDS = mean(VPD_F_MDS, na.rm=TRUE), - VPD_F_DAY_MDS_QC = mean(VPD_F_MDS_QC, na.rm=TRUE), - VPD_DAY_ERA = mean(VPD_ERA, na.rm=TRUE) ) %>% - dplyr::right_join(df, by="date") - - } else if (timescale=="y"){ - - # annual - df <- df_vpd_day_dd %>% - dplyr::mutate(year = lubridate::year(date)) %>% - dplyr::group_by(sitename, year) %>% - dplyr::summarise(VPD_F_DAY = mean(VPD_F, na.rm=TRUE), - VPD_F_DAY_QC = mean(VPD_F_QC, na.rm=TRUE), - VPD_F_DAY_MDS = mean(VPD_F_MDS, na.rm=TRUE), - VPD_F_DAY_MDS_QC = mean(VPD_F_MDS_QC, na.rm=TRUE), - VPD_DAY_ERA = mean(VPD_ERA, na.rm=TRUE) ) %>% - dplyr::right_join(df, by="date") - - } - - } - - } - - } - - ## retain all getvars, plus soil moisture if required - if (getswc){ - df <- df %>% - dplyr::select( ., date, one_of(getvars), starts_with("SWC_") ) - # dplyr::mutate_at(., vars(starts_with("SWC_")), list(~as.numeric) ) # this has caused error - } else { - df <- df %>% dplyr::select( ., date, one_of(getvars) ) - } - - # ## convert to numeric (weirdly isn't always) and subset (select) - # df <- df %>% - # dplyr::mutate_at( vars(one_of(getvars)), list(~as.numeric)) - - ## Filter - ## air temperature - TA_vars <- getvars[which(grepl("TA_", getvars))] - TA_vars <- TA_vars[-which(grepl("_QC", TA_vars))] - for (ivar in TA_vars){ - df <- df %>% clean_fluxnet_byvar(ivar, threshold_T) - } - - ## wind speed - WS_vars <- getvars[which(grepl("WS_", getvars))] - WS_vars <- WS_vars[-which(grepl("_QC", WS_vars))] - for (ivar in WS_vars){ - df <- df %>% clean_fluxnet_byvar(ivar, threshold_WS) - } - - ## u-star - USTAR_vars <- getvars[which(grepl("USTAR_", getvars))] - USTAR_vars <- USTAR_vars[-which(grepl("_QC", USTAR_vars))] - for (ivar in USTAR_vars){ - df <- df %>% clean_fluxnet_byvar(ivar, threshold_USTAR) - } - - ## net radiation - NETRAD_vars <- getvars[which(grepl("NETRAD", getvars))] - NETRAD_vars <- NETRAD_vars[-which(grepl("_QC", NETRAD_vars))] - for (ivar in NETRAD_vars){ - df <- df %>% clean_fluxnet_byvar(ivar, threshold_NETRAD) - } - - energyvars <- df %>% - dplyr::select(starts_with("LE_"), starts_with("H_")) %>% - dplyr::select(-ends_with("_QC")) %>% - names() - - df <- df %>% - ## Unit conversion for sensible and latent heat flux: W m-2 -> J m-2 d-1 - dplyr::mutate_at( vars(one_of(energyvars)), convert_energy_fluxnet2015) - - ## clean GPP data - if (any(grepl("GPP_", getvars))){ - - if (any( !(c("GPP_NT_VUT_REF", "GPP_DT_VUT_REF", "NEE_VUT_REF_NIGHT_QC", "NEE_VUT_REF_DAY_QC") %in% getvars) ) || - 
any(!(c("GPP_NT_VUT_REF", "GPP_DT_VUT_REF", "NEE_VUT_REF_NIGHT_QC", "NEE_VUT_REF_DAY_QC") %in% names(df)))){ - rlang::abort("Not all variables read from file that are needed for data cleaning.") - } - - # out_clean <- clean_fluxnet_gpp( - # df$GPP_NT_VUT_REF, - # df$GPP_DT_VUT_REF, - # df$NEE_VUT_REF_NIGHT_QC, - # df$NEE_VUT_REF_DAY_QC, - # threshold=threshold_GPP - # ) - # df$GPP_NT_VUT_REF <- out_clean$gpp_nt - # df$GPP_DT_VUT_REF <- out_clean$gpp_dt - - df <- df %>% - clean_fluxnet_gpp(threshold = threshold_GPP, remove_neg = remove_neg) - } - - ## clean energy data (sensible and latent heat flux) data - often has spuriously equal values - if (any(grepl("LE_", getvars))){ - if (any( !(c("LE_F_MDS", "LE_F_MDS_QC") %in% getvars) )) abort("Not all variables read from file that are needed for data cleaning.") - #df$LE_F_MDS_good <- clean_fluxnet_energy( df$LE_F_MDS, df$LE_F_MDS_QC, threshold_LE=0.0 ) - df$LE_F_MDS <- clean_fluxnet_energy( df$LE_F_MDS, df$LE_F_MDS_QC, threshold=threshold_LE ) - } - if (any(grepl("H_", getvars))){ - if (any( !(c("H_F_MDS", "H_F_MDS_QC") %in% getvars) )) abort("Not all variables read from file that are needed for data cleaning.") - df$H_F_MDS <- clean_fluxnet_energy( df$H_F_MDS, df$H_F_MDS_QC, threshold=threshold_H ) - } - - ## Soil moisture related stuff for daily data - if (timescale=="d" && getswc){ - tmp <- df %>% dplyr::select( starts_with("SWC") ) - if (ncol(tmp)>0){ - swcvars <- tmp %>% dplyr::select( -ends_with("QC") ) %>% names() - swcqcvars <- tmp %>% dplyr::select( ends_with("QC") ) %>% names() - - # map( as.list(seq(length(swcvars))), ~clean_fluxnet_swc( df[[ swcvars[.] ]], df[[ swcqcvars[.] ]]) ) - if (length(swcvars)>0){ - for (ivar in 1:length(swcvars)){ - df[[ swcvars[ivar] ]] <- clean_fluxnet_swc( df[[ swcvars[ivar] ]], df[[ swcqcvars[ivar] ]], frac_data_thresh = threshold_SWC ) - } - } - - df <- df %>% - ## Normalise mean observational soil moisture to within minimum (=0) and maximum (=1), and - dplyr::mutate_at( vars(one_of(swcvars)), list(~norm_to_max(.)) ) %>% - - ## get mean observational soil moisture across different depths (if available) - dplyr::mutate( soilm_obs_mean = apply( dplyr::select( ., one_of(swcvars) ), 1, FUN = mean, na.rm = TRUE ) ) %>% - dplyr::mutate( soilm_obs_mean = ifelse( is.nan(soilm_obs_mean), NA, soilm_obs_mean ) ) - if (verbose) rlang::warn("Converting: soilm_obs_mean = mean across different soil depths (SWC_F_MDS), with na.rm = TRUE" ) - - } - - } - - ## check if anything is missing - if (any(!(getvars %in% names(df)))){ - rlang::abort(paste("Not all getvars were found in file. 
Missing: ", getvars[which(!(getvars %in% names(df)))])) - } - - if (!return_qc){ - df <- df %>% dplyr::select(-ends_with("_QC")) - } - - ## conversion factor from SPLASH: flux to energy conversion, umol/J (Meek et al., 1984) - kfFEC <- 2.04 - - ## Make unit conversions and shorter names - outgetvars <- c() - - ## Rename variables - if ("TA_F_DAY" %in% getvars){ - if (verbose) rlang::warn("Renaming: temp_day = TA_F_DAY \n") - df <- df %>% dplyr::rename_at( vars(starts_with("TA_F_DAY")), list(~stringr::str_replace(., "TA_F_DAY", "temp_day")) ) - } - if ("TA_F" %in% getvars){ - if (verbose) rlang::warn("Renaming: temp = TA_F \n") - df <- df %>% dplyr::rename_at( vars(starts_with("TA_F")), list(~stringr::str_replace(., "TA_F", "temp")) ) - } - if ("P_F" %in% getvars){ - if (verbose) rlang::warn("Renaming: prec = P_F (given and required in mm) \n") - df <- df %>% dplyr::rename_at( vars(starts_with("P_F")), list(~stringr::str_replace(., "P_F", "prec")) ) - } - if ("WS_F" %in% getvars){ - if (verbose) rlang::warn("Renaming: wspeed = WS_F (given and required in m s-1) \n") - df <- df %>% dplyr::rename_at( vars(starts_with("WS_F")), list(~stringr::str_replace(., "WS_F", "wspeed")) ) - } - if ("USTAR" %in% getvars){ - if (verbose) rlang::warn("Renaming: ustar = USTAR (given and required in m s-1) \n") - df <- df %>% dplyr::rename_at( vars(starts_with("USTAR")), list(~stringr::str_replace(., "USTAR", "ustar")) ) - } - if ("LE_F_MDS" %in% getvars){ - if (verbose) rlang::warn("Renaming: latenth = LE_F_MDS \n") - df <- df %>% dplyr::rename_at( vars(starts_with("LE_F_MDS")), list(~stringr::str_replace(., "LE_F_MDS", "latenth")) ) - } - if ("LE_RANDUNC" %in% getvars){ - if (verbose) rlang::warn("Renaming: latenth_unc = LE_RANDUNC \n") - df <- df %>% dplyr::rename_at( vars(starts_with("LE_RANDUNC")), list(~stringr::str_replace(., "LE_RANDUNC", "latenth_unc")) ) - } - if ("H_F_MDS" %in% getvars){ - if (verbose) rlang::warn("Renaming: sensibleh = H_F_MDS \n") - df <- df %>% dplyr::rename_at( vars(starts_with("H_F_MDS")), list(~stringr::str_replace(., "H_F_MDS", "sensibleh" )) ) - } - if ("VPD_F_DAY" %in% getvars){ - if (verbose) rlang::warn("Renaming: vpd_day = VPD_F_DAY \n") - df <- df %>% dplyr::rename_at( vars(starts_with("VPD_F_DAY")), list(~stringr::str_replace(., "VPD_F_DAY", "vpd_day" )) ) - } - if ("VPD_F" %in% getvars){ - if (verbose) rlang::warn("Renaming: vpd = VPD_F \n") - df <- df %>% dplyr::rename_at( vars(starts_with("VPD_F")), list(~stringr::str_replace(., "VPD_F", "vpd" )) ) - } - if ("PA_F" %in% getvars){ - if (verbose) rlang::warn("Renaming: patm = PA_F \n") - df <- df %>% dplyr::rename_at( vars(starts_with("PA_F")), list(~stringr::str_replace(., "PA_F", "patm" )) ) - } - if ("SW_IN_F" %in% getvars){ - if (verbose) rlang::warn("Renaming: swin = SW_IN_F \n") - df <- df %>% dplyr::rename_at( vars(starts_with("SW_IN_F")), list(~stringr::str_replace(., "SW_IN_F", "swin" )) ) - } - if ("NETRAD" %in% getvars){ - if (verbose) rlang::warn("Renaming: netrad = NETRAD \n") - df <- df %>% dplyr::rename_at( vars(starts_with("NETRAD")), list(~stringr::str_replace(., "NETRAD", "netrad" )) ) - } - - ## Convert units - if ("VPD_F_DAY" %in% getvars){ - if (verbose) rlang::warn("Converting: vpd_day = vpd_day * 1e2 (given in hPa, required in Pa) \n") - df <- df %>% dplyr::mutate( vpd_day = vpd_day * 1e2 ) - } - if ("VPD_F" %in% getvars){ - if (verbose) rlang::warn("Converting: vpd = vpd * 1e2 (given in hPa, required in Pa) \n") - df <- df %>% dplyr::mutate( vpd = vpd * 1e2 ) - } - if ("PA_F" %in% getvars){ - 
if (verbose) rlang::warn("Converting: patm = patm * 1e3 (given in kPa, required in Pa) \n") - df <- df %>% dplyr::mutate( patm = patm * 1e3 ) - } - if ("SW_IN_F" %in% getvars){ - if (verbose) rlang::warn("Converting: swin = swin * 60 * 60 * 24 (given in W m-2, required in J m-2 d-1) \n") - df <- df %>% dplyr::mutate( swin = swin * 60 * 60 * 24 ) - if (verbose) rlang::warn("Converting: ppfd = swin * kfFEC * 1.0e-6 (convert from J/m2/d to mol/m2/d; kfFEC = 2.04 is the flux-to-energy conversion, micro-mol/J (Meek et al., 1984)) \n") - df <- df %>% dplyr::mutate( ppfd = swin * kfFEC * 1.0e-6 ) - } - if ("NETRAD" %in% getvars){ - if (verbose) rlang::warn("Converting: netrad = NETRAD * 60 * 60 * 24 (given in W m-2 (avg.), required in J m-2 (daily total)) \n") - df <- df %>% dplyr::mutate( netrad = netrad * 60 * 60 * 24 ) - } - - - # ## GPP: mean over DT and NT - # if ("GPP_NT_VUT_REF" %in% getvars){ - # if ("GPP_DT_VUT_REF" %in% getvars){ - # if (verbose) rlang::warn("Converting: gpp_obs = mean( GPP_NT_VUT_REF, GPP_DT_VUT_REF ) \n") - # df <- df %>% dplyr::mutate( gpp_obs = apply( dplyr::select( df, GPP_NT_VUT_REF, GPP_DT_VUT_REF ), 1, FUN = mean, na.rm = FALSE ) ) - # } else { - # if (verbose) rlang::warn("Converting: gpp_obs = GPP_NT_VUT_REF \n") - # df <- df %>% dplyr::rename( gpp_obs = GPP_NT_VUT_REF ) - # } - # } else { - # if ("GPP_DT_VUT_REF" %in% getvars){ - # if (verbose) rlang::warn("Converting: gpp_obs = GPP_DT_VUT_REF \n") - # df <- df %>% dplyr::rename( gpp_obs = GPP_DT_VUT_REF ) - # } - # } - - - # Crude fix for a crude problem: some FLUXNET2015 files end on Dec 30 in the last year available - # Duplicate last row - lastrow <- df %>% dplyr::slice(nrow(df)) - if (lubridate::month(lastrow$date)==12 && lubridate::mday(lastrow$date)==30){ - lastrow$date <- lastrow$date + lubridate::days(1) - df <- df %>% - bind_rows(lastrow) - } - - # Crude fix for a crude problem: some FLUXNET2015 files have NA in first row for VPD - if ("vdp_day" %in% getvars){ - if (is.na(df$vpd_day[1]) && !is.na(df$vpd_day[2])){ - df$vpd_day[1] <- df$vpd_day[2] - } - } - - return(df) - -} - -clean_fluxnet_byvar <- function(df, varnam, threshold){ - varnam_qc <- paste0(varnam, "_QC") - df[[varnam]][which(df[[varnam_qc]] < threshold)] <- NA - return(df) -} - -##---------------------------------------------------------------------- -## Function for reading observational GPP data from FLUXNET 2015 dataset -##---------------------------------------------------------------------- -get_obs_bysite_wcont_fluxnet2015 <- function( sitename, path_fluxnet2015, timescale ){ - - getvars <- "SWC" - - ## Take only file for this site - if (timescale=="d"){ - ## Daily - filn <- list.files( path_fluxnet2015, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_DD.*.csv" ), - recursive = TRUE - ) - } else if (timescale=="w"){ - ## Weekly - filn <- list.files( path_fluxnet2015, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_WW.*.csv" ), - recursive = TRUE - ) - } else if (timescale=="m"){ - ## Monthly - filn <- list.files( path_fluxnet2015, - pattern = paste0(..., collapse = NULL), - recursive = TRUE - ) - } else if (timescale=="y"){ - ## Annual - filn <- list.files( path_fluxnet2015, - pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_YY.*.csv" ), - recursive = TRUE - ) - } - - if (length(filn)==0) abort(paste0("No files found for timescale ", timescale, "in sub-directories of ", path_fluxnet2015 ) ) - - ## This returns a data frame with columns (date, temp, prec, nrad, ppfd, vpd, ccov) - ddf <- 
-##----------------------------------------------------------------------
-## Function for reading observational soil water content (SWC) data from the FLUXNET 2015 dataset
-##----------------------------------------------------------------------
-get_obs_bysite_wcont_fluxnet2015 <- function( sitename, path_fluxnet2015, timescale ){
-
-  getvars <- "SWC"
-
-  ## Take only file for this site
-  if (timescale=="d"){
-    ## Daily
-    filn <- list.files( path_fluxnet2015,
-      pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_DD.*.csv" ),
-      recursive = TRUE
-      )
-  } else if (timescale=="w"){
-    ## Weekly
-    filn <- list.files( path_fluxnet2015,
-      pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_WW.*.csv" ),
-      recursive = TRUE
-      )
-  } else if (timescale=="m"){
-    ## Monthly
-    filn <- list.files( path_fluxnet2015,
-      pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_MM.*.csv" ),
-      recursive = TRUE
-      )
-  } else if (timescale=="y"){
-    ## Annual
-    filn <- list.files( path_fluxnet2015,
-      pattern = paste0( "FLX_", sitename, ".*_FLUXNET2015_FULLSET_YY.*.csv" ),
-      recursive = TRUE
-      )
-  }
-
-  if (length(filn)==0) abort(paste0("No files found for timescale ", timescale, " in sub-directories of ", path_fluxnet2015 ) )
-
-  ## This returns a data frame with columns (date, temp, prec, nrad, ppfd, vpd, ccov)
-  ddf <- get_obs_fluxnet2015_raw( sitename,
-    path = paste0(path_fluxnet2015, filn),
-    freq = "d"
-    )
-
-  ## convert to numeric (weirdly isn't always) and subset (select)
-  if ( identical( getvars, "SWC" ) ){
-    ddf <- ddf %>% dplyr::mutate_at( vars(starts_with(getvars)), list(~as.numeric(.)) ) %>%
-      dplyr::select( date, starts_with(getvars) )
-  } else {
-    ddf <- ddf %>% dplyr::mutate_at( vars(one_of(getvars)), list(~as.numeric(.)) ) %>%
-      dplyr::select( date, one_of(getvars) )
-  }
-
-  swcvars   <- ddf %>% dplyr::select( starts_with("SWC") & !ends_with("QC") ) %>% names()
-  swcqcvars <- ddf %>% dplyr::select( starts_with("SWC") &  ends_with("QC") ) %>% names()
-
-  # map( as.list(seq(length(swcvars))), ~clean_fluxnet_swc( ddf[[ swcvars[.] ]], ddf[[ swcqcvars[.] ]], frac_data_thresh=0.5 ) )
-
-  if (length(swcvars)>0){
-    for (ivar in 1:length(swcvars)){
-      ddf[[ swcvars[ivar] ]] <- clean_fluxnet_swc( ddf[[ swcvars[ivar] ]], ddf[[ swcqcvars[ivar] ]], frac_data_thresh=0.5 )
-    }
-  }
-
-  return(ddf)
-
-}
-
-
-get_obs_fluxnet2015_raw <- function( sitename, path, freq="d" ){
-  ##--------------------------------------------------------------------
-  ## Function returns a dataframe containing all the data of the FLUXNET
-  ## 2015 data file of respective temporal resolution.
-  ## Returns data in units given in the fluxnet 2015 dataset
-  ##--------------------------------------------------------------------
-  ## get data
-  df <- readr::read_csv( path, na="-9999" ) #, col_types = cols()
-
-  ## get dates, their format differs slightly between temporal resolution
-  if ( freq=="y" ){
-
-    df <- df %>% dplyr::mutate( year = TIMESTAMP ) %>% dplyr::mutate( date = lubridate::ymd( paste0( as.character(year), "-01-01" ) ) )
-
-  } else if ( freq=="w"){
-
-    df <- df %>% dplyr::mutate( date_start = lubridate::ymd(TIMESTAMP_START), date_end = lubridate::ymd(TIMESTAMP_END) ) %>%
-      dplyr::mutate( date = date_start )
-
-  } else if ( freq=="m" ){
-
-    df <- df %>% dplyr::mutate( year = substr( TIMESTAMP, start = 1, stop = 4 ), month = substr( TIMESTAMP, start = 5, stop = 6 ) ) %>%
-      dplyr::mutate( date = lubridate::ymd( paste0( as.character(year), "-", as.character(month), "-01" ) ) )
-
-  } else if ( freq=="d" ){
-
-    df <- df %>% dplyr::mutate( date = lubridate::ymd( TIMESTAMP ) )
-
-  } else if ( freq=="hh" ){
-
-    df <- df %>% dplyr::mutate( date_start = lubridate::ymd_hm( TIMESTAMP_START ),
-                                date_end   = lubridate::ymd_hm( TIMESTAMP_END ) ) %>%
-      dplyr::mutate( date = date_start )
-
-  }
-
-  return( df )
-
-}
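The raw reader above derives a `date` column from FLUXNET's `TIMESTAMP`, whose format depends on temporal resolution. A standalone sketch of the same parsing logic (toy values, not package code):

```r
# FLUXNET 2015 TIMESTAMP formats: daily YYYYMMDD, monthly YYYYMM, annual YYYY.
library(dplyr)
library(lubridate)

daily   <- tibble::tibble(TIMESTAMP = 20150704) %>%
  mutate(date = ymd(TIMESTAMP))
monthly <- tibble::tibble(TIMESTAMP = 201507) %>%
  mutate(date = ymd(paste0(substr(TIMESTAMP, 1, 4), "-", substr(TIMESTAMP, 5, 6), "-01")))
annual  <- tibble::tibble(TIMESTAMP = 2015) %>%
  mutate(date = ymd(paste0(TIMESTAMP, "-01-01")))
```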
-# ## Converts units of GPP variables from FLUXNET 2015 to SOFUN standard
-# convert_gpp_fluxnet2015 <- function( gpp ){
-#   # in FLUXNET 2015 given in umolCO2 m-2 s-1, converted to gC m-2 d-1
-#   c_molmass <- 12.0107  # molar mass of C
-#   gpp_converted <- gpp * 1e-6 * 60 * 60 * 24 * c_molmass
-#   return(gpp_converted)
-
-# }
-
-## Converts units of latent energy (LE) variables from FLUXNET 2015 to SOFUN standard
-convert_energy_fluxnet2015 <- function( le ){
-  ## W m-2 -> J m-2 d-1
-  le_converted <- as.numeric(le) * 60 * 60 * 24
-  return(le_converted)
-}
-
-clean_fluxnet_gpp <- function(df, nam_gpp_nt, nam_gpp_dt, nam_nt_qc, nam_dt_qc, threshold, remove_neg = FALSE){
-  ##--------------------------------------------------------------------
-  ## Cleans daily data using criteria 1-4 as documented in Tramontana et al., 2016
-  ## gpp_nt: based on nighttime flux decomposition ("NT")
-  ## gpp_dt: based on daytime flux decomposition ("DT")
-  ##--------------------------------------------------------------------
-  replace_with_na_qc <- function(gpp, qc, threshold){
-    gpp[which(qc < threshold)] <- NA
-    return(gpp)
-  }
-  replace_with_na_neg <- function(gpp){
-    gpp[which(gpp<0)] <- NA
-    return(gpp)
-  }
-  replace_with_na_res <- function(gpp, res, q025, q975){
-    gpp[ res > q975 | res < q025 ] <- NA
-    return(gpp)
-  }
-
-  df <- df %>%
-    mutate(GPP_NT_VUT_REF = replace_with_na_qc(GPP_NT_VUT_REF, NEE_VUT_REF_NIGHT_QC, threshold),
-           GPP_DT_VUT_REF = replace_with_na_qc(GPP_DT_VUT_REF, NEE_VUT_REF_DAY_QC, threshold))
-
-  # ## Remove data points that are based on too much gap-filled data in the underlying half-hourly data
-  # gpp_nt[ which(qflag_nt < threshold) ] <- NA  ## based on fraction of data based on gap-filled half-hourly
-  # gpp_dt[ which(qflag_dt < threshold) ] <- NA  ## based on fraction of data based on gap-filled half-hourly
-
-  ## Remove data points where the two flux decompositions are inconsistent,
-  ## i.e. where the residual of their regression is above the 97.5% or below the 2.5% quantile.
-  df <- df %>%
-    mutate(res = GPP_NT_VUT_REF - GPP_DT_VUT_REF)
-
-  q025 <- quantile( df$res, probs = 0.025, na.rm=TRUE )
-  q975 <- quantile( df$res, probs = 0.975, na.rm=TRUE )
-
-  df <- df %>%
-
-    ## remove data outside the quantiles of the residuals between the DT and NT estimates
-    mutate(GPP_NT_VUT_REF = replace_with_na_res(GPP_NT_VUT_REF, res, q025, q975),
-           GPP_DT_VUT_REF = replace_with_na_res(GPP_DT_VUT_REF, res, q025, q975)
-           ) %>%
-
-    ## remove outliers
-    mutate(GPP_NT_VUT_REF = remove_outliers(GPP_NT_VUT_REF, coef = 1.5),
-           GPP_DT_VUT_REF = remove_outliers(GPP_DT_VUT_REF, coef = 1.5)
-           )
-
-  ## remove negative GPP
-  if (remove_neg){
-    df <- df %>%
-      mutate(GPP_NT_VUT_REF = replace_with_na_neg(GPP_NT_VUT_REF),
-             GPP_DT_VUT_REF = replace_with_na_neg(GPP_DT_VUT_REF)
-             )
-  }
-
-  return(df)
-}
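The removed `clean_fluxnet_gpp()` masks days where the nighttime (NT) and daytime (DT) flux decompositions disagree. A self-contained sketch of that residual filter on simulated data (all values hypothetical):

```r
# Points whose NT-DT residual falls outside the 2.5%-97.5% quantile range
# are masked in both GPP estimates.
library(dplyr)

set.seed(1)
df <- tibble::tibble(
  GPP_NT_VUT_REF = rnorm(100, mean = 5),
  GPP_DT_VUT_REF = rnorm(100, mean = 5)
) %>%
  mutate(res = GPP_NT_VUT_REF - GPP_DT_VUT_REF)

q025 <- quantile(df$res, probs = 0.025, na.rm = TRUE)
q975 <- quantile(df$res, probs = 0.975, na.rm = TRUE)

df <- df %>%
  mutate(
    GPP_NT_VUT_REF = ifelse(res < q025 | res > q975, NA, GPP_NT_VUT_REF),
    GPP_DT_VUT_REF = ifelse(res < q025 | res > q975, NA, GPP_DT_VUT_REF)
  )
```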
-clean_fluxnet_energy <- function( energyflux, qflag_energyflux, threshold ){
-  ##--------------------------------------------------------------------
-  ##--------------------------------------------------------------------
-  ## Remove data points that are based on too much gap-filled data in the underlying half-hourly data
-  # frac_data_thresh <- 0.2  ## fraction of data based on gap-filled half-hourly
-  energyflux[ qflag_energyflux < threshold ] <- NA
-
-  if ( any(!is.na(qflag_energyflux)) ){ energyflux[ is.na(qflag_energyflux) ] <- NA }
-
-  energyflux <- identify_pattern( energyflux )
-
-  return( energyflux )
-}
-
-
-clean_fluxnet_swc <- function( swc, qflag_swc, frac_data_thresh=1.0 ){
-  ##--------------------------------------------------------------------
-  ## frac_data_thresh: fraction of data based on gap-filled half-hourly
-  ##--------------------------------------------------------------------
-  ## Remove data points that are based on too much gap-filled data in the underlying half-hourly data
-  swc[ which( qflag_swc < frac_data_thresh ) ] <- NA
-  swc <- as.numeric( swc )
-
-  return( swc )
-}
-
-
-norm_to_max <- function( vec ){
-  vec <- ( vec - min( vec, na.rm=TRUE ) ) / ( max( vec, na.rm=TRUE ) - min( vec, na.rm=TRUE ) )
-  return( vec )
-}
-
-
-identify_pattern <- function( vec ){
-
-  eps <- 1e-4
-
-  vec <- as.numeric(as.character(vec))
-
-  ## identify all numbers that appear more than once (already suspicious)
-  counts <- as.data.frame( table( vec ) )
-  counts <- counts[ order(-counts$Freq), ]
-  counts <- counts[ which(counts$Freq>3), ]
-
-  ## convert factors to numeric
-  counts$vec <- as.numeric(levels(counts$vec))[counts$vec]
-
-  for (idx in 1:nrow(counts)){
-
-    ## find where this value appears
-    pos <- which( abs(vec-counts$vec[idx]) < eps )
-
-  if (length(filn)>0){
-
-    ## This returns a data frame with columns (date, temp, prec, nrad, ppfd, vpd, ccov)
-    df <- get_obs_gepisat_raw(
-      sitename = sitename,
-      path = paste0(path_gepisat, filn),
-      freq = timescale
-      ) %>%
-
-      ## Convert units
-      ## given in molCO2 m-2 d-1, converted to gC m-2 d-1
-      mutate_at( vars(starts_with("GPP_")), funs(convert_gpp_gepisat) ) %>%
-
-      ## rename so that it is like in FLUXNET 2015
-      rename( gpp_obs = GPP_mol.m2, gpp_err_obs = GPP_err_mol.m2 ) %>%
-
-      ## some are NaN
-      mutate( gpp_obs = ifelse( is.nan(gpp_obs), NA, gpp_obs ) )
-
-  } else {
-
-    df <- NULL
-
-  }
-
-  return(df)
-
-}
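A short, self-contained sketch of the GePiSaT unit conversion and NaN handling used above; the values are hypothetical, and the package applies the same arithmetic via `convert_gpp_gepisat()`:

```r
# Convert GPP from mol CO2 m-2 d-1 to gC m-2 d-1 and replace NaN by NA.
c_molmass <- 12.0107  # molar mass of carbon, g mol-1

gpp_mol <- c(0.31, NaN, 0.28)   # hypothetical daily GPP
gpp_gc  <- gpp_mol * c_molmass  # gC m-2 d-1
gpp_gc[is.nan(gpp_gc)] <- NA
gpp_gc
```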
-get_obs_gepisat_raw <- function( sitename, path, freq="d" ){
-  ##--------------------------------------------------------------------
-  ## Function returns a dataframe containing all the data of the GePiSaT
-  ## data file of respective temporal resolution.
-  ## Returns data in units given in the fluxnet 2015 dataset
-  ##--------------------------------------------------------------------
-  ## get data
-  df <- readr::read_csv( path, na="-9999", col_types = cols() )
-
-  ## get dates, their format differs slightly between temporal resolution
-  if ( freq=="d" ){
-
-    df <- df %>% mutate( Timestamp = ymd( Timestamp ) ) %>% rename( date = Timestamp )
-
-  } else {
-
-    abort("get_obs_gepisat_raw() implemented only for daily time step.")
-
-  }
-
-  return( df )
-
-}
-
-
-##--------------------------------------------------------------------
-## Converts units of GPP variables from GePiSaT to SOFUN standard
-## in GePiSaT given in molCO2 m-2 d-1, converted to gC m-2 d-1
-##--------------------------------------------------------------------
-convert_gpp_gepisat <- function( gpp ){
-  c_molmass <- 12.0107  # molar mass of C
-  gpp_converted <- gpp * c_molmass
-  return(gpp_converted)
-}
-
-
diff --git a/R/get_obs_eval.R b/R/get_obs_eval.R
deleted file mode 100644
index 80a189d6..00000000
--- a/R/get_obs_eval.R
+++ /dev/null
@@ -1,335 +0,0 @@
-#' Get observational data for model evaluation
-#'
-#' Gets observational data for model evaluation, given the benchmarking variable, and data source used
-#' for the evaluation. This information is specified in the evaluation settings (argument \code{settings_eval}).
-#'
-#' @param settings_eval A list specifying evaluation settings (see vignette eval_sofun.pdf for more information and examples)
-#' @param settings_sims A named list containing the simulation settings (see vignette_rsofun.pdf for more information and examples)
-#' @param overwrite (Optional) A logical specifying whether temporary data stored in \code{./tmpdir} should be overwritten. Defaults to \code{TRUE}.
-#' @param light (Optional) A logical specifying whether reduced data should be saved. Defaults to \code{FALSE}.
-#' @param add_forcing (Optional) A logical specifying whether forcing data is to be added to data frames containing observational data.
-#'
-#' @return A list containing data frames of observed values aggregated to several temporal scales (ddf for daily, xdf for X-daily, mdf for monthly, adf for annual).
-#' @export -#' -#' @examples obs <- get_obs_eval( settings_eval, settings_sims, overwrite = TRUE ) -#' -get_obs_eval <- function( settings_eval, settings_sims, overwrite = TRUE, light = FALSE, add_forcing = FALSE){ - - ## Interpret benchmarking data specification - datasource <- settings_eval$benchmark %>% - unlist() %>% - stringr::str_split( ., "_" ) %>% - unlist() - - ## determine variables (original names in files) to be read from benchmark data - evalvars <- names(settings_eval$benchmark) - getvars <- c() - if ("gpp" %in% names(settings_eval$benchmark)){ - ## Get observational GPP data - getvars <- c(getvars, "GPP_NT_VUT_REF", "GPP_DT_VUT_REF", "NEE_VUT_REF_NIGHT_QC", "NEE_VUT_REF_DAY_QC") - settings_eval$benchmarkvar$gpp <- ifelse(settings_eval$benchmark$gpp == "fluxnet2015_NT", - "GPP_NT_VUT_REF", - ifelse(settings_eval$benchmark$gpp == "fluxnet2015_DT", - "GPP_DT_VUT_REF", - NA)) - } - if ("netrad" %in% names(settings_eval$benchmark)){ - getvars <- c(getvars, "NETRAD") - settings_eval$benchmarkvar$netrad <- "NETRAD" - } - if ("aet" %in% names(settings_eval$benchmark) || "latenth" %in% names(settings_eval$benchmark)){ - getvars <- c(getvars, "LE_F_MDS", "LE_F_MDS_QC", "LE_RANDUNC") - evalvars[which(evalvars=="aet")] <- "latenth" - settings_eval$benchmarkvar$latenth <- "LE_F_MDS" - } - - if ("fluxnet2015" %in% datasource){ - ##------------------------------------------------------------ - ## Read annual observational data from FLUXNET 2015 files (from annual files!). - ##------------------------------------------------------------ - ## loop over sites to get data frame with all variables - if (settings_eval$path_fluxnet2015_y!="" && !("Ty" %in% datasource)){ - print("getting annual FLUXNET-2015_Tier1 data...") - print(settings_eval$path_fluxnet2015_y) - adf <- lapply( as.list(settings_eval$sitenames), - function(x) get_obs_bysite_fluxnet2015( - sitename = x, - path_fluxnet2015 = settings_eval$path_fluxnet2015_y, - path_fluxnet2015_hh = NULL, - timescale = "y", - getvars = getvars, - getswc=!light, - threshold_GPP=0, - remove_neg = TRUE, - verbose=FALSE - ) %>% - mutate( year =lubridate::year(date), - sitename = x )) %>% - bind_rows() - - ## change variable names - if ("gpp" %in% names(settings_eval$benchmark)){ - adf <- adf %>% - change_names("gpp", settings_eval$benchmarkvar) - } - - - } else { - - adf <- lapply( as.list(settings_eval$sitenames), - function(x) init_dates_dataframe( - year(settings_sims$date_start[[x]]), - year(settings_sims$date_end[[x]]), - noleap = TRUE, - freq = "years" - ) %>% - ## Remove outliers, i.e. when data is outside 1.5 times the inter-quartile range - mutate( year =lubridate::year(date), - sitename = x )) %>% - bind_rows() - - } - - ## remove pre-modis data - if (settings_eval$remove_premodis){ - adf <- adf %>% filter( lubridate::year(date) >= 2000 ) - } - - ##------------------------------------------------------------ - ## Read monthly observational data from FLUXNET 2015 files (from monthly files!). 
- ##------------------------------------------------------------ - ## loop over sites to get data frame with all variables - if (settings_eval$path_fluxnet2015_m!="" && !("Ty" %in% datasource)){ - print("getting monthly FLUXNET-2015_Tier1 data...") - mdf <- lapply( as.list(settings_eval$sitenames), - function(x) get_obs_bysite_fluxnet2015( - sitename = x, - path_fluxnet2015 = settings_eval$path_fluxnet2015_m, - path_fluxnet2015_hh = NULL, - timescale = "m", - getvars = getvars, - getswc=!light, - threshold_GPP=0.5, - remove_neg = TRUE, - verbose=FALSE - ) %>% - mutate( year =lubridate::year(date), - sitename = x )) %>% - bind_rows() - - ## change variable names - if ("gpp" %in% names(settings_eval$benchmark)){ - mdf <- mdf %>% - change_names("gpp", settings_eval$benchmarkvar) - } - - } else { - - mdf <- lapply( as.list(settings_eval$sitenames), - function(x) init_dates_dataframe( - year(settings_sims$date_start[[x]]), - year(settings_sims$date_end[[x]]), - noleap = TRUE, - freq = "months" - ) %>% - ## Remove outliers, i.e. when data is outside 1.5 times the inter-quartile range - mutate( year =lubridate::year(date), - sitename = x )) %>% - bind_rows() - } - - ## remove pre-modis data - if (settings_eval$remove_premodis){ - mdf <- mdf %>% filter(lubridate::year(date) >= 2000 ) - } - - ##------------------------------------------------------------ - ## Read daily observational data from FLUXNET 2015 files (from daily files!). - ##------------------------------------------------------------ - if (settings_eval$path_fluxnet2015_d!="" && !("Ty" %in% datasource)){ - print("getting daily FLUXNET-2015_Tier1 data...") - ddf <- lapply( as.list(settings_eval$sitenames), - function(x) get_obs_bysite_fluxnet2015( - sitename = x, - path_fluxnet2015 = settings_eval$path_fluxnet2015_d, - path_fluxnet2015_hh = NULL, - timescale = "d", - getvars = getvars, - getswc=!light, - threshold_GPP=0.5, - remove_neg = FALSE, - verbose=TRUE - ) %>% - mutate( year =lubridate::year(date), - sitename = x )) %>% - bind_rows() - - ## change variable names - if ("gpp" %in% names(settings_eval$benchmark)){ - ddf <- ddf %>% - change_names("gpp", settings_eval$benchmarkvar) - } - - } else { - - ddf <- lapply( as.list(settings_eval$sitenames), - function(x) init_dates_dataframe( - year(settings_sims$date_start[[x]]), - year(settings_sims$date_end[[x]]), - noleap = TRUE, - freq = "days" - ) %>% - ## Remove outliers, i.e. when data is outside 1.5 times the inter-quartile range - mutate( year =lubridate::year(date), - sitename = x )) %>% - bind_rows() - - } - - ## remove pre-modis data - if (settings_eval$remove_premodis){ - ddf <- ddf %>% filter(lubridate::year(date) >= 2000 ) - } - - ##------------------------------------------------------------ - ## Read daily observational data from GEPISAT files (only daily files!). 
-  ##------------------------------------------------------------
-  if ("Ty" %in% datasource){
-    ddf_gepisat <- lapply( as.list(settings_eval$sitenames),
-      function(x) get_obs_bysite_gpp_gepisat( x,
-        settings_eval$path_gepisat_d,
-        timescale = "d" ) )
-    names(ddf_gepisat) <- settings_eval$sitenames
-
-    missing_gepisat <- purrr::map_lgl( ddf_gepisat, ~identical(., NULL ) ) %>% which() %>% names()
-    settings_eval$sitenames_gepisat <- settings_eval$sitenames[which(!(settings_eval$sitenames %in% missing_gepisat))]
-
-    ## Convert to one long data frame and add sitename to data frames inside the list
-    ddf_gepisat <- ddf_gepisat %>%
-      bind_rows( .id = "sitename" ) %>%
-      mutate( gpp_obs = ifelse( date < "2000-02-18", NA, gpp_obs ) ) %>%  # remove pre-modis data
-      dplyr::rename( gpp = gpp_obs ) %>%
-
-      # remove weird data points where error is zero
-      mutate( gpp = ifelse( gpp_err_obs==0.0, NA, gpp ) )
-
-    ## Take weighted average for updated 'obs', aggregate, and add to other data frames (ddf, adf, mdf)
-    if (!is.null(ddf_gepisat)){
-
-      ## daily
-      ddf <- ddf_gepisat %>%
-        right_join( ddf, by = c("sitename", "date") ) %>%
-        dplyr::select(-year, -year_dec)
-
-      ## don't remember why I did this, commenting it out
-      # totlen <- length(datasource[ -which( datasource=="fluxnet2015" ) ])
-      # ddf$gpp <- apply( dplyr::select( ddf, gpp, gpp_gepisat ), 1, stats::weighted.mean, c((totlen-1)/totlen, 1/totlen), na.rm=TRUE )
-
-      # ##------------------------------------------------------------
-      # ## Add forcing data to daily data frame (for evaluation of functional relationships)
-      # ##------------------------------------------------------------
-      # ddf <- lapply( as.list(settings_eval$sitenames), function(x) get_forcing_from_csv( x, settings_sims ) ) %>%
-      #   bind_rows() %>%
-      #   dplyr::select(-year_dec.x, -year_dec.y) %>%
-      #   right_join( ddf, by = c("sitename", "date") )
-
-      ## monthly
-      mdf <- mdf %>% mutate( year=year(date), moy=month(date) )
-      mdf <- ddf %>%
-        mutate( moy=month(date), year=year(date) ) %>%
-        group_by( sitename, year, moy ) %>%
-        summarise( gpp = sum(gpp) ) %>%
-        right_join( mdf, by = c("sitename", "year", "moy") )
-
-      ## annual
-      adf <- ddf %>%
-        mutate( year=year(date) ) %>%
-        group_by( sitename, year ) %>%
-        summarise( gpp = sum(gpp) ) %>%
-        right_join( adf, by = c("sitename", "year") )
-
-    } else {
-      ddf <- ddf %>% mutate( gpp_gepisat = NA )
-    }
-  }
-
-  ##------------------------------------------------------------
-  ## Add forcing data to daily data frame (for evaluation of functional relationships)
-  ##------------------------------------------------------------
-  if (add_forcing){
-    print("adding forcing data...")
-    ddf <- lapply( as.list(settings_eval$sitenames), function(x) get_forcing_from_csv( x, settings_sims ) ) %>%
-      bind_rows() %>%
-      right_join( ddf, by = c("sitename", "date") )
-  }
-
-  ##------------------------------------------------------------
-  ## Aggregate to multi-day periods
-  ## periods should start with the 1st of January each year, otherwise can't compute mean seasonal cycle
-  ##------------------------------------------------------------
-  # ## 8-day periods corresponding to MODIS dates (problem: doesn't start with Jan 1 each year)
-  # breaks <- modisdates <- readr::read_csv( "modisdates.csv" )$date
-
-  # ## aggregate to weeks
-  # xdf <- ddf %>% mutate( inbin = week(date) ) %>%
-  #   group_by( sitename, year, inbin ) %>%
-  #   summarise( gpp = mean( gpp, na.rm=TRUE) )
-
-  ## Generate vector of starting dates of X-day periods, making sure the 1st of Jan is always the start
of a new period - listyears <- seq( ymd("1990-01-01"), ymd("2018-01-01"), by = "year" ) - breaks <- purrr::map( as.list(listyears), ~seq( from=., by=paste0( settings_eval$agg, " days"), length.out = ceiling(365 / settings_eval$agg)) ) %>% Reduce(c,.) - - ## take mean across periods - xdf <- ddf %>% mutate( inbin = cut( date, breaks = breaks, right = FALSE ) ) %>% - group_by( sitename, inbin ) %>% - summarise_at( vars(one_of(evalvars)), mean, na.rm=TRUE) - - } - - return( list( ddf = ddf, xdf = xdf, mdf = mdf, adf = adf, breaks_xdf = breaks ) ) -} - - -## Read forcing data from CSV file prepared for SOFUN input -get_forcing_from_csv <- function( sitename, settings_sims ){ - - ## get climate data - dir <- paste0( settings_sims$path_input, "/sitedata/climate/", sitename ) - csvfiln <- paste0( dir, "/clim_daily_lev2_", sitename, ".csv" ) - if (file.exists(csvfiln)){ - ddf <- readr::read_csv( csvfiln ) - } else { - ddf <- tibble(date=ymd("2001-01-01")) - } - - ## get fapar data - dir <- paste0( settings_sims$path_input, "/sitedata/fapar/", sitename ) - csvfiln <- paste0( dir, "/fapar_daily_", sitename, ".csv" ) - if (file.exists(csvfiln)){ - ddf <- readr::read_csv( csvfiln ) %>% - mutate( fapar = as.numeric(modisvar_interpol)) %>% - dplyr::select(date, fapar) %>% - right_join( ddf, by = "date" ) - } else { - ddf <- tibble(date=ymd("2001-01-01")) - } - - return(ddf) - -} - -change_names <- function(df, varnam, benchmarkvar){ - - df[[varnam]] <- df[[benchmarkvar[[varnam]]]] - df <- df %>% - dplyr::select(-matches(benchmarkvar[[varnam]])) - - if (varnam=="gpp"){ - df <- df %>% - dplyr::select(-starts_with("GPP_")) - } - - return(df) -} diff --git a/R/get_obs_eval2.R b/R/get_obs_eval2.R deleted file mode 100644 index 2a316dd0..00000000 --- a/R/get_obs_eval2.R +++ /dev/null @@ -1,111 +0,0 @@ -#' Get observational data for model evaluation -#' -#' Gets observational data for model evaluation, given the benchmarking variable, and data source used -#' for the evaluation. This information is specified in the evaluation settings (argument \code{settings_eval}). -#' -#' @param settings_eval A list specifying evaluation settings (see vignette eval_sofun.pdf for more information and examples) -#' @param settings_sims A named list containing the simulation settings (see vignette_rsofun.pdf for more information and examples) -#' @param overwrite (Optional) A logical specifying whether temporary data stored in \code{./tmpdir} should be overwritten. Defaults to \code{TRUE}. -#' @param light (Optional) A logical specifying whether reduced data should saved. Defaults to \code{FALSE}. -#' @param add_forcing (Optional) a logical specifying whether forcing data is to be added to data frames containing observational data. -#' -#' @return A list containing data frames of observed values aggregated to several temporal scales (ddf for daily, xdf for X-daily, mdf for monthly, adf for annual). 
-#' @export -#' -#' @examples obs <- get_obs_eval2( siteinfo = NULL, settings_eval = NULL, adf = NULL, mdf = NULL, ddf = NULL, hrdf = NULL, hhdf = NULL, agg = 8 ) -#' -get_obs_eval2 <- function( siteinfo = NULL, settings_eval = NULL, adf = NULL, mdf = NULL, ddf = NULL, hrdf = NULL, hhdf = NULL, agg = 8 ){ - - evalvars <- names(settings_eval$benchmark) - - if (is.null(adf) || is.null(mdf) || is.null(ddf)){ # nothing done with hourly or half-hourly data yet - ##------------------------------------------ - ## Interpret benchmarking data specification - ##------------------------------------------ - datasource <- settings_eval$benchmark %>% - unlist() %>% - stringr::str_split( ., "_" ) %>% - unlist() - - if ("fluxnet2015" %in% datasource){ - - if (is.null(adf) || is.null(mdf) || is.null(ddf)){ - ##------------------------------------------ - ## determine variables (original names in files) to be read from benchmark data - ##------------------------------------------ - getvars <- c() - if ("gpp" %in% names(settings_eval$benchmark)){ - ## Get observational GPP data - getvars <- c(getvars, "GPP_NT_VUT_REF", "GPP_DT_VUT_REF", "NEE_VUT_REF_NIGHT_QC", "NEE_VUT_REF_DAY_QC") - settings_eval$benchmarkvar$gpp <- ifelse(settings_eval$benchmark$gpp == "fluxnet2015_NT", - "GPP_NT_VUT_REF", - ifelse(settings_eval$benchmark$gpp == "fluxnet2015_DT", - "GPP_DT_VUT_REF", - NA)) - } - if ("netrad" %in% names(settings_eval$benchmark)){ - getvars <- c(getvars, "NETRAD") - settings_eval$benchmarkvar$netrad <- "NETRAD" - } - if ("aet" %in% names(settings_eval$benchmark) || "latenth" %in% names(settings_eval$benchmark)){ - getvars <- list(latenth = "LE_F_MDS", latenth_qc = "LE_F_MDS_QC", latenth_unc = "LE_RANDUNC") - getvars <- c(getvars, "LE_F_MDS", "LE_F_MDS_QC", "LE_RANDUNC") - settings_eval$benchmarkvar$latenth <- "LE_F_MDS" - } - } - - ##------------------------------------------ - ## Ingest data - ##------------------------------------------ - if (is.null(adf)){ - adf_eval <- ingest( - siteinfo = siteinfo, - source = "fluxnet2015", - getvars = getvars, - dir = settings_eval$path_fluxnet2015_y, - settings = list(threshold_LE = 0.8, getswc = TRUE), - timescale = "y" - ) - } - - if (is.null(ddf)){ - ddf_eval <- ingest( - siteinfo = siteinfo, - source = "fluxnet2015", - getvars = getvars, - dir = "~/data/FLUXNET-2015_Tier1/20191024/DD/", - settings = list(threshold_LE = 0.8, getswc = TRUE), - timescale = "d" - ) - } - - if (is.null(mdf)){ - mdf_eval <- ingest( - siteinfo = siteinfo, - source = "fluxnet2015", - getvars = getvars, - dir = settings_eval$path_fluxnet2015_m, - settings = list(threshold_LE = 0.8, getswc = TRUE), - timescale = "m" - ) - } - } - } - - - ##------------------------------------------------------------ - ## Aggregate daily data to multi-day periods - ## periods should start with the 1st of January each year, otherwise can't compute mean seasonal cycle - ##------------------------------------------------------------ - ## Generate vector of starting dates of X-day periods, making sure the 1st of Jan is always the start of a new period - listyears <- seq( ymd("1990-01-01"), ymd("2018-01-01"), by = "year" ) - breaks <- purrr::map( as.list(listyears), ~seq( from=., by=paste0( agg, " days"), length.out = ceiling(365 / agg)) ) %>% Reduce(c,.) 
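The X-day aggregation here hinges on `cut()` with break dates that restart every 1 January, so that mean seasonal cycles can be computed. A self-contained sketch with `agg = 8` (site name and GPP values hypothetical):

```r
# Bin daily dates into 8-day periods restarting each 1 January,
# then average per site and bin.
library(dplyr)
library(lubridate)

agg <- 8
listyears <- seq(ymd("1990-01-01"), ymd("2018-01-01"), by = "year")
breaks <- purrr::map(
  as.list(listyears),
  ~seq(from = ., by = paste0(agg, " days"), length.out = ceiling(365 / agg))
) %>%
  Reduce(c, .)

ddf <- tibble::tibble(
  sitename = "XX-Xxx",
  date = seq(ymd("2000-01-01"), ymd("2000-12-31"), by = "day"),
  gpp = rnorm(366)
)

xdf <- ddf %>%
  mutate(inbin = cut(date, breaks = breaks, right = FALSE)) %>%
  group_by(sitename, inbin) %>%
  summarise(gpp = mean(gpp, na.rm = TRUE), .groups = "drop")
```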
- - ## take mean across periods - xdf <- ddf %>% mutate( inbin = cut( date, breaks = breaks, right = FALSE ) ) %>% - group_by( sitename, inbin ) %>% - summarise_at( vars(one_of(evalvars)), mean, na.rm=TRUE) - - return( list( ddf = ddf, xdf = xdf, mdf = mdf, adf = adf, breaks_xdf = breaks ) ) - -} diff --git a/R/run_sofun_f_bysite.R b/R/run_sofun_f_bysite.R index 87e5f16b..0202d0e0 100644 --- a/R/run_sofun_f_bysite.R +++ b/R/run_sofun_f_bysite.R @@ -76,6 +76,7 @@ run_sofun_f_bysite <- function( sitename, params_siml, siteinfo, forcing, df_soi rlang::warn(paste(" Number of years data: ", nrow(forcing)/365)) rlang::warn(paste(" Number of simulation years: ", params_siml$nyeartrend)) rlang::warn(" Returning a dummy data frame.") + do_continue <- FALSE } } diff --git a/README.md b/README.md index 93880faa..253165b6 100644 --- a/README.md +++ b/README.md @@ -5,14 +5,13 @@ Provides a wrapper for the SOFUN model implemented in Fortran. Shared memory (calling Fortran functions from within R) increases speed and facilitates use and installation. The package provides the following functionalities: -- Collecting input data for a large number of site-scale simulations (data ingest) - Calibrating model parameters - Running the model and getting outputs directly back into R ("tidy" data) - Evaluating outputs (benchmarking) -So far, rsofun implements the P-model ([Stocker et al., 2019 GMDD](https://www.geosci-model-dev-discuss.net/gmd-2019-200/)) and was used for simulations presented in [Stocker et al., 2019 GMDD](https://www.geosci-model-dev-discuss.net/gmd-2019-200/). Dataset-specific steps (data ingest, and benchmarking) are so far only implemented for using GPP or ET data from FLUXNET 2015. However, all steps are implemented in a modular and generic way in order to provide an extendible framework. +So far, rsofun implements the P-model ([Stocker et al., 2019 GMDD](https://www.geosci-model-dev-discuss.net/gmd-2019-200/)) and was used for simulations presented in [Stocker et al., 2019 GMDD](https://www.geosci-model-dev-discuss.net/gmd-2019-200/). Input data, used as model forcing, is collected using the [ingestr](https://stineb.github.io/ingestr/) package. -Parallelisation for a large number of site-level simulations is provided using the *multidplyr* R package. Calibration uses the *gensa* R package. +Parallelisation for a large number of site-level simulations is provided using the *multidplyr* R package. Calibration uses the *GenSA* R package. ## Installation @@ -46,313 +45,5 @@ load_dependencies_rsofun() ## Example run -```r -library(dplyr) -library(rsofun) -load_dependencies_rsofun() -``` - -### Simulation settings - -Make sure we have a site meta info file containing information about longitude, latitude, elevation, years of data availability, etc. Use the FLUXNET 2015 meta info data provided with rsofun. - -```r -path_siteinfo <- "~/.siteinfo_example_fortran.csv" -siteinfo <- rsofun::metainfo_Tier1_sites_kgclimate_fluxnet2015 %>% - dplyr::filter(sitename %in% c("FR-Pue", "FR-LBr", "IT-Noe")) %>% - write_csv(path = path_siteinfo) -``` - -Define the simulation parameters. 
-```r -params_siml <- list( - spinup = TRUE, - spinupyears = 10, - recycle = 1, - soilmstress = FALSE, - tempstress = FALSE, - calc_aet_fapar_vpd = FALSE, - in_ppfd = TRUE, - in_netrad = FALSE, - const_clim_year = -9999, - const_lu_year = -9999, - const_co2_year = -9999, - const_ndep_year = -9999, - const_nfert_year = -9999, - outdt = 1, - ltre = FALSE, - ltne = FALSE, - ltrd = FALSE, - ltnd = FALSE, - lgr3 = TRUE, - lgn3 = FALSE, - lgr4 = FALSE - ) -``` - -Run `prepare_setup_sofun()` to define the simulation settings that contain all the information specified by the two steps above (meta info, and simulation parameters). -```r -settings_sims <- prepare_setup_sofun(siteinfo = siteinfo, params_siml = params_siml) -``` - -### Define model parameters - -First, let's do it by hand (calibration of parameters is shown later). -```r -params_modl <- list( - kphio = 0.04997714009213085, - soilm_par_a = 1.0, - soilm_par_b = 0.0, - vpdstress_par_a = 0.2, - vpdstress_par_b = 0.2, - vpdstress_par_m = 5 - ) -``` - -### Define soil parameters - -For now, this is implemented as an illustration. Should be made site-specific. -```r -list_soiltexture <- list( - top = list(fsand = 0.4, fclay = 0.3, forg = 0.1, fgravel = 0.1), - bottom = list(fsand = 0.4, fclay = 0.3, forg = 0.1, fgravel = 0.1) -) -``` - -### Get input - -First, define input settings. -```r -settings_input <- list( - data = NA, - temperature = "fluxnet2015", - precipitation = "fluxnet2015", - vpd = "fluxnet2015", - ppfd = "fluxnet2015", - netrad = "fluxnet2015", # c("fluxnet2015", "watch_wfdei"), - patm = "fluxnet2015", - netrad = NA, - cloudcover = "cru", - path_input = "~/sofun_inputs/example/", - path_watch_wfdei = "~/data/watch_wfdei/", - path_cru = "~/data/cru/ts_4.01/", - path_MODIS_FPAR_MCD15A3H = "~/data/fluxnet_subsets/fapar_MODIS_FPAR_MCD15A3H_gee_MCD15A3H_fluxnet2015_gee_subset/", - path_MODIS_EVI_MOD13Q1 = "~/data/fluxnet_subsets/fapar_MODIS_EVI_MOD13Q1_gee_MOD13Q1_fluxnet2015_gee_subset/", - path_co2 = "~/data/co2/cCO2_rcp85_const850-1765.csv", - path_fluxnet2015 = "~/data/FLUXNET-2015_Tier1/20191024/DD/", - path_fluxnet2015_hh = "~/data/FLUXNET-2015_Tier1/20191024/HH/", - get_from_remote = FALSE, - settings_gee = get_settings_gee( - bundle = "fpar", - python_path = "/Users/benjaminstocker/Library/Enthought/Canopy_64bit/User/bin/python", - gee_path = "~/gee_subset/gee_subset/" - ), - fapar = "MODIS_FPAR_MCD15A3H", - splined_fapar = TRUE - ) -``` - -Then, get the input data. -```r -ddf_input <- prepare_input_sofun( - settings_input = settings_input, - settings_sims = settings_sims, - overwrite_csv_climate_lev1 = FALSE, - overwrite_csv_climate_lev2 = TRUE, - overwrite_csv_climate_lev3 = TRUE, - overwrite_rdata_climate = TRUE, - overwrite_csv_fapar = FALSE, - verbose = FALSE - ) -``` - -### Run the model - -Run the model for all the sites specified in the first step. -```r -df_drivers <- collect_drivers_sofun( - settings = settings_sims, - forcing = ddf_input, - df_soiltexture = df_soiltexture - ) - -## by spelling out arguments -mod <- run_sofun_f_bysite( - df_drivers$sitename[1], - df_drivers$params_siml[[1]], - df_drivers$siteinfo[[1]], - df_drivers$forcing[[1]], - df_drivers$df_soiltexture[[1]], - params_modl = params_modl, - makecheck = TRUE - ) - -## The advantage of using the nested data frame 'df_drivers' is that the -## function 'run_sofun_f_bysite' can be applied to each row using 'pmap' -## Doing it like this, the outputs are stored in a new column 'out_sofun'. 
-## This same code is implemented in 'run_sofun_f' -df_output <- runread_sofun_f( - df_drivers, - params_modl = params_modl, - makecheck = TRUE, - parallel = FALSE - ) - -df_output$out_sofun[[1]] %>% - ggplot(aes(x=date, y=gpp)) + - geom_line() + - labs(title = df_output$sitename[[1]], subtitle = "SOFUN output") -``` - -### Calibrate - -Define calibration settings. -```r -## Define calibration settings common for all setups -settings_calib <- list( - method = "gensa", - targetvars = c("gpp"), - timescale = list( gpp = "d" ), - path_fluxnet2015 = "~/data/FLUXNET-2015_Tier1/20191024/DD/", - path_fluxnet2015_hh = "~/data/FLUXNET-2015_Tier1/20191024/HH/", - threshold_GPP = 0.5, - path_gepisat = "~/data/gepisat/v3_fluxnet2015/daily_gpp/", - maxit = 5, # (5 for gensa) (30 for optimr) # - sitenames = mysites, - filter_temp_max = 35.0, - filter_drought = FALSE, - metric = "rmse", - dir_results = "./", - name = "ORG", - par = list( kphio = list( lower=0.03, upper=0.07, init=0.0496 ) ), - datasource = list( gpp = "fluxnet2015_NT" ), - filter_temp_min = NA, - filter_soilm_min = NA - ) -``` - -Get calibration target data. -```r -ddf_obs_calib <- get_obs_calib( - settings_calib = settings_calib, - dplyr::select(df_drivers, sitename, siteinfo) %>% tidyr::unnest(siteinfo), - settings_input - ) -``` - -Calibrate the model. - -```r -set.seed(1982) -settings_calib <- calib_sofun( - settings_calib, - df_drivers, - ddf_obs = ddf_obs_calib - ) -``` - -The calibrated parameters are returned by `calib_sofun()` as part of the list: -```r -print(settings_calib$par_opt) -``` - - -### Evaluate - -Run the model once again with these parameters and evaluate results. -```r -mylist <- readr::read_csv("~/eval_pmodel/myselect_fluxnet2015.csv") %>% - dplyr::filter( use==1 ) %>% - dplyr::pull( Site ) - -settings_eval <- list( - sitenames = settings_sims$sitename, - sitenames_siteplots = mylist, - agg = 8, - path_fluxnet2015_d = "~/data/FLUXNET-2015_Tier1/20160128/point-scale_none_1d/original/unpacked/", - path_fluxnet2015_w = "~/data/FLUXNET-2015_Tier1/20160128/point-scale_none_7d/original/unpacked/", - path_fluxnet2015_m = "~/data/FLUXNET-2015_Tier1/20160128/point-scale_none_1m/original/unpacked/", - path_fluxnet2015_y = "~/data/FLUXNET-2015_Tier1/20160128/point-scale_none_1y/original/unpacked/", - path_gepisat_d = "~/data/gepisat/v3_fluxnet2015/daily_gpp/", - benchmark = list( gpp = c("fluxnet2015_NT") ), - remove_premodis = TRUE - ) -``` - -Get evaluation data (benchmarking data). -```r -filn <- "./obs_eval.Rdata" -if (file.exists(filn)){ - load(filn) -} else { - obs_eval <- get_obs_eval( - settings_eval = settings_eval, - settings_sims = settings_sims, - overwrite = TRUE, - light = TRUE, - add_forcing = FALSE - ) - save(obs_eval, file = filn) -} -``` - -Now run the model with calibrated parameters. -```r -params_modl <- list( - kphio = settings_calib$par_opt[["kphio"]], - soilm_par_a = 1.0, - soilm_par_b = 0.0, - vpdstress_par_a = 0.2, - vpdstress_par_b = 0.2, - vpdstress_par_m = 5 - ) - -mod <- runread_sofun_f( - df_drivers, - params_modl = params_modl, - makecheck = TRUE - ) %>% - rename(id = sitename) %>% - unnest(out_sofun) -``` - -And finally do the evaluation. -```r -out_eval <- eval_sofun( - mod, - settings_eval, - settings_sims, - obs_eval = obs_eval, - overwrite = TRUE, - light = FALSE - ) -``` - -Print some results. 
-```r -out_eval$gpp$fluxnet2015$metrics$xdaily_pooled -``` - -```r -out_eval$gpp$fluxnet2015$data$xdf %>% rbeni::analyse_modobs2("mod", "obs", type = "heat") -``` - -```r -out_eval$gpp$fluxnet2015$data$ddf %>% - dplyr::filter(sitename=="BE-Vie" & year(date) < 2005) %>% - ggplot(aes(x = date)) + - geom_line(aes(y = obs), col="black") + - geom_line(aes(y = mod), col="red") + - labs(title = "BE-Vie") - -out_eval$gpp$fluxnet2015$data$ddf %>% - dplyr::filter(sitename=="AU-Dry" & year(date) > 2010) %>% - ggplot(aes(x = date)) + - geom_line(aes(y = obs), col="black") + - geom_line(aes(y = mod), col="red") + - labs(title = "AU-Dry") -``` - -## Acknowledgements +See [here](https://rpubs.com/stineb/rsofun) for an example. -The main author (B. Stocker) was funded by Marie Sklodowska-Curie fellowship H2020-MSCA-IF-2015, project FIBER, grant number 701329. diff --git a/_config.yml b/_config.yml deleted file mode 100644 index 2f7efbea..00000000 --- a/_config.yml +++ /dev/null @@ -1 +0,0 @@ -theme: jekyll-theme-minimal \ No newline at end of file diff --git a/docs/site_libs/navigation-1.1/codefolding.js b/docs/site_libs/navigation-1.1/codefolding.js index 305b508d..8cb6311e 100644 --- a/docs/site_libs/navigation-1.1/codefolding.js +++ b/docs/site_libs/navigation-1.1/codefolding.js @@ -17,12 +17,12 @@ window.initializeCodeFolding = function(show) { var currentIndex = 1; // select all R code blocks - var rCodeBlocks = $('pre.r, pre.python, pre.bash, pre.sql, pre.cpp, pre.stan'); + var rCodeBlocks = $('pre.r, pre.python, pre.bash, pre.sql, pre.cpp, pre.stan, pre.julia'); rCodeBlocks.each(function() { // create a collapsable div to wrap the code in var div = $('
<div class="collapse r-code-collapse"></div>');
-    if (show)
+    if (show || $(this)[0].classList.contains('fold-show'))
       div.addClass('in');
     var id = 'rcode-643E0F36' + currentIndex++;
     div.attr('id', id);
diff --git a/docs/site_libs/tocify-1.9.1/jquery.tocify.css b/docs/site_libs/tocify-1.9.1/jquery.tocify.css
index bee8f33a..21021388 100644
--- a/docs/site_libs/tocify-1.9.1/jquery.tocify.css
+++ b/docs/site_libs/tocify-1.9.1/jquery.tocify.css
@@ -11,8 +11,6 @@
     margin-left: 2%;
     position: fixed;
     border: 1px solid #ccc;
-    webkit-border-radius: 6px;
-    moz-border-radius: 6px;
     border-radius: 6px;
 }
@@ -45,11 +43,15 @@
 .tocify-subheader .tocify-subheader {
     text-indent: 30px;
 }
-
-/* Further indents third level subheader elements. You can continue this pattern if you have more nested elements. */
 .tocify-subheader .tocify-subheader .tocify-subheader {
     text-indent: 40px;
 }
+.tocify-subheader .tocify-subheader .tocify-subheader .tocify-subheader {
+    text-indent: 50px;
+}
+.tocify-subheader .tocify-subheader .tocify-subheader .tocify-subheader .tocify-subheader {
+    text-indent: 60px;
+}
 /* Twitter Bootstrap Override Style */
 .tocify .tocify-item > a, .tocify .nav-list .nav-header {
diff --git a/docs/site_libs/tocify-1.9.1/jquery.tocify.js b/docs/site_libs/tocify-1.9.1/jquery.tocify.js
index 259d1935..ba24a7dd 100644
--- a/docs/site_libs/tocify-1.9.1/jquery.tocify.js
+++ b/docs/site_libs/tocify-1.9.1/jquery.tocify.js
@@ -386,13 +386,13 @@
                 item.append($("<a/>", {
-                    "text": self.text()
+                    "html": self.html()
                 }));
             }
             else {
-                item.text(self.text());
+                item.html(self.html());
             }
diff --git a/docs/usage.html b/docs/usage.html
index 2c8749df..95497a77 100644
--- a/docs/usage.html
+++ b/docs/usage.html