Skip to content


feat: test this app.R file
Browse files (browse the repository at this point in the history)
  • Loading branch information
gregfrasco committed Nov 12, 2024
1 parent 4f81ae6 commit 200b943
Showing 1 changed file with 37 additions and 100 deletions.
137 changes: 37 additions & 100 deletions comets_shinyapp_example/app.R
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,74 +5,45 @@ library(data.table)
library(shinyhelper) # For adding help tooltips

# Use conflicted to set preferences for conflicting functions
conflict_prefer("between", "data.table")
conflict_prefer("filter", "dplyr")
conflict_prefer("first", "dplyr")
conflict_prefer("hour", "lubridate")
conflict_prefer("isoweek", "lubridate")
conflict_prefer("lag", "dplyr")
conflict_prefer("last", "dplyr")
conflict_prefer("mday", "lubridate")
conflict_prefer("minute", "lubridate")
conflict_prefer("month", "lubridate")
conflict_prefer("quarter", "lubridate")
conflict_prefer("second", "lubridate")
conflict_prefer("transpose", "purrr")
conflict_prefer("wday", "lubridate")
conflict_prefer("week", "lubridate")
conflict_prefer("yday", "lubridate")
conflict_prefer("year", "lubridate")

abundance_df = fread("./species_abundance_filt.csv",
nThread = 8, drop = 1, header = T) %>%
rename("temperature"="soilTemp","pH" = "soilInCaClpH", "abundance"="percentage")

# Read in data file for species selections - info on environmental preferences
organism_df_to_subset = fread("./organism_data_to_subset.csv", drop = 1, header = T) %>%
mutate(taxon=`Species of interest`) %>%
rename("Genome source" = source)

# Read in data file to just show species names/links
organism_df_to_print = fread("./organism_data_to_print.csv", drop = 1, header = T)

# Read in biome data file
biome_info = fread("./nlcd_key.csv", drop = 1, header = T)
biome_choices = biome_info$nlcdClass
names(biome_choices) = biome_info$prettyNlcd

abundance_df$biome = biome_info[match(abundance_df$nlcdClass, biome_info$nlcdClass),]$prettyNlcd

# Load Data
abundance_data_filt <- fread("./species_abundance_filt.csv", nThread = 8, drop = 1, header = TRUE)
df_to_subset <- fread("./organism_data_to_subset.csv", drop = 1, header = TRUE)
df_to_print <- fread("./organism_data_to_print.csv", drop = 1, header = TRUE)
biome_info <- fread("./nlcd_key.csv", drop = 1, header = TRUE)
taxonomy <- fread("./organism_taxonomy.csv", drop = 1, header = TRUE)

library(shinyhelper) # For adding help tooltips
# Prepare Biome Choices
biome_choices <- biome_info$nlcdClass
names(biome_choices) <- biome_info$prettyNlcd

# UI
# Define UI
ui <- fluidPage(
titlePanel("SoilMicrobeDB: An Interactive Database of Soil Microbial Genomes"),

tags$p("The SoilMicrobeDB is a collection of over 30,000 soil microbial genomes, each annotated with ecological preferences for environmental conditions such as pH, temperature, and biome type. This tool allows you to filter, analyze, and visualize data on microbial species across different soil environments, using the sample collection from the National Ecological Observatory Network (NEON)."),
tags$p("The SoilMicrobeDB is a collection of over 30,000 soil microbial genomes..."),

selectInput("biome", "Select Biome", choices = unique(organism_df_to_subset$biome), multiple = TRUE) %>%
selectInput("biome", "Select Biome", choices = unique(df_to_subset$biome), multiple = TRUE) %>%
type = "inline",
title = "Biome Preference",
content = "Biome preference is assigned if a taxon is present in at least 2% of samples within the biome. Biomes are assigned to each NEON sample using the National Land Cover Database.",
content = "Biome preference is assigned if a taxon is present in at least 2%...",
icon = "question-circle"

sliderInput("pH_range", "pH Preference Range", min = 3, max = 9, value = c(3, 9)) %>%
type = "inline",
title = "pH Preference",
content = "pH preference of each taxon is assigned as the peak of a LOESS curve fit to abundance data across the range of pH values. Click on a taxon to visualize or download this data.",
content = "pH preference of each taxon is assigned as the peak...",
icon = "question-circle"
Expand All @@ -81,7 +52,7 @@ ui <- fluidPage(
type = "inline",
title = "Temperature Preference",
content = "Temperature preference of each tacon is assigned as the peak of a LOESS curve fit to abundance data across the range of temperature values. Click on a taxon to visualize or download this data.",
content = "Temperature preference of each taxon...",
icon = "question-circle"
Expand All @@ -94,15 +65,15 @@ ui <- fluidPage(

# Server
# Define Server
server <- function(input, output, session) {

# Initialize shinyhelper
shinyhelper::observe_helpers(withMathJax = TRUE)

# Reactive Filtered Organism DataFrame
filtered_organism_df <- reactive({
organism_df_to_subset %>%
df_to_subset %>%
(is.null(input$biome) || biome %in% input$biome),
between(pH_preference, input$pH_range[1], input$pH_range[2]),
Expand All @@ -112,12 +83,12 @@ server <- function(input, output, session) {

# Display Organism Data Table
output$organism_table <- DT::renderDataTable({
filtered_organism_df() %>% select(Kingdom, Genus, "Species of interest", "Genome source", "Functional in COMETS?")
filtered_organism_df() %>% select(Kingdom, Genus, `Species of interest`, `Genome source`, `Functional in COMETS?`)
}, selection = 'single')

# Download Filtered Organism Data
output$download_organism <- downloadHandler(
filename = function() { paste("filtered_organism_data.csv") },
filename = function() { "filtered_organism_data.csv" },
content = function(file) { write.csv(filtered_organism_df(), file, row.names = FALSE) }

Expand All @@ -127,26 +98,21 @@ server <- function(input, output, session) {
selected_row <- filtered_organism_df()[input$organism_table_rows_selected, ]
selected_taxon <- selected_row$taxon

# Filter abundance_df for selected taxon and check for data
abundance_filtered <- abundance_df %>%
filter(taxon == selected_taxon)
# Filter abundance data for selected taxon
abundance_filtered <- abundance_data_filt %>% filter(taxon == selected_taxon)

if (nrow(abundance_filtered) == 0) {
# Show error message if no data is available for the selected taxon
title = paste("Abundance Analysis for", selected_taxon),
tags$p("Error: No abundance data available for this taxon."),
footer = modalButton("Close")
} else {
# Render plots if data is available
size = "l",
title = paste("Abundance of", selected_taxon, "in NEON soil samples"),
tags$p(paste("Match Criteria:", selected_row$`Match criteria`)),
tags$p("Genome Link:", ifelse(!is.na(selected_row$genome_link), selected_row$genome_link, "N/A")),
footer = tagList(
downloadButton("download_filtered_abundance", "Download Taxon Abundance Data"),
Expand All @@ -159,69 +125,40 @@ server <- function(input, output, session) {
output$pH_plot <- renderPlot({
selected_taxon <- filtered_organism_df()[input$organism_table_rows_selected, "taxon"]
abundance_data <- abundance_df %>% filter(taxon == selected_taxon)

if (nrow(abundance_data) == 0) {
return(NULL) # Avoid rendering if no data
abundance_data <- abundance_data_filt %>% filter(taxon == selected_taxon)

aes(x = pH, y = abundance)) +#, color=nlcdClass)) +
position=position_jitter(width = .01, height=0), size=2) +
#geom_smooth(method = "loess",show.legend = F, span=.7) +
#method = "loess",
show.legend = F, se=F) +
ggplot(abundance_data, aes(x = pH, y = abundance, color = biome)) +
geom_point(alpha = .5, position = position_jitter(width = .01, height = 0), size = 2) +
geom_smooth(method = "gam", show.legend = FALSE, se = FALSE) +
theme_bw(base_size = 18) +
#scale_y_sqrt() +
xlab("Soil pH") +
labs(title = paste("Abundance vs. pH for", selected_taxon)) +
ylab("Microbial abundance")
xlab("Soil pH") + ylab("Microbial abundance") +
labs(title = paste("Abundance vs. pH for", selected_taxon))

# Temperature Plot
output$temperature_plot <- renderPlot({
selected_taxon <- filtered_organism_df()[input$organism_table_rows_selected, "taxon"]
abundance_data <- abundance_df %>% filter(taxon == selected_taxon)

if (nrow(abundance_data) == 0) {
return(NULL) # Avoid rendering if no data
abundance_data <- abundance_data_filt %>% filter(taxon == selected_taxon)

aes(x = temperature, y = abundance#, color=nlcdClass
)) +
position=position_jitter(width = .01, height=0), size=2) +
# geom_smooth(method = "loess", show.legend = F, span=.7) +
#method = "loess",
show.legend = F, se=F) +
ggplot(abundance_data, aes(x = temperature, y = abundance, color = biome)) +
geom_point(alpha = .5, position = position_jitter(width = .01, height = 0), size = 2) +
geom_smooth(method = "gam", show.legend = FALSE, se = FALSE) +
theme_bw(base_size = 18) +
#scale_y_sqrt() +
xlab("Soil temperature") +
ylab("Microbial abundance") +
xlab("Soil temperature") + ylab("Microbial abundance") +
labs(title = paste("Abundance vs. temperature for", selected_taxon))


# Download Filtered Abundance Data
output$download_filtered_abundance <- downloadHandler(
filename = function() { paste("taxon_abundance_data.csv") },
filename = function() { "taxon_abundance_data.csv" },
content = function(file) {
selected_taxon <- filtered_organism_df()[input$organism_table_rows_selected, "taxon"]
abundance_data <- abundance_df %>% filter(taxon == selected_taxon)

if (nrow(abundance_data) > 0) {
write.csv(abundance_data, file, row.names = FALSE)
abundance_data <- abundance_data_filt %>% filter(taxon == selected_taxon)
write.csv(abundance_data, file, row.names = FALSE)

# Run the Application
shinyApp(ui, server)

0 comments on commit 200b943

Please sign in to comment.