# DataHub workflow inputs ------------------------------------------------------
# Relative paths below are resolved against the current working directory.
# Read the DataHub parameters file.
dh.variables <- read.csv("../1_Input/01_datahub_parameters.csv")
# Run the setup-check script.
# NOTE(review): this is an absolute, user-specific path, so it will not resolve
# on another machine or account. What the script defines is not visible here.
source("C:/Users/x63d979/Desktop/DataHub_Workflow_2024-04-30_mr/2_Code/01_CheckSetup_2024-04-01.R")
# Read the WQX reference results.
ref <- read.csv("../1_Input/05_Wqx_ResultsForReference.csv")
# Read the WQX / DataHub site comparison table.
dh.sites <- read.csv("../1_Input/06_WqxDatahub_SiteCompare.csv")
# Attach stringr and the tidyverse (attaching tidyverse also attaches stringr).
library(stringr)
library(tidyverse)
# Fish data practice -----------------------------------------------------------
# Install the packages only when they are missing, instead of reinstalling
# them on every run. local() keeps the helper variables out of the workspace.
local({
  wanted <- c("tidyverse", "sf", "mapview")
  absent <- wanted[!vapply(wanted, requireNamespace, logical(1), quietly = TRUE)]
  if (length(absent) > 0) {
    install.packages(absent)
  }
})
library(tidyverse)
library(sf)
library(mapview)

# Show the current working directory (the base R function is getwd()).
getwd()

# Read the fish data.
# A backslash starts an escape sequence inside an R string ("\U" is parsed as a
# Unicode escape), so a Windows path written with single backslashes does not
# parse. Use forward slashes (or doubled backslashes) instead.
fishdata <- read_csv("C:/Users/x63d979/Desktop/Rpractice/fishdat.csv")
# Setup -------------------------------------------------------------------
cat("\014") # clear console
rm(list = ls()) # remove all objects from workspace
# library() stops with an error when a package is missing; require() would only
# warn and return FALSE, letting the script fail later in a less obvious place.
library(knitr)
library(dplyr)
library(readxl)
library(kableExtra)
library(tidyverse)
library(janitor)
library(reshape2)
# Load results data -------------------------------------------------------
# The relative path is resolved against the current working directory.
results_long <- read.csv("../3_IntermediateOutput/results_long.csv")
# Print the working directory that the relative path above was resolved from.
getwd()
library(dataRetrieval)
library(dplyr)

# Data profiles: "Project Data"
project_data <- readWQPdata(
  statecode = "MT",
  service = "Project"
)

# Data profiles: "Organization Data"
# Requested once; issuing the identical query a second time only repeats the
# download and overwrites org_data with the same result.
org_data <- readWQPdata(
  statecode = "MT",
  service = "Organization"
)

# Data profiles: "Sample Results (physical/chemical metadata)"
# NOTE(review): confirm that `ProjectIdentifier` is a query parameter the
# Water Quality Portal accepts; its documented project filter is `project`.
samp_data <- readWQPdata(
  ProjectIdentifier = "MST",
  dataProfile = "resultPhysChem"
)

# Show the current working directory, then switch to the WQP project folder.
getwd()
setwd("C:/Users/x63d979/Desktop/WQP dataRetrieval")
# Setup -------------------------------------------------------------------
rm(list = ls()) # start from an empty workspace
cat("\014") # form feed: clears the console
library(dataRetrieval)
library(dplyr)

# Documentation for readWQPdata():
# https://www.rdocumentation.org/packages/dataRetrieval/versions/2.7.11/topics/readWQPdata
# https://cran.r-project.org/web/packages/dataRetrieval/refman/dataRetrieval.html#readWQPdata

# "Project Data" profile for Gallatin County, Montana
project_data <- readWQPdata(
  statecode = "MT",
  countycode = "Gallatin",
  service = "Project"
)

# "Organization Data" profile for Gallatin County, Montana
org_data <- readWQPdata(
  statecode = "MT",
  countycode = "Gallatin",
  service = "Organization"
)

# "Sample Results (physical/chemical metadata)" profile for a single site
samp_data <- readWQPdata(
  siteid = "USGS-04024315",
  dataProfile = "resultPhysChem"
)
View(samp_data)

# Same profile for the GLWQD-SWMN project; this replaces samp_data
samp_data <- readWQPdata(
  project = "GLWQD-SWMN",
  dataProfile = "resultPhysChem",
  service = "Result"
)

# "Site Data Only" profile for the GLWQD-SWMN project
site_data <- readWQPdata(
  project = "GLWQD-SWMN",
  service = "Station"
)

# Inspect the downloaded tables in the data viewer
View(samp_data)
View(site_data)
View(site_data)
# Restructure physchem data from WQX into wide format and export as .csv
# setup ####
rm(list = ls()) # remove all objects from workspace
cat("\014") # clear console
# Work in UTC so dates/times are not shifted into the local time zone.
# "UTC" is the canonical zone name; the mixed-case spelling "Utc" is not a
# standard time zone name and is not recognised on every platform.
Sys.setenv(TZ = "UTC")

# install.packages() takes all package names as ONE character vector; a second
# positional argument is the library directory (`lib`), not another package.
# Only packages that are missing get installed.
local({
  needed <- c("dplyr", "tidyverse")
  absent <- needed[!vapply(needed, requireNamespace, logical(1), quietly = TRUE)]
  if (length(absent) > 0) {
    install.packages(absent)
  }
})
library(dplyr)
library(tidyverse)

# Set the working directory BEFORE importing: the relative "../1_Input" path
# below is resolved against it.
setwd("C:/Users/x63d979/Desktop/WXQ_to_Excel_2025-05-05_gj/2_Code")

# Import Data ####
data <- read.csv("../1_Input/resultphyschem_Yaak.csv")
# Clean data ---------------------------------------------------------
# Replace retired characteristic names with their current names
data$CharacteristicName[data$CharacteristicName ==
  "Nutrient-nitrogen***retired***use TOTAL NITROGEN, MIXED FORMS with speciation AS N"] <-
  "Total Nitrogen, mixed forms"
data$CharacteristicName[data$CharacteristicName ==
  "Inorganic nitrogen (nitrate and nitrite) ***retired***use Nitrate + Nitrite"] <-
  "Nitrate + Nitrite as N"

# Drop results that are "Not Reported" (rows with a missing condition are kept)
data <- data %>%
  filter(is.na(ResultDetectionConditionText) | ResultDetectionConditionText != "Not Reported")

# Drop field blanks and field replicates.
# %in% never returns NA, so rows with a missing ActivityTypeCode are kept
# unchanged; indexing rows with `!(x == "...")` would instead turn each such
# row into a row of all NAs.
data <- data[!(data$ActivityTypeCode %in% c(
  "Quality Control Sample-Field Blank",
  "Quality Control Sample-Field Replicate"
)), ]

# Non-detects: set the value to zero and take the unit from the detection
# limit. One vectorised mask replaces the row-by-row loop; %in% treats a
# missing condition as "not a non-detect".
is_nondetect <- data$ResultDetectionConditionText %in% "Not Detected"
if (any(is_nondetect)) {
  data$ResultMeasureValue[is_nondetect] <- 0
  data$ResultMeasure.MeasureUnitCode[is_nondetect] <-
    data$DetectionQuantitationLimitMeasure.MeasureUnitCode[is_nondetect]
}
# Relevant columns only, renamed to short working names
data1 <- data[, c(
  "MonitoringLocationName", "ActivityStartDate",
  "CharacteristicName", "ResultMeasureValue",
  "ResultMeasure.MeasureUnitCode", "ActivityTypeCode"
)]
colnames(data1) <- c("site", "date", "param", "result", "unit", "type")

# Ensure date column is a Date
data1$date <- as.Date(data1$date)
# Ensure result column is numeric (text that is not a number becomes NA)
data1$result <- as.numeric(data1$result)

# Convert all ug/L into mg/L before reshaping -------------------------------
# Vectorised: one logical mask selects every ug/L row at once instead of
# looping over rows. %in% treats a missing unit as "not ug/L".
is_ug_per_l <- data1$unit %in% "ug/L"
if (any(is_ug_per_l)) {
  data1$result[is_ug_per_l] <- data1$result[is_ug_per_l] * 0.001
  data1$unit[is_ug_per_l] <- "mg/L"
}
# Reshape from long to wide format ----------------------------------------
# Flag the rows that carry a usable numeric result
data_long <- mutate(
  data1,
  numeric_value = suppressWarnings(as.numeric(result)),
  is_numeric = !is.na(numeric_value)
)

# One "<param>_result" column per parameter: median per site/date/param
numeric_wide <- data_long %>%
  filter(is_numeric) %>%
  group_by(site, date, param) %>%
  summarise(result = median(numeric_value, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(
    names_from = param,
    values_from = result,
    names_glue = "{param}_result"
  )

# One "<param>_unit" column per parameter (kept as character); when a group
# has more than one distinct unit they are joined with "; "
unit_wide <- data_long %>%
  group_by(site, date, param) %>%
  summarise(
    unit = if (length(unique(unit)) == 1) {
      unique(unit)
    } else {
      paste(unique(unit), collapse = "; ")
    },
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = param,
    values_from = unit,
    names_glue = "{param}_unit"
  )

# Attach the unit columns to the numeric results by site and date, sort the
# columns by name, then move site and date back to the front
data_wide <- numeric_wide %>%
  left_join(unit_wide, by = c("site", "date")) %>%
  select(order(colnames(.))) %>%
  relocate(site, date)

# Save as csv in output folder --------------------------------------------
# Wide table goes to the 3_Output folder
write.csv(data_wide, file = "../3_Output/ExcelReadyData.csv")
# Import Data ####
data <- read.csv("../1_Input/resultphyschem_Carbon.csv")

# Clean data ---------------------------------------------------------
# Replace retired characteristic names with their current names
data$CharacteristicName[data$CharacteristicName ==
  "Nutrient-nitrogen***retired***use TOTAL NITROGEN, MIXED FORMS with speciation AS N"] <-
  "Total Nitrogen, mixed forms"
data$CharacteristicName[data$CharacteristicName ==
  "Inorganic nitrogen (nitrate and nitrite) ***retired***use Nitrate + Nitrite"] <-
  "Nitrate + Nitrite as N"

# Drop results that are "Not Reported" (rows with a missing condition are kept)
data <- data %>%
  filter(is.na(ResultDetectionConditionText) | ResultDetectionConditionText != "Not Reported")

# Drop field blanks and field replicates.
# %in% never returns NA, so rows with a missing ActivityTypeCode are kept
# unchanged; indexing rows with `!(x == "...")` would instead turn each such
# row into a row of all NAs.
data <- data[!(data$ActivityTypeCode %in% c(
  "Quality Control Sample-Field Blank",
  "Quality Control Sample-Field Replicate"
)), ]

# Non-detects: set the value to zero and take the unit from the detection
# limit. One vectorised mask replaces the row-by-row loop; %in% treats a
# missing condition as "not a non-detect".
is_nondetect <- data$ResultDetectionConditionText %in% "Not Detected"
if (any(is_nondetect)) {
  data$ResultMeasureValue[is_nondetect] <- 0
  data$ResultMeasure.MeasureUnitCode[is_nondetect] <-
    data$DetectionQuantitationLimitMeasure.MeasureUnitCode[is_nondetect]
}
# Relevant columns only, renamed to short working names
data1 <- data[, c(
  "MonitoringLocationName", "ActivityStartDate",
  "CharacteristicName", "ResultMeasureValue",
  "ResultMeasure.MeasureUnitCode", "ActivityTypeCode"
)]
colnames(data1) <- c("site", "date", "param", "result", "unit", "type")

# Ensure date column is a Date
data1$date <- as.Date(data1$date)
# Ensure result column is numeric (text that is not a number becomes NA)
data1$result <- as.numeric(data1$result)

# Convert all ug/L into mg/L before reshaping -------------------------------
# Vectorised: one logical mask selects every ug/L row at once instead of
# looping over rows. %in% treats a missing unit as "not ug/L".
is_ug_per_l <- data1$unit %in% "ug/L"
if (any(is_ug_per_l)) {
  data1$result[is_ug_per_l] <- data1$result[is_ug_per_l] * 0.001
  data1$unit[is_ug_per_l] <- "mg/L"
}
# Reshape from long to wide format ----------------------------------------
# Flag the rows that carry a usable numeric result
data_long <- mutate(
  data1,
  numeric_value = suppressWarnings(as.numeric(result)),
  is_numeric = !is.na(numeric_value)
)

# One "<param>_result" column per parameter: median per site/date/param
numeric_wide <- data_long %>%
  filter(is_numeric) %>%
  group_by(site, date, param) %>%
  summarise(result = median(numeric_value, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(
    names_from = param,
    values_from = result,
    names_glue = "{param}_result"
  )

# One "<param>_unit" column per parameter (kept as character); when a group
# has more than one distinct unit they are joined with "; "
unit_wide <- data_long %>%
  group_by(site, date, param) %>%
  summarise(
    unit = if (length(unique(unit)) == 1) {
      unique(unit)
    } else {
      paste(unique(unit), collapse = "; ")
    },
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = param,
    values_from = unit,
    names_glue = "{param}_unit"
  )

# Attach the unit columns to the numeric results by site and date, sort the
# columns by name, then move site and date back to the front
data_wide <- numeric_wide %>%
  left_join(unit_wide, by = c("site", "date")) %>%
  select(order(colnames(.))) %>%
  relocate(site, date)

# Save as csv in output folder --------------------------------------------
# Wide table goes to the 3_Output folder
write.csv(data_wide, file = "../3_Output/ExcelReadyData.csv")
# Restructure physchem data from WQX into wide format and export as .csv
# setup ####
rm(list = ls()) # remove all objects from workspace
cat("\014") # clear console
# Work in UTC so dates/times are not shifted into the local time zone.
# "UTC" is the canonical zone name; the mixed-case spelling "Utc" is not a
# standard time zone name and is not recognised on every platform.
Sys.setenv(TZ = "UTC")
library(dplyr)
library(tidyverse)
# Import Data ####
# The relative path is resolved against the current working directory.
data <- read.csv("../1_Input/resultphyschem.csv")
# Clean data ---------------------------------------------------------
# Replace retired characteristic names with their current names
data$CharacteristicName[data$CharacteristicName ==
  "Nutrient-nitrogen***retired***use TOTAL NITROGEN, MIXED FORMS with speciation AS N"] <-
  "Total Nitrogen, mixed forms"
data$CharacteristicName[data$CharacteristicName ==
  "Inorganic nitrogen (nitrate and nitrite) ***retired***use Nitrate + Nitrite"] <-
  "Nitrate + Nitrite as N"

# Drop results that are "Not Reported" (rows with a missing condition are kept)
data <- data %>%
  filter(is.na(ResultDetectionConditionText) | ResultDetectionConditionText != "Not Reported")

# Drop field blanks and field replicates.
# %in% never returns NA, so rows with a missing ActivityTypeCode are kept
# unchanged; indexing rows with `!(x == "...")` would instead turn each such
# row into a row of all NAs.
data <- data[!(data$ActivityTypeCode %in% c(
  "Quality Control Sample-Field Blank",
  "Quality Control Sample-Field Replicate"
)), ]

# Non-detects: set the value to zero and take the unit from the detection
# limit. One vectorised mask replaces the row-by-row loop; %in% treats a
# missing condition as "not a non-detect".
is_nondetect <- data$ResultDetectionConditionText %in% "Not Detected"
if (any(is_nondetect)) {
  data$ResultMeasureValue[is_nondetect] <- 0
  data$ResultMeasure.MeasureUnitCode[is_nondetect] <-
    data$DetectionQuantitationLimitMeasure.MeasureUnitCode[is_nondetect]
}
# Relevant columns only, renamed to short working names
data1 <- data[, c(
  "MonitoringLocationName", "ActivityStartDate",
  "CharacteristicName", "ResultMeasureValue",
  "ResultMeasure.MeasureUnitCode", "ActivityTypeCode"
)]
colnames(data1) <- c("site", "date", "param", "result", "unit", "type")

# Ensure date column is a Date
data1$date <- as.Date(data1$date)
# Ensure result column is numeric (text that is not a number becomes NA)
data1$result <- as.numeric(data1$result)

# Convert all ug/L into mg/L before reshaping -------------------------------
# Vectorised: one logical mask selects every ug/L row at once instead of
# looping over rows. %in% treats a missing unit as "not ug/L".
is_ug_per_l <- data1$unit %in% "ug/L"
if (any(is_ug_per_l)) {
  data1$result[is_ug_per_l] <- data1$result[is_ug_per_l] * 0.001
  data1$unit[is_ug_per_l] <- "mg/L"
}
# Reshape from long to wide format ----------------------------------------
# Flag the rows that carry a usable numeric result
data_long <- mutate(
  data1,
  numeric_value = suppressWarnings(as.numeric(result)),
  is_numeric = !is.na(numeric_value)
)

# One "<param>_result" column per parameter: median per site/date/param
numeric_wide <- data_long %>%
  filter(is_numeric) %>%
  group_by(site, date, param) %>%
  summarise(result = median(numeric_value, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(
    names_from = param,
    values_from = result,
    names_glue = "{param}_result"
  )

# One "<param>_unit" column per parameter (kept as character); when a group
# has more than one distinct unit they are joined with "; "
unit_wide <- data_long %>%
  group_by(site, date, param) %>%
  summarise(
    unit = if (length(unique(unit)) == 1) {
      unique(unit)
    } else {
      paste(unique(unit), collapse = "; ")
    },
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = param,
    values_from = unit,
    names_glue = "{param}_unit"
  )

# Attach the unit columns to the numeric results by site and date, sort the
# columns by name, then move site and date back to the front
data_wide <- numeric_wide %>%
  left_join(unit_wide, by = c("site", "date")) %>%
  select(order(colnames(.))) %>%
  relocate(site, date)

# Save as csv in output folder --------------------------------------------
# Wide table goes to the 3_Output folder
write.csv(data_wide, file = "../3_Output/ExcelReadyData.csv")
