# Restructure physchem data from WQX into wide format and export as .csv 

# setup ####
# Start from an empty workspace and a cleared console.
# NOTE(review): rm(list = ls()) deletes every object in the calling
# environment, so sourcing this script from a live session discards that
# session's variables.
rm(list = ls())
cat("\014") # clear console

# Pin the session time zone to UTC so date/time values are not shifted into the
# local time zone of the machine running the script. "UTC" is the canonical
# tz database spelling; the previous mixed-case spelling is not a tz database
# name and depends on how the platform treats unrecognised zone names.
Sys.setenv(TZ = "UTC")

library(dplyr)
library(tidyverse)

# Import Data ####
# Load the physical/chemical result export from the input folder (path is
# relative to the working directory) into a data frame named `data`, which
# every later section of this script reads and modifies.
data <- read.csv(file = "../1_Input/resultphyschem.csv")


# Clean data ---------------------------------------------------------


# Replace retired characteristic names with their current equivalents.
# The lookup is keyed by the retired name; values are the replacement names.
retired_to_current <- c(
  "Nutrient-nitrogen***retired***use TOTAL NITROGEN, MIXED FORMS with speciation AS N" =
    "Total Nitrogen, mixed forms",
  "Inorganic nitrogen (nitrate and nitrite) ***retired***use Nitrate + Nitrite" =
    "Nitrate + Nitrite as N"
)

# Position of each row's name in the lookup (NA when the name is not retired)
retired_idx <- match(data$CharacteristicName, names(retired_to_current))
is_retired <- !is.na(retired_idx)

# Overwrite only the rows whose name matched a retired entry
data$CharacteristicName[is_retired] <-
  unname(retired_to_current[retired_idx[is_retired]])

# Remove rows whose detection condition is "Not Reported".
# %in% never returns NA, so rows with a missing detection condition are kept.
data <- filter(data, !(ResultDetectionConditionText %in% "Not Reported"))


# Drop results that are Quality Control Sample-Field Blank or
# Quality Control Sample-Field Replicate.
qc_activity_types <- c(
  "Quality Control Sample-Field Blank",
  "Quality Control Sample-Field Replicate"
)

# %in% is used instead of == because == returns NA for a missing
# ActivityTypeCode, and an NA in a logical row index makes `[` return an
# all-NA row in place of the real record. With %in%, rows with a missing
# activity type are simply kept unchanged.
data <- data[!(data$ActivityTypeCode %in% qc_activity_types), ]


# Change non-detect numeric values to zeros and set their units to be the same
# as the detection limit units.
# Done as one vectorized assignment over a logical mask rather than a
# row-by-row loop; %in% treats a missing detection condition as "not a
# non-detect", matching the isTRUE() guard it replaces.
is_non_detect <- data[["ResultDetectionConditionText"]] %in% "Not Detected"

# Only touch the columns when there is something to change, so column types
# are left exactly as read when no non-detects are present.
if (any(is_non_detect)) {
  data[["ResultMeasureValue"]][is_non_detect] <- 0
  data[["ResultMeasure.MeasureUnitCode"]][is_non_detect] <-
    data[["DetectionQuantitationLimitMeasure.MeasureUnitCode"]][is_non_detect]
}

# Keep only the columns needed downstream and give them short names.
# Names of this vector are the new column names; values are the source columns.
# `type` is carried along but is not referenced again in this script as shown.
column_map <- c(
  site   = "MonitoringLocationName",
  date   = "ActivityStartDate",
  param  = "CharacteristicName",
  result = "ResultMeasureValue",
  unit   = "ResultMeasure.MeasureUnitCode",
  type   = "ActivityTypeCode"
)
data1 <- data[, unname(column_map)]
colnames(data1) <- names(column_map)

# Coerce the date column to Date class (calendar date, no time of day)
data1[["date"]] <- as.Date(data1[["date"]])

# Coerce the result column to numeric; values that cannot be parsed become NA
data1[["result"]] <- as.numeric(data1[["result"]])


# Convert all ug/L into mg/L before reshaping-----------------------------------------------------

# Rows reported in ug/L; %in% treats a missing unit as "not ug/L"
is_ug_per_l <- data1[["unit"]] %in% "ug/L"

# Vectorized replacement for the former row-by-row loop: scale the matching
# results from ug/L to mg/L and relabel their unit. Guarded so that nothing is
# assigned (and no column type can change) when no row is in ug/L.
if (any(is_ug_per_l)) {
  data1[["result"]][is_ug_per_l] <- data1[["result"]][is_ug_per_l] * 0.001
  data1[["unit"]][is_ug_per_l] <- "mg/L"
}


# Reshape from long to wide format ----------------------------------------


# Flag which rows carry a usable numeric result.
# numeric_value is the result coerced to numeric (conversion warnings are
# silenced); is_numeric is TRUE wherever that coercion did not yield NA.
data_long <- data1
data_long$numeric_value <- suppressWarnings(as.numeric(data_long$result))
data_long$is_numeric <- !is.na(data_long$numeric_value)

# Collapse numeric results to a single median per site/date/parameter, then
# spread the parameters into one "<param>_result" column each.
numeric_wide <- data_long %>%
  filter(is_numeric) %>%
  group_by(site, date, param) %>%
  summarise(result = median(numeric_value, na.rm = TRUE), .groups = "drop") %>%
  # Build the final column names up front so the pivot can use them directly
  mutate(param = paste0(param, "_result")) %>%
  pivot_wider(names_from = param, values_from = result)


# Reduce a group's units to one value: the unit itself when the group has a
# single distinct unit, otherwise all distinct units joined with "; ".
collapse_units <- function(units) {
  distinct_units <- unique(units)
  if (length(distinct_units) == 1) {
    distinct_units
  } else {
    paste(distinct_units, collapse = "; ")
  }
}

# Reshape unit values (kept as character): one unit entry per
# site/date/parameter, spread into one "<param>_unit" column each.
# All rows of data_long contribute here, not only those with numeric results.
unit_wide <- data_long %>%
  group_by(site, date, param) %>%
  summarise(unit = collapse_units(unit), .groups = "drop") %>%
  mutate(param = paste0(param, "_unit")) %>%
  pivot_wider(names_from = param, values_from = unit)


# Attach the unit columns to the numeric results. The numeric table is the
# left side, so only site/date combinations with a numeric result are kept.
data_wide <- numeric_wide %>%
  left_join(unit_wide, by = c("site", "date"))

# Put the key columns first, followed by every other column in alphabetical
# order (so each parameter's result and unit columns sit next to each other).
measure_cols <- sort(setdiff(colnames(data_wide), c("site", "date")))
data_wide <- data_wide %>%
  select(site, date, all_of(measure_cols))


# Save as csv in output folder --------------------------------------------

# Write the wide table to the 3_Output folder (path is relative to the working
# directory). row.names is left at its default, so the first column of the
# file holds the row index.
write.csv(x = data_wide, file = "../3_Output/ExcelReadyData.csv")
