library(tidyverse)
library(tbeptools)
library(extrafont)
library(sf)
library(lubridate)
library(patchwork)
library(mapview)
library(ggord)
library(vegan)
library(FactoMineR)
library(here)

loadfonts(device = 'win', quiet = T)

data(segmask)
data(bswqdat)
data(rswqdat)
data(kbrdat)
data(rsstatloc)

##
# fish kill data

# data from https://public.myfwc.com/fwri/FishKillReport/searchresults.aspx
# requested hillsborough, pinellas, manatee 1/1/95 to 10/5/21, attributed to red tide
fishdat <- read.csv(here('data-raw/FishKillResultReport.csv')) %>% 
  select(
    date = textBox6, 
    county = tEMPDataTextBox,
    city = cOUNTYDataTextBox, 
    waterbody = lOCATIONDataTextBox,
    species = textBox18
  ) %>% 
  mutate(
    date = mdy(date),
    yr = year(date),
    week = floor_date(date, unit = 'week'), 
    week = factor(format(week, '%b %d')), 
    week = factor(week, levels = as.character(unique(week))), 
    county = case_when(
      county %in% c('Pinellas ', 'Pinellas', 'pinellas') ~ 'Pinellas', 
      county %in% c('Hillsborough', 'Hillsborough ') ~ 'Hillsborough', 
      T ~ county
    ),
    city = gsub('\\s+$', '', city),
    city = gsub('^St\\s', 'St. ', city),
    city = case_when(
      city %in% c('St. pete Beach', 'St. Pete Beach', 'St. Petersburg Beach') ~ 'St. Petersburg Beach', 
      city %in% 'Tierra Ceia' ~ 'Terra Ceia', 
      city %in% 'dunedin' ~ 'Dunedin',
      T ~ city
    )
  )


# levels for week, starts on first of week from jan through july
weeklv <- seq.Date(from = as.Date('2021-01-01'), to = Sys.Date(), by = 'days') %>% 
  lubridate::floor_date(unit = 'week') %>% 
  unique %>% 
  tibble(
    dt = ., 
    yr = year(dt), 
    mo = month(dt), 
    lb = format(dt, '%b %d')
  ) %>%
  filter(yr > 2020) %>% 
  filter(mo <= 10) %>% 
  pull(lb)

##
# combine kbr and wq data

# combine tbeptools stations with pp station
stat1 <- st_as_sf(stations, coords = c('Longitude', 'Latitude'), crs = 4326) %>% 
  mutate(station = as.character(epchc_station)) %>% 
  select(station)  
stat2 <- rsstatloc %>% 
  select(station)
stat <- bind_rows(stat1, stat2) %>% 
  arrange(station) %>% 
  filter(!duplicated(station))

bsdat <- bswqdat %>% 
  select(station, date, var, uni, val)

wqdat <- rswqdat %>% 
  filter(source %in% c('epchc', 'fldep')) %>% 
  bind_rows(bsdat) %>% 
  select(station, date, var, uni, val) %>%
  filter(var %in% c('chla', 'tn', 'sal', 'temp')) %>% 
  mutate(
    uni = case_when(
      var == 'chla' ~ 'ugl',
      var == 'sal' ~ 'ppt', 
      var == 'temp' ~ 'c', 
      var == 'tn' ~ 'mgl'
    )
  ) %>% 
  inner_join(stat, ., by = 'station') %>% 
  .[tbseg, ] %>% 
  select(date, station, date, var, uni, val)

# combine all
# filter by mtb, ltb, 1990 to recent
# remove sal, temp outliers (visually verified)
wqkbrdat <- bind_rows(wqdat, kbrdat) %>% 
  filter(year(date) >= 1995) %>% 
  filter(!is.na(val)) %>% 
  .[tbseg[tbseg$bay_segment %in% c('LTB', 'MTB'), ], ] %>% 
  arrange(date, var) %>% 
  mutate(
    mo = month(date),
    yr = year(date)
  ) %>% 
  group_by(var, mo) %>% 
  mutate(
    minv = quantile(val, prob = 0.01, na.rm = T),
    maxv = quantile(val, prob = 0.99, na.rm = T)
    ) %>% 
  ungroup %>%
  mutate( 
    torm = case_when(
      var %in% c('sal', 'temp') & (val < minv | val > maxv) ~ 1, 
      T ~ 0
    ), 
    lbs = case_when(
      var == 'chla' ~ 'Chl-a (ug/L)', 
      var == 'tn' ~ 'TN (mg/L)', 
      var == 'sal' ~ 'Sal (ppt)', 
      var == 'temp' ~ 'Temp (c)',
      var == 'kb' ~ 'K. brevis (100k cells/L)'
    )
  ) %>% 
  filter(torm == 0) %>% 
  select(date, station, var, uni, lbs, val)

# sums
dtrng <- range(kbrdat$date)
cnt <- kbrdat %>% 
  filter(var == 'kb') %>% 
  nrow()
cmbdtrng <- range(wqkbrdat$date)
cmbcnt <- wqkbrdat %>% 
  filter(var == 'kb') %>% 
  nrow()

All HAB data were obtained from https://www.ncei.noaa.gov/maps/habsos/maps.htm. Cell count data include all observations in Tampa Bay from 1953-09-18 to 2021-10-04, including 22588 observations. For analysis, stations were clipped to include only lower and middle Tampa Bay from 1995-01-04 to 2021-10-04, including 7049 observations. Long-term water quality data included all observations available from routine monitoring programs for Hillsborough and Manatee counties. All cell counts are concentrations shown as 100k cell/L.

The record of K. brevis cell counts from 1990 to present in Middle Tampa Bay is below.

# MTB subset
toplo <- kbrdat %>%
  .[tbseg[tbseg$bay_segment == 'MTB', ], ] %>%
  filter(var == 'kb') %>% 
  mutate(
    dtgrp = quarter(date),
    yr = year(date)
  ) %>%
  st_set_geometry(NULL) %>%
  # filter(year(date) >= 1990) %>%
  mutate(
    yr = factor(yr, levels = seq(min(yr), max(yr)))
  ) %>%
  group_by(yr) %>%
  summarise(
    cnt = n(),
    minv = quantile(val, prob = 0.1, na.rm = T),
    medv = quantile(val, prob = 0.5, na.rm = T),
    maxv = quantile(val, prob = 0.9, na.rm = T),
    .groups = 'drop'
  ) %>%
  complete(yr)

# plot
p <- ggplot(toplo, aes(x = yr)) +
  geom_crossbar(aes(ymin = minv, y = medv, ymax = maxv), width = 0.75, fill = '#00806E', fatten = 1) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, size = 7, hjust = 1),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank()
  ) +
  labs(
    y = 'Cells (100k / L)',
    title = expression(paste(italic('K. brevis'), ' concentrations in Middle Tampa Bay')),
    subtitle = paste('Bars are 10th/90th percentile with median, n =', sum(toplo$cnt, na.rm = T)),
    caption = 'Data from FWC/NOAA HABSOS; https://www.ncei.noaa.gov/maps/habsos/maps.htm\nPlot created by TBEP'
  )

p

Showing the same results for Middle Tampa Bay but as boxplots on log-scale.

# MTB subset
toplo <- kbrdat %>%
  .[tbseg[tbseg$bay_segment == 'MTB', ], ] %>%
  filter(var == 'kb') %>% 
  mutate(
    dtgrp = quarter(date),
    yr = year(date)
  ) %>%
  st_set_geometry(NULL) %>%
  # filter(year(date) >= 1990) %>%
  mutate(
    yr = factor(yr, levels = seq(min(yr), max(yr)))
  ) %>%
  group_by(yr) %>%
  summarise(
    cnt = n(),
    y0 = min(val, na.rm = T), 
    y10 = quantile(val, prob = 0.1, na.rm = T),
    y50 = quantile(val, prob = 0.5, na.rm = T),
    y90 = quantile(val, prob = 0.9, na.rm = T),
    y100 = max(val, na.rm = T),
    .groups = 'drop'
  ) %>%
  complete(yr)

# plot
p <- ggplot(toplo, aes(x = yr)) +
  geom_boxplot(
    aes(ymin = y0, lower = y10, middle = y50, upper = y90, ymax = y100),
    stat = "identity", width = 0.75, fill = '#00806E'
    ) +
  scale_y_log10(labels = function(x) as.numeric(format(x, scientific = FALSE))) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, size = 7, hjust = 1),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank()
  ) +
  labs(
    y = 'Cells (100k / L)',
    title = expression(paste(italic('K. brevis'), ' concentrations in Middle Tampa Bay, log-scale')),
    subtitle = paste('Boxplot summaries show the entire range of values from min, 10th %tile, median, 90th %tile, and max, n =', sum(toplo$cnt, na.rm = T)),
    caption = 'Data from FWC/NOAA HABSOS; https://www.ncei.noaa.gov/maps/habsos/maps.htm\nPlot created by TBEP'
  )

p

Showing the same results for Middle Tampa Bay but as boxplots on log-scale for weeks in 2021.

# MTB subset
toplo <- kbrdat %>%
  .[tbseg[tbseg$bay_segment == 'MTB', ], ] %>%
  filter(var == 'kb') %>% 
  filter(year(date) >= 2021) %>%
  mutate(
    week = floor_date(date, unit = 'week'),
    week = factor(format(week, '%b %d')), 
    week = factor(week, levels = weeklv)
  ) %>%
  st_set_geometry(NULL) %>%
  group_by(week) %>%
  summarise(
    cnt = n(),
    y0 = min(val, na.rm = T), 
    y10 = quantile(val, prob = 0.1, na.rm = T),
    y50 = quantile(val, prob = 0.5, na.rm = T),
    y90 = quantile(val, prob = 0.9, na.rm = T),
    y100 = max(val, na.rm = T),
    .groups = 'drop'
  ) %>%
  complete(week)

# plot
p <- ggplot(toplo, aes(x = week)) +
  geom_boxplot(
    aes(ymin = y0, lower = y10, middle = y50, upper = y90, ymax = y100),
    stat = "identity", width = 0.75, fill = '#00806E'
  ) +
  scale_y_log10(labels = function(x) as.numeric(format(x, scientific = FALSE))) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, size = 10, hjust = 1),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank()
  ) +
  labs(
    y = 'Cells (100k / L)',
    title = expression(paste(italic('K. brevis'), ' concentrations in Middle Tampa Bay, log-scale')),
    subtitle = paste('Boxplot summaries show the entire range of values from min, 10th %tile, median, 90th %tile, and max, n =', sum(toplo$cnt, na.rm = T)),
    caption = 'Data from FWC/NOAA HABSOS; https://www.ncei.noaa.gov/maps/habsos/maps.htm\nPlot created by TBEP'
  )

p

The record of K. brevis cell counts from 1990 to present in Lower Tampa Bay is below.

# LTB subset
toplo <- kbrdat %>%
  .[tbseg[tbseg$bay_segment == 'LTB', ], ] %>%
  filter(var == 'kb') %>% 
  mutate(
    dtgrp = quarter(date),
    yr = year(date)
  ) %>%
  st_set_geometry(NULL) %>%
  # filter(year(date) >= 1990) %>%
  mutate(
    yr = factor(yr, levels = seq(min(yr), max(yr)))
  ) %>%
  group_by(yr) %>%
  summarise(
    cnt = n(),
    minv = quantile(val, prob = 0.1, na.rm = T),
    medv = quantile(val, prob = 0.5, na.rm = T),
    maxv = quantile(val, prob = 0.9, na.rm = T),
    .groups = 'drop'
  ) %>%
  complete(yr)

# plot
p <- ggplot(toplo, aes(x = yr)) +
  geom_crossbar(aes(ymin = minv, y = medv, ymax = maxv), width = 0.75, fill = '#00806E', fatten = 1) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, size = 7, hjust = 1),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank()
  ) +
  labs(
    y = 'Cells (100k / L)',
    title = expression(paste(italic('K. brevis'), ' concentrations in Lower Tampa Bay')),
    subtitle = paste('Bars are 10th/90th percentile with median, n =', sum(toplo$cnt, na.rm = T)),
    caption = 'Data from FWC/NOAA HABSOS; https://www.ncei.noaa.gov/maps/habsos/maps.htm\nPlot created by TBEP'
  )

p

Showing the same results for Lower Tampa Bay but as boxplots on log-scale.

# LTB subset
toplo <- kbrdat %>%
  .[tbseg[tbseg$bay_segment == 'LTB', ], ] %>%
  filter(var == 'kb') %>% 
  mutate(
    dtgrp = quarter(date),
    yr = year(date)
  ) %>%
  st_set_geometry(NULL) %>%
  # filter(year(date) >= 1990) %>%
  mutate(
    yr = factor(yr, levels = seq(min(yr), max(yr)))
  ) %>%
  group_by(yr) %>%
  summarise(
    cnt = n(),
    y0 = min(val, na.rm = T), 
    y10 = quantile(val, prob = 0.1, na.rm = T),
    y50 = quantile(val, prob = 0.5, na.rm = T),
    y90 = quantile(val, prob = 0.9, na.rm = T),
    y100 = max(val, na.rm = T),
    .groups = 'drop'
  ) %>%
  complete(yr)

# plot
p <- ggplot(toplo, aes(x = yr)) +
  geom_boxplot(
    aes(ymin = y0, lower = y10, middle = y50, upper = y90, ymax = y100),
    stat = "identity", width = 0.75, fill = '#00806E'
    ) +
  scale_y_log10(labels = function(x) as.numeric(format(x, scientific = FALSE))) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, size = 7, hjust = 1),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank()
  ) +
  labs(
    y = 'Cells (100k / L)',
    title = expression(paste(italic('K. brevis'), ' concentrations in Lower Tampa Bay, log-scale')),
    subtitle = paste('Boxplot summaries show the entire range of values from min, 10th %tile, median, 90th %tile, and max, n =', sum(toplo$cnt, na.rm = T)),
    caption = 'Data from FWC/NOAA HABSOS; https://www.ncei.noaa.gov/maps/habsos/maps.htm\nPlot created by TBEP'
  )

p

Showing the same results for Lower Tampa Bay but as boxplots on log-scale for weeks in 2021.

# MTB subset
toplo <- kbrdat %>%
  .[tbseg[tbseg$bay_segment == 'LTB', ], ] %>%
  filter(var == 'kb') %>% 
  filter(year(date) >= 2021) %>%
  mutate(
    week = floor_date(date, unit = 'week'),
    week = factor(format(week, '%b %d')), 
    week = factor(week, levels = weeklv)
  ) %>%
  st_set_geometry(NULL) %>%
  group_by(week) %>%
  summarise(
    cnt = n(),
    y0 = min(val, na.rm = T), 
    y10 = quantile(val, prob = 0.1, na.rm = T),
    y50 = quantile(val, prob = 0.5, na.rm = T),
    y90 = quantile(val, prob = 0.9, na.rm = T),
    y100 = max(val, na.rm = T),
    .groups = 'drop'
  ) %>%
  complete(week)

# plot
p <- ggplot(toplo, aes(x = week)) +
  geom_boxplot(
    aes(ymin = y0, lower = y10, middle = y50, upper = y90, ymax = y100),
    stat = "identity", width = 0.75, fill = '#00806E'
  ) +
  scale_y_log10(labels = function(x) as.numeric(format(x, scientific = FALSE))) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, size = 10, hjust = 1),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank()
  ) +
  labs(
    y = 'Cells (100k / L)',
    title = expression(paste(italic('K. brevis'), ' concentrations in Lower Tampa Bay, log-scale')),
    subtitle = paste('Boxplot summaries show the entire range of values from min, 10th %tile, median, 90th %tile, and max, n =', sum(toplo$cnt, na.rm = T)),
    caption = 'Data from FWC/NOAA HABSOS; https://www.ncei.noaa.gov/maps/habsos/maps.htm\nPlot created by TBEP'
  )

p

The FWC Fish Kill database was queried to obtain a record of all fish kill reports attributed to red tide. Below is a summary for St. Petersburg and Tampa.

toplo1 <- fishdat %>% 
  filter(city %in% c('Tampa', 'St. Petersburg')) %>% 
  mutate(
    yr = factor(yr, levels = seq(min(yr), max(yr)))
  ) %>%
  group_by(yr, city) %>% 
  summarise(
    cnt = n(), 
    .groups = 'drop'
  ) %>% 
  complete(yr)

p1 <- ggplot(toplo1, aes(x = yr, fill = city, y = cnt)) + 
  geom_bar(stat = 'identity', colour = 'darkgrey') + 
  labs(
    x = 'Year',
    y = 'No. of fish kill reports',
    # caption = 'Counts are from FWRI Fish Kill Database, attributed to Red Tide'
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_fill_brewer(palette = 'Pastel1') + 
  theme_minimal(base_size = 16, base_family = 'Lato') + 
  theme(
    axis.ticks.x = element_line(),
    # axis.title.x = element_blank(), 
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    legend.title = element_blank(), 
    legend.position = 'top', 
    panel.grid.minor.y = element_blank(), 
    panel.grid.minor.x = element_blank(), 
    panel.grid.major.x = element_blank(), 
    plot.caption = element_text(size = 10)
  )


toplo2 <- fishdat %>% 
  filter(city %in% c('Tampa', 'St. Petersburg')) %>% 
  filter(yr >= 2021) %>%  
  group_by(week, city) %>% 
  summarise(
    cnt = n(), 
    .groups = 'drop'
  ) %>% 
  mutate(
    week = factor(week, levels = weeklv)
  ) %>% 
  complete(week)

p2 <- ggplot(toplo2, aes(x = week, fill = city, y = cnt)) + 
  geom_bar(stat = 'identity', colour = 'darkgrey') + 
  labs(
    x = 'Week of',
    y = 'No. of fish kill reports',
    caption = 'Counts are from FWRI Fish Kill Database, attributed to Red Tide'
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_fill_brewer(palette = 'Pastel1') + 
  theme_minimal(base_size = 16, base_family = 'Lato') + 
  theme(
    axis.ticks.x = element_line(),
    # axis.title.x = element_blank(), 
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    legend.title = element_blank(), 
    legend.position = 'top', 
    panel.grid.minor.y = element_blank(), 
    panel.grid.minor.x = element_blank(), 
    panel.grid.major.x = element_blank(), 
    plot.caption = element_text(size = 10)
  )

p1 + p2 + plot_layout(ncol = 1, guides = 'collect') & theme(legend.position = 'top')

The map below shows water quality and K. brevis cell counts that were combined for analysis. Water quality data included total nitrogen, salinity, and temperature.

tomap <- wqkbrdat %>% 
  mutate(
    typ = case_when(
      var == 'kb' ~ 'K. brevis', 
      var != 'kb' ~ 'water quality'
    )
  ) %>% 
  select(typ) %>% 
  unique
mapview(tomap, zol = 'typ', layer.name = NA, homebutton = F)

Below shows the long-term trends for K. brevis and water quality in Middle and Lower Tampa Bay. The vertical lines indicate the range of values observed for each month and year. Some outliers for salinity and temperature that were clearly out of range were removed. Note that cell counts and total nitrogen are on log-scale.

toplo <- wqkbrdat %>% 
  st_set_geometry(NULL) %>% 
  mutate(
    date = floor_date(date, unit = 'month')
  ) %>% 
  group_by(var, lbs, date) %>% 
  summarise(
    minv = min(val, na.rm = T), 
    maxv = max(val, na.rm = T),
    .groups = 'drop'
  )

toplo1 <- toplo %>% 
  filter(var %in% c('kb', 'tn', 'chla')) %>% 
  mutate(
    lbs = factor(lbs, levels = c( "K. brevis (100k cells/L)", "Chl-a (ug/L)", "TN (mg/L)"))
  )
toplo2 <- toplo %>% 
  filter(var %in% c('sal', 'temp'))

pthm <- theme_bw() + 
  theme(
    axis.title = element_blank(),
    # axis.text.x = element_text(angle = 45, size = 10, hjust = 1),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(), 
    strip.background = element_blank(), 
    strip.placement = 'outside'
  )

p1 <- ggplot(toplo1, aes(x = date)) + 
  facet_wrap(~lbs, ncol = 1, scales = 'free_y', strip.position = 'left') + 
  geom_errorbar(aes(ymin = minv, ymax = maxv), width = 0) +
  pthm + 
  theme(
    axis.text.x = element_blank(), 
    axis.ticks.x = element_blank()
  ) +
  scale_y_log10() + 
  labs(subtitle = 'Min/max observed values by month and year')

p2 <- ggplot(toplo2, aes(x = date)) + 
  facet_wrap(~lbs, ncol = 1, scales = 'free_y', strip.position = 'left') + 
  geom_errorbar(aes(ymin = minv, ymax = maxv), width = 0) +
  pthm +
  labs(caption = 'Data for middle and lower Tampa Bay')

p1 + p2 + plot_layout(ncol = 1, heights = c(0.6, 0.4))

Below is the same plot as above but only showing weekly observations from April 2021 to present. Data in this period have been collected more frequently than the historical record. Values are actual observations and not summarized as above.

toplo <- wqkbrdat %>% 
  st_set_geometry(NULL) %>%
  filter(month(date) >=4 & year(date) >= 2021) 

toplo1 <- toplo %>% 
  filter(var %in% c('kb', 'tn', 'chla')) %>% 
  mutate(
    lbs = factor(lbs, levels = c( "K. brevis (100k cells/L)", "Chl-a (ug/L)", "TN (mg/L)"))
  )
toplo2 <- toplo %>% 
  filter(var %in% c('sal', 'temp'))

pthm <- theme_bw() + 
  theme(
    axis.title = element_blank(),
    # axis.text.x = element_text(angle = 45, size = 10, hjust = 1),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(), 
    strip.background = element_blank(), 
    strip.placement = 'outside'
  )

p1 <- ggplot(toplo1, aes(x = date, group = date)) + 
  facet_wrap(~lbs, ncol = 1, scales = 'free_y', strip.position = 'left') + 
  geom_point(aes(y = val)) +
  pthm + 
  theme(
    axis.text.x = element_blank(), 
    axis.ticks.x = element_blank()
  ) +
  scale_y_log10() + 
  labs(subtitle = 'Actual observed values by date')

p2 <- ggplot(toplo2, aes(x = date)) + 
  facet_wrap(~lbs, ncol = 1, scales = 'free_y', strip.position = 'left') + 
  geom_point(aes(y = val)) + 
  pthm +
  labs(caption = 'Data for middle and lower Tampa Bay')

p1 + p2 + plot_layout(ncol = 1, heights = c(0.6, 0.4))

Because water quality variables are highly seasonal, the monthly long-term averages for chlorophyll, total nitrogen, salinity, and temperature were subtracted from each observation. This creates a more robust signal of actual trends by removing normal seasonal variation. Note that the long-term monthly median for K. brevis is zero.

tomod <- wqkbrdat %>%
  st_set_geometry(NULL) %>% 
  mutate(
    mo = month(date),
    date = floor_date(date, unit = 'month')
  ) %>% 
  group_by(var, mo) %>% 
  mutate(
    seasval = median(val, na.rm = T), 
    val = case_when(
      val != 'kb' ~ val - seasval, 
      T ~ val
    )
  )
  
toplo <- tomod %>% 
  mutate(
    date = floor_date(date, unit = 'month')
  ) %>% 
  group_by(var, lbs, date) %>% 
  summarise(
    minv = min(val, na.rm = T), 
    maxv = max(val, na.rm = T),
    .groups = 'drop'
  )

toplo1 <- toplo %>% 
  filter(var %in% c('kb'))
toplo2 <- toplo %>% 
  filter(var %in% c('sal', 'temp', 'tn', 'chla')) %>% 
  mutate(
    lbs = factor(lbs, levels = c('Chl-a (ug/L)', 'TN (mg/L)', 'Sal (ppt)', 'Temp (c)'))
  )

pthm <- theme_bw() + 
  theme(
    axis.title = element_blank(),
    # axis.text.x = element_text(angle = 45, size = 10, hjust = 1),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(), 
    strip.background = element_blank(), 
    strip.placement = 'outside'
  )

p1 <- ggplot(toplo1, aes(x = date)) + 
  facet_wrap(~lbs, ncol = 1, scales = 'free_y', strip.position = 'left') + 
  geom_errorbar(aes(ymin = minv, ymax = maxv), width = 0) +
  pthm + 
  theme(
    axis.text.x = element_blank(), 
    axis.ticks.x = element_blank()
  ) +
  scale_y_log10() + 
  labs(subtitle = 'Min/max observed values by month and year')

p2 <- ggplot(toplo2, aes(x = date)) + 
  facet_wrap(~lbs, ncol = 1, scales = 'free_y', strip.position = 'left') + 
  geom_errorbar(aes(ymin = minv, ymax = maxv), width = 0) +
  pthm +
  labs(caption = 'Data for middle and lower Tampa Bay')

p1 + p2 + plot_layout(ncol = 1, heights = c(0.2, 0.8))

A multivariate analysis was then applied to the data to compare all variables in space and time. The median across all observations by month if before April 2021 and by week if after April 2021 were estimated to group all variables by a common date. The water quality data were based on the observed values minus the long-term monthly median to remove the seasonal effect. The cell concentrations were log + 1 transformed. Finally, the data were standardized to zero mean and unit variance. A principal components analysis was then applied.

The following shows the first three principal components from the analysis applied to all data.

dat <- wqkbrdat %>%
  # .[tbseg[tbseg$bay_segment %in% 'MTB', ], ] %>%
  st_set_geometry(NULL) %>% 
  mutate(
    mo = month(date),
    date = case_when(
      year(date) >= 2021 & month(date) >= 4 ~ floor_date(date, unit = 'week'),
      T ~ floor_date(date, unit = 'month')
    )
  ) %>%
  group_by(var, mo) %>%
  mutate(
    seasval = median(val, na.rm = T),
    val = case_when(
      !var %in% c('kb') ~ val - seasval,
      T ~ val
    )
  ) %>%
  ungroup() %>% 
  # filter(mo %in% c(3:7)) %>%
  group_by(date, var) %>% 
  summarise(
    val = median(val, na.rm = T), 
    .groups = 'drop'
    ) %>%
  spread(var, val) %>% 
  na.omit %>% 
  mutate(
    kb = log10(1 + kb),
    grp = case_when(
      year(date) >= 2021 & month(date) >= 4 ~ 'PP', 
      T ~ 'Baseline'
    )
  )

grps <- dat %>% 
  pull(grp)

tomod <- dat %>% 
  select(-grp) %>% 
  column_to_rownames('date') %>% 
  decostand(method = 'standardize')

ppp <- PCA(tomod, scale.unit = T, graph = F)
p1 <- ggord(ppp, grp_in = grps, vec_ext = 5, alpha = 0.8, size = 2, txt = 6, arrow = 0.4, repel = T, coord_fix = F, ellipse = F) +
  theme(legend.position = 'none')
p2 <- ggord(ppp, grp_in = grps, axes = c('2', '3'), vec_ext = 5, alpha = 0.8, size = 2, txt = 6, arrow = 0.4, repel = T, coord_fix = F, ellipse = F) 

p1 + p2 + plot_layout(ncol = 2, width = c(0.5, 0.5))

The same analysis was applied to only weeks including and after April 2021.

tomod <- dat %>% 
  filter(grp == 'PP') %>% 
  select(-grp) %>% 
  column_to_rownames('date') %>% 
  decostand(method = 'standardize')

ppp <- PCA(tomod, scale.unit = T, graph = F)
p1 <- ggord(ppp, vec_ext = 3, alpha = 0.8, size = 2, txt = 6, arrow = 0.4, repel = T, coord_fix = F, ellipse = F) +
  theme(legend.position = 'none')
p2 <- ggord(ppp, axes = c('2', '3'), vec_ext = 3, alpha = 0.8, size = 2, txt = 6, arrow = 0.4, repel = T, coord_fix = F, ellipse = F) 

p1 + p2 + plot_layout(ncol = 2, width = c(0.5, 0.5))

The same analysis was applied to only weeks including and after April 2021 and only for middle Tampa Bay.

dat <- wqkbrdat %>%
  .[tbseg[tbseg$bay_segment %in% 'MTB', ], ] %>%
  st_set_geometry(NULL) %>% 
  mutate(
    mo = month(date),
    date = case_when(
      year(date) >= 2021 & month(date) >= 4 ~ floor_date(date, unit = 'week'),
      T ~ floor_date(date, unit = 'month')
    )
  ) %>%
  group_by(var, mo) %>%
  mutate(
    seasval = median(val, na.rm = T),
    val = case_when(
      !var %in% c('kb') ~ val - seasval,
      T ~ val
    )
  ) %>%
  ungroup() %>% 
  # filter(mo %in% c(3:7)) %>%
  group_by(date, var) %>% 
  summarise(
    val = median(val, na.rm = T), 
    .groups = 'drop'
    ) %>%
  spread(var, val) %>% 
  na.omit %>% 
  mutate(
    kb = log10(1 + kb),
    grp = case_when(
      year(date) >= 2021 & month(date) >= 4 ~ 'PP', 
      T ~ 'Baseline'
    )
  ) %>% 
  filter(grp == 'PP')

grps <- dat %>% 
  pull(grp)

tomod <- dat %>% 
  select(-grp) %>% 
  column_to_rownames('date') %>% 
  decostand(method = 'standardize')

ppp <- PCA(tomod, scale.unit = T, graph = F)
p1 <- ggord(ppp, vec_ext = 3, alpha = 0.8, size = 2, txt = 6, arrow = 0.4, repel = T, coord_fix = F, ellipse = F) +
  theme(legend.position = 'none')
p2 <- ggord(ppp, axes = c('2', '3'), vec_ext = 3, alpha = 0.8, size = 2, txt = 6, arrow = 0.4, repel = T, coord_fix = F, ellipse = F) 

p1 + p2 + plot_layout(ncol = 2, width = c(0.5, 0.5))