This document provides a short attempt at detiding a chlorophyll time series using a genereralized additive model. All source materials are here.
library(tidyverse)
library(patchwork)
library(lubridate)
library(here)
library(mgcv)
library(knitr)
library(reactable)
library(mgcViz)
opts_chunk$set(warning = FALSE, message = FALSE, echo = T)
load(file = here('data/datprc.RData'))
load(file = here('data/tiddat.RData'))
tab_fun <- function(datin, dig = 2, rows = 5){
out <- reactable(datin,
defaultColDef = colDef(
footerStyle = list(fontWeight = "bold"),
format = colFormat(digits = dig, separators = F),
resizable = TRUE
),
filterable = T,
defaultPageSize = rows
)
return(out)
}
Chlorophyll data from station 30 in South Bay are combined with a vector of tidal mixing velocity as a rough measure of spring/neap tidal effects. The tidal mixing velocity is from an ADCP deployed at Dumbarton Bridge and was estimated as the squared daily maximum of velocities. Higher values suggest greater tidal mixing.
# chl data station 30
chldat <- datprc %>%
filter(station == 30) %>%
filter(param == 'chl')
# subset tidal data by date range in chldat
tiddat <- tiddat %>%
filter(date >= min(chldat$date) & date <= max(chldat$date))
# combine chl and tidal data
alldat <- chldat %>%
left_join(tiddat, by = 'date')
tab_fun(alldat)
A GAM was created following similar methods in Beck et al. 2022.
kyr <- length(unique(alldat$yr)) * 12
mod <- gam(log10(value) ~ s(cont_year, k = kyr) + s(tidmix, k = 20),
data = alldat)
The detided results are estimated using a prediction matrix with continuous year and the range of tidal values observed applied to each time step. The predicted values are then averaged across the tidal predictions for each time step.
# get tide-normalized data
prdnrm <- range(alldat$tidmix, na.rm = T) %>%
{seq(.[1], .[2], length.out = 10)} %>%
crossing(
tidmix = .,
cont_year = alldat$cont_year
)
tab_fun(prdnrm)
prdnrm <- prdnrm %>%
mutate(
prd = predict(mod, .)
) %>%
group_by(cont_year) %>%
summarise(
value_dtd = mean(prd, na.rm = T),
.groups = 'drop'
)
tab_fun(prdnrm)
The detided results are combined with the original data.
# combine tide-normalizd data with all data
toplo <- alldat %>%
full_join(prdnrm, by = 'cont_year') %>%
mutate(
value_fit = 10^predict(mod, newdata = .),
value_dtd = 10^value_dtd
) %>%
select(
date,
value,
value_fit,
value_dtd
)
ggplot(toplo, aes(x = date)) +
geom_point(aes(y = value)) +
geom_line(aes(y = value_fit, color = 'Fit'), size = 1) +
geom_line(aes(y = value_dtd, color = 'Detided'), size = 1) +
scale_colour_manual(values= c('blue', 'red')) +
scale_y_log10() +
theme_minimal() +
labs(
color = NULL,
x = NULL,
y = 'Chl-a (ug/L)'
)
Detiding didn’t appear to do much in this example. Looking at a scatterplot of the tidal mixing velocity vs chlorophyll shows there isn’t much of a relationship.
ggplot(alldat, aes(x = tidmix, y = value)) +
geom_point(aes(y = value)) +
geom_smooth() +
scale_y_log10() +
theme_minimal() +
labs(
color = NULL,
x = bquote('Tidal mixing '~ (m/s)^2),
y = 'Chl-a (ug/L)'
)
The predicted spline for tidal mixing is also flat.
plot(getViz(mod), select = 2)
Finally, here’s a breakdown of the distribution of chlorophyll for different ranges of the tidal mixing values, where the ranges are defined by quartiles.
toplo <- alldat %>%
mutate(
tidqrt = cut(tidmix,
breaks = quantile(tidmix, c(0, 0.25, 0.5, 0.75, 1), na.rm = T),
labels = c('1Q', '2Q', '3Q', '4Q')
),
yrbrk = cut(yr,
breaks = c(-Inf, 1980, 1990, 2000, 2010, Inf),
labels = c('pre-1980', '1980-1990', '1990-2000', '2000-2010', 'post-2010')
),
seasbrk = factor(quarter(date),
levels = c('1', '2', '3', '4'),
labels = c('JFM', 'AMJ', 'JAS', 'OND')
)
) %>%
filter(!is.na(tidqrt))
p1 <- ggplot(toplo, aes(x = tidqrt, y = value)) +
geom_boxplot(color = 'tomato1', alpha = 0.2, size = 1, outlier.shape = NA, fill = 'tomato1') +
geom_jitter(size = 0.5) +
scale_y_log10() +
theme_minimal() +
labs(
x = NULL,
y = NULL,
title = 'Chlorophyll values by tidal mixing quartiles',
subtitle = '(a) Distribution for all data'
)
p2 <- ggplot(toplo, aes(x = tidqrt, y = value)) +
geom_boxplot(color = 'tomato1', alpha = 0.2, size = 1, outlier.shape = NA, fill = 'tomato1') +
facet_wrap(~yrbrk, ncol = 5) +
geom_jitter(size = 0.5) +
scale_y_log10() +
theme_minimal() +
labs(
x = NULL,
y = 'Chl-a (ug/L)',
subtitle = '(b) Distribution by annual periods'
)
p3 <- ggplot(toplo, aes(x = tidqrt, y = value)) +
geom_boxplot(color = 'tomato1', alpha = 0.2, size = 1, outlier.shape = NA, fill = 'tomato1') +
facet_wrap(~seasbrk, ncol = 4) +
geom_jitter(size = 0.5) +
scale_y_log10() +
theme_minimal() +
labs(
x = 'Tidal mixing quartile',
y = NULL,
subtitle = '(c) Distribution by season'
)
p1 + p2 + p3 + plot_layout(ncol = 1)