params <-
  list(EVAL = TRUE)

## ----echo = FALSE----------------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  message = FALSE,
  warning = FALSE,
  eval = if (isTRUE(exists("params"))) params$EVAL else FALSE
)


## ----setup, include=FALSE--------------------------------------------------------
knitr::opts_chunk$set(
  echo = TRUE,
  dpi = 100,
  fig.asp = 0.8,
  fig.width = 6,
  out.width = "60%",
  fig.align = "center"
)
library(mvgam)
library(ggplot2)
theme_set(theme_bw(base_size = 12, base_family = "serif"))


## --------------------------------------------------------------------------------
load(url("https://github.com/atsa-es/MARSS/raw/master/data/lakeWAplankton.rda"))


## --------------------------------------------------------------------------------
outcomes <- c("Greens", "Bluegreens", "Diatoms", "Unicells", "Other.algae")


## --------------------------------------------------------------------------------
# loop across each plankton group to create the long datframe
plankton_data <- do.call(
  rbind,
  lapply(outcomes, function(x) {
    # create a group-specific dataframe with counts labelled 'y'
    # and the group name in the 'series' variable
    data.frame(
      year = lakeWAplanktonTrans[, "Year"],
      month = lakeWAplanktonTrans[, "Month"],
      y = lakeWAplanktonTrans[, x],
      series = x,
      temp = lakeWAplanktonTrans[, "Temp"]
    )
  })
) %>%
  # change the 'series' label to a factor
  dplyr::mutate(series = factor(series)) %>%
  # filter to only include some years in the data
  dplyr::filter(year >= 1965 & year < 1975) %>%
  dplyr::arrange(year, month) %>%
  dplyr::group_by(series) %>%
  # z-score the counts so they are approximately standard normal
  dplyr::mutate(y = as.vector(scale(y))) %>%
  # add the time indicator
  dplyr::mutate(time = dplyr::row_number()) %>%
  dplyr::ungroup()


## --------------------------------------------------------------------------------
head(plankton_data)


## --------------------------------------------------------------------------------
dplyr::glimpse(plankton_data)


## --------------------------------------------------------------------------------
plot_mvgam_series(data = plankton_data, series = "all")


## --------------------------------------------------------------------------------
plankton_data %>%
  dplyr::filter(series == "Other.algae") %>%
  ggplot(aes(x = time, y = temp)) +
  geom_line(size = 1.1) +
  geom_line(aes(y = y), col = "white", size = 1.3) +
  geom_line(aes(y = y), col = "darkred", size = 1.1) +
  ylab("z-score") +
  xlab("Time") +
  ggtitle("Temperature (black) vs Other algae (red)")


## --------------------------------------------------------------------------------
plankton_data %>%
  dplyr::filter(series == "Diatoms") %>%
  ggplot(aes(x = time, y = temp)) +
  geom_line(size = 1.1) +
  geom_line(aes(y = y), col = "white", size = 1.3) +
  geom_line(aes(y = y), col = "darkred", size = 1.1) +
  ylab("z-score") +
  xlab("Time") +
  ggtitle("Temperature (black) vs Diatoms (red)")


## --------------------------------------------------------------------------------
plankton_train <- plankton_data %>%
  dplyr::filter(time <= 112)
plankton_test <- plankton_data %>%
  dplyr::filter(time > 112)


## ----notrend_mod, include = FALSE, results='hide'--------------------------------
notrend_mod <- mvgam(
  y ~
    te(temp, month, k = c(4, 4)) +
    te(temp, month, k = c(4, 4), by = series) -
    1,
  family = gaussian(),
  data = plankton_train,
  newdata = plankton_test,
  trend_model = "None"
)


## ----eval=FALSE------------------------------------------------------------------
# notrend_mod <- mvgam(
#   y ~
#     # tensor of temp and month to capture
#     # "global" seasonality
#     te(temp, month, k = c(4, 4)) +
#
#     # series-specific deviation tensor products
#     te(temp, month, k = c(4, 4), by = series) - 1,
#   family = gaussian(),
#   data = plankton_train,
#   newdata = plankton_test,
#   trend_model = "None"
# )

## --------------------------------------------------------------------------------
plot_mvgam_smooth(notrend_mod, smooth = 1)


## --------------------------------------------------------------------------------
plot_mvgam_smooth(notrend_mod, smooth = 2)


## --------------------------------------------------------------------------------
plot_mvgam_smooth(notrend_mod, smooth = 3)


## --------------------------------------------------------------------------------
plot(notrend_mod, type = "forecast", series = 1)


## --------------------------------------------------------------------------------
plot(notrend_mod, type = "forecast", series = 2)


## --------------------------------------------------------------------------------
plot(notrend_mod, type = "forecast", series = 3)


## --------------------------------------------------------------------------------
plot(notrend_mod, type = "residuals", series = 1)


## --------------------------------------------------------------------------------
plot(notrend_mod, type = "residuals", series = 3)


## --------------------------------------------------------------------------------
priors <- get_mvgam_priors(
  # observation formula, which has no terms in it
  y ~ -1,

  # process model formula, which includes the smooth functions
  trend_formula = ~ te(temp, month, k = c(4, 4)) +
    te(temp, month, k = c(4, 4), by = trend) -
    1,

  # VAR1 model with uncorrelated process errors
  trend_model = VAR(),
  family = gaussian(),
  data = plankton_train
)


## --------------------------------------------------------------------------------
priors[, 3]


## --------------------------------------------------------------------------------
priors[, 4]


## --------------------------------------------------------------------------------
priors <- c(
  prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2),
  prior(normal(0.5, 0.25), class = sigma)
)


## ----var_mod, include = FALSE, results='hide'------------------------------------
var_mod <- mvgam(
  y ~ -1,
  trend_formula = ~
    # tensor of temp and month should capture
    # seasonality
    te(temp, month, k = c(4, 4)) +
      # need to use 'trend' rather than series
      # here
      te(temp, month, k = c(4, 4), by = trend) -
      1
  ,
  family = gaussian(),
  data = plankton_train,
  newdata = plankton_test,
  trend_model = VAR(),
  priors = priors,
  adapt_delta = 0.99,
  burnin = 1000
)


## ----eval=FALSE------------------------------------------------------------------
# var_mod <- mvgam(
#   # observation formula, which is empty
#   forumla = y ~ -1,
#
#   # process model formula, which includes the smooth functions
#   trend_formula = ~ te(temp, month, k = c(4, 4)) +
#     te(temp, month, k = c(4, 4), by = trend) - 1,
#
#   # VAR1 model with uncorrelated process errors
#   trend_model = VAR(),
#   family = gaussian(),
#   data = plankton_train,
#   newdata = plankton_test,
#
#   # include the updated priors
#   priors = priors,
#   silent = 2
# )

## --------------------------------------------------------------------------------
summary(var_mod, include_betas = FALSE)


## --------------------------------------------------------------------------------
plot(var_mod, "smooths", trend_effects = TRUE)


## ----warning=FALSE, message=FALSE------------------------------------------------
mcmc_plot(
  var_mod,
  variable = 'A',
  regex = TRUE,
  type = 'hist',
  facet_args = list(dir = 'v')
)


## ----warning=FALSE, message=FALSE------------------------------------------------
mcmc_plot(
  var_mod,
  variable = 'Sigma',
  regex = TRUE,
  type = 'hist',
  facet_args = list(dir = 'v')
)


## ----warning=FALSE, message=FALSE------------------------------------------------
mcmc_plot(var_mod, variable = "sigma_obs", regex = TRUE, type = "hist")


## --------------------------------------------------------------------------------
priors <- c(
  prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2),
  prior(normal(0.5, 0.25), class = sigma)
)


## ----varcor_mod, include = FALSE, results='hide'---------------------------------
varcor_mod <- mvgam(
  y ~ -1,
  trend_formula = ~
    # tensor of temp and month should capture
    # seasonality
    te(temp, month, k = c(4, 4)) +
      # need to use 'trend' rather than series
      # here
      te(temp, month, k = c(4, 4), by = trend) -
      1
  ,
  family = gaussian(),
  data = plankton_train,
  newdata = plankton_test,
  trend_model = VAR(cor = TRUE),
  burnin = 1000,
  adapt_delta = 0.99,
  priors = priors
)


## ----eval=FALSE------------------------------------------------------------------
# varcor_mod <- mvgam(
#   # observation formula, which remains empty
#   formula = y ~ -1,
#
#   # process model formula, which includes the smooth functions
#   trend_formula = ~ te(temp, month, k = c(4, 4)) +
#     te(temp, month, k = c(4, 4), by = trend) - 1,
#
#   # VAR1 model with correlated process errors
#   trend_model = VAR(cor = TRUE),
#   family = gaussian(),
#   data = plankton_train,
#   newdata = plankton_test,
#
#   # include the updated priors
#   priors = priors,
#   silent = 2
# )

## ----warning=FALSE, message=FALSE------------------------------------------------
mcmc_plot(
  varcor_mod,
  variable = 'Sigma',
  regex = TRUE,
  type = 'hist',
  facet_args = list(dir = 'v')
)


## --------------------------------------------------------------------------------
Sigma_post <- as.matrix(
  varcor_mod,
  variable = "Sigma",
  regex = TRUE
)
median_correlations <- cov2cor(
  matrix(apply(Sigma_post, 2, median), nrow = 5, ncol = 5)
)
rownames(median_correlations) <-
  colnames(median_correlations) <-
    levels(plankton_train$series)

round(median_correlations, 2)


## --------------------------------------------------------------------------------
irfs <- irf(varcor_mod, h = 12)


## --------------------------------------------------------------------------------
summary(irfs)


## --------------------------------------------------------------------------------
plot(irfs, series = 3)


## --------------------------------------------------------------------------------
fevds <- fevd(varcor_mod, h = 12)
plot(fevds)


## --------------------------------------------------------------------------------
# create forecast objects for each model
fcvar <- forecast(var_mod)
fcvarcor <- forecast(varcor_mod)

# plot the difference in variogram scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better
diff_scores <- score(fcvarcor, score = "variogram")$all_series$score -
  score(fcvar, score = "variogram")$all_series$score
plot(
  diff_scores,
  pch = 16,
  cex = 1.25,
  col = "darkred",
  ylim = c(
    -1 * max(abs(diff_scores), na.rm = TRUE),
    max(abs(diff_scores), na.rm = TRUE)
  ),
  bty = "l",
  xlab = "Forecast horizon",
  ylab = expression(variogram[VAR1cor] ~ -~ variogram[VAR1])
)
abline(h = 0, lty = "dashed")


## --------------------------------------------------------------------------------
# plot the difference in energy scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better
diff_scores <- score(fcvarcor, score = "energy")$all_series$score -
  score(fcvar, score = "energy")$all_series$score
plot(
  diff_scores,
  pch = 16,
  cex = 1.25,
  col = "darkred",
  ylim = c(
    -1 * max(abs(diff_scores), na.rm = TRUE),
    max(abs(diff_scores), na.rm = TRUE)
  ),
  bty = "l",
  xlab = "Forecast horizon",
  ylab = expression(energy[VAR1cor] ~ -~ energy[VAR1])
)
abline(h = 0, lty = "dashed")


## --------------------------------------------------------------------------------
description <- how_to_cite(varcor_mod)


## ----eval = FALSE----------------------------------------------------------------
# description

## ----echo=FALSE------------------------------------------------------------------
cat("Methods text skeleton\n")
cat(insight::format_message(description$methods_text))


## ----echo=FALSE------------------------------------------------------------------
cat("\nPrimary references\n")
for (i in seq_along(description$citations)) {
  cat(insight::format_message(description$citations[[i]]))
  cat('\n')
}
cat("\nOther useful references\n")
for (i in seq_along(description$other_citations)) {
  cat(insight::format_message(description$other_citations[[i]]))
  cat('\n')
}