## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7, 
  fig.height = 5,
  dpi = 300,
  warning = FALSE,
  message = FALSE
)

## ----setup--------------------------------------------------------------------
library(mitey)
library(dplyr)
library(tidyr)
library(ggplot2)
library(purrr)
library(fdrtool)
library(cowplot) 
library(here)
library(outbreaks)

## ----simulate-si-data---------------------------------------------------------
set.seed(1234)

# Parameters for simulation
N <- 500           # Number of observations
true_mean <- 15    # True mean serial interval (days)
true_sd <- 3       # True standard deviation (days)
route_weights <- c(0.2, 0.5, 0.2, 0.1)  # Weights for transmission routes

# Generate data for different transmission routes
CP <- rhalfnorm((route_weights[1]*N), theta=sqrt(pi/2)/(sqrt(2)*true_sd))  # Co-Primary
PS <- rnorm(route_weights[2]*N, mean=true_mean, sd=true_sd)                # Primary-Secondary
PT <- rnorm(route_weights[3]*N, mean=2*true_mean, sd=sqrt(2)*true_sd)      # Primary-Tertiary
PQ <- rnorm(route_weights[4]*N, mean=3*true_mean, sd=sqrt(3)*true_sd)      # Primary-Quaternary

# Combine and round to days
sim_icc_intervals <- round(c(CP, PS, PT, PQ))

# Visualize the simulated data
hist(sim_icc_intervals, 
     breaks = seq(min(sim_icc_intervals)-0.5, max(sim_icc_intervals)+0.5, by=1),
     main = "Simulated ICC Intervals", 
     xlab = "Days since index case onset",
     col = "lightblue")

## ----si-estimate-sim----------------------------------------------------------
# Estimate serial interval assuming Normal distribution
si_results <- si_estim(sim_icc_intervals, dist = "normal")
si_results

## ----si_estimate-sim-res, echo=FALSE------------------------------------------
# Display results
cat("True parameters:\n")
cat("Mean:", true_mean, "days\n")
cat("SD:", true_sd, "days\n\n")

cat("Estimated parameters:\n")
cat("Mean:", round(si_results$mean[1], 2), "days\n")
cat("SD:", round(si_results$sd[1], 2), "days\n")

## ----plot-sim-fit, fig.height=5, fig.width=7----------------------------------
# Extract weights
weights <- c(si_results$wts[1], 
            si_results$wts[2] + si_results$wts[3],
            si_results$wts[4] + si_results$wts[5], 
            si_results$wts[6] + si_results$wts[7])

# Plot the fitted distribution
plot_si_fit(
  dat = sim_icc_intervals,
  mean = si_results$mean[1],
  sd = si_results$sd[1],
  weights = weights,
  dist = "normal"
) +
  ggtitle("Fitted Serial Interval Distribution (Simulated Data)") +
  theme(plot.title = element_text(hjust = 0.5))

## ----load-real-si-data--------------------------------------------------------
# Load scabies ICC interval data
file_path <-  here("vignettes", "data", "si_data.rds")
scabies_si_data <- readRDS(file_path)

## ----si-estimate-real---------------------------------------------------------
# Estimate serial interval for each study
result_by_study <- scabies_si_data %>%
  group_by(study) %>%
  summarise(result = list(si_estim(icc_interval))) %>%
  mutate(
    mean = map_dbl(result, "mean"),
    sd = map_dbl(result, "sd"),
    wts = map(result, "wts")
  ) %>%
  select(-result)

# Display results
result_by_study %>%
  select(study, mean, sd) %>%
  mutate(across(c(mean, sd), round, 2)) %>%
  arrange(mean) %>%
  knitr::kable(caption = "Estimated mean and standard deviation of serial interval (days) by study")

## ----process-si-results-------------------------------------------------------
# Process weights for plotting
result_wide <- result_by_study %>%
  unnest(wts) %>%
  pivot_longer(
    cols = c(mean, sd, wts),
    names_to = "statistic",
    values_to = "value"
  ) %>%
  group_by(study, statistic) %>%
  mutate(
    occurrence = row_number(),
    statistic = if_else(statistic == "wts", paste0("weight_", occurrence), statistic)
  ) %>%
  filter(statistic != "mean" | occurrence == 1) %>%
  filter(statistic != "sd" | occurrence == 1) %>%
  select(-occurrence) %>%
  ungroup() %>%
  pivot_wider(
    names_from = statistic,
    values_from = value
  )

# Merge with original data for plotting
df_merged <- scabies_si_data %>%
  left_join(result_wide, by = "study", relationship = "many-to-many")

## ----plot-si-multi, fig.height=8, fig.width=10--------------------------------
# Create a function to generate plot for each study
plot_study <- function(study_data) {
  study_name <- unique(study_data$study)
  
  plot_si_fit(
    dat = study_data$icc_interval,
    mean = study_data$mean[1],
    sd = study_data$sd[1],
    weights = c(study_data$weight_1[1], 
                study_data$weight_2[1] + study_data$weight_3[1],
                study_data$weight_4[1] + study_data$weight_5[1], 
                study_data$weight_6[1] + study_data$weight_7[1]),
    dist = "normal",
    scaling_factor = 0.25
  ) +
    ggtitle(study_name) +
    theme(plot.title = element_text(hjust = 0.5, size = 11))
}

# Generate plots for each study
study_plots <- df_merged %>%
  group_by(study) %>%
  group_split() %>%
  map(plot_study)

# Combine plots
combined_plot <- plot_grid(
  plotlist = study_plots,
  labels = "AUTO",
  ncol = 2
)

# Display combined plot
combined_plot

## ----func_arguments, eval=FALSE-----------------------------------------------
# wallinga_lipsitch(
#   incidence,          # Vector of case counts
#   dates,              # Vector of dates corresponding to incidence
#   si_mean,            # Mean of serial interval distribution (days)
#   si_sd,              # Standard deviation of serial interval
#   si_dist = "gamma",  # Distribution type ("gamma" or "normal")
#   smoothing = 0,      # Window size for smoothing (0 for no smoothing)
#   bootstrap = FALSE,  # Whether to compute bootstrap CIs
#   n_bootstrap = 1000, # Number of bootstrap samples
#   conf_level = 0.95,  # Confidence level for intervals
#   shift = FALSE       # Whether to shift estimates by one serial interval
# )

## ----sim-rt-------------------------------------------------------------------
set.seed(42)

# Simulation parameters
t_end <- 100        # Simulation duration (days)
si_mean <- 7        # Mean serial interval (days)
si_sd <- 2          # SD of serial interval (days)

# Define a time-varying reproduction number function
# This creates a pattern where Rt starts high, decreases below 1, then increases again
true_rt <- function(t) {
  if(t < 20) return(2.5)                  # Initial high Rt
  if(t < 40) return(2.5 - 0.075 * (t - 20)) # Linear decrease
  if(t < 60) return(1.0)                  # Stable period at Rt=1
  if(t < 80) return(1.0 + 0.05 * (t - 60))  # Linear increase
  return(2.0)                             # Final high Rt
}

# Create vector of true Rt values for plotting
true_rt_values <- sapply(1:t_end, true_rt)

# Initialize with some seed cases
cases <- numeric(t_end)
cases[1:5] <- c(1, 2, 3, 5, 8)

# Serial interval distribution (discretized normal)
si_pmf <- dnorm(0:30, mean = si_mean, sd = si_sd)
si_pmf <- si_pmf / sum(si_pmf)  # Normalize to sum to 1

# Generate incidence using renewal equation model
for(t in 6:t_end) {
  # Calculate expected new cases
  lambda <- 0
  for(s in 1:min(t-1, length(si_pmf))) {
    lambda <- lambda + cases[t-s] * true_rt(t-s) * si_pmf[s]
  }
  
  # Add randomness (negative binomial distribution)
  cases[t] <- rnbinom(1, mu = lambda, size = 10)
}

# Create dates sequence
sim_dates <- seq.Date(as.Date("2023-01-01"), by = "day", length.out = t_end)

# Create data frame
sim_epidemic <- data.frame(
  date = sim_dates,
  cases = cases,
  true_rt = true_rt_values
)

# Plot the simulated epidemic curve with true Rt
p1 <- ggplot(sim_epidemic, aes(x = date, y = cases)) +
  geom_col(fill = "steelblue") +
  labs(
    x = "Date",
    y = "Daily Cases",
    title = "Simulated Epidemic Curve"
  ) +
  theme_minimal()

p2 <- ggplot(sim_epidemic, aes(x = date, y = true_rt)) +
  geom_line(color = "red", linewidth = 1) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
  labs(
    x = "Date",
    y = "Reproduction Number",
    title = "True Time-Varying Rt"
  ) +
  ylim(0, 3) +
  theme_minimal()

# Combine plots
plot_grid(p1, p2, ncol = 1)

## ----estimate-rt--------------------------------------------------------------
# Estimate Rt using wallinga_lipsitch
rt_estimates <- wallinga_lipsitch(
  incidence = sim_epidemic$cases,
  dates = sim_epidemic$date,
  si_mean = si_mean,
  si_sd = si_sd,
  si_dist = "normal",
  smoothing = 7,        # 7-day smoothing window
  bootstrap = TRUE,
  n_bootstrap = 100     # Use more in practice
)

# Convert to data frame for plotting
rt_est_df <- as.data.frame(rt_estimates)

# Compare estimated vs true Rt
ggplot() +
  # True Rt
  geom_line(
    data = sim_epidemic, 
    aes(x = date, y = true_rt, color = "True Rt"),
    linewidth = 1
  ) +
  
  # Estimated Rt (with right-truncation correction)
  geom_line(
    data = rt_est_df, 
    aes(x = date, y = R_corrected, color = "Estimated Rt"),
    linewidth = 1
  ) +
  
  # Confidence intervals
  geom_ribbon(
    data = rt_est_df,
    aes(x = date, ymin = R_corrected_lower, ymax = R_corrected_upper),
    fill = "blue", alpha = 0.2
  ) +
  
  # R=1 threshold
  geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
  
  # Styling
  scale_color_manual(
    values = c("True Rt" = "red", "Estimated Rt" = "blue"),
    name = ""
  ) +
  labs(
    x = "Date",
    y = "Reproduction Number (Rt)",
    title = "Comparison of True vs. Estimated Rt",
    subtitle = "Using Wallinga-Lipsitch method with 7-day smoothing"
  ) +
  ylim(0, 3) +
  theme_minimal() +
  theme(legend.position = "bottom")

## ----rt-shift-----------------------------------------------------------------
# Re-estimate with shift=TRUE
rt_shifted <- wallinga_lipsitch(
  incidence = sim_epidemic$cases,
  dates = sim_epidemic$date,
  si_mean = si_mean,
  si_sd = si_sd,
  si_dist = "normal",
  smoothing = 7,
  bootstrap = FALSE,  # Skip bootstrap for speed
  shift = TRUE        # Use shift parameter
)

# Compare true Rt with both original and shifted estimates
ggplot() +
  # True Rt
  geom_line(
    data = sim_epidemic, 
    aes(x = date, y = true_rt, color = "True Rt"),
    linewidth = 1
  ) +
  
  # Original estimated Rt
  geom_line(
    data = rt_est_df, 
    aes(x = date, y = R_corrected, color = "Estimated Rt (original)"),
    linewidth = 1
  ) +
  
  # Shifted estimated Rt
  geom_line(
    data = as.data.frame(rt_shifted), 
    aes(x = shifted_date, y = R_corrected, color = "Estimated Rt (shifted)"),
    linewidth = 1
  ) +
  
  # R=1 threshold
  geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
  
  # Styling
  scale_color_manual(
    values = c("True Rt" = "red", 
               "Estimated Rt (original)" = "blue",
               "Estimated Rt (shifted)" = "green"),
    name = ""
  ) +
  labs(
    x = "Date",
    y = "Reproduction Number (Rt)",
    title = "Effect of the 'shift' parameter on Rt estimates",
    subtitle = "Shifting forward by one serial interval (7 days)"
  ) +
  ylim(0, 3) +
  theme_minimal() +
  theme(legend.position = "bottom")

## ----get_zika_data------------------------------------------------------------
data(zika_girardot_2015)

# Examine the data structure
str(zika_girardot_2015)

## ----zike_epicurve, echo=FALSE------------------------------------------------
zika_epicurve <- ggplot(zika_girardot_2015, aes(x = date, y = cases)) +
  geom_col(fill = "steelblue") +
  scale_x_date(date_breaks = "14 days", date_labels = "%d-%m-%Y") +
  labs(
    x = "Date of onset",
    y = "Number of cases",
    title = "Daily Zika virus cases in Girardot, Colombia (2015-2016)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

zika_epicurve

## ----rt-estimate-zika---------------------------------------------------------
# Parameters for Zika
zika_si_mean <- 7  # days
zika_si_sd <- 1.5     # days

# Estimate Rt using the Wallinga-Lipsitch method
rt_zika <- wallinga_lipsitch(
  incidence = zika_girardot_2015$cases,
  dates = zika_girardot_2015$date,
  si_mean = zika_si_mean,
  si_sd = zika_si_sd,
  si_dist = "gamma",
  smoothing = 0,
  bootstrap = TRUE,
  n_bootstrap = 100,
  conf_level = 0.95
)

head(rt_zika)

## ----rt_viz-------------------------------------------------------------------
# Prepare data for visualization
rt_plot_data <- rt_zika %>%
  filter(!is.na(R_corrected)) %>%
  # Skip the first 7 days of unstable estimates
  filter(date > min(date) + 7)

# Plot Rt over time
rt_plot <- ggplot(rt_plot_data, aes(x = date)) +
  geom_ribbon(aes(ymin = R_corrected_lower, ymax = R_corrected_upper), 
              fill = "#21908C", alpha = 0.2) +
  geom_line(aes(y = R_corrected), color = "#21908C", size = 1) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
  #coord_cartesian(ylim = c(0, 3)) +
  scale_x_date(date_breaks = "14 days", date_labels = "%d-%m-%Y") +
  labs(
    x = "Time", 
    y = "Reproduction number (Rt)",
    title = "Estimated reproduction number for Zika in Girardot, Colombia",
    subtitle = paste0("Serial interval: ", zika_si_mean, " days (SD: ", zika_si_sd, " days)")
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Show Rt plot with epicurve
plot_grid(zika_epicurve, rt_plot, ncol = 1, rel_heights = c(1, 1.5))