## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 8,
  fig.height = 6
)

## ----setup--------------------------------------------------------------------
library(manureshed)

## ----custom_thresholds, eval=FALSE--------------------------------------------
# # More restrictive (exclude more areas)
# results_conservative <- run_builtin_analysis(
#   scale = "huc8",
#   year = 2016,
#   nutrients = "nitrogen",
#   cropland_threshold = 2000  # 2000 acres instead of 1234
# )
# 
# # Less restrictive (include more areas)
# results_liberal <- run_builtin_analysis(
#   scale = "huc8",
#   year = 2016,
#   nutrients = "nitrogen",
#   cropland_threshold = 500   # 500 acres
# )
# 
# # Compare the difference
# conservative_excluded <- sum(results_conservative$agricultural$N_class == "Excluded")
# liberal_excluded <- sum(results_liberal$agricultural$N_class == "Excluded")
# 
# print(paste("Conservative:", conservative_excluded, "excluded"))
# print(paste("Liberal:", liberal_excluded, "excluded"))

## ----auto_threshold, eval=FALSE-----------------------------------------------
# # Load data for threshold calculation
# county_data <- load_builtin_nugis("county", 2016)
# huc8_data <- load_builtin_nugis("huc8", 2016)
# 
# # Calculate appropriate threshold for HUC8
# custom_threshold <- get_cropland_threshold(
#   scale = "huc8",
#   county_data = county_data,
#   target_data = huc8_data,
#   baseline_ha = 750  # Use 750 ha instead of 500 ha baseline
# )
# 
# print(paste("Custom threshold:", round(custom_threshold, 2), "acres"))
# 
# # Use in analysis
# results <- run_builtin_analysis(
#   scale = "huc8",
#   year = 2016,
#   nutrients = "nitrogen",
#   cropland_threshold = custom_threshold
# )

## ----state_analysis, eval=FALSE-----------------------------------------------
# # Analyze specific states
# iowa_results <- run_state_analysis(
#   state = "IA",
#   scale = "county",
#   year = 2016,
#   nutrients = c("nitrogen", "phosphorus"),
#   include_wwtp = TRUE
# )
# 
# # Quick state analysis with maps
# texas_results <- quick_state_analysis(
#   state = "TX",
#   scale = "huc8",
#   year = 2015,
#   nutrients = "nitrogen",
#   create_maps = TRUE,
#   create_networks = TRUE
# )

## ----multi_state, eval=FALSE--------------------------------------------------
# # Compare agricultural states
# corn_belt_states <- c("IA", "IL", "IN", "NE", "OH")
# state_results <- list()
# 
# for (state in corn_belt_states) {
#   state_results[[state]] <- run_state_analysis(
#     state = state,
#     scale = "county",
#     year = 2016,
#     nutrients = "nitrogen",
#     include_wwtp = TRUE,
#     verbose = FALSE  # Reduce output
#   )
# }
# 
# # Compare state summaries
# state_summary <- do.call(rbind, lapply(names(state_results), function(state) {
#   result <- state_results[[state]]
#   classes <- table(result$agricultural$N_class)
#   data.frame(
#     State = state,
#     Total_Counties = sum(classes),
#     Source_Counties = classes["Source"] %||% 0,
#     Sink_Counties = sum(classes[c("Sink_Deficit", "Sink_Fertilizer")], na.rm = TRUE),
#     Source_Percent = round((classes["Source"] %||% 0) / sum(classes) * 100, 1)
#   )
# }))
# 
# print(state_summary)

## ----batch_years, eval=FALSE--------------------------------------------------
# # Analyze multiple years efficiently
# batch_results <- batch_analysis_years(
#   years = 2012:2016,
#   scale = "huc8",
#   nutrients = "nitrogen",
#   include_wwtp = TRUE,
#   create_comparative_plots = TRUE
# )
# 
# # Check which years succeeded
# successful_years <- names(batch_results$results)
# print(paste("Successful years:", paste(successful_years, collapse = ", ")))

## ----parallel, eval=FALSE-----------------------------------------------------
# # Use multiple CPU cores
# parallel_results <- batch_analysis_parallel(
#   years = 2014:2016,
#   n_cores = 2,  # Use 2 cores
#   scale = "county",
#   nutrients = "nitrogen",
#   include_wwtp = TRUE
# )
# 
# # Check results
# successful <- sum(!sapply(parallel_results, function(x) "error" %in% names(x)))
# print(paste("Successful analyses:", successful, "out of", length(parallel_results)))

## ----enhanced_batch, eval=FALSE-----------------------------------------------
# # Full batch analysis with all visualizations
# enhanced_results <- batch_analysis_enhanced(
#   years = 2015:2016,  # Use fewer years for demonstration
#   scale = "county",
#   nutrients = "nitrogen",
#   create_all_visualizations = TRUE,
#   create_comparative_plots = TRUE
# )

## ----benchmarking, eval=FALSE-------------------------------------------------
# # Benchmark different configurations
# benchmark_results <- benchmark_analysis(
#   scale = "county",
#   year = 2016,
#   nutrients = "nitrogen",
#   n_runs = 3,
#   include_wwtp = TRUE
# )
# 
# print(benchmark_results)
# 
# # Compare scales
# scales_to_test <- c("county", "huc8", "huc2")
# benchmark_comparison <- list()
# 
# for (scale in scales_to_test) {
#   benchmark_comparison[[scale]] <- benchmark_analysis(
#     scale = scale,
#     year = 2016,
#     nutrients = "nitrogen",
#     n_runs = 2,
#     include_wwtp = TRUE
#   )
# }
# 
# # Compare timing
# timing_comparison <- data.frame(
#   Scale = scales_to_test,
#   Mean_Time_Seconds = sapply(benchmark_comparison, function(x) x$timing$mean),
#   Memory_MB = sapply(benchmark_comparison, function(x) x$memory_mb$mean)
# )
# 
# print(timing_comparison)

## ----memory_management, eval=FALSE--------------------------------------------
# # Clear cache to free up space
# clear_data_cache()
# 
# # Check package health
# health_status <- health_check(verbose = TRUE)
# 
# # Test OSF connection
# connection_ok <- test_osf_connection()
# print(paste("OSF connection:", ifelse(connection_ok, "OK", "Failed")))

## ----transition_analysis, eval=FALSE------------------------------------------
# # Run analysis
# results <- run_builtin_analysis(
#   scale = "huc8",
#   year = 2016,
#   nutrients = "nitrogen",
#   include_wwtp = TRUE
# )
# 
# # Calculate centroids for spatial analysis
# centroids <- add_centroid_coordinates(results$integrated$nitrogen)
# 
# # Calculate transition probabilities
# agri_transitions <- calculate_transition_probabilities(
#   centroids, "N_class"
# )
# 
# combined_transitions <- calculate_transition_probabilities(
#   centroids, "combined_N_class"
# )
# 
# # Compare transition matrices
# print("Agricultural transitions:")
# print(agri_transitions)
# 
# print("Combined (WWTP + Agricultural) transitions:")
# print(combined_transitions)
# 
# # Create network plots
# create_network_plot(
#   agri_transitions, "nitrogen", "Agricultural",
#   "network_agricultural.png"
# )
# 
# create_network_plot(
#   combined_transitions, "nitrogen", "Combined",
#   "network_combined.png"
# )

## ----spatial_stats, eval=FALSE------------------------------------------------
# # Calculate spatial statistics
# library(sf)
# 
# # Get results as spatial data
# spatial_results <- results$integrated$nitrogen
# 
# # Calculate areas of different classifications
# class_areas <- spatial_results %>%
#   mutate(area_km2 = as.numeric(st_area(.)) / 1000000) %>%
#   st_drop_geometry() %>%
#   group_by(combined_N_class) %>%
#   summarise(
#     count = n(),
#     total_area_km2 = sum(area_km2),
#     mean_area_km2 = mean(area_km2),
#     .groups = 'drop'
#   )
# 
# print(class_areas)
# 
# # Find neighboring relationships
# neighbors <- st_touches(spatial_results)
# neighbor_summary <- data.frame(
#   ID = spatial_results$ID,
#   N_neighbors = lengths(neighbors),
#   Class = spatial_results$combined_N_class
# )
# 
# print("Average neighbors by classification:")
# neighbor_summary %>%
#   group_by(Class) %>%
#   summarise(mean_neighbors = mean(N_neighbors), .groups = 'drop')

## ----custom_workflow, eval=FALSE----------------------------------------------
# # Example: Livestock-intensive regions analysis
# analyze_livestock_regions <- function(states, year = 2016) {
# 
#   results <- list()
# 
#   for (state in states) {
#     # Custom threshold for livestock regions
#     county_data <- load_builtin_nugis("county", year)
# 
#     # Calculate state-specific threshold
#     state_county <- county_data[substr(county_data$ID, 1, 2) == get_state_fips(state), ]
#     custom_threshold <- quantile(state_county$cropland, 0.25)  # 25th percentile
# 
#     # Run analysis
#     state_result <- run_state_analysis(
#       state = state,
#       scale = "county",
#       year = year,
#       nutrients = "nitrogen",
#       include_wwtp = TRUE,
#       cropland_threshold = custom_threshold,
#       verbose = FALSE
#     )
# 
#     results[[state]] <- state_result
#   }
# 
#   return(results)
# }
# 
# # Apply to livestock states
# livestock_states <- c("IA", "NE", "NC", "MN")
# livestock_results <- analyze_livestock_regions(livestock_states, 2016)

## ----time_series, eval=FALSE--------------------------------------------------
# # Custom time series analysis
# analyze_trends <- function(scale, years, nutrient = "nitrogen") {
# 
#   trend_data <- list()
# 
#   for (year in years) {
#     # Check if WWTP data available
#     use_wwtp <- year >= 2007 && year <= 2016
# 
#     result <- run_builtin_analysis(
#       scale = scale,
#       year = year,
#       nutrients = nutrient,
#       include_wwtp = use_wwtp,
#       save_outputs = FALSE,
#       verbose = FALSE
#     )
# 
#     # Extract classification summary
#     if (use_wwtp && "integrated" %in% names(result)) {
#       classes <- table(result$integrated[[nutrient]][[paste0("combined_", toupper(substr(nutrient, 1, 1)), "_class")]])
#     } else {
#       classes <- table(result$agricultural[[paste0(toupper(substr(nutrient, 1, 1)), "_class")]])
#     }
# 
#     trend_data[[as.character(year)]] <- list(
#       year = year,
#       wwtp_included = use_wwtp,
#       classes = classes,
#       source_count = classes["Source"] %||% 0,
#       sink_count = sum(classes[c("Sink_Deficit", "Sink_Fertilizer")], na.rm = TRUE)
#     )
#   }
# 
#   return(trend_data)
# }
# 
# # Analyze nitrogen trends
# nitrogen_trends <- analyze_trends("huc8", 2008:2016, "nitrogen")
# 
# # Convert to data frame for plotting
# trend_df <- do.call(rbind, lapply(nitrogen_trends, function(x) {
#   data.frame(
#     Year = x$year,
#     WWTP_Included = x$wwtp_included,
#     Source_Count = x$source_count,
#     Sink_Count = x$sink_count,
#     Total_Units = sum(x$classes)
#   )
# }))
# 
# print(trend_df)

## ----data_export, eval=FALSE--------------------------------------------------
# # Export for GIS software
# gis_files <- export_for_gis(
#   results,
#   output_dir = "gis_export",
#   formats = c("shapefile", "geojson")
# )
# 
# # Export for publication
# pub_files <- export_for_publication(
#   results,
#   output_dir = "publication_export",
#   dpi = 600
# )
# 
# # Export for policy makers
# policy_files <- export_for_policy(
#   results,
#   output_dir = "policy_export"
# )
# 
# print("Created files:")
# print(c(gis_files, pub_files, policy_files))

## ----package_integration, eval=FALSE------------------------------------------
# # Example: Using with tigris for custom boundaries
# if (requireNamespace("tigris", quietly = TRUE)) {
#   # Get state boundary
#   ohio_boundary <- tigris::states() %>%
#     filter(STUSPS == "OH") %>%
#     st_transform(5070)  # Transform to analysis CRS
# 
#   # Spatial filter results
#   ohio_watersheds <- st_filter(results$integrated$nitrogen, ohio_boundary)
# 
#   print(paste("Ohio watersheds:", nrow(ohio_watersheds)))
# }
# 
# # Example: Using with nhdplusTools for watershed delineation
# if (requireNamespace("nhdplusTools", quietly = TRUE)) {
#   # Find watershed for a specific point
#   point <- st_sfc(st_point(c(-83.0458, 42.3314)), crs = 4326)  # Detroit
#   watershed_info <- nhdplusTools::discover_nhdplus_id(point)
# 
#   # Filter results to this watershed
#   if (!is.null(watershed_info$huc8)) {
#     watershed_result <- results$integrated$nitrogen %>%
#       filter(ID == watershed_info$huc8)
#     print(watershed_result)
#   }
# }

## ----advanced_validation, eval=FALSE------------------------------------------
# # Comprehensive quality check
# validation_report <- list()
# 
# # Check data completeness
# validation_report$completeness <- list(
#   total_units = nrow(results$agricultural),
#   missing_n_class = sum(is.na(results$agricultural$N_class)),
#   excluded_percent = sum(results$agricultural$N_class == "Excluded", na.rm = TRUE) /
#                     nrow(results$agricultural) * 100
# )
# 
# # Check balance calculations
# if ("integrated" %in% names(results)) {
#   n_data <- results$integrated$nitrogen
#   validation_report$balance_check <- list(
#     impossible_sources = sum(n_data$combined_N_surplus <= 0 &
#                             n_data$combined_N_class == "Source", na.rm = TRUE),
#     impossible_sinks = sum(n_data$combined_N_surplus > 0 &
#                           n_data$combined_N_class %in% c("Sink_Deficit", "Sink_Fertilizer"), na.rm = TRUE)
#   )
# }
# 
# # Check spatial validity
# if (inherits(results$agricultural, "sf")) {
#   validation_report$spatial_check <- list(
#     invalid_geometries = sum(!st_is_valid(results$agricultural)),
#     crs = st_crs(results$agricultural)$input
#   )
# }
# 
# print("Validation Report:")
# print(str(validation_report))

## ----performance_tips, eval=FALSE---------------------------------------------
# # 1. Use appropriate scales
# # County: ~3000 units, good for policy analysis
# # HUC8: ~2000 units, good for watershed analysis
# # HUC2: ~18 units, good for regional analysis
# 
# # 2. Manage memory for large analyses
# gc()  # Garbage collection
# clear_data_cache()  # Clear package cache
# 
# # 3. Use parallel processing for multiple years
# # But be careful not to use too many cores
# 
# # 4. Save intermediate results
# save_spatial_data(results$agricultural, "intermediate_results.rds")

## ----reproducibility, eval=FALSE----------------------------------------------
# # Always document your analysis parameters
# analysis_params <- list(
#   scale = "huc8",
#   year = 2016,
#   nutrients = "nitrogen",
#   cropland_threshold = 1234,
#   include_wwtp = TRUE,
#   analysis_date = Sys.Date(),
#   package_version = packageVersion("manureshed")
# )
# 
# # Save parameters with results
# results$analysis_params <- analysis_params
# save_analysis_summary(results, "analysis_summary.json", format = "json")