The negative binomial regression algorithm fits a negative binomial regression model with a time term and order 1 Fourier terms to a baseline period that spans 2 or more years. Inclusion of Fourier terms in the model is intended to account for seasonality common in multi-year weekly time series of counts. Order 1 sine and cosine terms are included to account for annual seasonality that is common to syndromes and diseases such as influenza, RSV, and norovirus. Each baseline model is used to make weekly forecasts for all weeks following the baseline period. One-sided upper 95 computed for each week in the prediction period. Alarms are signaled for any week during for which weekly counts fall above the upper bound of the prediction interval.
alert_nbinom(df, t = date, y = count, include_time = TRUE, baseline_end)
df | A data frame, data frame extension (e.g. a tibble), or a lazy data frame. |
---|---|
t | Name of the column of type Date containing the dates |
y | Name of the column of type Numeric containing counts |
include_time | Logical indicating whether or not to include time term
in regression model (default is |
baseline_end | Object of type Date defining the end of the baseline/training period |
A data frame with model estimates, upper prediction interval bounds, a binary alarm indicator field, and a binary indicator field of whether or not a time term was included.
# Example 1 df <- data.frame( date = seq(as.Date("2014-01-05"), as.Date("2022-02-05"), "weeks"), count = rpois(length(seq(as.Date("2014-01-05"), as.Date("2022-02-05"), "weeks")), 25) ) head(df) df_nbinom <- alert_nbinom(df, baseline_end = as.Date("2020-03-01")) head(df_nbinom) # Example 2 library(ggplot2) simulated_ts <- simulated_data ## Time series with seasonality, moderate counts ts <- subset(simulated_ts, id == "Scenario #1") head(ts) df_nbinom <- alert_nbinom(ts, t = date, y = cases, baseline_end = as.Date("2021-12-26")) head(df_nbinom) ## Visualize alert df_nbinom %>% ggplot() + theme_classic() + geom_line(aes(x = date, y = cases), linewidth = 0.3) + geom_line(aes(x = date, y = estimate), color = "blue", linewidth = 0.3) + geom_line(aes(x = date, y = threshold), color = "red", linewidth = 0.3, linetype = "dashed") + geom_point(data = subset(df_nbinom, alarm), aes(x = date, y = cases), color = "red", shape = 21, size = 2.5) + scale_y_continuous( limits = c(0, 80), expand = c(0, 0), name = "Weekly Count" ) + scale_x_date( breaks = seq.Date(from = min(ts$date), to = max(ts$date), by = "4 month"), name = "MMWR Week Date" ) + theme( axis.text.x = element_text(angle = 90, vjust = 0.5), axis.ticks.length = unit(0.25, "cm") ) + labs( title = "Negative Binomial Regression Algorithm Results for Simulated Time Series #1", subtitle = "Annual seasonality with moderate counts" ) if (FALSE) { # Example 3: Data from NSSP-ESSENCE, national counts for CDC Respiratory Synctial Virus v1 library(Rnssp) library(ggplot2) myProfile <- create_profile() url <- "https://essence2.syndromicsurveillance.org/nssp_essence/api/timeSeries?endDate=12 Feb2022&ccddCategory=cdc%20respiratory%20syncytial%20virus%20v1&percentParam=noPercent &geographySystem=hospitaldhhsregion&datasource=va_hospdreg&detector=nodetectordetector &startDate=29Dec2013&timeResolution=weekly&hasBeenE=1&medicalGroupingSystem=essencesyndromes &userId=2362&aqtTarget=TimeSeries" url <- url %>% gsub("\n", "", .) api_data <- get_api_data(url) df <- api_data$timeSeriesData df_nbinom <- alert_nbinom(df, baseline_end = as.Date("2020-03-01")) ### Visualize alert df_nbinom %>% ggplot() + geom_line(aes(x = date, y = count), linewidth = 0.3) + geom_line(aes(x = date, y = estimate), color = "blue", linewidth = 0.3) + geom_line(aes(x = date, y = threshold), color = "red", linewidth = 0.3, linetype = "dashed") + geom_point(data = subset(df_nbinom, alarm), aes(x = date, y = count), color = "red", size = 0.7) + theme_classic() + scale_x_date( date_breaks = "6 month", date_labels = "%b-%Y" ) + scale_y_continuous( breaks = scales::pretty_breaks(n = 5), labels = scales::comma ) + theme( axis.text.x = element_text(angle = 90, vjust = 0.5), axis.ticks.length = unit(0.25, "cm") ) + labs( x = "Date", y = "Weekly ED Encounters", subtitle = "Negative binomial regression algorithm with order-1 Fourier terms" ) + geom_vline(xintercept = as.Date("2020-03-01"), linetype = "dotted", linewidth = 0.5) + annotate( geom = "text", x = as.Date("2020-03-01") - 270, y = Inf, label = "End of baseline\nMarch 1, 2020", family = "Candara", vjust = 1.7, size = 3 ) }