The negative binomial regression algorithm fits a negative binomial regression model with a time term and order 1 Fourier terms to a baseline period that spans 2 or more years. Inclusion of Fourier terms in the model is intended to account for seasonality common in multi-year weekly time series of counts. Order 1 sine and cosine terms are included to account for annual seasonality that is common to syndromes and diseases such as influenza, RSV, and norovirus. Each baseline model is used to make weekly forecasts for all weeks following the baseline period. One-sided upper 95 computed for each week in the prediction period. Alarms are signaled for any week during for which weekly counts fall above the upper bound of the prediction interval.

alert_nbinom(df, t = date, y = count, include_time = TRUE, baseline_end)

Arguments

df

A data frame, data frame extension (e.g. a tibble), or a lazy data frame.

t

Name of the column of type Date containing the dates

y

Name of the column of type Numeric containing counts

include_time

Logical indicating whether or not to include time term in regression model (default is TRUE)

baseline_end

Object of type Date defining the end of the baseline/training period

Value

A data frame with model estimates, upper prediction interval bounds, a binary alarm indicator field, and a binary indicator field of whether or not a time term was included.

Examples

# Example 1

df <- data.frame(
  date = seq(as.Date("2014-01-05"), as.Date("2022-02-05"), "weeks"),
  count = rpois(length(seq(as.Date("2014-01-05"), as.Date("2022-02-05"), "weeks")), 25)
)

head(df)

df_nbinom <- alert_nbinom(df, baseline_end = as.Date("2020-03-01"))

head(df_nbinom)


# Example 2
library(ggplot2)

simulated_ts <- simulated_data

## Time series with seasonality, moderate counts
ts <- subset(simulated_ts, id == "Scenario #1")

head(ts)

df_nbinom <- alert_nbinom(ts, t = date, y = cases,
                       baseline_end = as.Date("2021-12-26"))

head(df_nbinom)


## Visualize alert
df_nbinom %>%
 ggplot() +
 theme_classic() +
 geom_line(aes(x = date, y = cases), linewidth = 0.3) +
 geom_line(aes(x = date, y = estimate), color = "blue", linewidth = 0.3) +
 geom_line(aes(x = date, y = threshold), color = "red", linewidth = 0.3,
            linetype = "dashed") +
 geom_point(data = subset(df_nbinom, alarm), aes(x = date, y = cases),
            color = "red", shape = 21, size = 2.5) +
 scale_y_continuous(
   limits = c(0, 80),
   expand = c(0, 0),
   name = "Weekly Count"
 ) +
 scale_x_date(
   breaks = seq.Date(from = min(ts$date), to = max(ts$date), by = "4 month"),
   name = "MMWR Week Date"
 ) +
 theme(
   axis.text.x = element_text(angle = 90, vjust = 0.5),
   axis.ticks.length = unit(0.25, "cm")
 ) +
 labs(
   title = "Negative Binomial Regression Algorithm Results for Simulated Time Series #1",
   subtitle = "Annual seasonality with moderate counts"
 )


if (FALSE) {
# Example 3: Data from NSSP-ESSENCE, national counts for CDC Respiratory Synctial Virus v1

library(Rnssp)
library(ggplot2)

myProfile <- create_profile()

url <- "https://essence2.syndromicsurveillance.org/nssp_essence/api/timeSeries?endDate=12
Feb2022&ccddCategory=cdc%20respiratory%20syncytial%20virus%20v1&percentParam=noPercent
&geographySystem=hospitaldhhsregion&datasource=va_hospdreg&detector=nodetectordetector
&startDate=29Dec2013&timeResolution=weekly&hasBeenE=1&medicalGroupingSystem=essencesyndromes
&userId=2362&aqtTarget=TimeSeries"


url <- url %>% gsub("\n", "", .)

api_data <- get_api_data(url)

df <- api_data$timeSeriesData


df_nbinom <- alert_nbinom(df, baseline_end = as.Date("2020-03-01"))


### Visualize alert
df_nbinom %>%
  ggplot() +
  geom_line(aes(x = date, y = count), linewidth = 0.3) +
  geom_line(aes(x = date, y = estimate), color = "blue", linewidth = 0.3) +
  geom_line(aes(x = date, y = threshold), color = "red", linewidth = 0.3,
            linetype = "dashed") +
  geom_point(data = subset(df_nbinom, alarm),
             aes(x = date, y = count), color = "red", size = 0.7) +
  theme_classic() +
  scale_x_date(
    date_breaks = "6 month",
    date_labels = "%b-%Y"
  ) +
  scale_y_continuous(
    breaks = scales::pretty_breaks(n = 5),
    labels = scales::comma
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    axis.ticks.length = unit(0.25, "cm")
  ) +
  labs(
    x = "Date",
    y = "Weekly ED Encounters",
    subtitle = "Negative binomial regression algorithm with order-1 Fourier terms"
  ) +
  geom_vline(xintercept = as.Date("2020-03-01"), linetype = "dotted", linewidth = 0.5) +
  annotate(
    geom = "text",
    x = as.Date("2020-03-01") - 270,
    y = Inf, label = "End of baseline\nMarch 1, 2020",
    family = "Candara", vjust = 1.7, size = 3
  )

}