Skip to contents
library(forecasttools)
library(scoringutils)
#> scoringutils 2.0.0 introduces major changes. We'd love your feedback!
#> <https://github.com/epiforecasts/scoringutils/issues>. To use the old version,
#> run: `remotes::install_github('epiforecasts/scoringutils@v1.2.2')`
#> This message is displayed once per session.
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> 
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> 
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)
library(knitr)

In this vignette, we use forecasttools to capture the current state of the FluSight forecast hub (see https://github.com/cdcepi/FluSight-forecast-hub) and then score the forecasts according to a proper scoring rule. We do the scoring with scoringutils.

Generating a table of forecasts against truth data.

First, we create a table of forecast predictions formatted to work with scoringutils functions using create_table_for_scoring(). Generally, we expect users to use create_table_for_scoring() with a local path to the forecast repository which updates from GitHub by default. In this case, we download the hub first.

# Download the FluSight hub into this R session's temporary directory, then
# build the forecast-versus-truth table from that local copy.
hub_path <- fs::path(tempdir(), "flusight-hub")
hub_url <- "https://github.com/cdcepi/FluSight-forecast-hub"

# NOTE(review): `force = TRUE` presumably replaces any copy already present at
# `hub_path` -- confirm against the download_hub() documentation.
download_hub(hub_url = hub_url, hub_path = hub_path, force = TRUE)

# Table of forecasts joined to observed values, in the layout the
# scoringutils calls further down consume.
forecast_and_target <- create_table_for_scoring(hub_path)
#>  Updating superseded URL `Infectious-Disease-Modeling-hubs` to `hubverse-org`
#>  Updating superseded URL `Infectious-Disease-Modeling-hubs` to `hubverse-org`
#> New names:
#> Rows: 6148 Columns: 6
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (2): location, location_name dbl (3): ...1, value, weekly_rate date (1): date
#>  Use `spec()` to retrieve the full column specification for this data. 
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#>  `` -> `...1`

There are 39 different models that have been submitted to FluSight.

# Distinct model identifiers present in the scoring table.
unique(forecast_and_target[["model"]])
#>  [1] "CADPH-FluCAT_Ensemble"    "CEPH-Rtrend_fluH"        
#>  [3] "CMU-TimeSeries"           "CU-ensemble"             
#>  [5] "FluSight-baseline"        "FluSight-ensemble"       
#>  [7] "FluSight-lop_norm"        "GH-model"                
#>  [9] "GT-FluFNP"                "ISU_NiemiLab-ENS"        
#> [11] "ISU_NiemiLab-NLH"         "ISU_NiemiLab-SIR"        
#> [13] "JHU_CSSE-CSSE_Ensemble"   "LUcompUncertLab-chimera" 
#> [15] "LosAlamos_NAU-CModel_Flu" "MIGHTE-Nsemble"          
#> [17] "MOBS-GLEAM_FLUH"          "NIH-Flu_ARIMA"           
#> [19] "NU_UCSD-GLEAM_AI_FLUH"    "PSI-PROF"                
#> [21] "PSI-PROF_beta"            "SGroup-RandomForest"     
#> [23] "SigSci-CREG"              "SigSci-TSENS"            
#> [25] "Stevens-GBR"              "UGA_flucast-Copycat"     
#> [27] "UGA_flucast-INFLAenza"    "UGA_flucast-OKeeffe"     
#> [29] "UGuelph-CompositeCurve"   "UGuelphensemble-GRYPHON" 
#> [31] "UM-DeepOutbreak"          "UMass-flusion"           
#> [33] "UMass-trends_ensemble"    "UNC_IDD-InfluPaint"      
#> [35] "UVAFluX-Ensemble"         "VTSanghani-Ensemble"     
#> [37] "cfa-flumech"              "cfarenewal-cfaepimlight" 
#> [39] "fjordhest-ensemble"

There are 53 locations which have been targets for forecasting: the 50 states, the District of Columbia, Puerto Rico, and the US as a whole.

# Distinct location names with missing values dropped. na.omit() records the
# position of the removed NA in an `na.action` attribute, which is printed
# along with the names.
na.omit(unique(forecast_and_target[["location_name"]]))
#>  [1] "California"           "Alabama"              "Alaska"              
#>  [4] "Arizona"              "Arkansas"             "Colorado"            
#>  [7] "Connecticut"          "Delaware"             "District of Columbia"
#> [10] "Florida"              "Georgia"              "Hawaii"              
#> [13] "Idaho"                "Illinois"             "Indiana"             
#> [16] "Iowa"                 "Kansas"               "Kentucky"            
#> [19] "Louisiana"            "Maine"                "Maryland"            
#> [22] "Massachusetts"        "Michigan"             "Minnesota"           
#> [25] "Mississippi"          "Missouri"             "Montana"             
#> [28] "Nebraska"             "Nevada"               "New Hampshire"       
#> [31] "New Jersey"           "New Mexico"           "New York"            
#> [34] "North Carolina"       "North Dakota"         "Ohio"                
#> [37] "Oklahoma"             "Oregon"               "Pennsylvania"        
#> [40] "Rhode Island"         "South Carolina"       "South Dakota"        
#> [43] "Tennessee"            "Texas"                "Utah"                
#> [46] "Vermont"              "Virginia"             "Washington"          
#> [49] "West Virginia"        "Wisconsin"            "Wyoming"             
#> [52] "Puerto Rico"          "US"                  
#> attr(,"na.action")
#> [1] 2
#> attr(,"class")
#> [1] "omit"

Tabular scoring of forecasts

scoringutils provides functions for creating summarized scores, including interval scores, skill relative to a chosen baseline, and coverage of prediction intervals at different nominal levels. Here we show scores for the national (US) forecasts, summarized by model across all forecast dates so far.

# Location whose forecasts are scored; "US" selects the national forecasts.
chosen_location <- "US"

# Score every US forecast, add skill relative to the baseline model, average
# the scores per model, round to two significant digits, and render a table.
forecast_and_target |>
  filter(location == chosen_location) |>
  as_forecast_quantile(
    observed = "true_value",
    predicted = "prediction",
    quantile_level = "quantile"
  ) |>
  score() |>
  # In scoringutils >= 2.0, summarise_scores() has no `relative_skill` or
  # `baseline` parameters: passing them there forwards them through `...` to
  # `fun`, where they are silently ignored. Relative skill is instead added
  # explicitly with add_relative_skill() before summarising.
  add_relative_skill(baseline = "FluSight-ensemble") |>
  summarise_scores(by = "model") |>
  # One row per model remains, so `signif` only rounds each summarised value.
  summarise_scores(
    fun = signif,
    digits = 2
  ) |>
  kable()
#>  Some rows containing NA values may be removed. This is fine if not
#>   unexpected.
model wis overprediction underprediction dispersion bias interval_coverage_50 interval_coverage_90 ae_median
CEPH-Rtrend_fluH 1500 220 820 510 -0.340 0.530 0.85 2400
CMU-TimeSeries 2000 250 780 940 -0.220 0.580 0.94 3000
CU-ensemble 1800 590 700 500 -0.230 0.520 0.77 2600
FluSight-baseline 2100 760 1100 260 0.081 0.091 0.78 3000
FluSight-ensemble 1500 340 630 490 -0.210 0.450 0.90 2400
FluSight-lop_norm 1400 250 500 670 -0.200 0.550 0.98 2300
GH-model 8900 0 8800 73 -1.000 0.000 0.00 9100
GT-FluFNP 2700 580 1800 360 -0.300 0.260 0.45 3600
ISU_NiemiLab-ENS 2200 240 1500 420 -0.470 0.330 0.56 2900
ISU_NiemiLab-NLH 1700 170 1200 330 -0.360 0.380 0.59 2300
ISU_NiemiLab-SIR 2900 750 1600 530 -0.420 0.310 0.48 3900
JHU_CSSE-CSSE_Ensemble 900 160 310 440 -0.059 0.560 0.96 1400
LUcompUncertLab-chimera 2000 870 840 290 -0.220 0.210 0.50 2600
LosAlamos_NAU-CModel_Flu 7200 5400 1600 200 -0.170 0.049 0.21 7900
MIGHTE-Nsemble 1400 320 680 400 -0.140 0.500 0.82 2100
MOBS-GLEAM_FLUH 1400 180 610 640 -0.340 0.550 0.92 2300
NIH-Flu_ARIMA 2300 110 740 1500 -0.180 0.580 0.92 2200
NU_UCSD-GLEAM_AI_FLUH 2200 620 730 840 -0.160 0.510 0.89 3500
PSI-PROF 1400 350 390 640 0.036 0.540 0.85 2200
PSI-PROF_beta 1800 430 650 730 0.073 0.540 0.84 2700
SGroup-RandomForest 1700 100 990 640 -0.240 0.500 0.92 2700
SigSci-CREG 1100 390 380 330 -0.120 0.310 0.77 1700
SigSci-TSENS 1600 380 710 540 -0.100 0.550 0.85 2400
Stevens-GBR 2300 60 1800 440 -0.530 0.270 0.49 3100
UGA_flucast-Copycat 1700 200 920 570 -0.280 0.470 0.87 2600
UGA_flucast-INFLAenza 1700 190 1200 380 -0.072 0.340 0.81 2500
UGA_flucast-OKeeffe 500 0 390 110 -0.750 0.170 0.75 820
UGuelph-CompositeCurve 3500 2200 800 510 0.065 0.088 0.50 4800
UGuelphensemble-GRYPHON 1800 540 820 450 -0.130 0.330 0.86 2700
UM-DeepOutbreak 2000 230 460 1400 -0.086 0.710 0.83 2100
UMass-flusion 1100 240 320 510 -0.033 0.590 0.99 1700
UMass-trends_ensemble 2000 720 890 370 -0.056 0.310 0.56 2700
UNC_IDD-InfluPaint 2800 1700 850 240 -0.180 0.160 0.38 3600
UVAFluX-Ensemble 2100 1100 570 460 -0.130 0.410 0.69 2900
VTSanghani-Ensemble 2700 960 1300 390 -0.130 0.200 0.45 3600
cfa-flumech 2600 1600 480 500 0.059 0.270 0.61 3700
cfarenewal-cfaepimlight 1900 520 870 500 -0.310 0.350 0.78 3000
fjordhest-ensemble 1700 460 550 650 -0.200 0.490 0.93 2700