Scoring Flusight submissions using `scoringutils`
Source:vignettes/scoring-flu-forecasts.Rmd
scoring-flu-forecasts.Rmd
library(forecasttools)
library(scoringutils)
#> scoringutils 2.0.0 introduces major changes. We'd love your feedback!
#> <https://github.com/epiforecasts/scoringutils/issues>. To use the old version,
#> run: `remotes::install_github('epiforecasts/scoringutils@v1.2.2')`
#> This message is displayed once per session.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#>
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#>
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(ggplot2)
library(knitr)
In this vignette, we use `forecasttools` to capture the current state of the
FluSight forecast hub (see <https://github.com/cdcepi/FluSight-forecast-hub>),
and then score the forecasts according to a proper scoring rule. We do the
scoring with `scoringutils`.
Generating a table of forecasts against truth data.
First, we use `create_table_for_scoring()` to create a table of forecast
predictions formatted to work with `scoringutils` functions. Generally, we
expect users to call `create_table_for_scoring()` with a local path to the
forecast repository, which updates from GitHub by default. In this case,
we download the hub first.
# Download the FluSight forecast hub into this session's temporary directory
# and build the forecast-vs-truth table from that local copy.
# NOTE(review): `force = TRUE` presumably replaces any copy already at
# `hub_path` -- confirm against the download_hub() documentation.
hub_url <- "https://github.com/cdcepi/FluSight-forecast-hub"
hub_path <- fs::path(tempdir(), "flusight-hub")
download_hub(hub_path = hub_path, hub_url = hub_url, force = TRUE)
forecast_and_target <- hub_path |> create_table_for_scoring()
#> ℹ Updating superseded URL `Infectious-Disease-Modeling-hubs` to `hubverse-org`
#> ℹ Updating superseded URL `Infectious-Disease-Modeling-hubs` to `hubverse-org`
#> New names:
#> Rows: 6148 Columns: 6
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (2): location, location_name dbl (3): ...1, value, weekly_rate date (1): date
#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> • `` -> `...1`
There are 39 different models that have been submitted to FluSight.
# List each distinct model name that appears in the scoring table.
unique(forecast_and_target$model)
#> [1] "CADPH-FluCAT_Ensemble" "CEPH-Rtrend_fluH"
#> [3] "CMU-TimeSeries" "CU-ensemble"
#> [5] "FluSight-baseline" "FluSight-ensemble"
#> [7] "FluSight-lop_norm" "GH-model"
#> [9] "GT-FluFNP" "ISU_NiemiLab-ENS"
#> [11] "ISU_NiemiLab-NLH" "ISU_NiemiLab-SIR"
#> [13] "JHU_CSSE-CSSE_Ensemble" "LUcompUncertLab-chimera"
#> [15] "LosAlamos_NAU-CModel_Flu" "MIGHTE-Nsemble"
#> [17] "MOBS-GLEAM_FLUH" "NIH-Flu_ARIMA"
#> [19] "NU_UCSD-GLEAM_AI_FLUH" "PSI-PROF"
#> [21] "PSI-PROF_beta" "SGroup-RandomForest"
#> [23] "SigSci-CREG" "SigSci-TSENS"
#> [25] "Stevens-GBR" "UGA_flucast-Copycat"
#> [27] "UGA_flucast-INFLAenza" "UGA_flucast-OKeeffe"
#> [29] "UGuelph-CompositeCurve" "UGuelphensemble-GRYPHON"
#> [31] "UM-DeepOutbreak" "UMass-flusion"
#> [33] "UMass-trends_ensemble" "UNC_IDD-InfluPaint"
#> [35] "UVAFluX-Ensemble" "VTSanghani-Ensemble"
#> [37] "cfa-flumech" "cfarenewal-cfaepimlight"
#> [39] "fjordhest-ensemble"
There are 53 locations that have been targets for forecasting: the 50 states, the District of Columbia, Puerto Rico, and the US as a whole.
# List each distinct, non-missing location name in the scoring table.
# na.omit() on an atomic vector attaches "na.action" and "class" attributes
# that would otherwise be printed after the names; as.vector() strips them so
# only the location names are shown.
forecast_and_target$location_name |>
  unique() |>
  na.omit() |>
  as.vector()
#> [1] "California" "Alabama" "Alaska"
#> [4] "Arizona" "Arkansas" "Colorado"
#> [7] "Connecticut" "Delaware" "District of Columbia"
#> [10] "Florida" "Georgia" "Hawaii"
#> [13] "Idaho" "Illinois" "Indiana"
#> [16] "Iowa" "Kansas" "Kentucky"
#> [19] "Louisiana" "Maine" "Maryland"
#> [22] "Massachusetts" "Michigan" "Minnesota"
#> [25] "Mississippi" "Missouri" "Montana"
#> [28] "Nebraska" "Nevada" "New Hampshire"
#> [31] "New Jersey" "New Mexico" "New York"
#> [34] "North Carolina" "North Dakota" "Ohio"
#> [37] "Oklahoma" "Oregon" "Pennsylvania"
#> [40] "Rhode Island" "South Carolina" "South Dakota"
#> [43] "Tennessee" "Texas" "Utah"
#> [46] "Vermont" "Virginia" "Washington"
#> [49] "West Virginia" "Wisconsin" "Wyoming"
#> [52] "Puerto Rico" "US"
#> attr(,"na.action")
#> [1] 2
#> attr(,"class")
#> [1] "omit"
Tabular scoring of forecasts
`scoringutils` provides functions for creating summarized scores, including
interval scores, skill relative to a chosen baseline, and coverage at
different prediction intervals. Here we show the scores for the national (US)
forecasts, summarized by model across all forecast dates so far.
# Score the national-level forecasts and tabulate the mean scores per model.
#
# In scoringutils >= 2.0.0, summarise_scores() no longer accepts
# `relative_skill` / `baseline`; those arguments would be forwarded through
# `...` to the summary function and silently ignored. Relative skill is
# instead added to the unsummarised scores with add_relative_skill(), which
# is expected to append relative-skill columns for the default metric (WIS
# for quantile forecasts), scaled against the chosen baseline model.
chosen_location <- "US"
forecast_and_target |>
  filter(location == chosen_location) |>
  as_forecast_quantile(
    observed = "true_value",
    predicted = "prediction",
    quantile_level = "quantile"
  ) |>
  score() |>
  add_relative_skill(baseline = "FluSight-ensemble") |>
  summarise_scores(by = "model") |>
  # Second pass only rounds the per-model summaries to 2 significant digits.
  summarise_scores(
    fun = signif,
    digits = 2
  ) |>
  kable()
#> ℹ Some rows containing NA values may be removed. This is fine if not
#> unexpected.
model | wis | overprediction | underprediction | dispersion | bias | interval_coverage_50 | interval_coverage_90 | ae_median |
---|---|---|---|---|---|---|---|---|
CEPH-Rtrend_fluH | 1500 | 220 | 820 | 510 | -0.340 | 0.530 | 0.85 | 2400 |
CMU-TimeSeries | 2000 | 250 | 780 | 940 | -0.220 | 0.580 | 0.94 | 3000 |
CU-ensemble | 1800 | 590 | 700 | 500 | -0.230 | 0.520 | 0.77 | 2600 |
FluSight-baseline | 2100 | 760 | 1100 | 260 | 0.081 | 0.091 | 0.78 | 3000 |
FluSight-ensemble | 1500 | 340 | 630 | 490 | -0.210 | 0.450 | 0.90 | 2400 |
FluSight-lop_norm | 1400 | 250 | 500 | 670 | -0.200 | 0.550 | 0.98 | 2300 |
GH-model | 8900 | 0 | 8800 | 73 | -1.000 | 0.000 | 0.00 | 9100 |
GT-FluFNP | 2700 | 580 | 1800 | 360 | -0.300 | 0.260 | 0.45 | 3600 |
ISU_NiemiLab-ENS | 2200 | 240 | 1500 | 420 | -0.470 | 0.330 | 0.56 | 2900 |
ISU_NiemiLab-NLH | 1700 | 170 | 1200 | 330 | -0.360 | 0.380 | 0.59 | 2300 |
ISU_NiemiLab-SIR | 2900 | 750 | 1600 | 530 | -0.420 | 0.310 | 0.48 | 3900 |
JHU_CSSE-CSSE_Ensemble | 900 | 160 | 310 | 440 | -0.059 | 0.560 | 0.96 | 1400 |
LUcompUncertLab-chimera | 2000 | 870 | 840 | 290 | -0.220 | 0.210 | 0.50 | 2600 |
LosAlamos_NAU-CModel_Flu | 7200 | 5400 | 1600 | 200 | -0.170 | 0.049 | 0.21 | 7900 |
MIGHTE-Nsemble | 1400 | 320 | 680 | 400 | -0.140 | 0.500 | 0.82 | 2100 |
MOBS-GLEAM_FLUH | 1400 | 180 | 610 | 640 | -0.340 | 0.550 | 0.92 | 2300 |
NIH-Flu_ARIMA | 2300 | 110 | 740 | 1500 | -0.180 | 0.580 | 0.92 | 2200 |
NU_UCSD-GLEAM_AI_FLUH | 2200 | 620 | 730 | 840 | -0.160 | 0.510 | 0.89 | 3500 |
PSI-PROF | 1400 | 350 | 390 | 640 | 0.036 | 0.540 | 0.85 | 2200 |
PSI-PROF_beta | 1800 | 430 | 650 | 730 | 0.073 | 0.540 | 0.84 | 2700 |
SGroup-RandomForest | 1700 | 100 | 990 | 640 | -0.240 | 0.500 | 0.92 | 2700 |
SigSci-CREG | 1100 | 390 | 380 | 330 | -0.120 | 0.310 | 0.77 | 1700 |
SigSci-TSENS | 1600 | 380 | 710 | 540 | -0.100 | 0.550 | 0.85 | 2400 |
Stevens-GBR | 2300 | 60 | 1800 | 440 | -0.530 | 0.270 | 0.49 | 3100 |
UGA_flucast-Copycat | 1700 | 200 | 920 | 570 | -0.280 | 0.470 | 0.87 | 2600 |
UGA_flucast-INFLAenza | 1700 | 190 | 1200 | 380 | -0.072 | 0.340 | 0.81 | 2500 |
UGA_flucast-OKeeffe | 500 | 0 | 390 | 110 | -0.750 | 0.170 | 0.75 | 820 |
UGuelph-CompositeCurve | 3500 | 2200 | 800 | 510 | 0.065 | 0.088 | 0.50 | 4800 |
UGuelphensemble-GRYPHON | 1800 | 540 | 820 | 450 | -0.130 | 0.330 | 0.86 | 2700 |
UM-DeepOutbreak | 2000 | 230 | 460 | 1400 | -0.086 | 0.710 | 0.83 | 2100 |
UMass-flusion | 1100 | 240 | 320 | 510 | -0.033 | 0.590 | 0.99 | 1700 |
UMass-trends_ensemble | 2000 | 720 | 890 | 370 | -0.056 | 0.310 | 0.56 | 2700 |
UNC_IDD-InfluPaint | 2800 | 1700 | 850 | 240 | -0.180 | 0.160 | 0.38 | 3600 |
UVAFluX-Ensemble | 2100 | 1100 | 570 | 460 | -0.130 | 0.410 | 0.69 | 2900 |
VTSanghani-Ensemble | 2700 | 960 | 1300 | 390 | -0.130 | 0.200 | 0.45 | 3600 |
cfa-flumech | 2600 | 1600 | 480 | 500 | 0.059 | 0.270 | 0.61 | 3700 |
cfarenewal-cfaepimlight | 1900 | 520 | 870 | 500 | -0.310 | 0.350 | 0.78 | 3000 |
fjordhest-ensemble | 1700 | 460 | 550 | 650 | -0.200 | 0.490 | 0.93 | 2700 |