API reference

gather_draws

gather_draws(
    data: InferenceData,
    group: str = "posterior",
    combined: bool = True,
    var_names: Iterable[str] | None = None,
    filter_vars: str | None = None,
    num_samples: int | None = None,
    rng: bool | int | Generator | None = None,
    value_name: str | None = None,
    variable_name: str | None = None,
) -> DataFrame

Convert an ArviZ InferenceData object to a polars DataFrame of tidy (gathered) draws, using the syntax of arviz.extract.

Parameters:

Name	Type	Description	Default
`data`	`InferenceData`	Data to convert.	required
`group`	`str`	`group` parameter passed to `arviz.extract`.	`'posterior'`
`combined`	`bool`	`combined` parameter passed to `arviz.extract`.	`True`
`var_names`	`Iterable[str] \| None`	`var_names` parameter passed to `arviz.extract`.	`None`
`filter_vars`	`str \| None`	`filter_vars` parameter passed to `arviz.extract`.	`None`
`num_samples`	`int \| None`	`num_samples` parameter passed to `arviz.extract`.	`None`
`rng`	`bool \| int \| Generator \| None`	`rng` parameter passed to `arviz.extract`.	`None`
`value_name`	`str \| None`	Name for the value column in the output DataFrame. If `None` (default), use `"value"`.	`None`
`variable_name`	`str \| None`	Name for the variable column in the output DataFrame. If `None` (default), use `"variable"`.	`None`

Returns:

Type	Description
`DataFrame`	The DataFrame of tidy (gathered) draws, including standard columns to identify a unique sample (typically `"chain"` and `"draw"`), a column of variable names, a column of associated variable values, plus (as needed) columns that index array-valued variables.

Source code in polarbayes/gather.py

def gather_draws(
    data: az.InferenceData,
    group: str = "posterior",
    combined: bool = True,
    var_names: Iterable[str] | None = None,
    filter_vars: str | None = None,
    num_samples: int | None = None,
    rng: bool | int | np.random.Generator | None = None,
    value_name: str | None = None,
    variable_name: str | None = None,
) -> pl.DataFrame:
    """
    Convert an ArviZ InferenceData object to a polars
    DataFrame of tidy (gathered) draws, using the syntax of
    [`arviz.extract`][].

    The requested variables are extracted together with a single
    [`arviz.extract`][] call. Each extracted variable is then spread
    and gathered on its own, and the per-variable frames are
    concatenated into one long DataFrame.

    Parameters
    ----------
    data
        Data to convert.

    group
        `group` parameter passed to [`arviz.extract`][].

    combined
        `combined` parameter passed to [`arviz.extract`][].

    var_names
        `var_names` parameter passed to [`arviz.extract`][].

    filter_vars
        `filter_vars` parameter passed to [`arviz.extract`][].

    num_samples
        `num_samples` parameter passed to [`arviz.extract`][].

    rng
        `rng` parameter passed to [`arviz.extract`][].

    value_name
        Name for the value column in the output DataFrame.
        If `None` (default), use `"value"`.

    variable_name
        Name for the variable column in the output DataFrame.
        If `None` (default), use `"variable"`.

    Returns
    -------
    pl.DataFrame
        The DataFrame of tidy (gathered) draws, including
        standard columns to identify a unique sample
        (typically `"chain"` and `"draw"`), a column of variable
        names, a column of associated variable values,
        plus (as needed) columns that index array-valued variables.
    """
    value_name = VALUE_NAME if value_name is None else value_name
    variable_name = VARIABLE_NAME if variable_name is None else variable_name

    # Extract all requested variables in one call so that every
    # variable ends up with the same set of draws.
    extracted = az.extract(
        data,
        group=group,
        combined=combined,
        var_names=var_names,
        filter_vars=filter_vars,
        num_samples=num_samples,
        keep_dataset=True,
        rng=rng,
    )

    # Spread and gather one variable at a time. The per-variable call
    # disables combining, filtering and subsampling -- presumably so the
    # already-extracted draws are used as-is (confirm against
    # spread_draws_and_get_index_cols).
    per_variable_frames = []
    for var in extracted.data_vars:
        spread, index_cols = spread_draws_and_get_index_cols(
            extracted,
            group=group,
            var_names=var,
            combined=False,
            filter_vars=None,
            num_samples=None,
            rng=False,
        )
        per_variable_frames.append(
            gather_variables(
                spread,
                index_cols,
                variable_name=variable_name,
                value_name=value_name,
            )
        )

    # Different variables can carry different index columns, so the
    # frames are concatenated diagonally.
    result = pl.concat(per_variable_frames, how="diagonal_relaxed")

    # Column order must be fixed again here, even though each
    # gather_variables() call already orders its own output: a later
    # frame may introduce index columns that earlier frames lacked, and
    # after concatenation those columns would be out of order.
    name_cols = [variable_name, value_name]
    index_cols_ordered = order_index_column_names(
        [col for col in result.columns if col not in name_cols]
    )

    return result.select(index_cols_ordered + name_cols)

gather_variables

gather_variables(
    data: LazyFrame | DataFrame,
    index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None,
    value_name: str | None = None,
    variable_name: str | None = None,
)

Gather variable columns into key-value pairs. Light wrapper of pl.DataFrame.unpivot designed for use with spread_draws output.

Parameters:

Name	Type	Description	Default
`data`	`LazyFrame \| DataFrame`	Input DataFrame to (un)pivot from wide to long format.	required
`index`	`ColumnNameOrSelector \| Sequence[ColumnNameOrSelector] \| None`	Polars expression selecting mandatory or optional columns to index the gather. Passed as the `index` argument to `pl.DataFrame.unpivot`. If `None` (default), use the columns `["chain", "draw"]` if they are present. Those are the MCMC index columns created when `spread_draws` is called on a standard `az.InferenceData` object.	`None`
`value_name`	`str \| None`	Name for the value column in the output DataFrame. If `None` (default), use `"value"`.	`None`
`variable_name`	`str \| None`	Name for the variable column in the output DataFrame. If `None` (default), use `"variable"`.	`None`

Returns:

Type	Description
`LazyFrame \| DataFrame`	Unpivoted (pivoted longer) tidy data frame with index columns plus variable name and value columns.

Raises:

Type	Description
`ValueError`	If `value_name` or `variable_name` conflicts with requested index columns.

Source code in polarbayes/gather.py

def gather_variables(
    data: pl.LazyFrame | pl.DataFrame,
    index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None,
    value_name: str | None = None,
    variable_name: str | None = None,
) -> pl.LazyFrame | pl.DataFrame:
    """
    Gather variable columns into key-value pairs.
    Light wrapper of [`pl.DataFrame.unpivot`][polars.DataFrame.unpivot]
    designed for use with
    [`spread_draws`][polarbayes.spread.spread_draws] output.

    Parameters
    ----------
    data
        Input DataFrame to (un)pivot from wide to long format.
    index
        Polars expression selecting mandatory or optional columns to
        index the gather. Passed as the `index` argument to
        [`pl.DataFrame.unpivot`][polars.DataFrame.unpivot].
        If `None` (default), use the columns
        `["chain", "draw"]` if they are present. Those are the MCMC
        index columns created when
        [`spread_draws`][polarbayes.spread.spread_draws] is called on
        a standard [`az.InferenceData`][arviz.InferenceData] object.

    value_name
        Name for the value column in the output DataFrame.
        If `None` (default), use `"value"`.

    variable_name
        Name for the variable column in the output DataFrame.
        If `None` (default), use `"variable"`.

    Returns
    -------
    pl.LazyFrame | pl.DataFrame
        Unpivoted (pivoted longer) tidy data frame with index columns plus
        variable name and value columns.

    Raises
    ------
    ValueError
        If `value_name` or `variable_name` conflicts with requested
        index columns.
    """
    if variable_name is None:
        variable_name = VARIABLE_NAME
    if value_name is None:
        value_name = VALUE_NAME
    if index is None:
        # require_all=False: use whichever of the two default index
        # columns exist instead of failing when one is absent.
        index = cs.by_name(CHAIN_NAME, DRAW_NAME, require_all=False)

    # Resolve the selector against the schema to get concrete column
    # names, then put them in the canonical index-column order.
    index_names = order_index_column_names(
        data.select(index).collect_schema().names()
    )

    # Check for name clashes up front: this gives a more informative
    # error message than `unpivot()` gives on its own. `value_name` is
    # checked before `variable_name`.
    for arg_name, arg_value in (
        ("value_name", value_name),
        ("variable_name", variable_name),
    ):
        _assert_not_in_index_columns(arg_name, arg_value, index_names)

    unpivoted = data.unpivot(
        index=index, variable_name=variable_name, value_name=value_name
    )
    # Order output columns: index columns first, then variable and value.
    return unpivoted.select(index_names + [variable_name, value_name])

spread_draws

spread_draws(
    data: InferenceData,
    group: str = "posterior",
    combined: bool = True,
    var_names: Iterable[str] | None = None,
    filter_vars: str | None = None,
    num_samples: int | None = None,
    rng: bool | int | Generator | None = None,
) -> DataFrame

Convert an ArviZ InferenceData object to a polars DataFrame of tidy (spread) draws, using the syntax of arviz.extract.

Parameters:

Name	Type	Description	Default
`data`	`InferenceData`	Data to convert.	required
`group`	`str`	`group` parameter passed to `arviz.extract`.	`'posterior'`
`combined`	`bool`	`combined` parameter passed to `arviz.extract`.	`True`
`var_names`	`Iterable[str] \| None`	`var_names` parameter passed to `arviz.extract`.	`None`
`filter_vars`	`str \| None`	`filter_vars` parameter passed to `arviz.extract`.	`None`
`num_samples`	`int \| None`	`num_samples` parameter passed to `arviz.extract`.	`None`
`rng`	`bool \| int \| Generator \| None`	`rng` parameter passed to `arviz.extract`.	`None`

Returns:

Type	Description
`DataFrame`	The DataFrame of tidy draws. Consists of columns named for variables and index columns. Columns named for variables contain the sampled values of those variables. Index columns include standard columns to identify a unique sample (typically `"chain"` and `"draw"`) plus (as needed) columns that index array-valued variables.

Source code in polarbayes/spread.py

def spread_draws(
    data: az.InferenceData,
    group: str = "posterior",
    combined: bool = True,
    var_names: Iterable[str] | None = None,
    filter_vars: str | None = None,
    num_samples: int | None = None,
    rng: bool | int | np.random.Generator | None = None,
) -> pl.DataFrame:
    """
    Convert an ArviZ InferenceData object to a polars
    DataFrame of tidy (spread) draws, using the syntax of
    [`arviz.extract`][].

    Parameters
    ----------
    data
        Data to convert.

    group
        `group` parameter passed to [`arviz.extract`][].

    combined
        `combined` parameter passed to [`arviz.extract`][].

    var_names
        `var_names` parameter passed to [`arviz.extract`][].

    filter_vars
        `filter_vars` parameter passed to [`arviz.extract`][].

    num_samples
        `num_samples` parameter passed to [`arviz.extract`][].

    rng
        `rng` parameter passed to [`arviz.extract`][].

    Returns
    -------
    pl.DataFrame
        The DataFrame of tidy draws. Consists of columns named for
        variables and index columns. Columns named for variables
        contain the sampled values of those variables. Index columns
        include standard columns to identify a unique
        sample (typically `"chain"` and `"draw"`) plus (as needed)
        columns that index array-valued variables.
    """
    # Delegate to the helper that also reports the index column names;
    # only the DataFrame is part of this function's contract.
    draws, _index_cols = spread_draws_and_get_index_cols(
        data,
        group=group,
        combined=combined,
        var_names=var_names,
        filter_vars=filter_vars,
        num_samples=num_samples,
        rng=rng,
    )
    return draws

spread_draws_and_get_index_cols

spread_draws_and_get_index_cols(
    data: InferenceData,
    group: str = "posterior",
    combined: bool = True,
    var_names: Iterable[str] | None = None,
    filter_vars: str | None = None,
    num_samples: int | None = None,
    rng: bool | int | Generator | None = None,
) -> tuple[DataFrame, tuple]

Convert an ArviZ InferenceData object to a polars DataFrame of tidy (spread) draws, using the syntax of arviz.extract. Return that DataFrame alongside a tuple giving the names of the DataFrame's index columns.

Parameters:

Name	Type	Description	Default
`data`	`InferenceData`	Data to convert.	required
`group`	`str`	`group` parameter passed to `arviz.extract`.	`'posterior'`
`combined`	`bool`	`combined` parameter passed to `arviz.extract`.	`True`
`var_names`	`Iterable[str] \| None`	`var_names` parameter passed to `arviz.extract`.	`None`
`filter_vars`	`str \| None`	`filter_vars` parameter passed to `arviz.extract`.	`None`
`num_samples`	`int \| None`	`num_samples` parameter passed to `arviz.extract`.	`None`
`rng`	`bool \| int \| Generator \| None`	`rng` parameter passed to `arviz.extract`.	`None`

Returns:

Type	Description
`tuple[DataFrame, tuple]`	Two-entry tuple whose first entry is the DataFrame, and whose second entry is a tuple giving the names of that DataFrame's index columns. The DataFrame consists of columns named for variables and index columns. Columns named for variables contain the sampled values of those variables. Index columns include standard columns to identify a unique sample (typically `"chain"` and `"draw"`) plus (as needed) columns that index array-valued variables.

Source code in polarbayes/spread.py

def spread_draws_and_get_index_cols(
    data: az.InferenceData,
    group: str = "posterior",
    combined: bool = True,
    var_names: Iterable[str] | None = None,
    filter_vars: str | None = None,
    num_samples: int | None = None,
    rng: bool | int | np.random.Generator | None = None,
) -> tuple[pl.DataFrame, tuple]:
    """
    Convert an ArviZ InferenceData object to a polars
    DataFrame of tidy (spread) draws, using the syntax of
    [`arviz.extract`][]. Return that DataFrame alongside the
    names of the DataFrame's index columns.

    Parameters
    ----------
    data
        Data to convert.

    group
        `group` parameter passed to [`arviz.extract`][].

    combined
        `combined` parameter passed to [`arviz.extract`][].

    var_names
        `var_names` parameter passed to [`arviz.extract`][].

    filter_vars
        `filter_vars` parameter passed to [`arviz.extract`][].

    num_samples
        `num_samples` parameter passed to [`arviz.extract`][].

    rng
        `rng` parameter passed to [`arviz.extract`][].

    Returns
    -------
    tuple[pl.DataFrame, tuple]
        Two-entry tuple whose first entry is the DataFrame, and whose
        second entry gives the names of that DataFrame's
        index columns. The DataFrame consists of columns named for
        variables and index columns. Columns named for variables
        contain the sampled values of those variables. Index columns
        include standard columns to identify a unique
        sample (typically `"chain"` and `"draw"`) plus (as needed)
        columns that index array-valued variables.
    """
    pandas_draws = spread_draws_to_pandas_(
        data,
        group=group,
        combined=combined,
        var_names=var_names,
        filter_vars=filter_vars,
        num_samples=num_samples,
        rng=rng,
    )

    # The pandas index levels hold the index columns. Record their names
    # before reset_index() turns them into ordinary columns for polars.
    index_cols = pandas_draws.index.names
    polars_draws = pl.DataFrame(pandas_draws.reset_index())

    # NOTE(review): the second returned element is whatever
    # order_index_column_names() returns. gather_draws() concatenates
    # that helper's result with a list, so this may be a list rather
    # than the annotated tuple -- confirm.
    index_cols_ordered = order_index_column_names(index_cols)

    # Index columns first (all must exist), then every remaining column.
    ordered_draws = polars_draws.select(
        cs.by_name(index_cols_ordered, require_all=True),
        cs.exclude(index_cols_ordered),
    )
    return ordered_draws, index_cols_ordered