__all__ = ["AccumTable", "accum_ratio", "accum_ratiop", "accum_cols"]
import warnings
from collections import OrderedDict
import numpy as np
from .rt_accum2 import Accum2
from .rt_categorical import Categorical
from .rt_enum import TypeRegister
from .rt_numpy import full
[docs]
class AccumTable(Accum2):
"""
Enables the creation of tables with values calculated by various reducing functions.
:py:class:`~.rt_accumtable.AccumTable` is a wrapper on :py:class:`~.rt_accum2.Accum2`
and can generate tables with multiple footer rows and margin columns, which
represent values calculated by a variety of reducing functions.
An :py:class:`~.rt_accumtable.AccumTable` holds multiple tables at once. For
example, an :py:class:`~.rt_accumtable.AccumTable` can hold the tables calculated by
the mean, sum, and variance reducing functions. All tables in the
:py:class:`~.rt_accumtable.AccumTable` are grouped by the same two
:py:class:`~.rt_categorical.Categorical` objects.
Each table in the :py:class:`~.rt_accumtable.AccumTable` has these three parts:
* **Inner table** - a table of values calculated by a reducing function and indexed
by row and column groups.
* **Footer row** - a row on the bottom margin that contains the calculated value
for each column group.
* **Margin column** - a column on the right margin that contains the calculated
value for each row group.
After creating an :py:class:`~.rt_accumtable.AccumTable`, you can generate a
:py:class:`~.rt_dataset.Dataset` to view the calculated values as a table. You can
customize the generated table by specifying one inner table, a set of footer rows,
and a set of margin columns.
You create an :py:class:`~.rt_accumtable.AccumTable` and generate a table with the
following multistep process:
#. Pass two :py:class:`~.rt_categorical.Categorical` objects to create an
:py:class:`~.rt_accumtable.AccumTable` and to specify the row and column groups.
#. Add tables to the :py:class:`~.rt_accumtable.AccumTable` by setting its elements
to :py:class:`~.rt_dataset.Dataset` objects of values calculated by a reducing
function. For a list of reducing functions, see
:doc:`/tutorial/tutorial_cat_reduce`.
#. Specify which summary rows and columns you want to include in a generated table
using :py:meth:`~.rt_accumtable.AccumTable.set_footer_rows` and
:py:meth:`~.rt_accumtable.AccumTable.set_margin_columns`.
#. Generate a table view with the specified summary rows and columns using
:py:meth:`~.rt_accumtable.AccumTable.gen`.
Parameters
----------
cat_rows : :py:class:`~.rt_categorical.Categorical`
The row groups used to accumulate the values.
cat_cols : :py:class:`~.rt_categorical.Categorical`
The column groups used to accumulate the values.
filter : ndarray
Boolean mask array applied to arrays before grouping, reducing, and addition
to the :py:class:`~.rt_accumtable.AccumTable`.
showfilter : bool
Controls whether the returned table contains row or column groups that result
entirely in `0` or `nan` when the filter is applied.
See Also
--------
:py:class:`.rt_accum2.Accum2` :
The parent class for :py:class:`~.rt_accumtable.AccumTable`.
:py:class:`.rt_categorical.Categorical` :
A class that efficiently stores an array of repeated strings and is used for
groupby operations.
:py:class:`.rt_groupbyops.GroupByOps` :
A class that holds the reducing functions used to create an
:py:class:`~.rt_accumtable.AccumTable`.
Examples
--------
Construct a :py:class:`~.rt_dataset.Dataset` for the following examples:
>>> ds = rt.Dataset()
>>> ds.Zeros = [0, 0, 0, 0, 0]
>>> ds.Ones = [1, 1, 1, 1, 1]
>>> ds.Twos = [2, 2, 2, 2, 2]
>>> ds.Nans = [rt.nan, rt.nan, rt.nan, rt.nan, rt.nan]
>>> ds.Ints = [0, 1, 2, 3, 4]
>>> ds.Groups = rt.Cat(["Group1", "Group2", "Group1", "Group1", "Group2"])
>>> ds.Letters = rt.Cat(["A", "B", "C", "A", "C"])
>>> ds
# Zeros Ones Twos Nans Ints Groups Letters
- ----- ---- ---- ---- ---- ------ -------
0 0 1 2 nan 0 Group1 A
1 0 1 2 nan 1 Group2 B
2 0 1 2 nan 2 Group1 C
3 0 1 2 nan 3 Group1 A
4 0 1 2 nan 4 Group2 C
<BLANKLINE>
[5 rows x 7 columns] total bytes: 225.0 B
**Create an AccumTable**
Pass two :py:class:`~.rt_categorical.Categorical` objects to create the row and
column groups for the :py:class:`~.rt_accumtable.AccumTable`:
>>> at = rt.AccumTable(ds.Groups, ds.Letters)
>>> at
Inner Tables: []
Margin Columns: []
Footer Rows: []
The :py:class:`~.rt_accumtable.AccumTable` doesn't yet hold any inner tables. Add a
table using a reducing function. This example adds a table with values calculated by
:py:meth:`~.rt_groupbyops.GroupByOps.count`:
>>> at["Count"] = at.count()
>>> at["Count"]
*Groups A B C Count
------- - - - -----
Group1 2 0 1 3
Group2 0 1 1 2
------- - - - -----
Count 2 1 2 5
<BLANKLINE>
[2 rows x 5 columns] total bytes: 52.0 B
The :py:class:`~.rt_accumtable.AccumTable` now holds the Count table:
>>> at
Inner Tables: ['Count']
Margin Columns: ['Count']
Footer Rows: ['Count']
Add more tables to the :py:class:`~.rt_accumtable.AccumTable` using different
reducing functions:
>>> at["Sum Ints"] = at.sum(ds.Ints)
>>> at["Mean Double"] = at.mean(ds.Ints * ds.Twos)
>>> at["Variance Ints"] = at.var(ds.Ints)
>>> at
Inner Tables: ['Count', 'Sum Ints', 'Mean Double', 'Variance Ints']
Margin Columns: ['Count', 'Sum Ints', 'Mean Double', 'Variance Ints']
Footer Rows: ['Count', 'Sum Ints', 'Mean Double', 'Variance Ints']
Generate a table with multiple summary rows and columns using
:py:meth:`~.rt_accumtable.AccumTable.gen`. Pass the name of the inner table that you
want to include in the generated table:
>>> at.gen("Sum Ints")
*Groups A B C Sum Ints Count Mean Double Variance Ints
------------- ---- ---- ---- -------- ----- ----------- -------------
Group1 3 0 2 5 3 3.33 2.33
Group2 0 1 4 5 2 5.00 4.50
------------- ---- ---- ---- -------- ----- ----------- -------------
Sum Ints 3 1 6 10
Count 2 1 2 5
Mean Double 3.00 2.00 6.00 4.00
Variance Ints 4.50 nan 2.00 2.00
<BLANKLINE>
[2 rows x 8 columns] total bytes: 124.0 B
By default, all summary rows and columns appear in the generated table. Specify
which summary rows and columns appear using
:py:meth:`~.rt_accumtable.AccumTable.set_footer_rows` and
:py:meth:`~.rt_accumtable.AccumTable.set_margin_columns`:
>>> at.set_footer_rows(["Count", "Sum Ints"])
>>> at.set_margin_columns(["Variance Ints"])
>>> at
Inner Tables: ['Count', 'Sum Ints', 'Mean Double', 'Variance Ints']
Margin Columns: ['Variance Ints']
Footer Rows: ['Count', 'Sum Ints']
Generate the table with the specified summary rows and columns:
>>> at.gen("Sum Ints")
*Groups A B C Sum Ints Variance Ints
-------- - - - -------- -------------
Group1 3 0 2 5 2.33
Group2 0 1 4 5 4.50
-------- - - - -------- -------------
Sum Ints 3 1 6 10
Count 2 1 2
<BLANKLINE>
[2 rows x 6 columns] total bytes: 92.0 B
"""
# -------------------------------------------------------
def __init__(self, cat_rows, cat_cols, filter=None, showfilter=False):
    """
    Accept the constructor arguments without doing any work.

    The per-instance bookkeeping of an :py:class:`~.rt_accumtable.AccumTable`
    (inner tables, footer rows, margin columns, default table name) is created
    in ``__new__``, so there is nothing left to initialize here. The signature
    mirrors ``__new__`` because Python passes the same arguments to both.

    Parameters
    ----------
    cat_rows : :py:class:`~.rt_categorical.Categorical`
        The row groups. Unused here.
    cat_cols : :py:class:`~.rt_categorical.Categorical`
        The column groups. Unused here.
    filter : ndarray, optional
        Boolean mask. Unused here.
    showfilter : bool, default `False`
        Unused here.
    """
    # NOTE(review): presumably this empty override also keeps the parent
    # class initializer from running after ``__new__`` -- confirm against Accum2.
def __new__(cls, cat_rows, cat_cols, filter=None, showfilter=False):
    """
    Create the instance through the parent class and attach empty bookkeeping.

    Parameters
    ----------
    cat_rows : :py:class:`~.rt_categorical.Categorical`
        The row groups, forwarded to the parent class.
    cat_cols : :py:class:`~.rt_categorical.Categorical`
        The column groups, forwarded to the parent class.
    filter : ndarray, optional
        Boolean mask, forwarded to the parent class.
    showfilter : bool, default `False`
        Forwarded to the parent class.

    Returns
    -------
    :py:class:`~.rt_accumtable.AccumTable`
        The new instance, holding no inner tables, footer rows, or margin
        columns, and with no default inner table name.
    """
    obj = super(AccumTable, cls).__new__(cls, cat_rows, cat_cols, filter, showfilter)
    # Three independent, insertion-ordered registries keyed by table name.
    obj._inner, obj._rows, obj._cols = OrderedDict(), OrderedDict(), OrderedDict()
    # Nothing has been added or requested yet, so there is no default table.
    obj._default_inner_name = None
    return obj
# -------------------------------------------------------
[docs]
def __repr__(self):
    """
    Return a string representation of the :py:class:`~.rt_accumtable.AccumTable`.

    The string has three lines listing, in order, the names of the inner
    tables, the margin columns, and the footer rows.

    Returns
    -------
    str
        The :py:class:`~.rt_accumtable.AccumTable` as a string.
    """
    sections = (
        ("Inner Tables: ", self._inner),
        ("Margin Columns: ", self._cols),
        ("Footer Rows: ", self._rows),
    )
    return "\n".join(label + str(list(registry.keys())) for label, registry in sections)
# -------------------------------------------------------
[docs]
def __setitem__(self, name: str, ds):
    """
    Add an inner table, corresponding footer row, and corresponding margin column to
    the :py:class:`~.rt_accumtable.AccumTable`.

    The summary column and footer row of ``ds`` are renamed to ``name`` when
    ``ds`` has exactly one of each, and ``name`` becomes the default inner
    table used by :py:meth:`~.rt_accumtable.AccumTable.gen`.

    Parameters
    ----------
    name : str
        Name of the inner table and its corresponding footer row and margin column.
    ds : :py:class:`~.rt_dataset.Dataset`
        The :py:class:`~.rt_dataset.Dataset` that provides data for the inner table,
        footer row, and margin column. It is modified in place by the renaming.

    Raises
    ------
    IndexError
        If ``name`` is not a string.
    ValueError
        If ``ds`` is not a :py:class:`~.rt_dataset.Dataset`.
    """
    # isinstance (rather than an exact type match) so that str subclasses are
    # accepted as table names too.
    if not isinstance(name, str):
        raise IndexError("name must be a string table name")
    if not isinstance(ds, TypeRegister.Dataset):
        raise ValueError("ds must be a Dataset")
    # Register the table under all three roles; the row/column registries only
    # track names and order, so their values are unused.
    self._inner[name] = ds
    self._rows[name] = None
    self._cols[name] = None
    self._rename_summary_row_and_col(ds, name)
    self._default_inner_name = name
# -------------------------------------------------------
[docs]
def __getitem__(self, index: str):
    """
    Return the inner table, footer row, and margin column corresponding to ``index``.

    A successful lookup also makes ``index`` the default inner table used by
    :py:meth:`~.rt_accumtable.AccumTable.gen` when no table name is passed.

    Parameters
    ----------
    index : str
        Name of the inner table, footer row, and margin column to return.

    Returns
    -------
    :py:class:`~.rt_dataset.Dataset`
        The inner table, footer row, and margin column corresponding to ``index``.

    Raises
    ------
    IndexError
        If ``index`` is not a string.
    KeyError
        If no inner table named ``index`` has been added. The default inner
        table is left unchanged in that case.
    """
    if not isinstance(index, str):
        raise IndexError("Index must be a string table name")
    # Look up first: an unknown name must not replace the remembered default,
    # otherwise a later gen() without arguments would fail on that bogus name.
    table = self._inner[index]
    self._default_inner_name = index
    return table
# -------------------------------------------------------
[docs]
def _rename_summary_row_and_col(self, ds, new_name: str):
    """
    Rename the single summary column and single footer row of ``ds`` to ``new_name``.

    Each rename is applied only when ``ds`` reports exactly one summary column
    or exactly one footer row, respectively; otherwise that part is left alone.
    ``ds`` is modified in place.

    Parameters
    ----------
    ds : Dataset
        The dataset whose summary column and footer row are renamed.
    new_name : str
        The new name for the summary column and footer row.

    Returns
    -------
    Dataset
        The same ``ds`` object that was passed in.
    """
    summary_cols = ds.summary_get_names()
    if len(summary_cols) == 1:
        (only_col,) = summary_cols
        ds.col_rename(only_col, new_name)

    footer_map = ds.footer_get_dict()
    if len(footer_map) == 1:
        # A footer is renamed by removing it and re-adding its values under the new label.
        ((old_label, footer_values),) = footer_map.items()
        ds.footer_remove(old_label)
        ds.footer_set_values(new_name, footer_values)
    return ds
# -------------------------------------------------------
[docs]
def gen(self, table_name=None, format=None, ref_table=None, remove_blanks=True):
    """
    Generate a table with one inner table and multiple footer rows and margin
    columns from an :py:class:`~.rt_accumtable.AccumTable`.

    Parameters
    ----------
    table_name : str, optional
        The name of the inner table that appears in the generated table. If not
        provided, the default inner table is used: the table most recently
        added, retrieved by name, or generated. A successfully resolved
        ``table_name`` becomes the new default.
    format : dict of {str : func}, optional
        (Not yet implemented) A dictionary used to specify the formatting of each
        cell in the table. Each key is a formatting type, such as "bold", "color",
        and "background", and each value is a function that applies conditional
        formatting to each table cell. For example, ``format={"bold": lambda v: v > 0}``
        applies bold formatting to all cells with positive values.
    ref_table : str or :py:class:`~.rt_dataset.Dataset`, optional
        (Not yet implemented) The name of an :py:class:`~.rt_accumtable.AccumTable`
        or a :py:class:`~.rt_dataset.Dataset` of the same shape that acts as a
        format reference for the generated table.
    remove_blanks : bool, default `True`
        Controls whether rows and columns consisting entirely of `0` and `nan`
        are removed from the generated table.

    Returns
    -------
    :py:class:`.rt_dataset.Dataset`
        A table generated from the :py:class:`~.rt_accumtable.AccumTable`, including
        footer rows and margin columns.

    Raises
    ------
    ValueError
        If ``table_name`` is not provided and there is no default inner table.
    KeyError
        If ``table_name`` is not the name of an inner table. The default inner
        table is left unchanged in that case.

    See Also
    --------
    :py:class:`.rt_accumtable.AccumTable` :
        The class containing :py:meth:`~.rt_accumtable.AccumTable.gen`.
    :py:meth:`.rt_accumtable.AccumTable.set_footer_rows` :
        The method that sets the footer rows for the :py:class:`.rt_accumtable.AccumTable`
        and its generated tables.
    :py:meth:`.rt_accumtable.AccumTable.set_margin_columns` :
        The method that sets the margin columns for the :py:class:`.rt_accumtable.AccumTable`
        and its generated tables.

    Examples
    --------
    Construct a :py:class:`~.rt_dataset.Dataset` for the following examples:

    >>> ds = rt.Dataset()
    >>> ds.Zeros = [0, 0, 0, 0, 0]
    >>> ds.Ones = [1, 1, 1, 1, 1]
    >>> ds.Twos = [2, 2, 2, 2, 2]
    >>> ds.Nans = [rt.nan, rt.nan, rt.nan, rt.nan, rt.nan]
    >>> ds.Ints = [0, 1, 2, 3, 4]
    >>> ds.Groups = rt.Cat(["Group1", "Group2", "Group1", "Group1", "Group2"])
    >>> ds.Letters = rt.Cat(["A", "B", "C", "A", "C"])
    >>> ds
    # Zeros Ones Twos Nans Ints Groups Letters
    - ----- ---- ---- ---- ---- ------ -------
    0 0 1 2 nan 0 Group1 A
    1 0 1 2 nan 1 Group2 B
    2 0 1 2 nan 2 Group1 C
    3 0 1 2 nan 3 Group1 A
    4 0 1 2 nan 4 Group2 C
    <BLANKLINE>
    [5 rows x 7 columns] total bytes: 225.0 B

    Construct an :py:class:`~.rt_accumtable.AccumTable` from that data:

    >>> at = rt.AccumTable(ds.Groups, ds.Letters)
    >>> at["Count"] = at.count()
    >>> at["Sum Ints"] = at.sum(ds.Ints)
    >>> at["Mean Double"] = at.mean(ds.Ints * ds.Twos)
    >>> at
    Inner Tables: ['Count', 'Sum Ints', 'Mean Double']
    Margin Columns: ['Count', 'Sum Ints', 'Mean Double']
    Footer Rows: ['Count', 'Sum Ints', 'Mean Double']

    Generate a table from this :py:class:`~.rt_accumtable.AccumTable` using default
    parameter values:

    >>> at.gen()
    *Groups A B C Mean Double Count Sum Ints
    ----------- ---- ---- ---- ----------- ----- --------
    Group1 3.00 nan 4.00 3.33 3 5
    Group2 nan 2.00 8.00 5.00 2 5
    ----------- ---- ---- ---- ----------- ----- --------
    Mean Double 3.00 2.00 6.00 4.00
    Count 2 1 2 5
    Sum Ints 3 1 6 10
    <BLANKLINE>
    [2 rows x 7 columns] total bytes: 108.0 B

    Without specifying ``table_name``, the last-created inner table, Mean Double,
    appears as the generated inner table and the first footer row and margin column.

    Pass an inner table name to generate a specific table:

    >>> at.gen("Sum Ints")
    *Groups A B C Sum Ints Count Mean Double
    ----------- ---- ---- ---- -------- ----- -----------
    Group1 3 0 2 5 3 3.33
    Group2 0 1 4 5 2 5.00
    ----------- ---- ---- ---- -------- ----- -----------
    Sum Ints 3 1 6 10
    Count 2 1 2 5
    Mean Double 3.00 2.00 6.00 4.00
    <BLANKLINE>
    [2 rows x 7 columns] total bytes: 108.0 B
    """
    # Resolve the displayed inner table, falling back to the remembered default.
    if table_name is None:
        table_name = self._default_inner_name
    if table_name is None:
        raise ValueError("Must specify a table name")
    # Look the table up before remembering its name, so that an unknown name
    # raises KeyError without replacing the current default.
    orig = self._inner[table_name]
    self._default_inner_name = table_name

    # Work on a copy; optionally trim blanks and keep the row filter so margin
    # columns taken from the other inner tables line up with the surviving rows.
    all_rows = slice(None, None, None)
    if remove_blanks:
        (clean, row_filter, _) = orig.copy().trim(ret_filters=True)
        if row_filter is None:
            row_filter = all_rows
    else:
        clean = orig.copy()
        row_filter = all_rows

    # Add the margin columns to the right. The displayed table's own margin
    # column is already part of ``clean``, so it is skipped here.
    summary_names = clean.summary_get_names()
    for mar_col in self._cols:
        if mar_col == table_name:
            continue
        clean[mar_col] = self._inner[mar_col][row_filter, mar_col]
        summary_names += [mar_col]
    clean.summary_set_names(summary_names)

    # Add the footer rows at the bottom, again skipping the displayed table's own.
    for footer_row in self._rows:
        if footer_row == table_name:
            continue
        fd = list(self._inner[footer_row].footer_get_dict(footer_row).values())[0]
        # Keep only values for columns present in the generated table. A filtered
        # copy is built (instead of deleting keys in place) so the mapping handed
        # back by the stored inner table is never modified.
        visible = clean.keys()
        kept = {key: value for key, value in fd.items() if key in visible}
        clean.footer_set_values(footer_row, kept)
    return clean
# -------------------------------------------------------
[docs]
def set_margin_columns(self, cols):
    """
    Specify the margin columns that appear in a table generated from an
    :py:class:`~.rt_accumtable.AccumTable`.

    Pass a list of inner table names to set the corresponding margin columns for
    the :py:class:`~.rt_accumtable.AccumTable` instance. The margin columns contain
    values calculated by a reducing function and grouped by the
    :py:class:`~.rt_accumtable.AccumTable` rows.

    When you generate a table using :py:meth:`~.rt_accumtable.AccumTable.gen`, the
    margin column corresponding to the inner table appears first. Then, the
    remaining margin columns appear in the order you passed them to
    :py:meth:`~.rt_accumtable.AccumTable.set_margin_columns`.

    Passing an empty list removes all margin columns from the generated table, except
    for the margin column corresponding to the inner table.

    Parameters
    ----------
    cols : list of str
        A list of inner table names, in the order you want the margin columns to
        appear in a generated table. The previous selection is discarded.

    See Also
    --------
    :py:class:`.rt_accumtable.AccumTable` :
        The class containing :py:meth:`~.rt_accumtable.AccumTable.set_margin_columns`.
    :py:meth:`.rt_accumtable.AccumTable.gen` :
        The method that generates a table from an :py:class:`.rt_accumtable.AccumTable`.
    :py:meth:`.rt_accumtable.AccumTable.set_footer_rows` :
        The method that sets the footer rows for the :py:class:`.rt_accumtable.AccumTable`
        and its generated tables.

    Examples
    --------
    Construct a :py:class:`~.rt_dataset.Dataset` for the following examples:

    >>> ds = rt.Dataset()
    >>> ds.Zeros = [0, 0, 0, 0, 0]
    >>> ds.Ones = [1, 1, 1, 1, 1]
    >>> ds.Twos = [2, 2, 2, 2, 2]
    >>> ds.Nans = [rt.nan, rt.nan, rt.nan, rt.nan, rt.nan]
    >>> ds.Ints = [0, 1, 2, 3, 4]
    >>> ds.Groups = rt.Cat(["Group1", "Group2", "Group1", "Group1", "Group2"])
    >>> ds.Letters = rt.Cat(["A", "B", "C", "A", "C"])
    >>> ds
    # Zeros Ones Twos Nans Ints Groups Letters
    - ----- ---- ---- ---- ---- ------ -------
    0 0 1 2 nan 0 Group1 A
    1 0 1 2 nan 1 Group2 B
    2 0 1 2 nan 2 Group1 C
    3 0 1 2 nan 3 Group1 A
    4 0 1 2 nan 4 Group2 C
    <BLANKLINE>
    [5 rows x 7 columns] total bytes: 225.0 B

    Construct an :py:class:`~.rt_accumtable.AccumTable` from that data:

    >>> at = rt.AccumTable(ds.Groups, ds.Letters)
    >>> at["Count"] = at.count()
    >>> at["Sum Ints"] = at.sum(ds.Ints)
    >>> at["Mean Double"] = at.mean(ds.Ints * ds.Twos)
    >>> at["Variance Ints"] = at.var(ds.Ints)
    >>> at
    Inner Tables: ['Count', 'Sum Ints', 'Mean Double', 'Variance Ints']
    Margin Columns: ['Count', 'Sum Ints', 'Mean Double', 'Variance Ints']
    Footer Rows: ['Count', 'Sum Ints', 'Mean Double', 'Variance Ints']

    When you generate a table from the :py:class:`~.rt_accumtable.AccumTable`
    without setting the margin columns, all margin columns appear in the generated
    table:

    >>> at.gen("Sum Ints")
    *Groups A B C Sum Ints Count Mean Double Variance Ints
    ------------- ---- ---- ---- -------- ----- ----------- -------------
    Group1 3 0 2 5 3 3.33 2.33
    Group2 0 1 4 5 2 5.00 4.50
    ------------- ---- ---- ---- -------- ----- ----------- -------------
    Sum Ints 3 1 6 10
    Count 2 1 2 5
    Mean Double 3.00 2.00 6.00 4.00
    Variance Ints 4.50 nan 2.00 2.00
    <BLANKLINE>
    [2 rows x 8 columns] total bytes: 124.0 B

    Pass a list of inner table names from the :py:class:`~.rt_accumtable.AccumTable`
    to set the corresponding margin columns in a generated table:

    >>> at.set_margin_columns(["Variance Ints", "Count"])
    >>> at
    Inner Tables: ['Count', 'Sum Ints', 'Mean Double', 'Variance Ints']
    Margin Columns: ['Variance Ints', 'Count']
    Footer Rows: ['Count', 'Sum Ints', 'Mean Double', 'Variance Ints']

    Generate a table to see the new set of margin columns:

    >>> at.gen("Sum Ints")
    *Groups A B C Sum Ints Variance Ints Count
    ------------- ---- ---- ---- -------- ------------- -----
    Group1 3 0 2 5 2.33 3
    Group2 0 1 4 5 4.50 2
    ------------- ---- ---- ---- -------- ------------- -----
    Sum Ints 3 1 6 10
    Count 2 1 2 5
    Mean Double 3.00 2.00 6.00
    Variance Ints 4.50 nan 2.00 2.00
    <BLANKLINE>
    [2 rows x 7 columns] total bytes: 108.0 B

    Pass an empty list to remove all margin columns from the generated table, except
    for the margin column corresponding to the inner table. In this example, the
    Sum Ints margin column remains in the generated table:

    >>> at.set_margin_columns([])
    >>> at
    Inner Tables: ['Count', 'Sum Ints', 'Mean Double', 'Variance Ints']
    Margin Columns: []
    Footer Rows: ['Count', 'Sum Ints', 'Mean Double', 'Variance Ints']
    >>> at.gen("Sum Ints")
    *Groups A B C Sum Ints
    ------------- ---- ---- ---- --------
    Group1 3 0 2 5
    Group2 0 1 4 5
    ------------- ---- ---- ---- --------
    Sum Ints 3 1 6 10
    Count 2 1 2
    Mean Double 3.00 2.00 6.00
    Variance Ints 4.50 nan 2.00
    <BLANKLINE>
    [2 rows x 5 columns] total bytes: 76.0 B
    """
    # Only the names and their order matter; every value in the registry is None.
    self._cols = OrderedDict.fromkeys(cols)
# -------------------------------------------------------
[docs]
def accum_ratio(
    cat1,
    cat2=None,
    val1=None,
    val2=None,
    filt1=None,
    filt2=None,
    func1="nansum",
    func2=None,
    return_table=False,
    include_numer=False,
    include_denom=True,
    remove_blanks=False,
):
    """
    Generate a :py:class:`~.rt_dataset.Dataset` of ratios between values calculated by
    reducing functions for two arrays.

    :py:func:`~.rt_accumtable.accum_ratio` performs the following actions:

    #. Creates an :py:class:`~.rt_accumtable.AccumTable` using the groups of ``cat1``
       and ``cat2``.
    #. Aggregates the data from the ``val1`` and ``val2`` arrays according to the
       ``func1`` and ``func2`` reducing functions.
    #. Calculates a ratio between the values calculated by the reducing functions for
       ``val1`` and ``val2``.
    #. Returns either a :py:class:`~.rt_dataset.Dataset` or an
       :py:class:`~.rt_accumtable.AccumTable`, depending on the value of
       ``return_table``.

    By default, :py:func:`~.rt_accumtable.accum_ratio` returns a
    :py:class:`~.rt_dataset.Dataset` with a ``"Ratio"`` inner table. If ``return_table``
    is set to `True`, the function returns an :py:class:`~.rt_accumtable.AccumTable`,
    which can be converted to a :py:class:`~.rt_dataset.Dataset` using the
    :py:meth:`~.rt_accumtable.AccumTable.gen` method. Generating a
    :py:class:`~.rt_dataset.Dataset` gives you more control over which inner table,
    footer rows, and margin columns are included in the result.

    :py:func:`~.rt_accumtable.accum_ratio` supports only reducing functions that take an
    array as a parameter. For example ``count()`` isn't valid, as it doesn't accept an
    array as an input argument. For a list of reducing functions, see
    :doc:`/tutorial/tutorial_cat_reduce`.

    Parameters
    ----------
    cat1 : :py:class:`~.rt_categorical.Categorical`
        The row groups used to accumulate the values.
    cat2 : :py:class:`~.rt_categorical.Categorical`, optional
        The column groups used to accumulate the values. If not
        provided, :py:func:`~.rt_accumtable.accum_ratio` uses a
        :py:class:`~.rt_categorical.Categorical` with a single group, ``"NotGrouped"``.
        Column groups named ``"Numer"``, ``"Denom"``, or ``"Ratio"`` are renamed
        with a trailing underscore to avoid clashing with the summary names.
    val1 : array
        The numerator for the calculated ratio.
    val2 : array
        The denominator for the calculated ratio. As a shorthand, the function
        can be called positionally as ``accum_ratio(cat1, val1, val2)``; when
        ``val2`` is missing but ``cat2`` is given, the arguments are shifted
        accordingly and no column grouping is used.
    filt1 : array of bool, optional
        Boolean filter for ``val1`` array. The filter array must be the same length as
        ``val1`` and ``val2``.
    filt2 : array of bool, optional
        Boolean filter for ``val2`` array. The filter array must be the same length as
        ``val1`` and ``val2``. If not provided, the filter is the same as ``filt1``.
    func1 : str, default ``"nansum"``
        String of the name of the reducing function (for example, ``"sum"`` or
        ``"nanmean"``) used to reduce ``val1`` before calculating the ratio.
    func2 : str, optional
        String of the name of the reducing function (for example, ``"sum"`` or
        ``"nanmean"``) used to reduce ``val2`` before calculating the ratio. If not
        provided, the ``func1`` is applied to ``val2``.
    return_table : bool, default `False`
        If `False` (the default), returns a :py:class:`~.rt_dataset.Dataset` with the
        calculated ratio. If set to `True`, returns an
        :py:class:`~.rt_accumtable.AccumTable` from which you can generate a
        :py:class:`~.rt_dataset.Dataset`. The returned
        :py:class:`~.rt_accumtable.AccumTable` has ``"Numer"``, ``"Denom"``, and
        ``"Ratio"`` inner tables, footer rows, and margin columns.
    include_numer : bool, default `False`
        If set to `True`, include the values calculated by the reducing function for
        ``val1`` as a row and column in the returned table. Ignored if ``return_table``
        is `True`.
    include_denom : bool, default `True`
        If `True` (the default), include the values calculated by the reducing function
        for ``val2`` as a row and column in the returned table. Ignored if
        ``return_table`` is `True`.
    remove_blanks : bool, default `False`
        If set to `True`, removes rows and columns that consist entirely of `0` or
        `nan` from the returned table.

    Returns
    -------
    :py:class:`.rt_dataset.Dataset` or :py:class:`.rt_accumtable.AccumTable`
        Either a :py:class:`~.rt_dataset.Dataset` with a view of the calculated ratio,
        or an :py:class:`~.rt_accumtable.AccumTable`, depending on ``return_table``.

    Raises
    ------
    ValueError
        If ``val1`` is missing, or if ``val2`` is missing and cannot be taken
        from the positional shorthand.

    See Also
    --------
    :py:class:`.rt_accum2.Accum2` :
        The parent class for :py:class:`~.rt_accumtable.AccumTable`.
    :py:class:`.rt_accumtable.AccumTable` :
        A wrapper on :py:class:`~.rt_accum2.Accum2` that enables the creation of tables
        that combine the results of multiple tables generated from the
        :py:class:`~.rt_accum2.Accum2` object.
    :py:class:`.rt_categorical.Categorical` :
        A class that efficiently stores an array of repeated strings and is used for
        groupby operations.
    :py:class:`.rt_groupbyops.GroupByOps` :
        A class that holds the reducing functions used by
        :py:func:`~.rt_accumtable.accum_ratio`.

    Examples
    --------
    Construct a :py:class:`~.rt_dataset.Dataset` for the following examples:

    >>> ds = rt.Dataset()
    >>> ds.Zeros = [0, 0, 0, 0, 0]
    >>> ds.Ones = [1, 1, 1, 1, 1]
    >>> ds.Twos = [2, 2, 2, 2, 2]
    >>> ds.Nans = [rt.nan, rt.nan, rt.nan, rt.nan, rt.nan]
    >>> ds.Ints = [0, 1, 2, 3, 4]
    >>> ds.Groups = rt.Cat(["Group1", "Group2", "Group1", "Group1", "Group2"])
    >>> ds.Letters = rt.Cat(["A", "B", "C", "A", "C"])
    >>> ds
    # Zeros Ones Twos Nans Ints Groups Letters
    - ----- ---- ---- ---- ---- ------ -------
    0 0 1 2 nan 0 Group1 A
    1 0 1 2 nan 1 Group2 B
    2 0 1 2 nan 2 Group1 C
    3 0 1 2 nan 3 Group1 A
    4 0 1 2 nan 4 Group2 C
    <BLANKLINE>
    [5 rows x 7 columns] total bytes: 225.0 B

    **Calculate a ratio between the values calculated by a reducing function**

    This example returns a :py:class:`~.rt_dataset.Dataset` that holds ratios between
    the values calculated by the default reducing function
    (:py:meth:`~.rt_groupbyops.GroupByOps.nansum`) for Ints and Ones.

    >>> rt.accum_ratio(cat1=ds.Groups,
    ... cat2=ds.Letters,
    ... val1=ds.Ints,
    ... val2=ds.Ones)
    *Groups A B C Ratio Denom
    ------- ---- ---- ---- ----- -----
    Group1 1.50 nan 2.00 1.67 3
    Group2 nan 1.00 4.00 2.50 2
    ------- ---- ---- ---- ----- -----
    Ratio 1.50 1.00 3.00 2.00
    Denom 2 1 2 5
    <BLANKLINE>
    [2 rows x 6 columns] total bytes: 92.0 B

    **Return an AccumTable**

    Pass `True` to ``return_table`` to return an :py:class:`~.rt_accumtable.AccumTable`
    instead of a :py:class:`~.rt_dataset.Dataset`:

    >>> returned_accumtable = rt.accum_ratio(cat1=ds.Groups,
    ... cat2=ds.Letters,
    ... val1=ds.Ints,
    ... val2=ds.Ones,
    ... func1="nansum",
    ... return_table=True)
    >>> returned_accumtable
    Inner Tables: ['Numer', 'Denom', 'Ratio']
    Margin Columns: ['Numer', 'Denom', 'Ratio']
    Footer Rows: ['Numer', 'Denom', 'Ratio']

    Use :py:meth:`~.rt_accumtable.AccumTable.gen` to create a
    :py:class:`~.rt_dataset.Dataset` from the returned
    :py:class:`~.rt_accumtable.AccumTable`:

    >>> returned_accumtable.gen()
    *Groups A B C Ratio Numer Denom
    ------- ---- ---- ---- ----- ----- -----
    Group1 1.50 nan 2.00 1.67 5 3
    Group2 nan 1.00 4.00 2.50 5 2
    ------- ---- ---- ---- ----- ----- -----
    Ratio 1.50 1.00 3.00 2.00
    Numer 3 1 6 10
    Denom 2 1 2 5
    <BLANKLINE>
    [2 rows x 7 columns] total bytes: 108.0 B

    **Filter the arrays before calculating ratios**

    Pass filters to ``filt1`` and ``filt2`` to filter ``val1`` and ``val2`` before
    reducing and ratio calculation:

    >>> c_filter = ds.Letters == "C"
    >>> even_filter = ds.Ints % 2 == 0
    >>> rt.accum_ratio(cat1=ds.Groups,
    ... cat2=ds.Letters,
    ... val1=ds.Ints,
    ... val2=ds.Ones,
    ... func1="nansum",
    ... filt1=c_filter,
    ... filt2=even_filter)
    *Groups A B C Ratio Denom
    ------- ---- --- ---- ----- -----
    Group1 0.00 nan 2.00 1.00 2
    Group2 nan nan 4.00 4.00 1
    ------- ---- --- ---- ----- -----
    Ratio 0.00 nan 3.00 2.00
    Denom 1 0 2 3
    <BLANKLINE>
    [2 rows x 6 columns] total bytes: 92.0 B

    **Remove blank rows and columns**

    Pass `True` to ``remove_blanks`` to remove the rows and columns consisting entirely
    of `0` and `nan`. This example removes the blank lines from the filtered
    :py:class:`~.rt_dataset.Dataset`:

    >>> c_filter = ds.Letters == "C"
    >>> even_filter = ds.Ints % 2 == 0
    >>> rt.accum_ratio(cat1=ds.Groups,
    ... cat2=ds.Letters,
    ... val1=ds.Ints,
    ... val2=ds.Ones,
    ... func1="nansum",
    ... filt1=c_filter,
    ... filt2=even_filter,
    ... remove_blanks=True)
    *Groups C Ratio Denom
    ------- ---- ----- -----
    Group1 2.00 1.00 2
    Group2 4.00 4.00 1
    ------- ---- ----- -----
    Ratio 3.00 2.00
    Denom 2 3
    <BLANKLINE>
    [2 rows x 4 columns] total bytes: 60.0 B

    **Include non-ratio values calculated by reducing functions**

    Pass `True` to ``include_numer`` and ``include_denom`` to add summary rows and
    columns with the non-ratio values calculated by the reducing functions. Numer
    contains values for ``val1`` calculated with ``func1``. Denom contains values for
    ``val2`` calculated with ``func2``. This example doesn't include ``func2``, so
    :py:func:`~.rt_accumtable.accum_ratio` uses ``func1`` for ``val2``.

    >>> rt.accum_ratio(cat1=ds.Groups,
    ... cat2=ds.Letters,
    ... val1=ds.Ints,
    ... val2=ds.Ones,
    ... func1="nansum",
    ... include_numer=True,
    ... include_denom=True)
    *Groups A B C Ratio Numer Denom
    ------- ---- ---- ---- ----- ----- -----
    Group1 1.50 nan 2.00 1.67 5 3
    Group2 nan 1.00 4.00 2.50 5 2
    ------- ---- ---- ---- ----- ----- -----
    Ratio 1.50 1.00 3.00 2.00
    Numer 3 1 6 10
    Denom 2 1 2 5
    <BLANKLINE>
    [2 rows x 7 columns] total bytes: 108.0 B
    """
    # Handle missing inputs
    if val1 is None:
        raise ValueError("Missing argument val1")
    if val2 is None and cat2 is not None:
        # Passing as accum_ratio(cat1, val1, val2), omitting cat2 argument:
        # every positional value landed one parameter too early, so shift them.
        val1, val2, cat2 = cat2, val1, None
    if val2 is None:
        # Fail here with a clear message rather than later inside the reducer.
        raise ValueError("Missing argument val2")
    if filt1 is None:
        filt1 = full(val1.shape[0], True, dtype=bool)  # This was playa.utils.truecol
    if filt2 is None:
        filt2 = filt1
    if func2 is None:
        func2 = func1
    if cat2 is None:
        # Single-group column categorical so the result has one "NotGrouped" column.
        cat2 = Categorical(full(val1.shape[0], 1, dtype=np.int8), ["NotGrouped"])  # This was playa.utils.onescol

    # Handle name collisions between column groups and the summary names.
    # NOTE(review): category_replace is called on the cat2 object that was passed
    # in; this presumably renames the category on the caller's Categorical too --
    # confirm whether a copy should be taken first.
    for key in ["Numer", "Denom", "Ratio"]:
        if key in cat2.categories():
            cat2.category_replace(key, key + "_")

    # Compute accum
    accum = AccumTable(cat1, cat2)
    numer_func = getattr(accum, func1)
    denom_func = getattr(accum, func2)
    # TODO: In the future, when arbitrary functions are allowed in Accum2 calls, handle a missing attr here by passing it in by name
    accum["Numer"] = numer_func(val1, filter=filt1)
    accum["Denom"] = denom_func(val2, filter=filt2)
    accum["Ratio"] = accum["Numer"] / accum["Denom"]

    if return_table:
        return accum

    # Restrict the summary rows/columns to the requested extras; gen() always
    # adds the "Ratio" row and column of the displayed table itself.
    extras = [label for (label, wanted) in zip(["Numer", "Denom"], [include_numer, include_denom]) if wanted]
    accum.set_margin_columns(extras)
    accum.set_footer_rows(extras)
    return accum.gen("Ratio", remove_blanks=remove_blanks)
[docs]
def accum_ratiop(
cat1,
cat2=None,
val=None,
filter=None,
func="nansum",
norm_by="T",
include_total=True,
remove_blanks=False,
filt=None,
):
"""
Generate a :py:class:`~.rt_dataset.Dataset` of ratios displayed as percentages
between the individual values of a table calculated with a reducing function and
the value of the entire :py:class:`~.rt_accumtable.AccumTable`, its rows,
or its columns calculated with the same reducing function.
:py:func:`~.rt_accumtable.accum_ratiop` performs the following actions:
* Creates an :py:class:`~.rt_accumtable.AccumTable` using the groups of ``cat1`` and
``cat2``.
* Aggregates the data from the ``val`` array according to the ``func`` reducing
function.
* Calculates a ratio as a percent for each cell in the inner table, footer row, and
margin column. The numerator of each ratio is the calculated value for the cell,
and the denominator is the calculated value for that row, that column, or the
table, depending on the value of ``norm_by``.
* Generates and returns a :py:class:`~.rt_dataset.Dataset` from the
:py:class:`~.rt_accumtable.AccumTable` with percentage values.
:py:func:`~.rt_accumtable.accum_ratiop` supports only reducing functions that take an
array as a parameter. For example, ``count()`` isn't valid, as it doesn't accept an
array as an input argument. For a list of reducing functions, see
:doc:`/tutorial/tutorial_cat_reduce`.
Parameters
----------
cat1 : :py:class:`~.rt_categorical.Categorical`
The row groups used for accumulation.
cat2 : :py:class:`~.rt_categorical.Categorical`, optional
The column groups used for accumulation. If not provided,
:py:func:`~.rt_accumtable.accum_ratiop` uses a
:py:class:`~.rt_categorical.Categorical` with a single group, ``"NotGrouped"``.
val : array
The array used as the numerator for the percentage calculation.
filter : array of bool, optional
Filter for ``val``. The ``filter`` array must be the same length as ``val``.
Replaces the deprecated ``filt`` parameter.
func : str
String of the name of the reducing function used to reduce ``val`` before
calculating the percentile.
norm_by : {"T", "C", "R"}, default "T"
Controls the values used as the denominator for the ratio calculation:
* "T" selects the calculated value for the entire
:py:class:`~.rt_accumtable.AccumTable`.
* "C" selects the calculated value for each column.
* "R" selects the calculated value for each row.
include_total : bool, default `True`
Adds a summary row and column of values calculated by ``func`` to the returned
:py:class:`~.rt_dataset.Dataset`.
remove_blanks : bool, default `True`
If `True`, removes rows and columns that consist entirely of `0` or
`nan` from the returned table.
filt : array of bool, optional
Deprecated and replaced with ``filter``.
Returns
-------
:py:class:`~.rt_dataset.Dataset`
A table of percent ratios for the array.
See Also
--------
:py:class:`.rt_accum2.Accum2` :
The parent class for :py:class:`~.rt_accumtable.AccumTable`.
:py:class:`.rt_accumtable.AccumTable` :
A wrapper on :py:class:`~.rt_accum2.Accum2` that enables the creation of tables
that combine the results of multiple tables generated from the
:py:class:`~.rt_accum2.Accum2` object.
:py:class:`.rt_categorical.Categorical` :
A class that efficiently stores an array of repeated strings and is used for
groupby operations.
:py:class:`.rt_groupbyops.GroupByOps` :
A class that holds the reducing functions used by
:py:func:`~.rt_accumtable.accum_ratiop`.
Examples
--------
Construct a :py:class:`~.rt_dataset.Dataset` for the following examples:
>>> ds = rt.Dataset()
>>> ds.Zeros = [0, 0, 0, 0, 0]
>>> ds.Ones = [1, 1, 1, 1, 1]
>>> ds.Twos = [2, 2, 2, 2, 2]
>>> ds.Ints = [0, 1, 2, 3, 4]
>>> ds.Groups = rt.Cat(["Group1", "Group2", "Group1", "Group1", "Group2"])
>>> ds.Letters = rt.Cat(["A", "B", "C", "A", "C"])
>>> ds
# Zeros Ones Twos Ints Groups Letters
- ----- ---- ---- ---- ------ -------
0 0 1 2 0 Group1 A
1 0 1 2 1 Group2 B
2 0 1 2 2 Group1 C
3 0 1 2 3 Group1 A
4 0 1 2 4 Group2 C
<BLANKLINE>
[5 rows x 6 columns] total bytes: 185.0 B
**Calculate percentiles compared to total**
>>> rt.accum_ratiop(cat1=ds.Groups,
... cat2=ds.Letters,
... val=ds.Ints)
*Groups A B C TotalRatio Total
---------- ----- ----- ----- ---------- -----
Group1 30.00 0.00 20.00 50.00 5
Group2 0.00 10.00 40.00 50.00 5
---------- ----- ----- ----- ---------- -----
TotalRatio 30.00 10.00 60.00 100.00
Total 3 1 6 10
<BLANKLINE>
[2 rows x 6 columns] total bytes: 92.0 B
Pass ``"nanmean"`` to ``func`` to calculate the ratio as a percent between the
mean for each inner table cell and the total mean:
>>> rt.accum_ratiop(cat1=ds.Groups,
... cat2=ds.Letters,
... val=ds.Ints,
... func="nanmean")
*Groups A B C TotalRatio Total
---------- ----- ----- ------ ---------- -----
Group1 75.00 nan 100.00 83.33 1.67
Group2 nan 50.00 200.00 125.00 2.50
---------- ----- ----- ------ ---------- -----
TotalRatio 75.00 50.00 150.00 100.00
Total 1.50 1.00 3.00 2.00
<BLANKLINE>
[2 rows x 6 columns] total bytes: 92.0 B
**Calculate percentiles compared to row**
Pass ``"R"`` to ``norm_by``:
>>> rt.accum_ratiop(cat1=ds.Groups,
... cat2=ds.Letters,
... val=ds.Ints,
... func="nanmean",
... norm_by="R",
... include_total=False)
*Groups A B C TotalRatio
---------- ----- ----- ------ ----------
Group1 90.00 nan 120.00 100.00
Group2 nan 40.00 160.00 100.00
---------- ----- ----- ------ ----------
TotalRatio 75.00 50.00 150.00 100.00
<BLANKLINE>
[2 rows x 5 columns] total bytes: 76.0 B
**Calculate percentiles compared to column**
Pass ``"C"`` to ``norm_by``:
>>> rt.accum_ratiop(cat1=ds.Groups,
... cat2=ds.Letters,
... val=ds.Ints,
... func="nanmean",
... norm_by="C",
... include_total=False)
*Groups A B C TotalRatio
---------- ------ ------ ------ ----------
Group1 100.00 nan 66.67 83.33
Group2 nan 100.00 133.33 125.00
---------- ------ ------ ------ ----------
TotalRatio 100.00 100.00 100.00 100.00
<BLANKLINE>
[2 rows x 5 columns] total bytes: 76.0 B
**Filter the array before calculating percentiles**
Create a filter for ``val`` and pass it to ``filter``. This example selects for data
in ``val`` in the ``"carrot"`` group:
>>> c_filter = ds.Letters == "C"
>>> rt.accum_ratiop(cat1=ds.Groups,
... cat2=ds.Letters,
... val=ds.Ints,
... filter=c_filter,
... func="nansum",
... include_total=False)
*Groups A B C TotalRatio
---------- ---- ---- ------ ----------
Group1 0.00 0.00 33.33 33.33
Group2 0.00 0.00 66.67 66.67
---------- ---- ---- ------ ----------
TotalRatio 0.00 0.00 100.00 100.00
<BLANKLINE>
[2 rows x 5 columns] total bytes: 76.0 B
**Remove blank rows and columns**
Pass `True` to ``remove_blanks`` to remove the rows and columns consisting entirely
of `0` and `nan`. This example removes the blank lines from the filtered
:py:class:`~.rt_dataset.Dataset`:
>>> rt.accum_ratiop(cat1=ds.Groups,
... cat2=ds.Letters,
... val=ds.Ints,
... filter=c_filter,
... func="nansum",
... include_total=False,
... remove_blanks=True)
*Groups C TotalRatio
---------- ------ ----------
Group1 33.33 33.33
Group2 66.67 66.67
---------- ------ ----------
TotalRatio 100.00 100.00
<BLANKLINE>
[2 rows x 3 columns] total bytes: 44.0 B
**Include the total values calculated by reducing functions**
Pass `True` to ``include_total`` to add a ``"Total"`` row and column to the returned
:py:class:`~.rt_dataset.Dataset`. The total represents the values calculated by the
reducing function before percentile calculation.
>>> rt.accum_ratiop(cat1=ds.Groups,
... cat2=ds.Letters,
... val=ds.Ints,
... include_total=True)
*Groups A B C TotalRatio Total
---------- ----- ----- ----- ---------- -----
Group1 30.00 0.00 20.00 50.00 5
Group2 0.00 10.00 40.00 50.00 5
---------- ----- ----- ----- ---------- -----
TotalRatio 30.00 10.00 60.00 100.00
Total 3 1 6 10
<BLANKLINE>
[2 rows x 6 columns] total bytes: 92.0 B
"""
# Handle missing inputs
if val is None:
val = full(cat1.shape[0], 1, dtype=np.float64) # This was playa.utils.onescol
if filter is None:
if filt is not None: # Temporary until deprecated
warnings.warn(
'Kwarg "filt" is being deprecated for "filter" to align with common syntax. "filt" will be removed in a future version',
FutureWarning,
)
filter = filt
else:
filter = full(val.shape[0], True, dtype=bool) # This was playa.utils.truecol
if cat2 is None:
cat2 = Categorical(full(val.shape[0], 1, dtype=np.int8), ["NotGrouped"]) # This was playa.utils.onescol
# Compute accum
accum = AccumTable(cat1, cat2)
func_name = func
func = getattr(accum, func_name)
# TODO: In the future, when arbitrary functions are allowed in Accum2 calls, handle a missing attr here by passing it in by name
accum["TotalRatio"] = func(val, filter=filter)
if include_total:
accum["Total"] = func(val, filter=filter)
accumr = accum.gen("TotalRatio", remove_blanks=remove_blanks)
if include_total:
keys = accumr.keys()[1:-1]
else:
keys = accumr.keys()[1:]
if norm_by.upper() == "T":
total = accumr.footer_get_dict()["TotalRatio"]["TotalRatio"]
accumr.footer_set_values(
"TotalRatio", {key: 100 * item / total for (key, item) in accumr.footer_get_dict()["TotalRatio"].items()}
)
for col in keys:
accumr[col] = 100 * accumr[col] / total
elif norm_by.upper() == "R":
total = accumr.footer_get_dict()["TotalRatio"]["TotalRatio"]
accumr.footer_set_values(
"TotalRatio", {key: 100 * item / total for (key, item) in accumr.footer_get_dict()["TotalRatio"].items()}
)
for col in keys:
accumr[col] = 100 * accumr[col] / accumr.TotalRatio
elif norm_by.upper() == "C":
for col in keys:
total = accumr.footer_get_dict()["TotalRatio"][col]
accumr[col] = 100 * accumr[col] / total
accumr.footer_set_values(
"TotalRatio", {key: 100.0 for (key, item) in accumr.footer_get_dict()["TotalRatio"].items()}
)
else:
raise ValueError(f"Invalid norm_by selection: {norm_by}. Valid choices are T, R, C.")
return accumr
[docs]
def accum_cols(cat, val_list, name_list=None, filt_list=None, func_list="nansum", remove_blanks=False):
    """
    Apply reducing functions to multiple arrays that are grouped by a
    :py:class:`~.rt_categorical.Categorical`.

    The returned :py:class:`~.rt_dataset.Dataset` contains values calculated by a
    reducing function for each :py:class:`~.rt_categorical.Categorical` group from each
    of the arrays in ``val_list``. It also contains the calculated value for each of the
    original arrays in the ``Total`` row.

    :py:func:`~.rt_accumtable.accum_cols` supports only reducing functions that take an
    array as a parameter. For example ``count()`` isn't valid, as it doesn't accept an
    array as an input argument. For a list of reducing functions, see
    :doc:`/tutorial/tutorial_cat_reduce`.

    Parameters
    ----------
    cat : :py:class:`~.rt_categorical.Categorical`
        A :py:class:`~.rt_categorical.Categorical` that specifies the groups for
        reducing the ``val_list`` array. A value of any other type is passed to the
        :py:class:`~.rt_categorical.Categorical` constructor first.
    val_list : array or list of arrays
        Array or list of arrays that ``func_list`` is applied to. Must not be empty.
        :py:func:`~.rt_accumtable.accum_cols` returns an array for each element in
        ``val_list``. If an element of ``val_list`` is itself a two-element list of two
        arrays, :py:func:`~.rt_accumtable.accum_cols` calculates a ratio between the
        values calculated by a reducing function for the two arrays.
        :py:func:`~.rt_accumtable.accum_ratio` performs this calculation using ``cat``,
        the two arrays, the respective filter, and the respective reducing function as
        arguments.

        If the second element of the two-element list is exactly ``"p"`` or ``"P"``,
        :py:func:`~.rt_accumtable.accum_cols` calculates a ratio displayed as a
        percentage between the individual values of a table calculated with a reducing
        function and the calculated value of the entire
        :py:class:`~.rt_accumtable.AccumTable`. :py:func:`~.rt_accumtable.accum_ratiop`
        performs this calculation using ``cat``, the first element of the two element
        list, the respective filter, and the respective reducing function as arguments.
        Any other string raises a `ValueError`.
    name_list : list, optional
        List of column names in the returned :py:class:`~.rt_dataset.Dataset`. If not
        provided, the returned columns have names ``colN``.
    filt_list : array of bool or list of array of bool, optional
        Either a filter array that applies to all arrays in ``val_list`` or a list of
        filters, where each filter applies to the respective array in ``val_list``. Each
        filter must be the same length as the arrays in ``val_list``.
    func_list : str or list of str, default "nansum"
        Either a string of the name of a reducing function (for example, ``"sum"`` or
        ``"nanmean"``) or a list of strings of reducing function names. Passing a string
        applies the single reducing function to all arrays in ``val_list``. Passing a
        list of strings applies each reducing function to the respective array in
        ``val_list``. Note the following two exceptions:

        * If you pass more functions than there are arrays in ``val_list``, the extra
          functions without respective arrays in ``val_list`` are ignored.
        * If you pass fewer functions than arrays, the returned
          :py:class:`~.rt_dataset.Dataset` contains only the same number of columns as
          there are functions in ``func_list``.

        The same truncation to the shortest list applies to ``name_list`` and to a
        list passed as ``filt_list``.
    remove_blanks : bool, default `False`
        If `True`, removes rows and columns that consist entirely of `0` or `nan` from
        the returned :py:class:`~.rt_dataset.Dataset`.

    Returns
    -------
    :py:class:`.rt_dataset.Dataset`
        A table of the values calculated by the reducing functions for each element of
        ``val_list``.

    Raises
    ------
    ValueError
        If ``val_list`` is empty, if ``name_list``, ``filt_list``, or ``func_list`` is
        an empty list (so no column can be produced), or if the second element of a
        two-element entry in ``val_list`` is a string other than ``"p"`` or ``"P"``.

    See Also
    --------
    :py:class:`.rt_accum2.Accum2` :
        The parent class for :py:class:`~.rt_accumtable.AccumTable`.
    :py:class:`.rt_accumtable.AccumTable` :
        A wrapper on :py:class:`~.rt_accum2.Accum2` that enables the creation of tables
        that combine the results of multiple tables generated from the
        :py:class:`~.rt_accum2.Accum2` object.
    :py:class:`.rt_categorical.Categorical` :
        A class that efficiently stores an array of repeated strings and is used for
        groupby operations.
    :py:class:`.rt_groupbyops.GroupByOps` :
        A class that holds the reducing functions used by
        :py:func:`~.rt_accumtable.accum_cols`.

    Examples
    --------
    Construct a :py:class:`~.rt_dataset.Dataset` for the following examples:

    >>> ds = rt.Dataset()
    >>> ds.Zeros = [0, 0, 0, 0, 0]
    >>> ds.Ones = [1, 1, 1, 1, 1]
    >>> ds.Twos = [2, 2, 2, 2, 2]
    >>> ds.Nans = [rt.nan, rt.nan, rt.nan, rt.nan, rt.nan]
    >>> ds.Ints = [0, 1, 2, 3, 4]
    >>> ds.Groups = ["Group1", "Group2", "Group1", "Group1", "Group2"]
    >>> ds.Groups = rt.Cat(ds.Groups)
    >>> ds
    #   Zeros   Ones   Twos   Nans   Ints   Groups
    -   -----   ----   ----   ----   ----   ------
    0       0      1      2    nan      0   Group1
    1       0      1      2    nan      1   Group2
    2       0      1      2    nan      2   Group1
    3       0      1      2    nan      3   Group1
    4       0      1      2    nan      4   Group2
    <BLANKLINE>
    [5 rows x 6 columns] total bytes: 217.0 B

    **Apply one reducing function to all arrays**

    Pass a single function name as a string to ``func_list``. This example applies the
    :py:meth:`~.rt_groupbyops.GroupByOps.sum` reducing function to all arrays in
    ``val_list``:

    >>> rt.accum_cols(cat=ds.Groups,
    ...               val_list=[ds.Zeros, ds.Ones, ds.Twos, ds.Nans, ds.Ints],
    ...               func_list="sum")
    *Groups   col0   col1   col2   col3   col4
    -------   ----   ----   ----   ----   ----
    Group1       0      3      6    nan      5
    Group2       0      2      4    nan      5
    -------   ----   ----   ----   ----   ----
    Total        0      5     10    nan     10
    <BLANKLINE>
    [2 rows x 6 columns] total bytes: 92.0 B

    Without passing a ``name_list`` to :py:func:`~.rt_accumtable.accum_cols`, the
    default column names appear in the returned table.

    **Apply a different reducing function to each array**

    Pass a list of function names as strings to ``func_list``. This example applies a
    respective function in ``func_list`` to each of the arrays in ``val_list``:

    >>> rt.accum_cols(cat=ds.Groups,
    ...               val_list=[ds.Zeros, ds.Ones, ds.Twos, ds.Nans, ds.Ints],
    ...               name_list=["Zeros sum", "Ones mean", "Twos var", "NaNs nansum", "Ints mean"],
    ...               func_list=["sum", "mean", "var", "nansum", "mean"])
    *Groups   Zeros sum   Ones mean   Twos var   NaNs nansum   Ints mean
    -------   ---------   ---------   --------   -----------   ---------
    Group1            0        1.00       0.00          0.00        1.67
    Group2            0        1.00       0.00          0.00        2.50
    -------   ---------   ---------   --------   -----------   ---------
    Total             0        1.00       0.00          0.00        2.00
    <BLANKLINE>
    [2 rows x 6 columns] total bytes: 92.0 B

    **Include ratio arrays**

    Pass a list of two arrays to ``val_list`` to return the ratio of the
    values calculated by :py:meth:`~.rt_groupbyops.GroupByOps.sum` for the two arrays:

    >>> rt.accum_cols(cat=ds.Groups,
    ...               val_list=[ds.Ints, ds.Ones, [ds.Ints, ds.Ones]],
    ...               name_list=["Ints sum", "Ones sum", "Ints:Ones sum ratio"],
    ...               func_list="sum")
    *Groups   Ints sum   Ones sum   Ints:Ones sum ratio
    -------   --------   --------   -------------------
    Group1           5          3                  1.67
    Group2           5          2                  2.50
    -------   --------   --------   -------------------
    Total           10          5                  2.00
    <BLANKLINE>
    [2 rows x 4 columns] total bytes: 60.0 B

    The values returned for the two-element list in ``val_list`` are ratios between the
    values calculated by :py:meth:`~.rt_groupbyops.GroupByOps.sum` for Ints as the
    numerator and for Ones as the denominator. :py:func:`~.rt_accumtable.accum_cols`
    uses :py:func:`~.rt_accumtable.accum_ratio` to calculate this ratio. In the previous
    example, :py:func:`~.rt_accumtable.accum_ratio` is passed the following arguments:

    >>> ints_ones_ratio = rt.accum_ratio(cat1=ds.Groups,
    ...                                  cat2=rt.Categorical(np.full(ds.Groups.shape[0], 1, dtype=np.int8), ["NotGrouped"]),
    ...                                  val1=ds.Ints,
    ...                                  val2=ds.Ones,
    ...                                  func1="sum",
    ...                                  func2="sum",
    ...                                  remove_blanks=False)
    >>> ints_ones_ratio["NotGrouped"]
    FastArray([1.66666667, 2.5       ])

    **Include percent arrays**

    Pass two-element lists with an array and ``"p"`` to ``val_list`` to return the ratio
    of the values calculated by :py:meth:`~.rt_groupbyops.GroupByOps.sum` for the
    grouped array values compared to the total value for the array, displayed as a
    percent:

    >>> rt.accum_cols(cat=ds.Groups,
    ...               val_list=[ds.Ones, ds.Ints, [ds.Ones, "p"], [ds.Ints, "p"]],
    ...               name_list=["Ones sum", "Ints sum", "Ones percent", "Ints percent"],
    ...               func_list="sum")
    *Groups   Ones sum   Ints sum   Ones percent   Ints percent
    -------   --------   --------   ------------   ------------
    Group1           3          5          60.00          50.00
    Group2           2          5          40.00          50.00
    -------   --------   --------   ------------   ------------
    Total            5         10         100.00         100.00
    <BLANKLINE>
    [2 rows x 5 columns] total bytes: 76.0 B

    The values returned for the two-element lists are the percent ratios. Group1
    of the Ones sum column is 3 and the Total for Ones sum is 5. The ratio of these two
    numbers as a percent is 60.00, as displayed in the Ones percent column.
    :py:func:`~.rt_accumtable.accum_cols` uses :py:func:`~.rt_accumtable.accum_ratiop`
    to calculate this percent ratio. In the previous example,
    :py:func:`~.rt_accumtable.accum_ratiop` is passed the following arguments to
    calculate the Ones percent column:

    >>> ones_ratiop = rt.accum_ratiop(cat1=ds.Groups,
    ...                               cat2=rt.Categorical(np.full(ds.Groups.shape[0], 1, dtype=np.int8), ["NotGrouped"]),
    ...                               val=ds.Ones,
    ...                               filter=None,
    ...                               func="sum",
    ...                               norm_by="T",
    ...                               include_total=False,
    ...                               remove_blanks=False)
    >>> ones_ratiop["NotGrouped"]
    FastArray([60., 40.])

    **Filter all arrays with a single boolean mask**

    Pass an array of booleans to ``filt_list`` to filter all arrays in ``val_list``:

    >>> greater_3_filter = ds.Ints > 3
    >>> rt.accum_cols(cat=ds.Groups,
    ...               val_list=[ds.Zeros, ds.Ones, ds.Twos, ds.Nans, ds.Ints],
    ...               name_list=["Zeros sum", "Ones sum", "Twos sum", "NaNs sum", "Ints sum"],
    ...               filt_list=greater_3_filter,
    ...               func_list="sum")
    *Groups   Zeros sum   Ones sum   Twos sum   NaNs sum   Ints sum
    -------   ---------   --------   --------   --------   --------
    Group1            0          0          0       0.00          0
    Group2            0          1          2        nan          4
    -------   ---------   --------   --------   --------   --------
    Total             0          1          2        nan          4
    <BLANKLINE>
    [2 rows x 6 columns] total bytes: 92.0 B

    **Filter each array with a different boolean mask**

    Pass a list of boolean arrays to ``filt_list`` to filter the respective arrays in
    ``val_list``:

    >>> even_zeros = ds.Zeros % 2 == 0
    >>> even_ones = ds.Ones % 2 == 0
    >>> even_twos = ds.Twos % 2 == 0
    >>> even_nans = ds.Nans % 2 == 0
    >>> even_ints = ds.Ints % 2 == 0
    >>> rt.accum_cols(cat=ds.Groups,
    ...               val_list=[ds.Zeros, ds.Ones, ds.Twos, ds.Nans, ds.Ints],
    ...               name_list=["Zeros sum", "Ones sum", "Twos sum", "NaNs sum", "Ints sum"],
    ...               filt_list=[even_zeros, even_ones, even_twos, even_nans, even_ints],
    ...               func_list="sum")
    *Groups   Zeros sum   Ones sum   Twos sum   NaNs sum   Ints sum
    -------   ---------   --------   --------   --------   --------
    Group1            0          0          6       0.00          2
    Group2            0          0          4       0.00          4
    -------   ---------   --------   --------   --------   --------
    Total             0          0         10       0.00          6
    <BLANKLINE>
    [2 rows x 6 columns] total bytes: 92.0 B

    **Remove blank values**

    Pass `True` to ``remove_blanks`` to remove all rows and columns from the returned
    :py:class:`~.rt_dataset.Dataset` that consist entirely of `0` or `nan`:

    >>> rt.accum_cols(cat=ds.Groups,
    ...               val_list=[ds.Zeros, ds.Ones, ds.Twos, ds.Nans, ds.Ints],
    ...               name_list=["Zeros sum", "Ones sum", "Twos sum", "NaNs sum", "Ints sum"],
    ...               func_list="sum",
    ...               remove_blanks=True)
    *Groups   Ones sum   Twos sum   Ints sum
    -------   --------   --------   --------
    Group1           3          6          5
    Group2           2          4          5
    -------   --------   --------   --------
    Total            5         10         10
    <BLANKLINE>
    [2 rows x 4 columns] total bytes: 60.0 B
    """
    # Handle mistyped inputs
    if not isinstance(val_list, list):
        val_list = [val_list]
    if not val_list:
        # Without at least one array there is nothing to size the default filter from
        # and no column to build the result on.
        raise ValueError("val_list must contain at least one array")
    if not isinstance(cat, Categorical):
        cat = Categorical(cat)

    # Handle missing inputs
    if name_list is None:
        name_list = [f"col{n}" for n in range(len(val_list))]
    if filt_list is None:
        # The first entry may itself be a [numerator, denominator-or-"p"] pair; in that
        # case its first element carries the array length.
        val_fst = val_list[0]
        shape = val_fst.shape[0] if isinstance(val_fst, np.ndarray) else val_fst[0].shape[0]
        filt_list = full(shape, True, dtype=bool)  # This was playa.utils.truecol
    if not isinstance(func_list, list):
        func_list = [func_list for _ in val_list]
    if not isinstance(filt_list, list):
        filt_list = [filt_list for _ in val_list]

    # Compute accum
    temp_cat = Categorical(full(cat.shape[0], 1, dtype=np.int8), ["NotGrouped"])  # This was playa.utils.onescol
    accum = Accum2(cat, temp_cat)

    results = None  # built from the first computed table, then extended column by column
    for val, name, filt, func_name in zip(val_list, name_list, filt_list, func_list):
        # Looked up for every entry (even the ratio cases, which don't call it) so an
        # unknown function name is reported up front.
        reducer = getattr(accum, func_name)
        if isinstance(val, list):  # Special cases
            if isinstance(val[1], str):  # Named cases
                # Compare against the exact specifiers: a substring test such as
                # ``val[1] in "pP"`` would also accept "" and "pP".
                if val[1] in ("p", "P"):  # accum_ratiop type
                    curr_data = accum_ratiop(
                        cat,
                        temp_cat,
                        val[0],
                        filter=filt,
                        func=func_name,
                        norm_by="T",
                        include_total=False,
                        remove_blanks=False,
                    )
                else:
                    raise ValueError(f'Invalid accum_cols specifier "{val[1]}" in second argument for column {name}')
            else:  # accum_ratio type
                curr_data = accum_ratio(
                    cat, temp_cat, val[0], val[1], filt, filt, func_name, func_name, remove_blanks=False
                )
        else:
            curr_data = reducer(val, filter=filt)

        if results is None:
            # Seed the result with the label column(s) of the first table. A multikey
            # categorical contributes one column per key. This only happens once.
            cat_width = len(cat.category_dict)
            results = curr_data[:, 0:cat_width]
            results.footer_remove()
        results[name] = curr_data["NotGrouped"]
        # Take the "NotGrouped" entry of the first footer row of the current table as
        # this column's "Total" value (0.0 when that entry is missing).
        footer_val = list(curr_data.footer_get_dict().values())[0].get("NotGrouped", 0.0)
        results.footer_set_values("Total", {name: footer_val})

    if results is None:
        # zip() stops at the shortest input, so an empty companion list yields no columns.
        raise ValueError("name_list, filt_list, and func_list must each contain at least one element")

    if remove_blanks:
        return results.trim()
    return results
# Keep this as the last line of the module: it publishes the AccumTable class as an
# attribute of TypeRegister, so it must run after the class has been defined.
# NOTE(review): presumably this lets other modules reach the class through
# TypeRegister without importing this module directly -- confirm against rt_enum.
TypeRegister.AccumTable = AccumTable