from __future__ import annotations
__all__ = ["FastArray", "Threading", "Recycle", "Ledger"]
import logging
import os
import warnings
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Mapping,
Optional,
Sequence,
Tuple,
Union,
)
import numpy as np
import numpy.typing as npt
import riptide_cpp as rc
from numpy.core.numeric import ScalarType
from .config import get_global_settings
from .rt_enum import (
INVALID_DICT,
MATH_OPERATION,
REDUCE_FUNCTIONS,
ROLLING_FUNCTIONS,
TIMEWINDOW_FUNCTIONS,
NumpyCharTypes,
TypeRegister,
gBinaryBitwiseUFuncs,
gBinaryLogicalUFuncs,
gBinaryUFuncs,
gNumpyScalarType,
gReduceUFuncs,
gUnaryUFuncs,
)
from .rt_grouping import Grouping
from .rt_misc import _use_autocomplete_placeholder
from .rt_mlutils import normalize_minmax, normalize_zscore
from .rt_numpy import (
_searchsorted,
asanyarray,
bool_to_fancy,
crc32c,
empty,
empty_like,
full,
groupbyhash,
hstack,
ismember,
issorted,
ones,
repeat,
searchsorted,
sort,
tile,
unique,
where,
zeros,
min_scalar_type,
)
from .rt_sds import save_sds
from .rt_stats import statx
from .rt_utils import describe, sample, rolling_quantile_funcParam
from .Utils.common import cached_weakref_property
from .Utils.rt_display_properties import (
DisplayConvert,
ItemFormat,
default_item_formats,
)
try:
# optional extra routines if bottleneck installed
import bottleneck as bn
except Exception:
pass
if TYPE_CHECKING:
from .rt_dataset import Dataset
from .rt_str import FAString
# pyarrow is an optional dependency.
try:
import pyarrow as pa
except ImportError:
pass
# Create a logger for this module.
logger = logging.getLogger(__name__)
NUMPY_CONVERSION_TABLE: Mapping[Callable, REDUCE_FUNCTIONS] = {
np.sum: REDUCE_FUNCTIONS.REDUCE_SUM,
np.nansum: REDUCE_FUNCTIONS.REDUCE_NANSUM,
np.amin: REDUCE_FUNCTIONS.REDUCE_MIN,
np.nanmin: REDUCE_FUNCTIONS.REDUCE_NANMIN,
np.amax: REDUCE_FUNCTIONS.REDUCE_MAX,
np.nanmax: REDUCE_FUNCTIONS.REDUCE_NANMAX,
np.var: REDUCE_FUNCTIONS.REDUCE_VAR,
np.nanvar: REDUCE_FUNCTIONS.REDUCE_NANVAR,
np.mean: REDUCE_FUNCTIONS.REDUCE_MEAN,
np.nanmean: REDUCE_FUNCTIONS.REDUCE_NANMEAN,
np.std: REDUCE_FUNCTIONS.REDUCE_STD,
np.nanstd: REDUCE_FUNCTIONS.REDUCE_NANSTD,
np.argmin: REDUCE_FUNCTIONS.REDUCE_ARGMIN,
np.nanargmin: REDUCE_FUNCTIONS.REDUCE_NANARGMIN,
np.argmax: REDUCE_FUNCTIONS.REDUCE_ARGMAX,
np.nanargmax: REDUCE_FUNCTIONS.REDUCE_NANARGMAX,
# np.any: REDUCE_FUNCTIONS.REDUCE_ANY,
# np.all: REDUCE_FUNCTIONS.REDUCE_ALL,
}
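# Illustrative lookup against the table above (a sketch kept in comments so it
# is not executed at import time): a NumPy reducer maps to its riptide_cpp
# REDUCE_FUNCTIONS counterpart, and unmapped callables return None so callers
# can fall back to plain NumPy.
#
#   >>> NUMPY_CONVERSION_TABLE.get(np.nansum) is REDUCE_FUNCTIONS.REDUCE_NANSUM
#   True
#   >>> NUMPY_CONVERSION_TABLE.get(np.cumsum) is None
#   True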
import math
import numba as nb
def _isnan(x):
raise RuntimeError("Unexpected call")
@nb.extending.overload(_isnan)
def __isnan(x):
if x == nb.int8:
return lambda x: x == nb.int8(-128)
elif x == nb.int16:
return lambda x: x == nb.int16(-32768)
elif x == nb.int32:
return lambda x: x == nb.int32(0x80000000)
elif x == nb.int64:
return lambda x: x == nb.int64(0x8000000000000000)
elif x == nb.uint8:
return lambda x: x == nb.uint8(0xFF)
elif x == nb.uint16:
return lambda x: x == nb.uint16(0xFFFF)
elif x == nb.uint32:
return lambda x: x == nb.uint32(0xFFFFFFFF)
elif x == nb.uint64:
return lambda x: x == nb.uint64(0xFFFFFFFFFFFFFFFF)
else:
return lambda x: math.isnan(x)
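# Behaviour sketch for the overload above (illustrative; the integer branches
# compare against riptable's sentinel invalids, e.g. -128 for int8, while the
# float path falls back to math.isnan):
#
#   >>> @nb.njit
#   ... def _count_valid(x):
#   ...     n = 0
#   ...     for i in range(len(x)):
#   ...         if not _isnan(x[i]):
#   ...             n += 1
#   ...     return n
#   >>> _count_valid(np.array([1, -128, 3], dtype=np.int8))   # -128 is the int8 sentinel
#   2
#   >>> _count_valid(np.array([1.0, np.nan, 3.0]))            # float path uses math.isnan
#   2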
@nb.njit(parallel=True, cache=get_global_settings().enable_numba_cache)
def _fnansumhelper(x, filter):
ret = 0
length = 0
for i in nb.prange(len(x)):
if filter[i] and not _isnan(x[i]):
ret += x[i]
length += 1
return (ret, length)
def _fnansum(x, filter):
return _fnansumhelper(x, filter)[0]
def _fnanmean(x, filter):
(tot, n) = _fnansumhelper(x, filter)
return tot / n
@nb.njit(parallel=True, cache=get_global_settings().enable_numba_cache)
def _fnanvar(x, filter):
abc = 0.0
length = 0
for i in nb.prange(len(x)):
if filter[i] and not _isnan(x[i]):
abc += x[i]
length += 1
mean = abc / length
ret = 0.0
for i in nb.prange(len(x)):
if filter[i] and not _isnan(x[i]):
ret += (x[i] - mean) ** 2
if length > 1:
return ret / (length - 1)
if length == 1:
return np.NaN
if length == 0:
raise ValueError("Tried to take the variance of an empty array.")
def _fnanstd(x, filter):
return math.sqrt(_fnanvar(x, filter))
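# Usage sketch for the filtered, nan-aware reducers above (illustrative only;
# ``filter`` is a boolean mask with the same length as ``x``, and invalid
# entries are skipped via _isnan):
#
#   >>> x = np.array([1.0, np.nan, 3.0, 5.0])
#   >>> mask = np.array([True, True, True, False])
#   >>> _fnansum(x, mask)     # nan skipped, last element filtered out
#   4.0
#   >>> _fnanmean(x, mask)    # 4.0 over 2 valid elements
#   2.0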
@nb.njit(parallel=True, cache=get_global_settings().enable_numba_cache)
def _fsumhelper(x, filter):
ret = 0
length = 0
for i in nb.prange(len(x)):
if filter[i]:
ret += x[i]
length += 1
return (ret, length)
def _fsum(x, filter):
return _fsumhelper(x, filter)[0]
def _fmean(x, filter):
(tot, n) = _fsumhelper(x, filter)
return tot / n
@nb.njit(parallel=True, cache=get_global_settings().enable_numba_cache)
def _fvar(x, filter):
abc = 0.0
length = 0
for i in nb.prange(len(x)):
if filter[i]:
abc += x[i]
length += 1
mean = abc / length
ret = 0.0
for i in nb.prange(len(x)):
if filter[i]:
ret += (x[i] - mean) ** 2
if length > 1:
return ret / (length - 1)
if length == 1:
return np.NaN
if length == 0:
raise ValueError("Tried to take the variance of an empty array.")
def _fstd(x, filter):
return math.sqrt(_fvar(x, filter))
# --------------------------------------------------------------
def FA_FROM_UINT8(uint8arr):
"""
Used in de-pickling
"""
return rc.CompressDecompressArrays([uint8arr], 1)[0]
# --------------------------------------------------------------
def FA_FROM_BYTESTRING(bytestring):
"""
Used in de-pickling when tostring() used (currently disabled)
"""
return FA_FROM_UINT8(np.frombuffer(bytestring, dtype=np.uint8))
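# Round-trip sketch (illustrative; mirrors FastArray.__reduce__ further below,
# where mode 0 of rc.CompressDecompressArrays compresses and mode 1, used by
# FA_FROM_UINT8, decompresses):
#
#   >>> arr = FastArray([1, 2, 3])
#   >>> packed = rc.CompressDecompressArrays([arr], 0)[0]    # compress to a uint8 buffer
#   >>> restored = FA_FROM_UINT8(packed.view(np.ndarray))    # decompress
#   >>> bool((restored == arr).all())                        # values survive the round trip
#   True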
# --------------------------------------------------------------
def logical_find_common_type(arraytypes, scalartypes, scalarval):
"""
assumes one scalar and one array
"""
scalar = scalartypes[0]
array = arraytypes[0]
unsigned = False
isinteger = False
# TJD this routine needs to be rewritten
# can check isinstance(scalar,(np.integer, int))
# if this comes in as np.int64 and not a dtype, we convert to a dtype
if not hasattr(scalar, "char"):
scalar = np.dtype(scalar)
if scalar.char in NumpyCharTypes.UnsignedInteger:
unsigned = True
isinteger = True
if scalar.char in NumpyCharTypes.Integer:
isinteger = True
if not isinteger:
# go by numpy upscale rules
# NOTE: should consider allowing integer ^ True -- or changing a bool scalar to an int
# print("punting not integer scalar", scalar)
return np.find_common_type(arraytypes, scalartypes)
unsigned = False
isinteger = False
try:
if array.char in NumpyCharTypes.UnsignedInteger:
unsigned = True
isinteger = True
if array.char in NumpyCharTypes.Integer:
isinteger = True
except:
pass
# if isinstance(array, int):
# isinteger = True
# IF ARRAY IS UNSIGNED BUT SCALAR IS SIGNED, upcast
if not isinteger:
# go by numpy upscale rules
# NOTE: should consider allowing integer ^ True -- or changing a bool scalar to an int
# print("punting not integer array", array)
return np.find_common_type(arraytypes, scalartypes)
final = None
scalarval = int(scalarval)
# Determine the possible integer upscaling based on the scalar value
if unsigned:
if scalarval <= 255:
final = np.uint8
elif scalarval <= 65535:
final = np.uint16
elif scalarval <= (2**32 - 1):
final = np.uint32
elif scalarval <= (2**64 - 1):
final = np.uint64
else:
final = np.float64
else:
if scalarval >= -128 and scalarval <= 127:
final = np.int8
elif scalarval >= -32768 and scalarval <= 32767:
final = np.int16
elif scalarval >= -(2**31) and scalarval <= (2**31 - 1):
final = np.int32
elif scalarval >= -(2**63) and scalarval <= (2**63 - 1):
final = np.int64
else:
final = np.float64
final = np.dtype(final)
# do not allow downcasting
if array.num < final.num:
# print("returning final", final)
return final
return array
# if type(args[0]) in ScalarType:
# print("converting arg2 to ", final_dtype)
# args[1] = args[1].astype(final_dtype);
# else:
# print("converting arg1 to ", final_dtype)
# args[0] = args[0].astype(final_dtype);
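# Behaviour sketch for logical_find_common_type above (illustrative, not a
# doctest run at import): the array dtype is widened only as far as the scalar
# value requires, and is never downcast.
#
#   >>> logical_find_common_type([np.dtype(np.int8)], [np.dtype(np.int64)], 1000)
#   dtype('int16')    # 1000 needs int16, which is wider than the array's int8
#   >>> logical_find_common_type([np.dtype(np.int8)], [np.dtype(np.int64)], 5)
#   dtype('int8')     # 5 fits in int8, so the array dtype is kept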
# --------------------------------------------------------------
def _ASTYPE(self, dtype):
"""internal call from array_ufunc to convert arrays. returns numpy arrays"""
# return self.astype(dtype)
to_num = dtype.num
if self.dtype.num <= 13 and to_num <= 13:
if FastArray.SafeConversions:
# perform a safe conversion understanding sentinels
return TypeRegister.MathLedger._AS_FA_TYPE(self, to_num)._np
else:
# perform unsafe conversion NOT understanding sentinels
return TypeRegister.MathLedger._AS_FA_TYPE_UNSAFE(self, to_num)._np
return self.astype(dtype)
# --------------------------------------------------------------
# --------------------------------------------------------------
class FastArray(np.ndarray):
"""
A `FastArray` is a 1-dimensional array of items that are the same data type.
Because it's a subclass of NumPy's `numpy.ndarray`, all ``ndarray`` functions and attributes
can be used with `FastArray` objects. However, Riptable optimizes many of NumPy's
functions to make them faster and more memory-efficient. Riptable has also added
some methods.
`FastArray` objects with more than 1 dimension are not supported.
See `NumPy's
docs <https://numpy.org/devdocs/reference/generated/numpy.ndarray.html>`_ for
details on all ``ndarray`` methods and attributes.
Parameters
----------
arr : array, iterable, or scalar value
Contains data to be stored in the `FastArray`.
**kwargs
Additional keyword arguments to be passed to the function.
Notes
-----
To improve performance, `FastArray` objects take over some of NumPy's universal functions
(ufuncs), use array recycling and multiple threads, and pass certain method calls to
`Bottleneck <https://kwgoodman.github.io/bottleneck-doc/index.html>`_.
Note that whenever Riptable has implemented its own version of
an existing NumPy method, a call to the NumPy method results in a call to the
optimized Riptable version instead. We encourage users to directly call the Riptable
method in order to avoid any confusion as to what method is actually being called.
See the list of `NumPy Methods Optimized by Riptable for FastArrays
<https:tutorial_numpy_rt.html>`_.
Examples
--------
**Construct a FastArray**
Pass a list to the constructor:
>>> rt.FastArray([1, 2, 3, 4, 5])
FastArray([1, 2, 3, 4, 5])
>>> #NOTE: rt.FA also works.
>>> rt.FA([1.0, 2.0, 3.0, 4.0, 5.0])
FastArray([1., 2., 3., 4., 5.])
Or use a utility function:
>>> rt.full(10, 0.7)
FastArray([0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7])
>>> rt.arange(10)
FastArray([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
You can optionally specify a data type:
>>> x = rt.FastArray([3, 6, 10], dtype = rt.float64)
>>> x, x.dtype
(FastArray([ 3., 6., 10.]), dtype('float64'))
>>> # Using a string shortcut:
>>> x = rt.FastArray([3,6,10], dtype = 'float64')
>>> x, x.dtype
(FastArray([ 3., 6., 10.]), dtype('float64'))
By default, characters are stored as byte strings. When ``unicode=True``,
the `FastArray` allows Unicode characters.
>>> rt.FA(list('abc'), unicode=True)
FastArray(['a', 'b', 'c'], dtype='<U1')
To convert an existing NumPy array, use the `FastArray` constructor.
>>> np_arr = np.array([1, 2, 3])
>>> rt.FA(np_arr)
FastArray([1, 2, 3])
To view the NumPy array as a `FastArray` (which is slightly less expensive than
using the constructor), use the `view` method.
>>> fa = np_arr.view(rt.FA)
>>> fa
FastArray([1, 2, 3])
To view it as a NumPy array again:
>>> fa.view(np.ndarray)
array([1, 2, 3])
>>> # Alternatively:
>>> fa._np
array([1, 2, 3])
**Get a Subset of a FastArray**
You can use standard Python slicing notation or fancy indexing to access a
subset of a `FastArray`.
>>> # Create a FastArray:
>>> array = rt.arange(8)**2
>>> array
FastArray([ 0, 1, 4, 9, 16, 25, 36, 49])
>>> # Use Python slicing to get elements 2, 3, and 4:
>>> array[2:5]
FastArray([ 4, 9, 16])
>>> # Use fancy indexing to get elements 2, 4, and 1 (in that order):
>>> array[[2, 4, 1]]
FastArray([ 4, 16, 1])
For more details, see the examples for 1-dimensional arrays in NumPy's docs:
`Indexing on ndarrays <https://numpy.org/doc/stable/user/basics.indexing.html>`_.
Note that slicing creates a view of the array and does not copy the underlying data;
modifying the slice modifies the original array. Fancy indexing creates a copy of
the extracted data; modifying this array does not modify the original array.
You can also pass a Boolean mask array.
>>> # Create a Boolean mask:
>>> evenMask = (array % 2 == 0)
>>> evenMask
FastArray([ True, False, True, False, True, False, True, False])
>>> # Index using the Boolean mask:
>>> array[evenMask]
FastArray([ 0, 4, 16, 36])
**How to Subclass FastArray**
Include the required class definition:
>>> class TestSubclass(rt.FastArray):
... def __new__(cls, arr, **args):
... # Before this call, arr needs to be a np.ndarray instance.
... return arr.view(cls)
... def __init__(self, arr, **args):
... pass
If the subclass is computable, you might define your own math operations. In these
operations, you might define what the subclass can be computed with. For examples of
new definitions, see the `DateTimeNano` class.
Common operations to hook are comparisons (``__eq__()``, ``__ne__()``, ``__gt__()``,
``__lt__()``, ``__le__()``, ``__ge__()``) and basic math functions (``__add__()``,
``__sub__()``, ``__mul__()``, etc.).
Bracket indexing operations are very common. If the subclass needs to set or return
a value other than that in the underlying array, you need to take over
`__getitem__()` or `__setitem__()`.
Indexing is also used in display. For regular console/notebook display, you need to
take over:
* `__repr__()`
* `__str__()`
* `_repr_html_()` (for JupyterLab and Jupyter notebooks)
If the array is being displayed in a `Dataset` and you require certain formatting, you
need to define two more methods:
``display_query_properties()``
Returns an `ItemFormat` object (see `rt.Utils.rt_display_properties`)
``display_convert_func()``
The conversion function returned by ``display_query_properties()``
must return a string. Each item being displayed, the result of ``__getitem__()``
at a single index, will go through this function individually, accompanied by
an `ItemFormat` object.
Many Riptable operations need to return arrays of the same class they received. To
ensure that your subclass will retain its special properties, you need to take over
`newclassfrominstance()`. Failure to take this over will often result in an object
with uninitialized variables.
`copy()` is another method that is called generically in Riptable routines, and
needs to be taken over to retain subclass properties.
For a view of the underlying `FastArray`, you can use the `_fa` property.
"""
# Defines a generic np.ndarray subclass, that can cache numpy arrays
# Static Class VARIABLES
# change this to show or less values on __repr__
MAX_DISPLAY_LEN = 10
# set to 2 or 3 for extra debug information
Verbose = 1
# set to true for reusing numpy arrays instead of deleting them completely
Recycle = True
# set to true to preserve sentinels during internal array_ufunc calculations
SafeConversions = True
# set to false to be just normal numpy
FasterUFunc = True
NEW_ARRAY_FUNCTION_ENABLED = False
"""Enable implementation of array function protocol (default False)."""
# 0=Quiet, 1=Warn, 2=Exception
WarningLevel = 1
# set to true to not allow arrays we do not support
NoTolerance = False
# set to false to not compress when pickling
CompressPickle = True
# a dictionary to avoid repeating warnings in multiple places
# TODO: wrap this in a class so that warnings can be turned on/off
WarningDict = {
"multiple_dimensions": "FastArray contains two or more dimensions greater than one - shape:{}. Problems may occur."
}
# For reduction operations, the identity element of the operation (for operations
# where such an element is defined).
# N.B. As of numpy 1.19 it does not appear there's a straightforward way of getting from
# something like ``np.sum`` back to ``np.add``, from which we could get the .identity property.
# If that ever changes, this dictionary would no longer be necessary so it can+should be removed.
_reduce_op_identity_value: Mapping[REDUCE_FUNCTIONS, Any] = {
REDUCE_FUNCTIONS.REDUCE_ALL: True, # np.all(np.array([]))
REDUCE_FUNCTIONS.REDUCE_ANY: False, # np.any(np.array([]))
REDUCE_FUNCTIONS.REDUCE_NANSUM: np.add.identity,
REDUCE_FUNCTIONS.REDUCE_SUM: np.add.identity,
}
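# Illustrative use of the identity table above (a sketch): a fully-filtered or
# empty SUM/NANSUM reduction returns np.add.identity (0), while ANY/ALL return
# their boolean identities.
#
#   >>> np.add.identity
#   0
#   >>> FastArray._reduce_op_identity_value[REDUCE_FUNCTIONS.REDUCE_ANY]
#   False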
# --------------------------------------------------------------------------
class _ArrayFunctionHelper:
# TODO add usage examples
"""
Array function helper is responsible for maintaining the array function protocol array implementations in the
form of the following API:
- get_array_function: given the Numpy function, returns overridden array function
- get_array_function_type_compatibility_check: given the Numpy function, returns overridden array function type compatibility check
- register_array_function: a function decorator whose argument is the Numpy function to override and the function that will override it
- register_array_function_type_compatibility: similar to register_array_function, but guards against incompatible array function protocol type arguments for the given Numpy function
- deregister: deregistration of the Numpy function and type compatibility override
- deregister_array_function_type_compatibility: deregistration of Numpy function type compatibility override
"""
# TODO design consideration - using a single dict with tuple type compatibility and redirected callables
# where a default type compatibility check can be the default value
# a dictionary that maps numpy functions to our custom variants
HANDLED_FUNCTIONS: Dict[callable, callable] = {}
"""Dictionary of Numpy API function with overridden functions."""
HANDLED_TYPE_COMPATIBILITY_CHECK: Dict[callable, callable] = {}
"""Dictionary of type compatibility functions per each Numpy API overridden function."""
@classmethod
def get_array_function(cls, np_function: Callable) -> Optional[Callable]:
"""
Given the Numpy function, returns overridden array function if implemented, otherwise None.
Parameters
----------
np_function: callable
The overridden Numpy array function.
Returns
-------
callable, optional
The overridden function as a callable or None if it's not implemented.
"""
return cls.HANDLED_FUNCTIONS.get(np_function, None)
@classmethod
def get_array_function_type_compatibility_check(cls, np_function: Callable) -> Optional[Callable]:
"""
Given the Numpy function, returns the corresponding array function type compatibility callable, otherwise None.
Parameters
----------
np_function: callable
The overridden Numpy array function.
Returns
-------
callable, optional
The overridden type compatibility function as a callable or None if it's not implemented.
"""
return cls.HANDLED_TYPE_COMPATIBILITY_CHECK.get(np_function, None)
@classmethod
def register_array_function(cls, np_function: Callable) -> Callable:
"""
A function decorator whose argument is the Numpy function to override and the function that will override it.
This registers the `np_function` with the function that it decorates.
Parameters
----------
np_function: callable
The overridden Numpy array function.
Returns
-------
callable
The decorator that registers `np_function` with the decorated function.
"""
# @wraps(np_function)
def decorator(func):
cls.HANDLED_FUNCTIONS[np_function] = func
if logger.isEnabledFor(logging.DEBUG):
logger.debug(
f"{cls.__name__}.register_array_function: registered {repr(func.__name__)} in place of {np_function.__name__}"
)
return func
return decorator
@classmethod
def register_array_function_type_compatibility(cls, np_function: Callable) -> Callable:
"""
This registers the type compatibility check for the `np_function` with the function that it decorates.
Parameters
----------
np_function: callable
The overridden Numpy array function.
Returns
-------
callable
The decorator that registers the type compatibility check for the `np_function` with the decorated function.
"""
# @wraps(np_function)
def decorator(check_type_compatibility):
cls.HANDLED_TYPE_COMPATIBILITY_CHECK[np_function] = check_type_compatibility
if logger.isEnabledFor(logging.DEBUG):
logger.debug(
f"{cls.__name__}.register_array_function_type_compatibility: registered type compatibility check {repr(check_type_compatibility)} for array function {np_function.__name__}"
)
return check_type_compatibility
return decorator
@classmethod
def deregister_array_function(cls, np_function: Callable) -> None:
"""
Deregistration of the Numpy function and type compatibility override.
Parameters
----------
np_function: callable
The overridden Numpy array function.
"""
if cls.get_array_function(np_function) is not None:
if logger.isEnabledFor(logging.DEBUG):
logger.debug(f"{cls.__name__}.deregister_array_function: deregistered {repr(np_function.__name__)}")
del cls.HANDLED_FUNCTIONS[np_function]
@classmethod
def deregister_array_function_type_compatibility(cls, np_function: Callable) -> None:
"""
Deregistration of the Numpy function and type compatibility override.
Parameters
----------
np_function: callable
The overridden Numpy array function.
"""
if cls.HANDLED_TYPE_COMPATIBILITY_CHECK.get(np_function, None) is not None:
if logger.isEnabledFor(logging.DEBUG):
logger.debug(
f"{cls.__name__}.deregister_array_function_type_compatibility: deregistered {repr(np_function.__name__)}"
)
del cls.HANDLED_TYPE_COMPATIBILITY_CHECK[np_function]
@classmethod
def deregister(cls, np_function: Callable) -> None:
cls.deregister_array_function(np_function)
cls.deregister_array_function_type_compatibility(np_function)
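# Usage sketch for the registration API above (illustrative only; ``np.argsort``
# and ``my_argsort`` are placeholder names, not overrides this module defines
# here):
#
#   >>> @FastArray._ArrayFunctionHelper.register_array_function(np.argsort)
#   ... def my_argsort(*args, **kwargs):
#   ...     ...  # custom implementation dispatched via __array_function__
#   >>> FastArray._ArrayFunctionHelper.get_array_function(np.argsort) is my_argsort
#   True
#   >>> FastArray._ArrayFunctionHelper.deregister(np.argsort)   # removes both overrides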
# --------------------------------------------------------------------------
@classmethod
def _possibly_warn(cls, warning_string: str) -> Optional[bool]:
if cls.WarningLevel == 0:
return False
if cls.WarningLevel == 1:
warnings.warn(warning_string)
return True
raise TypeError(warning_string)
# --------------------------------------------------------------------------
def __new__(cls, arr, **kwargs) -> FastArray:
allow_unicode = kwargs.get("unicode", False)
try:
del kwargs["unicode"]
except:
pass
# If already a numpy array no need to call asany
if isinstance(arr, np.ndarray) and len(kwargs) == 0:
instance = arr
if isinstance(instance, cls) and instance.dtype.char != "U":
if instance.dtype.char not in NumpyCharTypes.Supported:
cls._possibly_warn(
f"FastArray contains an unsupported type '{instance.dtype}'. Problems may occur. Consider categoricals."
)
# if already a FastArray, do not rewrap this
return instance
else:
# flip the list or other object to a numpy array
instance = np.asanyarray(arr, **kwargs)
if not allow_unicode and instance.dtype.char == "U":
try:
instance = np.asarray(instance, dtype="S")
except:
pass
if len(instance.shape) == 0:
if instance.dtype.char in NumpyCharTypes.Supported:
instance = np.asanyarray([instance], **kwargs)
else:
# np.asarray on a set will return an object of 1
if isinstance(arr, set):
instance = np.asarray(list(arr), **kwargs)
else:
raise TypeError(f"FastArray cannot initialize {arr}")
if instance.ndim > 1:
# only one dimension can be greater than one
if cls._check_ndim(instance) > 1:
cls._possibly_warn(FastArray.WarningDict["multiple_dimensions"].format(instance.shape))
# warnings.warn(f"FastArray contains two or more dimensions greater than one - shape:{instance.shape}. Problems may occur.")
elif not (instance.flags.f_contiguous or instance.flags.c_contiguous):
# copy should eliminate strides problem
instance = instance.copy()
cls._possibly_warn(f"FastArray initialized with strides.")
# for arrays that can cause problems but we allow now
if cls.NoTolerance:
if not (instance.flags.f_contiguous or instance.flags.c_contiguous):
# copy should eliminate strides problem
instance = instance.copy()
cls._possibly_warn(f"FastArray initialized with strides.")
if instance.dtype.char not in NumpyCharTypes.Supported:
cls._possibly_warn(
f"FastArray contains an unsupported type '{instance.dtype}'. Problems may occur. Consider categoricals."
)
return instance.view(cls)
def __array_finalize__(self, obj):
"""Finalizes self from other, called as part of ndarray.__new__()"""
if obj is None:
return
from_peer = isinstance(obj, FastArray)
if from_peer and hasattr(obj, "_name"):
self._name = obj._name
# --------------------------------------------------------------------------
def __reduce__(self):
"""
Used for pickling.
For just a FastArray we pass back the view of the np.ndarray, which then knows how to pickle itself.
NOTE: I think there is a faster way... possibly returning a byte string.
"""
cls = type(self)
# check if subclassed routine knows how to serialize itself
if hasattr(self, "_build_sds_meta_data"):
try:
name = self._name
except:
name = "unknown"
tups = self._build_sds_meta_data(name)
return (cls._load_from_sds_meta_data, (name, self.view(FastArray), tups[1], tups[0]))
# set to true to turn compression on
if cls.CompressPickle and len(self) > 0:
# create a single compressed array of uint8
carr = rc.CompressDecompressArrays([self], 0)[0]
return (FA_FROM_UINT8, (carr.view(np.ndarray),))
else:
return (
cls.__new__,
(
cls,
self.view(np.ndarray),
),
)
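# Pickle round-trip sketch (illustrative): with CompressPickle enabled the
# array pickles as a single compressed uint8 buffer and is rebuilt through
# FA_FROM_UINT8 defined near the top of this module.
#
#   >>> import pickle
#   >>> a = FastArray([1, 2, 3])
#   >>> b = pickle.loads(pickle.dumps(a))
#   >>> bool((a == b).all())
#   True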
# --------------------------------------------------------------------------
@classmethod
def _check_ndim(cls, instance):
"""
Iterates through dimensions of an array, counting how many dimensions have values greater than 1.
Problems may occur with multidimensional FastArrays, and the user will be warned.
"""
index = 0
aboveone = 0
while index < instance.ndim:
if instance.shape[index] > 1:
aboveone += 1
index += 1
return aboveone
# --------------------------------------------------------------------------
def get_name(self) -> str:
"""
Get the name that's assigned to a `FastArray`.
When a `FastArray` object is created, it has no name. It can be assigned a name
via `set_name`. For details, see :meth:`FastArray.set_name`.
Returns
-------
str or None
The assigned name, or None if the array has not been named.
See Also
--------
FastArray.set_name
Examples
--------
Assign the `FastArray` a name using :meth:`FastArray.set_name`:
>>> a = rt.arange(5)
>>> a.set_name('FA Name')
FastArray([0, 1, 2, 3, 4])
Get the name:
>>> a.get_name()
'FA Name'
"""
name = None
try:
name = self._name
except:
pass
return name
# --------------------------------------------------------------------------
def set_name(self, name) -> FastArray:
"""
Assign a name to a `FastArray`.
A `FastArray` is a wrapper around a NumPy `ndarray`. When a `FastArray` is
created, it has no name. You can assign it a name using `set_name`.
**Interactions with Dataset Objects**
When an unnamed `FastArray` is added to a `Dataset`:
- The `FastArray` inherits the name of the `Dataset` column.
- Calling ``fa.set_name`` or ``ds.col.set_name``, or changing the displayed
column name via ``ds.col_rename``, changes the name assigned to the
`FastArray`.
- Note that calling ``fa.set_name`` or ``ds.col.set_name`` doesn't change the
displayed column name.
When a named `FastArray` is added to a `Dataset`:
- A new `FastArray` instance is created that inherits the `Dataset` column name.
- Calling ``ds.col.set_name`` or changing the displayed column name via
``ds.col_rename`` changes the new instance's name.
- Calling `set_name` on the original `FastArray` instance changes only that instance's
name.
In both cases, the NumPy array underlying the `FastArray` is shared -- changes
to its values appear in the `Dataset` column, and vice-versa.
**Interactions with FastArray Objects**
- When a `FastArray` is created as a view of another, named `FastArray`, the new
`FastArray` instance inherits the name from the original `FastArray`.
- Whether the original `FastArray` is named or unnamed, calling `set_name` on
either `FastArray` does not change the name of the other `FastArray`.
Parameters
----------
name : str
The name to assign to the `FastArray`.
Returns
-------
`FastArray`
The `FastArray` is returned. The name can be accessed using
:meth:`FastArray.get_name`.
See Also
--------
FastArray.get_name
Examples
--------
>>> a = rt.arange(5)
>>> a.set_name('FA Name')
FastArray([0, 1, 2, 3, 4])
You can get the name using :meth:`FastArray.get_name`:
>>> a.get_name()
'FA Name'
When an unnamed `FastArray` is added to a `Dataset` column, the `FastArray`
inherits the name of the column.
>>> a = rt.FastArray([1, 2, 3])
>>> ds = rt.Dataset()
>>> ds.Column_Name = a
>>> a.get_name()
'Column_Name'
Calling ``ds.col.set_name`` changes the name assigned to the `FastArray`
(but not the displayed column name).
>>> ds.Column_Name.set_name('New Name')
FastArray([1, 2, 3])
>>> a.get_name()
'New Name'
>>> ds
# Column_Name
- -----------
0 1
1 2
2 3
<BLANKLINE>
[3 rows x 1 columns] total bytes: 24.0 B
When a named `FastArray` is added to a `Dataset` column, a new `FastArray`
instance is created that inherits the column name. The original instance is
not renamed.
>>> a = rt.FastArray([1, 2, 3])
>>> a.set_name('FA Name')
FastArray([1, 2, 3])
>>> ds = rt.Dataset()
>>> ds.Column_Name = a
>>> ds.Column_Name.get_name()
'Column_Name'
>>> a.get_name()
'FA Name'
Changing the displayed column name affects the name of the new instance,
but not the name of the original `FastArray`.
>>> ds.col_rename('Column_Name', 'New_Column')
>>> ds.New_Column.get_name()
'New_Column'
>>> a.get_name()
'FA Name'
"""
self._name = name
return self
# --------------------------------------------------------------------------
@staticmethod
def _FastFunctionsOn():
if FastArray.Verbose > 0:
print(f"FASTFUNC ON: fastfunc was {FastArray.FasterUFunc}")
FastArray.FasterUFunc = True
@staticmethod
def _FastFunctionsOff():
if FastArray.Verbose > 0:
print(f"FASTFUNC OFF: fastfunc was {FastArray.FasterUFunc}")
FastArray.FasterUFunc = False
@property
def _np(self) -> np.ndarray:
"""
Return a NumPy array view of the input `FastArray`.
Returns
-------
numpy.ndarray
A NumPy array view of the input `FastArray`.
See Also
--------
numpy.ndarray.view : Can be used to view a NumPy array as a `FastArray`.
Examples
--------
Return a NumPy array view for an integer `FastArray`:
>>> a = rt.FA([1, 2, 3, 4, 5])
>>> a
FastArray([1, 2, 3, 4, 5])
>>> a._np
array([1, 2, 3, 4, 5])
Changes to the view are reflected in the original `FastArray`:
>>> npview = a._np
>>> npview[2] = 10
>>> a
FastArray([ 1, 2, 10, 4, 5])
To view a NumPy array as a `FastArray`, you can use `numpy.ndarray.view`:
>>> npview.view(rt.FastArray)
FastArray([ 1, 2, 10, 4, 5])
"""
return self.view(np.ndarray)
@staticmethod
def _V0():
print("setting verbose level to 0")
FastArray.Verbose = 0
return FastArray.Verbose
@staticmethod
def _V1():
print("setting verbose level to 1")
FastArray.Verbose = 1
return FastArray.Verbose
@staticmethod
def _V2():
print("setting verbose level to 2")
FastArray.Verbose = 2
return FastArray.Verbose
@staticmethod
def _ON():
"""
enable intercepting array ufunc
"""
return FastArray._FastFunctionsOn()
@staticmethod
def _OFF():
"""
disable intercepting of array ufunc
"""
return FastArray._FastFunctionsOff()
@staticmethod
def _TON():
print("Threading on")
return rc.ThreadingMode(0)
@staticmethod
def _TOFF():
print("Threading off")
return rc.ThreadingMode(1)
@staticmethod
def _RON(quiet=False):
"""
Turn on recycling.
Parameters
----------
quiet: bool, optional
Returns
-------
True if recycling was previously on, else False
"""
if not quiet:
print("Recycling numpy arrays on")
result = rc.SetRecycleMode(0)
FastArray.Recycle = True
return result
@staticmethod
def _ROFF(quiet=False):
"""
Turn off recycling.
Parameters
----------
quiet: bool, optional
Returns
-------
True if recycling was previously on, else False
"""
if not quiet:
print("Recycling numpy arrays off")
result = rc.SetRecycleMode(1)
FastArray.Recycle = False
return result
@staticmethod
def _RDUMP():
"""
Displays to server's stdout
Returns
-------
Total size of items not in use
"""
return rc.RecycleDump()
@staticmethod
def _GCNOW(timeout: int = 0):
"""
Pass the garbage collector timeout value to cleanup.
Passing 0 will force an immediate garbage collection.
Returns
-------
Dictionary of memory heuristics including 'TotalDeleted'
"""
import gc
gc.collect()
result = rc.RecycleGarbageCollectNow(timeout)
totalDeleted = result["TotalDeleted"]
if totalDeleted > 0:
FastArray._GCNOW(timeout)
return result
@staticmethod
def _GCSET(timeout: int = 100):
"""
Pass the garbage collector timeout value to expire
The timeout value is roughly in 2/5 secs
A value of 100 is usually about 40 seconds
Returns
-------
Previous timespan
"""
return rc.RecycleSetGarbageCollectTimeout(timeout)
@staticmethod
def _LON():
"""Turn the math ledger on to record all array math routines"""
return TypeRegister.MathLedger._LedgerOn()
@staticmethod
def _LOFF():
"""Turn the math ledger off"""
return TypeRegister.MathLedger._LedgerOff()
@staticmethod
def _LDUMP(dataset=True):
"""Print out the math ledger"""
return TypeRegister.MathLedger._LedgerDump(dataset=dataset)
@staticmethod
def _LDUMPF(filename):
"""Save the math ledger to a file"""
return TypeRegister.MathLedger._LedgerDumpFile(filename)
@staticmethod
def _LCLEAR():
"""Clear all the entries in the math ledger"""
return TypeRegister.MathLedger._LedgerClear()
# --------------------------------------------------------------------------
def __setitem__(self, fld, value):
"""
Used on the left hand side of
arr[fld] = value
This routine tries to convert invalid dtypes so that invalids are preserved when setting.
The mbset portion of this is now written (which will not raise an IndexError on out of bounds).
Parameters
----------
fld: scalar, boolean, fancy index mask, slice, sequence, or list
value: scalar, sequence or dataset value as follows
sequence can be list, tuple, np.ndarray, FastArray
Raises
------
IndexError
"""
newvalue = None
# try to make an array, even if array of 1
if np.isscalar(value):
if not isinstance(value, (str, bytes, np.bytes_, np.str_)):
# convert to array of 1 item
newvalue = FastArray([value])
elif isinstance(value, (list, tuple)):
# convert to numpy array
newvalue = FastArray(value, unicode=True)
elif isinstance(value, np.ndarray):
# just reference it
newvalue = value
if newvalue is not None:
# now we have a numpy array.. convert the dtype to match us
# this should take care of invalids
# convert first 14 common types (bool, ints, floats)
if newvalue.dtype != self.dtype and newvalue.dtype.num <= 13:
newvalue = newvalue.astype(self.dtype)
# check for boolean array since we do not handle fancy index yet
if isinstance(fld, np.ndarray) and fld.dtype.num == 0:
is_unsupported = self._is_not_supported(newvalue)
if is_unsupported:
# make it contiguous, in case that's the problem?
newvalue = newvalue.copy()
# re-test support, just to be sure.
is_unsupported = self._is_not_supported(newvalue)
# if supported, call our setitem, it will return False if it fails
if not is_unsupported:
if rc.SetItem(self, fld, newvalue):
return
try:
np.ndarray.__setitem__(self, fld, newvalue)
except Exception:
# odd ball cases handled here like ufunc tests
np.ndarray.__setitem__(self, fld, value)
return
# punt to normal numpy
np.ndarray.__setitem__(self, fld, value)
# --------------------------------------------------------------------------
def __getitem__(self, fld) -> FastArray:
"""
riptable has special routines to handle array input in the indexer.
Everything else will go to numpy getitem.
"""
if isinstance(fld, np.ndarray):
# result= super(FastArray, self).__getitem__(fld).view(FastArray)
if fld.dtype == np.bool_:
# make sure no striding
# NOTE: will fail on self.dtype.byteorder as little endian
if self.flags.f_contiguous:
# dimensions must match
if self.ndim == fld.ndim and self.ndim == 1:
return TypeRegister.MathLedger._INDEX_BOOL(self, fld)
# if we have fancy indexing and we support the array type, make sure we do not have stride problem
if fld.dtype.char in NumpyCharTypes.AllInteger and self.dtype.char in NumpyCharTypes.SupportedAlternate:
if self.flags.f_contiguous and fld.flags.f_contiguous:
if len(self.shape) == 1:
return TypeRegister.MathLedger._MBGET(self, fld)
result = TypeRegister.MathLedger._GETITEM(super(FastArray, self), fld)
return result.view(FastArray)
else:
# could be a list which is often converted to an array
# This assumes that FastArray has a sole parent, np.ndarray
# If this changes, the super() call needs to be used
return np.ndarray.__getitem__(self, fld)
# return super(FastArray, self).__getitem__(fld)
# --------------------------------------------------------------------------
def display_query_properties(self):
"""
Returns an ItemFormat object and a function for converting the FastArray's items to strings.
Basic types: Bool, Int, Float, Bytes, String all have default formats / conversion functions.
(see Utils.rt_display_properties)
If a new type is a subclass of FastArray and needs to be displayed in format
different from its underlying type, it will need to take over this routine.
"""
arr_type, convert_func = DisplayConvert.get_display_convert(self)
display_format = default_item_formats.get(arr_type, ItemFormat())
if len(self.shape) > 1:
display_format.convert = convert_func
convert_func = DisplayConvert.convertMultiDims
# add sentinel value for integer
if display_format.invalid is None:
display_format = display_format.copy()
if self.dtype.char in NumpyCharTypes.AllInteger:
display_format.invalid = INVALID_DICT[self.dtype.num]
return display_format, convert_func
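# Illustrative call (a sketch; the exact ItemFormat contents depend on the
# array's dtype): the returned convert_func renders a single item for display,
# as described in the class-level subclassing notes above.
#
#   >>> fmt, conv = FastArray([1, 2, 3]).display_query_properties()
#   >>> fmt.invalid     # for integer arrays this carries the dtype's sentinel value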
# --------------------------------------------------------------------------
def astype(self, dtype, order="K", casting="unsafe", subok=True, copy=True) -> FastArray:
"""
Return a `FastArray` with values converted to the specified data type.
Check your results when you convert missing values. Sentinel values are
preserved when Riptable handles the conversion. However, in some cases
the array is sent to NumPy for conversion and results may not be what
you expect.
For parameter descriptions, see :py:meth:`numpy.ndarray.astype`. Note
that until a reported bug is fixed, the `casting` parameter is ignored
when Riptable handles the conversion.
Returns
-------
`FastArray`
A `FastArray` with values converted to the specified data type.
See Also
--------
Dataset.astype
Examples
--------
>>> a = rt.FastArray([1.7, 2.0, 3.0])
>>> a.astype(int)
FastArray([1, 2, 3])
Convert a `NaN` to an `int` sentinel and back:
>>> a = rt.FastArray([rt.nan, 1.0, 2.0])
>>> a_int = a.astype(int)
>>> a_int
FastArray([-2147483648, 1, 2])
>>> a_int.astype(float)
FastArray([nan, 1., 2.])
"""
# result= super(FastArray, self).astype(dtype, order,casting,subok,copy)
# 17 is object
# 18 = ASCII string
# 19 = UNICODE string
to_num = np.dtype(dtype).num
# check for contiguous in one or two dimensions
if self.flags.f_contiguous or self.flags.c_contiguous:
if order == "K" and subok and copy and self.dtype.num <= 13 and to_num <= 13:
# perform a safe conversion understanding sentinels
return TypeRegister.MathLedger._AS_FA_TYPE(self, to_num)
# punt to numpy
result = TypeRegister.MathLedger._ASTYPE(super(FastArray, self), dtype, order, casting, subok, copy)
return result.view(FastArray)
# --------------------------------------------------------------------------
def _view_internal(self, type=None):
"""
FastArray subclasses need to take this over if they want to make a shallow copy of
a fastarray instead of viewing themselves as a fastarray (which drops their other properties).
Taking over view directly may have a lot of unintended consequences.
"""
if type is not FastArray or type is not None:
newarr = self.view(type)
# copy all the properties
newarr.__dict__ = self.__dict__.copy()
return newarr
return self.view(FastArray)
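# Shallow-copy sketch (illustrative): _view_internal keeps instance attributes
# such as a name assigned via set_name, because __dict__ is copied onto the
# new view.
#
#   >>> a = FastArray([1, 2, 3]).set_name('px')
#   >>> b = a._view_internal(FastArray)
#   >>> b.get_name()
#   'px'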
# --------------------------------------------------------------------------
def copy(self, order="K") -> FastArray:
"""
Return a copy of the input `FastArray`.
Parameters
----------
order : {'K', 'C', 'F', 'A'}, default 'K'
Controls the memory layout of the copy: 'K' means match the layout
of the input array as closely as possible; 'C' means row-based
(C-style) order; 'F' means column-based (Fortran-style) order;
'A' means 'F' if the input array is formatted as 'F', 'C' if not.
Returns
-------
FastArray
A copy of the input `FastArray`.
See Also
--------
.Categorical.copy : Return a copy of the input `.Categorical`.
.Dataset.copy : Return a copy of the input `.Dataset`.
.Struct.copy : Return a copy of the input `.Struct`.
Examples
--------
Copy a `FastArray`:
>>> a = rt.FA([1, 2, 3, 4, 5])
>>> a
FastArray([1, 2, 3, 4, 5])
>>> a2 = a.copy()
>>> a2
FastArray([1, 2, 3, 4, 5])
>>> a2 is a
False # The copy is a separate object.
"""
# result= super(FastArray, self).copy(order)
if self.flags.f_contiguous or self.flags.c_contiguous:
if order == "K" and self.dtype.num <= 13:
# perform a faster multithreaded copy
return TypeRegister.MathLedger._AS_FA_TYPE(self, self.dtype.num)
result = TypeRegister.MathLedger._COPY(super(FastArray, self), order)
return result.view(FastArray)
# --------------------------------------------------------------------------
def copy_invalid(self) -> FastArray:
"""
Return a copy of a `FastArray` filled with the invalid value
for the array's data type.
Returns
-------
FastArray
A copy of the input array, filled with the invalid value for the
array's dtype.
See Also
--------
FastArray.inv : Return the invalid value for the input array's dtype.
FastArray.fill_invalid :
Replace the values of a `FastArray` with the invalid value for the
array's dtype.
Examples
--------
Copy an integer array and replace with invalids:
>>> a = rt.FA([1, 2, 3, 4, 5])
>>> a
FastArray([1, 2, 3, 4, 5])
>>> a2 = a.copy_invalid()
>>> a2
FastArray([-2147483648, -2147483648, -2147483648, -2147483648,
-2147483648])
>>> a
FastArray([1, 2, 3, 4, 5]) # a is unchanged.
Copy a floating-point array and replace with invalids:
>>> a3 = rt.FA([0., 1., 2., 3., 4.])
>>> a3
FastArray([0., 1., 2., 3., 4.])
>>> a3.copy_invalid()
FastArray([nan, nan, nan, nan, nan])
Copy a string array and replace with invalids:
>>> a4 = rt.FA(['AMZN', 'IBM', 'MSFT', 'AAPL'])
>>> a4
FastArray([b'AMZN', b'IBM', b'MSFT', b'AAPL'], dtype='|S4')
>>> a4.copy_invalid()
FastArray([b'', b'', b'', b''], dtype='|S4') # Invalid string value is an empty string.
"""
return self.fill_invalid(inplace=False)
# --------------------------------------------------------------------------
@property
def inv(self) -> Any:
"""
Return the invalid value for the input array's data type.
Returns
-------
Any
The invalid value for the input array's dtype. For example,
`~riptable.int8` returns -128, `~riptable.uint8` returns 255, and
`~riptable.bool_` returns `False`.
See Also
--------
FastArray.copy_invalid :
Return a copy of a `FastArray` filled with the invalid value for the
array's dtype.
FastArray.fill_invalid :
Replace the values of a `FastArray` with the invalid value for the
array's dtype.
~riptable.rt_enum.INVALID_DICT : A mapping of invalid values to dtypes.
Examples
--------
Return the invalid value for an integer array:
>>> a = rt.FA([1, 2, 3, 4, 5])
>>> a
FastArray([1, 2, 3, 4, 5])
>>> a.inv
-2147483648
Return the invalid value for a floating-point array:
>>> a2 = rt.FA([0., 1., 2., 3., 4.])
>>> a2
FastArray([0., 1., 2., 3., 4.])
>>> a2.inv
nan
Return the invalid value for a string array:
>>> a3 = rt.FA(["AMZN", "IBM", "MSFT", "AAPL"])
>>> a3
FastArray([b'AMZN', b'IBM', b'MSFT', b'AAPL'], dtype='|S4')
>>> a3.inv
b''
"""
return INVALID_DICT[self.dtype.num]
# --------------------------------------------------------------------------
def fill_invalid(self, shape=None, dtype=None, inplace=True) -> FastArray:
"""
Replace all values of the input `FastArray` with an invalid value.
The invalid value used is determined by the input array's dtype or a
user-specified dtype.
Warning: By default, this operation is in place.
Parameters
----------
shape : int or sequence of int, optional
Shape of the new array, for example: ``(2, 3)`` or ``2``. Note that
although multi-dimensional arrays are technically supported by Riptable,
you may get unexpected results when working with them.
dtype : str, optional
The desired dtype for the returned array.
inplace : bool, default True
If `True` (the default), modify original data. If `False`, return a
copy of the array.
Returns
-------
FastArray, optional
If ``inplace=False``, a copy of the input `FastArray` is returned
that has all values replaced with an invalid value. Otherwise, nothing
is returned.
See Also
--------
FastArray.inv : Return the invalid value for the input array's dtype.
FastArray.copy_invalid :
Return a copy of a `FastArray` filled with the invalid value
for the array's dtype.
Examples
--------
Replace an integer array's values with the invalid value for the array's
dtype. By default, the returned array is the same size and dtype as the
input array, and the operation is performed in place:
>>> a = rt.FA([1, 2, 3, 4, 5])
>>> a
FastArray([1, 2, 3, 4, 5])
>>> a.fill_invalid()
>>> a
FastArray([-2147483648, -2147483648, -2147483648, -2147483648,
-2147483648])
Replace a floating-point array's values with the invalid value for the
`~riptable.int32` dtype:
>>> a2 = rt.FA([0., 1., 2., 3., 4.])
>>> a2
FastArray([0., 1., 2., 3., 4.])
>>> a2.fill_invalid(dtype="int32", inplace=False)
FastArray([-2147483648, -2147483648, -2147483648, -2147483648,
-2147483648])
Specify the size and dtype of the output array:
>>> a3 = rt.FA(["AMZN", "IBM", "MSFT", "AAPL"])
>>> a3
FastArray([b'AMZN', b'IBM', b'MSFT', b'AAPL'], dtype='|S4')
>>> a3.fill_invalid(2, dtype="bool", inplace=False)
FastArray([False, False])
"""
return self._fill_invalid_internal(shape=shape, dtype=dtype, inplace=inplace)
def _fill_invalid_internal(self, shape=None, dtype=None, inplace=True, fill_val=None):
if dtype is None:
dtype = self.dtype
if isinstance(dtype, str):
dtype = np.dtype(dtype)
if shape is None:
shape = self.shape
elif not isinstance(shape, tuple):
shape = (shape,)
if fill_val is None:
inv = INVALID_DICT[dtype.num]
else:
inv = fill_val
if inplace is True:
if shape != self.shape:
raise ValueError(
f"Inplace fill invalid cannot be different number of rows than existing array. Got {shape} vs. length {len(self)}"
)
if dtype != self.dtype:
raise ValueError(
f"Inplace fill invalid cannot be different dtype than existing array. Got {dtype} vs. {len(self.dtype)}"
)
self.fill(inv)
else:
arr = full(shape, inv, dtype=dtype)
return arr
# -------------------------------------------------------------------------
def isin(self, test_elements, *, assume_unique=False, invert=False) -> FastArray:
"""
Calculates `self in test_elements`, broadcasting over `self` only.
Returns a boolean array of the same shape as `self` that is True
where an element of `self` is in `test_elements` and False otherwise.
Parameters
----------
test_elements : array_like
The values against which to test each value of `element`.
This argument is flattened if it is an array or array_like.
See notes for behavior with non-array-like parameters.
assume_unique : bool, optional
If True, the input arrays are both assumed to be unique, which
can speed up the calculation. Default is False.
invert : bool, optional
If True, the values in the returned array are inverted, as if
calculating `element not in test_elements`. Default is False.
``np.isin(a, b, invert=True)`` is equivalent to (but faster
than) ``np.invert(np.isin(a, b))``.
Returns
-------
isin : ndarray, bool
Has the same shape as `element`. The values `element[isin]`
are in `test_elements`.
Note: behavior differs from pandas
- Riptable favors bytestrings, and will make conversions from unicode/bytes to match for operations as necessary.
- We will also accept single scalars for values.
- Pandas series will return another series - we have no series, and will return a FastArray
Examples
--------
>>> from riptable import *
>>> a = FA(['a','b','c','d','e'], unicode=False)
>>> a.isin(['a','b'])
FastArray([ True, True, False, False, False])
>>> a.isin('a')
FastArray([ True, False, False, False, False])
>>> a.isin({'b'})
FastArray([ False, True, False, False, False])
"""
if isinstance(test_elements, set):
test_elements = list(test_elements)
if not isinstance(test_elements, np.ndarray):
# align byte string vs unicode
if self.dtype.char in "SU":
if np.isscalar(test_elements):
test_elements = np.asarray([test_elements], dtype=self.dtype.char)
else:
test_elements = np.asarray(test_elements, dtype=self.dtype.char)
else:
if isinstance(test_elements, tuple):
raise ValueError(
"isin does not currently support tuples. In the future a tuple will be used to represent a multi-key."
)
test_elements = rc.AsFastArray(test_elements, dtype=min_scalar_type(test_elements))
try:
# optimization: if we have just one element, we can just parallel compare that one element
if len(test_elements) == 1:
# string comparison to int will fail
result = self == test_elements[0]
# check for failed result
if np.isscalar(result):
result = ismember(self, test_elements)[0]
else:
result = ismember(self, test_elements)[0]
if invert:
np.logical_not(result, out=result)
return result
except Exception:
# punt non-supported types to numpy
return np.isin(self._np, test_elements, assume_unique=assume_unique, invert=invert)
# -------------------------------------------------------------------------
def between(self, low, high, include_low: bool = True, include_high: bool = False) -> FastArray:
"""
Return a boolean `FastArray` indicating which input values are in a
specified interval.
Parameters
----------
low : scalar or array
Lower bound for the interval. If an array, it must be the same size
as `self`, and comparisons are done elementwise.
high : scalar or array
Upper bound for the interval. If an array, it must be the same size
as `self`, and comparisons are done elementwise.
include_low : bool, default `True`
Specifies whether `low` is included when performing comparisons.
include_high : bool, default `False`
Specifies whether `high` is included when performing comparisons.
Returns
-------
FastArray
A boolean `FastArray` indicating which input values are in a specified
interval.
Examples
--------
Specify an interval using scalars:
>>> a = rt.FA([9, 2, 3, 5, 8, 9, 1, 4, 6])
>>> a.between(5, 9, include_low=False) # Exclude 5 (left endpoint).
FastArray([False, False, False, False, True, False, False, False, True])
Specify an interval using arrays:
>>> a2 = rt.FA([1, 2, 3, 4, 5])
>>> a2.between([1, 3, 5, 5, 5], [2, 4, 6, 6, 6])
FastArray([ True, False, False, False, True])
Specify an interval mixing scalar and array bounds:
>>> a3 = rt.FA([1, 2, 3, 4, 5])
>>> a3.between(2, [2, 4, 6, 6, 6])
FastArray([False, True, True, True, True])
"""
low = asanyarray(low)
high = asanyarray(high)
if include_low:
ret = self >= low
else:
ret = self > low
if include_high:
ret &= self <= high
else:
ret &= self < high
return ret
# --------------------------------------------------------------------------
def sample(
self,
N: int = 10,
filter: Optional[np.ndarray] = None,
seed: Optional[Union[int, Sequence[int], np.random.SeedSequence, np.random.Generator]] = None,
) -> FastArray:
"""
Return a given number of randomly selected values from a `FastArray`.
Parameters
----------
N : int, default 10
Number of values to select. The entire array is returned if `N` is
greater than the size of the array.
filter : array (bool or int), optional
A boolean mask or index array to filter values before selection. A boolean
mask must have the same length as the original `FastArray`.
seed : int or other types, optional
A seed to initialize the random number generator. If one is not provided,
the generator is initialized using random data from the OS. For details
and other accepted types, see the `seed` parameter for
`numpy.random.default_rng`.
Returns
-------
FastArray
A new `FastArray` containing the randomly selected values.
See Also
--------
.Dataset.sample :
Return a specified number of randomly selected rows from a `.Dataset`.
Examples
--------
No sample size specified:
>>> a = rt.FA([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
>>> a.sample() # 10 randomly selected values returned.
FastArray([ 1, 2, 3, 4, 5, 6, 7, 9, 10, 11]) # Random
Sample 3 values:
>>> a = rt.FA([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
>>> a.sample(3)
FastArray([1, 4, 9]) # Random
Specify a sample size larger than the array:
>>> a2 = rt.FA([1, 2, 3, 4, 5])
>>> a2.sample(100) # The entire array is returned.
FastArray([1, 2, 3, 4, 5])
Specify an index array for filtering:
>>> a3 = rt.FA(['TSLA','AMZN','IBM', 'SPY', 'GME', 'AAPL', 'FB', 'GOOG',
... 'MSFT', 'UBER']) # Create sample data.
>>> filter = rt.FA([0, 1, 3, 7]) # Specify indices of a3 to take the sample from.
>>> a3.sample(2, filter)
FastArray([b'TSLA', b'GOOG'], dtype='|S4') # Random
Specify a boolean mask array for filtering:
>>> a3.sample(8, filter=rt.FA(a3 != 'SPY'))
FastArray([b'TSLA', b'IBM', b'GME', b'AAPL', b'FB', b'GOOG', b'MSFT',
b'UBER'], dtype='|S4') # Random
"""
return sample(self, N=N, filter=filter, seed=seed)
# --------------------------------------------------------------------------
def duplicated(self, keep="first", high_unique=False) -> FastArray:
"""
Return a boolean `FastArray` indicating `True` for duplicate items in
the input array.
Parameters
----------
keep : {'first', 'last', False}, default 'first'
- 'first' : Mark each duplicate as `True` except for the first occurrence.
- 'last' : Mark each duplicate as `True` except for the last occurrence.
- False : Mark all duplicates as `True`.
high_unique : bool, default `False` (hashing)
Controls whether the function uses hashing- or sorting-based logic
to find unique values in the input array. If your data has a high
proportion of unique values, set to `True` for faster performance.
Returns
-------
FastArray
A boolean `FastArray` indicating `True` for duplicate items in the
input array.
See Also
--------
FastArray.nunique : Return the number of unique values in an array.
.Dataset.duplicated :
Return a boolean `FastArray` indicating `True` for duplicate rows.
Examples
--------
Exclude the first occurrence of each duplicate (use the default `keep` value):
>>> a = rt.FA([1, 2, 3, 4, 2, 7, 8, 8, 3])
>>> a
FastArray([1, 2, 3, 4, 2, 7, 8, 8, 3])
>>> a.duplicated()
FastArray([False, False, False, False, True, False, False, True, True])
Mark all duplicates:
>>> a.duplicated(keep=False)
FastArray([False, True, True, False, True, False, True, True, True])
"""
arr = self
if keep == "last":
arr = arr[::-1].copy()
elif keep is not False and keep != "first":
raise ValueError(f'keep must be either "first", "last" or False')
# create a return array all set to True
result = ones(len(arr), dtype=np.bool_)
g = Grouping(arr._fa if hasattr(arr, "_fa") else arr, lex=high_unique)
if keep is False:
# search for groups with a count of 1
result[g.ifirstkey[g.ncountgroup[1:] == 1]] = False
else:
result[g.ifirstkey] = False
if keep == "last":
result = result[::-1].copy()
return result
# --------------------------------------------------------------------------
def save(
self,
filepath: Union[str, os.PathLike],
share: Optional[str] = None,
compress: bool = True,
overwrite: bool = True,
name: Optional[str] = None,
) -> None:
"""
Save a :py:class:`~.rt_fastarray.FastArray` to an .sds file.
Parameters
----------
filepath : str or os.PathLike
Path for the .sds file. If there's a trailing slash, ``filepath`` is treated
as a path to a directory and you also need to specify ``name``. Alternatively,
you can include a file name (with or without the .sds extension) at the end
of ``filepath`` (with no trailing slash), and an .sds file with that name is
created. Directories that don't yet exist are created.
share : str, optional
If specified, the :py:class:`~.rt_fastarray.FastArray` is saved to shared
memory (NOT to disk) and path information from ``filepath`` is discarded. A
``name`` value must be provided. When shared memory is used, data is not
compressed. Note that shared memory functions are not currently supported on
Windows.
compress : bool, default `True`
When `True` (the default), compression is used when writing to the .sds
file. Otherwise, no compression is used. (If shared memory is used, data is
always saved uncompressed.)
overwrite : bool, default `True`
When `True` (the default), the user is not prompted to specify whether or
not to overwrite an existing .sds file. When set to `False`, a prompt is
displayed.
name : str, optional
Name for the .sds file. The .sds extension is not required. Note that if
`name` is provided, ``filepath`` is treated as a path to a directory, even
if ``filepath`` has no trailing slash.
Returns
-------
An .sds file containing the :py:class:`~.rt_fastarray.FastArray`.
See Also
--------
:py:func:`.rt_sds.save_sds` :
Save :py:class:`~.rt_dataset.Dataset` objects and arrays into a single .sds
file.
:py:func:`.rt_sds.load_sds` :
Load an .sds file.
Examples
--------
Include a file name in the path:
>>> a = rt.FA([0, 1, 2, 3, 4])
FastArray([0, 1, 2, 3, 4])
>>> a.save("C://junk//saved_file")
>>> os.listdir("C://junk")
['saved_file.sds']
When `name` is specified, `filepath` is treated as a path to a directory:
>>> a.save("C://junk//saved_file", name="fa")
>>> os.listdir("C://junk//saved_file")
['fa.sds']
Display a prompt before overwriting an existing file:
>>> a.save("C://junk//saved_file", overwrite=False)
C://junk//saved_file.sds already exists. Overwrite? (y/n) n
No file was saved.
"""
save_sds(filepath, self, share=share, compress=compress, overwrite=overwrite, name=name)
# --------------------------------------------------------------------------
def filter(self, filter: npt.ArrayLike) -> "FastArray":
"""
Return a copy of the `FastArray` containing only the elements that meet the specified
condition.
Parameters
----------
filter : array of int or bool (fancy index or Boolean mask)
A fancy index specifies both the desired elements and their order in the
returned `FastArray`. When a Boolean mask is passed, only rows that meet the
specified condition are in the returned `FastArray`.
Returns
-------
`FastArray`
Notes
-----
If you want to perform an operation on a filtered FastArray, it's more efficient to
perform the operation using the ``filter`` keyword argument. For example,
``my_fa.sum(filter = boolean_mask)``.
Examples
--------
Create a `FastArray`:
>>> fa = rt.FastArray(np.linspace(0, 1, 11))
>>> fa
FastArray([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])
Filter using a fancy index:
>>> fa.filter([5, 0, 1])
FastArray([0.5, 0. , 0.1])
Filter using a condition that creates a Boolean mask array:
>>> fa.filter(fa > 0.75)
FastArray([0.8, 0.9, 1. ])
"""
# normalize rowfilter
if np.isscalar(filter):
filter = np.asanyarray([filter])
elif not isinstance(filter, np.ndarray):
filter = np.asanyarray(filter)
# Ensure filter is boolean or integers
if not (np.issubdtype(filter.dtype, np.integer) or np.issubdtype(filter.dtype, bool)):
raise TypeError(f"The filter must be a boolean mask or integer fancy index.")
# Ensure `filter` is the right shape.
# Unlike `Dataset.filter` we don't convert bool to fancy, because we're not reusing the fancy indices.
if filter.ndim != 1:
raise ValueError("`FastArray.filter` only accepts 1D arrays for the element selector/mask.")
# Boolean array needs to be the right length as well.
if np.issubdtype(filter.dtype, bool) and (len(filter) != self.shape[0]):
raise ValueError(
f"The length of the provided selection mask ({len(filter)}) does not match the length of the FastArray({self.shape[0]})."
)
# Perform filter and return
return self[filter]
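# Illustrative sketch (not part of the API): for a boolean mask ``m`` of matching length,
# ``fa.filter(m)`` materializes a filtered copy, so reducing it should agree with passing
# the mask directly via the ``filter`` keyword, which avoids the copy:
#   fa = FastArray([1.0, 2.0, 3.0, 4.0])
#   m = fa > 2
#   assert fa.filter(m).sum() == fa.sum(filter=m)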
# --------------------------------------------------------------------------
def reshape(self, *args, **kwargs) -> FastArray:
result = super(FastArray, self).reshape(*args, **kwargs)
# this warning happens too much now
# if FastArray._check_ndim(result) != 1:
# warnings.warn(FastArray.WarningDict["multiple_dimensions"].format(result.shape))
if not (result.flags.c_contiguous or result.flags.f_contiguous):
# fix strides problem
return result.copy()
return result
# --------------------------------------------------------------------------
def repeat(self, repeats, axis=None) -> FastArray:
"""See `riptable.repeat`."""
return repeat(self, repeats, axis=axis)
# --------------------------------------------------------------------------
def tile(self, reps) -> FastArray:
"""See `riptable.tile`."""
return tile(self, reps)
# --------------------------------------------------------------------------
def _kwarg_check(self, *args, **kwargs):
# dtype is handled natively, so it alone does not trigger the numpy fallback
if ("ddof" in kwargs and kwargs["ddof"] != 1) or "axis" in kwargs or "keepdims" in kwargs:
return True
# --------------------------------------------------------------------------
def _reduce_check(self, reduceFunc: REDUCE_FUNCTIONS, npFunc, *args, **kwargs):
"""
npFunc : pass None if there is no numpy equivalent function.
"""
if npFunc is not None and (self._kwarg_check(*args, **kwargs) or len(self.shape) != 1):
# TODO: add to math ledger
# set ddof=1 if NOT set which is FastArray default to match matlab/pandas
if "ddof" not in kwargs and reduceFunc in [
REDUCE_FUNCTIONS.REDUCE_VAR,
REDUCE_FUNCTIONS.REDUCE_NANVAR,
REDUCE_FUNCTIONS.REDUCE_STD,
REDUCE_FUNCTIONS.REDUCE_NANSTD,
]:
kwargs["ddof"] = 1
result = npFunc(self._np, *args, **kwargs)
return result
result = TypeRegister.MathLedger._REDUCE(self, reduceFunc)
# It's possible there was no result returned from the reduction function;
# e.g. if the input was empty. If the function being called is well-defined
# for empty lists -- i.e. it is a reduction operation with a defined
# identity element -- set the result to the identity element so the rest of
# the logic below will work correctly.
# If there is no identity element for this operation, raise an exception to
# let the user know; we'd raise an exception below *anyway*, and this allows
# us to provide the user with a more-descriptive/actionable error message.
if result is None:
op_identity_val = type(self)._reduce_op_identity_value.get(reduceFunc, None)
if op_identity_val is not None:
result = op_identity_val
else:
raise ValueError(
f"Reduction '{str(reduceFunc)}' does not have an identity element so cannot be computed over an empty array."
)
# Was an output dtype explicitly specified?
dtype = kwargs.get("dtype", None)
if dtype is not None:
# user forced dtype return value
return dtype(result)
# preserve type for min/max/nanmin/nanmax
if reduceFunc in [
REDUCE_FUNCTIONS.REDUCE_MIN,
REDUCE_FUNCTIONS.REDUCE_NANMIN,
REDUCE_FUNCTIONS.REDUCE_MAX,
REDUCE_FUNCTIONS.REDUCE_NANMAX,
]:
return self.dtype.type(result)
# internally numpy expects a dtype returned for nanstd and other calculations
if isinstance(result, (int, np.integer)):
# for uint64, the high bit must be preserved
if self.dtype.char in NumpyCharTypes.UnsignedInteger64:
return np.uint64(result)
return np.int64(result)
return np.float64(result)
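# Sketch of the coercion rules above (illustrative; assumes the usual reduce path):
#   FastArray([1, 2, 3]).sum()                               # integer input   -> np.int64(6)
#   FastArray(np.array([1, 2, 3], dtype=np.uint64)).sum()    # unsigned 64-bit -> np.uint64(6), high bit preserved
#   FastArray([1.0, 2.0]).mean()                             # everything else -> np.float64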
# ---------------------------------------------------------------------------
def _compare_check(self, func, other) -> FastArray:
# a user might type in a string and we want a bytes string
if self.dtype.char in "SU":
if isinstance(other, str):
if self.dtype.char == "S":
# we are byte strings but scalar unicode passed in
other = str.encode(other)
if isinstance(other, list):
# convert the list so a comparison can be made to the byte string array
other = FastArray(other)
result = func(other)
# NOTE: numpy does call FA ufunc for strings
if not isinstance(result, FastArray) and isinstance(result, np.ndarray):
result = result.view(FastArray)
return result
result = func(other)
return result
def __ne__(self, other):
return self._compare_check(super().__ne__, other)
def __eq__(self, other):
return self._compare_check(super().__eq__, other)
def __ge__(self, other):
return self._compare_check(super().__ge__, other)
def __gt__(self, other):
return self._compare_check(super().__gt__, other)
def __le__(self, other):
return self._compare_check(super().__le__, other)
def __lt__(self, other):
return self._compare_check(super().__lt__, other)
def eq(self, other):
return self.__eq__(other)
def ne(self, other):
return self.__ne__(other)
def ge(self, other):
return self.__ge__(other)
def le(self, other):
return self.__le__(other)
def gt(self, other):
return self.__gt__(other)
def lt(self, other):
return self.__lt__(other)
add = np.ndarray.__add__
sub = np.ndarray.__sub__
mul = np.ndarray.__mul__
div = np.ndarray.__truediv__
floordiv = np.ndarray.__floordiv__
pow = np.ndarray.__pow__
mod = np.ndarray.__mod__
# ---------------------------------------------------------------------------
def str_append(self, other):
if self.dtype.num == other.dtype.num:
func = TypeRegister.MathLedger._BASICMATH_TWO_INPUTS
return func((self, other), MATH_OPERATION.ADD, self.dtype.num)
raise TypeError("cannot concat")
# ---------------------------------------------------------------------------
def squeeze(self, *args, **kwargs):
return self._np.squeeze(*args, **kwargs)
# ---------------------------------------------------------------------------
def iscomputable(self) -> bool:
return TypeRegister.is_computable(self)
#############################################
# nep-18 array function protocol implementation
#############################################
@classmethod
def _py_number_to_np_dtype(
cls, val: Union[int, np.integer, None], dtype: np.dtype
) -> Union[np.uint, np.int64, np.float64, None]:
"""Convert a python type to numpy dtype.
Only handles integers."""
if val is not None:
# internally numpy expects a dtype returned for nanstd and other calculations
if isinstance(val, (int, np.integer)):
# for uint64, the high bit must be preserved
if dtype.char in NumpyCharTypes.UnsignedInteger64:
return np.uint64(val)
return np.int64(val)
return np.float64(val)
return val
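# Worked examples of the conversion above (illustrative):
#   _py_number_to_np_dtype(5, np.dtype(np.int32))      # -> np.int64(5)
#   _py_number_to_np_dtype(5, np.dtype(np.uint64))     # -> np.uint64(5)
#   _py_number_to_np_dtype(2.5, np.dtype(np.float64))  # -> np.float64(2.5)
#   _py_number_to_np_dtype(None, np.dtype(np.int32))   # -> None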
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.argmax)
def _argmax(a, axis=None, out=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_ARGMAX, 0)
return FastArray._py_number_to_np_dtype(result, a.dtype)
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.nanargmax)
def _nanargmax(a, axis=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_NANARGMAX, 0)
return FastArray._py_number_to_np_dtype(result, a.dtype)
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.argmin)
def _argmin(a, axis=None, out=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_ARGMIN, 0)
return FastArray._py_number_to_np_dtype(result, a.dtype)
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.nanargmin)
def _nanargmin(a, axis=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_NANARGMIN, 0)
return FastArray._py_number_to_np_dtype(result, a.dtype)
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.empty_like)
def _empty_like(
array: "FastArray",
dtype: Optional[Union[str, np.dtype]] = None,
order: str = "K",
subok: bool = True,
shape: Optional[Union[int, Sequence[int]]] = None,
) -> "FastArray":
array = array._np
result = rc.LedgerFunction(np.empty_like, array, dtype=dtype, order=order, subok=subok, shape=shape)
return result
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.max)
def _max(a, axis=None, out=None, keepdims=None, initial=None, where=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_MAX, 0)
if result is not None:
return a.dtype.type(result)
return result
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.nanmax)
def _nanmax(a, axis=None, out=None, keepdims=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_NANMAX, 0)
if result is not None:
return a.dtype.type(result)
return result
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.mean)
def _mean(a, axis=None, dtype=None, out=None, keepdims=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_MEAN, 0)
return FastArray._py_number_to_np_dtype(result, a.dtype)
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.nanmean)
def _nanmean(a, axis=None, dtype=None, out=None, keepdims=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_NANMEAN, 0)
return FastArray._py_number_to_np_dtype(result, a.dtype)
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.min)
def _min(a, axis=None, out=None, keepdims=None, initial=None, where=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_MIN, 0)
if result is not None:
return a.dtype.type(result)
return result
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.nanmin)
def _nanmin(a, axis=None, out=None, keepdims=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_NANMIN, 0)
if result is not None:
return a.dtype.type(result)
return result
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.std)
def _std(a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_STD, 0)
return FastArray._py_number_to_np_dtype(result, a.dtype)
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.nanstd)
def _nanstd(a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_NANSTD, 0)
return FastArray._py_number_to_np_dtype(result, a.dtype)
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.round)
@_ArrayFunctionHelper.register_array_function(np.around)
@_ArrayFunctionHelper.register_array_function(np.round_) # N.B., round_ is an alias for around
def _round_(a, decimals=None, out=None):
# TODO handle `decimals` and `out` arguments
# If callers use this FastArray staticmethod outside the scope of the array function protocol,
# provide argument checks, since failures at the C extension layer can otherwise be hard to diagnose.
if not isinstance(a, FastArray):
raise ValueError(f"{FastArray.__name__}._round_ expected FastArray subtype, got {type(a)}")
original_dtype = a.dtype
a = a.astype(np.float64)
fast_function = gUnaryUFuncs.get(np.round, None)
if fast_function is None:
raise ValueError(
f"{FastArray.__name__}._round_ unhandled array function {np.round}\nKnown numpy array function to riptable functions: {repr(gUnaryUFuncs)}"
)
# For MATH_OPERATION.ROUND, _BASICMATH_ONE_INPUT returns an array `array(None, dtype=object)`
# if the input dtype is not a float64. As a workaround cast to float64 dtype, perform the operation,
# then cast back to the original dtype.
result = TypeRegister.MathLedger._BASICMATH_ONE_INPUT(a, fast_function, 0)
if not isinstance(result, FastArray) and isinstance(result, np.ndarray):
result = result.view(FastArray)
if result.dtype != original_dtype:
result = result.astype(original_dtype)
return result
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.sum)
def _sum(a, axis=None, dtype=None, out=None, keepdims=None, initial=None, where=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_SUM, 0)
return FastArray._py_number_to_np_dtype(result, a.dtype)
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.nansum)
def _nansum(a, axis=None, dtype=None, out=None, keepdims=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_NANSUM, 0)
return FastArray._py_number_to_np_dtype(result, a.dtype)
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.var)
def _var(a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_VAR, 0)
return FastArray._py_number_to_np_dtype(result, a.dtype)
@staticmethod
@_ArrayFunctionHelper.register_array_function(np.nanvar)
def _nanvar(a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
result = rc.Reduce(a, REDUCE_FUNCTIONS.REDUCE_NANVAR, 0)
return FastArray._py_number_to_np_dtype(result, a.dtype)
#############################################
# Helper section
#############################################
def abs(self, **kwargs) -> FastArray:
return np.abs(self, **kwargs)
def quantile(self, **kwargs) -> np.number | FastArray:
return np.quantile(self, **kwargs)
def nanquantile(self, **kwargs) -> np.number | FastArray:
return np.nanquantile(self, **kwargs)
def percentile(self, **kwargs) -> np.number | FastArray:
return np.percentile(self, **kwargs)
def nanpercentile(self, **kwargs) -> np.number | FastArray:
return np.nanpercentile(self, **kwargs)
def clip_lower(self, a_min, **kwargs) -> FastArray:
return self.clip(a_min, None, **kwargs)
def clip_upper(self, a_max, **kwargs) -> FastArray:
return self.clip(None, a_max, **kwargs)
def sign(self, **kwargs) -> FastArray:
return np.sign(self, **kwargs)
def trunc(self, **kwargs) -> FastArray:
return np.trunc(self, **kwargs)
def where(self, condition, y=np.nan) -> FastArray:
"""
Return a new `FastArray` in which values are replaced where a given
condition is False.
To also provide a value for where the condition is True, use
:meth:`riptable.where`.
Parameters
----------
condition : bool or array of bool
Where the condition is True, keep the original value. Where False,
replace with `y` (if `y` is a scalar) or the corresponding value
from `y` (if `y` is an array). If `condition` is an array or a
comparison that returns an array, the array must be the same
length as the calling `FastArray`.
y : scalar, array, or callable, default np.nan
The value to use where `condition` is False. If `y` is an
array or a callable that returns an array, it must be the same
length as the calling `FastArray`. Where `condition` is False, the
corresponding value of `y` is used.
Returns
-------
FastArray
A new `FastArray` with values replaced where `condition` is False.
See Also
--------
.riptable.where :
Replace values depending on whether a given condition is True or
False.
Examples
--------
`condition` is a comparison that creates an array of booleans, and
`y` is a scalar:
>>> a = rt.FastArray(rt.arange(5))
>>> a
FastArray([0, 1, 2, 3, 4])
>>> a.where(a > 2, 100)
FastArray([100, 100, 100, 3, 4])
`condition` and `y` are same-length arrays:
>>> condition = rt.FastArray([True, True, False, False, False])
>>> y = rt.FastArray([100, 200, 300, 400, 500])
>>> a.where(condition, y)
FastArray([ 0, 1, 300, 400, 500])
"""
return where(condition, self, y)
def count(self, sorted=True, filter=None) -> Dataset:
"""
The count of each unique value.
This returns the same information that ``.unique(return_counts = True)``
does, except in a `Dataset` instead of a tuple.
Parameters
----------
sorted : bool, default True
When True (the default), unique values are returned in sorted order. Set to
False to return them in order of first appearance.
filter : ndarray of bool, default None
If provided, any False values will be ignored in the calculation.
Returns
-------
Dataset
A `Dataset` containing the unique values and their counts.
See Also
--------
FastArray.unique
Examples
--------
>>> a = rt.FastArray([0, 2, 1, 3, 3, 2, 2])
>>> a.count()
*Unique Count
------- -----
0 1
1 1
2 3
3 2
<BLANKLINE>
[4 rows x 2 columns] total bytes: 48.0 B
With ``sorted = False``:
>>> a.count(sorted = False)
*Unique Count
------- -----
0 1
2 3
1 1
3 2
<BLANKLINE>
[4 rows x 2 columns] total bytes: 48.0 B
"""
unique_counts = unique(self, sorted=sorted, return_counts=True, filter=filter)
name = self.get_name()
if name is None:
name = "Unique"
ds = TypeRegister.Dataset({name: unique_counts[0], "Count": unique_counts[1]})
ds.label_set_names([name])
return ds
#############################################
# Rolling section (cannot handle strides)
#############################################
def rolling_sum(self, window: int = 3) -> FastArray:
return rc.Rolling(self, ROLLING_FUNCTIONS.ROLLING_SUM, window)
def rolling_nansum(self, window: int = 3) -> FastArray:
return rc.Rolling(self, ROLLING_FUNCTIONS.ROLLING_NANSUM, window)
def rolling_mean(self, window: int = 3) -> FastArray:
return rc.Rolling(self, ROLLING_FUNCTIONS.ROLLING_MEAN, window)
def rolling_nanmean(self, window: int = 3) -> FastArray:
return rc.Rolling(self, ROLLING_FUNCTIONS.ROLLING_NANMEAN, window)
def rolling_quantile(self, q, window: int = 3) -> FastArray:
window = min(window, len(self))
windowParam = rolling_quantile_funcParam(q, window)
return rc.Rolling(self, ROLLING_FUNCTIONS.ROLLING_QUANTILE, windowParam)
def rolling_var(self, window: int = 3) -> FastArray:
return rc.Rolling(self, ROLLING_FUNCTIONS.ROLLING_VAR, window)
def rolling_nanvar(self, window: int = 3) -> FastArray:
return rc.Rolling(self, ROLLING_FUNCTIONS.ROLLING_NANVAR, window)
def rolling_std(self, window: int = 3) -> FastArray:
return rc.Rolling(self, ROLLING_FUNCTIONS.ROLLING_STD, window)
def rolling_nanstd(self, window: int = 3) -> FastArray:
return rc.Rolling(self, ROLLING_FUNCTIONS.ROLLING_NANSTD, window)
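# Usage sketch for the rolling wrappers above (illustrative; exact output for the first
# ``window - 1`` positions depends on the riptide_cpp Rolling kernel):
#   fa = FastArray([1.0, 2.0, 3.0, 4.0, 5.0])
#   fa.rolling_sum(window=3)       # trailing-window sums
#   fa.rolling_nanmean(window=3)   # trailing-window means, ignoring NaNs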
#############################################
# TimeWindow section (cannot handle strides), time_array must be INT64
#############################################
def timewindow_sum(self, time_array, time_dist):
"""
Sum the values over a sliding time window.
`time_array` must be int64 and sorted in increasing order.
Parameters
----------
time_array : sorted int64 array of timestamps
time_dist : integer size of the time window
Examples
--------
>>> a = rt.arange(10, dtype=rt.int64)
>>> a.timewindow_sum(a, 5)
FastArray([ 0, 1, 3, 6, 10, 15, 21, 27, 33, 39], dtype=int64)
"""
return rc.TimeWindow(self, time_array, TIMEWINDOW_FUNCTIONS.TIMEWINDOW_SUM, time_dist)
def timewindow_prod(self, time_array, time_dist):
"""
Multiply the values over a sliding time window.
`time_array` must be int64 and sorted in increasing order.
Parameters
----------
time_array : sorted int64 array of timestamps
time_dist : integer size of the time window
Examples
--------
>>> a = rt.arange(10, dtype=rt.int64)
>>> a.timewindow_prod(a, 5)
FastArray([ 0, 0, 0, 0, 0, 0, 720, 5040, 20160, 60480], dtype=int64)
"""
return rc.TimeWindow(self, time_array, TIMEWINDOW_FUNCTIONS.TIMEWINDOW_PROD, time_dist)
#############################################
# Bottleneck section (only handles int32/int64/float32/float64)
# bottleneck is optional
#############################################
def move_sum(self, *args, **kwargs):
return bn.move_sum(self, *args, **kwargs)
def move_mean(self, *args, **kwargs):
return bn.move_mean(self, *args, **kwargs)
def move_std(self, *args, **kwargs):
return bn.move_std(self, *args, **kwargs)
def move_var(self, *args, **kwargs):
return bn.move_var(self, *args, **kwargs)
def move_min(self, *args, **kwargs):
return bn.move_min(self, *args, **kwargs)
def move_max(self, *args, **kwargs):
return bn.move_max(self, *args, **kwargs)
def move_argmin(self, *args, **kwargs):
return bn.move_argmin(self, *args, **kwargs)
def move_argmax(self, *args, **kwargs):
return bn.move_argmax(self, *args, **kwargs)
def move_rank(self, *args, **kwargs):
return bn.move_rank(self, *args, **kwargs)
# ---------------------------------------------------------------------------
def replace(self, old, new):
return bn.replace(self, old, new)
def partition2(self, *args, **kwargs):
return bn.partition(self, *args, **kwargs)
def argpartition2(self, *args, **kwargs):
return bn.argpartition(self, *args, **kwargs)
def rankdata(self, *args, **kwargs):
return bn.rankdata(self, *args, **kwargs)
def nanrankdata(self, *args, **kwargs):
return bn.nanrankdata(self, *args, **kwargs)
def push(self, *args, **kwargs):
return bn.push(self, *args, **kwargs)
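# Usage sketch (requires the optional ``bottleneck`` package; without it these wrappers
# raise a NameError because ``bn`` is never bound):
#   fa = FastArray([1.0, np.nan, 3.0])
#   fa.push()         # forward-fills NaNs, as bottleneck.push does
#   fa.move_sum(2)    # trailing moving sum with window 2, via bottleneck.move_sum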
# ---------------------------------------------------------------------------
def issorted(self) -> bool:
"""
Return `True` if the array is sorted, `False` otherwise.
NaNs at the end of an array are considered sorted.
Calls :py:func:`~.rt_numpy.issorted`.
Returns
-------
bool
`True` if the array is sorted, `False` otherwise.
See Also
--------
:py:func:`.rt_numpy.issorted`
Examples
--------
>>> a = rt.FastArray(['a', 'b', 'c'])
>>> a.issorted()
True
>>> a = rt.FastArray([1.0, 2.0, 3.0, rt.nan])
>>> rt.issorted(a)
True
>>> a = rt.FastArray(['a', 'c', 'b'])
>>> a.issorted()
False
"""
return issorted(self)
# ---------------------------------------------------------------------------
def _unary_op(self, funcnum, fancy=False) -> FastArray:
if self._is_not_supported(self):
# make it contiguous
arr = self.copy()
else:
arr = self
func = TypeRegister.MathLedger._BASICMATH_ONE_INPUT
result = func(arr, funcnum, 0)
if result is None:
raise TypeError(f"Could not perform operation {funcnum} on FastArray of dtype {arr.dtype}")
if fancy:
result = bool_to_fancy(result)
return result
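# Note on ``fancy``: per the branch above, ``fa.isnan(fancy=True)`` is equivalent to
# ``bool_to_fancy(fa.isnan())``, i.e. it returns the indices of the True positions
# rather than the boolean mask itself.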
#############################################
# Boolean section
#############################################
def isnormal(self, fancy=False):
return self._unary_op(MATH_OPERATION.ISNORMAL, fancy=fancy)
def isnotnormal(self, fancy=False):
return self._unary_op(MATH_OPERATION.ISNOTNORMAL, fancy=fancy)
def isnan(self, fancy=False):
"""
Return a boolean array that's True for each element that's a NaN (Not a Number),
False otherwise.
Parameters
----------
fancy : bool, default False
Set to True to instead return the indices of the True (NaN) values.
Returns
-------
:py:class:`~.rt_fastarray.FastArray`
A :py:class:`~.rt_fastarray.FastArray` of booleans or indices.
See Also
--------
:py:meth:`.rt_fastarray.FastArray.isnotnan`
:py:meth:`.rt_fastarray.FastArray.notna`
:py:meth:`.rt_fastarray.FastArray.isnanorzero`
:py:func:`.rt_numpy.isnan`
:py:func:`.rt_numpy.isnotnan`
:py:func:`.rt_numpy.isnanorzero`
:py:meth:`.rt_categorical.Categorical.isnan`
:py:meth:`.rt_categorical.Categorical.isnotnan`
:py:meth:`.rt_categorical.Categorical.notna`
:py:meth:`.rt_datetime.Date.isnan`
:py:meth:`.rt_datetime.Date.isnotnan`
:py:meth:`.rt_datetime.DateTimeNano.isnan`
:py:meth:`.rt_datetime.DateTimeNano.isnotnan`
:py:meth:`.rt_dataset.Dataset.mask_or_isnan` :
Return a boolean array that's `True` for each :py:class:`~.rt_dataset.Dataset`
row that contains at least one NaN.
:py:meth:`.rt_dataset.Dataset.mask_and_isnan` :
Return a boolean array that's `True` for each all-NaN
:py:class:`~.rt_dataset.Dataset` row.
Examples
--------
>>> a = rt.FastArray([rt.nan, rt.nan, rt.inf, 3])
>>> a.isnan()
FastArray([ True, True, False, False])
With ``fancy = True``:
>>> a.isnan(fancy = True)
FastArray([0, 1], dtype=int32)
"""
return self._unary_op(MATH_OPERATION.ISNAN, fancy=fancy)
def isnotnan(self, fancy=False):
"""
Return a boolean array that's `True` for each element that's not a NaN (Not a
Number), `False` otherwise.
Parameters
----------
fancy : bool, default `False`
Set to `True` to instead return the indices of the `True` (non-NaN) values.
Returns
-------
:py:class:`~.rt_fastarray.FastArray`
A :py:class:`~.rt_fastarray.FastArray` of booleans or indices.
See Also
--------
:py:meth:`.rt_fastarray.FastArray.isnan`
:py:meth:`.rt_fastarray.FastArray.notna`
:py:meth:`.rt_fastarray.FastArray.isnanorzero`
:py:func:`.rt_numpy.isnan`
:py:func:`.rt_numpy.isnotnan`
:py:func:`.rt_numpy.isnanorzero`
:py:meth:`.rt_categorical.Categorical.isnan`
:py:meth:`.rt_categorical.Categorical.isnotnan`
:py:meth:`.rt_categorical.Categorical.notna`
:py:meth:`.rt_datetime.Date.isnan`
:py:meth:`.rt_datetime.Date.isnotnan`
:py:meth:`.rt_datetime.DateTimeNano.isnan`
:py:meth:`.rt_datetime.DateTimeNano.isnotnan`
:py:meth:`.rt_dataset.Dataset.mask_or_isnan` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that contains at least one NaN.
:py:meth:`.rt_dataset.Dataset.mask_and_isnan` :
Return a boolean array that's `True` for each all-NaN
:py:class:`~.rt_dataset.Dataset` row.
Examples
--------
>>> a = rt.FastArray([rt.nan, rt.inf, 2])
>>> a.isnotnan()
FastArray([False, True, True])
With ``fancy = True``:
>>> a.isnotnan(fancy = True)
FastArray([1, 2], dtype=int32)
"""
return self._unary_op(MATH_OPERATION.ISNOTNAN, fancy=fancy)
def isnanorzero(self, fancy=False):
"""
Return a boolean array that's `True` for each element that's a NaN (Not a
Number) or zero, `False` otherwise.
Parameters
----------
fancy : bool, default `False`
Set to `True` to instead return the indices of the `True` (NaN or zero)
values.
Returns
-------
:py:class:`~.rt_fastarray.FastArray`
A :py:class:`~.rt_fastarray.FastArray` of booleans or indices.
See Also
--------
:py:func:`.rt_numpy.isnanorzero`
:py:func:`.rt_numpy.isnan`
:py:func:`.rt_numpy.isnotnan`
:py:meth:`.rt_fastarray.FastArray.isnan`
:py:meth:`.rt_fastarray.FastArray.isnotnan`
:py:meth:`.rt_categorical.Categorical.isnan`
:py:meth:`.rt_categorical.Categorical.isnotnan`
:py:meth:`.rt_datetime.Date.isnan`
:py:meth:`.rt_datetime.Date.isnotnan`
:py:meth:`.rt_datetime.DateTimeNano.isnan`
:py:meth:`.rt_datetime.DateTimeNano.isnotnan`
:py:meth:`.rt_dataset.Dataset.mask_or_isnan` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that contains at least one NaN.
:py:meth:`.rt_dataset.Dataset.mask_and_isnan` :
Return a boolean array that's `True` for each all-NaN
:py:class:`~.rt_dataset.Dataset` row.
Examples
--------
>>> a = rt.FastArray([0, rt.nan, rt.inf, 3])
>>> a.isnanorzero()
FastArray([ True, True, False, False])
With ``fancy = True``:
>>> a.isnanorzero(fancy = True)
FastArray([0, 1], dtype=int32)
"""
return self._unary_op(MATH_OPERATION.ISNANORZERO, fancy=fancy)
def isfinite(self, fancy=False):
"""
Return a boolean array that's `True` for each finite
:py:class:`~.rt_fastarray.FastArray` element, `False` otherwise.
A value is considered to be finite if it's not positive or negative infinity
or a NaN (Not a Number).
Parameters
----------
fancy : bool, default `False`
Set to `True` to instead return the indices of the `True` (finite) values.
Returns
-------
:py:class:`~.rt_fastarray.FastArray`
An array of booleans or indices.
See Also
--------
:py:meth:`.rt_fastarray.FastArray.isnotfinite`
:py:func:`.rt_numpy.isfinite`
:py:func:`.rt_numpy.isnotfinite`
:py:func:`.rt_numpy.isinf`
:py:func:`.rt_numpy.isnotinf`
:py:meth:`.rt_fastarray.FastArray.isinf`
:py:meth:`.rt_fastarray.FastArray.isnotinf`
:py:meth:`.rt_dataset.Dataset.mask_or_isfinite` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that has at least one finite value.
:py:meth:`.rt_dataset.Dataset.mask_and_isfinite` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that contains all finite values.
:py:meth:`.rt_dataset.Dataset.mask_or_isinf` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that has at least one value that's
positive or negative infinity.
:py:meth:`.rt_dataset.Dataset.mask_and_isinf` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that contains all infinite values.
Examples
--------
>>> a = rt.FastArray([rt.inf, -rt.inf, rt.nan, 0])
>>> a.isfinite()
FastArray([False, False, False, True])
With ``fancy = True``:
>>> a.isfinite(fancy = True)
FastArray([3], dtype=int32)
"""
return self._unary_op(MATH_OPERATION.ISFINITE, fancy=fancy)
def isnotfinite(self, fancy=False):
"""
Return a boolean array that's `True` for each non-finite
:py:class:`~.rt_fastarray.FastArray` element, `False` otherwise.
A value is considered to be finite if it's not positive or negative infinity
or a NaN (Not a Number).
Parameters
----------
fancy : bool, default `False`
Set to `True` to instead return the indices of the `True` (non-finite)
values.
Returns
-------
:py:class:`~.rt_fastarray.FastArray`
An array of booleans or indices.
See Also
--------
:py:meth:`.rt_fastarray.FastArray.isfinite`
:py:func:`.rt_numpy.isfinite`
:py:func:`.rt_numpy.isnotfinite`
:py:func:`.rt_numpy.isinf`
:py:func:`.rt_numpy.isnotinf`
:py:meth:`.rt_fastarray.FastArray.isinf`
:py:meth:`.rt_fastarray.FastArray.isnotinf`
:py:meth:`.rt_dataset.Dataset.mask_or_isfinite` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that has at least one finite value.
:py:meth:`.rt_dataset.Dataset.mask_and_isfinite` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that contains all finite values.
:py:meth:`.rt_dataset.Dataset.mask_or_isinf` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that has at least one value that's
positive or negative infinity.
:py:meth:`.rt_dataset.Dataset.mask_and_isinf` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that contains all infinite values.
Examples
--------
>>> a = rt.FastArray([rt.inf, -rt.inf, rt.nan, 0])
>>> a.isnotfinite()
FastArray([ True, True, True, False])
With ``fancy = True``:
>>> a.isnotfinite(fancy = True)
FastArray([0, 1, 2], dtype=int32)
"""
return self._unary_op(MATH_OPERATION.ISNOTFINITE, fancy=fancy)
def isinf(self, fancy=False):
"""
Return a boolean array that's `True` for each
:py:class:`~.rt_fastarray.FastArray` element that's positive or negative
infinity, `False` otherwise.
Parameters
----------
fancy : bool, default `False`
Set to `True` to instead return the indices of the `True` (infinite) values.
Returns
-------
:py:class:`~.rt_fastarray.FastArray`
An array of booleans or indices.
See Also
--------
:py:meth:`.rt_fastarray.FastArray.isnotinf`
:py:meth:`.rt_fastarray.FastArray.isfinite`
:py:meth:`.rt_fastarray.FastArray.isnotfinite`
:py:func:`.rt_numpy.isinf`
:py:func:`.rt_numpy.isnotinf`
:py:func:`.rt_numpy.isfinite`
:py:func:`.rt_numpy.isnotfinite`
:py:meth:`.rt_dataset.Dataset.mask_or_isfinite` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that has at least one finite value.
:py:meth:`.rt_dataset.Dataset.mask_and_isfinite` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that contains all finite values.
:py:meth:`.rt_dataset.Dataset.mask_or_isinf` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that has at least one value that's
positive or negative infinity.
:py:meth:`.rt_dataset.Dataset.mask_and_isinf` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that contains all infinite values.
Examples
--------
>>> a = rt.FastArray([rt.inf, -rt.inf, rt.nan, 0])
>>> a.isinf()
FastArray([ True, True, False, False])
With ``fancy = True``:
>>> a.isinf(fancy = True)
FastArray([0, 1], dtype=int32)
"""
return self._unary_op(MATH_OPERATION.ISINF, fancy=fancy)
def isnotinf(self, fancy=False):
"""
Return a boolean array that's `True` for each
:py:class:`~.rt_fastarray.FastArray` element that's not positive or negative
infinity, `False` otherwise.
Parameters
----------
fancy : bool, default `False`
Set to `True` to instead return the indices of the `True` (non-infinite) values.
Returns
-------
:py:class:`~.rt_fastarray.FastArray`
An array of booleans or indices.
See Also
--------
:py:meth:`.rt_fastarray.FastArray.isinf`
:py:func:`.rt_numpy.isnotinf`
:py:func:`.rt_numpy.isinf`
:py:func:`.rt_numpy.isfinite`
:py:func:`.rt_numpy.isnotfinite`
:py:meth:`.rt_fastarray.FastArray.isfinite`
:py:meth:`.rt_fastarray.FastArray.isnotfinite`
:py:meth:`.rt_dataset.Dataset.mask_or_isfinite` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that has at least one finite value.
:py:meth:`.rt_dataset.Dataset.mask_and_isfinite` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that contains all finite values.
:py:meth:`.rt_dataset.Dataset.mask_or_isinf` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that has at least one value that's
positive or negative infinity.
:py:meth:`.rt_dataset.Dataset.mask_and_isinf` :
Return a boolean array that's `True` for each
:py:class:`~.rt_dataset.Dataset` row that contains all infinite values.
Examples
--------
>>> a = rt.FastArray([rt.inf, -rt.inf, rt.nan, 0])
>>> a.isnotinf()
FastArray([False, False, True, True])
With ``fancy = True``:
>>> a.isnotinf(fancy = True)
FastArray([2, 3], dtype=int32)
"""
return self._unary_op(MATH_OPERATION.ISNOTINF, fancy=fancy)
#############################################
# Reduce section
#############################################
def _fa_filter_wrapper(self, myFunc, filter=None, dtype=None):
if filter is True:
filter = ones(len(self), dtype=bool)
if filter is False:
filter = zeros(len(self), dtype=bool)
if len(filter) != len(self):
raise ValueError("Filter and input not the same length.")
if not self.iscomputable():
return np.nan
if dtype is not None:
return dtype(myFunc(self, filter))
return myFunc(self, filter)
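# Behavior sketch for the scalar-filter shortcuts above (illustrative):
#   filter=True  -> an all-True mask, so every element is included
#   filter=False -> an all-False mask, so no element is included
# Non-computable (string/object) arrays short-circuit to np.nan before the kernel runs.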
def _fa_keyword_wrapper(self, filter=None, dtype=None, axis=None, keepdims=None, ddof=None, **kwargs):
if self.dtype.char in "OSU":
raise TypeError("FastArray operation applied to string or object array.")
if "out" in kwargs:
if kwargs["out"] is None:
kwargs.pop("out")
if any(kwargs):
logging.warning(
"Unexpected FastArray operation keyword(s): " + ", ".join([key for key, value in kwargs.items()])
)
if dtype is not None:
kwargs["dtype"] = dtype
if axis:
kwargs["axis"] = axis
if keepdims:
kwargs["keepdims"] = keepdims
if ddof is not None:
kwargs["ddof"] = ddof
if filter is not None:
kwargs["filter"] = filter
if (filter is not None) and ((axis is not None) or (keepdims is not None) or (ddof is not None)):
logging.warning("Since Filter keyword is present, FastArray operations ignore axis, keepdims and ddof")
return kwargs
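# Sketch of the precedence implemented above: a ``filter`` argument wins over the
# numpy-style keywords, e.g. ``fa.nansum(filter=m, axis=0)`` logs the warning and then
# computes the filtered riptable reduction, ignoring ``axis``.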
def nansum(self, filter=None, dtype=None, axis=None, keepdims=None, **kwargs) -> np.number:
"""
Compute the sum of the values in the first argument, ignoring NaNs.
If all values in the first argument are NaNs, ``0.0`` is returned.
Parameters
----------
filter : array of bool, default None
Specifies which elements to include in the sum calculation. If the filter is
uniformly ``False``, `nansum` returns ``0.0``.
dtype : rt.dtype or numpy.dtype, default float64
The data type of the result. For a `FastArray` ``x``,
``x.nansum(dtype = my_type)`` is equivalent to ``my_type(x.nansum())``.
Returns
-------
scalar
The sum of the values.
See Also
--------
numpy.nansum
Dataset.nansum : Sums the values of numerical `Dataset` columns, ignoring NaNs.
GroupByOps.nansum : Sums the values of each group, ignoring NaNs. Used by
`Categorical` objects.
Notes
-----
The `dtype` keyword for `FastArray.nansum` specifies the data type of the
result. This differs from `numpy.nansum`, where it specifies the data type used
to compute the sum.
**Notes on Using NumPy Parameters**
Using either of the following NumPy parameters will cause Riptable to switch to
the NumPy implementation of this method (`numpy.nansum`). However, until a
reported bug is fixed, if you also include the `dtype` parameter it will be
applied to the result, not used to compute the sum as it is in `numpy.nansum`.
Also note that if you use either of the following NumPy parameters and also
include a `filter` keyword argument (which `numpy.nansum` does not accept),
Riptable's implementation of `nansum` will be used with the filter argument and
the NumPy parameters will be ignored.
axis : {int, tuple of int, None}, optional
Axis or axes along which the sum is computed. The default is to compute the
sum of the flattened array.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in the result as
dimensions with size one. With this option, the result will broadcast
correctly against the original input array.
If the value is anything but the default, then `keepdims` will be passed
through to the `mean` or `sum` methods of sub-classes of `ndarray`. If the
sub-classes' methods do not implement `keepdims`, any exceptions will be
raised.
Examples
--------
>>> a = rt.FastArray([1, 3, 5, 7, rt.nan])
>>> a.nansum()
16.0
With a `dtype` specified:
>>> a = rt.FastArray([1.0, 3.0, 5.0, 7.0, rt.nan])
>>> a.nansum(dtype = rt.int32)
16
With a filter:
>>> a = rt.FastArray([1, 3, 5, 7, rt.nan])
>>> b = rt.FastArray([False, True, False, True, True])
>>> a.nansum(filter = b)
10.0
"""
kwargs = self._fa_keyword_wrapper(filter=filter, dtype=dtype, axis=axis, keepdims=keepdims, ddof=None, **kwargs)
if filter is not None:
return self._fa_filter_wrapper(_fnansum, filter=filter, dtype=dtype)
return self._reduce_check(REDUCE_FUNCTIONS.REDUCE_NANSUM, np.nansum, **kwargs)
def mean(self, filter=None, dtype=None, axis=None, keepdims=None, **kwargs) -> np.number:
"""
Compute the arithmetic mean of the values in the first argument.
Parameters
----------
filter : array of bool, default None
Specifies which elements to include in the mean calculation. If the filter
is uniformly ``False``, `mean` raises a `ZeroDivisionError`.
dtype : rt.dtype or numpy.dtype, default float64
The data type of the result. For a `FastArray` ``x``,
``x.mean(dtype = my_type)`` is equivalent to ``my_type(x.mean())``.
Returns
-------
scalar
The mean of the values.
See Also
--------
numpy.mean
FastArray.nanmean : Computes the mean of `FastArray` values, ignoring NaNs.
Dataset.mean : Computes the mean of numerical `Dataset` columns.
GroupByOps.mean : Computes the mean of each group. Used by `Categorical` objects.
Notes
-----
The `dtype` keyword for `FastArray.mean` specifies the data type of the result.
This differs from `numpy.mean`, where it specifies the data type used to compute
the mean.
**Notes on Using NumPy Parameters**
Using either of the following NumPy parameters will cause Riptable to switch to
the NumPy implementation of this method (`numpy.mean`). However, until a
reported bug is fixed, if you also include the `dtype` parameter it will be
applied to the result, not used to compute the mean as it is in `numpy.mean`.
Also note that if you use either of the following NumPy parameters and also
include a `filter` keyword argument (which `numpy.mean` does not accept),
Riptable's implementation of `mean` will be used with the filter argument and
the NumPy parameters will be ignored.
axis : None or int or tuple of ints, optional
Axis or axes along which the means are computed. The default is to compute
the mean of the flattened array.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in the result as
dimensions with size one. With this option, the result will broadcast
correctly against the original input array.
If the default value is passed, then `keepdims` will not be passed through
to the `mean` method of sub-classes of `ndarray`, however any non-default
value will be. If the sub-class's method does not implement `keepdims`, any
exceptions will be raised.
Examples
--------
>>> a = rt.FastArray([1, 3, 5, 7])
>>> a.mean()
4.0
With a `dtype` specified:
>>> a = rt.FastArray([1, 3, 5, 7])
>>> a.mean(dtype = rt.int32)
4
With a filter:
>>> a = rt.FastArray([1, 3, 5, 7])
>>> b = rt.FastArray([False, True, False, True])
>>> a.mean(filter = b)
5.0
"""
kwargs = self._fa_keyword_wrapper(filter=filter, dtype=dtype, axis=axis, keepdims=keepdims, ddof=None, **kwargs)
if filter is not None:
return self._fa_filter_wrapper(_fmean, filter=filter, dtype=dtype)
return self._reduce_check(REDUCE_FUNCTIONS.REDUCE_MEAN, np.mean, **kwargs)
def nanmean(self, filter=None, dtype=None, axis=None, keepdims=None, **kwargs) -> np.number:
"""
Compute the arithmetic mean of the values in the first argument, ignoring NaNs.
If all values in the first argument are NaNs, ``0.0`` is returned.
Parameters
----------
filter : array of bool, default None
Specifies which elements to include in the mean calculation. If the filter
is uniformly ``False``, `nanmean` raises a `ZeroDivisionError`.
dtype : rt.dtype or numpy.dtype, default float64
The data type of the result. For a `FastArray` ``x``,
``x.nanmean(dtype = my_type)`` is equivalent to ``my_type(x.nanmean())``.
Returns
-------
scalar
The mean of the values.
See Also
--------
numpy.nanmean
FastArray.mean : Computes the mean of `FastArray` values.
Dataset.nanmean : Computes the mean of numerical `Dataset` columns, ignoring
NaNs.
GroupByOps.nanmean : Computes the mean of each group, ignoring NaNs. Used by
`Categorical` objects.
Notes
-----
The `dtype` keyword for `FastArray.nanmean` specifies the data type of the
result. This differs from `numpy.nanmean`, where it specifies the data type used
to compute the mean.
**Notes on Using NumPy Parameters**
Using either of the following NumPy parameters will cause Riptable to switch to
the NumPy implementation of this method (`numpy.nanmean`). However, until a
reported bug is fixed, if you also include the `dtype` parameter it will be
applied to the result, not used to compute the mean as it is in `numpy.nanmean`.
Also note that if you use either of the following NumPy parameters and also
include a `filter` keyword argument (which `numpy.nanmean` does not accept),
Riptable's implementation of `nanmean` will be used with the filter argument
and the NumPy parameters will be ignored.
axis : {int, tuple of int, None}, optional
Axis or axes along which the means are computed. The default is to compute
the mean of the flattened array.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in the result as
dimensions with size one. With this option, the result will broadcast
correctly against the original input array.
If the value is anything but the default, then `keepdims` will be passed
through to the `mean` or `sum` methods of sub-classes of `ndarray`. If the
sub-classes' methods do not implement `keepdims`, any exceptions will be
raised.
Examples
--------
>>> a = rt.FastArray([1, 3, 5, rt.nan])
>>> a.nanmean()
3.0
With a `dtype` specified:
>>> a = rt.FastArray([1, 3, 5, rt.nan])
>>> a.nanmean(dtype = rt.int32)
3
With a filter:
>>> a = rt.FastArray([1, 3, 5, rt.nan])
>>> b = rt.FastArray([False, True, True, True])
>>> a.nanmean(filter = b)
4.0
"""
kwargs = self._fa_keyword_wrapper(filter=filter, dtype=dtype, axis=axis, keepdims=keepdims, ddof=None, **kwargs)
if filter is not None:
return self._fa_filter_wrapper(_fnanmean, filter=filter, dtype=dtype)
return self._reduce_check(REDUCE_FUNCTIONS.REDUCE_NANMEAN, np.nanmean, **kwargs)
# ---------------------------------------------------------------------------
# these function take a ddof kwarg
def var(self, filter=None, dtype=None, axis=None, keepdims=None, ddof=None, **kwargs):
"""
Compute the variance of the values in the first argument.
Riptable uses the convention that ``ddof = 1``, meaning the variance of
``[x_1, ..., x_n]`` is defined by ``var = 1/(n - 1) * sum((x_i - mean)**2)``
(note the ``n - 1`` instead of ``n``). This differs from NumPy, which uses
``ddof = 0`` by default.
Parameters
----------
filter : array of bool, default None
Specifies which elements to include in the variance calculation. If the
filter is uniformly ``False``, `var` raises a `ZeroDivisionError`.
dtype : rt.dtype or numpy.dtype, default float64
The data type of the result. For a `FastArray` ``x``,
``x.var(dtype = my_type)`` is equivalent to ``my_type(x.var())``.
Returns
-------
scalar
The variance of the values.
See Also
--------
numpy.var
FastArray.nanvar : Computes the variance of `FastArray` values, ignoring NaNs.
Dataset.var : Computes the variance of numerical `Dataset` columns.
GroupByOps.var : Computes the variance of each group. Used by `Categorical`
objects.
Notes
-----
The `dtype` keyword for `FastArray.var` specifies the data type of the result.
This differs from `numpy.var`, where it specifies the data type used to compute
the variance.
**Notes on Using NumPy Parameters**
Using any of the following NumPy parameters will cause Riptable to switch to
the NumPy implementation of this method (`numpy.var`). However, until a
reported bug is fixed, if you also include the `dtype` parameter it will be
applied to the result, not used to compute the variance as it is in `numpy.var`.
Also note that if you use any of the following NumPy parameters and also
include a `filter` keyword argument (which `numpy.var` does not accept),
Riptable's implementation of `var` will be used with the filter argument
and the NumPy parameters will be ignored.
axis : None or int or tuple of ints, optional
Axis or axes along which the variance is computed. The default is to
compute the variance of the flattened array.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in the result as
dimensions with size one. With this option, the result will broadcast
correctly against the input array.
If the default value is passed, then `keepdims` will not be passed through
to the `var` method of sub-classes of `ndarray`, however any non-default
value will be. If the sub-classes' method does not implement `keepdims`, any
exceptions will be raised.
ddof : int, optional
"Delta Degrees of Freedom": the divisor used in the calculation is
``N - ddof``, where ``N`` represents the number of elements. By default
`ddof` is zero for the NumPy implementation, versus one for the Riptable
implementation.
Examples
--------
>>> a = rt.FastArray([1, 2, 3])
>>> a.var()
1.0
With a `dtype` specified:
>>> a = rt.FastArray([1, 2, 3])
>>> a.var(dtype = rt.int32)
1
With a filter:
>>> a = rt.FastArray([1, 2, 3])
>>> b = rt.FastArray([False, True, True])
>>> a.var(filter = b)
0.5
"""
kwargs = self._fa_keyword_wrapper(filter=filter, dtype=dtype, axis=axis, keepdims=keepdims, ddof=ddof, **kwargs)
if filter is not None:
return self._fa_filter_wrapper(_fvar, filter=filter, dtype=dtype)
return self._reduce_check(REDUCE_FUNCTIONS.REDUCE_VAR, np.var, **kwargs)
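# Worked check of the ddof=1 convention documented above: for [1, 2, 3] the mean is 2,
# so var = ((1-2)**2 + (2-2)**2 + (3-2)**2) / (3 - 1) = 1.0, matching the docstring
# example ``rt.FastArray([1, 2, 3]).var() == 1.0``.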
def nanvar(self, filter=None, dtype=None, axis=None, keepdims=None, ddof=None, **kwargs) -> np.number:
"""
Compute the variance of the values in the first argument, ignoring NaNs.
If all values in the first argument are NaNs, ``NaN`` is returned.
Riptable uses the convention that ``ddof = 1``, meaning the variance of
``[x_1, ..., x_n]`` is defined by ``var = 1/(n - 1) * sum((x_i - mean)**2)`` (note
the ``n - 1`` instead of ``n``). This differs from NumPy, which uses ``ddof = 0`` by
default.
Parameters
----------
filter : array of bool, default None
Specifies which elements to include in the variance calculation. If the filter
is uniformly ``False``, `nanvar` raises a `ZeroDivisionError`.
dtype : rt.dtype or numpy.dtype, default float64
The data type of the result. For a `FastArray` ``x``,
``x.nanvar(dtype = my_type)`` is equivalent to ``my_type(x.nanvar())``.
Returns
-------
scalar
The variance of the values.
See Also
--------
numpy.nanvar
FastArray.var : Computes the variance of `FastArray` values.
Dataset.nanvar : Computes the variance of numerical `Dataset` columns,
ignoring NaNs.
GroupByOps.nanvar : Computes the variance of each group, ignoring NaNs. Used by
`Categorical` objects.
Notes
-----
The `dtype` keyword for `FastArray.nanvar` specifies the data type of the
result. This differs from `numpy.nanvar`, where it specifies the data type used
to compute the variance.
**Notes on Using NumPy Parameters**
Using any of the following NumPy parameters will cause Riptable to switch to
the NumPy implementation of this method (`numpy.nanvar`). However, until a
reported bug is fixed, if you also include the `dtype` parameter it will be
applied to the result, not used to compute the variance as it is in
`numpy.nanvar`.
Also note that if you use any of the following NumPy parameters and also
include a `filter` keyword argument (which `numpy.nanvar` does not accept),
Riptable's implementation of `nanvar` will be used with the filter argument
and the NumPy parameters will be ignored.
axis : {int, tuple of int, None}, optional
Axis or axes along which the variance is computed. The default is to
compute the variance of the flattened array.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in the result as
dimensions with size one. With this option, the result will broadcast
correctly against the original input array.
ddof : int, optional
"Delta Degrees of Freedom": the divisor used in the calculation is
``N - ddof``, where ``N`` represents the number of non-NaN elements. By
default `ddof` is zero for the NumPy implementation, versus one for the
Riptable implementation.
Examples
--------
>>> a = rt.FastArray([1, 2, 3, rt.nan])
>>> a.nanvar()
1.0
With a `dtype` specified:
>>> a = rt.FastArray([1, 2, 3, rt.nan])
>>> a.nanvar(dtype = rt.int32)
1
With a filter:
>>> a = rt.FastArray([1, 2, 3, rt.nan])
>>> b = rt.FastArray([False, True, True, True])
>>> a.nanvar(filter = b)
0.5
"""
kwargs = self._fa_keyword_wrapper(filter=filter, dtype=dtype, axis=axis, keepdims=keepdims, ddof=ddof, **kwargs)
if filter is not None:
return self._fa_filter_wrapper(_fnanvar, filter=filter, dtype=dtype)
return self._reduce_check(REDUCE_FUNCTIONS.REDUCE_NANVAR, np.nanvar, **kwargs)
def std(self, filter=None, dtype=None, axis=None, keepdims=None, ddof=None, **kwargs) -> np.number:
"""
Compute the standard deviation of the values in the first argument.
Riptable uses the convention that ``ddof = 1``, meaning the standard deviation of
``[x_1, ..., x_n]`` is defined by ``std = sqrt(1/(n - 1) * sum((x_i - mean)**2))`` (note
the ``n - 1`` instead of ``n``). This differs from NumPy, which uses ``ddof = 0`` by
default.
Parameters
----------
filter : array of bool, default None
Specifies which elements to include in the standard deviation calculation. If
the filter is uniformly ``False``, `std` raises a `ZeroDivisionError`.
dtype : rt.dtype or numpy.dtype, default float64
The data type of the result. For a `FastArray` ``x``,
``x.std(dtype = my_type)`` is equivalent to ``my_type(x.std())``.
Returns
-------
scalar
The standard deviation of the values.
See Also
--------
numpy.std
FastArray.nanstd : Computes the standard deviation of `FastArray` values, ignoring
NaNs.
Dataset.std : Computes the standard deviation of numerical `Dataset` columns.
GroupByOps.std : Computes the standard deviation of each group. Used by
`Categorical` objects.
Notes
-----
The `dtype` keyword for `FastArray.std` specifies the data type of the result.
This differs from `numpy.std`, where it specifies the data type used to compute
the standard deviation.
**Notes on Using NumPy Parameters**
Using any of the following NumPy parameters will cause Riptable to switch to
the NumPy implementation of this method (`numpy.std`). However, until a
reported bug is fixed, if you also include the `dtype` parameter it will be
applied to the result, not used to compute the variance as it is in
`numpy.std`.
Also note that if you use any of the following NumPy parameters and also
include a `filter` keyword argument (which `numpy.std` does not accept),
Riptable's implementation of `std` will be used with the filter argument
and the NumPy parameters will be ignored.
axis : None or int or tuple of ints, optional
Axis or axes along which the standard deviation is computed. The
default is to compute the standard deviation of the flattened array.
.. versionadded:: 1.7.0
If this is a tuple of ints, a standard deviation is performed over multiple
axes, instead of a single axis or all the axes as before.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in the result as
dimensions with size one. With this option, the result will broadcast
correctly against the input array.
If the default value is passed, then `keepdims` will not be passed through
to the `std` method of sub-classes of `ndarray`, however any non-default
value will be. If the sub-class' method does not implement `keepdims`,
any exceptions will be raised.
ddof : int, optional
"Delta Degrees of Freedom": the divisor used in the calculation is
``N - ddof``, where ``N`` represents the number of elements. By default
`ddof` is zero for the NumPy implementation, versus one for the
Riptable implementation.
Examples
--------
>>> a = rt.FastArray([1, 2, 3])
>>> a.std()
1.0
With a `dtype` specified:
>>> a = rt.FastArray([1, 2, 3])
>>> a.std(dtype = rt.int32)
1
With a filter:
>>> a = rt.FastArray([1, 2, 3])
>>> b = rt.FA([False, True, True])
>>> a.std(filter = b)
0.7071067811865476
"""
kwargs = self._fa_keyword_wrapper(filter=filter, dtype=dtype, axis=axis, keepdims=keepdims, ddof=ddof, **kwargs)
if filter is not None:
return self._fa_filter_wrapper(_fstd, filter=filter, dtype=dtype)
return self._reduce_check(REDUCE_FUNCTIONS.REDUCE_STD, np.std, **kwargs)
def nanstd(self, filter=None, dtype=None, axis=None, keepdims=None, ddof=None, **kwargs) -> np.number:
"""
Compute the standard deviation of the values in the first argument, ignoring NaNs.
If all values in the first argument are NaNs, ``NaN`` is returned.
Riptable uses the convention that ``ddof = 1``, meaning the standard deviation of
``[x_1, ..., x_n]`` is defined by ``std = sqrt(1/(n - 1) * sum((x_i - mean)**2))`` (note
the ``n - 1`` instead of ``n``). This differs from NumPy, which uses ``ddof = 0`` by
default.
Parameters
----------
filter : array of bool, default None
Specifies which elements to include in the standard deviation calculation. If
the filter is uniformly ``False``, `nanstd` raises a `ZeroDivisionError`.
dtype : rt.dtype or numpy.dtype, default float64
The data type of the result. For a `FastArray` ``x``,
``x.nanstd(dtype = my_type)`` is equivalent to ``my_type(x.nanstd())``.
Returns
-------
scalar
The standard deviation of the values.
See Also
--------
numpy.nanstd
FastArray.std : Computes the standard deviation of `FastArray` values.
Dataset.nanstd : Computes the standard deviation of numerical `Dataset` columns,
ignoring NaNs.
GroupByOps.nanstd : Computes the standard deviation of each group, ignoring NaNs.
Used by `Categorical` objects.
Notes
-----
The `dtype` keyword for `FastArray.nanstd` specifies the data type of the
result. This differs from `numpy.nanstd`, where it specifies the data type used
to compute the standard deviation.
**Notes on Using NumPy Parameters**
Using any of the following NumPy parameters will cause Riptable to switch to
the NumPy implementation of this method (`numpy.nanstd`). However, until a
reported bug is fixed, if you also include the `dtype` parameter it will be
applied to the result, not used to compute the variance as it is in
`numpy.nanstd`.
Also note that if you use any of the following NumPy parameters and also
include a `filter` keyword argument (which `numpy.nanstd` does not accept),
Riptable's implementation of `nanstd` will be used with the filter argument
and the NumPy parameters will be ignored.
axis : {int, tuple of int, None}, optional
Axis or axes along which the standard deviation is computed. The default is
to compute the standard deviation of the flattened array.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in the result as
dimensions with size one. With this option, the result will broadcast
correctly against the original input array.
If this value is anything but the default it is passed through as-is to the
relevant functions of the sub-classes. If these functions do not have a
`keepdims` kwarg, a RuntimeError will be raised.
ddof : int, optional
"Delta Degrees of Freedom": the divisor used in the calculation is
``N - ddof``, where ``N`` represents the number of elements. By default
`ddof` is zero for the NumPy implementation, versus one for the
Riptable implementation.
Examples
--------
>>> a = rt.FastArray([1, 2, 3, rt.nan])
>>> a.nanstd()
1.0
With a `dtype` specified:
>>> a = rt.FastArray([1, 2, 3, rt.nan])
>>> a.nanstd(dtype = rt.int32)
1
With filter:
>>> a = rt.FastArray([1, 2, 3, rt.nan])
>>> b = rt.FastArray([False, True, True, True])
>>> a.nanstd(filter = b)
0.7071067811865476
"""
kwargs = self._fa_keyword_wrapper(filter=filter, dtype=dtype, axis=axis, keepdims=keepdims, ddof=ddof, **kwargs)
if filter is not None:
return self._fa_filter_wrapper(_fnanstd, filter=filter, dtype=dtype)
return self._reduce_check(REDUCE_FUNCTIONS.REDUCE_NANSTD, np.nanstd, **kwargs)
# ---------------------------------------------------------------------------
def nanmin(self, **kwargs) -> np.number:
return self._reduce_check(REDUCE_FUNCTIONS.REDUCE_NANMIN, np.nanmin, **kwargs)
def nanmax(self, **kwargs) -> np.number:
return self._reduce_check(REDUCE_FUNCTIONS.REDUCE_NANMAX, np.nanmax, **kwargs)
# ---------------------------------------------------------------------------
def argmin(self, **kwargs) -> int:
return self._reduce_check(REDUCE_FUNCTIONS.REDUCE_ARGMIN, np.argmin, **kwargs)
def argmax(self, **kwargs) -> int:
return self._reduce_check(REDUCE_FUNCTIONS.REDUCE_ARGMAX, np.argmax, **kwargs)
def nanargmin(self, **kwargs) -> int:
return self._reduce_check(REDUCE_FUNCTIONS.REDUCE_NANARGMIN, np.nanargmin, **kwargs)
def nanargmax(self, **kwargs) -> int:
return self._reduce_check(REDUCE_FUNCTIONS.REDUCE_NANARGMAX, np.nanargmax, **kwargs)
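# Illustrative sketch of these thin reduction wrappers (hypothetical values; NaN and
# sentinel handling follow the conventions documented elsewhere in this module):
#   >>> fa = FastArray([3.0, np.nan, 1.0])
#   >>> fa.nanmin(), fa.nanargmin()
#   (1.0, 2)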
#############################################
# Stats/ML section
#############################################
def normalize_zscore(self) -> FastArray:
return normalize_zscore(self)
def normalize_minmax(self) -> FastArray:
return normalize_minmax(self)
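# Illustrative sketch only; the exact scaling conventions are defined in rt_mlutils.
#   >>> FastArray([0.0, 5.0, 10.0]).normalize_minmax()   # rescale onto the [0, 1] range
#   FastArray([0. , 0.5, 1. ])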
#############################################
# BasicMath section (to be hooked at C level now)
#############################################
# def __add__(self, value): result=rc.BasicMathTwoInputs((self, value), 1, 0); result= result if result is not None else np.add(self,value); return result
# def __add__(self, value): return rc.BasicMathTwoInputs((self, value), 1, 0)
@property
@_use_autocomplete_placeholder(0)
def crc(self) -> int:
"""
Calculate the 32-bit CRC of the data in this array using the Castagnoli polynomial (CRC32C).
This function does not consider the array's shape or strides when calculating the CRC,
it simply calculates the CRC value over the entire buffer described by the array.
Examples
--------
The CRC value can be used to compare two arrays for structural equality:
>>> a = arange(100)
>>> b = arange(100.0)
>>> a.crc == b.crc
False
"""
return crc32c(self)
# todo: range/nanrange
# todo: stats/nanstats
# -------------------------------------------------------
def unique(
self,
return_index: bool = False,
return_inverse: bool = False,
return_counts: bool = False,
sorted: bool = True,
lex: bool = False,
dtype: Optional[Union[str, np.dtype]] = None,
filter: Optional[np.ndarray] = None,
**kwargs,
) -> Union["FastArray", Tuple["FastArray", ...], List["FastArray"], tuple]:
"""
Find the unique elements of an array or the unique combinations of elements with
corresponding indices in multiple arrays.
See :meth:`riptable.unique` for full documentation.
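Examples
--------
A minimal illustration (see :meth:`riptable.unique` for the full set of options):
>>> a = rt.FastArray([2, 1, 2, 3, 1])
>>> a.unique()
FastArray([1, 2, 3])
>>> a.unique(return_counts=True)
(FastArray([1, 2, 3]), FastArray([2, 2, 1]))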
"""
return unique(
self,
return_index=return_index,
return_inverse=return_inverse,
return_counts=return_counts,
sorted=sorted,
lex=lex,
dtype=dtype,
filter=filter,
**kwargs,
)
# -------------------------------------------------------
def nunique(self) -> int:
"""
Return the number of unique values in the input `FastArray`.
Does not include NaN or sentinel values.
Returns
-------
int
Number of unique values in the input `FastArray`, excluding NaN and
sentinel values.
See Also
--------
FastArray.duplicated : Return a boolean `FastArray` indicating duplicate values.
.Categorical.nunique : Return the number of unique values in the `.Categorical`.
Examples
--------
Retrieve the number of unique values in a floating-point `FastArray`:
>>> a = rt.FastArray([1., 2., 3., 1., 2., 3.])
>>> a
FastArray([1., 2., 3., 1., 2., 3.])
>>> a.nunique()
3
Retrieve the number of unique values in a floating-point `FastArray`
with a NaN value:
>>> a2 = rt.FastArray([1., 2., 3., 1., 2., 3., rt.nan])
>>> a2
FastArray([ 1., 2., 3., 1., 2., 3., nan])
>>> a2.nunique() # The NaN value is not included.
3
Retrieve the number of unique values in an unsigned integer `FastArray`
with a sentinel value:
>>> a3 = rt.FastArray([255, 2, 3, 2, 3], dtype="uint8")
>>> a3
FastArray([255, 2, 3, 2, 3], dtype=uint8)
>>> a3.nunique() # The sentinel value is not included.
2
"""
un = unique(self)
count = len(un)
if count > 0:
# unique is sorted, so check for sentinel based on dtype
inv = INVALID_DICT[self.dtype.num]
if self.dtype.char in NumpyCharTypes.AllFloat:
# check if last item is nan
if un[count - 1] != un[count - 1]:
count -= 1
# unsigned int uses high number as sentinel
elif self.dtype.char in NumpyCharTypes.UnsignedInteger:
if un[count - 1] == inv:
count -= 1
# all other sentinels are lowest number
else:
if un[0] == inv:
count -= 1
return count
# -------------------------------------------------------
def searchsorted(self, v, side="left", sorter=None) -> np.number:
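"""
Find the indices where `v` would be inserted into this array to keep it sorted.
Delegates to the riptable ``searchsorted`` implementation; as with `numpy.searchsorted`,
the array is assumed to already be sorted.
Examples
--------
Illustrative only:
>>> a = rt.FastArray([1, 2, 4, 8])
>>> a.searchsorted(3)
2
"""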
return _searchsorted(self, v, side=side, sorter=sorter)
# ---------------------------------------------------------------------------
def map_old(self, npdict: dict):
"""
Example
-------
>>> d = {1:10, 2:20}
>>> dat['c'] = dat.a.map(d)
>>> print(dat)
a b cb c
0 1 0 0.0 10
1 1 1 1.0 10
2 1 2 3.0 10
3 2 3 5.0 20
4 2 4 7.0 20
5 2 5 9.0 20
"""
outArray = self.copy()
for k, v in npdict.items():
outArray[self == k] = v
return outArray
def map(self, npdict: dict) -> FastArray:
"""
Notes
-----
Uses ismember and can handle large dictionaries
Examples
--------
>>> a=arange(3)
>>> a.map({1: 'a', 2:'b', 3:'c'})
FastArray(['', 'a', 'b'], dtype='<U1')
>>> a=arange(3)+1
>>> a.map({1: 'a', 2:'b', 3:'c'})
FastArray(['a', 'b', 'c'], dtype='<U1')
"""
orig = FastArray([*npdict], unicode=True)
replace = FastArray([*npdict.values()], unicode=True)
outArray = self.fill_invalid(self.shape, dtype=replace.dtype, inplace=False)
found, idx = ismember(self, orig)
outArray[found] = replace[idx[found]]
return outArray
# ---------------------------------------------------------------------------
def shift(self, periods=1, invalid=None) -> FastArray:
"""
Shift an array's elements right or left.
Newly empty elements at either end (resulting from the shift) are filled
with the invalid value for the input array's data type.
Parameters
----------
periods : int, default 1
Number of element positions to shift right (if positive) or left
(if negative).
Returns
-------
FastArray
A shifted `FastArray`. Newly empty elements are filled with the
invalid values for the input array's data type.
See Also
--------
FastArray.diff :
Return a `FastArray` containing the differences between adjacent
input array values.
.Categorical.shift :
Shift values in the `.Categorical` by a specified number of periods.
Examples
--------
Shift array elements one position to the right:
>>> a = rt.FA([0, 2, 4, 8, 16, 32])
>>> a
FastArray([ 0, 2, 4, 8, 16, 32])
>>> a.shift()
FastArray([-2147483648, 0, 2, 4,
8, 16])
Shift array elements two positions to the left:
>>> a.shift(-2)
FastArray([          4,           8,          16,          32,
-2147483648, -2147483648])
Specify a shift value greater than the array length:
>>> a.shift(10)
FastArray([-2147483648, -2147483648, -2147483648, -2147483648,
-2147483648, -2147483648])
"""
if periods == 0:
return self
if invalid is None:
if isinstance(self, TypeRegister.Categorical):
invalid = 0
else:
try:
invalid = INVALID_DICT[self.dtype.num]
except Exception:
raise TypeError(f"shift does not support the dtype {self.dtype.name!r}")
# we know that this is a simple vector: shape == (len, )
# TODO: get recycled
temp = empty_like(self)
if abs(periods) >= len(self):
temp.fill(invalid)
elif periods > 0:
temp[:periods] = invalid
temp[periods:] = self[:-periods]
else:
temp[:periods] = self[-periods:]
temp[periods:] = invalid
# to rewrap categoricals or datelike
if hasattr(self, "newclassfrominstance"):
temp = self.newclassfrominstance(temp, self)
return temp
# -------------------------------------------------------
def _internal_self_compare(self, math_op, periods=1, fancy=False):
"""internal routine used for differs and transitions"""
result = empty_like(self, dtype=np.bool_)
if periods == 0:
raise ValueError("periods of 0 is invalid for transitions")
if periods > 0:
TypeRegister.MathLedger._BASICMATH_TWO_INPUTS(
(self[periods:], self[:-periods], result[periods:]), math_op, 0
)
# fill upfront with invalids
result[:periods] = False
else:
TypeRegister.MathLedger._BASICMATH_TWO_INPUTS(
(self[:periods], self[-periods:], result[:periods]), math_op, 0
)
# fill back with invalids (periods is negative)
result[periods:] = False
if fancy:
return bool_to_fancy(result)
return result
# -------------------------------------------------------
def differs(self, periods=1, fancy=False) -> FastArray:
"""
Identify array values that are the same as adjacent values.
Returns either a boolean `FastArray`, where `True` indicates equivalent
values, or a fancy index `FastArray` containing the indices of equivalent
values.
Parameters
----------
periods: int, default 1
The number of array element positions to look behind (positive number)
or look ahead (negative number) for comparison.
fancy: bool, default False
If `False` (the default), returns a boolean array. If `True`, returns
a fancy index array.
Returns
-------
FastArray
A boolean or fancy index array that identifies equivalent
elements in the input array.
See Also
--------
FastArray.transitions :
Identify nonequivalent items in the input array and return a boolean
or fancy index array.
Examples
--------
Return a boolean array using the ``periods=1`` default value (look behind
one element position for comparisons):
>>> a = rt.FA([1, 2, 2, 3, 2, 4, 5, 6, 2, 2, 5])
>>> a
FastArray([1, 2, 2, 3, 2, 4, 5, 6, 2, 2, 5])
>>> a.differs()
FastArray([False, False, True, False, False, False, False, False, False,
True, False])
Return a boolean array and look ahead three element positions for comparisons:
>>> a.differs(periods=-3)
FastArray([False, True, False, False, False, False, False, False, False,
False, False])
Return a fancy index array using the ``periods=1`` default value (look
behind one element position for comparisons):
>>> a.differs(fancy=True)
FastArray([2, 9])
Set `periods` to a number larger than the length of the input array:
>>> a.differs(periods=15)
FastArray([False, False, False, False, False, False, False, False, False,
False, False])
"""
if self.dtype.num > 13:
result = self == self.shift(periods)
if fancy:
return bool_to_fancy(result)
return result
return self._internal_self_compare(MATH_OPERATION.CMP_EQ, periods=periods, fancy=fancy)
# ---------------------------------------------------------------------------
def transitions(self, periods=1, fancy=False) -> FastArray:
"""
Identify array values that differ from adjacent values.
Returns either a boolean `FastArray`, where `True` indicates a value that does not
equal the value it is compared with, or a fancy index `FastArray` containing the
indices of those values.
Parameters
----------
periods: int, default 1
The number of array element positions to look behind (positive number)
or look ahead (negative number) for comparison. Use ``periods=-1`` to set
`True` where the next item does not equal the current one.
fancy: bool, default False
If `False` (the default), returns a boolean array. If `True`, returns
a fancy index array.
Returns
-------
FastArray
A boolean or fancy index array that identifies nonequivalent elements
in the input array.
See Also
--------
FastArray.differs : Identify equivalent items in the input array.
Examples
--------
>>> a = FastArray([0,1,2,3,3,3,4])
>>> a.transitions(periods=1)
FastArray([False, True, True, True, False, False, True])
>>> a.transitions(periods=2)
FastArray([False, False, True, True, True, False, True])
>>> a.transitions(periods=-1)
FastArray([ True, True, True, False, False, True, False])
"""
if self.dtype.num > 13:
result = self != self.shift(periods)
if fancy:
return bool_to_fancy(result)
return result
return self._internal_self_compare(MATH_OPERATION.CMP_NE, periods=periods, fancy=fancy)
# -------------------------------------------------------
def diff(self, periods=1) -> FastArray:
"""
Compute the differences between adjacent elements of a `FastArray`.
Positions at either end (vacated by the shift) are filled with invalid values based
on the input array's dtype. If a calculated difference isn't representable in the
dtype, it appears as a NaN or rolled-over value. For example, a difference of -1
in a `~riptable.uint8` array appears as 255. To resolve this, you can
explicitly upcast to the next larger signed `int` dtype before calculating the
differences.
Parameters
----------
periods: int, default 1
Number of element positions to shift right (if positive) or left (if
negative) before subtracting. Raises an error if set to 0.
Returns
-------
FastArray
An equivalent-length array containing the differences between input array
elements that are adjacent or separated by a specified period. Spaces at
either end are filled with invalids based on the input array's dtype.
See Also
--------
FastArray.shift : Shift an array's elements right or left.
Examples
--------
Calculate differences using the ``periods=1`` default (array elements one
position to the right):
>>> a=rt.FA([0, 2, 4, 8, 16, 32])
>>> a
FastArray([ 0, 2, 4, 8, 16, 32])
>>> a.diff()
FastArray([-2147483648, 2, 2, 4,
8, 16])
Calculate differences using array elements two positions to the left:
>>> a.diff(-2)
FastArray([ -4, -6, -12, -24,
-2147483648, -2147483648])
Specify a `periods` value that is greater than the array length:
>>> a.diff(10)
FastArray([-2147483648, -2147483648, -2147483648, -2147483648,
-2147483648, -2147483648])
"""
try:
invalid = INVALID_DICT[self.dtype.num]
except Exception:
raise TypeError(f"diff does not support the dtype {self.dtype.name!r}")
temp = empty(self.shape, dtype=self.dtype)
if abs(periods) >= len(self):
temp.fill(invalid)
elif periods > 0:
temp[:periods] = invalid
# output into the empty array we created, np.subtract will call FastArray's subtract
np.subtract(self[periods:], self[:-periods], out=temp[periods:])
else:
temp[periods:] = invalid
np.subtract(self[:periods], self[-periods:], out=temp[:periods])
return temp
# -------------------------------------------------------
def isna(self) -> FastArray:
"""
`isna` is mapped directly to `isnan()`.
Categoricals and DateTime take over isnan.
FastArray handles sentinels.
>>> a=arange(100.0)
>>> a[5]=np.nan
>>> a[87]=np.nan
>>> sum(a.isna())
2
>>> sum(a.astype(np.int32).isna())
2
"""
return self.isnan()
def notna(self) -> FastArray:
"""
notna is mapped directly to isnotnan()
Categoricals and DateTime take over isnotnan.
FastArray handles sentinels.
>>> a=arange(100.0)
>>> a[5]=np.nan
>>> a[87]=np.nan
>>> sum(a.notna())
98
>>> sum(a.astype(np.int32).notna())
98
"""
return self.isnotnan()
def replacena(self, value, inplace=False) -> FastArray:
"""
Return a `FastArray` with all NaN and invalid values set to the specified value.
Optionally, you can modify the original `FastArray` if it's not locked.
Parameters
----------
value : scalar or array
A value or an array of values to replace all NaN and invalid values. If an
array, the number of values must equal the number of NaN and invalid values.
inplace : bool, default False
If False, return a copy of the `FastArray`. If True, modify the original.
This will modify any other views on this object. This fails if the
`FastArray` is locked.
Returns
-------
`FastArray` or None
The `FastArray` will be the same size and dtype as the original array.
Returns None if ``inplace = True``.
See Also
--------
FastArray.fillna : Replace NaN and invalid values with a specified value or nearby data.
Dataset.fillna : Replace NaN and invalid values with a specified value or nearby data.
Categorical.fill_forward : Replace NaN and invalid values with the last valid group value.
Categorical.fill_backward : Replace NaN and invalid values with the next valid group value.
GroupBy.fill_forward : Replace NaN and invalid values with the last valid group value.
GroupBy.fill_backward : Replace NaN and invalid values with the next valid group value.
Examples
--------
Replace all instances of NaN with a single value:
>>> a = rt.FastArray([rt.nan, 1.0, rt.nan, 3.0])
>>> a.replacena(0)
FastArray([0., 1., 0., 3.])
Replace all invalid values with 0s:
>>> b = rt.FastArray([0, 1, 2, 3, 4, 5])
>>> b[0:3] = b.inv
>>> b.replacena(0)
FastArray([0, 0, 0, 3, 4, 5])
Replace each instance of NaN with a different value:
>>> a.replacena([0, 2])
FastArray([0., 1., 2., 3.])
"""
inst = self if inplace else self.copy()
isna = inst.isna()
if isna.any():
inst[isna] = value
if inplace:
return None
return inst
def fillna(self, value=None, method=None, inplace=False, limit=None) -> FastArray:
"""
Replace NaN and invalid values with a specified value or nearby data.
Optionally, you can modify the original :py:class:`~.rt_fastarray.FastArray` if
it's not locked.
Parameters
----------
value : scalar or array, default `None`
A value or an array of values to replace all NaN and invalid values.
A ``value`` is required if ``method = None``. An array can be used only when
``method = None``. If an array is used, the number of values in the array
must equal the number of NaN and invalid values.
method : {None, 'backfill', 'bfill', 'pad', 'ffill'}, default `None`
Method to use to propagate valid values.
* backfill/bfill: Propagates the next encountered valid value backward.
Calls :py:meth:`~.rt_fastarray.FastArray.fill_backward`.
* pad/ffill: Propagates the last encountered valid value forward. Calls
:py:meth:`~.rt_fastarray.FastArray.fill_forward`.
* None: A replacement value is required if ``method = None``. Calls
:py:meth:`~.rt_fastarray.FastArray.replacena`.
If there's not a valid value to propagate forward or backward, the NaN or
invalid value is not replaced unless you also specify a ``value``.
inplace : bool, default `False`
If `False`, return a copy of the :py:class:`~.rt_fastarray.FastArray`. If
`True`, modify original data. This modifies any other views on this object.
This fails if the :py:class:`~.rt_fastarray.FastArray` is locked.
limit : int, default `None`
If ``method`` is specified, this is the maximum number of consecutive NaN
or invalid values to fill. If there is a gap with more than this number of
consecutive NaN or invalid values, the gap is only partially filled.
Returns
-------
:py:class:`~.rt_fastarray.FastArray`
The :py:class:`~.rt_fastarray.FastArray` is the same size and dtype as the
original array.
See Also
--------
:py:func:`.rt_fastarraynumba.fill_forward` : Replace NaN and invalid values with
the last valid value.
:py:func:`.rt_fastarraynumba.fill_backward` : Replace NaN and invalid values
with the next valid value.
:py:func:`.rt_numpy.fill_forward` : Replace NaN and invalid values with the last
valid value.
:py:func:`.rt_numpy.fill_backward` : Replace NaN and invalid values with the
next valid value.
:py:meth:`.rt_dataset.Dataset.fillna` : Replace NaN and invalid values with a
specified value or nearby data.
:py:meth:`.rt_fastarray.FastArray.replacena` : Replace NaN and invalid values
with a specified value.
:py:meth:`.rt_categorical.Categorical.fill_forward` : Replace NaN and invalid
values with the last valid group value.
:py:meth:`.rt_categorical.Categorical.fill_backward` : Replace NaN and invalid
values with the next valid group value.
:py:meth:`.rt_groupby.GroupBy.fill_forward` : Replace NaN and invalid values
with the last valid group value.
:py:meth:`.rt_groupby.GroupBy.fill_backward` : Replace NaN and invalid values
with the next valid group value.
Examples
--------
Replace all NaN values with 0s:
>>> a = rt.FastArray([rt.nan, 1.0, rt.nan, rt.nan, rt.nan, 5.0])
>>> a.fillna(0)
FastArray([0., 1., 0., 0., 0., 5.])
Replace all invalid values with 0s:
>>> b = rt.FastArray([0, 1, 2, 3, 4, 5])
>>> b[0:3] = b.inv
>>> b.fillna(0)
FastArray([0, 0, 0, 3, 4, 5])
Replace each instance of NaN with a different value:
>>> a.fillna([0, 2, 3, 4])
FastArray([0., 1., 2., 3., 4., 5.])
Propagate the last encountered valid value forward. Note that where there's no
valid value to propagate, the NaN or invalid value isn't replaced.
>>> a.fillna(method = 'ffill')
FastArray([nan, 1., 1., 1., 1., 5.])
You can use the `value` parameter to specify a value to use where there's no
valid value to propagate.
>>> a.fillna(value = 0, method = 'ffill')
FastArray([0., 1., 1., 1., 1., 5.])
Replace only the first NaN or invalid value in any consecutive series of NaN or
invalid values.
>>> a.fillna(method = 'bfill', limit = 1)
FastArray([ 1., 1., nan, nan, 5., 5.])
"""
if method is not None:
if method in ["backfill", "bfill"]:
return self.fill_backward(value, inplace=inplace, limit=limit)
if method in ["pad", "ffill"]:
return self.fill_forward(value, inplace=inplace, limit=limit)
raise KeyError(f"fillna: The method {method!r} must be 'backfill', 'bfill', 'pad', 'ffill'")
if value is None:
raise ValueError("fillna: Must specify either a 'value' that is not None or a 'method' that is not None.")
if limit is not None:
raise KeyError("fillna: 'limit' is not supported when 'method' is None")
return self.replacena(value, inplace=inplace)
def statx(self) -> Dataset:
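"""
Return a `Dataset` of summary statistics for this array.
Thin wrapper around :func:`.rt_stats.statx`; see that function for the statistics reported.
"""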
return statx(self)
# ---------------------------------------------------------------------------
def _is_not_supported(self, arr):
"""returns True if a numpy array is not FastArray internally supported"""
if not (arr.flags.c_contiguous or arr.flags.f_contiguous):
# TODO enable this warning in a future minor release
# FastArray._possibly_warn(f'_is_not_supported: unsupported array flags {arr.flags}')
return True
if arr.dtype.char not in NumpyCharTypes.Supported:
# TODO enable this warning in a future minor release
# FastArray._possibly_warn(f'_is_not_supported: unsupported array dtype {arr.dtype}\nSupported dtypes {NumpyCharTypes.Supported}')
return True
if len(arr.strides) == 0:
# TODO enable this warning in a future minor release
# FastArray._possibly_warn(f'_is_not_supported: unsupported array strides {arr.strides}')
return True
return False
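# Illustrative sketch of the check above (hypothetical inputs): arrays rejected here fall
# back to numpy processing rather than the riptable fast paths.
#   >>> fa = FastArray(np.arange(10))
#   >>> fa._is_not_supported(np.arange(10)[::2])   # strided, non-contiguous view
#   True
#   >>> fa._is_not_supported(np.arange(10))        # contiguous array with a supported dtype
#   False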
# ---------------------------------------------------------------------------
def __array_function__(self, func, types, args, kwargs):
if self.NEW_ARRAY_FUNCTION_ENABLED:
return self._new_array_function(func, types, args, kwargs)
else:
return self._legacy_array_function(func, types, args, kwargs)
# ---------------------------------------------------------------------------
def _legacy_array_function(self, func, types, args, kwargs):
"""
Called before `__array_ufunc__`.
Not called for every function; `np.isnan`, `np.trunc`, and `np.true_divide`, for instance, do not come through here.
"""
reduceFunc = NUMPY_CONVERSION_TABLE.get(func, None)
# TODO:
# kwargs of 'axis': None 'out': None should be accepted
if reduceFunc is not None and len(kwargs) == 0:
# speed path (todo add call to ledger)
# default to ddof=0 when no kwargs passed
result = rc.Reduce(args[0], reduceFunc, 0)
if result is not None:
# TypeRegister.MathLedger._REDUCE(args[0], newfunc)
dtype = kwargs.get("dtype", None)
if dtype is not None:
# user forced dtype return value
return dtype(result)
# preserve type for min/max/nanmin/nanmax
if reduceFunc in [
REDUCE_FUNCTIONS.REDUCE_MIN,
REDUCE_FUNCTIONS.REDUCE_NANMIN,
REDUCE_FUNCTIONS.REDUCE_MAX,
REDUCE_FUNCTIONS.REDUCE_NANMAX,
]:
return self.dtype.type(result)
# internally numpy expects a dtype returned for nanstd and other calculations
if isinstance(result, (int, np.integer)):
# for uint64, the high bit must be preserved
if self.dtype.char in NumpyCharTypes.UnsignedInteger64:
return np.uint64(result)
return np.int64(result)
return np.float64(result)
# call the version numpy wanted us to
return super(FastArray, self).__array_function__(func, types, args, kwargs)
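# Illustrative sketch of the speed path described in the docstring above (assumes this
# legacy array-function path is active):
#   >>> fa = FastArray([1, 2, 3])
#   >>> np.sum(fa)                      # no kwargs -> routed through rc.Reduce, returned as int64
#   6
#   >>> np.sum(fa, dtype=np.float64)    # kwargs present -> falls through to numpy
#   6.0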
# ---------------------------------------------------------------------------
def _new_array_function(self, func: Callable, types: tuple, args: tuple, kwargs: dict):
"""
FastArray implementation of the array function protocol.
Parameters
----------
func: callable
A callable exposed by NumPy's public API, called in the form ``func(*args, **kwargs)``.
types: tuple
A tuple of unique argument types from the original NumPy function call that implement ``__array_function__``.
args: tuple
The tuple of arguments that will be passed to `func`.
kwargs: dict
The dictionary of keyword arguments that will be passed to `func`.
Raises
------
TypeError
If `func` is not overridden by a corresponding riptable array function then a TypeError is raised.
Notes
-----
This array function implementation requires each class, such as FastArray and any derived class,
to implement its own version of the NumPy array function API. If these array functions defer to the
inheriting class, they need to either re-wrap the results in the correct type or raise an exception if a
particular operation is not well-defined or meaningful for the derived class.
If an array function that is also a universal function is not overridden as an array function but is defined
as a ufunc, it will not be called unless it is registered with the array function helper, since the array function
protocol takes priority over the universal function protocol.
Reference: `NEP 18 Array Function Protocol <https://numpy.org/neps/nep-0018-array-function-protocol.html>`_
"""
if logger.isEnabledFor(logging.DEBUG):
logger.debug(
f"{FastArray.__name__}._new_array_function(fun={func}, types={types}, args={args}, kwargs={kwargs})"
)
# handle `func` argument
array_func: Callable = FastArray._ArrayFunctionHelper.get_array_function(func)
if array_func is None:
# fallback to numpy for unhandled array functions and attempt to cast back to FastArray
result = super().__array_function__(func, types, args, kwargs)
if result is NotImplemented:
return NotImplemented
elif isinstance(result, np.ndarray):
return result.view(FastArray)
elif isinstance(result, list):
return [(x.view(FastArray) if isinstance(x, np.ndarray) else x) for x in result]
elif isinstance(result, tuple):
return tuple([(x.view(FastArray) if isinstance(x, np.ndarray) else x) for x in result])
else:
# Unknown result type.
raise TypeError(f"Unknown result type '{type(result)}' returned by ndarray.{func}.")
# handle `types` argument
array_func_type_check: Callable = FastArray._ArrayFunctionHelper.get_array_function_type_compatibility_check(
func
)
if array_func_type_check is None:
# no custom type compatibility check; default type compatibility check
# this allows subclasses that don't override __array_function__ to handle FastArray objects
for typ in types:
if not issubclass(typ, FastArray):
if logger.isEnabledFor(logging.DEBUG):
logger.debug(f"{FastArray.__name__}.__array_function__: unsupported type {repr(typ)}")
return NotImplemented
else: # custom type compatibility check
valid: bool = array_func_type_check(types)
if not valid:
if logger.isEnabledFor(logging.DEBUG):
logger.debug(f"{FastArray.__name__}.__array_function__: unsupported type in {repr(types)}")
return NotImplemented
return array_func(*args, **kwargs)
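# Illustrative sketch of the fallback path described in the Notes above: an array function
# with no registered riptable override is computed by numpy and the ndarray result is
# re-wrapped as a FastArray (np.flip is assumed here to have no riptable-specific override).
#   >>> fa = FastArray([1, 2, 3])
#   >>> np.flip(fa)
#   FastArray([3, 2, 1])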
# ---------------------------------------------------------------------------
def __array_ufunc__(self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any):
"""
The FastArray universal function (or ufunc) override offers multithreaded C/C++ implementation at the RiptideCPP layer.
When FastArray receives a `ufunc` callable it attempts to handle it in priority order:
1. if ``FastArray.FasterUFunc`` is enabled and the ufunc has an explicit riptable override, it is handled by that override; otherwise
2. the ufunc is handled at the Riptable / NumPy API overrides level; otherwise
3. the ufunc is handled at the NumPy API level.
If none of the aforementioned cases supports the given combination of `ufunc`, `inputs`, and `kwargs`,
a warning is emitted.
The following references to supported ufuncs are grouped by method type.
- For `method` type ``reduce``, see ``gReduceUFuncs``.
- For `method` type ``__call__``, see ``gBinaryUFuncs``, ``gBinaryLogicalUFuncs``, ``gBinaryBitwiseUFuncs``, and ``gUnaryUFuncs``.
- For `method` type ``at`` return ``None``.
If `out` argument is specified, then an extra array copy is performed on the result of the ufunc computation.
If a `dtype` keyword is specified, all efforts are made to respect the `dtype` on the result of the computation.
Parameters
----------
ufunc : callable
The ufunc object that was called.
method : str
A string indicating which Ufunc method was called (one of "__call__", "reduce", "reduceat", "accumulate", "outer", "inner").
inputs
A tuple of the input arguments to the ufunc.
kwargs
A dictionary containing the optional input arguments of the ufunc. If given, any out arguments, both positional and keyword, are passed as a tuple in kwargs.
Returns
-------
The method should return either the result of the operation, or NotImplemented if the operation requested is not implemented.
Notes
-----
The current implementation does not support the following keyword arguments: `casting`, `sig`, `signature`, and
`core_signature`.
It has partial support for keyword arguments: `where`, `axis`, and `axes`, if they match
the default values.
If FastArray's ``WarningLevel`` is enabled, then warnings will be emitted if any of unsupported or partially
supported keyword arguments are passed.
TODO: document custom upcasting rules.
See Also
--------
For more information on ufunc see the following numpy documents:
- https://numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
- https://numpy.org/doc/stable/reference/ufuncs.html
Note, the docstring Parameters and Return section is repeated from the numpy
`__array_ufunc__` docstring since this is overriding that method.
"""
# TODO consider using type annotation typing.Final for these read-only variables when moving to Python 3.8
# Python 3.8 added support for typing.Final. Final will catch unintended assignments for constants when running
# static type checkers such as mypy.
_UNSUPPORTED_KEYWORDS: Tuple[str, str, str, str] = ("casting", "sig", "signature", "core_signature")
_PARTIALLY_SUPPORTED_KEYWORDS_TO_DEFAULTS: Mapping[str, Union[None, bool]] = {
"where": True,
"axis": None,
"axes": None,
}
toplevel_abort: bool = False
if FastArray.Verbose > 2:
print("*** top level array_ufunc", ufunc, method, *inputs, kwargs)
# flip any inputs that are fastarrays back to an ndarray...
args: List[Any] = []
for input in inputs:
if isinstance(input, np.ndarray):
is_not_supported = self._is_not_supported(input)
if is_not_supported:
# TODO enable this warning in a future minor release
# FastArray._possibly_warn(f'__array_ufunc__: unsupported input "{input}"')
toplevel_abort |= is_not_supported
args.append(input)
# Check for numpy rules that we cannot handle.
for kw in _UNSUPPORTED_KEYWORDS:
if kw in kwargs:
# TODO enable this warning in a future minor release
# FastArray._possibly_warn(f'__array_ufunc__: unsupported keyword argument "{kw}"')
toplevel_abort |= True
# Check for numpy rules that we partially support; that is, where we only support
# the keyword if the value is some default value and otherwise punt to numpy.
# The value associated with each keyword in the dictionary is the only value we'll
# support for that keyword.
# For example, in numpy 1.17 the sum() function passes where=True by default.
for kw, default_val in _PARTIALLY_SUPPORTED_KEYWORDS_TO_DEFAULTS.items():
if kw in kwargs:
# Use a type check before equality here to avoid errors caused
# by checking equality between bools and arrays.
kwarg_val = kwargs[kw]
if type(default_val) != type(kwarg_val) or kwarg_val != default_val:
toplevel_abort |= True
dtype: Optional[np.dtype] = kwargs.get("dtype", None)
has_outputs: bool = False
out_args: List[Any] = []
# flip any outputs to ndarray...
outputs = kwargs.pop("out", None)
if outputs:
has_outputs = True
for output in outputs:
if isinstance(output, np.ndarray):
is_not_supported = self._is_not_supported(output)
if is_not_supported:
# TODO enable this warning in a future minor release
# FastArray._possibly_warn(f'__array_ufunc__: unsupported output "{output}"')
toplevel_abort |= is_not_supported
out_args.append(output)
# replace out
kwargs["out"] = tuple(out_args)
else:
# TJD - here outputs was not specified
# now if UFunc.nout ==1, this function requires an output
outputs = (None,) * ufunc.nout
# See https://docs.python.org/3/c-api/typeobj.html
# See Number Object Structures and Mapping Object Structure for indexing
# ufunc.nin The number of inputs.
# ufunc.nout The number of outputs.
# ufunc.nargs The number of arguments.
# ufunc.ntypes The number of types.
# ufunc.types Returns a list with types grouped input->output.
# ufunc.identity The identity value.
final_dtype: Optional[np.dtype] = None
fast_function: Optional[MATH_OPERATION] = None
reduce_func: Optional[REDUCE_FUNCTIONS] = None
# Handle reduce ufunc methods.
# note: when method is 'at' this is an inplace unbuffered operation
# this can speed up routines that use heavy masked operations
if method == "reduce" and FastArray.FasterUFunc and not toplevel_abort:
# a.any() and a.all() are logical reduce operations
# Examples
# Look for axis:None -- otherwise ABORT
# Then look for keepdims which means to wrap the result in a list/array?
# Then check dtype also
#
# In [22]: t=FA([[3,4,5],[6,7,8]])
# In [23]: np.add.reduce(t)
#!!reduce reduce nin: 2 1 <ufunc 'add'> [array([[3, 4, 5],
# [6, 7, 8]])] out: (None,) kwargs: {}
# results [ 9 11 13]
# Out[23]: array([ 9, 11, 13])
# In [24]: np.add.reduce(t, axis=None)
#!!reduce reduce nin: 2 1 <ufunc 'add'> [array([[3, 4, 5],
# [6, 7, 8]])] out: (None,) kwargs: {'axis': None}
# results 33
# Out[24]: 33
# In [25]: np.add.reduce(t, axis=None, keepdims=True)
#!!reduce reduce nin: 2 1 <ufunc 'add'> [array([[3, 4, 5],
# [6, 7, 8]])] out: (None,) kwargs: {'axis': None, 'keepdims': True}
# results [[33]]
# Out[25]: array([[33]])
# In [26]: np.add.reduce(t, axis=None, keepdims=True, dtype=np.float32)
#!!reduce reduce nin: 2 1 <ufunc 'add'> [array([[3, 4, 5],
# [6, 7, 8]])] out: (None,) kwargs: {'axis': None, 'keepdims': True, 'dtype': <class 'numpy.float32'>}
# results [[33.]]
# Out[26]: array([[33.]], dtype=float32)
# print("!!reduce ", method, 'nin:', ufunc.nin, ufunc.nout, ufunc, args, 'out:', outputs, 'kwargs:', kwargs,'ndim', args[0].ndim)
# resultN = super(FastArray, self).__array_ufunc__(ufunc, method,**kwargs)
# print("!!result numpy", resultN, type(resultN))
# NOTE:
# look for reduce logical_or
# look for reduce_logical_and (used with np.fmin for instance)
reduce_func = gReduceUFuncs.get(ufunc, None)
# check if we can proceed to calculate a faster way
if method == "__call__" and FastArray.FasterUFunc and not toplevel_abort:
# check for binary ufunc
if len(args) == 2 and ufunc.nout == 1:
###########################################################################
## BINARY
###########################################################################
array_types: List[np.dtype] = []
scalar_types: List[ScalarType] = []
scalars: int = 0
abort: int = 0
for arr in args:
arrType = type(arr)
if arrType in ScalarType:
scalars += 1
scalar_types.append(arrType)
else:
try:
array_types.append(arr.dtype)
# check for non contingous arrays
if arr.itemsize != arr.strides[0]:
abort = 1
except:
abort = 1
# can happen when None or a python list is passed
if FastArray.Verbose > 1:
print(f"**dont know how to handle array {arr} args: {args}")
if abort == 0:
if scalars < 2:
is_logical = 0
# check for add, sub, mul, divide, power
fast_function = gBinaryUFuncs.get(ufunc, None)
if fast_function is None:
# check for comparison and logical or/and functions
fast_function = gBinaryLogicalUFuncs.get(ufunc, None)
if fast_function is not None:
if FastArray.Verbose > 2:
print(f"**logical function called {ufunc} args: {args}")
is_logical = 1
final_dtype = np.bool_
if fast_function is None:
# check for bitwise functions? (test this)
fast_function = gBinaryBitwiseUFuncs.get(ufunc, None)
if fast_function is not None:
if has_outputs and is_logical == 0:
# have to conform to output
final_dtype = out_args[0].dtype
else:
if is_logical == 1 and scalars == 1:
# NOTE: scalar upcast rules -- just apply to logicals so that arr < 5 does not upcast?
# or globally apply this rule so that arr = arr + 5
# if scalars == 1:
# special case have to see if scalar is in range
if type(args[0]) in ScalarType:
scalar_val = args[0]
else:
scalar_val = args[1]
final_dtype = logical_find_common_type(array_types, scalar_types, scalar_val)
else:
# TODO: check for bug where np.int32 type 7 gets flipped to np.int32 type 5
if scalars == 0 and len(array_types) == 2 and (array_types[0] == array_types[1]):
final_dtype = array_types[0]
else:
# check for int scalar against int
# bug where np.int8 and then add +1999 or larger number. need to upcast
if scalars == 1 and array_types[0].num <= 10:
if type(args[0]) in ScalarType:
scalar_val = args[0]
else:
scalar_val = args[1]
final_dtype = logical_find_common_type(
array_types, scalar_types, scalar_val
)
else:
final_dtype = np.find_common_type(array_types, scalar_types)
# if we are adding two strings or unicode, special case
# if we think the final dtype is an object, check if this is really two strings
if fast_function == MATH_OPERATION.ADD and (
array_types[0].num == 18 or array_types[0].num == 19
):
# assume addition of two strings
final_dtype = array_types[0]
if scalars != 0:
# we have a scalar... make sure we convert it
if type(args[0]) in ScalarType:
# fix scalar type make sure string or unicode
if array_types[0].num == 18:
args[0] = str.encode(str(args[0]))
if array_types[0].num == 19:
args[0] = str(args[0])
else:
if array_types[0].num == 18:
args[1] = str.encode(str(args[1]))
if array_types[0].num == 19:
args[1] = str(args[1])
else:
# we have two arrays, if one array is not proper string type, convert it
if array_types[1] != final_dtype:
if array_types[0].num == 18:
args[1] = args[1].astype("S")
if array_types[0].num == 19:
args[1] = args[1].astype("U")
if FastArray.Verbose > 2:
print("ADD string operation", array_types, scalar_types)
elif scalars == 0:
if array_types[0] != array_types[1]:
# UPCAST RULES
if array_types[0] == final_dtype and array_types[1] != final_dtype:
# print("!!!upcast rules second", array_types[0], array_types[1], final_dtype)
# convert to the proper type before calculation
args[1] = _ASTYPE(args[1], final_dtype)
elif array_types[0] != final_dtype and array_types[1] == final_dtype:
# print("!!!upcast rules first", array_types[0], array_types[1], final_dtype)
# convert to the proper type before calculation
args[0] = _ASTYPE(args[0], final_dtype)
else:
# sometimes both of them must be upcast...
# consider int8 * uint8 ==> will upcast to int16
# print("!!!cannot understand upcast rules", arraytypes[0], arraytypes[1], final_dtype)
args[0] = _ASTYPE(args[0], final_dtype)
args[1] = _ASTYPE(args[1], final_dtype)
# TJD check logic here... what does numpy do when int8 * uint8? speed test
##UseNumpy = True
else:
# UPCAST RULES when one is a scalar
if array_types[0] != final_dtype:
# which argument is the scalar? convert the other one
if type(args[0]) in ScalarType:
# print("converting arg2 from", args[1], final_dtype)
args[1] = _ASTYPE(args[1], final_dtype)
else:
# print("converting arg1 from ", args[0], final_dtype)
args[0] = _ASTYPE(args[0], final_dtype)
# not a binary ufunc, check for unary ufunc
# check for just 1 input (unary)
elif (ufunc.nin == 1) and (ufunc.nout == 1):
###########################################################################
## UNARY
###########################################################################
fast_function = gUnaryUFuncs.get(ufunc, None)
else:
if FastArray.Verbose > 1:
print("***unknown ufunc arg style: ", ufunc.nin, ufunc.nout, ufunc, args, kwargs)
# -------------------------------------------------------------------------------------------------------------
if not FastArray.FasterUFunc:
fast_function = None
reduce_func = None
# check for a reduce func like sum or min
if reduce_func is not None:
keepdims: bool = kwargs.get("keepdims", False)
if dtype is None:
dtype = args[0].dtype
# MathLedger
result = TypeRegister.MathLedger._REDUCE(args[0], reduce_func)
char = np.dtype(dtype).char
if FastArray.Verbose > 1:
print("***result from reduce", result, type(result), dtype, char)
if result is not None:
# print("reduce called", ufunc, keepdims, dtype)
if reduce_func in [REDUCE_FUNCTIONS.REDUCE_SUM, REDUCE_FUNCTIONS.REDUCE_NANSUM] and isinstance(
result, float
):
result = np.float64(result)
elif dtype != np.float32 and dtype != np.float64:
# preserve integers
if char in NumpyCharTypes.UnsignedInteger64:
# preserve high bit
result = np.uint64(result)
else:
result = np.int64(result)
else:
result = np.float64(result)
# MIN/MAX need to return same type
if reduce_func >= REDUCE_FUNCTIONS.REDUCE_MIN:
# min max not allowed on empty array per unit test
if len(args[0]) == 0:
raise ValueError("min/max arg is an empty sequence.")
# min/max/nanmin/nanmax -- same result
if dtype == np.bool_:
result = np.bool_(result)
else:
result = dtype.type(result)
if keepdims:
result = FastArray([result]).astype(dtype)
elif keepdims:
# force back into an array from scalar
result = FastArray([result])
# we did the reduce, now return the result
return result
# check for normal call function
elif fast_function is not None:
# Call the FastArray APIs instead of numpy
# callmode = 'f'
results = None
if ufunc.nin == 2:
final_num = -1
if final_dtype is not None:
if final_dtype == np.bool_:
final_num = 0
else:
final_num = final_dtype.num
# because scalars can be passed as np.int64(864000)
if type(args[0]) in gNumpyScalarType:
# print('converting arg1', args[0])
args[0] = np.asarray(args[0])
if type(args[1]) in gNumpyScalarType:
# print('converting arg2', args[1])
args[1] = np.asarray(args[1])
if FastArray.Verbose > 2:
print(
"*** binary think we can call",
fast_function,
ufunc.nin,
ufunc.nout,
"arg1",
args[0],
"arg2",
args[1],
"out",
out_args,
"final",
final_num,
)
if len(out_args) == 1:
results = TypeRegister.MathLedger._BASICMATH_TWO_INPUTS(
(args[0], args[1], out_args[0]), fast_function, final_num
)
else:
results = TypeRegister.MathLedger._BASICMATH_TWO_INPUTS(
(args[0], args[1]), fast_function, final_num
)
else:
# for conversion functions
# dtype=kwargs.get('dtype',None)
if FastArray.Verbose > 2:
print(
"*** unary think we can call",
fast_function,
ufunc.nin,
ufunc.nout,
"arg1",
args[0],
"out",
out_args,
)
if len(out_args) == 1:
results = TypeRegister.MathLedger._BASICMATH_ONE_INPUT((args[0], out_args[0]), fast_function, 0)
else:
results = TypeRegister.MathLedger._BASICMATH_ONE_INPUT((args[0]), fast_function, 0)
if results is not None and len(out_args) == 1:
# when the output argument is forced but we calculate it into another array we need to copy the result into the output
if not rc.CompareNumpyMemAddress(out_args[0], results):
if FastArray.Verbose > 2:
print(
"*** performing an extra copy to match output request",
id(out_args[0]),
id(results),
out_args[0],
results,
)
out_args[0][...] = results
results = out_args[0]
if results is None:
# punted
# callmode='p'
if FastArray.Verbose > 1:
print("***punted ufunc: ", ufunc.nin, ufunc.nout, ufunc, args, kwargs)
fast_function = None
# fall to "if fast_function is None" and run through numpy...
# respect dtype
elif dtype is not None and isinstance(results, np.ndarray):
if dtype is not results.dtype:
if FastArray.Verbose > 1:
print("***result from reduce", results, results.dtype, dtype)
# convert
results = results.astype(dtype)
if fast_function is None:
# Call the numpy APIs
# Check if we can use the recycled arrays to avoid an allocation for the output array
if FastArray.Verbose > 1:
print("**punted on numpy!", ufunc)
# NOTE: We are going to let numpy process it
# We must change all FastArrays to normal numpy arrays
args = []
for input in inputs:
# flip back to numpy to avoid errors when numpy calculates
if isinstance(input, FastArray):
args.append(input.view(np.ndarray))
else:
args.append(input)
if has_outputs:
outputs = kwargs.pop("out", None)
if outputs:
out_args = []
for output in outputs:
if isinstance(output, FastArray):
out_args.append(output.view(np.ndarray))
else:
out_args.append(output)
# replace out
kwargs["out"] = tuple(out_args)
# NOTE: If the specified ufunc + inputs combination isn't supported by numpy either,
# as of numpy 1.17.x this call will end up raising a UFuncTypeError so the rest
# of the FastArray.__array_ufunc__ body (below) won't end up executing.
results = TypeRegister.MathLedger._ARRAY_UFUNC(super(FastArray, self), ufunc, method, *args, **kwargs)
# If riptable has not implemented a certain ufunc (or doesn't support it for the given arguments),
# emit a warning about it to let the user know.
# When numpy does not support the ufunc+inputs either, we won't reach this point (as of numpy 1.17.x),
# since numpy will raise a UFuncTypeError earlier (before this point) rather than after we return NotImplemented.
if results is NotImplemented:
warnings.warn(f"***ufunc {ufunc} {args} {kwargs} is not implemented")
return NotImplemented
# Ufuncs also have a fifth method that allows in place operations to be performed using fancy indexing.
# No buffering is used on the dimensions where fancy indexing is used, so the fancy index can list an item more than once
# and the operation will be performed on the result of the previous operation for that item.
# ufunc.reduce(a[, axis, dtype, out, keepdims]) Reduces a's dimension by one, by applying ufunc along one axis.
# ufunc.accumulate(array[, axis, dtype, out]) Accumulate the result of applying the operator to all elements.
# ufunc.reduceat(a, indices[, axis, dtype, out]) Performs a (local) reduce with specified slices over a single axis.
# ufunc.outer(A, B) Apply the ufunc op to all pairs (a, b) with a in A and b in B.
# ufunc.at(a, indices[, b]) Performs unbuffered in place operation on operand 'a' for elements specified by 'indices'.
if method == "at":
return
if ufunc.nout == 1:
# check if we used our own output
# if isinstance(outArray, np.ndarray):
# return outArray.view(FastArray)
# if (final_dtype != None and final_dtype != results.dtype):
# print("****** mispredicted final", final_dtype, results.dtype, ufunc, scalartypes, args, outputs, kwargs);
# results = (results,)
if not isinstance(results, FastArray) and isinstance(results, np.ndarray):
return results.view(FastArray)
# think we hit here for sum which does not return an array, just a number
return results
# more than one item, so we are making a tuple
# can result in __array_finalize__ being called
results = tuple(
(np.asarray(result).view(FastArray) if output is None else output)
for result, output in zip(results, outputs)
)
# check if we have a tuple of one item, if so just return the one item
if len(results) == 1:
results = results[0]
return results
@property
def numbastring(self):
"""
Converts byte strings and unicode strings to a 2-dimensional array
so that numba can process them correctly.
Examples
--------
>>> @numba.jit(nopython=True)
... def numba_str(txt):
... x=0
... for i in range(txt.shape[0]):
... if (txt[i,0]==116 and # 't'
... txt[i,1]==101 and # 'e'
... txt[i,2]==120 and # 'x'
... txt[i,3]==116): # 't'
... x += 1
... return x
>>>
>>> x=FastArray(['some','text','this','is'])
>>> numba_str(x.view(np.uint8).reshape((len(x), x.itemsize)))
>>> numba_str(x.numbastring)
"""
intype = self.dtype.__str__()
if intype[0] == "|" or intype[0] == "<":
if intype[1] == "S":
return self.view(np.uint8).reshape((len(self), self.itemsize))
if intype[1] == "U":
return self.view(np.uint32).reshape((len(self), self.itemsize // 4))
return self
# -----------------------------------------------------------
def apply_numba(self, *args, otype=None, myfunc="myfunc", name=None):
"""
Print to screen an example numba signature for the array.
You can then copy this example to build your own numba function.
Parameters
----------
*args:
Test arguments
otype: str, default None
A different output data type
myfunc: str, default 'myfunc'
A string to call the function
name: str, default None
A string to name the array
Examples
--------
>>> import numba
>>> @numba.guvectorize(['void(int64[:], int64[:])'], '(n)->(n)')
... def squarev(x,out):
... for i in range(len(x)):
... out[i]=x[i]**2
...
>>> a=arange(1_000_000).astype(np.int64)
>>> squarev(a)
FastArray([ 0, 1, 4, ..., 999994000009,
999996000004, 999998000001], dtype=int64)
"""
if name is None:
# try first to get the name
name = self.get_name()
if name is None:
name = "a"
intype = self.dtype.__str__()
if otype is None:
outtype = self.dtype.__str__()
else:
outtype = np.dtype(otype).__str__()
# TODO: what if unicode or string? .frombuffer/.view(np.uint8)
preamble = "import numba\n@numba.guvectorize([\n"
middle = f"'void({intype}[:], {outtype}[:])', # <-- can stack multiple different dtypes x.view(np.uint8).reshape(-1, x.itemsize)\n"
postamble = " ], '(n)->(n)', target='cpu')\n"
code = f"def {myfunc}(data_in, data_out):\n for i in range(len(data_in)):\n data_out[i]=data_in[i] #<-- put your code here\n"
exec = preamble + middle + postamble + code
print("Copy the code snippet below and rename myfunc")
print("---------------------------------------------")
print(exec)
print("---------------------------------------------")
if intype[0] == "|" or intype[0] == "<":
if intype[1] == "S":
print(
f"Then call {myfunc}({name}.numbastring,empty_like({name}).numbastring) where {name} is the input array"
)
elif intype[1] == "U":
print(
f"Then call {myfunc}({name}.numbastring,empty_like({name}).numbastring) where {name} is the input array"
)
else:
print(f"Then call {myfunc}({name},empty_like({name})) where {name} is the input array")
# return exec
def apply(self, pyfunc, *args, otypes=None, doc=None, excluded=None, cache=False, signature=None):
"""
Generalized function class. see: np.vectorize
Creates and then applies a vectorized function which takes a nested sequence of objects or
numpy arrays as inputs and returns a single numpy array or a tuple of numpy arrays as
output. The vectorized function evaluates `pyfunc` over successive tuples
of the input arrays like the python map function, except it uses the
broadcasting rules of numpy.
The data type of the output of `vectorized` is determined by calling
the function with the first element of the input. This can be avoided
by specifying the `otypes` argument.
Parameters
----------
pyfunc : callable
A python function or method.
otypes : str or list of dtypes, optional
The output data type. It must be specified as either a string of
typecode characters or a list of data type specifiers. There should
be one data type specifier for each output.
doc : str, optional
The docstring for the function. If `None`, the docstring will be the
``pyfunc.__doc__``.
excluded : set, optional
Set of strings or integers representing the positional or keyword
arguments for which the function will not be vectorized. These will be
passed directly to `pyfunc` unmodified.
.. versionadded:: 1.7.0
cache : bool, optional
If `True`, then cache the first function call that determines the number
of outputs if `otypes` is not provided.
.. versionadded:: 1.7.0
signature : string, optional
Generalized universal function signature, e.g., ``(m,n),(n)->(m)`` for
vectorized matrix-vector multiplication. If provided, ``pyfunc`` will
be called with (and expected to return) arrays with shapes given by the
size of corresponding core dimensions. By default, ``pyfunc`` is
assumed to take scalars as input and output.
.. versionadded:: 1.12.0
Returns
-------
vectorized : callable
Vectorized function.
See Also
--------
FastArray.apply_numba
FastArray.apply_pandas
Examples
--------
>>> def myfunc(a, b):
... "Return a-b if a>b, otherwise return a+b"
... if a > b:
... return a - b
... else:
... return a + b
>>>
>>> a=arange(10)
>>> b=arange(10)+1
>>> a.apply(myfunc,b)
FastArray([ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19])
Example with one input array
>>> def square(x):
... return x**2
>>>
>>> a=arange(10)
>>> a.apply(square)
FastArray([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81])
Example with lambda
>>> a=arange(10)
>>> a.apply(lambda x: x**2)
FastArray([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81])
Example with numba
>>> from numba import jit
>>> @jit
... def squareit(x):
... return x**2
>>> a.apply(squareit)
FastArray([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81])
Examples to use existing builtin oct function but change the output from string, to unicode, to object
>>> a=arange(10)
>>> a.apply(oct, otypes=['S'])
FastArray([b'0o0', b'0o1', b'0o2', b'0o3', b'0o4', b'0o5', b'0o6', b'0o7', b'0o10', b'0o11'], dtype='|S4')
>>> a=arange(10)
>>> a.apply(oct, otypes=['U'])
FastArray(['0o0', '0o1', '0o2', '0o3', '0o4', '0o5', '0o6', '0o7', '0o10', '0o11'], dtype='<U4')
>>> a=arange(10)
>>> a.apply(oct, otypes=['O'])
FastArray(['0o0', '0o1', '0o2', '0o3', '0o4', '0o5', '0o6', '0o7', '0o10', '0o11'], dtype=object)
"""
vfunc = np.vectorize(pyfunc, otypes=otypes, doc=doc, excluded=excluded, cache=cache, signature=signature)
result = vfunc(self, *args)
return result
# -----------------------------------------------------------
def apply_pandas(self, func, convert_dtype=True, args=(), **kwds):
"""
Invoke a function on the values of a FastArray. Can be a ufunc (a NumPy function
that applies to the entire FastArray) or a Python function that only works
on single values.
Parameters
----------
func : function
convert_dtype : boolean, default True
Try to find better dtype for elementwise function results. If
False, leave as dtype=object
args : tuple
Positional arguments to pass to function in addition to the value
Additional keyword arguments will be passed as keywords to the function
Returns
-------
y : FastArray or Dataset if func returns a FastArray
See Also
--------
FastArray.map: For element-wise operations
FastArray.agg: only perform aggregating type operations
FastArray.transform: only perform transforming type operations
Examples
--------
Create a FastArray with typical summer temperatures for each city.
>>> fa = rt.FastArray([20, 21, 12], index=['London', 'New York','Helsinki'])
>>> fa
London 20
New York 21
Helsinki 12
dtype: int64
Square the values by defining a function and passing it as an
argument to ``apply()``.
>>> def square(x):
... return x**2
>>> fa.apply(square)
London 400
New York 441
Helsinki 144
dtype: int64
Square the values by passing an anonymous function as an
argument to ``apply()``.
>>> fa.apply(lambda x: x**2)
London 400
New York 441
Helsinki 144
dtype: int64
Define a custom function that needs additional positional
arguments and pass these additional arguments using the
``args`` keyword.
>>> def subtract_custom_value(x, custom_value):
... return x-custom_value
>>> fa.apply(subtract_custom_value, args=(5,))
London 15
New York 16
Helsinki 7
dtype: int64
Define a custom function that takes keyword arguments
and pass these arguments to ``apply``.
>>> def add_custom_values(x, **kwargs):
... for month in kwargs:
... x+=kwargs[month]
... return x
>>> fa.apply(add_custom_values, june=30, july=20, august=25)
London 95
New York 96
Helsinki 87
dtype: int64
Use a function from the Numpy library.
>>> fa.apply(np.log)
London 2.995732
New York 3.044522
Helsinki 2.484907
dtype: float64
"""
import pandas as pd
series = pd.Series(self)
result = series.apply(func, convert_dtype=convert_dtype, args=args, **kwds)
return result.values
# -----------------------------------------------------------
@cached_weakref_property
def str(self) -> "FAString":
r"""Casts an array of byte strings or unicode as ``FAString``.
Enables a variety of useful string manipulation methods.
Returns
-------
FAString
Raises
------
TypeError
If the FastArray is of dtype other than byte string or unicode
See Also
--------
np.chararray
np.char
rt.FAString.apply
Examples
--------
>>> s=rt.FA(['this','that','test ']*100_000)
>>> s.str.upper
FastArray([b'THIS', b'THAT', b'TEST ', ..., b'THIS', b'THAT', b'TEST '],
dtype='|S5')
>>> s.str.lower
FastArray([b'this', b'that', b'test ', ..., b'this', b'that', b'test '],
dtype='|S5')
>>> s.str.removetrailing()
FastArray([b'this', b'that', b'test', ..., b'this', b'that', b'test'],
dtype='|S5')
"""
if self.dtype.char in "US":
return TypeRegister.FAString(self)
if self.dtype.char == "O":
# try to convert to string (might have come from pandas)
try:
conv = self.astype("S")
except Exception:
conv = self.astype("U")
return TypeRegister.FAString(conv)
raise TypeError(f"The .str function can only be used on byte string and unicode not {self.dtype!r}")
@staticmethod
def from_arrow(
arr: Union["pa.Array", "pa.ChunkedArray"],
zero_copy_only: bool = True,
writable: bool = False,
auto_widen: bool = False,
) -> "FastArray":
"""
Convert a pyarrow `Array` to a riptable `FastArray`.
Parameters
----------
arr : pyarrow.Array or pyarrow.ChunkedArray
zero_copy_only : bool, default True
If True, an exception will be raised if the conversion to a `FastArray` would require copying the
underlying data (e.g. in presence of nulls, or for non-primitive types).
writable : bool, default False
For a `FastArray` created with zero copy (view on the Arrow data), the resulting array is not writable (Arrow data is immutable).
By setting this to True, a copy of the array is made to ensure it is writable.
auto_widen : bool, optional, default False
When False (the default), if an arrow array contains a value which would be considered
the 'invalid'/NA value for the equivalent dtype in a `FastArray`, raise an exception
because direct conversion would be lossy / change the semantic meaning of the data.
When True, the converted array will be widened (if possible) to the next-largest dtype
to ensure the data will be interpreted in the same way.
Returns
-------
FastArray
"""
import pyarrow.types as pat
# Based on the type of the array, dispatch to type-specific implementations of .from_arrow().
pa_arr_type = arr.type
if (
pat.is_boolean(pa_arr_type)
or pat.is_integer(pa_arr_type)
or pat.is_floating(pa_arr_type)
or pat.is_string(pa_arr_type)
or pat.is_binary(pa_arr_type)
or pat.is_fixed_size_binary(pa_arr_type)
):
# TODO: Check whether this column has a user-specified fill value provided; if so, pass it along to
# the FastArray.from_arrow() method call below.
return FastArray._from_arrow(arr, zero_copy_only=zero_copy_only, writable=writable, auto_widen=auto_widen)
elif pat.is_dictionary(pa_arr_type) or pat.is_struct(pa_arr_type):
return TypeRegister.Categorical._from_arrow(arr, zero_copy_only=zero_copy_only, writable=writable)
elif pat.is_timestamp(pa_arr_type):
return TypeRegister.DateTimeNano._from_arrow(arr, zero_copy_only=zero_copy_only, writable=writable)
elif pat.is_date(pa_arr_type):
return TypeRegister.Date._from_arrow(arr, zero_copy_only=zero_copy_only, writable=writable)
elif pat.is_duration(pa_arr_type):
return TypeRegister.TimeSpan._from_arrow(arr, zero_copy_only=zero_copy_only, writable=writable)
else:
# Unknown/unsupported array type -- can't convert.
raise NotImplementedError(f"pyarrow arrays of type '{pa_arr_type}' can't be converted to riptable arrays.")
@staticmethod
def _from_arrow(
arr: Union["pa.Array", "pa.ChunkedArray"],
zero_copy_only: bool = True,
writable: bool = False,
auto_widen: bool = False,
) -> "FastArray":
"""
Convert a pyarrow `Array` to a riptable `FastArray`.
Parameters
----------
arr : pyarrow.Array or pyarrow.ChunkedArray
zero_copy_only : bool, default True
If True, an exception will be raised if the conversion to a `FastArray` would require copying the
underlying data (e.g. in presence of nulls, or for non-primitive types).
writable : bool, default False
For a `FastArray` created with zero copy (view on the Arrow data), the resulting array is not writable (Arrow data is immutable).
By setting this to True, a copy of the array is made to ensure it is writable.
auto_widen : bool, optional, default False
When False (the default), if an arrow array contains a value which would be considered
the 'invalid'/NA value for the equivalent dtype in a `FastArray`, raise an exception
because direct conversion would be lossy / change the semantic meaning of the data.
When True, the converted array will be widened (if possible) to the next-largest dtype
to ensure the data will be interpreted in the same way.
Returns
-------
FastArray
"""
import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.types as pat
# Make sure the input array is one of the pyarrow array types.
if not isinstance(arr, (pa.Array, pa.ChunkedArray)):
raise TypeError("The array is not an instance of `pyarrow.Array` or `pyarrow.ChunkedArray`.")
# ChunkedArrays need special handling.
if isinstance(arr, pa.ChunkedArray):
# A single-chunk ChunkedArray can be handled by just extracting that chunk
# and recursively processing it.
if arr.num_chunks == 1:
return FastArray._from_arrow(
arr.chunk(0), zero_copy_only=zero_copy_only, writable=writable, auto_widen=auto_widen
)
else:
# TODO: Benchmark this vs. using ChunkedArray.combine_chunks() then converting.
# TODO: Look at `zero_copy_only` and `writable` -- the converted arrays could be destroyed while hstacking
# since we know they'll have just been created; this could reduce peak memory utilization.
return hstack(
[
FastArray._from_arrow(
arr_chunk, zero_copy_only=zero_copy_only, writable=writable, auto_widen=auto_widen
)
for arr_chunk in arr.iterchunks()
]
)
# Handle based on the type of the input array.
if pat.is_integer(arr.type):
# For arrays of primitive types, pa.DataType.to_pandas_dtype() actually returns the equivalent numpy dtype.
arr_dtype = arr.type.to_pandas_dtype()
# Get the riptable invalid value for this array type.
arr_rt_inv = INVALID_DICT[np.dtype(arr_dtype).num]
# Get min and max value of the input array, so we know if we need to promote
# to the next-largest dtype to be able to correctly represent nulls.
# This must be done even if the input array has no nulls, because non-null values in the
# input that happen to equal the riptable integer invalid would otherwise be misinterpreted
# as invalid after conversion.
min_max_result: pa.StructScalar = pc.min_max(arr)
min_value = min_max_result["min"]
max_value = min_max_result["max"]
arr_pa_dtype_widened: Optional[pa.DataType] = None
if min_value == arr_rt_inv or max_value == arr_rt_inv:
# If the input array holds 64-bit integers (signed or unsigned), we can't do a lossless conversion,
# since there is no wider integer available.
if zero_copy_only:
raise ValueError(
"Cannot perform a zero-copy conversion of an arrow array containing the riptable invalid value for the array dtype."
)
elif arr_dtype.itemsize == 8:
raise ValueError(
"Cannot losslessly convert an arrow array of (u)int64 containing the riptable invalid value to a riptable array."
)
elif not auto_widen:
raise ValueError(
"Input array requires widening for lossless conversion. Specify auto_widen=True if you want to allow the widening conversion (which requires an array copy)."
)
else:
# Widen the dtype of the output array.
output_dtype = np.min_scalar_type(2 * arr_rt_inv)
arr_pa_dtype_widened = pa.from_numpy_dtype(output_dtype)
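# For example (illustrative): an int8 arrow array whose data contains -128 (the riptable
# int8 invalid) is widened to int16, since np.min_scalar_type(2 * -128) == np.dtype(np.int16);
# a uint8 array containing 255 is widened to uint16 in the same way.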
# Create the output array, performing a widening conversion + filling in nulls with the riptable invalid if necessary.
# TODO: This could be faster -- if there's a way to get a numpy boolean array from a pyarrow array's null-mask,
# we can convert directly to numpy/riptable; then, widen the FastArray (which'll be parallelized);
# then use rt.copy_to() / rt.putmask() to overwrite the elements of the widened FastArray
# corresponding to the nulls from the mask with the riptable invalid value for the output array type.
if arr_pa_dtype_widened is not None:
arr: pa.Array = arr.cast(arr_pa_dtype_widened)
return arr.fill_null(arr_rt_inv).to_numpy(zero_copy_only=False, writable=writable).view(FastArray)
elif pat.is_floating(arr.type):
# Floating-point arrays can be converted directly to numpy, since pyarrow will automatically
# fill null values with NaN.
return arr.to_numpy(zero_copy_only=zero_copy_only, writable=writable).view(FastArray)
elif pat.is_boolean(arr.type):
# Boolean arrays can only be converted when they do not contain nulls.
# riptable does not support an 'invalid'/NA value for boolean, so pyarrow arrays
# with nulls can't be represented in riptable.
if arr.null_count == 0:
return arr.to_numpy(zero_copy_only=zero_copy_only, writable=writable).view(FastArray)
else:
raise ValueError(
"riptable boolean arrays do not support an invalid value, so they cannot be created from pyarrow arrays containing nulls."
)
elif pat.is_string(arr.type) or pat.is_large_string(arr.type):
# pyarrow variable-length string arrays can _never_ be zero-copy converted to fixed-length numpy/riptable arrays
# because of differences in the memory layout.
if zero_copy_only:
raise ValueError(
"pyarrow variable-length string arrays cannot be zero-copy converted to riptable arrays."
)
# Check for whether the array contains only ASCII strings.
# This is used to guide how the FastArray is created.
has_unicode = not pc.all(pc.string_is_ascii(arr))
# Convert the array to a numpy array.
# Unfortunately, as of pyarrow 4.0, this conversion always produces a numpy object array containing the
# strings (as Python strings) rather than a numpy string array.
# We're able to handle this to return the sensible thing for riptable users, but it does mean this conversion
# is slower than necessary right now.
# TODO: Ask pyarrow-dev about implementing an option to return a numpy 'S' or 'U' array instead, it'll be
# much more efficient, even though some space will be wasted due to numpy not supporting variable-length strings.
# TODO: Consider converting the pyarrow array to a dictionary-encoded array -- if there are only a few uniques,
# it'll be more efficient (even though doing more work) by avoiding repetitive creation of the Python string objects.
if arr.null_count == 0:
tmp = arr.to_numpy(zero_copy_only=False, writable=writable)
else:
# Need to fill nulls with an empty string before converting to numpy.
# (INVALID_DICT[np.dtype('U').num] == '').
tmp = arr.fill_null("").to_numpy(zero_copy_only=False, writable=writable)
result = FastArray(tmp, dtype=str, unicode=has_unicode)
if not writable:
result.flags.writeable = False
return result
elif pat.is_fixed_size_binary(arr.type):
null_count = arr.null_count
if null_count != 0:
if zero_copy_only:
raise ValueError(
"Can't perform a zero-copy conversion of a fixed-size binary array to riptable when the input array contains nulls."
)
arr = arr.fill_null(
b"\x00" * arr.type.byte_width
) # can't fill with b"", since b"" is not valid for fixed width type
# Calling pa.Array.to_numpy with zero_copy=True raises an error with fixed sized binary type.
# Calling pa.Array.to_numpy with zero_copy=False returns a numpy array where types are python bytes objects.
# Workaround below creates the numpy buffer of type "S" manually.
buf = np.frombuffer(
arr.buffers()[1],
dtype="S" + str(arr.type.byte_width),
)
if writable and null_count == 0: # already made a copy if null_count != 0
result = FastArray(np.copy(buf))
result.flags.writeable = writable
return result
result = FastArray(buf)
result.flags.writeable = writable
return result
else:
raise ValueError(
f"FastArray cannot be created from a pyarrow array of type '{arr.type}'. You may need to call the `from_arrow` method on one of the derived subclasses instead."
)
def to_arrow(
self,
type: Optional["pa.DataType"] = None,
*,
preserve_fixed_bytes: bool = False,
empty_strings_to_null: bool = True,
) -> Union["pa.Array", "pa.ChunkedArray"]:
"""
Convert this `FastArray` to a `pyarrow.Array`.
Parameters
----------
type : pyarrow.DataType, optional, defaults to None
preserve_fixed_bytes : bool, optional, defaults to False
If this `FastArray` is an ASCII string array (dtype.kind == 'S'),
set this parameter to True to produce a fixed-length binary array
instead of a variable-length string array.
empty_strings_to_null : bool, optional, default True
If this `FastArray` is an ASCII or Unicode string array,
specify True for this parameter to convert empty strings to nulls in the output.
riptable inconsistently recognizes the empty string as an 'invalid',
so this parameter allows the caller to specify which interpretation
they want.
Returns
-------
pyarrow.Array or pyarrow.ChunkedArray
Notes
-----
TODO: Add bool parameter which directs the conversion to choose the most-compact output type possible?
This would be relevant to indices of categorical/dictionary-encoded arrays, but could also make sense
for regular FastArray types (e.g. to use an int8 instead of an int32 when it'd be a lossless conversion).
"""
import builtins
import pyarrow as pa
# Derived array types MUST implement their own overload of this function
# for correctness; for that reason, raise an error if someone attempts to
# call *this* implementation of the method for a derived array type.
if builtins.type(self) != FastArray:
raise NotImplementedError(
f"The `{builtins.type(self).__qualname__}` type must implement it's own override of the `to_arrow()` method."
)
# riptable (at least as of 1.0.56) does not *truly* support invalid/NA values
# in bool or ascii/unicode string-typed arrays. Handle those dtypes specially.
if np.issubdtype(self.dtype, np.integer):
# TODO: If this array has .ndims >= 2, need to convert to pyarrow using pa.Tensor.from_numpy(...). That doesn't handle masks as of pyarrow 4.0.
# Get a mask of invalids.
invalids_mask = self.isnan()
# If all values are valid, don't bother creating an all-False mask, it's just wasting memory.
if not invalids_mask.any():
invalids_mask = None
# Create the pyarrow array from it + this array.
return pa.array(self._np, mask=invalids_mask, type=type)
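# For example (illustrative): an int32 FastArray containing the sentinel value -2147483648
# (the riptable int32 invalid) would have that element marked as null in the resulting
# pyarrow array via the mask above.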
elif np.issubdtype(self.dtype, np.floating):
# Using floating-point NaN to signal both NaN and NA/null in riptable means we
# need to make a decision here on whether to mark those values as NA/null values
# in the returned pyarrow array.
# For now, we don't -- we just pass the data along directly, so the caller can decide
# on whether they want to handle that. (Ideally, we'd have a way to parameterize this
# but the protocol doesn't support it as of pyarrow 4.0. In any case, only the bitmask
# needs to be re-created later if the user wants to consider the NaNs as NA/null values.)
if self.ndim >= 2:
# NOTE: As of pyarrow 4.0, this method doesn't support a `type` argument.
return pa.Tensor.from_numpy(self._np)
else:
return pa.array(self._np, type=type)
elif np.issubdtype(self.dtype, bool):
if self.ndim >= 2:
# NOTE: As of pyarrow 4.0, this method doesn't support a `type` argument.
return pa.Tensor.from_numpy(self._np)
else:
return pa.array(self._np, type=type)
elif np.issubdtype(self.dtype, bytes):
# If the caller wants to convert empty strings to nulls, get a mask of invalids.
if empty_strings_to_null:
invalids_mask = self == self.inv
# If all values are valid, don't bother creating an all-False mask, it's just wasting memory.
if not invalids_mask.any():
invalids_mask = None
else:
invalids_mask = None
# Does the caller want to preserve the fixed-length binary data?
if preserve_fixed_bytes:
# Convert to a fixed-length binary ('bytes') type.
element_str_length = np.dtype(self.dtype).itemsize
arr_type = pa.binary(element_str_length) if type is None else type
return pa.array(self._np, mask=invalids_mask, type=arr_type)
else:
# Convert this array to a pyarrow variable-length string array.
if type is None:
type = pa.string()
# Convert as Unicode ndarray to stringarray, as bytestring does not preserve element length (riptable#249)
return pa.array(np.array(self._np, dtype="U"), mask=invalids_mask, type=type)
elif np.issubdtype(self.dtype, str):
# If the caller wants to convert empty strings to nulls, get a mask of invalids.
if empty_strings_to_null:
invalids_mask = self == self.inv
# If all values are valid, don't bother creating an all-False mask, it's just wasting memory.
if not invalids_mask.any():
invalids_mask = None
else:
invalids_mask = None
# pyarrow (as of v4.0) does not have a fixed-size Unicode string data type, so unlike the 'bytes'
# handling above for ASCII strings, we have to use the variable-length string array type.
if type is None:
type = pa.string()
return pa.array(self._np, mask=invalids_mask, type=type)
else:
raise NotImplementedError(f"FastArray with dtype '{np.dtype(self.dtype)}' is not supported.")
def __arrow_array__(self, type: Optional["pa.DataType"] = None) -> Union["pa.Array", "pa.ChunkedArray"]:
"""
Implementation of the ``__arrow_array__`` protocol for conversion to a pyarrow array.
Parameters
----------
type : pyarrow.DataType, optional, defaults to None
Returns
-------
pyarrow.Array or pyarrow.ChunkedArray
Notes
-----
https://arrow.apache.org/docs/python/extending_types.html#controlling-conversion-to-pyarrow-array-with-the-arrow-array-protocol
"""
return self.to_arrow(type=type, preserve_fixed_bytes=False, empty_strings_to_null=True)
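# Because of this protocol, pyarrow constructors such as pa.array() accept a FastArray
# directly. A minimal sketch (assuming pyarrow is installed):
#   >>> import pyarrow as pa
#   >>> pa_arr = pa.array(rt.FastArray([1.5, 2.5, 3.0]))   # dispatches to __arrow_array__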
# -----------------------------------------------------------
@classmethod
def register_function(cls, name, func):
"""
Register a function as a method on `FastArray`.
Used by rt_fastarraynumba.
"""
setattr(cls, name, func)
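# A minimal usage sketch; the `double` helper below is hypothetical and for illustration only:
#   >>> def double(arr):
#   ...     return arr * 2
#   >>> FastArray.register_function("double", double)
#   >>> rt.FA([1, 2, 3]).double()   # now callable as a method
# A real registration appears at the bottom of this module:
# FastArray.register_function("describe", describe).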
def apply_schema(self, schema):
"""
Apply a schema containing descriptive information to the FastArray
:param schema: dict
:return: dictionary of deviations from the schema
"""
from .rt_meta import apply_schema as _apply_schema
return _apply_schema(self, schema)
def info(self, **kwargs):
"""
Return a description of the input array's contents.
This information is set using `FastArray.apply_schema` and includes the steward
and dtype.
Parameters
----------
**kwargs : optional
Keyword arguments passed to :func:`.rt_meta.info`.
Returns
-------
:class:`.rt_meta.Info`
A description of the input array's contents.
See Also
--------
FastArray.doc : Return the `.Doc` object for the input `FastArray`.
.Categorical.info : Display a description of the input `.Categorical`.
.Struct.info :
Return an object containing a description of the input structure's contents.
Examples
--------
Return the description of the input array's contents:
>>> a = rt.FA([1, 2, 3, 4, 5])
>>> a.info()
Description: <no description>
Steward: <no steward>
Type: int32
Apply a schema and return the description of the input array's contents:
>>> schema = {"Description": "This is an array", "Steward": "Brian"}
>>> a.apply_schema(schema)
{}
>>> a.info()
Description: This is an array
Steward: Brian
Type: int32
Return the description of the input array's contents with a title:
>>> a.info(title="Test")
Test
====
Description: This is an array
Steward: Brian
Type: int32
"""
from .rt_meta import info as _info
return _info(self, **kwargs)
@property
def doc(self):
"""
Return the `.Doc` object for the input `FastArray`.
If no `.Doc` object exists, return `None`.
Returns
-------
`~riptable.rt_meta.Doc`
The `.Doc` object for the input `FastArray`. If no `.Doc` object
exists, return `None`.
See Also
--------
FastArray.info : Return a description of the input array's contents.
~riptable.rt_meta.apply_schema : Set `.Doc` object values.
Examples
--------
No `.Doc` object exists:
>>> a = rt.FA([1, 2, 3, 4, 5])
>>> print(a.doc)
None
Apply a schema and return the `.Doc` object:
>>> schema = {"Description": "This is an array", "Steward": "Brian", "Type": "int32"}
>>> a.apply_schema(schema)
{}
>>> a.doc
Description: This is an array
Steward: Brian
Type: int32
Return specific `.Doc` object information:
>>> a.doc._type
'int32'
>>> a.doc._descrip
'This is an array'
>>> a.doc._steward
'Brian'
>>> print(a.doc._detail)
None
"""
from .rt_meta import doc as _doc
return _doc(self)
# ====================== END OF CLASS DEFINITION ===============================
# -----------------------------------------------------------
def _setfastarrayview(arr):
"""
Called from CPP into Python to flip an ndarray view to a FastArray.
"""
if isinstance(arr, FastArray):
if FastArray.Verbose > 2:
print("no need to setfastarrayview", arr.dtype, len(arr))
return arr
if FastArray.Verbose > 2:
print("setfastarrayview", arr.dtype, len(arr))
return arr.view(FastArray)
# -----------------------------------------------------------
def _setfastarraytype():
# -----------------------------------------------------------
# calling this function will force fm to return FastArray subclass
# rc.BasicMathHook(FastArray, np.ndarray)
# Coming next build
fa = np.arange(1).view(FastArray)
rc.SetFastArrayType(fa, _setfastarrayview)
rc.BasicMathHook(fa, fa._np)
# -----------------------------------------------------------
def _FixupDocStrings():
"""
Load all the member functions of the FastArray class.
Load all the member functions of the np module.
If we find a match, copy over the doc strings.
"""
import inspect
import sys
mymodule = sys.modules[__name__]
all_myfunctions = inspect.getmembers(FastArray, inspect.isfunction)
try:
# bottleneck is optional
all_bnfunctions = inspect.getmembers(bn, inspect.isfunction)
all_bnfunctions += inspect.getmembers(bn, inspect.isbuiltin)
# build dictionary of bottleneck docs
bndict = {}
for funcs in all_bnfunctions:
bndict[funcs[0]] = funcs[1]
# now for each function that has an bn flavor, copy over the doc strings
for funcs in all_myfunctions:
if (funcs[0] in bndict) and (funcs[1].__doc__ is None):
funcs[1].__doc__ = bndict[funcs[0]].__doc__
except Exception:
pass
all_npfunctions = [func for func in inspect.getmembers(np.ndarray) if not func[0].startswith("_")]
# build dictionary of np.ndarray docs
npdict = {}
for funcs in all_npfunctions:
npdict[funcs[0]] = funcs[1]
# now for each function that has an np flavor, copy over the doc strings
for funcs in all_myfunctions:
if (funcs[0] in npdict) and (funcs[1].__doc__ is None):
funcs[1].__doc__ = npdict[funcs[0]].__doc__
# now do just plain np
all_npfunctions = [func for func in inspect.getmembers(np) if "__" not in func[0]]
# build dictionary of np docs
npdict = {}
for funcs in all_npfunctions:
# print("getting doc string for ", funcs[0])
npdict[funcs[0]] = funcs[1]
# now for each function that has an np flavor, copy over the doc strings
for funcs in all_myfunctions:
if (funcs[0] in npdict) and (funcs[1].__doc__ is None):
funcs[1].__doc__ = npdict[funcs[0]].__doc__
# ----------------------------------------------------------
class Threading:
@staticmethod
def on():
"""
Turn riptable threading on.
Used only when riptable threading was turned off.
Example
-------
a=rt.arange(100_000)
Threading.off()
%time a+=1
Threading.on()
%time a+=1
Returns
-------
Whether threading was previously on (1) or off (0).
"""
return FastArray._TON()
@staticmethod
def off():
"""
Turn riptable threading off.
Useful for when the system has other processes using other threads
or to limit threading resources.
Example
-------
a=rt.arange(100_000)
Threading.off()
%time a+=1
Threading.on()
%time a+=1
Returns
-------
Whether threading was previously on (1) or off (0).
"""
return FastArray._TOFF()
@staticmethod
def threads(threadcount):
"""
Set how many worker threads riptable can use.
Often defaults to 12; cannot be set below 1 or above 31.
To turn riptable threading off completely, use ``Threading.off()``.
Useful for when the system has other processes using other threads
or to limit threading resources.
Example
-------
Threading.threads(8)
Returns
-------
number of threads previously used
"""
return rc.SetThreadWakeUp(threadcount)
# ----------------------------------------------------------
class Recycle:
@staticmethod
def on():
"""
Turn riptable recycling on.
Used only when riptable recycling was turned off.
Example
-------
a=arange(100_000)
Recycle.off()
%timeit a=a + 1
Recycle.on()
%timeit a=a + 1
"""
return FastArray._RON()
@staticmethod
def off():
"""Turn riptable recycling off."""
return FastArray._ROFF()
@staticmethod
def now(timeout: int = 0):
"""
Force an immediate garbage-collection pass of the riptable recycler using the given timeout value.
Also calls the Python garbage collector.
Parameters
----------
timeout : int, default 0
A value of 0 does not set a timeout.
Returns
-------
Total number of arrays deleted.
"""
import gc
gc.collect()
result = rc.RecycleGarbageCollectNow(timeout)["TotalDeleted"]
if result > 0:
rc.RecycleGarbageCollectNow(timeout)
return result
@staticmethod
def timeout(timeout: int = 100):
"""
Set the timeout value after which the garbage collector expires unused arrays.
The timeout unit is roughly 2/5 of a second; a value of 100 is usually about 40 seconds.
If an array has not been reused within the timeout, it is permanently deleted.
Returns
-------
Previous timeout value.
"""
return rc.RecycleSetGarbageCollectTimeout(timeout)
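# For example (illustrative): Recycle.timeout(250) sets the expiry to roughly 100 seconds,
# since one timeout unit is about 2/5 of a second.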
# ----------------------------------------------------------
class Ledger:
@staticmethod
def on():
"""Turn the math ledger on to record all array math routines"""
return TypeRegister.MathLedger._LedgerOn()
@staticmethod
def off():
"""Turn the math ledger off"""
return TypeRegister.MathLedger._LedgerOff()
@staticmethod
def dump(dataset=True):
"""Print out the math ledger"""
return TypeRegister.MathLedger._LedgerDump(dataset=dataset)
@staticmethod
def to_file(filename):
"""Save the math ledger to a file"""
return TypeRegister.MathLedger._LedgerDumpFile(filename)
@staticmethod
def clear():
"""Clear all the entries in the math ledger"""
return TypeRegister.MathLedger._LedgerClear()
# ----------------------------------------------------------
# this is called when the module is loaded
_FixupDocStrings()
# NOTE: Keep this at the end of the file
# -----------------------------------------------------------
# calling this function will force fm to return FastArray subclass
_setfastarraytype()
TypeRegister.FastArray = FastArray
FastArray.register_function("describe", describe)