Source code for riptable.rt_enum

# Names exported by a star-import of this module. Anything defined below that
# is not listed here must be imported by name.
__all__ = [
    "DATETIME_TYPES",
    "DS_DISPLAY_TYPES",
    "GB_FUNCTIONS",
    "MATH_OPERATION",
    "INVALID_DICT",
    "TIMEWINDOW_FUNCTIONS",
    "NumpyCharTypes",
    "REDUCE_FUNCTIONS",
    "ROLLING_FUNCTIONS",
    "SD_TYPES",
    "SM_DTYPES",
    "TypeRegister",
    "DisplayJustification",
    "DisplayColumnColors",
    "DisplayArrayTypes",
    "DisplayDetectModes",
    "DisplayLength",
    "ColHeader",
]


import sys
from collections import namedtuple
from enum import IntEnum
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
)

import numpy as np

if TYPE_CHECKING:
    from .rt_accum2 import Accum2
    from .rt_categorical import Categorical
    from .rt_dataset import Dataset
    from .rt_datetime import (
        Date,
        DateBase,
        DateSpan,
        DateTimeBase,
        DateTimeNano,
        TimeSpan,
    )
    from .rt_display import (
        DisplayAttributes,
        DisplayDetect,
        DisplayOptions,
        DisplayString,
        DisplayTable,
        DisplayText,
    )
    from .rt_fastarray import FastArray
    from .rt_groupby import GroupBy
    from .rt_grouping import Grouping
    from .rt_ledger import MathLedger
    from .rt_multiset import Multiset
    from .rt_pdataset import PDataset
    from .rt_sharedmemory import SharedMemory
    from .rt_sort_cache import SortCache
    from .rt_struct import Struct
    from .rt_timezone import Calendar, TimeZone


# Common label strings used to indicate special columns or items
# (invalid, total, clipped, filtered) and the default groupby key prefix.
INVALID_SHORT_NAME: str = "Inv"
INVALID_LONG_NAME: str = "Invalid"
TOTAL_LONG_NAME: str = "Total"
CLIPPED_LONG_NAME: str = "Clipped"
FILTERED_LONG_NAME: str = "Filtered"
GROUPBY_KEY_PREFIX: str = "key"
"""Default groupby key name - followed by _n for n"""

# Sentinel ("invalid") values for the fixed-width integer types.
INVALID_POINTER_32: int = -2147483648  # -(2**31)
INVALID_POINTER_U32: int = 0xFFFFFFFF  # 4294967295
INVALID_POINTER_64: int = -9223372036854775808  # -(2**63); bit pattern 0x8000000000000000
INVALID_POINTER_U64: int = 0xFFFFFFFFFFFFFFFF  # 18446744073709551615

# Entries 7 and 8 of INVALID_DICT are the only ones that differ by platform:
# on win32 they use the 32-bit sentinels, everywhere else the 64-bit ones.
_LONG_IS_32BIT: bool = sys.platform == "win32"
_INVALID_LONG: int = INVALID_POINTER_32 if _LONG_IS_32BIT else INVALID_POINTER_64
_INVALID_ULONG: int = INVALID_POINTER_U32 if _LONG_IS_32BIT else INVALID_POINTER_U64

# Maps a NumPy dtype number to the value used as that dtype's "invalid".
# Built once; the platform-dependent entries are selected above instead of
# redefining the whole table.
INVALID_DICT: Mapping[int, Any] = {
    # keys in this dict can be generated with ndarray.dtype.num
    0: False,  # np.bool_
    1: -128,  # np.int8
    2: 255,  # np.uint8
    3: -32768,  # np.int16
    4: 65535,  # np.uint16
    5: INVALID_POINTER_32,  # np.int32
    6: INVALID_POINTER_U32,  # np.uint32
    7: _INVALID_LONG,  # np.int_ (platform dependent, see above)
    8: _INVALID_ULONG,  # np.uint (platform dependent, see above)
    9: INVALID_POINTER_64,  # np.int64
    10: INVALID_POINTER_U64,  # np.uint64
    11: np.nan,  # np.float32
    12: np.nan,  # np.float64
    13: np.nan,  # np.longdouble
    # 14: np.complex64     -- no invalid defined
    # 15: np.complex128    -- no invalid defined
    # 16: np.clongdouble   -- no invalid defined
    17: None,  # np.object_
    18: b"",  # np.bytes_
    19: "",  # np.str_
    # 20: np.void          -- no invalid defined
    # 21: np.datetime64    -- no invalid defined
    # 22: np.timedelta64   -- no invalid defined
    23: np.nan,  # np.float16
}


# See DatasetRW.h
# Official list of data types
class SM_DTYPES(IntEnum):
    """Integer codes for the data types (the "official list"; see DatasetRW.h)."""

    DT_INVALID = 0
    DT_BOOL = 1
    DT_BYTE = 2

    # signed integers
    DT_INT8 = 3
    DT_INT16 = 4
    DT_INT32 = 5
    DT_INT64 = 6

    # unsigned integers
    DT_UINT8 = 7
    DT_UINT16 = 8
    DT_UINT32 = 9
    DT_UINT64 = 10

    # floating point
    DT_FLOAT16 = 11
    DT_FLOAT32 = 12
    DT_FLOAT64 = 13

    DT_OBJECT = 17
    DT_BYTES = 18  # fixed size ascii string uses this
    DT_UNICODE = 19
    DT_NPVOID = 20
    DT_DATETIME64 = 21
    DT_TIMEDELTA64 = 22
    DT_HALF = 23
    DT_CHARARRAY = 24  # how we like to store a fixed size ascii
class SD_TYPES(IntEnum):
    """Integer codes for container/value kinds, split into Matlab- and Python-based groups."""

    SD_UNKNOWN = 0

    # Matlab based
    SD_FUNCTIONH = 1
    SD_DATASET = 2
    SD_CLASS = 3
    SD_STRUCT = 4
    SD_SCALAR = 5
    SD_CHAR = 6
    SD_LOGICAL = 7
    SD_CELL = 8
    SD_NUMERIC = 9
    SD_VECTOR = 10

    # Python based
    SD_PANDAS = 20
    SD_NUMPY = 21
class MATH_OPERATION(IntEnum):
    """
    Integer encoding of the mathematical operations implemented by Riptable.
    """

    # MATH_OPERATION is how Riptable communicates with RiptideCPP, and the same
    # enumeration is repeated in RiptideCPP CommonInc.h. A change made in either
    # place must be mirrored in the other.

    # --- Two ops, returns same type ---
    ADD = 1
    SUB = 2
    MUL = 3
    MOD = 5
    MIN = 6
    MAX = 7
    NANMIN = 8
    NANMAX = 9
    FLOORDIV = 10
    POWER = 11
    REMAINDER = 12
    FMOD = 13

    # --- Two ops always return a float ---
    DIV = 101
    # will check both sides for INV/ZEROS input:int64/int32 (returns double)
    SUBDATETIMES = 102
    # SUBDATETIMESRIGHT = 103,  # will check right hand side for ZEROS input:int64/int32 (returns double)
    # will check both sides for INV/ZEROS same dtype in/out (int32/int64)
    SUBDATES = 103
    # ADDDATES = 105  # will check left hand side for ZEROS same dtype in/out (int32/int64)

    # --- One input returns same data type ---
    ABS = 201
    NEG = 202
    FABS = 203
    INVERT = 204
    FLOOR = 205
    CEIL = 206
    TRUNC = 207
    ROUND = 208
    NEGATIVE = 212
    POSITIVE = 213
    SIGN = 214
    RINT = 215
    EXP = 216
    EXP2 = 217
    # does not allow floats
    BITWISE_NOT = 218

    # --- One input always return a float one input ---
    SQRT = 301
    LOG = 302
    LOG2 = 303
    LOG10 = 304
    EXPM1 = 305
    LOG1P = 306
    SQUARE = 307
    CBRT = 308
    RECIPROCAL = 309

    # --- Two inputs Always return a bool ---
    CMP_EQ = 401
    CMP_NE = 402
    CMP_LT = 403
    CMP_GT = 404
    CMP_LTE = 405
    CMP_GTE = 406
    LOGICAL_AND = 407
    LOGICAL_XOR = 408
    LOGICAL_OR = 409

    # --- Two inputs ---
    BITWISE_LSHIFT = 501
    BITWISE_RSHIFT = 502
    BITWISE_AND = 503
    BITWISE_XOR = 504
    BITWISE_OR = 505
    BITWISE_ANDNOT = 506
    BITWISE_NOTAND = 507
    BITWISE_XOR_SPECIAL = 550

    # --- one input output bool ---
    LOGICAL_NOT = 601
    ISINF = 603
    ISNAN = 604
    ISFINITE = 605
    ISNORMAL = 606
    ISNOTINF = 607
    ISNOTNAN = 608
    ISNOTFINITE = 609
    ISNOTNORMAL = 610
    ISNANORZERO = 611
    SIGNBIT = 612
gBinaryUFuncs: Mapping[Callable, Optional[MATH_OPERATION]] = {
    # math ops
    np.add: MATH_OPERATION.ADD,
    np.subtract: MATH_OPERATION.SUB,
    np.multiply: MATH_OPERATION.MUL,
    np.matmul: None,
    np.divide: MATH_OPERATION.DIV,
    np.true_divide: MATH_OPERATION.DIV,
    np.floor_divide: MATH_OPERATION.FLOORDIV,
    # NOTE(review): NumPy documents ``mod`` as an alias of ``remainder``. When they
    # are the same object, the ``np.mod`` entry below overwrites this one and the
    # key ends up mapped to MATH_OPERATION.MOD -- confirm that is intended.
    np.remainder: MATH_OPERATION.REMAINDER,
    np.fmod: MATH_OPERATION.FMOD,
    np.mod: MATH_OPERATION.MOD,
    np.power: MATH_OPERATION.POWER,
    np.minimum: MATH_OPERATION.MIN,
    np.maximum: MATH_OPERATION.MAX,
    np.fmin: MATH_OPERATION.NANMIN,
    np.fmax: MATH_OPERATION.NANMAX,
}
"""
The mapping of Numpy to Riptable arithmetic binary operator overrides.

See Also
--------
MATH_OPERATION : the full set of mathematical operations supported by Riptable.
"""

gBinaryLogicalUFuncs: Mapping[Callable, MATH_OPERATION] = {
    # comparisons
    np.less_equal: MATH_OPERATION.CMP_LTE,
    np.less: MATH_OPERATION.CMP_LT,
    np.equal: MATH_OPERATION.CMP_EQ,
    np.not_equal: MATH_OPERATION.CMP_NE,
    np.greater: MATH_OPERATION.CMP_GT,
    np.greater_equal: MATH_OPERATION.CMP_GTE,
    np.logical_and: MATH_OPERATION.LOGICAL_AND,
    np.logical_xor: MATH_OPERATION.LOGICAL_XOR,
    np.logical_or: MATH_OPERATION.LOGICAL_OR,
}
"""
The mapping of Numpy to Riptable comparison function overrides.

See Also
--------
MATH_OPERATION : the full set of mathematical operations supported by Riptable.
"""

gBinaryBitwiseUFuncs: Mapping[Callable, Optional[MATH_OPERATION]] = {
    # bitwise operations only apply to bool and integers
    np.left_shift: None,
    np.right_shift: None,
    np.bitwise_and: MATH_OPERATION.BITWISE_AND,
    np.bitwise_xor: MATH_OPERATION.BITWISE_XOR,
    np.bitwise_or: MATH_OPERATION.BITWISE_OR,
}
"""
The mapping of Numpy to Riptable bit-twiddling binary operator overrides.

See Also
--------
MATH_OPERATION : the full set of mathematical operations supported by Riptable.
"""

gBinaryBitwiseMonoUFuncs: Mapping[Callable, Optional[MATH_OPERATION]] = {
    # bitwise operations only apply to bool and integers
    np.invert: None
}
"""
The mapping of Numpy to Riptable bit-twiddling unary operator overrides.

See Also
--------
MATH_OPERATION : the full set of mathematical operations supported by Riptable.
"""

gUnaryUFuncs: Mapping[Callable, Optional[MATH_OPERATION]] = {
    # math ops
    np.absolute: MATH_OPERATION.ABS,
    np.abs: MATH_OPERATION.ABS,
    np.fabs: MATH_OPERATION.FABS,
    # NOTE(review): NumPy documents ``bitwise_not`` as an alias of ``invert``. When
    # they are the same object, the ``np.bitwise_not`` entry further down
    # overwrites this one and the key ends up mapped to
    # MATH_OPERATION.BITWISE_NOT -- confirm that is intended.
    np.invert: MATH_OPERATION.INVERT,
    np.floor: MATH_OPERATION.FLOOR,
    np.ceil: MATH_OPERATION.CEIL,
    np.trunc: MATH_OPERATION.TRUNC,
    np.round: MATH_OPERATION.ROUND,
    # NOTE(review): rint is mapped to ROUND although MATH_OPERATION.RINT exists;
    # left unchanged here -- confirm.
    np.rint: MATH_OPERATION.ROUND,
    np.isinf: MATH_OPERATION.ISINF,
    np.isnan: MATH_OPERATION.ISNAN,
    np.isfinite: MATH_OPERATION.ISFINITE,
    np.signbit: MATH_OPERATION.SIGNBIT,
    np.negative: MATH_OPERATION.NEGATIVE,
    np.positive: MATH_OPERATION.POSITIVE,
    np.sign: MATH_OPERATION.SIGN,
    np.exp: MATH_OPERATION.EXP,
    np.exp2: MATH_OPERATION.EXP2,
    np.log: MATH_OPERATION.LOG,
    np.log2: MATH_OPERATION.LOG2,
    np.log10: MATH_OPERATION.LOG10,
    np.expm1: None,
    np.log1p: None,
    np.sqrt: MATH_OPERATION.SQRT,
    np.square: None,
    np.cbrt: MATH_OPERATION.CBRT,
    np.reciprocal: None,
    np.logical_not: MATH_OPERATION.LOGICAL_NOT,
    np.bitwise_not: MATH_OPERATION.BITWISE_NOT,
    # (a second, identical ``np.signbit`` entry used to be repeated here; a
    # repeated key with the same value has no effect, so it was removed)
}
"""
The mapping of Numpy to Riptable arithmetic unary operator overrides.

See Also
--------
MATH_OPERATION : the full set of mathematical operations supported by Riptable.
"""

# Every nan-function currently maps to None.
gNanFuncs: Mapping[Callable, Optional[Callable]] = {
    np.nanargmax: None,
    np.nanargmin: None,
    np.nancumprod: None,
    np.nancumsum: None,
    np.nanmax: None,
    np.nanmean: None,
    np.nanmedian: None,  # bug in numpy for floats?
    np.nanmin: None,
    np.nanpercentile: None,
    np.nanprod: None,
    np.nanstd: None,
    np.nansum: None,
    np.nanvar: None,
}

# See GroupBy.h
# Official list of funcs
class ROLLING_FUNCTIONS(IntEnum):
    """Integer codes for the rolling functions (see GroupBy.h)."""

    # sums
    ROLLING_SUM = 0
    ROLLING_NANSUM = 1

    # means and quantile
    ROLLING_MEAN = 102
    ROLLING_NANMEAN = 103
    ROLLING_QUANTILE = 104

    # variance / standard deviation
    ROLLING_VAR = 106
    ROLLING_NANVAR = 107
    ROLLING_STD = 108
    ROLLING_NANSTD = 109
class REDUCE_FUNCTIONS(IntEnum):
    """Integer codes for the reduce functions."""

    # sums
    REDUCE_SUM = 0
    REDUCE_NANSUM = 1

    # means
    REDUCE_MEAN = 102
    REDUCE_NANMEAN = 103

    # variance / standard deviation
    REDUCE_VAR = 106
    REDUCE_NANVAR = 107
    REDUCE_STD = 108
    REDUCE_NANSTD = 109

    # extrema and their positions
    REDUCE_MIN = 200
    REDUCE_NANMIN = 201
    REDUCE_MAX = 202
    REDUCE_NANMAX = 203
    REDUCE_ARGMIN = 204
    REDUCE_NANARGMIN = 205
    REDUCE_ARGMAX = 206
    REDUCE_NANARGMAX = 207

    # for Jack TODO
    REDUCE_ANY = 208
    REDUCE_ALL = 209
gReduceUFuncs: Mapping[Callable, REDUCE_FUNCTIONS] = {
    np.add: REDUCE_FUNCTIONS.REDUCE_SUM,
    np.minimum: REDUCE_FUNCTIONS.REDUCE_MIN,
    np.maximum: REDUCE_FUNCTIONS.REDUCE_MAX,
    ## TODO add support for the following reduce ufuncs
    # np.logical_and: RIPTIDE NP.ALL
    # np.logical_or: RIPTIDE NP.ANY
}
"""
The mapping of Numpy to Riptable reduce function overrides.

See Also
--------
REDUCE_FUNCTIONS
    encoding of all reduce functions that riptable supports
"""
class TIMEWINDOW_FUNCTIONS(IntEnum):
    """Integer codes for the time-window functions (sum and product)."""

    TIMEWINDOW_SUM = 0
    TIMEWINDOW_PROD = 1
# See GroupBy.h
# Official list of funcs
class GB_FUNCTIONS(IntEnum):
    """Integer codes for the groupby functions (see GroupBy.h)."""

    # --- basic reductions ---
    GB_SUM = 0
    GB_MEAN = 1
    GB_MIN = 2
    GB_MAX = 3
    # STD uses VAR with the param set to 1
    GB_VAR = 4
    GB_STD = 5

    # --- nan-aware reductions ---
    GB_NANSUM = 50
    GB_NANMEAN = 51
    GB_NANMIN = 52
    GB_NANMAX = 53
    GB_NANVAR = 54
    GB_NANSTD = 55

    # --- positional / order statistics ---
    GB_FIRST = 100
    GB_NTH = 101
    GB_LAST = 102
    GB_MEDIAN = 103  # auto handles nan
    GB_MODE = 104  # auto handles nan
    GB_TRIMBR = 105  # auto handles nan
    GB_QUANTILE_MULT = 106  # handles all (nan)median/quantile versions

    # --- rolling ---
    # All int/uints output upgraded to INT64
    # Output is all elements (not just grouped)
    GB_ROLLING_SUM = 200
    GB_ROLLING_NANSUM = 201
    GB_ROLLING_DIFF = 202
    GB_ROLLING_SHIFT = 203
    GB_ROLLING_COUNT = 204
    GB_ROLLING_MEAN = 205
    GB_ROLLING_NANMEAN = 206
    GB_ROLLING_QUANTILE = 207

    # --- In ema.cpp ---
    GB_CUMSUM = 300
    GB_EMADECAY = 301
    GB_CUMPROD = 302
    GB_FINDNTH = 303
    GB_EMANORMAL = 304
    GB_EMAWEIGHTED = 305
    GB_CUMNANMAX = 306
    GB_CUMNANMIN = 307
    GB_CUMMAX = 308
    GB_CUMMIN = 309
# some groupby functions will work for strings
GB_STRING_ALLOWED = [
    GB_FUNCTIONS.GB_FIRST,
    GB_FUNCTIONS.GB_NTH,
    GB_FUNCTIONS.GB_LAST,
    GB_FUNCTIONS.GB_ROLLING_SHIFT,
    GB_FUNCTIONS.GB_MEDIAN,
]

# groupby functions listed as allowed for dates
GB_DATE_ALLOWED = [
    GB_FUNCTIONS.GB_FIRST,
    GB_FUNCTIONS.GB_NTH,
    GB_FUNCTIONS.GB_LAST,
    GB_FUNCTIONS.GB_MEAN,
    GB_FUNCTIONS.GB_MIN,
    GB_FUNCTIONS.GB_MAX,
    GB_FUNCTIONS.GB_NANMEAN,
    GB_FUNCTIONS.GB_NANMIN,
    GB_FUNCTIONS.GB_NANMAX,
    GB_FUNCTIONS.GB_MEDIAN,
    GB_FUNCTIONS.GB_QUANTILE_MULT,
    GB_FUNCTIONS.GB_MODE,
    GB_FUNCTIONS.GB_ROLLING_DIFF,
    GB_FUNCTIONS.GB_ROLLING_SHIFT,
]

# Function codes that sit outside the GB_FUNCTIONS enumeration.
GB_FUNC_COUNT = -1
GB_FUNC_USER = 900
GB_FUNC_NUMBA = 1000


######################################################
# Numba groupby enums
######################################################
class GB_PACKUNPACK(IntEnum):
    """Two-valued flag: UNPACK (0) or PACK (1)."""

    UNPACK = 0
    PACK = 1
class NumpyCharTypes:
    """Strings of NumPy dtype character codes, grouped into named categories."""

    All = "?bhilqpBHILQPefdgFDGSUVOMm"
    AllFloat = "efdgFDG"
    AllInteger = "bBhHiIlLqQpP"
    Computable = "fdgbBhHiIlLqQpP"  # does not include boolean or strings
    Noncomputable = "SeFDGUVOMm"
    Unsupported = "eFDGVOMm"  # unsupported in riptable world
    Supported = "?fdgbBhHiIlLqQpPSUV"
    SupportedFloat = "fdg"
    SupportedAlternate = "?fdgbBhHiIlLqQpPSU"
    Character = "c"
    Complex = "FDG"
    Datetime = "Mm"
    Float = "efdg"
    Float64 = "dg"
    Integer = "bhilqp"
    UnsignedInteger = "BHILQP"
    # linux gcc compiler long is int64, msvc long is int32, so the "l"/"L"
    # codes count as 64-bit everywhere except win32
    UnsignedInteger64 = "QP" if sys.platform == "win32" else "LQP"
    SignedInteger64 = "qp" if sys.platform == "win32" else "lqp"
# Python builtin and NumPy scalar types, for use with isinstance().
# NOTE(review): np.int32 / np.uint32 each appear twice; harmless for isinstance
# checks, kept as-is so the tuple contents are unchanged.
gScalarType: Tuple[type, ...] = (
    int,
    float,
    complex,
    bool,
    bytes,
    str,
    memoryview,
    np.bool_,
    np.int8,
    np.uint8,
    np.int16,
    np.uint16,
    np.int32,
    np.uint32,
    np.int32,
    np.uint32,
    np.int64,
    np.uint64,
    np.float16,
    np.float32,
    np.float64,
    np.complex64,
    np.complex128,
    np.object_,
    np.bytes_,
    np.str_,
    np.void,
    np.datetime64,
    np.timedelta64,
)

# Same as gScalarType minus the Python builtins (memoryview is retained).
gNumpyScalarType: Tuple[type, ...] = (
    memoryview,
    np.bool_,
    np.int8,
    np.uint8,
    np.int16,
    np.uint16,
    np.int32,
    np.uint32,
    np.int32,
    np.uint32,
    np.int64,
    np.uint64,
    np.float16,
    np.float32,
    np.float64,
    np.complex64,
    np.complex128,
    np.object_,
    np.bytes_,
    np.str_,
    np.void,
    np.datetime64,
    np.timedelta64,
)


def int_dtype_from_len(newlen: int) -> np.dtype:
    """Return the narrowest signed integer dtype chosen for an array length.

    Assumes that numbers up to the length will need to be stored by the
    returned dtype. Per the original note, used by Grouping and Categorical.

    Parameters
    ----------
    newlen : int
        Length of the array whose indices must be representable.

    Returns
    -------
    np.dtype
        ``int8`` for ``newlen < 100``, ``int16`` for ``newlen < 30_000``,
        ``int32`` for ``newlen < 2_000_000_000``, otherwise ``int64``.

    Notes
    -----
    The returned dtypes are signed (the previous docstring said "unsigned",
    which did not match the code). Each cut-off is below the maximum of the
    corresponding type (127, 32_767, 2_147_483_647).
    """
    if newlen < 100:
        return np.dtype(np.int8)
    if newlen < 30_000:
        return np.dtype(np.int16)
    if newlen < 2_000_000_000:
        return np.dtype(np.int32)
    return np.dtype(np.int64)


# ANSI escape sequences keyed by color name ("NoColor" is the empty string,
# "Normal" resets attributes).
gAnsiColors: Mapping[str, str] = {
    "Black": "\x1b[0;30m",
    "BlinkBlack": "\x1b[5;30m",
    "BlinkBlue": "\x1b[5;34m",
    "BlinkCyan": "\x1b[5;36m",
    "BlinkGreen": "\x1b[5;32m",
    "BlinkLightGray": "\x1b[5;37m",
    "BlinkPurple": "\x1b[5;35m",
    "BlinkRed": "\x1b[5;31m",
    "BlinkYellow": "\x1b[5;33m",
    "Blue": "\x1b[0;34m",
    "Brown": "\x1b[0;33m",
    "Cyan": "\x1b[0;36m",
    "DarkGray": "\x1b[1;30m",
    "Green": "\x1b[0;32m",
    "LightBlue": "\x1b[1;34m",
    "LightCyan": "\x1b[1;36m",
    "LightGray": "\x1b[0;37m",
    "LightGreen": "\x1b[1;32m",
    "LightPurple": "\x1b[1;35m",
    "LightRed": "\x1b[1;31m",
    "NoColor": "",
    "Normal": "\x1b[0m",
    "Purple": "\x1b[0;35m",
    "Red": "\x1b[0;31m",
    "White": "\x1b[1;37m",
    "Yellow": "\x1b[1;33m",
}
class DS_DISPLAY_TYPES(IntEnum):
    """Integer codes for the three display targets: HTML, repr and str."""

    HTML = 1
    REPR = 2
    STR = 3
class DATETIME_TYPES(IntEnum):
    """Integer codes for datetime kinds; only ORDINAL_DATE is defined."""

    ORDINAL_DATE = 1
# Named date format strings (presumably strftime-style patterns -- confirm
# against the code that consumes them).
DateTimeFormats: Mapping[str, str] = {"day": "%d-%b-%Y"}
class DisplayDetectModes(IntEnum):
    """Integer codes for display environments (note that value 4 is unused)."""

    Jupyter = 1
    Ipython = 2
    Console = 3
    HTML = 5
class DisplayArrayTypes(IntEnum):
    """Integer codes for array kinds used by display (note that value 8 is unused)."""

    Bool = 0
    Integer = 1
    Float = 2
    Bytes = 3
    Categorical = 4
    String = 5
    DateTime = 6
    DateTimeBase = 7
    DateTimeNano = 9
    TimeSpan = 10
    Record = 11
class DisplayLength(IntEnum):
    """Integer codes for display length: undefined, short, medium or long."""

    Undefined = 0
    Short = 1
    Medium = 2
    Long = 3
class TimeFormat(IntEnum):
    """Integer codes for the time formats."""

    Clock = 1
    YearMonthDay = 2
    SIGNano = 3
class DisplayJustification(IntEnum):
    """Integer codes for text justification: undefined, left, right or center."""

    Undefined = 0
    Left = 1
    Right = 2
    Center = 3
class DisplayTextDecoration(IntEnum):
    """Integer codes for text decorations."""

    Undefined = 0
    Bold = 1
    Italic = 2
    Underline = 3
    Strikethrough = 4


class DisplayNumberSeparator:
    """Separator strings for numbers (plain class attributes, not an enum)."""

    NoSeparator = ""
    Comma = ","
    # Period = "." #BUG, fix later
    Underscore = "_"
class DisplayColumnColors(IntEnum):
    """Integer codes for column color styles."""

    Default = 0  # no styling
    Rownum = 1  # row numbers / default header color
    Sort = 2  # regular sort header and column data
    Groupby = 3  # groupby header and column data

    # multiset comparison colors
    Multiset_head_a = 4  # comparison color for multiset columns headers
    Multiset_head_b = 5  # comparison color for multiset columns headers
    Multiset_col_a = 6  # comparison color for multiset column data
    Multiset_col_b = 7  # comparison color for multiset column data

    Accum2t = 8
    Purple = 9
    Pink = 10
    Red = 11
    GrayItalic = 12
    DarkBlue = 13
    BGColor = 14
    FGColor = 15
class ColumnStyle:
    """
    Display style for an entire column or for an individual cell.

    These styles override the defaults from the _display_query_properties
    callback in FastArray.

    See also: DisplayColumn, DisplayCell, ItemFormat

    Attributes
    ----------
    color : DisplayColumnColors
    align : DisplayJustification
    decoration : DisplayTextDecoration
    width : DisplayLength (default) OR can be set to new max width for array
        item's string repr
    """

    def __init__(
        self,
        color=DisplayColumnColors.Default,
        align=DisplayJustification.Right,
        decoration=DisplayTextDecoration.Undefined,
        width=None,
    ):
        self.color = color
        self.align = align
        self.decoration = decoration
        self.width = width

    def _build_string(self):
        """Return the multi-line text shared by ``__repr__`` and ``__str__``."""
        # NOTE(review): the leading padding of these labels may have been
        # collapsed when this listing was extracted -- confirm against the
        # repository before relying on the exact spacing.
        rows = [
            f" {self.__class__.__name__}",
            f" color: {DisplayColumnColors(self.color).name}",
            f" align: {DisplayJustification(self.align).name}",
            f"decoration: {DisplayTextDecoration(self.decoration).name}",
            f" width: {self.width}",
        ]
        return "\n".join(rows)

    def __repr__(self):
        return self._build_string()

    def __str__(self):
        return self._build_string()


class DisplayColorMode(IntEnum):
    NoColors = 0
    Light = 1
    Dark = 2


class CategoryMode(IntEnum):
    Default = 0
    StringArray = 1
    IntEnum = 2
    Dictionary = 3
    NumericArray = 4
    MultiKey = 5


class CategoricalOrigin(IntEnum):
    CategoricalView = 0
    CategoricalCopy = 1
    StringList = 2
    StringListWithCategories = 3
    NumericList = 4
    IndexWithCategories = 5
    CodeMapping = 6
    Multikey = 7
    Matlab = 8
    Pandas = 9
    SDSFile = 10


class CategoryStringMode(IntEnum):
    Default = 0
    Bytes = 1
    Unicode = 2


class CategoricalConstructor(IntEnum):
    EmptyValues = 0
    IntegerValues = 1
    FloatValues = 2
    StringValues = 3
    MultikeyListValues = 4
    MultikeyDictValues = 5


class ApplyType(IntEnum):
    Invalid = 0
    ReduceDataset = 1
    ReduceList = 2
    Dataset = 3
    Arrays = 4


class CompressionMode(IntEnum):
    Compress = 0
    Decompress = 1
    CompressFile = 2
    DecompressFile = 3
    SharedMemory = 4
    Info = 5


class CompressionType(IntEnum):
    Uncompressed = 0
    ZStd = 1


class ColumnAttribute(IntEnum):
    Default = 0
    Left = 1
    Right = 2


class SDSFlag(IntEnum):
    # each value is a distinct single bit
    OriginalContainer = 0x01
    Stackable = 0x02
    Scalar = 0x04
    Nested = 0x08
    Meta = 0x10


class SDSFileType(IntEnum):
    Unknown = 0
    Struct = 1
    Dataset = 2
    Table = 3
    Array = 4
    OneFile = 5  # new for one file


class DayOfWeek(IntEnum):
    # Monday-based numbering: Monday is 0, Sunday is 6
    Monday = 0
    Tuesday = 1
    Wednesday = 2
    Thursday = 3
    Friday = 4
    Saturday = 5
    Sunday = 6


# allowing / now for denest
INVALID_FILE_CHARS: Tuple[str, ...] = ("\\", ":", "<", ">", "!", "|", "*", "?")

# Statistic name -> one-line human-readable description.
gBasicStats: Mapping[str, str] = {
    "count": "Number of non-null observations",
    "sum": "Sum of values",
    "mean": "Mean of values",
    "mad": "Mean absolute deviation",  # median(abs(a - median(a)))
    "median": "Arithmetic median of values",
    "min": "Minimum",
    "max": "Maximum",
    "std": "Unbiased standard deviation",
    "var": "Unbiased variance",
    "nansum": "Sum of values",
    "nanmean": "Mean of values",
    "nanmad": "Mean absolute deviation",
    "nanmedian": "Arithmetic median of values",
    "nanmin": "Minimum",
    "nanmax": "Maximum",
    "nanstd": "Unbiased standard deviation",
    "nanvar": "Unbiased variance",
    "mode": "Mode",
    "abs": "Absolute Value",
    "prod": "Product of values",
    "sem": "Unbiased standard error of the mean",
    "skew": "Unbiased skewness (3rd moment)",
    "kurt": "Unbiased kurtosis (4th moment)",
    "quantile": "Sample quantile (value at %)",
    "cumsum": "Cumulative sum",
    "cumprod": "Cumulative product",
    "cummax": "Cumulative maximum",
    "cummin": "Cumulative minimum",
}

#####################################################################################
# Structs begin
#####################################################################################

# Used in display table for multi-line column headers
# color_group indexing starts at 0
# cell_span cannot be 0. a cell_span of 1 indicates 1 cell wide.
ColHeader = namedtuple("ColHeader", ("col_name", "cell_span", "color_group"))


###################################
## TJD NOTE: Need to use strings instead of enum here
##################################
class TypeId(IntEnum):
    """Integer ids for riptable's special classes (note that value 16 is unused)."""

    Default = 0
    Struct = 1
    Dataset = 2
    Multiset = 3
    GroupBy = 4
    Grouping = 5
    FastArray = 6
    MathLedger = 7
    Categorical = 8
    Categories = 9
    Accum2 = 10
    DisplayDetect = 11
    DisplayOptions = 12
    DisplayTable = 13
    SortCache = 14
    DateTimeBase = 15
    DateTimeNano = 17
    TimeSpan = 18
    TimeZone = 19
    Calendar = 20
    Date = 21
    DateSpan = 22
    PDataset = 23


######################################################
# SDS File Header order
######################################################
gSDSFileHeader: List[str] = [
    "SDSHeaderMagic",
    "VersionHigh",
    "VersionLow",
    "CompMode",
    "CompType",
    "CompLevel",
    # ----- offset 16 -----
    "NameBlockSize",
    "NameBlockOffset",
    "NameBlockCount",
    "FileType",  # struct, dataset
    "AuthorId",  # python, matlab
    # ----- offset 48 -----
    "MetaBlockSize",
    "MetaBlockOffset",
    # ----- offset 64 -----
    "TotalMetaCompressedSize",
    "TotalMetaUncompressedSize",
    # ----- offset 80 -----
    "ArrayBlockSize",
    "ArrayBlockOffset",
    # ----- offset 96 -----
    "ArraysWritten",
    "ArrayFirstOffset",
    # ----- offset 112 -----
    "TotalArrayCompressedSize",
    "TotalArrayUncompressedSize",
]

# File extension for SDS files, as text and as bytes.
SDS_EXTENSION: str = ".sds"
SDS_EXTENSION_BYTES: bytes = b".sds"

# please keep the TypeRegister at the end of the file
class TypeRegister:
    """
    Registry of riptable's special classes.

    Every attribute below starts as ``None``. When the special classes are
    loaded they register themselves with this class, which avoids cyclical
    dependencies between modules.
    """

    Struct: ClassVar[Type["Struct"]] = None
    Dataset: ClassVar[Type["Dataset"]] = None
    Multiset: ClassVar[Type["Multiset"]] = None
    GroupBy: ClassVar[Type["GroupBy"]] = None
    Grouping: ClassVar[Type["Grouping"]] = None
    FastArray: ClassVar[Type["FastArray"]] = None
    MathLedger: ClassVar[Type["MathLedger"]] = None
    Categorical: ClassVar[Type["Categorical"]] = None
    Categories: ClassVar[Type["Categories"]] = None
    Accum2: ClassVar[Type["Accum2"]] = None
    DisplayDetect: ClassVar[Type["DisplayDetect"]] = None
    DisplayOptions: ClassVar[Type["DisplayOptions"]] = None
    DisplayTable: ClassVar[Type["DisplayTable"]] = None
    DisplayString: ClassVar[Type["DisplayString"]] = None
    DisplayAttributes: ClassVar[Type["DisplayAttributes"]] = None
    DisplayText: ClassVar[Type["DisplayText"]] = None
    SortCache: ClassVar[Type["SortCache"]] = None
    DateTimeBase: ClassVar[Type["DateTimeBase"]] = None
    DateTimeNano: ClassVar[Type["DateTimeNano"]] = None
    TimeSpan: ClassVar[Type["TimeSpan"]] = None
    SharedMemory: ClassVar[Type["SharedMemory"]] = None
    TimeZone: ClassVar[Type["TimeZone"]] = None
    Calendar: ClassVar[Type["Calendar"]] = None
    DateBase: ClassVar[Type["DateBase"]] = None
    Date: ClassVar[Type["Date"]] = None
    DateSpan: ClassVar[Type["DateSpan"]] = None
    PDataset: ClassVar[Type["PDataset"]] = None

    @classmethod
    def validate_registry(cls):
        """Raise ``RuntimeError`` naming every public attribute that is still ``None``.

        Names are taken from ``dir(cls)``; values are read from ``TypeRegister``
        itself. The error message lists the missing names in sorted order.
        """
        unset = sorted(
            name for name in dir(cls) if not name.startswith("_") and getattr(TypeRegister, name) is None
        )
        if unset:
            msg = ", ".join(unset)
            raise RuntimeError(f"riptable: Improper initialization! Missing: {msg}")

    @classmethod
    def is_computable(cls, other):
        """Return True unless ``other`` has a non-computable dtype char or is a Categorical."""
        if other.dtype.char in NumpyCharTypes.Noncomputable:
            return False
        return not isinstance(other, cls.Categorical)

    @classmethod
    def is_array_subclass(cls, arr):
        """
        Return True if ``arr`` is an instance of a FastArray or numpy array subclass.

        Certain routines can be sped up by skipping the logic before falling
        back on a numpy call.

        Note: this is different than using python's issubclass(), which returns
        True if the classes are the same. Here an exact ``np.ndarray`` or exact
        ``FastArray`` gives False, as does anything that is not an ndarray.
        """
        if not isinstance(arr, np.ndarray):
            return False
        kind = type(arr)
        return kind != np.ndarray and kind != cls.FastArray

    @classmethod
    def is_binned_array(cls, arr):
        """
        Return True if ``arr`` is an instance of the registered Categorical.

        Use this instead of checking isinstance(item, TypeRegister.Categorical),
        to leave room for other binned types in the future.

        Called by (per the original note):
            Dataset.melt() -re-expands
            Dataset.from_jagged_rows() -re-expands
            GroupBy.__init__ -calls grouping, gb_keychain properties to borrow bins
        """
        return isinstance(arr, cls.Categorical)

    @classmethod
    def is_binned_type(cls, arrtype):
        """
        Return True if ``arrtype`` equals the registered Categorical class.

        Checks the type rather than the instance. See also is_binned_array().

        Called by (per the original note): rt_utils._multistack_items()
        """
        return arrtype == cls.Categorical

    # ---------------------------------------------------------------
    @classmethod
    def is_spanlike(cls, arr: np.ndarray):
        """return True if it is a datespan or timespan"""
        # TODO: datetime/span are computable sometimes... need a way to distinguish from other FA subclasses
        # simple math works, but not larger groupby operations like sum
        span_types = (TypeRegister.TimeSpan, TypeRegister.DateSpan)
        return isinstance(arr, span_types)

    # ---------------------------------------------------------------
    @classmethod
    def is_datelike(cls, arr: np.ndarray):
        """return True if it is a date or time"""
        # TODO: datetime/span are computable sometimes... need a way to distinguish from other FA subclasses
        # simple math works, but not larger groupby operations like sum
        date_types = (
            TypeRegister.DateTimeNano,
            TypeRegister.TimeSpan,
            TypeRegister.Date,
            TypeRegister.DateSpan,
        )
        return isinstance(arr, date_types)

    @classmethod
    def is_string_or_object(cls, arr):
        """Return True for any array subclass, or when the dtype char is one of ``O``, ``S``, ``U``."""
        return cls.is_array_subclass(arr) or arr.dtype.char in "OSU"

    @classmethod
    def newclassfrominstance(cls, instance, origin):
        """After slicing or an array routine, return a new instance of a FastArray subclass.

        If ``origin`` has no ``newclassfrominstance`` attribute, ``instance`` is
        returned unchanged.

        Parameters
        ----------
        instance : ndarray
            Array generated from operation.
        origin : ndarray
            Array, possibly a FastArray subclass.

        Returns
        -------
        instance : ndarray
            Whatever ``origin.newclassfrominstance(instance, origin)`` returns
            when origin defines it, otherwise ``instance`` itself.
        """
        # FastArray subclasses should define this classmethod to return
        # a new object with a different instance array
        if not hasattr(origin, "newclassfrominstance"):
            return instance
        return origin.newclassfrominstance(instance, origin)

    # put these here for now
    # where should they live?
    @classmethod
    def as_meta_data(cls, obj):
        """Placeholder: does nothing and returns None."""
        return None

    @classmethod
    def from_meta_data(cls, itemdict: Optional[dict] = None, flags: Optional[list] = None, meta: str = ""):
        """Build an item by delegating to the class named in ``meta``.

        ``meta`` is wrapped in ``MetaData``; its ``itemclass`` attribute supplies
        the class whose ``_from_meta_data`` is called. ``itemdict`` and ``flags``
        default to a fresh empty dict and list.
        """
        from .Utils.rt_metadata import MetaData

        itemdict = dict() if itemdict is None else itemdict
        flags = list() if flags is None else flags
        meta = MetaData(meta)
        return meta.itemclass._from_meta_data(itemdict, flags, meta)