# Source code for riptable.rt_enum

# Names exported by a star-import of this module. Other module-level names
# defined in this file (for example the g* lookup tables) are not listed here
# and must be imported explicitly.
__all__ = [
    "DATETIME_TYPES",
    "DS_DISPLAY_TYPES",
    "GB_FUNCTIONS",
    "MATH_OPERATION",
    "INVALID_DICT",
    "TIMEWINDOW_FUNCTIONS",
    "NumpyCharTypes",
    "REDUCE_FUNCTIONS",
    "ROLLING_FUNCTIONS",
    "SD_TYPES",
    "SM_DTYPES",
    "TypeRegister",
    "DisplayJustification",
    "DisplayColumnColors",
    "DisplayArrayTypes",
    "DisplayDetectModes",
    "DisplayLength",
    "ColHeader",
]


import sys
from collections import namedtuple
from enum import IntEnum
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
)

import numpy as np

if TYPE_CHECKING:
    from .rt_accum2 import Accum2
    from .rt_categorical import Categorical
    from .rt_dataset import Dataset
    from .rt_datetime import (
        Date,
        DateBase,
        DateSpan,
        DateTimeBase,
        DateTimeNano,
        TimeSpan,
    )
    from .rt_display import (
        DisplayAttributes,
        DisplayDetect,
        DisplayOptions,
        DisplayString,
        DisplayTable,
        DisplayText,
    )
    from .rt_fastarray import FastArray
    from .rt_groupby import GroupBy
    from .rt_grouping import Grouping
    from .rt_ledger import MathLedger
    from .rt_multiset import Multiset
    from .rt_pdataset import PDataset
    from .rt_sharedmemory import SharedMemory
    from .rt_sort_cache import SortCache
    from .rt_struct import Struct
    from .rt_timezone import Calendar, TimeZone


# Common strings used to indicate special columns or items.
# Each constant holds the label text only; how a label is applied is decided
# by the code that imports it.
INVALID_SHORT_NAME: str = "Inv"
INVALID_LONG_NAME: str = "Invalid"
TOTAL_LONG_NAME: str = "Total"
CLIPPED_LONG_NAME: str = "Clipped"
FILTERED_LONG_NAME: str = "Filtered"
GROUPBY_KEY_PREFIX: str = "key"
"""Default groupby key name - followed by _n for n"""

# Sentinel ("invalid") values for the 32- and 64-bit integer types: the minimum
# value for signed types and the maximum value for unsigned types.
INVALID_POINTER_32: int = -2147483648
INVALID_POINTER_U32: int = 0xFFFFFFFF  # 4294967295
INVALID_POINTER_64: int = -9223372036854775808  # bit pattern 0x8000000000000000
INVALID_POINTER_U64: int = 0xFFFFFFFFFFFFFFFF  # 18446744073709551615


# Maps a numpy type number (``ndarray.dtype.num``) to the sentinel value used
# for "invalid" in arrays of that dtype.
#
# Type numbers 7 and 8 are the C ``long`` / ``unsigned long`` types, whose width
# is platform dependent: 32 bits on win32 (msvc) and 64 bits elsewhere. Those
# two entries are therefore selected from ``sys.platform``; every other entry
# is the same on all platforms.
#
# Type numbers with no entry (no invalid value is defined here):
#   14, 15, 16 -> complex64, complex128, complex long double
#   20         -> numpy.void
#   21         -> numpy.datetime64
#   22         -> numpy.timedelta64
INVALID_DICT: Mapping[int, Any] = {
    0: False,  # np.bool_
    1: -128,  # np.int8
    2: 255,  # np.uint8
    3: -32768,  # np.int16
    4: 65535,  # np.uint16
    5: INVALID_POINTER_32,  # np.int32
    6: INVALID_POINTER_U32,  # np.uint32
    7: INVALID_POINTER_32 if sys.platform == "win32" else INVALID_POINTER_64,  # C long (np.int_)
    8: INVALID_POINTER_U32 if sys.platform == "win32" else INVALID_POINTER_U64,  # C unsigned long (np.uint)
    9: INVALID_POINTER_64,  # np.int64
    10: INVALID_POINTER_U64,  # np.uint64
    11: np.nan,  # np.float32
    12: np.nan,  # np.float64
    13: np.nan,  # np.longdouble
    17: None,  # np.object_
    18: b"",  # np.bytes_
    19: "",  # np.str_
    23: np.nan,  # np.float16
}


# See DatasetRW.h
# Official list of data types

[docs]
class SM_DTYPES(IntEnum):
    """
    Integer codes for data types.

    The comment above this class points to DatasetRW.h as the reference list.
    Values 14-16 are not assigned, and half precision appears twice under
    different codes (DT_FLOAT16 = 11 and DT_HALF = 23).
    """

    DT_INVALID = 0
    DT_BOOL = 1
    DT_BYTE = 2
    DT_INT8 = 3
    DT_INT16 = 4
    DT_INT32 = 5
    DT_INT64 = 6
    DT_UINT8 = 7
    DT_UINT16 = 8
    DT_UINT32 = 9
    DT_UINT64 = 10
    DT_FLOAT16 = 11
    DT_FLOAT32 = 12
    DT_FLOAT64 = 13
    DT_OBJECT = 17
    DT_BYTES = 18  # fixed size ascii string uses this
    DT_UNICODE = 19
    DT_NPVOID = 20
    DT_DATETIME64 = 21
    DT_TIMEDELTA64 = 22
    DT_HALF = 23
    DT_CHARARRAY = 24  # how we like to store a fixed size ascii




[docs]
class SD_TYPES(IntEnum):
    """
    Integer codes for kinds of stored items, in two groups: Matlab based
    (1-10) and Python based (20-21). SD_UNKNOWN is 0.
    """

    SD_UNKNOWN = 0

    # Matlab based
    SD_FUNCTIONH = 1
    SD_DATASET = 2
    SD_CLASS = 3
    SD_STRUCT = 4
    SD_SCALAR = 5
    SD_CHAR = 6
    SD_LOGICAL = 7
    SD_CELL = 8
    SD_NUMERIC = 9
    SD_VECTOR = 10

    # Python based
    SD_PANDAS = 20
    SD_NUMPY = 21




[docs]
class MATH_OPERATION(IntEnum):
    """
    MATH_OPERATION is the encoding of the Riptable implemented mathematical operations.

    Members are grouped into numeric bands by arity and result type (1-99,
    100s, 200s, ... 600s); the comment heading each band describes it.
    """

    # MATH_OPERATION is how Riptable communicates with RiptideCPP and this enumeration
    # is repeated in RiptideCPP CommonInc.h. Any changes made to either location must be
    # reflected in both places.
    # Two ops, returns same type
    # (value 4 is not assigned in this band)
    ADD = 1
    SUB = 2
    MUL = 3
    MOD = 5
    MIN = 6
    MAX = 7
    NANMIN = 8
    NANMAX = 9
    FLOORDIV = 10
    POWER = 11
    REMAINDER = 12
    FMOD = 13

    # Two ops always return a float
    # NOTE(review): SUBDATES sits in this band but its own comment says
    # "same dtype in/out (int32/int64)" - confirm which is intended.
    DIV = 101
    SUBDATETIMES = 102  # will check both sides for INV/ZEROS input:int64/int32 (returns double)
    # SUBDATETIMESRIGHT = 103,  # will check right hand side for ZEROS input:int64/int32 (returns double)
    SUBDATES = 103  # will check both sides for INV/ZEROS same dtype in/out (int32/int64)
    # ADDDATES = 105           # will check left hand side for ZEROS same dtype in/out (int32/int64)

    # One input returns same data type
    # (values 209-211 are not assigned in this band)
    ABS = 201
    NEG = 202
    FABS = 203
    INVERT = 204
    FLOOR = 205
    CEIL = 206
    TRUNC = 207
    ROUND = 208
    NEGATIVE = 212
    POSITIVE = 213
    SIGN = 214
    RINT = 215
    EXP = 216
    EXP2 = 217
    # does not allow floats
    BITWISE_NOT = 218

    # One input always return a float one input
    SQRT = 301
    LOG = 302
    LOG2 = 303
    LOG10 = 304
    EXPM1 = 305
    LOG1P = 306
    SQUARE = 307
    CBRT = 308
    RECIPROCAL = 309

    # Two inputs Always return a bool
    CMP_EQ = 401
    CMP_NE = 402
    CMP_LT = 403
    CMP_GT = 404
    CMP_LTE = 405
    CMP_GTE = 406
    LOGICAL_AND = 407
    LOGICAL_XOR = 408
    LOGICAL_OR = 409

    # Two inputs
    BITWISE_LSHIFT = 501
    BITWISE_RSHIFT = 502
    BITWISE_AND = 503
    BITWISE_XOR = 504
    BITWISE_OR = 505
    BITWISE_ANDNOT = 506
    BITWISE_NOTAND = 507

    BITWISE_XOR_SPECIAL = 550

    # one input output bool
    # (value 602 is not assigned in this band)
    LOGICAL_NOT = 601
    ISINF = 603
    ISNAN = 604
    ISFINITE = 605
    ISNORMAL = 606
    ISNOTINF = 607
    ISNOTNAN = 608
    ISNOTFINITE = 609
    ISNOTNORMAL = 610
    ISNANORZERO = 611
    SIGNBIT = 612



# The four tables below are keyed by the numpy callable itself. A value of None
# means the table assigns no MATH_OPERATION code to that callable; presumably
# the consumer then falls back to numpy - confirm against the caller.
gBinaryUFuncs: Mapping[Callable, Optional[MATH_OPERATION]] = {
    # math ops
    np.add: MATH_OPERATION.ADD,
    np.subtract: MATH_OPERATION.SUB,
    np.multiply: MATH_OPERATION.MUL,
    np.matmul: None,
    np.divide: MATH_OPERATION.DIV,
    np.true_divide: MATH_OPERATION.DIV,
    np.floor_divide: MATH_OPERATION.FLOORDIV,
    np.remainder: MATH_OPERATION.REMAINDER,
    np.fmod: MATH_OPERATION.FMOD,
    np.mod: MATH_OPERATION.MOD,
    np.power: MATH_OPERATION.POWER,
    np.minimum: MATH_OPERATION.MIN,
    np.maximum: MATH_OPERATION.MAX,
    np.fmin: MATH_OPERATION.NANMIN,
    np.fmax: MATH_OPERATION.NANMAX,
}
"""
The mapping of Numpy to Riptable arithmetic binary operator overrides.

See Also
--------
MATH_OPERATION : the full set of mathematical operations supported by Riptable.
"""


gBinaryLogicalUFuncs: Mapping[Callable, MATH_OPERATION] = {
    # comparisons
    np.less_equal: MATH_OPERATION.CMP_LTE,
    np.less: MATH_OPERATION.CMP_LT,
    np.equal: MATH_OPERATION.CMP_EQ,
    np.not_equal: MATH_OPERATION.CMP_NE,
    np.greater: MATH_OPERATION.CMP_GT,
    np.greater_equal: MATH_OPERATION.CMP_GTE,
    np.logical_and: MATH_OPERATION.LOGICAL_AND,
    np.logical_xor: MATH_OPERATION.LOGICAL_XOR,
    np.logical_or: MATH_OPERATION.LOGICAL_OR,
}
"""
The mapping of Numpy to Riptable comparison function overrides.

See Also
--------
MATH_OPERATION : the full set of mathematical operations supported by Riptable.
"""


# NOTE(review): the shifts map to None here although MATH_OPERATION defines
# BITWISE_LSHIFT / BITWISE_RSHIFT - confirm this is deliberate.
gBinaryBitwiseUFuncs: Mapping[Callable, Optional[MATH_OPERATION]] = {
    # bitwise operations only apply to bool and integers
    np.left_shift: None,
    np.right_shift: None,
    np.bitwise_and: MATH_OPERATION.BITWISE_AND,
    np.bitwise_xor: MATH_OPERATION.BITWISE_XOR,
    np.bitwise_or: MATH_OPERATION.BITWISE_OR,
}
"""
The mapping of Numpy to Riptable bit-twiddling binary operator overrides.

See Also
--------
MATH_OPERATION : the full set of mathematical operations supported by Riptable.
"""


gBinaryBitwiseMonoUFuncs: Mapping[Callable, Optional[MATH_OPERATION]] = {
    # bitwise operations only apply to bool and integers
    np.invert: None
}
"""
The mapping of Numpy to Riptable bit-twiddling unary operator overrides.

See Also
--------
MATH_OPERATION : the full set of mathematical operations supported by Riptable.
"""


# Keyed by the numpy callable itself. A value of None means this table assigns
# no MATH_OPERATION code to that callable.
# Each key is listed once: a repeated key in a dict literal silently keeps only
# the last value, which hides mistakes when the table is edited.
# NOTE(review): np.rint maps to ROUND although MATH_OPERATION also defines
# RINT, and expm1/log1p/square/reciprocal map to None although codes exist
# for them - confirm these are deliberate.
gUnaryUFuncs: Mapping[Callable, Optional[MATH_OPERATION]] = {
    # math ops
    np.absolute: MATH_OPERATION.ABS,
    np.abs: MATH_OPERATION.ABS,
    np.fabs: MATH_OPERATION.FABS,
    np.invert: MATH_OPERATION.INVERT,
    np.floor: MATH_OPERATION.FLOOR,
    np.ceil: MATH_OPERATION.CEIL,
    np.trunc: MATH_OPERATION.TRUNC,
    np.round: MATH_OPERATION.ROUND,
    np.rint: MATH_OPERATION.ROUND,
    np.isinf: MATH_OPERATION.ISINF,
    np.isnan: MATH_OPERATION.ISNAN,
    np.isfinite: MATH_OPERATION.ISFINITE,
    np.signbit: MATH_OPERATION.SIGNBIT,
    np.negative: MATH_OPERATION.NEGATIVE,
    np.positive: MATH_OPERATION.POSITIVE,
    np.sign: MATH_OPERATION.SIGN,
    np.exp: MATH_OPERATION.EXP,
    np.exp2: MATH_OPERATION.EXP2,
    np.log: MATH_OPERATION.LOG,
    np.log2: MATH_OPERATION.LOG2,
    np.log10: MATH_OPERATION.LOG10,
    np.expm1: None,
    np.log1p: None,
    np.sqrt: MATH_OPERATION.SQRT,
    np.square: None,
    np.cbrt: MATH_OPERATION.CBRT,
    np.reciprocal: None,
    np.logical_not: MATH_OPERATION.LOGICAL_NOT,
    np.bitwise_not: MATH_OPERATION.BITWISE_NOT,
}
"""
The mapping of Numpy to Riptable arithmetic unary operator overrides.

See Also
--------
MATH_OPERATION : the full set of mathematical operations supported by Riptable.
"""


# Table of numpy nan-aware functions, keyed by the numpy callable.
# Every entry currently maps to None, i.e. no replacement callable is
# registered for any of them in this table.
gNanFuncs: Mapping[Callable, Optional[Callable]] = {
    np.nanargmax: None,
    np.nanargmin: None,
    np.nancumprod: None,
    np.nancumsum: None,
    np.nanmax: None,
    np.nanmean: None,
    np.nanmedian: None,  # bug in numpy for floats?
    np.nanmin: None,
    np.nanpercentile: None,
    np.nanprod: None,
    np.nanstd: None,
    np.nansum: None,
    np.nanvar: None,
}


# See GroupBy.h
# Official list of functions

[docs]
class ROLLING_FUNCTIONS(IntEnum):
    """
    Integer codes for rolling-window functions.

    The sum/mean/var/std codes use the same numeric values as the
    corresponding REDUCE_FUNCTIONS members (0, 1, 102, 103, 106-109).
    Value 105 is not assigned.
    """

    ROLLING_SUM = 0
    ROLLING_NANSUM = 1
    ROLLING_MEAN = 102
    ROLLING_NANMEAN = 103
    ROLLING_QUANTILE = 104

    ROLLING_VAR = 106
    ROLLING_NANVAR = 107
    ROLLING_STD = 108
    ROLLING_NANSTD = 109




[docs]
class REDUCE_FUNCTIONS(IntEnum):
    """
    Integer codes for reduce functions.

    Each plain function is immediately followed by its nan-prefixed variant,
    whose code is one higher. REDUCE_ANY and REDUCE_ALL are marked TODO below.
    """

    REDUCE_SUM = 0
    REDUCE_NANSUM = 1
    REDUCE_MEAN = 102
    REDUCE_NANMEAN = 103
    REDUCE_VAR = 106
    REDUCE_NANVAR = 107
    REDUCE_STD = 108
    REDUCE_NANSTD = 109
    REDUCE_MIN = 200
    REDUCE_NANMIN = 201
    REDUCE_MAX = 202
    REDUCE_NANMAX = 203
    REDUCE_ARGMIN = 204
    REDUCE_NANARGMIN = 205
    REDUCE_ARGMAX = 206
    REDUCE_NANARGMAX = 207

    # for Jack TODO
    REDUCE_ANY = 208
    REDUCE_ALL = 209



# Keyed by the numpy binary ufunc; the value is the REDUCE_FUNCTIONS code this
# table associates with it (add -> sum, minimum -> min, maximum -> max).
gReduceUFuncs: Mapping[Callable, REDUCE_FUNCTIONS] = {
    np.add: REDUCE_FUNCTIONS.REDUCE_SUM,
    np.minimum: REDUCE_FUNCTIONS.REDUCE_MIN,
    np.maximum: REDUCE_FUNCTIONS.REDUCE_MAX
    ## TODO add support for the following reduce ufuncs
    #   np.logical_and: RIPTIDE NP.ALL
    #   np.logical_or:  RIPTIDE NP.ANY
}
"""
The mapping of Numpy to Riptable reduce function overrides.

See Also
--------
REDUCE_FUNCTIONS encoding of all reduce functions that riptable supports
"""



[docs]
class TIMEWINDOW_FUNCTIONS(IntEnum):
    """Integer codes for the two time-window functions: sum (0) and product (1)."""

    TIMEWINDOW_SUM = 0
    TIMEWINDOW_PROD = 1



# See GroupBy.h
# Official list of functions

[docs]
class GB_FUNCTIONS(IntEnum):
    """
    Integer codes for groupby functions, arranged in numeric bands:

    - 0-5     basic aggregations
    - 50-55   nan-aware versions of the same aggregations (code + 50)
    - 100-106 first/nth/last, median, mode, trimbr, quantile
    - 200-207 rolling functions
    - 300-309 cumulative and EMA functions
    """

    GB_SUM = 0
    GB_MEAN = 1
    GB_MIN = 2
    GB_MAX = 3

    # STD uses VAR with the param set to 1
    GB_VAR = 4
    GB_STD = 5

    GB_NANSUM = 50
    GB_NANMEAN = 51
    GB_NANMIN = 52
    GB_NANMAX = 53
    GB_NANVAR = 54
    GB_NANSTD = 55

    GB_FIRST = 100
    GB_NTH = 101
    GB_LAST = 102
    GB_MEDIAN = 103  # auto handles nan
    GB_MODE = 104  # auto handles nan
    GB_TRIMBR = 105  # auto handles nan
    GB_QUANTILE_MULT = 106  # handles all (nan)median/quantile versions

    # All int/uints output upgraded to INT64
    # Output is all elements (not just grouped)
    GB_ROLLING_SUM = 200
    GB_ROLLING_NANSUM = 201
    GB_ROLLING_DIFF = 202
    GB_ROLLING_SHIFT = 203
    GB_ROLLING_COUNT = 204
    GB_ROLLING_MEAN = 205
    GB_ROLLING_NANMEAN = 206
    GB_ROLLING_QUANTILE = 207

    # In ema.cpp
    GB_CUMSUM = 300
    GB_EMADECAY = 301
    GB_CUMPROD = 302
    GB_FINDNTH = 303
    GB_EMANORMAL = 304
    GB_EMAWEIGHTED = 305
    GB_CUMNANMAX = 306
    GB_CUMNANMIN = 307
    GB_CUMMAX = 308
    GB_CUMMIN = 309



# some groupby functions will work for strings
GB_STRING_ALLOWED = [
    GB_FUNCTIONS.GB_FIRST,
    GB_FUNCTIONS.GB_NTH,
    GB_FUNCTIONS.GB_LAST,
    GB_FUNCTIONS.GB_ROLLING_SHIFT,
    GB_FUNCTIONS.GB_MEDIAN,
]
# groupby function codes listed as allowed for dates (per the constant's name)
GB_DATE_ALLOWED = [
    GB_FUNCTIONS.GB_FIRST,
    GB_FUNCTIONS.GB_NTH,
    GB_FUNCTIONS.GB_LAST,
    GB_FUNCTIONS.GB_MEAN,
    GB_FUNCTIONS.GB_MIN,
    GB_FUNCTIONS.GB_MAX,
    GB_FUNCTIONS.GB_NANMEAN,
    GB_FUNCTIONS.GB_NANMIN,
    GB_FUNCTIONS.GB_NANMAX,
    GB_FUNCTIONS.GB_MEDIAN,
    GB_FUNCTIONS.GB_QUANTILE_MULT,
    GB_FUNCTIONS.GB_MODE,
    GB_FUNCTIONS.GB_ROLLING_DIFF,
    GB_FUNCTIONS.GB_ROLLING_SHIFT,
]

# Plain integer codes kept outside the GB_FUNCTIONS enum; none of these values
# collides with a GB_FUNCTIONS member.
# NOTE(review): presumably these tag count, user-supplied and numba functions
# respectively - confirm against the groupby code.
GB_FUNC_COUNT = -1
GB_FUNC_USER = 900
GB_FUNC_NUMBA = 1000


######################################################
# Numba groupby enums
######################################################
class GB_PACKUNPACK(IntEnum):
    """Two-valued flag: UNPACK (0) or PACK (1)."""

    UNPACK = 0
    PACK = 1



[docs]
class NumpyCharTypes:
    """
    Named groups of numpy dtype character codes.

    Every attribute is a string of single-character dtype codes, so a group
    can be tested with ``dtype.char in NumpyCharTypes.<Group>``.
    """

    # --- broad groups -----------------------------------------------------
    All = "?bhilqpBHILQPefdgFDGSUVOMm"
    AllFloat = "efdgFDG"
    AllInteger = "bBhHiIlLqQpP"
    # does not include boolean or strings
    Computable = "fdgbBhHiIlLqQpP"
    Noncomputable = "SeFDGUVOMm"

    # --- what riptable supports -------------------------------------------
    # unsupported in riptable world
    Unsupported = "eFDGVOMm"
    Supported = "?fdgbBhHiIlLqQpPSUV"
    SupportedFloat = "fdg"
    SupportedAlternate = "?fdgbBhHiIlLqQpPSU"

    # --- by kind ----------------------------------------------------------
    Character = "c"
    Complex = "FDG"
    Datetime = "Mm"
    Float = "efdg"
    Float64 = "dg"
    Integer = "bhilqp"
    UnsignedInteger = "BHILQP"

    # --- 64-bit integers --------------------------------------------------
    # linux gcc compiler long is int64, msvc long is int32, so the C long
    # codes ("L" / "l") only count as 64-bit when not running on win32.
    UnsignedInteger64 = "QP" if sys.platform == "win32" else "LQP"
    SignedInteger64 = "qp" if sys.platform == "win32" else "lqp"



# memoryview plus the numpy scalar types.
# np.int32 / np.uint32 appear twice; the repetition is kept so the tuple's
# length and element positions stay exactly as before.
gNumpyScalarType: Tuple[type, ...] = (
    memoryview,
    # boolean and integers
    np.bool_,
    np.int8,
    np.uint8,
    np.int16,
    np.uint16,
    np.int32,
    np.uint32,
    np.int32,
    np.uint32,
    np.int64,
    np.uint64,
    # floating point and complex
    np.float16,
    np.float32,
    np.float64,
    np.complex64,
    np.complex128,
    # everything else
    np.object_,
    np.bytes_,
    np.str_,
    np.void,
    np.datetime64,
    np.timedelta64,
)


# The Python builtin scalar types followed by every entry of gNumpyScalarType,
# in that order.
gScalarType: Tuple[type, ...] = (int, float, complex, bool, bytes, str) + gNumpyScalarType


def int_dtype_from_len(newlen: int) -> np.dtype:
    """Return the smallest signed integer dtype chosen for an array of length `newlen`.

    Assumes that numbers up to the length will need to be stored by the returned dtype.
    Used by Grouping and Categorical.

    Parameters
    ----------
    newlen : int
        Array length.

    Returns
    -------
    np.dtype
        int8 below 100, int16 below 30,000, int32 below 2,000,000,000,
        otherwise int64.
    """
    # Exclusive upper bounds, smallest dtype first; each bound sits a little
    # under the dtype's real maximum.
    cutoffs = (
        (100, np.int8),
        (30_000, np.int16),
        (2_000_000_000, np.int32),
    )
    for upper, candidate in cutoffs:
        if newlen < upper:
            return np.dtype(candidate)
    return np.dtype(np.int64)


# Maps a color name to the ANSI escape sequence that selects it.
# Each sequence has the form ESC[<attr>;<color>m, where the attribute is
# 0 for the plain names, 1 for the Light*/DarkGray/White/Yellow names and
# 5 for the Blink* names.
# "NoColor" maps to the empty string and "Normal" to ESC[0m.
gAnsiColors: Mapping[str, str] = {
    "Black": "\x1b[0;30m",
    "BlinkBlack": "\x1b[5;30m",
    "BlinkBlue": "\x1b[5;34m",
    "BlinkCyan": "\x1b[5;36m",
    "BlinkGreen": "\x1b[5;32m",
    "BlinkLightGray": "\x1b[5;37m",
    "BlinkPurple": "\x1b[5;35m",
    "BlinkRed": "\x1b[5;31m",
    "BlinkYellow": "\x1b[5;33m",
    "Blue": "\x1b[0;34m",
    "Brown": "\x1b[0;33m",
    "Cyan": "\x1b[0;36m",
    "DarkGray": "\x1b[1;30m",
    "Green": "\x1b[0;32m",
    "LightBlue": "\x1b[1;34m",
    "LightCyan": "\x1b[1;36m",
    "LightGray": "\x1b[0;37m",
    "LightGreen": "\x1b[1;32m",
    "LightPurple": "\x1b[1;35m",
    "LightRed": "\x1b[1;31m",
    "NoColor": "",
    "Normal": "\x1b[0m",
    "Purple": "\x1b[0;35m",
    "Red": "\x1b[0;31m",
    "White": "\x1b[1;37m",
    "Yellow": "\x1b[1;33m",
}



[docs]
class DS_DISPLAY_TYPES(IntEnum):
    """Integer codes for the three display kinds: HTML, REPR and STR."""

    HTML = 1
    REPR = 2
    STR = 3




[docs]
class DATETIME_TYPES(IntEnum):
    """Integer codes for datetime kinds; ORDINAL_DATE is the only member."""

    ORDINAL_DATE = 1



# Maps a format name to a strftime-style pattern; "%d-%b-%Y" is
# day-of-month, abbreviated month name, four-digit year.
DateTimeFormats: Mapping[str, str] = {"day": "%d-%b-%Y"}



[docs]
class DisplayDetectModes(IntEnum):
    """
    Integer codes for display modes: Jupyter, Ipython, Console and HTML.
    Value 4 is not assigned.
    """

    Jupyter = 1
    Ipython = 2
    Console = 3
    HTML = 5




[docs]
class DisplayArrayTypes(IntEnum):
    """
    Integer codes for the array kinds distinguished for display.
    Value 8 is not assigned.
    """

    Bool = 0
    Integer = 1
    Float = 2
    Bytes = 3
    Categorical = 4
    String = 5
    DateTime = 6
    DateTimeBase = 7
    DateTimeNano = 9
    TimeSpan = 10
    Record = 11




[docs]
class DisplayLength(IntEnum):
    """Integer codes for display length: Undefined, Short, Medium or Long."""

    Undefined = 0
    Short = 1
    Medium = 2
    Long = 3



class TimeFormat(IntEnum):
    """Integer codes for time formats: Clock, YearMonthDay and SIGNano."""

    Clock = 1
    YearMonthDay = 2
    SIGNano = 3



[docs]
class DisplayJustification(IntEnum):
    """Integer codes for text justification: Undefined, Left, Right or Center."""

    Undefined = 0
    Left = 1
    Right = 2
    Center = 3



class DisplayTextDecoration(IntEnum):
    """
    Integer codes for text decoration: Undefined, Bold, Italic, Underline
    or Strikethrough.
    """

    Undefined = 0
    Bold = 1
    Italic = 2
    Underline = 3
    Strikethrough = 4


class DisplayNumberSeparator:
    """
    Separator strings for number display: none (empty string), comma or
    underscore. This is a plain class holding string constants, not an enum.
    """

    NoSeparator = ""
    Comma = ","
    # Period      = "." #BUG, fix later
    Underscore = "_"



[docs]
class DisplayColumnColors(IntEnum):
    """
    Integer codes for column color styles.

    Codes 0-7 carry an inline description; codes 8-15 are identified by name
    only.
    """

    Default = 0  # no styling
    Rownum = 1  # row numbers / default header color
    Sort = 2  # regular sort header and column data
    Groupby = 3  # groupby header and column data
    Multiset_head_a = 4  # comparison color for multiset columns headers
    Multiset_head_b = 5  # comparison color for multiset columns headers
    Multiset_col_a = 6  # comparison color for multiset column data
    Multiset_col_b = 7  # comparison color for multiset column data
    Accum2t = 8
    Purple = 9
    Pink = 10
    Red = 11
    GrayItalic = 12
    DarkBlue = 13
    BGColor = 14
    FGColor = 15



class ColumnStyle:
    """
    Display-style overrides for a whole column or for individual cells.

    These styles will override the defaults from the _display_query_properties
    callback in FastArray.
    See also: DisplayColumn, DisplayCell, ItemFormat

    Attributes
    ----------
    color : DisplayColumnColors
    align : DisplayJustification
    decoration : DisplayTextDecoration
    width : DisplayLength (default) OR can be set to new max width for array
        item's string repr
    """

    def __init__(
        self,
        color=DisplayColumnColors.Default,
        align=DisplayJustification.Right,
        decoration=DisplayTextDecoration.Undefined,
        width=None,
    ):
        self.color, self.align = color, align
        self.decoration, self.width = decoration, width

    def _build_string(self):
        """Return the multi-line text shared by ``repr()`` and ``str()``.

        The enum-valued fields are shown by member name; each value is passed
        through its enum class first, so a plain integer code works too.
        """
        rows = (
            f"     {self.__class__.__name__}",
            f"     color: {DisplayColumnColors(self.color).name}",
            f"     align: {DisplayJustification(self.align).name}",
            f"decoration: {DisplayTextDecoration(self.decoration).name}",
            f"     width: {self.width}",
        )
        return "\n".join(rows)

    def __repr__(self):
        return self._build_string()

    def __str__(self):
        return self._build_string()


class DisplayColorMode(IntEnum):
    """Integer codes for the display color mode: NoColors, Light or Dark."""

    NoColors = 0
    Light = 1
    Dark = 2


class CategoryMode(IntEnum):
    """
    Integer codes for category modes: Default, StringArray, IntEnum,
    Dictionary, NumericArray and MultiKey.
    """

    Default = 0
    StringArray = 1
    IntEnum = 2
    Dictionary = 3
    NumericArray = 4
    MultiKey = 5


class CategoricalOrigin(IntEnum):
    """
    Integer codes for categorical origins, numbered 0-10.
    NOTE(review): presumably records what a Categorical was built from -
    confirm against rt_categorical.
    """

    CategoricalView = 0
    CategoricalCopy = 1
    StringList = 2
    StringListWithCategories = 3
    NumericList = 4
    IndexWithCategories = 5
    CodeMapping = 6
    Multikey = 7
    Matlab = 8
    Pandas = 9
    SDSFile = 10


class CategoryStringMode(IntEnum):
    """Integer codes for category string modes: Default, Bytes or Unicode."""

    Default = 0
    Bytes = 1
    Unicode = 2


class CategoricalConstructor(IntEnum):
    """
    Integer codes naming kinds of constructor values: empty, integer, float,
    string, multikey list and multikey dict.
    """

    EmptyValues = 0
    IntegerValues = 1
    FloatValues = 2
    StringValues = 3
    MultikeyListValues = 4
    MultikeyDictValues = 5


class ApplyType(IntEnum):
    """
    Integer codes for apply types: Invalid, ReduceDataset, ReduceList,
    Dataset and Arrays.
    """

    Invalid = 0
    ReduceDataset = 1
    ReduceList = 2
    Dataset = 3
    Arrays = 4


class CompressionMode(IntEnum):
    """
    Integer codes for compression modes: Compress, Decompress, CompressFile,
    DecompressFile, SharedMemory and Info.
    """

    Compress = 0
    Decompress = 1
    CompressFile = 2
    DecompressFile = 3
    SharedMemory = 4
    Info = 5


class CompressionType(IntEnum):
    """Integer codes for compression types: Uncompressed (0) or ZStd (1)."""

    Uncompressed = 0
    ZStd = 1


class ColumnAttribute(IntEnum):
    """Integer codes for column attributes: Default, Left or Right."""

    Default = 0
    Left = 1
    Right = 2


class SDSFlag(IntEnum):
    """
    SDS flag values. Each member is a distinct single bit (0x01 through
    0x10), so several flags can be combined with bitwise OR.
    """

    OriginalContainer = 0x01
    Stackable = 0x02
    Scalar = 0x04
    Nested = 0x08
    Meta = 0x10


class SDSFileType(IntEnum):
    """
    Integer codes for SDS file types: Unknown, Struct, Dataset, Table, Array
    and OneFile.
    """

    Unknown = 0
    Struct = 1
    Dataset = 2
    Table = 3
    Array = 4
    OneFile = 5  # new for one file


class DayOfWeek(IntEnum):
    """Integer codes for the days of the week, Monday = 0 through Sunday = 6."""

    Monday = 0
    Tuesday = 1
    Wednesday = 2
    Thursday = 3
    Friday = 4
    Saturday = 5
    Sunday = 6


# Characters listed as invalid for file names (per the constant's name).
# allowing / now for denest, so the forward slash is deliberately absent.
INVALID_FILE_CHARS: Tuple[str, ...] = ("\\", ":", "<", ">", "!", "|", "*", "?")


# Maps a statistic name to a short human-readable description.
# Each nan-prefixed entry reuses the description of its plain counterpart.
# NOTE(review): "mad" is described as "Mean absolute deviation", but the inline
# comment gives the formula for the median absolute deviation - confirm which
# one is meant.
gBasicStats: Mapping[str, str] = {
    "count": "Number of non-null observations",
    "sum": "Sum of values",
    "mean": "Mean of values",
    "mad": "Mean absolute deviation",  # median(abs(a - median(a)))
    "median": "Arithmetic median of values",
    "min": "Minimum",
    "max": "Maximum",
    "std": "Unbiased standard deviation",
    "var": "Unbiased variance",
    "nansum": "Sum of values",
    "nanmean": "Mean of values",
    "nanmad": "Mean absolute deviation",
    "nanmedian": "Arithmetic median of values",
    "nanmin": "Minimum",
    "nanmax": "Maximum",
    "nanstd": "Unbiased standard deviation",
    "nanvar": "Unbiased variance",
    "mode": "Mode",
    "abs": "Absolute Value",
    "prod": "Product of values",
    "sem": "Unbiased standard error of the mean",
    "skew": "Unbiased skewness (3rd moment)",
    "kurt": "Unbiased kurtosis (4th moment)",
    "quantile": "Sample quantile (value at %)",
    "cumsum": "Cumulative sum",
    "cumprod": "Cumulative product",
    "cummax": "Cumulative maximum",
    "cummin": "Cumulative minimum",
}


#####################################################################################
# Structs begin
#####################################################################################


# One cell of a multi-line column header, used in the display table.
#   col_name    : text of the header cell
#   cell_span   : width of the cell in columns; cannot be 0, and 1 means one cell wide
#   color_group : color group of the cell; indexing starts at 0
ColHeader = namedtuple("ColHeader", ("col_name", "cell_span", "color_group"))


###################################
## TJD NOTE: Need to use strings instead of enum here
##################################
class TypeId(IntEnum):
    """
    Integer identifiers for riptable classes.

    Apart from Default, every member name matches an attribute of
    TypeRegister. Value 16 is not assigned.
    """

    Default = 0
    Struct = 1
    Dataset = 2
    Multiset = 3
    GroupBy = 4
    Grouping = 5
    FastArray = 6
    MathLedger = 7
    Categorical = 8
    Categories = 9
    Accum2 = 10
    DisplayDetect = 11
    DisplayOptions = 12
    DisplayTable = 13
    SortCache = 14
    DateTimeBase = 15
    DateTimeNano = 17
    TimeSpan = 18
    TimeZone = 19
    Calendar = 20
    Date = 21
    DateSpan = 22
    PDataset = 23


######################################################
# SDS File Header order
######################################################
# Field names of the SDS file header, in header order. The "offset" markers
# between groups give the byte offset at which the following field starts.
gSDSFileHeader: List[str] = [
    "SDSHeaderMagic",
    "VersionHigh",
    "VersionLow",
    "CompMode",
    "CompType",
    "CompLevel",
    # ----- offset 16 -----
    "NameBlockSize",
    "NameBlockOffset",
    "NameBlockCount",
    "FileType",  # struct, dataset
    "AuthorId",  # python, matlab
    # ----- offset 48 -----
    "MetaBlockSize",
    "MetaBlockOffset",
    # ----- offset 64 -----
    "TotalMetaCompressedSize",
    "TotalMetaUncompressedSize",
    # ----- offset 80 -----
    "ArrayBlockSize",
    "ArrayBlockOffset",
    # ----- offset 96 -----
    "ArraysWritten",
    "ArrayFirstOffset",
    # ----- offset 112 -----
    "TotalArrayCompressedSize",
    "TotalArrayUncompressedSize",
]
# SDS file name extension, as text and as bytes.
SDS_EXTENSION: str = ".sds"
SDS_EXTENSION_BYTES: bytes = b".sds"


# please keep the TypeRegister at the end of the file

[docs]
class TypeRegister:
    """
    When special classes are loaded, they register with this class to avoid cyclical dependencies.

    Every class attribute below starts out as ``None`` and is meant to be
    replaced by the class object of the same name once that class has been
    loaded. `validate_registry` reports any attribute that is still ``None``.
    """

    Struct: ClassVar[Type["Struct"]] = None
    Dataset: ClassVar[Type["Dataset"]] = None
    Multiset: ClassVar[Type["Multiset"]] = None
    GroupBy: ClassVar[Type["GroupBy"]] = None
    Grouping: ClassVar[Type["Grouping"]] = None
    FastArray: ClassVar[Type["FastArray"]] = None
    MathLedger: ClassVar[Type["MathLedger"]] = None
    Categorical: ClassVar[Type["Categorical"]] = None
    Categories: ClassVar[Type["Categories"]] = None
    Accum2: ClassVar[Type["Accum2"]] = None
    DisplayDetect: ClassVar[Type["DisplayDetect"]] = None
    DisplayOptions: ClassVar[Type["DisplayOptions"]] = None
    DisplayTable: ClassVar[Type["DisplayTable"]] = None
    DisplayString: ClassVar[Type["DisplayString"]] = None
    DisplayAttributes: ClassVar[Type["DisplayAttributes"]] = None
    DisplayText: ClassVar[Type["DisplayText"]] = None
    SortCache: ClassVar[Type["SortCache"]] = None
    DateTimeBase: ClassVar[Type["DateTimeBase"]] = None
    DateTimeNano: ClassVar[Type["DateTimeNano"]] = None
    TimeSpan: ClassVar[Type["TimeSpan"]] = None
    SharedMemory: ClassVar[Type["SharedMemory"]] = None
    TimeZone: ClassVar[Type["TimeZone"]] = None
    Calendar: ClassVar[Type["Calendar"]] = None
    DateBase: ClassVar[Type["DateBase"]] = None
    Date: ClassVar[Type["Date"]] = None
    DateSpan: ClassVar[Type["DateSpan"]] = None
    PDataset: ClassVar[Type["PDataset"]] = None

    @classmethod
    def validate_registry(cls):
        """
        Check that every public attribute of the registry has been set.

        Raises
        ------
        RuntimeError
            If any attribute whose name does not start with an underscore is
            still ``None``. The message lists the missing names in sorted order.
        """
        # Look the attributes up on ``cls`` (the class being validated), the
        # same object whose names ``dir(cls)`` enumerates.
        missing = sorted(_nm for _nm in dir(cls) if not _nm.startswith("_") and getattr(cls, _nm) is None)
        if missing:
            msg = ", ".join(missing)
            raise RuntimeError(f"riptable: Improper initialization!  Missing: {msg}")

    @classmethod
    def is_computable(cls, other):
        """
        Return False if the array's dtype character is in
        ``NumpyCharTypes.Noncomputable`` or the array is a Categorical;
        otherwise return True.
        """
        return not (other.dtype.char in NumpyCharTypes.Noncomputable or isinstance(other, cls.Categorical))

    @classmethod
    def is_array_subclass(cls, arr):
        """
        Certain routines can be sped up by skipping the logic before falling back on a numpy call.
        Note: this is different than using python's issubclass(), which returns True if the classes are the same.
        Returns True if the item is an instance of a FastArray or numpy array subclass.

        Plain ``np.ndarray`` and plain ``FastArray`` instances, and anything
        that is not an ndarray at all, return False.
        """
        if not isinstance(arr, np.ndarray):
            return False
        arr_type = type(arr)
        return arr_type is not np.ndarray and arr_type is not cls.FastArray

    @classmethod
    def is_binned_array(cls, arr):
        """
        Use this instead of checking isinstance(item, TypeRegister.Categorical). For other binned
        types in the future.

        Called by:
        Dataset.melt()             -re-expands
        Dataset.from_jagged_rows() -re-expands
        GroupBy.__init__           -calls grouping, gb_keychain properties to borrow bins

        """
        return isinstance(arr, cls.Categorical)

    @classmethod
    def is_binned_type(cls, arrtype):
        """
        Check the type rather than the instance.
        See also is_binned_array()

        Called by:
        rt_utils._multistack_items()
        """
        return arrtype == cls.Categorical

    # ---------------------------------------------------------------
    @classmethod
    def is_spanlike(cls, arr: np.ndarray):
        """return True if it is a datespan or timespan"""
        # TODO: datetime/span are computable sometimes... need a way to distinguish from other FA subclasses
        # simple math works, but not larger groupby operations like sum
        return isinstance(arr, (cls.TimeSpan, cls.DateSpan))

    # ---------------------------------------------------------------
    @classmethod
    def is_datelike(cls, arr: np.ndarray):
        """return True if it is a date or time"""
        # TODO: datetime/span are computable sometimes... need a way to distinguish from other FA subclasses
        # simple math works, but not larger groupby operations like sum
        return isinstance(arr, (cls.DateTimeNano, cls.TimeSpan, cls.Date, cls.DateSpan))

    @classmethod
    def is_string_or_object(cls, arr):
        """
        Return True if `arr` is an array subclass (see `is_array_subclass`) or
        its dtype character is one of "O", "S" or "U".
        """
        return cls.is_array_subclass(arr) or arr.dtype.char in "OSU"

    @classmethod
    def newclassfrominstance(cls, instance, origin):
        """After slicing or an array routine, return a new instance of a FastArray subclass.
        If the array was not a subclass, instance is unchanged.

        Parameters
        ----------
        instance : ndarray
            Array generated from operation.
        origin : ndarray
            Array, possibly a FastArray subclass.

        Returns
        -------
        instance : ndarray
            Array of the same class as origin if the origin class has a newclassfrominstance defined.
        """
        # FastArray subclasses should define this classmethod to return
        # a new object with a different instance array
        if hasattr(origin, "newclassfrominstance"):
            return origin.newclassfrominstance(instance, origin)
        return instance

    # put these here for now
    # where should they live?
    @classmethod
    def as_meta_data(cls, obj):
        """Placeholder: currently does nothing and returns None."""
        pass

    @classmethod
    def from_meta_data(cls, itemdict: Optional[dict] = None, flags: Optional[list] = None, meta: str = ""):
        """
        Rebuild an object from its metadata.

        `meta` is wrapped in a ``MetaData`` object; the class found in that
        object's ``itemclass`` attribute then does the rebuilding through its
        ``_from_meta_data(itemdict, flags, meta)`` method.

        Parameters
        ----------
        itemdict : dict, optional
            Passed through to ``_from_meta_data``; an empty dict when omitted.
        flags : list, optional
            Passed through to ``_from_meta_data``; an empty list when omitted.
        meta : str
            Metadata handed to the ``MetaData`` constructor.

        Returns
        -------
        Whatever ``itemclass._from_meta_data`` returns.
        """
        # Imported here rather than at module level, in line with the class's
        # stated aim of avoiding cyclical dependencies.
        from .Utils.rt_metadata import MetaData

        if itemdict is None:
            itemdict = {}
        if flags is None:
            flags = []
        parsed_meta = MetaData(meta)
        return parsed_meta.itemclass._from_meta_data(itemdict, flags, parsed_meta)