# Names exported by `from <this module> import *`.
__all__ = [
    # classes
    "DateTimeBase",
    "DateTimeNano",
    "TimeSpan",
    "Date",
    "DateSpan",
    "DateTimeUTC",
    # names ending in "Scalar"
    "DateTimeNanoScalar",
    "TimeSpanScalar",
    "DateScalar",
    "DateSpanScalar",
    # string / epoch parsing functions
    "parse_epoch",
    "timestring_to_nano",
    "datestring_to_nano",
    "datetimestring_to_nano",
    "strptime_to_nano",
]
from datetime import date
from datetime import datetime as dt
from datetime import timezone
import math
import time
from typing import TYPE_CHECKING, List, Optional, Tuple, Union
import warnings
import numpy as np
import riptide_cpp as rc
from dateutil import tz
from .rt_categorical import Categorical
from .rt_enum import (
INVALID_DICT,
MATH_OPERATION,
DayOfWeek,
DisplayArrayTypes,
DisplayJustification,
DisplayLength,
DisplayTextDecoration,
NumpyCharTypes,
SDSFlag,
TimeFormat,
TypeId,
TypeRegister,
)
from .rt_fastarray import FastArray
from .rt_hstack import hstack_any
from .rt_numpy import (
arange,
empty,
full,
hstack,
isnan,
mask_andi,
mask_ori,
mask_xori,
putmask,
searchsorted,
sum,
zeros,
)
from .Utils.rt_display_properties import ItemFormat
from .Utils.rt_metadata import META_VERSION, MetaData, meta_from_version
if TYPE_CHECKING:
# pyarrow is an optional dependency.
try:
import pyarrow as pa
except ImportError:
pass
# Sub-second units, expressed in nanoseconds.
NANOS_PER_MICROSECOND = 10**3
NANOS_PER_MILLISECOND = 10**6
NANOS_PER_SECOND = 10**9
# Larger units, each built from the previous one.
NANOS_PER_MINUTE = 60 * NANOS_PER_SECOND
NANOS_PER_HOUR = 60 * NANOS_PER_MINUTE
NANOS_PER_DAY = 24 * NANOS_PER_HOUR
NANOS_PER_YEAR = 365 * NANOS_PER_DAY
NANOS_PER_LEAPYEAR = 366 * NANOS_PER_DAY
SECONDS_PER_DAY = 24 * 60 * 60
DAYS_PER_YEAR = 365
DAYS_PER_LEAPYEAR = 366
# Days from 1970-01-01 to 2000-01-01: thirty 365-day years plus the seven leap days
# of 1972, 1976, 1980, 1984, 1988, 1992 and 1996.
DAYS_AT_2000 = 30 * DAYS_PER_YEAR + 7
# The same instant in nanoseconds since the epoch.
NANOS_AT_2000 = DAYS_AT_2000 * NANOS_PER_DAY
# Days elapsed since 1970-01-01 at January 1st of each year from 1970 through 2099
# (130 entries, index = year - 1970).
# Every year contributes 365 days, plus one day for each leap year that has already
# finished. Within 1970-2099 the leap years are exactly 1972, 1976, ..., 2096: the
# year 2000 is divisible by 400 and so is a leap year, and 2100 is outside the table.
UTC_1970_DAY_SPLITS = FastArray(
    [
        (year - 1970) * DAYS_PER_YEAR + len(range(1972, year, 4))
        for year in range(1970, 2100)
    ]
)
# UTC nanoseconds at midnight on January 1st of each year, 1970 - 2099
# (the per-year day offsets scaled by NANOS_PER_DAY).
UTC_1970_SPLITS = UTC_1970_DAY_SPLITS * NANOS_PER_DAY
# NOTE(review): presumably the MATLAB datenum corresponding to 1970-01-01 -- confirm.
MATLAB_EPOCH_DATENUM = 719529
# Day of the week of the epoch day (1970-01-01).
EPOCH_DAY_OF_WEEK = DayOfWeek.Thursday
# Zero-based day-of-year on which each month (Jan..Dec) starts, for 365-day years ...
YDAY_SPLITS = FastArray([0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334])
# ... and for 366-day years (every month after February starts one day later).
YDAY_SPLITS_LEAP = FastArray([0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335])
# Three-letter month abbreviations, January first.
MONTH_STR_ARRAY = FastArray(["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"])
# Nanosecond offset from the start of a 365-day year at which each month (Jan..Dec) begins.
# The products are computed on plain Python ints before being handed to FastArray, so the
# array receives exactly the hard-coded cutoffs.
UTC_YDAY_SPLITS = FastArray(
    [
        NANOS_PER_DAY * first_day
        for first_day in (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334)
    ]
)
# The same cutoffs for a 366-day year.
UTC_YDAY_SPLITS_LEAP = FastArray(
    [
        NANOS_PER_DAY * first_day
        for first_day in (0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335)
    ]
)
# strftime-style format strings keyed by a small integer id.
# NOTE(review): the trailing comments on entries 1 and 2 are kept from the original and do
# not obviously describe the format strings next to them -- confirm what they refer to.
# NOTE(review): "%#H" relies on the "#" flag, which is a platform-dependent strftime
# extension -- verify it behaves as intended on every supported platform.
TIME_FORMATS = {
    1: "%Y%m%d",  # ordinal date
    2: "%#H:%M %p",  # ms from midnight
    3: "%Y%m%d %H:%M:%S",
    4: "%H:%M:%S",
    5: "%H:%M",
}
# ------------------------------------------------------------------------------------
def strptime_to_nano(dtstrings, format, from_tz=None, to_tz="NYC"):
    """
    Parse datetime strings with a user-supplied format and return a DateTimeNano.

    Parameters
    ----------
    dtstrings : array of timestrings
    format : str or bytes
        The timestring format. A ``str`` is encoded to bytes before it is handed to the
        ``riptide_cpp`` parser. Supported escape codes:

        **Date**

        * ``%y`` Year without century as zero-padded decimal number.
        * ``%Y`` Year with century as decimal number.
        * ``%m`` Month as a decimal number (with or without zero-padding).
        * ``%B`` Full month name: ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
        * ``%b`` Abbreviated month name: ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        * ``%d`` Day of the month as a decimal number (with or without zero-padding).

        **Time**

        * ``%H`` Hour (24-hour clock) as a decimal number (with or without zero-padding).
          (Note: if a ``%p`` formatter is present, this is interpreted as a 12-hour clock hour.)
        * ``%I`` Hour (12-hour clock) as a decimal number (with or without zero-padding).
          (Note: unlike ``%H``, must be 1-12.)
        * ``%p`` Locale's equivalent of either AM or PM.
        * ``%M`` Minute as a decimal number (with or without zero-padding).
        * ``%S`` Second as a decimal number (with or without zero-padding).
    from_tz : str
        The timezone of origin: 'NYC', 'GMT', 'DUBLIN', etc.
    to_tz : str
        The timezone that the time will be displayed in.

    Returns
    -------
    DateTimeNano

    Notes
    -----
    Works best with timestrings that include a date:

    * If no year is present in the string, an invalid time is returned for all values.
    * If no form of year/month/day is present, values yield a time in 1970.

    Consider using `timestring_to_nano()`, which also accepts one datestring for all times.

    If the timestring ends in a '.', the digits that follow are parsed as a fraction of a
    second. This happens automatically; no escape character is required in the format string.

    If no time escape characters are present, midnight is returned for every date value.
    If the strings are formatted suitably, consider using `datestring_to_nano()`.

    Examples
    --------
    Date, with/without padding:

    >>> dt = FastArray(['02/01/1992', '2/1/1992'])
    >>> fmt = '%m/%d/%Y'
    >>> strptime_to_nano(dt, fmt, from_tz='NYC')
    DateTimeNano([19920201 00:00:00.000000000, 19920201 00:00:00.000000000])

    Date + 24-hour clock:

    >>> dt = FastArray(['02/01/1992 7:48:30', '2/1/1992 19:48:30'])
    >>> fmt = '%m/%d/%Y %H:%M:%S'
    >>> strptime_to_nano(dt, fmt, from_tz='NYC')
    DateTimeNano([19920201 07:48:30.000000000, 19920201 19:48:30.000000000])

    Date + 12-hour clock + am/pm:

    >>> dt = FastArray(['02/01/1992 7:48:30 AM', '2/1/1992 7:48:30 PM'])
    >>> fmt = '%m/%d/%Y %I:%M:%S %p'
    >>> strptime_to_nano(dt, fmt, from_tz='NYC')
    DateTimeNano([19920201 07:48:30.000000000, 19920201 19:48:30.000000000])

    Date + time + second fraction:

    >>> dt = FastArray(['02/01/1992 7:48:30.123456789', '2/1/1992 15:48:30.000000006'])
    >>> fmt = '%m/%d/%Y %H:%M:%S'
    >>> strptime_to_nano(dt, fmt, from_tz='NYC')
    DateTimeNano([19920201 07:48:30.123456789, 19920201 15:48:30.000000006])
    """
    # A text format is passed on as bytes; anything else is forwarded untouched.
    fmt_bytes = format.encode() if isinstance(format, str) else format
    return DateTimeNano(rc.StrptimeToNanos(dtstrings, fmt_bytes), from_tz=from_tz, to_tz=to_tz)
# ------------------------------------------------------------------------------------
def _possibly_convert_cat(arr):
    """
    Split a Categorical into its category array so a DateTime routine can work on the
    uniques; the caller can re-expand the result afterwards using the returned Categorical.

    Returns
    -------
    arr, None
        When `arr` is not a Categorical: the same array and ``None``.
    uniques, cat
        When `arr` is a Categorical: ``arr.category_array`` and the Categorical itself.
    """
    if not isinstance(arr, TypeRegister.Categorical):
        return arr, None
    return arr.category_array, arr
# ------------------------------------------------------------------------------------
def datetimestring_to_nano(dtstring, from_tz=None, to_tz="NYC"):
    """
    Convert combined date + time strings to a DateTimeNano.

    The strings are parsed to nanoseconds by ``riptide_cpp`` and wrapped in a
    DateTimeNano; `from_tz` and `to_tz` are forwarded to it unchanged.

    NOTE(review): earlier documentation said the strings default to Eastern Time and
    mentioned a ``gmt=True`` switch. This signature has no ``gmt`` parameter and
    `from_tz` defaults to None -- confirm the effective default against DateTimeNano.

    Parameters
    ----------
    dtstring : array of timestrings
        Formats such as YYYY-MM-DD HH:MM:SS, YYYYMMDD HH:MM:SS.ffffff, etc.
        (bytestrings/unicode supported).
    from_tz : str
        The timezone of origin: 'NYC', 'GMT', 'DUBLIN', etc.
    to_tz : str
        The timezone that the time will be displayed in.

    Returns
    -------
    DateTimeNano

    See Also
    --------
    timestring_to_nano, datestring_to_nano

    Examples
    --------
    >>> dts = FA(['2012-12-12 12:34:56.001002', '20130303 1:14:15', '2008-07-06 15:14:13'])
    >>> datetimestring_to_nano(dts, from_tz='NYC')
    DateTimeNano([20121212 12:34:56.001002000, 20130303 01:14:15.000000000, 20080706 15:14:13.000000000])
    """
    return DateTimeNano(rc.DateTimeStringToNanos(dtstring), from_tz=from_tz, to_tz=to_tz)
# ------------------------------------------------------------------------------------
def datestring_to_nano(datestring, time=None, from_tz=None, to_tz="NYC"):
    """
    Convert date strings to a DateTimeNano, at midnight unless `time` is given.

    NOTE(review): earlier documentation said the strings default to Eastern Time and
    mentioned a ``gmt=True`` switch. This signature has no ``gmt`` parameter and
    `from_tz` defaults to None -- confirm the effective default against DateTimeNano.

    Parameters
    ----------
    datestring : array of datestrings
        Format YYYY-MM-DD or YYYYMMDD (bytestrings/unicode supported).
    time : str, bytes or array of strings, optional
        Time of day in the format HH:MM:SS.ffffff (bytestrings/unicode supported).
        A single string is wrapped in a one-element array and added to every date.
    from_tz : str
        The timezone of origin: 'NYC', 'GMT', 'DUBLIN', etc.
    to_tz : str
        The timezone that the time will be displayed in.

    Returns
    -------
    DateTimeNano

    See Also
    --------
    timestring_to_nano, datetimestring_to_nano

    Examples
    --------
    Date only:

    >>> dates = FA(['2018-01-01', '2018-01-02', '2018-01-03'])
    >>> datestring_to_nano(dates, from_tz='NYC')
    DateTimeNano([20180101 00:00:00.000000000, 20180102 00:00:00.000000000, 20180103 00:00:00.000000000])

    With time:

    >>> dates = FA(['2018-01-01', '2018-01-02', '2018-01-03'])
    >>> datestring_to_nano(dates, time='9:30:00', from_tz='NYC')
    DateTimeNano([20180101 09:30:00.000000000, 20180102 09:30:00.000000000, 20180103 09:30:00.000000000])
    """
    nanos = rc.DateStringToNanos(datestring)
    if time is not None:
        # The parser works on arrays, so a lone string becomes a one-element array.
        if isinstance(time, (str, bytes)):
            time = TypeRegister.FastArray([time])
        nanos = nanos + rc.TimeStringToNanos(time)
    return DateTimeNano(nanos, from_tz=from_tz, to_tz=to_tz)
# ------------------------------------------------------------------------------------
def timestring_to_nano(timestring, date=None, from_tz=None, to_tz="NYC"):
    """
    Convert time-of-day strings to a TimeSpan, or to a DateTimeNano when `date` is given.

    Without `date` the parsed nanoseconds are returned as a TimeSpan and the timezone
    arguments are not used. With `date`, the date(s) are parsed as well, added to the
    times, and the sum is wrapped in a DateTimeNano built with `from_tz` / `to_tz`.

    NOTE(review): earlier documentation said the strings default to Eastern Time and
    mentioned a ``gmt=True`` switch. This signature has no ``gmt`` parameter and
    `from_tz` defaults to None -- confirm the effective default against DateTimeNano.

    Parameters
    ----------
    timestring : array of timestrings
        Format HH:MM:SS, H:MM:SS, HH:MM:SS.ffffff (bytestrings/unicode supported).
    date : str, bytes or array of date strings, optional
        Format YYYY-MM-DD (bytestrings/unicode supported). A single string is wrapped
        in a one-element array and applied to every time.
    from_tz : str
        The timezone of origin: 'NYC', 'GMT', 'DUBLIN', etc.
    to_tz : str
        The timezone that the time will be displayed in.

    Returns
    -------
    TimeSpan or DateTimeNano

    See Also
    --------
    datestring_to_nano, datetimestring_to_nano

    Examples
    --------
    Return TimeSpan:

    >>> ts = FA(['1:23:45', '12:34:56.000100', ' 14:00:00'])
    >>> timestring_to_nano(ts, from_tz='NYC')
    TimeSpan([01:23:45.000000000, 12:34:56.000100000, 14:00:00.000000000])

    With single date string:

    >>> ts = FA(['1:23:45', '12:34:56', '23:22:21'])
    >>> timestring_to_nano(ts, date='2018-02-01', from_tz='NYC')
    DateTimeNano([20180201 01:23:45.000000000, 20180201 12:34:56.000000000, 20180201 23:22:21.000000000])

    Multiple date strings:

    >>> ts = FA(['1:23:45', '12:34:56', '23:22:21'])
    >>> dts = FA(['2018-02-01', '2018-02-07', '2018-05-12'])
    >>> timestring_to_nano(ts, date=dts, from_tz='NYC')
    DateTimeNano([20180201 01:23:45.000000000, 20180207 12:34:56.000000000, 20180512 23:22:21.000000000])
    """
    time_nanos = rc.TimeStringToNanos(timestring)
    if date is None:
        return TimeSpan(time_nanos)
    # The parser works on arrays, so a lone string becomes a one-element array.
    if isinstance(date, (str, bytes)):
        date = TypeRegister.FastArray([date])
    date_nanos = rc.DateStringToNanos(date)
    return DateTimeNano(date_nanos + time_nanos, from_tz=from_tz, to_tz=to_tz)
# ===========================================================================================
def parse_epoch(etime, to_tz="NYC"):
    """Split UTC nanosecond timestamps into days since epoch and milliseconds since midnight.

    The timestamps are wrapped in a DateTimeNano (origin zone "UTC", display zone
    `to_tz`) and both values are read from that object.

    Parameters
    ----------
    etime : array-like
        UTC nanoseconds.
    to_tz : str, default 'NYC'
        TimeZone short string - see riptable.rt_timezone.
        This routine did not originally take a timezone, so the default keeps the
        previous setting.

    Returns
    -------
    days : array (int32)
        Days since epoch.
    millis : array (float64)
        Milliseconds since midnight.

    Notes
    -----
    Used in the phonyx data loader.
    """
    localized = DateTimeNano(etime, from_tz="UTC", to_tz=to_tz)
    days = localized.days_since_epoch
    millis = localized.millis_since_midnight()
    return days, millis
# ------------------------------------------------------------
def _apply_inv_mask(arr1, arr2, fillval=None, arr1_inv_mask=None, arr2_inv_mask=None):
    """Carry invalid (NaN) date/time values from the operands into a computed result.

    Called by time fraction properties and math operations.

    Parameters
    ----------
    arr1 : ndarray or scalar
        The date/time operand whose invalid entries must survive in the result.
    arr2 : ndarray or scalar
        The computed result. On the multi-element path it is modified in place.
    fillval : optional
        Value written at invalid positions on the multi-element path. Defaults to the
        ``INVALID_DICT`` entry for ``arr2``'s dtype.
    arr1_inv_mask : optional
        Precomputed invalid mask for `arr1`; ``arr1.isnan()`` is used when omitted.
    arr2_inv_mask : optional
        Invalid mask of a second operand: a scalar bool, a one-element array, or a
        full-length mask.

    Returns
    -------
    `arr2` (possibly modified), or an invalid fill when a scalar / one-element `arr1`
    is <= 0.

    Notes
    -----
    The scalar and one-element paths ignore `fillval` and both mask arguments; they
    always fill with the ``INVALID_DICT`` sentinel for ``arr2``'s dtype.
    """
    # Scalar operand: values <= 0 are treated as invalid.
    if not isinstance(arr1, np.ndarray):
        return INVALID_DICT[arr2.dtype.num] if arr1 <= 0 else arr2

    # One-element operand (broadcast case): the result is a one-element invalid array.
    if len(arr1) == 1:
        if arr1[0] <= 0:
            return TypeRegister.FastArray([INVALID_DICT[arr2.dtype.num]])
        return arr2

    if arr1_inv_mask is None:
        arr1_inv_mask = arr1.isnan()
    if fillval is None:
        # Use the sentinel or nan of the result's own dtype, e.g. year() returns int32.
        fillval = INVALID_DICT[arr2.dtype.num]
    putmask(arr2, arr1_inv_mask, fillval)

    if arr2_inv_mask is None:
        return arr2

    # Invalids coming from the other operand. A scalar or one-element mask that is set
    # invalidates the whole result, which keeps broadcasting correct.
    if np.isscalar(arr2_inv_mask):
        if arr2_inv_mask:
            arr2[:] = fillval
    elif len(arr2_inv_mask) == 1:
        if arr2_inv_mask[0]:
            arr2[:] = fillval
    else:
        putmask(arr2, arr2_inv_mask, fillval)
    return arr2
# ========================================================
class DateTimeBase(FastArray):
    """Base class for DateTimeNano and TimeSpan.

    Both of these subclasses have times with nanosecond precision.
    """

    DEFAULT_FORMATTER = time.strftime
    PRECISION = 9
    NAN_TIME = 0

    # ------------------------------------------------------------
    def __new__(cls, values):
        """View `values` (converted with ``np.asarray``) as `cls`, with long display length."""
        arr = np.asarray(values).view(cls)
        arr._display_length = DisplayLength.Long
        return arr

    # ------------------------------------------------------------
    def __array_finalize__(self, obj):
        """Finalizes self from other, called as part of ndarray.__new__()"""
        super().__array_finalize__(obj)
        if obj is None:
            return
        if isinstance(obj, DateTimeBase):
            # Created from another date/time array: inherit its display settings.
            self._display_length = obj._display_length
            self._timezone = obj._timezone
        else:
            self._display_length = DisplayLength.Long
            self._timezone = TypeRegister.TimeZone(from_tz="UTC", to_tz="UTC")

    # ------------------------------------------------------------
    @property
    def _fa(self):
        """The same data viewed as a plain FastArray."""
        return self.view(FastArray)

    # ------------------------------------------------------------
    @property
    def display_length(self):
        """Display length setting; set to ``DisplayLength.Long`` on first access if missing."""
        try:
            return self._display_length
        except AttributeError:
            self._display_length = DisplayLength.Long
            return self._display_length

    # ------------------------------------------------------------
    def get_classname(self):
        """Name used by ``__repr__``.

        ``__class__`` here is the class this method is defined in, so the result is
        "DateTimeBase" unless a subclass overrides this method.
        """
        return __class__.__name__

    # ------------------------------------------------------------
    def display_item(self, utcnano):
        """Placeholder that always raises; subclasses supply the real implementation."""
        raise NotImplementedError("DateTimeBase subclasses need to override this method.")

    # ------------------------------------------------------------
    def _build_string(self):
        """Comma-separated, single-quoted display strings for the elements.

        Arrays longer than ``MAX_DISPLAY_LEN`` show only the leading and trailing half
        of that limit, separated by "...".
        """
        half = int(np.floor(DateTimeBase.MAX_DISPLAY_LEN / 2))
        count = len(self)
        fmt, to_text = self.display_query_properties()

        def quoted(raw_value):
            return "'" + to_text(raw_value, fmt) + "'"

        if count > DateTimeBase.MAX_DISPLAY_LEN:
            # print with break
            head = self.view(FastArray)[:half]
            tail = self.view(FastArray)[-half:]
            pieces = [quoted(v) for v in head]
            pieces.append("...")
            pieces.extend([quoted(v) for v in tail])
        else:
            # print full
            pieces = [quoted(v) for v in self]
        return ", ".join(pieces)

    # ------------------------------------------------------------
    @staticmethod
    def _add_nano_ext(utcnano, timestr):
        """Append "." plus the sub-second digits of `utcnano` to `timestr`.

        The number of digits is ``DateTimeBase.PRECISION`` capped at 9; with a
        precision of 0 or less `timestr` is returned unchanged.
        """
        digits = min(DateTimeBase.PRECISION, 9)
        if digits <= 0:
            return timestr
        fraction = int(utcnano % 10**digits)
        return timestr + "." + str(fraction).zfill(digits)

    # ------------------------------------------------------------
    def __str__(self):
        return self._build_string()

    # ------------------------------------------------------------
    def __repr__(self):
        return "".join((self.get_classname(), "([", self._build_string(), "])"))

    # ------------------------------------------------------------
    def __getitem__(self, fld):
        """Index the underlying FastArray, then restore the class or build a scalar."""
        picked = self._fa.__getitem__(fld)
        if isinstance(picked, FastArray):
            picked = self.newclassfrominstance(picked, self)
        if np.isscalar(picked):
            return self.get_scalar(picked)
        return picked

    # -------------------------------------------------------------
    def _math_error_string(self, value, operator, reverse=False):
        """Message text for an unsupported operation; `reverse` swaps the operand order."""
        a, b = (value, self) if reverse else (self, value)
        return f"unsupported operand type(s) for {operator}: {type(a).__name__} {type(b).__name__}"

    # ------------------------------------------------------------
    def _funnel_mathops(self, funcname, value):
        """
        Shared driver for math operations on the nanosecond-precision classes.

        Relies on three hooks that are not defined in this class:
            _check_mathops_base()
            _check_mathops()
            _build_mathops_result()

        Parameters
        ----------
        funcname : str
            Name of the method to call on the underlying array, e.g. ``"__add__"``.
        value
            The other operand.
        """
        nan_mask = self.isnan()
        # Let the hook inspect the operand; it may replace the operand, supply the
        # operand's invalid mask, fix the return type, or swap the calling array.
        value, other_nan_mask, result_type, operand = self._check_mathops_base(
            funcname, value, None, None, self._fa
        )
        # Perform the main math operation on the fast array.
        raw = getattr(operand, funcname)(value)
        if result_type is None:
            # Set the return type, preserve invalids for the remaining operand kinds.
            result_type, other_nan_mask = self._check_mathops(funcname, value)
            if result_type is None:
                # Still no return type: the second item is returned as-is (invalid fill).
                return other_nan_mask
        # Apply the invalid mask(s) and wrap the result in its final return type.
        return self._build_mathops_result(value, raw, nan_mask, other_nan_mask, result_type)

    # ------------------------------------------------------------
    def copy(self, order="K"):
        """Copy the underlying data and rewrap it in this array's class."""
        return self.newclassfrominstance(self._fa.copy(order=order), self)
# ========================================================
class TimeStampBase:
    """Parent class for DateTimeNano and Date."""

    def __init__(self):
        """Nothing to initialize."""

    # ------------------------------------------------------------
    def _year(self, arr, fix_dst=False):
        """
        Calendar year of each value, found by locating it among ``self._year_splits``.

        Parameters
        ----------
        arr : array
            Underlying FastArray or result of previous timezone fixup.
        fix_dst : bool, default False
            If True, adjust array's stored times to match display. (DateTimeNano only)

        Returns
        -------
        int32 FastArray of the year. For example 2003 is the integer 2003.
        """
        source = self._timezone.fix_dst(arr) if fix_dst else arr
        year_index = self._year_splits.searchsorted(source, side="right")
        # A value in the first split interval has index 1, which must map to 1970.
        return year_index.astype(np.int32, copy=False) + 1969

    # ------------------------------------------------------------
    def _month(self, arr=None, fix_dst=False):
        """
        Internal month lookup, arranged so the daylight savings fixup is not performed
        multiple times.

        Parameters
        ----------
        arr : array, optional
            Values to use; defaults to ``self._fa`` (DST-adjusted first if `fix_dst`).
        fix_dst : bool, default False
            If True, adjust stored times to match display.

        Returns
        -------
        int32 FastArray of the month number found among the yearday splits.

        Notes
        -----
        Leap years are detected with ``year % 4 == 0``.
        NOTE(review): when `arr` is supplied together with ``fix_dst=True``, only the
        year lookup sees DST-adjusted values; the offset into the year is computed
        from `arr` as passed -- confirm that is intended.
        """
        if arr is None:
            arr = self._fa
            if fix_dst:
                arr = self._timezone.fix_dst(arr)
                # Already adjusted; do not adjust again inside _year().
                fix_dst = False
        years = self._year(arr, fix_dst=fix_dst)
        into_year = arr - self._year_splits[years - 1970]
        is_leap = (years % 4) == 0
        # Month lookup for non-leap years ...
        months = self._yearday_splits.searchsorted(into_year, side="right")
        # ... overwritten with the leap-year lookup where applicable.
        leap_months = self._yearday_splits_leap.searchsorted(into_year, side="right")
        putmask(months, is_leap, leap_months)
        return months.astype(np.int32, copy=False).view(FastArray)

    # ------------------------------------------------------------
    def _preserve_invalid_comparison(self, caller, other, funcname):
        """Date and DateTimeNano have multiple values for nan (0 and integer sentinel).

        Both of their compare checks need to preserve nans in the result the same way:
        for ``__ne__`` invalid positions are forced True, for every other comparison
        they are forced False.
        """
        outcome = getattr(caller, funcname)(other)
        if funcname == "__ne__":
            outcome += self.isnan()
        else:
            outcome *= self.isnotnan()
        return outcome
# ========================================================
class DateBase(FastArray):
"""Parent class for Date and Datespan.
Both of these subclasses have times with day precision.
"""
# NAN_DATE = INVALID_DICT[np.dtype(np.int32).num]
NAN_DATE = 0
# ------------------------------------------------------------
def __new__(cls, arr, **kwargs):
    """Reinterpret `arr` as `cls` via ``arr.view``; extra keyword arguments are ignored."""
    as_cls = arr.view(cls)
    return as_cls
# ------------------------------------------------------------
def __init__(cls, arr, **kwargs):
    """No-op initializer; accepts and ignores `arr` and any keyword arguments.

    The first parameter receives the instance even though it is named `cls`.
    """
# ------------------------------------------------------------
@property
def _fa(self):
    """The same data viewed as a plain FastArray."""
    plain = self.view(FastArray)
    return plain
# ------------------------------------------------------------
def __str__(self):
    """Display text for the elements, as produced by ``_build_string``."""
    text = self._build_string()
    return text
# ------------------------------------------------------------
def __repr__(self):
    """Class name followed by the bracketed element text, e.g. ``Name([...])``."""
    return "".join((self.get_classname(), "([", self._build_string(), "])"))
# ------------------------------------------------------------
def __array_finalize__(self, obj):
    """Finalizes self from other, called as part of ndarray.__new__()"""
    super().__array_finalize__(obj)
    if obj is not None:
        if isinstance(obj, DateBase):
            # Created from another Date/DateSpan: inherit its display length.
            self._display_length = obj._display_length
        else:
            self._display_length = DisplayLength.Long
# ------------------------------------------------------------
# DateBase
# For Date and DateSpan (though DateSpan will be deprecated)
def _strftime(self, format, dtype="O"):
    """
    Convert each `Date` or `DateSpan` element to a formatted string
    representation.

    For `DateSpan` objects, see the Notes below.

    Parameters
    ----------
    format : str
        One or more format codes supported by the
        :py:meth:`datetime.date.strftime` function of the standard
        Python distribution. For codes, see
        :ref:`python:strftime-strptime-behavior`.
    dtype : {"O", "S", "U"}, default "O"
        The data type of the returned array:

        - "O": object string
        - "S": byte string
        - "U": unicode string

    Returns
    -------
    `ndarray` or str
        An `ndarray` of strings when called on an array; a single string when
        `self` is not an `ndarray`.

    See Also
    --------
    Date.strftime, DateSpan.strftime, DateScalar.strftime
    DateTimeNano.strftime, DateTimeNanoScalar.strftime, TimeSpan.strftime,
    TimeSpanScalar.strftime

    Notes
    -----
    This routine has not been sped up yet. It's also not NaN-aware: NaNs
    are converted to the date of the epoch (01-01-1970), then formatted.

    `DateSpan` objects are converted to timestamps relative to the epoch
    before they're formatted (for example, a `DateSpan` of "2 days" is
    converted to 01-03-1970), so you may need to adjust the data before
    calling this method on them. Negative `DateSpan` values (for example,
    "-10 days") can't be formatted with this method.

    The day counts are turned into seconds and converted with
    ``datetime.fromtimestamp(..., tz=timezone.utc)``. The timezone is then
    dropped, so timezone format codes render exactly as they would for a
    naive datetime.

    Examples
    --------
    >>> d = rt.Date(['20210101', '20210519', '20220308'])
    >>> d.strftime('%D')
    array(['01/01/21', '05/19/21', '03/08/22'], dtype=object)

    `DateSpan` objects are converted to timestamps relative to the epoch
    (01-01-70) before they're formatted, so use with caution.

    >>> ds = d - rt.Date('20201230')
    >>> ds
    DateSpan(['1 day', '139 days', '432 days'])
    >>> ds.strftime('%D')
    array(['01/02/70', '05/20/70', '03/09/71'], dtype=object)
    """

    def _utc_naive(seconds):
        # datetime.utcfromtimestamp() is deprecated since Python 3.12. Converting in
        # UTC and then stripping tzinfo produces the same naive datetime.
        return dt.fromtimestamp(seconds, tz=timezone.utc).replace(tzinfo=None)

    if isinstance(self, np.ndarray):
        # Widen to int64 before scaling days to seconds.
        epoch_seconds = self._fa.astype(np.int64) * SECONDS_PER_DAY
        return np.asarray(
            [_utc_naive(seconds).strftime(format) for seconds in epoch_seconds],
            dtype=dtype,
        )
    # Scalar path. NOTE(review): no widening happens here, unlike the array path --
    # confirm the scalar type cannot overflow when multiplied by SECONDS_PER_DAY.
    return _utc_naive(self * SECONDS_PER_DAY).strftime(format)
# ------------------------------------------------------------
@property
def display_length(self):
    """Display length setting; set to ``DisplayLength.Long`` on first access if missing."""
    try:
        return self._display_length
    except AttributeError:
        self._display_length = DisplayLength.Long
        return self._display_length
# # TODO uncomment when starfish is implemented and imported
# def _sf_display_query_properties(self):
# itemformat = sf.ItemFormat({'length':self.display_length,
# 'align':sf.DisplayAlign.Right})
# return itemformat, self.display_convert_func
# ------------------------------------------------------------
def display_query_properties(self):
    """
    Return the ``(item_format, convert_func)`` pair used to render elements as text.

    ``item_format`` is a right-justified ``ItemFormat`` that may contain spaces, has
    no decoration, and carries this instance's ``display_length`` as its length.
    ``convert_func`` is ``self.display_convert_func``.

    NOTE(review): earlier documentation said the formatter comes from TIME_FORMATS and
    that the length field stores an index into it -- confirm whether that applies here.
    """
    return (
        ItemFormat(
            length=self.display_length,
            justification=DisplayJustification.Right,
            can_have_spaces=True,
            decoration=None,
        ),
        self.display_convert_func,
    )
# ------------------------------------------------------------
def _build_string(self):
    """Comma-separated, single-quoted display strings for the elements.

    Arrays longer than ``DateTimeBase.MAX_DISPLAY_LEN`` show only the leading and
    trailing half of that limit, separated by "...".
    """
    half = int(np.floor(DateTimeBase.MAX_DISPLAY_LEN / 2))
    count = len(self)
    fmt, to_text = self.display_query_properties()

    def quoted(raw_value):
        return "'{}'".format(to_text(raw_value, fmt))

    if count > DateTimeBase.MAX_DISPLAY_LEN:
        # print with break
        head = self.view(FastArray)[:half]
        tail = self.view(FastArray)[-half:]
        pieces = [quoted(v) for v in head]
        pieces.append("...")
        pieces.extend([quoted(v) for v in tail])
    else:
        # print full
        pieces = [quoted(v) for v in self]
    return ", ".join(pieces)
def __getitem__(self, fld):
    """
    Restore the Date/DateSpan class after the indexing operation.

    Array results are rewrapped with ``newclassfrominstance``, scalar results are
    passed through ``get_scalar``, and anything else is returned unchanged.
    """
    picked = self._fa[fld]
    if isinstance(picked, np.ndarray):
        return self.newclassfrominstance(picked, self)
    return self.get_scalar(picked) if np.isscalar(picked) else picked
# ------------------------------------------------------------
def _funnel_mathops(self, funcname, value):
    """
    Wrapper for all math operations on Date and DateSpan.

    Both subclasses need to take over:
        _check_mathops_nano()
        _check_mathops()
    maybe... still testing
        _build_mathops_result()

    Easier to catch forbidden operations here.

    Parameters
    ----------
    funcname : str
        Name of the method to call on the underlying array, e.g. ``"__add__"``.
    value
        The other operand.

    Raises
    ------
    TypeError
        If `funcname` is listed in ``self.forbidden_mathops``.
    """
    if funcname in self.forbidden_mathops:
        raise TypeError(f"Cannot perform {funcname} on {self.__class__.__name__} object.")
    nan_mask = self.isnan()
    # Check whether the operand has nano precision; the hook may replace the operand,
    # supply its invalid mask, fix the return type, or swap the calling array.
    value, other_nan_mask, result_type, operand = self._check_mathops_nano(
        funcname, value, None, None, self._fa
    )
    # Perform the main math operation on the fast array.
    raw = getattr(operand, funcname)(value)
    if result_type is None:
        # Set the return type, preserve invalids for non-nano operands.
        result_type, other_nan_mask = self._check_mathops(funcname, value)
        if result_type is None:
            # Still no return type: the second item is returned as-is (invalid fill).
            return other_nan_mask
    # Apply the invalid mask(s) and wrap the result in its final return type.
    return self._build_mathops_result(value, raw, nan_mask, other_nan_mask, result_type)
# ------------------------------------------------------------
def _build_mathops_result(self, value, result, inv_mask, other_inv_mask, return_type):
    """
    Restore invalids on a math result and wrap it in `return_type`.

    Parameters
    ----------
    value
        The other operand of the operation; only consulted for its timezone
        when the result becomes a ``DateTimeNano``.
    result
        Raw result of the math operation.
    inv_mask
        Invalid mask of ``self``.
    other_inv_mask
        Invalid mask of the other operand (may be None).
    return_type : type
        Class the result must be an instance of.

    Returns
    -------
    An instance of `return_type`.
    """
    # restore invalid for Date and other operand if necessary
    result = _apply_inv_mask(
        self, result, fillval=self.NAN_DATE, arr1_inv_mask=inv_mask, arr2_inv_mask=other_inv_mask
    )
    if isinstance(result, return_type):
        return result

    if return_type == DateTimeNano:
        try:
            # base the display timezone on the operand's own to_tz
            to_tz = value._timezone._to_tz
        except AttributeError:
            # operands without timezone information (e.g. TimeSpan) fall back to GMT;
            # only the missing-attribute case is handled so unrelated errors still surface
            to_tz = "GMT"
        return DateTimeNano(result, from_tz="GMT", to_tz=to_tz)

    return return_type(result)
# -------------------------------------------------------------
def min(self, **kwargs):
    """
    The earliest `Date` or shortest `DateSpan` in an array.

    Note that until a reported bug is fixed, this method is not NaN-aware.

    Parameters
    ----------
    **kwargs
        Accepted but not used.

    Returns
    -------
    `Date` or `DateSpan`
        A one-element array of the same class as the caller.

    See Also
    --------
    Date.max, DateSpan.max, DateSpan.min, DateTimeNano.max, DateTimeNano.min

    Notes
    -----
    This returns an array, not a scalar. However, broadcasting rules will
    apply to operations with it.

    Examples
    --------
    >>> d = rt.Date(['20210103', '20210104', '20210105'])
    >>> d.min()
    Date(['2021-01-03'])

    >>> ds = d - rt.Date('20210101')
    >>> ds.min()
    DateSpan(['2 days'])
    """
    smallest = self._fa.min()
    wrapper = self.__class__
    return wrapper([smallest])
# -------------------------------------------------------------
def max(self, **kwargs):
    """
    The latest `Date` or longest `DateSpan` in an array.

    Parameters
    ----------
    **kwargs
        Accepted but not used.

    Returns
    -------
    `Date` or `DateSpan`
        A one-element array of the same class as the caller.

    See Also
    --------
    Date.min, DateSpan.min, DateSpan.max, DateTimeNano.min, DateTimeNano.max

    Notes
    -----
    This returns an array, not a scalar. However, broadcasting rules will
    apply to operations with it.

    Examples
    --------
    >>> d = rt.Date(['20210103', '20210104', '20210105'])
    >>> d.max()
    Date(['2021-01-05'])

    >>> ds = d - rt.Date('20210101')
    >>> ds.max()
    DateSpan(['4 days'])
    """
    largest = self._fa.max()
    wrapper = self.__class__
    return wrapper([largest])
def _meta_dict(self, name=None):
    """
    Assemble the metadata dictionary describing this array.

    Parameters
    ----------
    name : str, optional
        Name to store under ``"name"``. Defaults to the class name.

    Returns
    -------
    dict
        Keys: ``name``, ``typeid``, ``classname``, ``ncols``, ``version``,
        ``author``, ``instance_vars`` and ``_base_is_stackable``.
    """
    classname = self.__class__.__name__
    return {
        "name": classname if name is None else name,
        # the TypeId member is looked up by class name
        "typeid": getattr(TypeId, classname),
        "classname": classname,
        "ncols": 0,
        "version": self.MetaVersion,
        "author": "python",
        "instance_vars": {"_display_length": self.display_length},
        "_base_is_stackable": SDSFlag.Stackable,
    }
# ------------------------------------------------------------
def _as_meta_data(self, name=None):
    """
    Package the underlying array together with its metadata.

    Parameters
    ----------
    name : str, optional
        Name for the array; defaults to ``self.get_name()``.

    Returns
    -------
    tuple
        ``(arrays, flags, meta_string)`` where ``arrays`` maps the metadata
        name to ``self._fa``, ``flags`` is a one-element list holding
        ``SDSFlag.OriginalContainer + SDSFlag.Stackable`` and ``meta_string``
        is ``MetaData.string``.
    """
    label = self.get_name() if name is None else name
    meta = MetaData(self._meta_dict(name=label))
    arrays = {meta["name"]: self._fa}
    flags = [SDSFlag.OriginalContainer + SDSFlag.Stackable]
    return arrays, flags, meta.string
# ------------------------------------------------------------
def _build_sds_meta_data(self, name):
    """
    Build the metadata object for this array under the given name.

    Returns
    -------
    tuple
        ``(meta, cols, tups)``; the last two are always empty lists.
    """
    return MetaData(self._meta_dict(name=name)), [], []
# ------------------------------------------------------------
@classmethod
def _from_meta_data(cls, arrdict, arrflags, meta):
    """
    Rebuild an instance from stored arrays and metadata.

    Parameters
    ----------
    arrdict : dict
        Stored arrays; only the first value is used.
    arrflags
        Not used by this method.
    meta
        Metadata accepted by ``MetaData``; must contain ``"instance_vars"``.

    Returns
    -------
    An instance of `cls` with the loaded variables set as attributes.
    """
    meta = MetaData(meta)
    instance = cls(list(arrdict.values())[0])

    # combine loaded meta variables with class defaults
    # NOTE(review): the defaults are taken from the top level of cls.MetaDefault,
    # not from its "instance_vars" entry -- confirm this is intended.
    loaded = meta["instance_vars"]
    for key, default in cls.MetaDefault.items():
        loaded.setdefault(key, default)

    for attr, val in loaded.items():
        setattr(instance, attr, val)
    return instance
# ------------------------------------------------------------
def copy(self, order="K"):
    """
    Copy the underlying array and re-wrap the copy.

    Parameters
    ----------
    order : str, default "K"
        Forwarded to the underlying array's ``copy``.

    Returns
    -------
    The result of ``newclassfrominstance`` applied to the copied array, with
    ``self`` as the origin.
    """
    return self.newclassfrominstance(self._fa.copy(order=order), self)
# ------------------------------------------------------------
@classmethod
def newclassfrominstance(cls, instance, origin):
    """
    View `instance` as `cls` and carry over the display length of `origin`.

    Parameters
    ----------
    instance
        Array exposing ``view``.
    origin
        Object whose ``display_length`` is copied to the new view.

    Returns
    -------
    The view of `instance` as `cls`, with ``_display_length`` set.
    """
    viewed = instance.view(cls)
    viewed._display_length = origin.display_length
    return viewed
# ========================================================
[docs]
class Date(DateBase, TimeStampBase):
"""
Date arrays have an underlying int32 array. The array values are the number of days since January 1st, 1970.
Can be initialized from integer date values, strings, or matlab ordinal dates.
Parameters
----------
arr : array, categorical, list, or scalar
from_matlab : indicates that values are from matlab datenum
format : if initialized with string, specify a format string for strptime to parse date information
otherwise, will assume format is YYYYMMDD
Examples
--------
From strings:
>>> datestrings = tile(np.array(['2018-02-01', '2018-03-01', '2018-04-01']), 3)
>>> Date(datestrings)
Date([2018-02-01, 2018-03-01, 2018-04-01, 2018-02-01, 2018-03-01, 2018-04-01, 2018-02-01, 2018-03-01, 2018-04-01])
From riptable.Categorical (sometimes Matlab data comes in this way):
>>> c = Categorical(datestrings)
>>> c
Categorical([2018-02-01, 2018-03-01, 2018-04-01, 2018-02-01, 2018-03-01, 2018-04-01, 2018-02-01, 2018-03-01, 2018-04-01]) Length: 9
FastArray([1, 2, 3, 1, 2, 3, 1, 2, 3], dtype=int8) Base Index: 1
FastArray(['2018-02-01', '2018-03-01', '2018-04-01'], dtype='<U10') Unique count: 3
>>> d = Date(c)
>>> d
Date([2018-02-01, 2018-03-01, 2018-04-01, 2018-02-01, 2018-03-01, 2018-04-01, 2018-02-01, 2018-03-01, 2018-04-01])
From Matlab datenum:
>>> dates = FA([737061.0, 737062.0, 737063.0, 737064.0, 737065.0])
>>> Date(dates, from_matlab=True)
Date([2018-01-01, 2018-01-02, 2018-01-03, 2018-01-04, 2018-01-05])
From riptable DateTimeNano:
>>> dtn = DateTimeNano.random(5)
>>> dtn
DateTimeNano([20150318 13:28:01.853344227, 20150814 17:34:43.991344669, 19761204 04:30:52.680683459, 20120524 06:44:13.482424912, 19830803 17:12:54.771824294])
>>> Date(dtn)
Date([2015-03-18, 2015-08-14, 1976-12-04, 2012-05-24, 1983-08-03])
"""
# Metadata version and defaults for the .SDS file format.
MetaVersion = 1
MetaDefault = {
# vars for container loader
"name": "Date",
"typeid": TypeId.Date,
"version": 0, # if no version, assume the data was saved before versions were implemented
"instance_vars": {"_display_length": DisplayLength.Long},
}
# Operator names that _funnel_mathops rejects with a TypeError.
forbidden_mathops = ("__mul__", "__imul__")
def __new__(cls, arr, from_matlab=False, format=None):
# Build a Date as a view (arr.view(cls)) over the converted input.
# Accepted inputs: list, scalar or np.ndarray (including Categorical and DateTimeNano);
# anything else raises TypeError.
# from_matlab: run the array through _convert_matlab_days.
# format: forwarded to _convert_datestring for string arrays.
instance = None
# lists and scalars are wrapped in a FastArray first
if isinstance(arr, list) or np.isscalar(arr):
arr = FastArray(arr)
if isinstance(arr, np.ndarray):
# if this is the same class, do nothing
if not isinstance(arr, Date):
# sometimes matlab dates are categoricals
if isinstance(arr, TypeRegister.Categorical):
try:
cats = arr.category_array
# flip to correct integer before re-expanding
if cats.dtype.char in ("U", "S"):
cats = cls._convert_datestring(cats).astype(np.int32, copy=False)
arr = TypeRegister.Categorical(arr._fa, cats)
arr = arr.expand_array
# NOTE(review): bare except converts every failure here into TypeError and drops the original cause
except:
raise TypeError(f"Could not re-expand categorical to array in mode {arr.category_mode.name}")
# fix datetimenano so the days match display (account for previous daylight savings fixup)
elif isinstance(arr, TypeRegister.DateTimeNano):
# there is a bug here -- do not think a timezone fixup is necessary
# arr = arr._timezone.fix_dst(arr._fa, arr._timezone._dst_cutoffs)
# nanoseconds are floor-divided down to whole days
arr = arr._fa // NANOS_PER_DAY
# flip strings to days from 1970
if arr.dtype.char in ("U", "S"):
arr = cls._convert_datestring(arr, format=format)
# flip matlab ordinal dates to days from 1970
if from_matlab:
arr = cls._convert_matlab_days(arr)
# integer and float arrays are cast to int32 (no copy when already int32)
elif arr.dtype.char in NumpyCharTypes.AllInteger + NumpyCharTypes.AllFloat:
arr = arr.astype(np.int32, copy=False)
else:
raise TypeError(f"Could not initialize Date object with array of type {arr.dtype}.")
else:
raise TypeError(
f"Date objects must be initialized with numeric or string arrays, lists or scalars. Got {type(arr)}"
)
# view the converted array as this class; new instances always start with the long display length
instance = arr.view(cls)
instance._display_length = DisplayLength.Long
return instance
# ------------------------------------------------------------
def __init__(self, arr, from_matlab=False, format=None):
    """No-op initializer; the signature mirrors ``__new__``, which performs the construction."""
# ------------------------------------------------------------
# Date
[docs]
def strftime(self, format, dtype="O"):
    """
    Convert each `Date` element to a formatted string representation.

    Parameters
    ----------
    format : str
        One or more format codes supported by the
        :py:meth:`datetime.date.strftime` function of the standard
        Python distribution. For codes, see
        :ref:`python:strftime-strptime-behavior`.
    dtype : {"O", "S", "U"}, default "O"
        The data type of the returned array elements:

        - "O": object string
        - "S": byte string
        - "U": unicode string

    Returns
    -------
    `ndarray`
        An `ndarray` of strings.

    See Also
    --------
    DateScalar.strftime, DateTimeNano.strftime, DateTimeNanoScalar.strftime,
    TimeSpan.strftime, TimeSpanScalar.strftime

    Notes
    -----
    This routine has not been sped up yet. It's also not NaN-aware: NaNs
    are converted to the timestamp of the epoch (01-01-1970), then formatted.

    Examples
    --------
    >>> d = rt.Date(['20210101', '20210519', '20220308'])
    >>> d.strftime('%D')
    array(['01/01/21', '05/19/21', '03/08/22'], dtype=object)
    """
    # all the work is done by the shared _strftime helper
    formatted = self._strftime(format, dtype=dtype)
    return formatted
# ------------------------------------------------------------
[docs]
def get_scalar(self, scalarval):
    """Wrap a single value in a ``DateScalar`` created with ``_from=self``."""
    scalar = DateScalar(scalarval, _from=self)
    return scalar
# -------------------------------------------------------
[docs]
def diff(self, periods=1):
    """
    Difference of the underlying array, wrapped as a `DateSpan`.

    Parameters
    ----------
    periods : int, default 1
        Forwarded to the underlying array's ``diff``.

    Returns
    -------
    DateSpan
    """
    return DateSpan(self._fa.diff(periods=periods))
# ------------------------------------------------------------
[docs]
@classmethod
def _convert_datestring(cls, arr, format=None):
    """
    Convert date strings to whole days; used when constructing from an array
    of strings or a categorical.

    Parameters
    ----------
    arr
        Array of date strings.
    format : str, optional
        Format handed to ``strptime_to_nano``. When omitted,
        ``datestring_to_nano`` is used (default assumes YYYYMMDD).

    Returns
    -------
    The parsed UTC nanoseconds floor-divided by ``NANOS_PER_DAY``.
    """
    if format is not None:
        nanos = strptime_to_nano(arr, format, from_tz="UTC")
    else:
        # default assumes YYYYMMDD
        nanos = datestring_to_nano(arr, from_tz="UTC")
    return nanos._fa // NANOS_PER_DAY
# ------------------------------------------------------------
[docs]
@classmethod
def _convert_matlab_days(cls, arr):
    """
    Shift matlab datenums by ``MATLAB_EPOCH_DATENUM``.

    TODO: move this to a more generic superclass - almost exactly the same as
    DateTimeNano._convert_matlab_days

    Parameters
    ----------
    arr : array of matlab datenums (1 is 1-Jan-0000)

    Returns
    -------
    An int32 array of ``arr - MATLAB_EPOCH_DATENUM``; positions that were NaN
    in the input hold ``cls.NAN_DATE``.
    """
    # remember the NaNs before the cast to integer
    nan_positions = isnan(arr)
    # matlab dates come in as float
    days = FastArray(arr, dtype=np.int32) - MATLAB_EPOCH_DATENUM
    putmask(days, nan_positions, cls.NAN_DATE)
    return days
# ------------------------------------------------------------
[docs]
def get_classname(self):
    """
    Return the name of the class this method is defined in.

    The name is read from the implicit ``__class__`` cell, not from
    ``type(self)``.
    """
    owner = __class__
    return owner.__name__
# ------------------------------------------------------------
[docs]
@staticmethod
def display_convert_func(date_num, itemformat: ItemFormat):
    """
    Format one date number for display.

    Parameters
    ----------
    date_num
        The value to format.
    itemformat : ItemFormat
        Passed through to ``Date.format_date_num``.

    Returns
    -------
    The result of ``Date.format_date_num(date_num, itemformat)``.
    """
    # TODO: apply ItemFormat options that were passed in
    text = Date.format_date_num(date_num, itemformat)
    return text
# ------------------------------------------------------------
# ------------------------------------------------------------
# ------------------------------------------------------------
[docs]
def fill_invalid(self, shape=None, dtype=None, inplace=True):
    """
    Fill with the `Date` invalid value (``NAN_DATE``).

    Parameters
    ----------
    shape : optional
        Forwarded to ``_fill_invalid_internal``.
    dtype : optional
        Ignored; the array's own dtype is always used.
    inplace : bool, default True
        Forwarded to ``_fill_invalid_internal``.

    Returns
    -------
    `Date` or None
        None when ``_fill_invalid_internal`` returns None, otherwise its
        result wrapped in a `Date`.
    """
    filled = self._fill_invalid_internal(
        shape=shape, dtype=self.dtype, fill_val=self.NAN_DATE, inplace=inplace
    )
    return None if filled is None else Date(filled)
# ------------------------------------------------------------
[docs]
def isnan(self):
    """
    Return a boolean array that's True for each `Date` element that's
    a NaN (Not a Number), False otherwise.

    Both the DateTime NaN (0) and Riptable's int32 sentinel value are
    considered to be NaN.

    Returns
    -------
    `FastArray`
        A `FastArray` of booleans that's True for each NaN element, False
        otherwise.

    See Also
    --------
    Date.isnotnan, DateTimeNano.isnan, DateTimeNano.isnotnan, riptable.isnan,
    riptable.isnotnan, riptable.isnanorzero, FastArray.isnan,
    FastArray.isnotnan, FastArray.notna, FastArray.isnanorzero,
    Categorical.isnan, Categorical.isnotnan, Categorical.notna
    Dataset.mask_or_isnan :
        Return a boolean array that's True for each `Dataset` row that
        contains at least one NaN.
    Dataset.mask_and_isnan :
        Return a boolean array that's True for each all-NaN `Dataset` row.

    Notes
    -----
    Riptable currently uses 0 for the DateTime NaN value. This constant is
    held in the `DateTimeBase` class.

    Examples
    --------
    >>> d = rt.Date.range('20190201', days = 3, step = 2)
    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d
    Date(['Inv', 'Inv', '2019-02-05'])
    >>> d.isnan()
    FastArray([ True,  True, False])
    """
    # zero counts as invalid too, hence isnanorzero on the raw array
    raw = self._fa
    return raw.isnanorzero()
# ------------------------------------------------------------
[docs]
def isnotnan(self):
    """
    Return a boolean array that's True for each `Date` element that's
    not a NaN (Not a Number), False otherwise.

    Both the DateTime NaN (0) and Riptable's int32 sentinel value are
    considered to be NaN.

    Returns
    -------
    `FastArray`
        A `FastArray` of booleans that's True for each non-NaN element,
        False otherwise.

    See Also
    --------
    Date.isnan, DateTimeNano.isnan, DateTimeNano.isnotnan, riptable.isnan,
    riptable.isnotnan, riptable.isnanorzero, FastArray.isnan,
    FastArray.isnotnan, FastArray.notna, FastArray.isnanorzero,
    Categorical.isnan, Categorical.isnotnan, Categorical.notna
    Dataset.mask_or_isnan :
        Return a boolean array that's True for each `Dataset` row that
        contains at least one NaN.
    Dataset.mask_and_isnan :
        Return a boolean array that's True for each all-NaN `Dataset` row.

    Notes
    -----
    Riptable currently uses 0 for the DateTime NaN value. This constant is
    held in the `DateTimeBase` class.

    Examples
    --------
    >>> d = rt.Date.range('20190201', days = 3, step = 2)
    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d
    Date(['Inv', 'Inv', '2019-02-05'])
    >>> d.isnotnan()
    FastArray([False, False,  True])
    """
    # element-wise complement of isnan()
    invalid = self.isnan()
    return ~invalid
# ------------------------------------------------------------
[docs]
def isfinite(self):
    """
    Return a boolean array that's True for each `Date` element that's
    not a NaN (Not a Number), False otherwise.

    Both the DateTime NaN (0) and Riptable's int32 sentinel value are
    considered to be NaN.

    Returns
    -------
    `FastArray`
        A `FastArray` of booleans that's True for each non-NaN element,
        False otherwise.

    See Also
    --------
    Date.isnan, DateTimeNano.isnan, DateTimeNano.isnotnan, riptable.isnan,
    riptable.isnotnan, riptable.isnanorzero, FastArray.isnan,
    FastArray.isnotnan, FastArray.notna, FastArray.isnanorzero,
    Categorical.isnan, Categorical.isnotnan, Categorical.notna
    Dataset.mask_or_isnan :
        Return a boolean array that's True for each `Dataset` row that
        contains at least one NaN.
    Dataset.mask_and_isnan :
        Return a boolean array that's True for each all-NaN `Dataset` row.

    Notes
    -----
    Riptable currently uses 0 for the DateTime NaN value. This constant is
    held in the `DateTimeBase` class.

    Examples
    --------
    >>> d = rt.Date.range('20190201', days = 3, step = 2)
    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d
    Date(['Inv', 'Inv', '2019-02-05'])
    >>> d.isfinite()
    FastArray([False, False,  True])
    """
    # for Date, "finite" is simply "not invalid"
    invalid = self.isnan()
    return ~invalid
# ------------------------------------------------------------
[docs]
def isnotfinite(self):
    """
    Return a boolean array that's True for each `Date` element that's
    a NaN (Not a Number), False otherwise.

    Both the DateTime NaN (0) and Riptable's int32 sentinel value are
    considered to be NaN.

    Returns
    -------
    `FastArray`
        A `FastArray` of booleans that's True for each NaN element, False
        otherwise.

    See Also
    --------
    Date.isnotnan, DateTimeNano.isnan, DateTimeNano.isnotnan, riptable.isnan,
    riptable.isnotnan, riptable.isnanorzero, FastArray.isnan,
    FastArray.isnotnan, FastArray.notna, FastArray.isnanorzero,
    Categorical.isnan, Categorical.isnotnan, Categorical.notna
    Dataset.mask_or_isnan :
        Return a boolean array that's True for each `Dataset` row that
        contains at least one NaN.
    Dataset.mask_and_isnan :
        Return a boolean array that's True for each all-NaN `Dataset` row.

    Notes
    -----
    Riptable currently uses 0 for the DateTime NaN value. This constant is
    held in the `DateTimeBase` class.

    Examples
    --------
    >>> d = rt.Date.range('20190201', days = 3, step = 2)
    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d
    Date(['Inv', 'Inv', '2019-02-05'])
    >>> d.isnotfinite()
    FastArray([ True,  True, False])
    """
    # same test as isnan(): zero or sentinel on the raw array
    raw = self._fa
    return raw.isnanorzero()
# ------------------------------------------------------------
@property
def yyyymmdd(self):
    """
    The ``yyyymmdd`` property of a GMT-to-GMT `DateTimeNano` built from these
    dates (day counts multiplied by ``NANOS_PER_DAY``).
    """
    nanos = self._fa * NANOS_PER_DAY
    as_datetime = DateTimeNano(nanos, from_tz="GMT", to_tz="GMT")
    return as_datetime.yyyymmdd
# ------------------------------------------------------------
@property
def _year_splits(self):
    """Start of each year (Jan. 1st, 1970 - 2099) as a day count: ``UTC_1970_DAY_SPLITS``."""
    splits = UTC_1970_DAY_SPLITS
    return splits
# ------------------------------------------------------------
@property
def _yearday_splits(self):
    """Midnight on the 1st of the month in days since the beginning of the year."""
    splits = YDAY_SPLITS
    return splits
# ------------------------------------------------------------
@property
def _yearday_splits_leap(self):
    """Midnight on the 1st of the month in days since the beginning of the year, for a leap year."""
    splits = YDAY_SPLITS_LEAP
    return splits
# ------------------------------------------------------------
@property
def year(self):
    """
    The year of each `Date` element.

    Years are currently limited to 1970-2099. To expand the range, add
    to the UTC_1970_DAY_SPLITS table.

    NaN or invalid `Date` values return Riptable's int32 sentinel value
    (-MAXINT).

    Returns
    -------
    `FastArray`
        A `FastArray` of integers representing the year of each `Date` element.

    See Also
    --------
    Date.month, Date.monthyear, Date.day_of_year, Date.day_of_month,
    Date.day_of_week

    Examples
    --------
    >>> d = rt.Date(['2016-02-01', '2017-02-01', '2018-02-01'])
    >>> d.year
    FastArray([2016, 2017, 2018])

    With NaN and invalid values:

    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d.year
    FastArray([-2147483648, -2147483648,        2018])
    """
    # no DST correction is requested for plain dates
    return _apply_inv_mask(self, self._year(self._fa, fix_dst=False))
# ------------------------------------------------------------
@property
def month(self, arr=None):
    """
    The month of each `Date` element.

    Months are represented as integers: 1 = Jan, 2 = Feb, etc.

    NaN or invalid `Date` values return Riptable's int32 sentinel value
    (-MAXINT).

    Returns
    -------
    `FastArray`
        A `FastArray` of integers representing the month of each `Date`
        element.

    See Also
    --------
    Date.monthyear, Date.year, Date.day_of_year, Date.day_of_month,
    Date.day_of_week

    Examples
    --------
    >>> d = rt.Date(['2016-02-01', '2017-03-01', '2018-04-01'])
    >>> d.month
    FastArray([2, 3, 4])

    With NaN and invalid values:

    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d.month
    FastArray([-2147483648, -2147483648,           4])
    """
    # `arr` is part of the signature but is not used
    months = self._month()
    return _apply_inv_mask(self, months)
# ------------------------------------------------------------
@property
def monthyear(self, arr=None):
    """
    The month and year of each `Date` element.

    Each month-year value is a byte string with a three-letter month
    abbreviation concatenated with a four-digit year.

    NaN or invalid `Date` values return Riptable's int32 sentinel value
    (-MAXINT) as a byte string.

    Returns
    -------
    `FastArray`
        A `FastArray` of byte strings containing the month and year of
        each `Date` element.

    See Also
    --------
    Date.year, Date.month, Date.day_of_year, Date.day_of_month,
    Date.day_of_week

    Examples
    --------
    >>> d = rt.Date(['2000-02-29', '2018-12-25', '2019-03-18'])
    >>> d.monthyear
    FastArray([b'Feb2000', b'Dec2018', b'Mar2019'], dtype='|S14')

    With NaN and invalid values:

    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d.monthyear
    FastArray([b'-2147483648', b'-2147483648', b'Mar2019'], dtype='|S14')
    """
    # `arr` is part of the signature but is not used
    month_numbers = self.month
    year_bytes = self.year.astype("S")
    # months are 1-based, the lookup table is 0-based
    month_names = MONTH_STR_ARRAY[month_numbers - 1]
    return month_names + year_bytes
# ------------------------------------------------------------
@property
def is_leapyear(self):
    """
    Return a boolean array that's True for each `Date` element that's
    in a leap year, False otherwise.

    A year counts as a leap year when it is divisible by 4. No invalid mask
    is applied here, so the value for NaN or invalid elements is whatever
    the year lookup yields for them (the documented outcome is False).

    Returns
    -------
    `FastArray`
        A `FastArray` of booleans that's True for each `Date` element
        that's in a leap year, False otherwise.

    See Also
    --------
    Date.is_weekend, Date.is_weekday

    Examples
    --------
    >>> d = rt.Date(['1996-01-01', '2000-01-01', '2004-01-01', '2022-01-01'])
    >>> d.is_leapyear
    FastArray([ True,  True,  True, False])

    With NaN and invalid values:

    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d.is_leapyear
    FastArray([False, False,  True, False])
    """
    years = self._year(self._fa, fix_dst=False)
    # the day-in-year offset computed here previously was never used, so only the year is needed
    return years % 4 == 0
# ------------------------------------------------------------
@property
def day_of_year(self):
    """
    The day of the year of each `Date` element.

    Days are represented as integers: 1 = Jan 1, 32 = Feb 1, etc.

    NaN or invalid `Date` values return Riptable's int32 sentinel value
    (-MAXINT).

    Returns
    -------
    `FastArray`
        A `FastArray` of integers representing the day of the year of
        each `Date` element.

    See Also
    --------
    Date.day_of_month, Date.day_of_week, Date.year, Date.month,
    Date.monthyear

    Examples
    --------
    >>> d = rt.Date(['2019-01-01', '2020-02-29', '2021-12-31'])
    >>> d.day_of_year
    FastArray([  1,  60, 365])

    With NaN and invalid values:

    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d.day_of_year
    FastArray([-2147483648, -2147483648,         365])
    """
    years = self._year(self._fa, fix_dst=False)
    # day count of Jan. 1st of each element's year
    year_start = self._year_splits[years - 1970]
    ordinal = self._fa - year_start
    # day of year starts at 1
    ordinal += 1
    return _apply_inv_mask(self, ordinal)
# ------------------------------------------------------------
@property
def day_of_month(self):
    """
    The day of the month of each `Date` element.

    Days are represented as integers: 1 = Jan 1, 31 = Jan 31, etc.

    NaN or invalid `Date` values return Riptable's int32 sentinel value
    (-MAXINT).

    Returns
    -------
    `FastArray`
        A `FastArray` of integers representing the day of the month of
        each `Date` element.

    See Also
    --------
    Date.day_of_year, Date.day_of_week, Date.year, Date.month,
    Date.monthyear

    Examples
    --------
    >>> d = rt.Date(['2019-01-01', '2020-02-29', '2021-12-31'])
    >>> d.day_of_month
    FastArray([ 1, 29, 31])

    With NaN and invalid values:

    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d.day_of_month
    FastArray([-2147483648, -2147483648,          31])
    """
    years = self._year(self._fa, fix_dst=False)

    # days elapsed since the start of each element's year
    day_in_year = self._fa - self._year_splits[years - 1970]

    # first pass: treat every element as if it were in a non-leap year
    plain_splits = self._yearday_splits
    month_pos = plain_splits.searchsorted(day_in_year, side="right") - 1
    day_in_month = day_in_year - plain_splits[month_pos]

    # second pass: redo the leap-year elements with the leap-year split table
    in_leap = (years % 4) == 0
    leap_splits = self._yearday_splits_leap
    leap_days = day_in_year[in_leap]
    leap_pos = leap_splits.searchsorted(leap_days, side="right") - 1
    day_in_month[in_leap] = leap_days - leap_splits[leap_pos]

    # unlike month, weekday, hour, etc. day of month starts at 1
    return _apply_inv_mask(self, day_in_month + 1)
# ------------------------------------------------------------
@property
def day_of_week(self):
    """
    The day of the week of each `Date` element.

    Days are represented as integers: 0 = Monday, 1 = Tuesday, ...,
    6 = Sunday.

    NaN or invalid `Date` values return Riptable's int32 sentinel value
    (-MAXINT).

    Returns
    -------
    `FastArray`
        A `FastArray` of integers representing the day of the week of
        each `Date` element.

    See Also
    --------
    Date.day_of_year, Date.day_of_month, Date.year, Date.month,
    Date.monthyear

    Examples
    --------
    >>> d = rt.Date(['2019-02-11', '2019-02-12', '2019-02-13',
    ...              '2019-02-14', '2019-02-15', '2019-02-16', '2019-02-17'])
    >>> d.day_of_week
    FastArray([0, 1, 2, 3, 4, 5, 6])

    With NaN and invalid values:

    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d.day_of_week
    FastArray([-2147483648, -2147483648,           2,           3,
                         4,           5,           6])
    """
    # shift by the weekday of the epoch, then wrap into 0..6
    shifted = self._fa + EPOCH_DAY_OF_WEEK
    return _apply_inv_mask(self, shifted % 7)
# ------------------------------------------------------------
@property
def is_weekend(self):
    """
    Return a boolean array that's True for each `Date` element that's
    a Saturday or Sunday, False otherwise.

    NaN or invalid `Date` values return False.

    Returns
    -------
    `FastArray`
        A `FastArray` of booleans that's True for each `Date` element
        that's a Saturday or Sunday, False otherwise.

    See Also
    --------
    Date.is_weekday, Date.is_leapyear

    Examples
    --------
    >>> d = rt.Date(['2019-02-09', '2019-02-10', '2019-02-11', '2019-02-12',
    ...              '2019-02-13', '2019-02-14', '2019-02-15', '2019-02-16', '2019-02-17'])
    >>> d.is_weekend
    FastArray([ True,  True, False, False, False, False, False,  True,  True])

    With NaN and invalid values:

    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d.is_weekend
    FastArray([False, False, False, False, False, False, False,  True,  True])
    """
    # day_of_week is 0 = Monday, so Saturday and Sunday are 5 and 6
    weekend = self.day_of_week > 4
    return _apply_inv_mask(self, weekend)
# ------------------------------------------------------------
@property
def is_weekday(self):
    """
    Return a boolean array that's True for each `Date` element that's
    a weekday (Monday-Friday), False otherwise.

    NaN or invalid `Date` values return False.

    Returns
    -------
    `FastArray`
        A `FastArray` of booleans that's True for each `Date` element
        that's a weekday, False otherwise.

    See Also
    --------
    Date.is_weekend, Date.is_leapyear

    Examples
    --------
    >>> d = rt.Date(['2019-02-11', '2019-02-12', '2019-02-13',
    ...              '2019-02-14', '2019-02-15', '2019-02-16', '2019-02-17'])
    >>> d.is_weekday
    FastArray([ True,  True,  True,  True,  True, False, False])

    With NaN and invalid values:

    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d.is_weekday
    FastArray([False, False,  True,  True,  True, False, False])
    """
    # day_of_week is 0 = Monday, so Monday through Friday are 0..4
    weekday = self.day_of_week < 5
    return _apply_inv_mask(self, weekday)
# ------------------------------------------------------------
@property
def seconds_since_epoch(self):
    """
    Seconds since the epoch for each `Date` element.

    Many existing python datetime routines expect seconds since epoch.
    This call is to eliminate "magic numbers" like 3600 from code.

    Returns
    -------
    The day counts multiplied by ``SECONDS_PER_DAY``, with the invalid mask
    applied.
    """
    seconds = self._fa * SECONDS_PER_DAY
    return _apply_inv_mask(self, seconds)
# ------------------------------------------------------------
[docs]
@classmethod
def hstack(cls, dates):
    """
    hstacks Date objects and returns a new Date object.

    Will be called by riptable.hstack() if the first item in the sequence is a Date object.

    Parameters
    ----------
    dates : list or tuple of Date objects

    Examples
    --------
    >>> d1 = Date('2015-02-01')
    >>> d2 = Date(['2016-02-01', '2017-02-01', '2018-02-01'])
    >>> hstack([d1, d2])
    Date([2015-02-01, 2016-02-01, 2017-02-01, 2018-02-01])
    """
    # pass the subclass to the parent class routine
    stacked = hstack_any(dates, cls, Date)
    return stacked
# ------------------------------------------------------------
[docs]
@classmethod
def range(cls, start, end=None, days=None, step=1, format=None, closed=None):
    """
    Return a `Date` object of dates within a given interval, spaced
    by `step`.

    Note: Either `end` or `days` must be provided, but providing both
    results in unexpected behavior. In future versions, an error will be
    raised.

    Parameters
    ----------
    start : int or str
        Start date as an integer (YYYYMMDD) or string. If the string is not
        in 'YYYYMMDD' format, `format` is required.
    end : int or str, optional
        End date as an integer (YYYYMMDD) or string. If the string is not
        in 'YYYYMMDD' format, `format` is required. If `end` is not
        provided, the number of dates to generate must be specified
        with `days`.
    days : int, optional
        Instead of using `end`, use `days` to specify the number of
        dates to generate. Required if `end` isn't provided. Providing
        both `end` and `days` results in unexpected behavior.
    step : int, default 1
        The number of days between generated dates.
    format : str, optional
        For a string `start` or `end` value, one or more format codes
        supported by the :py:meth:`datetime.strptime` function of the
        standard Python distribution. For codes, see
        :ref:`python:strftime-strptime-behavior`. The format code is used
        to parse the string representation and convert it to a `Date`
        element.
    closed : {None, 'left', 'right'}, default None
        Determines whether the `start` and `end` dates are included in the
        result. Applies only when `start` and `end` are specified and ``step=1``.

        - `left`: Start date is included, end date is excluded.
        - `right`: End date is included, start date is excluded.
        - None (the default): Both the start and end dates are included.

    Returns
    -------
    `Date`
        A `Date` object of dates within a given interval, spaced by `step`.

    Raises
    ------
    TypeError
        If `start` or `end` is neither an integer nor a string.
    ValueError
        If neither `end` nor `days` is given, or `closed` is not one of the
        accepted values.

    See Also
    --------
    DateTimeNano.random : Return an array of randomly generated `DateTimeNano` values.
    .riptable.arange : Return an array of evenly spaced values within a specified interval.

    Examples
    --------
    With integer `start` and `end` dates:

    >>> rt.Date.range(20230101, 20230105)
    Date(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05'])

    With string `start` and `end` dates, and a format code:

    >>> rt.Date.range('01 January, 2023', '05 January, 2023', format='%d %B, %Y')
    Date(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05'])

    If `end` isn't specified, `days` is required:

    >>> rt.Date.range(20230101, days=5)
    Date(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05'])

    Changing the `step`:

    >>> rt.Date.range(20230101, 20230105, step=2)
    Date(['2023-01-01', '2023-01-03'])

    A left-inclusive, right-exclusive range:

    >>> rt.Date.range(20230101, 20230105, closed='left')
    Date(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04'])
    """
    if isinstance(start, (int, np.integer)):
        start = str(start)

    # start and end are converted separately for a more accurate error
    if not isinstance(start, (str, bytes)):
        raise TypeError(f"Start date must be string or integer. Got {type(start)}")
    start = cls(start, format=format)._fa[0]

    if end is None:
        if days is None:
            raise ValueError("Must set either ``end`` or ``days`` keyword.")
        # compensate for step
        end = start + (days * step)
        end = cls(end)._fa[0]
    else:
        if isinstance(end, (int, np.integer)):
            end = str(end)
        if not isinstance(end, (str, bytes)):
            # report the type of the offending `end` value (start has already been converted here)
            raise TypeError(f"End date must be string or integer. Got {type(end)}")
        end = cls(end, format=format)._fa[0]

    if days is None and step == 1:
        # include one or both ends
        if closed is None:
            end += 1
        elif closed == "right":
            end += 1
            start += 1
        elif closed == "left":
            pass
        else:
            raise ValueError(f'Closed has to be either "left", "right" or None. Got {closed}')

    arr = arange(start, end, step, dtype=np.int32)
    return cls(arr)
# ------------------------------------------------------------
[docs]
def _date_compare_check(self, funcname, other):
    """
    Funnel for all comparison operations.

    Helps Date interact with DateTimeNano, TimeSpan.

    Parameters
    ----------
    funcname : str
        Name of the comparison method (e.g. ``"__eq__"``).
    other
        Right-hand operand of the comparison.

    Returns
    -------
    The result of ``_preserve_invalid_comparison`` for the prepared operands.

    Raises
    ------
    TypeError
        If `other` is a DateSpan, TimeSpan, DateSpanScalar or TimeSpanScalar.
    """
    caller = self._fa

    if isinstance(other, (DateSpan, TimeSpan, DateSpanScalar, TimeSpanScalar)):
        raise TypeError(f"Cannot perform {funcname} comparison operation between {type(self)} and {type(other)}.")

    elif isinstance(other, DateTimeNano):
        # fix the timezone to match the display of the DateTimeNano;
        # the nanosecond array is built once, directly inside the constructor call
        to_tz = other._timezone._to_tz
        caller = DateTimeNano(self._fa * NANOS_PER_DAY, from_tz=to_tz, to_tz=to_tz)

    elif isinstance(other, Date):
        # compare against the raw day counts
        other = other._fa

    elif isinstance(other, (str, bytes, np.ndarray)):
        # strings and arrays (Categorical included) go through the Date constructor
        other = Date(other)

    # let everything else fall through for FastArray to catch
    # restore invalids
    return self._preserve_invalid_comparison(caller, other, funcname)
# -------------------COMPARISONS------------------------------
# ------------------------------------------------------------
[docs]
def __ne__(self, other):
    """Element-wise ``!=``, routed through ``_date_compare_check``."""
    outcome = self._date_compare_check("__ne__", other)
    return outcome
[docs]
def __eq__(self, other):
    """Element-wise ``==``, routed through ``_date_compare_check``."""
    outcome = self._date_compare_check("__eq__", other)
    return outcome
[docs]
def __ge__(self, other):
    """Element-wise ``>=``, routed through ``_date_compare_check``."""
    outcome = self._date_compare_check("__ge__", other)
    return outcome
[docs]
def __gt__(self, other):
    """Element-wise ``>``, routed through ``_date_compare_check``."""
    outcome = self._date_compare_check("__gt__", other)
    return outcome
[docs]
def __le__(self, other):
    """Element-wise ``<=``, routed through ``_date_compare_check``."""
    outcome = self._date_compare_check("__le__", other)
    return outcome
[docs]
def __lt__(self, other):
    """Element-wise ``<``, routed through ``_date_compare_check``."""
    outcome = self._date_compare_check("__lt__", other)
    return outcome
# ------------------------------------------------------------
[docs]
def __add__(self, value):
    """
    **Addition rules**

    Date + Date = TypeError
    Date + DateTimeNano = TypeError
    Date + DateSpan = Date
    Date + TimeSpan = DateTimeNano

    All other operands will be treated as DateSpan and return Date.
    """
    total = self._funnel_mathops("__add__", value)
    return total
[docs]
def __iadd__(self, value):
    """In-place addition, routed through ``_funnel_mathops``."""
    total = self._funnel_mathops("__iadd__", value)
    return total
[docs]
def __radd__(self, value):
    """Reflected addition; handled exactly like ``__add__``."""
    total = self._funnel_mathops("__add__", value)
    return total
# ------------------------------------------------------------
[docs]
def __sub__(self, value):
    """
    Subtract `value` from this Date.

    **Subtraction rules**

    Date - Date = DateSpan
    Date - DateSpan = Date
    Date - DateTimeNano = TimeSpan
    Date - TimeSpan = DateTimeNano

    All other operands will be treated as DateSpan and return Date.
    """
    if isinstance(value, Date):
        # need routine for int32 - int32 => int32 (operands have 0 as invalid, result has sentinel as invalid)
        # right now, using the double return, gets recasted in the constructor
        two_input_math = TypeRegister.MathLedger._BASICMATH_TWO_INPUTS
        day_diff = two_input_math((self, value), MATH_OPERATION.SUBDATETIMES, 0)
        return DateSpan(day_diff)

    if isinstance(value, DateTimeNano):
        # Scale days to nanoseconds and let DateTimeNano perform the subtraction.
        self_as_nano = DateTimeNano(self._fa * NANOS_PER_DAY, from_tz=value._timezone._from_tz)
        return self_as_nano - value

    # Everything else goes through the shared math funnel.
    return self._funnel_mathops("__sub__", value)
[docs]
def __isub__(self, value):
    """Forward ``-=`` to `_funnel_mathops` under the name ``__isub__``."""
    return self._funnel_mathops("__isub__", value)
[docs]
def __rsub__(self, value):
    """
    Reflected subtraction (``value - self``).

    Only a Date or DateTimeNano left operand is supported; its own ``__sub__`` does the work.
    Any other left operand raises NotImplementedError.
    """
    if not isinstance(value, (Date, DateTimeNano)):
        raise NotImplementedError
    return value.__sub__(self)
[docs]
def __mul__(self, other):
    """Multiplication (``*``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __matmul__(self, other):
    """Matrix multiplication (``@``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
# need to check properties to see if division is happening
# def __truediv__(self, other): raise NotImplementedError
# def __floordiv__(self, other): raise NotImplementedError
# def __mod__(self, other): raise NotImplementedError
# def __divmod__(self, other): raise NotImplementedError
[docs]
def __pow__(self, other, modulo=None):
    """Exponentiation (``**``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __lshift__(self, other):
    """Left shift (``<<``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __rshift__(self, other):
    """Right shift (``>>``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __and__(self, other):
    """Bitwise and (``&``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __xor__(self, other):
    """Bitwise xor (``^``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __or__(self, other):
    """Bitwise or (``|``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __rmul__(self, other):
    """Reflected multiplication is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __rmatmul__(self, other):
    """Reflected matrix multiplication is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __rtruediv__(self, other):
    """Reflected true division is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __rfloordiv__(self, other):
    """Reflected floor division is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __rmod__(self, other):
    """Reflected modulo is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __rdivmod__(self, other):
    """Reflected ``divmod`` is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __rpow__(self, other):
    """Reflected exponentiation is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __rlshift__(self, other):
    """Reflected left shift is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __rrshift__(self, other):
    """Reflected right shift is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __rand__(self, other):
    """Reflected bitwise and is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __rxor__(self, other):
    """Reflected bitwise xor is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __ror__(self, other):
    """Reflected bitwise or is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __imul__(self, other):
    """In-place multiplication (``*=``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __imatmul__(self, other):
    """In-place matrix multiplication (``@=``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __itruediv__(self, other):
    """In-place true division (``/=``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __ifloordiv__(self, other):
    """In-place floor division (``//=``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __imod__(self, other):
    """In-place modulo (``%=``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __ipow__(self, other, modulo=None):
    """In-place exponentiation (``**=``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __ilshift__(self, other):
    """In-place left shift (``<<=``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __irshift__(self, other):
    """In-place right shift (``>>=``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __iand__(self, other):
    """In-place bitwise and (``&=``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __ixor__(self, other):
    """In-place bitwise xor (``^=``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __ior__(self, other):
    """In-place bitwise or (``|=``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __neg__(self):
    """Unary negation (``-x``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __pos__(self):
    """Unary plus (``+x``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __abs__(self):
    """``abs()`` is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __invert__(self):
    """Bitwise inversion (``~x``) is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __complex__(self):
    """Conversion via ``complex()`` is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __int__(self):
    """Conversion via ``int()`` is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __float__(self):
    """Conversion via ``float()`` is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __round__(self, ndigits=0):
    """``round()`` is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __trunc__(self):
    """``math.trunc()`` is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __floor__(self):
    """``math.floor()`` is not supported; always raises NotImplementedError."""
    raise NotImplementedError
[docs]
def __ceil__(self):
    """``math.ceil()`` is not supported; always raises NotImplementedError."""
    raise NotImplementedError
# ------------------------------------------------------------
[docs]
def _check_mathops(self, funcname, value):
    """
    Decide the result type and the operand's invalid mask for a Date math operation.

    Parameters
    ----------
    funcname : name of ufunc
    value : original operand in math operation

    Returns
    -------
    (return_type, other_inv_mask)
        ``return_type`` is Date by default and DateSpan when `value` is a Date.
        When `value` is a NaN scalar, ``return_type`` is None and ``other_inv_mask``
        holds the final result (a Date built from ``self.copy_invalid()``).

    Raises
    ------
    TypeError
        If `value` is a Date and `funcname` is ``__add__``, ``__iadd__`` or ``__isub__``.
    """
    # Date is the default result type; no mask unless one of the branches sets it.
    result_type = Date
    inv_mask = None

    if isinstance(value, Date):
        if funcname in ("__add__", "__iadd__", "__isub__"):
            raise TypeError(f"Cannot {funcname} operation between Date and Date")
        result_type, inv_mask = DateSpan, value.isnan()

    elif isinstance(value, (int, float, np.number)):
        # An invalid scalar short-circuits: hand back an all-invalid Date in the mask slot.
        if isnan(value):
            result_type, inv_mask = None, Date(self.copy_invalid())

    elif isinstance(value, np.ndarray):
        inv_mask = isnan(value)

    return result_type, inv_mask
# ------------------------------------------------------------
[docs]
def _check_mathops_nano(self, funcname, value, other_inv_mask, return_type, caller):
    """
    Operations with TimeSpan and DateTimeNano will flip to nano precision, or raise an error.

    Parameters
    ----------
    funcname : name of ufunc
    value : original operand in math operation
    other_inv_mask : None, might be set in this routine
    return_type : None, might be set to TimeSpan or DateTimeNano
    caller : FastArray view of Date object.

    Returns
    -------
    (value, other_inv_mask, return_type, caller)
        All four are passed back unchanged when `value` is neither a TimeSpan nor a DateTimeNano.

    Raises
    ------
    TypeError
        If `value` is a DateTimeNano and `funcname` is ``__add__``, ``__iadd__`` or ``__isub__``.
    """
    if isinstance(value, TimeSpan):
        return_type = DateTimeNano
        other_inv_mask = value.isnan()
        # scale days to nanoseconds so both operands share the same unit
        caller = self._fa * NANOS_PER_DAY
        value = value._fa.astype(np.int64)
    elif isinstance(value, DateTimeNano):
        if funcname in ("__add__", "__iadd__", "__isub__"):
            # Name the actual operation: this branch is also reached for __isub__, not only addition.
            raise TypeError(f"Cannot perform {funcname} operation between Date and DateTimeNano")
        return_type = TimeSpan
        other_inv_mask = value.isnan()
        caller = self._fa * NANOS_PER_DAY
        value = value._fa
    return value, other_inv_mask, return_type, caller
# ------------------------------------------------------------
## ------------------------------------------------------------
@property
def start_of_month(self):
    """
    First day of the month for each element.

    Returns
    -------
    rt.Date array of first of self's month
    """
    # Step back by the day-of-month, then forward one day.
    return self - self.day_of_month + 1
@property
def start_of_week(self):
    """
    Start of the week for each element.

    Returns
    -------
    rt.Date array of previous Monday
    """
    # Step back by day_of_week; presumably Monday is 0 in day_of_week -- confirm.
    return self - self.day_of_week
[docs]
@staticmethod
def _from_arrow(
    arr: Union["pa.Array", "pa.ChunkedArray"], zero_copy_only: bool = True, writable: bool = False
) -> "Date":
    """
    Create a `Date` instance from a "date32" or "date64"-typed `pyarrow.Array`.

    Parameters
    ----------
    arr : pyarrow.Array or pyarrow.ChunkedArray
        Must be a "date32"- or "date64"-typed pyarrow array.
    zero_copy_only : bool, optional, defaults to True
        When True and `arr` contains nulls, a RuntimeError is raised instead of copying.
    writable : bool, optional, defaults to False
        Forwarded to ``to_numpy``; ``zero_copy_only=not writable`` is passed alongside it.

    Returns
    -------
    Date

    Raises
    ------
    ValueError
        If `arr` is not a date-typed pyarrow array.
    RuntimeError
        If `arr` contains nulls and `zero_copy_only` is True.
    """
    import pyarrow as pa
    import pyarrow.types as pat

    # Only support converting from date-typed (pa.date32(), pa.date64()) arrays.
    if not pat.is_date(arr.type):
        raise ValueError(
            f"rt.Date arrays can only be created from pyarrow arrays of type 'date32' and 'date64', not '{arr.type}'."
        )

    # ChunkedArrays need special handling.
    if isinstance(arr, pa.ChunkedArray):
        # A single-chunk ChunkedArray can be handled by just extracting that chunk
        # and recursively processing it.
        if arr.num_chunks == 1:
            return Date._from_arrow(arr.chunk(0), zero_copy_only=zero_copy_only, writable=writable)
        else:
            # TODO: Benchmark this vs. using ChunkedArray.combine_chunks() then converting.
            # TODO: Look at `zero_copy_only` and `writable` -- the converted arrays could be destroyed while hstacking
            #       since we know they'll have just been created; this could reduce peak memory utilization.
            return hstack(
                [
                    Date._from_arrow(arr_chunk, zero_copy_only=zero_copy_only, writable=writable)
                    for arr_chunk in arr.iterchunks()
                ]
            )

    # If this is a date64 array (milliseconds since the UNIX epoch), we need to convert to a date32 array first;
    # pa.date32() uses the same underlying representation (integer days since the UNIX epoch) as rt.Date.
    if pat.is_date64(arr.type):
        arr: pa.Array = arr.cast(pa.date32())

    # Create the rt.Date array from the pyarrow array.
    arr_int32 = arr.view(pa.int32())

    # When the input pyarrow array doesn't have any NA values, this operation **can be** zero-copy
    # depending on which options the caller has specified.
    if arr.null_count == 0:
        arr_int32_np = arr_int32.to_numpy(zero_copy_only=not writable, writable=writable)
        return arr_int32_np.view(type=Date)
    elif zero_copy_only:
        raise RuntimeError("Unable to perform zero-copy conversion from an input array containing nulls.")
    else:
        # The input array has one or more nulls, so this conversion can *never* be zero-copy.
        # Since we have to perform a copy somewhere, do the copy in pyarrow using the .fill_null() method
        # so we can simultaneously fill in the null elements with the riptable 'invalid'/NA value
        # for this array type; this also prevents pyarrow from converting the data to a floating-point
        # dtype and filling the nulls with NaN.

        # Get a pyarrow scalar with the riptable int32 invalid.
        # rt.Date treats both the int32 'invalid' and zero as 'invalid'/NA values; the choice to use
        # the int32 invalid is arbitrary -- this could just as easily use zero as the replacement value.
        int32_inv_pa = pa.scalar(INVALID_DICT[np.dtype(np.int32).num], type=pa.int32())

        # Fill the nulls with the riptable int32 invalid. This operation also creates a copy,
        # because arrow arrays are immutable.
        arr_int32_filled = arr_int32.fill_null(int32_inv_pa)

        # Now do the conversion to a numpy array; it should be zero-copy.
        # TODO: If writable=True here, it seems like we'll do a 2nd copy of the data? Is there any way to avoid it?
        arr_int32_np = arr_int32_filled.to_numpy(zero_copy_only=not writable, writable=writable)
        return arr_int32_np.view(type=Date)
[docs]
def to_arrow(
    self,
    type: Optional["pa.DataType"] = None,
    *,
    preserve_fixed_bytes: bool = False,
    empty_strings_to_null: bool = True,
) -> Union["pa.Array", "pa.ChunkedArray"]:
    """
    Convert this `Date` to a "date32"-typed `pyarrow.Array`.

    Invalid elements (as reported by ``isnan()``) are passed to pyarrow as the mask.

    Parameters
    ----------
    type : pyarrow.DataType, optional, defaults to None
        Unused.
    preserve_fixed_bytes : bool, optional, defaults to False
        Unused.
    empty_strings_to_null : bool, optional, defaults to True
        Unused.

    Returns
    -------
    pyarrow.Array or pyarrow.ChunkedArray
    """
    import pyarrow as pa

    # TODO: Do we need to try to implement support for the case where the `type` parameter
    #       was specified? What should we do in that case if the type isn't compatible?
    #       Maybe we create the pyarrow array like we're doing now, then if `type` is not None
    #       we call .cast() on the created array and pass `type`?

    # Only hand pyarrow a mask when at least one element is invalid;
    # an all-False mask would be pointless.
    nan_flags = self.isnan()
    arrow_mask = nan_flags if nan_flags.any() else None

    return pa.array(self._np, mask=arrow_mask, type=pa.date32())
[docs]
def __arrow_array__(self, type: Optional["pa.DataType"] = None) -> Union["pa.Array", "pa.ChunkedArray"]:
    """Return ``self.to_arrow(type=type)``."""
    return self.to_arrow(type=type)
# ========================================================
[docs]
class DateSpan(DateBase):
    """
    DateSpan arrays have an underlying int32 array. The array values are in number of days.
    These are created as the result of certain math operations on Date objects.

    Parameters
    ----------
    arr : numeric array, list, or scalar
    unit : can set units to 'd' (day) or 'w' (week)
    """

    # for .SDS file format
    MetaVersion = 1
    MetaDefault = {
        # vars for container loader
        # NOTE(review): "name" is "Date", not "DateSpan". Left untouched because stored
        # metadata may rely on it -- confirm before changing.
        "name": "Date",
        "typeid": TypeId.DateSpan,
        "version": 0,  # if no version, assume before versions implemented
        "instance_vars": {"_display_length": DisplayLength.Long},
    }
    NAN_DATE = INVALID_DICT[np.dtype(np.int32).num]  # int32 sentinel
    forbidden_mathops = ()

    def __new__(cls, arr, unit=None):
        """
        Build a DateSpan view over an int32 array of day counts.

        Lists and scalars are first wrapped in an int32 FastArray. Integer and floating-point
        arrays are multiplied by 7 when `unit` is 'W'/'w' and then cast to int32.

        Raises
        ------
        TypeError
            If `arr` is an array with a non-numeric dtype, or is not an array, list or scalar.
        """
        if isinstance(arr, list) or np.isscalar(arr):
            arr = FastArray(arr, dtype=np.int32)

        if isinstance(arr, np.ndarray):
            if arr.dtype.char in NumpyCharTypes.AllInteger + NumpyCharTypes.AllFloat:
                # is this unit really necessary?
                if unit in ("W", "w"):
                    arr = arr * 7
                arr = arr.astype(np.int32, copy=False)
            else:
                # The message names DateSpan (it previously said "Date", a copy/paste leftover).
                raise TypeError(f"Could not initialize DateSpan object with array of type {arr.dtype}.")
        else:
            raise TypeError(
                f"DateSpan objects must be initialized with numeric arrays, lists or scalars. Got {type(arr)}"
            )

        instance = arr.view(cls)
        instance._display_length = DisplayLength.Long
        return instance

    # ------------------------------------------------------------
    def __init__(self, arr, unit=None):
        # All construction work happens in __new__.
        pass

    # ------------------------------------------------------------
    def get_classname(self):
        """Return the name of this class (``__class__.__name__``)."""
        return __class__.__name__

    # ------------------------------------------------------------
    def get_scalar(self, scalarval):
        """Wrap `scalarval` in a `DateSpanScalar`, passing this array as ``_from``."""
        return DateSpanScalar(scalarval, _from=self)

    # ------------------------------------------------------------
    # DateSpan
    def strftime(self, format, dtype="O"):
        """
        Convert each `DateSpan` element to a formatted string representation.

        .. deprecated:: 1.3
            `DateSpan.strftime` is deprecated and will be removed in the future.

        Note that because each `DateSpan` element is converted to a timestamp
        relative to the epoch before it is formatted (for example, a `DateSpan`
        of "2 days" is converted to 01-03-1970), you may need to adjust the data
        before calling this method.

        Negative `DateSpan` values (for example, "-10 days") can't be formatted
        with this method.

        Parameters
        ----------
        format : str
            One or more format codes supported by the
            :py:meth:`datetime.date.strftime` function of the standard
            Python distribution. For codes, see
            :ref:`python:strftime-strptime-behavior`.
        dtype : {"O", "S", "U"}, default "O"
            The data type of the returned array elements.

            - "O": object string
            - "S": byte string
            - "U": unicode string

        Returns
        -------
        `ndarray`
            An `ndarray` of strings.

        See Also
        --------
        DateScalar.strftime, Date.strftime, DateSpan.strftime,
        DateTimeNano.strftime, DateTimeNanoScalar.strftime, TimeSpan.strftime,
        TimeSpanScalar.strftime

        Notes
        -----
        This routine has not been sped up yet. It's also not NaN-aware: NaNs
        are converted to the timestamp of the epoch (01-01-1970), then formatted.

        Examples
        --------
        >>> d = rt.Date(['20210101', '20210519', '20220308'])
        >>> ds = d - rt.Date('20201201')
        >>> ds
        DateSpan(['31 days', '169 days', '462 days'])
        >>> ds.strftime('%D')
        array(['02/01/70', '06/19/70', '04/08/71'], dtype=object)
        """
        # stacklevel=2 attributes the warning to the caller's line rather than to this module,
        # so it is reported against the code that actually needs to change.
        warnings.warn("DateSpan.strftime will be removed in the future.", DeprecationWarning, stacklevel=2)
        return self._strftime(format, dtype=dtype)

    # ------------------------------------------------------------
    @staticmethod
    def display_convert_func(date_num, itemformat: ItemFormat):
        """
        Called by main rt_display() routine to format items in array correctly in Dataset display.
        Also called by DateSpan's __str__() and __repr__().
        """
        return DateSpan.format_date_span(date_num, itemformat)

    # ------------------------------------------------------------
    # ------------------------------------------------------------
    @property
    def format_short(self):
        """Switch the display length to `DisplayLength.Short` (set on access; returns None)."""
        self._display_length = DisplayLength.Short

    @property
    def format_long(self):
        """Switch the display length to `DisplayLength.Long` (set on access; returns None)."""
        self._display_length = DisplayLength.Long

    # ------------------------------------------------------------
    # ------------------------------------------------------------
    def fill_invalid(self, shape=None, dtype=None, inplace=True):
        """
        Fill with invalids via `_fill_invalid_internal`.

        The `dtype` argument is ignored; this array's own dtype is always passed on.
        Returns None when the internal routine returns None, otherwise a new DateSpan.
        """
        arr = self._fill_invalid_internal(shape=shape, dtype=self.dtype, inplace=inplace)
        if arr is None:
            return
        return DateSpan(arr)

    # ------------------------------------------------------------
    @classmethod
    def hstack(cls, dates):
        """
        hstacks DateSpan objects and returns a new DateSpan object.
        Will be called by riptable.hstack() if the first item in the sequence is a DateSpan object.

        Parameters
        ----------
        dates : list or tuple of DateSpan objects

        >>> d1 = Date('2015-02-01')
        >>> d2 = Date(['2016-02-01', '2017-02-01', '2018-02-01'])
        >>> hstack([d1, d2])
        Date([2015-02-01, 2016-02-01, 2017-02-01, 2018-02-01])
        """
        # pass the subclass to the parent class routine
        return hstack_any(dates, cls, DateSpan)

    # ------------------------------------------------------------
    def _check_mathops_nano(self, funcname, value, other_inv_mask, return_type, caller):
        """
        Operations with TimeSpan and DateTimeNano will flip to nano precision, or raise an error.

        Parameters
        ----------
        funcname : name of ufunc
        value : original operand in math operation
        other_inv_mask : None, might be set in this routine
        return_type : None, might be set to TimeSpan or DateTimeNano
        caller : FastArray view of DateSpan object.

        Raises
        ------
        TypeError
            If `value` is a DateTimeNano and `funcname` is ``__sub__`` or ``__isub__``.
        """
        if isinstance(value, TimeSpan):
            return_type = TimeSpan
            other_inv_mask = value.isnan()
            caller = self._fa * NANOS_PER_DAY
        elif isinstance(value, DateTimeNano):
            if funcname in ("__sub__", "__isub__"):
                raise TypeError(f"Cannot perform {funcname} operation between DateSpan and DateTimeNano")
            return_type = DateTimeNano
            other_inv_mask = value.isnan()
            caller = self._fa * NANOS_PER_DAY
            value = value._fa
        return value, other_inv_mask, return_type, caller

    # ------------------------------------------------------------
    def _check_mathops(self, funcname, value):
        """
        Decide the result type and the operand's invalid mask for a DateSpan math operation.

        Parameters
        ----------
        funcname : name of ufunc
        value : original operand in math operation

        Returns
        -------
        (return_type, other_inv_mask)
            When `value` is an invalid scalar, ``return_type`` is None and
            ``other_inv_mask`` holds the final result.

        Raises
        ------
        TypeError
            If `value` is a Date and `funcname` is ``__sub__`` or ``__isub__``.
        """
        # DateSpan is the default return type
        return_type = DateSpan
        other_inv_mask = None
        if isinstance(value, Date):
            if funcname in ("__sub__", "__isub__"):
                raise TypeError(f"Cannot perform {funcname} operation between DateSpan and Date")
            return_type = Date
            other_inv_mask = value.isnan()
        # invalid gets early exit
        elif isinstance(value, (int, float, np.number)):
            # return same length DateSpan full of invalids
            if isnan(value) or value == self.NAN_DATE:
                # other_inv_mask will hold the final return
                return_type = None
                other_inv_mask = DateSpan(self.copy_invalid())
        elif isinstance(value, np.ndarray):
            other_inv_mask = isnan(value)
        return return_type, other_inv_mask

    # ------------------------------------------------------------
    def __add__(self, value):
        return self._funnel_mathops("__add__", value)

    def __iadd__(self, value):
        return self._funnel_mathops("__iadd__", value)

    def __sub__(self, value):
        return self._funnel_mathops("__sub__", value)

    def __isub__(self, value):
        return self._funnel_mathops("__isub__", value)

    # ------------------------------------------------------------
    def _datespan_compare_check(self, funcname, other):
        """
        Funnel for all comparison operations.

        Rejects Date, DateTimeNano and Categorical operands, scales to nanoseconds when
        comparing against a TimeSpan, and otherwise lets FastArray perform the comparison.

        Raises
        ------
        TypeError
            If `other` is a Date, DateTimeNano or Categorical.
        """
        caller = self._fa
        if isinstance(other, (Date, DateTimeNano, TypeRegister.Categorical)):
            # Date allows categorical comparisons, DateSpan does not
            raise TypeError(f"Cannot perform {funcname} comparison operation between {type(self)} and {type(other)}.")
        elif isinstance(other, TimeSpan):
            caller = self._fa * NANOS_PER_DAY
        # looks weird now, saving explicit branches for if any forbidden types appear
        elif isinstance(other, DateSpan):
            other = other._fa
        elif isinstance(other, np.ndarray):
            # NOTE(review): plain arrays are converted with the Date constructor, not DateSpan;
            # this looks inherited from Date's compare routine -- confirm it is intended.
            other = Date(other)
        # let everything else fall through for FastArray to catch
        func = getattr(caller, funcname)
        return func(other)

    # -------------------COMPARISONS------------------------------
    # ------------------------------------------------------------
    def __ne__(self, other):
        return self._datespan_compare_check("__ne__", other)

    def __eq__(self, other):
        return self._datespan_compare_check("__eq__", other)

    def __ge__(self, other):
        return self._datespan_compare_check("__ge__", other)

    def __gt__(self, other):
        return self._datespan_compare_check("__gt__", other)

    def __le__(self, other):
        return self._datespan_compare_check("__le__", other)

    def __lt__(self, other):
        return self._datespan_compare_check("__lt__", other)
# ------------------------------------------------------------
[docs]
def DateTimeUTC(arr, to_tz="NYC", from_matlab=False, format=None, start_date=None, gmt=None):
    """Construct a DateTimeNano with the ``from_tz`` keyword forced to 'UTC'.

    Every other argument is passed to DateTimeNano unchanged.
    For more see DateTimeNano.
    """
    passthrough = dict(
        to_tz=to_tz,
        from_matlab=from_matlab,
        format=format,
        start_date=start_date,
        gmt=gmt,
    )
    return DateTimeNano(arr, from_tz="UTC", **passthrough)
# ========================================================
class DateTimeCommon:
"""
Common functions shared between the array based class and the scalar
This class must be combined with another class because of its dependency on _timezone
"""
# -CLOCK HH:MM------------------------------------------------
@property
def format_clock(self):
    """
    Set time to be displayed as HH:MM:SS

    Accessing this property assigns `DisplayLength.Short` to `_display_length`; nothing is returned.
    """
    self._display_length = DisplayLength.Short
@property
def format_short(self):
    """
    Set time to be displayed as HH:MM:SS

    Accessing this property assigns `DisplayLength.Short` to `_display_length`; nothing is returned.
    """
    self._display_length = DisplayLength.Short
# -YYYYMMDD----------------------------------------------------
@property
def format_medium(self):
    """
    Set time to be displayed as YYYYMMDD

    Accessing this property assigns `DisplayLength.Medium` to `_display_length`; nothing is returned.
    """
    self._display_length = DisplayLength.Medium
@property
def format_ymd(self):
    """
    Set time to be displayed as YYYYMMDD

    Accessing this property assigns `DisplayLength.Medium` to `_display_length`; nothing is returned.
    """
    self._display_length = DisplayLength.Medium
@property
def format_day(self):
    """
    Set time to be displayed as YYYYMMDD

    Accessing this property assigns `DisplayLength.Medium` to `_display_length`; nothing is returned.
    """
    self._display_length = DisplayLength.Medium
# -YYYYMMDD HH:MM:SS.nanosecond ---------------------------------
@property
def format_long(self):
    """
    Set time to be displayed as YYYYMMDD HH:MM:SS.fffffffff

    Accessing this property assigns `DisplayLength.Long` to `_display_length`; nothing is returned.
    """
    self._display_length = DisplayLength.Long
@property
def format_full(self):
    """
    Set time to be displayed as YYYYMMDD HH:MM:SS.fffffffff

    Accessing this property assigns `DisplayLength.Long` to `_display_length`; nothing is returned.
    """
    self._display_length = DisplayLength.Long
@property
def format_sig(self):
    """
    Set time to be displayed as YYYYMMDD HH:MM:SS.fffffffff

    Accessing this property assigns `DisplayLength.Long` to `_display_length`; nothing is returned.
    """
    self._display_length = DisplayLength.Long
# ------------------------------------------------------------
@property
def days_since_epoch(self):
    """
    Whole days elapsed since the epoch, computed after the timezone's DST fix-up.

    Examples
    --------
    >>> dtn = DateTimeNano(['1970-01-11'], from_tz='NYC')
    >>> dtn.days_since_epoch
    FastArray([10], dtype=int64)

    Returns
    -------
    int64 array
    """
    return self._timezone.fix_dst(self) // NANOS_PER_DAY
# ------------------------------------------------------------
@property
def seconds_since_epoch(self):
    """
    Whole seconds elapsed since the epoch, computed after the timezone's DST fix-up.

    Examples
    --------
    >>> dtn = DateTimeNano(['1970-01-02'], from_tz='NYC')
    >>> dtn.seconds_since_epoch
    FastArray([86400], dtype=int64)

    Returns
    -------
    int64 array
    """
    return self._timezone.fix_dst(self) // NANOS_PER_SECOND
# ------------------------------------------------------------
def nanos_since_midnight(self):
    """
    The number of nanoseconds since midnight for each `DateTimeNano`
    element.

    The results are adjusted for the timezone specified in the `to_tz`
    parameter when the `DateTimeNano` is created. The default `to_tz`
    value is 'NYC'.

    This method can be called on `DateTimeNano` arrays and
    `DateTimeNanoScalar` objects.

    Returns
    -------
    `FastArray` or scalar
        When this method is called on a `DateTimeNano` array, it returns a
        `FastArray` of int64 integers representing the number of
        nanoseconds since midnight for each `DateTimeNano` element. When
        called on a `DateTimeNanoScalar`, a scalar (int64) is returned.

    See Also
    --------
    DateTimeNano.nanos_since_midnight, DateTimeNanoScalar.nanos_since_midnight,
    DateTimeNano.time_since_midnight, DateTimeNanoScalar.time_since_midnight,
    DateTimeNano.millis_since_midnight, DateTimeNanoScalar.millis_since_midnight

    Examples
    --------
    With the same `from_tz` and `to_tz`:

    >>> dtn = rt.DateTimeNano(['2022-01-01 00:00:00.000123456',
    ...                        '2022-01-02 12:00:00.000456789'],
    ...                        from_tz = 'NYC', to_tz = 'NYC')
    >>> dtn.nanos_since_midnight()
    FastArray([           123456, 43200000456789], dtype=int64)

    Results adjusted for a `to_tz` that differs from the `from_tz`:

    >>> dtn2 = rt.DateTimeNano(['2022-01-01 00:00:00.000123456',
    ...                         '2022-01-02 12:00:00.000456789'],
    ...                         from_tz = 'GMT', to_tz = 'NYC')
    >>> dtn2
    DateTimeNano(['20211231 19:00:00.000123456', '20220102 07:00:00.000456789'], to_tz='NYC')
    >>> dtn2.nanos_since_midnight()
    FastArray([68400000123456, 25200000456789], dtype=int64)

    When it's called on a `DateTimeNanoScalar` object, a scalar is returned:

    >>> dtn2[0].nanos_since_midnight()
    68400000123456
    """
    # Apply the timezone/DST fix-up, keep only the within-day remainder,
    # then run the result through the invalid-mask helper.
    local_nanos = self._timezone.fix_dst(self)
    return _apply_inv_mask(self, local_nanos % NANOS_PER_DAY)
# ------------------------------------------------------------
def millis_since_midnight(self):
    """
    The number of milliseconds since midnight for each `DateTimeNano`
    element.

    The results are adjusted for the timezone specified in the `to_tz`
    parameter when the `DateTimeNano` is created. The default `to_tz`
    value is 'NYC'.

    This method can be called on `DateTimeNano` arrays and
    `DateTimeNanoScalar` objects. Unlike similar methods, this returns
    floating point numbers.

    Returns
    -------
    `FastArray` or scalar
        When this method is called on a `DateTimeNano` array, it returns a
        `FastArray` of float64s representing the number of milliseconds
        since midnight for each `DateTimeNano` element. When called on a
        `DateTimeNanoScalar`, a scalar (float64) is returned.

    See Also
    --------
    DateTimeNano.millis_since_midnight, DateTimeNanoScalar.millis_since_midnight,
    DateTimeNano.time_since_midnight, DateTimeNanoScalar.time_since_midnight,
    DateTimeNano.nanos_since_midnight, DateTimeNanoScalar.nanos_since_midnight

    Examples
    --------
    With the same `from_tz` and `to_tz`:

    >>> dtn = rt.DateTimeNano(['2022-01-01 00:00:01.000123456',
    ...                        '2022-01-02 00:00:01.000456789'],
    ...                        from_tz = 'NYC', to_tz = 'NYC')
    >>> dtn.millis_since_midnight()
    FastArray([1000.123456, 1000.456789])

    Results adjusted for a `to_tz` that differs from the `from_tz`:

    >>> dtn2 = rt.DateTimeNano(['2022-01-01 00:00:01.000123456',
    ...                         '2022-01-02 00:00:01.000456789'],
    ...                         from_tz = 'GMT', to_tz = 'NYC')
    >>> dtn2
    DateTimeNano(['20211231 19:00:01.000123456', '20220101 19:00:01.000456789'], to_tz='NYC')
    >>> dtn2.millis_since_midnight()
    FastArray([68401000.123456, 68401000.456789])

    When it's called on a `DateTimeNanoScalar` object, a scalar is returned:

    >>> dtn2[0].millis_since_midnight()
    68401000.123456
    """
    # Within-day nanoseconds (after the timezone/DST fix-up), scaled to milliseconds
    # with true division so the fractional part is kept.
    local_nanos = self._timezone.fix_dst(self)
    millis = (local_nanos % NANOS_PER_DAY) / NANOS_PER_MILLISECOND
    return _apply_inv_mask(self, millis)
# ------------------------------------------------------------
def date(self):
    """
    Copies the object and removes hours, minutes, seconds, and second fractions.
    All resulting times will be at midnight.

    Examples
    --------
    >>> dtn = DateTimeNano(['2019-01-04 12:34', '2019-06-06 14:00'], from_tz='NYC')
    >>> dtn.date()
    DateTimeNano([20190104 00:00:00.000000000, 20190606 00:00:00.000000000])

    Returns
    -------
    `DateTimeNano`
        A `DateTimeNanoScalar` is returned when called on a `DateTimeNanoScalar`.
    """
    tz = self._timezone

    # Only apply the DST fix-up when the timezone carries a reverse DST table.
    nanos = self._fa if tz._dst_reverse is None else tz.fix_dst(self._fa)

    # Drop the within-day remainder so every value lands on midnight.
    midnight = nanos - (nanos % NANOS_PER_DAY)

    # from_tz needs to match to_tz (similar to from_matlab_days, except can't force 'GMT' because of DST fixup)
    truncated = DateTimeNano(midnight, from_tz=tz._to_tz, to_tz=tz._to_tz)
    return truncated[0] if isinstance(self, DateTimeNanoScalar) else truncated
# ------------------------------------------------------------
@property
def yyyymmdd(self):
    """
    Returns integers in the format YYYYMMDD.
    Accounts for daylight savings time, leap years.

    Examples
    --------
    >>> dtn = DateTimeNano(['2018-01-09', '2000-02-29', '2000-03-01', '2019-12-31'], from_tz='NYC')
    >>> dtn.yyyymmdd
    FastArray([20180109, 20000229, 20000301, 20191231])

    Returns
    -------
    int32 array

    Note
    ----
    This routine is very similar to day_of_month; the shared logic could probably be moved
    into internal routines to avoid repeating code.
    """
    arr = self._fa
    # Apply the DST fix-up once here, so _year is told not to repeat it.
    arr = self._timezone.fix_dst(arr)
    year = self._year(arr, fix_dst=False)

    # initialize result
    final = year * 10_000

    # subtract the nanos from start of year so all times are in MM-DD HH:MM:SS, etc.
    startyear = arr - self._year_splits[year - 1970]

    # treat the whole array like a non-leapyear
    monthnum = self._yearday_splits.searchsorted(startyear, side="right")
    startmonth_idx = monthnum - 1
    monthtime = startyear - self._yearday_splits[startmonth_idx]

    # fix up the leapyears with a different yearday split table
    # (every year divisible by 4 is treated as a leap year here)
    leapmask = (year % 4) == 0
    monthnum_leap = self._yearday_splits_leap.searchsorted(startyear[leapmask], side="right")
    startmonth_idx_leap = monthnum_leap - 1
    monthnum[leapmask] = monthnum_leap
    monthtime[leapmask] = startyear[leapmask] - self._yearday_splits_leap[startmonth_idx_leap]

    # future optimization, takeover place, or send __setitem__ indexer to our version of it
    # np.place(monthnum, leapmask, monthnum_leap)
    # np.place(monthtime, leapmask, startyear[leapmask] - UTC_YDAY_SPLITS_LEAP[startmonth_idx_leap])

    # add month and day values to final
    final += monthnum.astype(np.int32) * 100
    # day of month is 1-based, hence the + 1
    final += (monthtime // NANOS_PER_DAY) + 1
    return final
# ------------------------------------------------------------
@property
def _year_splits(self):
    """Midnight on Jan. 1st from 1970 - 2099 in utc nanoseconds."""
    return UTC_1970_SPLITS
# ------------------------------------------------------------
@property
def _yearday_splits(self):
    """Midnight on the 1st of the month in nanoseconds since the beginning of the year (`UTC_YDAY_SPLITS`)."""
    return UTC_YDAY_SPLITS
# ------------------------------------------------------------
@property
def _yearday_splits_leap(self):
    """Midnight on the 1st of the month in nanoseconds since the beginning of the year during a leap year (`UTC_YDAY_SPLITS_LEAP`)."""
    return UTC_YDAY_SPLITS_LEAP
# ------------------------------------------------------------
def year(self):
    """
    The year value for each entry in the array.

    The year is looked up with the DST fix-up enabled and then passed through
    the invalid-mask helper.

    Examples
    --------
    >>> dtn = DateTimeNano(['1984-02-01', '1992-02-01', '2018-02-01'], from_tz='NYC')
    >>> dtn.year()
    FastArray([1984, 1992, 2018])

    Returns
    -------
    int32 array
    """
    return _apply_inv_mask(self, self._year(self._fa, fix_dst=True))
# ------------------------------------------------------------
def month(self):
    """
    The month value for each entry in the array.
    1=Jan, 2=Feb, etc. ( is leap-year aware )

    Examples
    --------
    >>> dtn = DateTimeNano(['2000-02-29', '2018-12-25'], from_tz='NYC')
    >>> dtn.month()
    FastArray([ 2, 12])

    Returns
    -------
    int32 array
    """
    # _month does the computation (with the DST fix-up on); the helper then handles invalids.
    return _apply_inv_mask(self, self._month(fix_dst=True))
# ------------------------------------------------------------
def monthyear(self, arr=None):
    """
    Returns strings made of the 3 letter month followed by the 4 digit year.

    The `arr` argument is not used.

    Examples
    --------
    >>> d = DateTimeNano(['2000-02-29', '2018-12-25'], from_tz='NYC')
    >>> d.monthyear()
    FastArray([ 'Feb2000','Dec2018'])
    """
    month_num = self.month()
    year_text = self.year().astype("S")
    # month numbers are 1-based; the month-name table is indexed from 0
    return MONTH_STR_ARRAY[month_num - 1] + year_text
# ------------------------------------------------------------
@property
def day_of_year(self):
    """
    The day of year value for each entry in the array.
    Day values are from 1 to 365 (or 366 if leap year)

    Examples
    --------
    >>> dtn = DateTimeNano(['2019-01-01', '2019-02-01', '2019-12-31 23:59', '2000-12-31 23:59'], from_tz='NYC')
    >>> dtn.day_of_year
    FastArray([  1,  32, 365, 366], dtype=int64)

    Returns
    -------
    int32 array
    """
    result = self.nanos_since_start_of_year()
    if isinstance(result, np.ndarray):
        # array case: divide in place to avoid allocating a second array
        np.floor_divide(result, NANOS_PER_DAY, out=result)
    else:
        # non-array (scalar) case
        result = result // NANOS_PER_DAY
    # day of year is 1-based
    result += 1
    return result
# ------------------------------------------------------------
@property
def day_of_month(self):
    """
    The day of month value for each entry in the array.
    Day values are from 1 to 31
    Adjusts for daylight savings time, leap year

    Examples
    --------
    >>> dtn = DateTimeNano(['2018-01-09', '2000-02-29', '2000-03-01', '2019-12-31'], from_tz='NYC')
    >>> dtn.day_of_month
    FastArray([ 9, 29,  1, 31], dtype=int64)

    Returns
    -------
    int32 array
    """
    # Apply the timezone/DST fix-up once, up front, and use the adjusted values for BOTH the
    # year lookup and the offset from the start of the year (the same approach as `yyyymmdd`).
    # Previously the year came from adjusted time while the offset used the raw values, so
    # timestamps near a local year/month boundary could produce an out-of-range day.
    arr = self._timezone.fix_dst(self._fa)
    year = self._year(arr, fix_dst=False)

    # subtract the nanos from start of year so all times are in MM-DD HH:MM:SS, etc.
    startyear = arr - self._year_splits[year - 1970]

    # treat the whole array like a non-leapyear
    startmonth_idx = self._yearday_splits.searchsorted(startyear, side="right") - 1
    monthtime = startyear - self._yearday_splits[startmonth_idx]

    # fix up the leapyears with a different yearday split table
    leapmask = (year % 4) == 0
    startmonth_idx_leap = self._yearday_splits_leap.searchsorted(startyear[leapmask], side="right") - 1
    monthtime[leapmask] = startyear[leapmask] - self._yearday_splits_leap[startmonth_idx_leap]

    # unlike month, weekday, hour, etc. day of month starts at 1
    if isinstance(monthtime, np.ndarray):
        np.floor_divide(monthtime, NANOS_PER_DAY, out=monthtime)
    else:
        monthtime = monthtime // NANOS_PER_DAY
    monthtime += 1
    return monthtime
# ------------------------------------------------------------
@property
def day_of_week(self):
    """
    Day of the week for each entry, Monday (0) through Sunday (6).

    Computed from `days_since_epoch` shifted by the weekday of the epoch
    (January 1st 1970 was a Thursday, 3) and reduced modulo 7.

    Returns
    -------
    integer array, or an integer scalar for scalar input

    Examples
    --------
    >>> dtn = DateTimeNano(['1992-02-01 19:48:00', '1995-05-12 05:12:00'], from_tz='NYC')
    >>> dtn.day_of_week
    FastArray([5, 4])
    """
    days = self.days_since_epoch
    # align the day count with the weekday of the epoch
    days += EPOCH_DAY_OF_WEEK
    if not isinstance(days, np.ndarray):
        return days % 7
    # arrays are reduced in place to avoid an extra allocation
    np.mod(days, 7, out=days)
    return days
# ------------------------------------------------------------
@property
def start_of_week(self):
    """
    The Monday of the week each timestamp falls in.

    Returns
    -------
    Date or DateScalar
        A `Date` for array input; its first element for scalar input.
    """
    days = self.days_since_epoch
    # shift so that a multiple of 7 lands on a Monday
    days += EPOCH_DAY_OF_WEEK
    # drop the days elapsed since that Monday
    days -= days % 7
    # undo the shift to get back to days since the epoch
    days -= EPOCH_DAY_OF_WEEK
    mondays = Date(days)
    if isinstance(days, np.ndarray):
        return mondays
    return mondays[0]
# ------------------------------------------------------------
@property
def is_dst(self):
    """
    Whether each time value was in daylight savings time for the displayed timezone.

    The check is delegated to the timezone object. If the timezone is GMT,
    False is returned for all items, including invalid times.

    Returns
    -------
    bool array

    Examples
    --------
    >>> dtn = DateTimeNano(['2018-11-03 12:34', '2018-11-04 12:34'], from_tz='NYC')
    >>> dtn.is_dst
    FastArray([ True, False])

    >>> dtn = DateTimeNano(['2019-03-30 12:34', '2019-03-31 12:34'], from_tz='DUBLIN')
    >>> dtn.is_dst
    FastArray([False,  True])

    >>> dtn = DateTimeNano(['2019-03-30 12:34', '2019-03-31 12:34'], from_tz='GMT', to_tz='GMT')
    >>> dtn.is_dst
    FastArray([False, False])
    """
    zone = self._timezone
    dst_flags = zone._is_dst(self._fa)
    return dst_flags
# ------------------------------------------------------------
@property
def tz_offset(self):
    """
    Hour offset from GMT for each entry.

    Accounts for daylight savings time in the timezone set by `to_tz`; the
    computation is delegated to the timezone object. If the timezone is
    GMT, all offsets are 0.

    Returns
    -------
    integer array

    Examples
    --------
    >>> dtn = DateTimeNano(['2018-11-03 12:34', '2018-11-04 12:34'], from_tz='NYC')
    >>> dtn.tz_offset
    FastArray([-4, -5])

    >>> dtn = DateTimeNano(['2019-03-30 12:34', '2019-03-31 12:34'], from_tz='DUBLIN', to_tz='DUBLIN')
    >>> dtn.tz_offset
    FastArray([0, 1])

    >>> dtn = DateTimeNano(['2019-03-30 12:34', '2019-03-31 12:34'], from_tz='GMT', to_tz='GMT')
    >>> dtn.tz_offset
    FastArray([0, 0])
    """
    zone = self._timezone
    offsets = zone._tz_offset(self._fa)
    return offsets
# -----------------------------------------------------
def putmask(self, arr1, filter, arr2):
    """
    Array-or-scalar variant of putmask.

    Parameters
    ----------
    arr1 : ndarray or scalar
        Target value(s).
    filter : bool array or bool
        Where true, `arr2` replaces `arr1`.
    arr2 : array or scalar
        Replacement value(s).

    Returns
    -------
    For an ndarray `arr1`, whatever the module-level ``putmask`` returns
    (the array itself is handed to that function for updating). For a
    scalar `arr1`, the selected value: `arr2` if `filter` is truthy,
    otherwise `arr1`. Scalar callers must use the return value, because a
    scalar cannot be modified in place.
    """
    if not isinstance(arr1, np.ndarray):
        return arr2 if filter else arr1
    return putmask(arr1, filter, arr2)
# ------------------------------------------------------------
@property
def is_weekday(self):
    """
    Whether each time occurred on a weekday (Monday through Friday).

    Invalid times are reported as False.

    Returns
    -------
    bool array, or a bool for scalar input

    Examples
    --------
    (Monday, Thursday, Saturday)

    >>> dtn = DateTimeNano(['2019-01-07', '2019-01-10', '2019-01-12'], from_tz='NYC')
    >>> dtn.is_weekday
    FastArray([ True,  True, False])
    """
    inv_mask = self.isnan()
    # day_of_week is Monday (0) through Sunday (6)
    isweekday = self.day_of_week < 5
    if isinstance(isweekday, np.ndarray):
        # arrays are masked in place
        self.putmask(isweekday, inv_mask, False)
    else:
        # For scalars the helper cannot mutate its argument; it returns the
        # chosen value, which previously was discarded so invalid scalars
        # were never forced to False.
        isweekday = self.putmask(isweekday, inv_mask, False)
    return isweekday
# ------------------------------------------------------------
@property
def is_weekend(self):
    """
    Whether each time occurred on a weekend (Saturday or Sunday).

    Invalid times are reported as False.

    Returns
    -------
    bool array, or a bool for scalar input

    Examples
    --------
    (Monday, Thursday, Saturday)

    >>> dtn = DateTimeNano(['2019-01-07', '2019-01-10', '2019-01-12'], from_tz='NYC')
    >>> dtn.is_weekend
    FastArray([False, False,  True])
    """
    inv_mask = self.isnan()
    # day_of_week is Monday (0) through Sunday (6)
    isweekend = self.day_of_week > 4
    if isinstance(isweekend, np.ndarray):
        # arrays are masked in place
        self.putmask(isweekend, inv_mask, False)
    else:
        # For scalars the helper cannot mutate its argument; it returns the
        # chosen value, which previously was discarded so invalid scalars
        # were never forced to False.
        isweekend = self.putmask(isweekend, inv_mask, False)
    return isweekend
# ------------------------------------------------------------
@property
def day(self):
    """
    Fractional day time relative to 24 hours.

    The timezone object's DST fix is applied to the stored values before
    the time of day is extracted. Invalid times yield NaN.

    Returns
    -------
    float64 array, or a float for scalar input

    Notes
    -----
    This is different than the properties for hour, minute, and second as
    the relative unit is its own unit.

    Examples
    --------
    >>> dtn = DateTimeNano(['2000-02-01 19:48:00.000000'], from_tz='NYC')
    >>> dtn.day
    FastArray([0.825])
    """
    inv_mask = self.isnan()
    adjusted = self._timezone.fix_dst(self._fa)
    # nanoseconds elapsed since midnight, then as a fraction of a full day
    fraction = (adjusted % NANOS_PER_DAY) / NANOS_PER_DAY
    if isinstance(fraction, np.ndarray):
        # arrays are masked in place
        self.putmask(fraction, inv_mask, np.nan)
    else:
        # For scalars the helper returns the chosen value instead of
        # mutating; it previously was discarded, so invalid scalars never
        # became NaN.
        fraction = self.putmask(fraction, inv_mask, np.nan)
    return fraction
# ------------------------------------------------------------
@property
def hour(self):
    """
    Hours relative to the current day (with partial hour decimal).

    Thin wrapper around `_hour` with ``span=False``.

    Returns
    -------
    float64 array

    See Also
    --------
    DateTimeNano.hour_span

    Examples
    --------
    >>> dtn = DateTimeNano(['2000-02-01 19:48:00.000000'], from_tz='NYC')
    >>> dtn.hour
    FastArray([19.8])
    """
    hours = self._hour(span=False)
    return hours
# -----------------------------------------------------
@property
def hour_span(self):
    """
    Hours relative to the current day (with partial hour decimal) as a TimeSpan object.

    Thin wrapper around `_hour` with ``span=True``.

    Returns
    -------
    obj:`TimeSpan`

    See Also
    --------
    DateTimeNano.hour

    Examples
    --------
    >>> dtn = DateTimeNano(['2000-02-01 19:48:00.000000'], from_tz='NYC')
    >>> dtn.hour_span
    TimeSpan([19:48:00.000000000])
    """
    as_timespan = self._hour(span=True)
    return as_timespan
def _hour(self, span=False):
    """
    Internal routine behind the `hour` and `hour_span` properties.

    Parameters
    ----------
    span : bool, default False
        If True, wrap the nanoseconds since midnight in a `TimeSpan`;
        otherwise return them as fractional hours.

    Returns
    -------
    `TimeSpan` when `span` is True, otherwise floating-point hours.
    Invalid times are replaced with NaN.
    """
    inv_mask = self.isnan()
    adjusted = self._timezone.fix_dst(self._fa)
    # nanoseconds elapsed since midnight
    nanos_into_day = adjusted % NANOS_PER_DAY
    if span:
        result = TypeRegister.TimeSpan(nanos_into_day)
    else:
        result = nanos_into_day / NANOS_PER_HOUR
    if isinstance(result, np.ndarray):
        # arrays are masked in place
        self.putmask(result, inv_mask, np.nan)
    else:
        # For scalars the helper returns the chosen value instead of
        # mutating; it previously was discarded, so invalid scalars never
        # became NaN.
        result = self.putmask(result, inv_mask, np.nan)
    return result
# ------------------------------------------------------------
def _time_fraction(self, modulo, divisor, span=False):
"""
Internal routine for minute, second, millisecond, microsecond, nanosecond (+span) properties.
None of these need to account for timezone.
"""
inv_mask = self.isnan()
arr = self._fa % modulo
if span:
if isinstance(self, DateTimeNano):
result = TypeRegister.TimeSpan(arr)
else:
result = TypeRegister.TimeSpanScalar(arr)
else:
result = arr / divisor
self.putmask(result, inv_mask, np.nan)
return result
# ------------------------------------------------------------
@property
def minute(self):
    """
    Minutes relative to the current hour (with partial minute decimal).

    Returns
    -------
    float64 array

    See Also
    --------
    DateTimeNano.minute_span

    Examples
    --------
    >>> dtn = DateTimeNano(['2000-02-01 19:48:30.000000'], from_tz='NYC')
    >>> dtn.minute
    FastArray([48.5])
    """
    # remainder within the hour, expressed in minutes
    return self._time_fraction(modulo=NANOS_PER_HOUR, divisor=NANOS_PER_MINUTE)
@property
def minute_span(self):
    """
    Minutes relative to the current hour (with partial minute decimal) as a TimeSpan object.

    Returns
    -------
    obj:`TimeSpan`

    See Also
    --------
    DateTimeNano.minute

    Examples
    --------
    >>> dtn = DateTimeNano(['2000-02-01 19:48:30.000000'], from_tz='NYC')
    >>> dtn.minute_span
    TimeSpan([00:48:30.000000000])
    """
    # remainder within the hour, wrapped as a time span
    return self._time_fraction(modulo=NANOS_PER_HOUR, divisor=NANOS_PER_MINUTE, span=True)
# ------------------------------------------------------------
@property
def second(self):
    """
    Seconds relative to the current minute (with partial second decimal).

    Returns
    -------
    float64 array

    See Also
    --------
    DateTimeNano.second_span

    Examples
    --------
    >>> dtn = DateTimeNano(['2000-02-01 19:48:30.100000'], from_tz='NYC')
    >>> dtn.second
    FastArray([30.1])
    """
    # remainder within the minute, expressed in seconds
    return self._time_fraction(modulo=NANOS_PER_MINUTE, divisor=NANOS_PER_SECOND)
@property
def second_span(self):
    """
    Seconds relative to the current minute (with partial second decimal) as a TimeSpan object.

    See Also
    --------
    DateTimeNano.second

    Examples
    --------
    >>> dtn = DateTimeNano(['2000-02-01 19:48:30.100000'], from_tz='NYC')
    >>> dtn.second_span
    TimeSpan([00:00:30.100000000])
    """
    # remainder within the minute, wrapped as a time span
    return self._time_fraction(modulo=NANOS_PER_MINUTE, divisor=NANOS_PER_SECOND, span=True)
# ------------------------------------------------------------
@property
def millisecond(self):
    """
    Milliseconds relative to the current second (with partial millisecond decimal).

    Returns
    -------
    float64 array

    See Also
    --------
    DateTimeNano.millisecond_span

    Examples
    --------
    >>> dtn = DateTimeNano(['1992-02-01 12:00:01.123000000'], from_tz='NYC')
    >>> dtn.millisecond
    FastArray([123.])
    """
    # remainder within the second, expressed in milliseconds
    return self._time_fraction(modulo=NANOS_PER_SECOND, divisor=NANOS_PER_MILLISECOND)
@property
def millisecond_span(self):
    """
    Milliseconds relative to the current second (with partial millisecond decimal) as a TimeSpan object.

    Returns
    -------
    obj:`TimeSpan`

    See Also
    --------
    DateTimeNano.millisecond

    Examples
    --------
    >>> dtn = DateTimeNano(['1992-02-01 12:00:01.123000000'], from_tz='NYC')
    >>> dtn.millisecond_span
    TimeSpan([00:00:00.123000000])
    """
    # remainder within the second, wrapped as a time span
    return self._time_fraction(modulo=NANOS_PER_SECOND, divisor=NANOS_PER_MILLISECOND, span=True)
# ------------------------------------------------------------
@property
def microsecond(self):
    """
    Microseconds relative to the current millisecond (with partial microsecond decimal).

    Returns
    -------
    float64 array

    See Also
    --------
    DateTimeNano.microsecond_span

    Examples
    --------
    >>> dtn = DateTimeNano(['1992-02-01 12:00:01.000123000'], from_tz='NYC')
    >>> dtn.microsecond
    FastArray([123.])
    """
    # remainder within the millisecond, expressed in microseconds
    return self._time_fraction(modulo=NANOS_PER_MILLISECOND, divisor=NANOS_PER_MICROSECOND)
@property
def microsecond_span(self):
    """
    Microseconds relative to the current millisecond (with partial microsecond decimal) as a TimeSpan object.

    Returns
    -------
    obj:`TimeSpan`

    See Also
    --------
    DateTimeNano.microsecond

    Examples
    --------
    >>> dtn = DateTimeNano(['1992-02-01 12:00:01.000123000'], from_tz='NYC')
    >>> dtn.microsecond_span
    TimeSpan([00:00:00.000123000])
    """
    # remainder within the millisecond, wrapped as a time span
    return self._time_fraction(modulo=NANOS_PER_MILLISECOND, divisor=NANOS_PER_MICROSECOND, span=True)
# ------------------------------------------------------------
@property
def nanosecond(self):
    """
    Nanoseconds relative to the current microsecond.

    Returns
    -------
    float64 array

    See Also
    --------
    DateTimeNano.nanosecond_span

    Examples
    --------
    >>> dtn = DateTimeNano(['1992-02-01 12:00:01.000000123'], from_tz='NYC')
    >>> dtn.nanosecond
    FastArray([123.])
    """
    # remainder within the microsecond; a divisor of 1 leaves it in nanoseconds
    return self._time_fraction(modulo=NANOS_PER_MICROSECOND, divisor=1)
@property
def nanosecond_span(self):
    """
    Nanoseconds relative to the current microsecond as a TimeSpan object.

    Returns
    -------
    obj:`TimeSpan`

    See Also
    --------
    DateTimeNano.nanosecond

    Examples
    --------
    >>> dtn = DateTimeNano(['1992-02-01 12:00:01.000000123'], from_tz='NYC')
    >>> dtn.nanosecond_span
    TimeSpan([00:00:00.000000123])
    """
    # remainder within the microsecond, wrapped as a time span
    return self._time_fraction(modulo=NANOS_PER_MICROSECOND, divisor=1, span=True)
# ------------------------------------------------------------
def nanos_since_start_of_year(self):
    """
    Nanoseconds since Jan. 1st at midnight of the current year.

    The stored values are first passed through the timezone's DST fix; the
    year lookup is then done on the adjusted values without a second fix.

    Returns
    -------
    int64 array

    See Also
    --------
    DateTimeNano.time_since_start_of_year

    Examples
    --------
    >>> dtn = DateTimeNano(['2018-01-01 00:00:00.000123456'], from_tz='NYC')
    >>> dtn.nanos_since_start_of_year()
    FastArray([123456], dtype=int64)
    """
    adjusted = self._timezone.fix_dst(self._fa)
    # the values are already adjusted, so do not adjust again in the lookup
    years = self._year(adjusted, fix_dst=False)
    # the year split table is indexed from 1970
    year_start = self._year_splits[years - 1970]
    return adjusted - year_start
# ------------------------------------------------------------
def time_since_start_of_year(self):
    """
    Nanoseconds since Jan. 1st at midnight of the current year as a TimeSpan object.

    Returns
    -------
    obj:`TimeSpan`
        The full `TimeSpan` when called on a `DateTimeNano`; otherwise its
        first element.

    See Also
    --------
    DateTimeNano.nanos_since_start_of_year

    Note
    ----
    Nanosecond precision will be lost after ~52 days

    Examples
    --------
    >>> dtn = DateTimeNano(['2018-01-01 16:00:00.000123456'], from_tz='NYC')
    >>> dtn.time_since_start_of_year()
    TimeSpan([16:00:00.000123456])
    """
    nanos = self.nanos_since_start_of_year()
    span = TimeSpan(nanos)
    # non-array callers get the single element rather than a 1-item span
    return span if isinstance(self, DateTimeNano) else span[0]
# ------------------------------------------------------------
def time_since_midnight(self):
    """
    Return the time elapsed since midnight for each element as a `TimeSpan`.

    Handy for separating the time-of-day part from a `DateTimeNano`. It can
    be called on `DateTimeNano` arrays and on `DateTimeNanoScalar` objects.
    This method returns the same value as the `hour_span` property.

    The results are adjusted for the timezone specified in the `to_tz`
    parameter when the `DateTimeNano` is created. The default `to_tz`
    value is 'NYC'.

    Returns
    -------
    `TimeSpan`
        A `TimeSpan` object containing the time since midnight
        (as HH:MM:SS.nanoseconds) for each `DateTimeNano` element.

    See Also
    --------
    DateTimeNano.hour_span,
    DateTimeNano.nanos_since_midnight, DateTimeNanoScalar.nanos_since_midnight,
    DateTimeNano.millis_since_midnight, DateTimeNanoScalar.millis_since_midnight

    Examples
    --------
    With the same `from_tz` and `to_tz`:

    >>> dtn = rt.DateTimeNano(['2022-01-01 00:00:00.000123456',
    ...                        '2022-01-02 12:00:00.000456789'],
    ...                        from_tz = 'NYC', to_tz = 'NYC')
    >>> dtn.time_since_midnight()
    TimeSpan(['00:00:00.000123456', '12:00:00.000456789'])

    Results adjusted for a `to_tz` that differs from the `from_tz`:

    >>> dtn = rt.DateTimeNano(['2022-01-01 00:00:00.000123456',
    ...                        '2022-01-02 12:00:00.000456789'],
    ...                        from_tz = 'GMT', to_tz = 'NYC')
    >>> dtn
    DateTimeNano(['20211231 19:00:00.000123456', '20220102 07:00:00.000456789'], to_tz='NYC')
    >>> dtn.time_since_midnight()
    TimeSpan(['19:00:00.000123456', '07:00:00.000456789'])
    """
    since_midnight = self.hour_span
    return since_midnight
# ------------------------------------------------------------
# for DateTimeNano and DateTimeNanoScalar
def _build_mathops_result(self, other, funcname, call_super, other_inv_mask, inplace, op, return_type):
    """
    Carry out a DateTimeNano math operation (such as add/sub) and wrap the result.

    Either the operation named by `funcname` is invoked on the underlying
    FastArray and invalids are re-applied afterwards, or the math ledger's
    two-input routine is used directly.

    Parameters
    ----------
    other
        Right-hand operand.
    funcname : str
        Name of the method looked up on the underlying FastArray when
        `call_super` is true; also used in the error message.
    call_super : bool
        Selects the FastArray-method path (true) or the math ledger path.
    other_inv_mask
        Invalid mask for `other`; computed with ``isnan(other)`` when None
        (FastArray-method path only).
    inplace : bool
        Whether the operation is in place.
    op
        Operation code handed to the math ledger routine.
    return_type
        Type used to wrap the raw result.

    Returns
    -------
    An instance of `return_type`, or its first element when neither `self`
    nor `other` is an ndarray.

    Raises
    ------
    RuntimeError
        If the math ledger routine returns None.
    """
    self_is_array = isinstance(self, np.ndarray)
    # scalars are promoted to an array so both cases share one code path
    lhs = self if self_is_array else DateTimeNano(self)
    ledger_func = TypeRegister.MathLedger._BASICMATH_TWO_INPUTS

    if not call_super:
        # the in-place form passes the left operand a third time
        operands = (lhs, other, lhs) if inplace else (lhs, other)
        result = ledger_func(operands, op, 0)
        if result is None:
            raise RuntimeError(
                f"Could not perform {funcname} operation with DateTimeNano and {type(other)} {other}"
            )
    else:
        # inplace operations need to save invalids beforehand
        lhs_mask = lhs.isnan() if inplace else None
        # invalids from the right-hand operand are applied as well
        if other_inv_mask is None:
            other_inv_mask = isnan(other)
        method = getattr(lhs._fa, funcname)
        result = method(other)
        result = _apply_inv_mask(
            lhs, result, fillval=DateTimeBase.NAN_TIME, arr1_inv_mask=lhs_mask, arr2_inv_mask=other_inv_mask
        )

    if return_type == DateTimeNano:
        # the raw result is built as GMT and displayed in the left operand's timezone
        result = DateTimeNano(result, from_tz="GMT", to_tz=lhs._timezone._to_tz)
    else:
        result = return_type(result)

    # scalar (op) scalar yields a scalar
    if self_is_array or isinstance(other, np.ndarray):
        return result
    return result[0]
# ------------------------------------------------------------
# DateTimeCommon class
# For DateTimeNano and DateTimeNanoScalar
def _strftime(self, format, dtype="O"):
    """
    Convert each `DateTimeNano` or `DateTimeNanoScalar` to a formatted
    string representation.

    Parameters
    ----------
    format : str
        One or more format codes supported by the
        :py:meth:`datetime.datetime.strftime` function of the standard
        Python distribution. For codes, see
        :ref:`python:strftime-strptime-behavior`.
    dtype : {"O", "S", "U"}, default "O"
        For `DateTimeNano` input, the data type of the returned array:

        - "O": object string
        - "S": byte string
        - "U": unicode string

    Returns
    -------
    `ndarray` or str
        For `DateTimeNano` input, returns an `ndarray` of strings. For
        `DateTimeNanoScalar` input, returns a scalar string.

    See Also
    --------
    DateTimeNano.strftime, DateTimeNanoScalar.strftime, Date.strftime,
    DateScalar.strftime, TimeSpan.strftime, TimeSpanScalar.strftime

    Notes
    -----
    This routine has not been sped up yet. It also raises an error on NaNs.

    Examples
    --------
    >>> dtn = rt.DateTimeNano(['20210101 09:31:15', '20210519 05:21:17'], from_tz='NYC')
    >>> dtn
    DateTimeNano(['20210101 09:31:15.000000000', '20210519 05:21:17.000000000'], to_tz='NYC')
    >>> dtn.strftime('%c')
    array(['Fri Jan  1 09:31:15 2021', 'Wed May 19 05:21:17 2021'], dtype=object)
    >>> dtn[0].strftime('%c')
    'Fri Jan  1 09:31:15 2021'
    """
    # datetime works on (fractional) seconds since the epoch
    in_seconds = self / NANOS_PER_SECOND
    to_tz = self._timezone._to_tz

    if to_tz in ["GMT", "UTC"]:
        # datetime.utcfromtimestamp() is deprecated since Python 3.12.
        # Build the aware UTC datetime and strip tzinfo so the result is
        # the same naive datetime as before (%z / %Z still format as "").
        def to_datetime(timestamp):
            return dt.fromtimestamp(timestamp, timezone.utc).replace(tzinfo=None)

    else:
        # Choose timezone from to_tz
        localzone = tz.gettz(self._timezone._timezone_str)

        def to_datetime(timestamp):
            return dt.fromtimestamp(timestamp, localzone)

    if isinstance(in_seconds, np.ndarray):
        return np.asarray([to_datetime(timestamp).strftime(format) for timestamp in in_seconds], dtype=dtype)
    return to_datetime(in_seconds).strftime(format)
# ========================================================
[docs]
class DateTimeNano(DateTimeBase, TimeStampBase, DateTimeCommon):
"""
Date and timezone-aware time information, stored to nanosecond precision.
`DateTimeNano` arrays have an underlying `~riptable.int64` array representing the
number of nanoseconds since the Unix epoch (00:00:00 UTC on 01-01-1970). Dates
before the Unix epoch are invalid.
In most cases, `DateTimeNano` objects default to display in Eastern/NYC time,
accounting for Daylight Saving Time. The exception is when `arr` is an array of
`Date` objects, in which case the default display timezone is UTC.
Parameters
----------
arr : array of `int`, `str`, `Date`, `TimeSpan`, :py:class:`~datetime.datetime`, `numpy.datetime64`
Datetimes to store in the `DateTimeNano` array.
- Integers represent nanoseconds since the Unix epoch (00:00:00 UTC on
01-01-1970).
- Datetime strings can generally be in YYYYMMDD HH:MM:SS.fffffffff format
without ``format`` codes needing to be specified. Bytestrings, unicode
strings, and strings in `ISO 8601 <https://en.wikipedia.org/wiki/ISO_8601>`_
format are supported. If your strings are in another format (for example,
MMDDYY), specify it with ``format``. Other notes for string input:
- `from_tz` is required.
- If `start_date` is provided, strings are parsed as `TimeSpan`
objects before `start_date` is applied. See how this affects output in the
Examples section below.
- For NumPy vs. Riptable string parsing differences, see the Notes section
below.
- For `Date` objects, both `from_tz` and `to_tz` are "UTC" by
default.
- For `TimeSpan` objects, `start_date` needs to be specified.
- Using the `DateTimeNano` constructor is recommended for
`Date` + `TimeSpan` operations.
- `numpy.datetime64` values are converted to nanoseconds.
from_tz : str
The timezone the data in `arr` is stored in. Required if the `DateTimeNano` is
created from strings, and recommended in other cases to ensure expected results.
The default `from_tz` is "UTC" for all `arr` types except strings, for which a
`from_tz` must be specified.
Timezones supported (Daylight Saving Time is accounted for):
- "America/New_York"
- "Australia/Sydney"
- "Europe/Dublin"
- "DUBLIN": alias for "Europe/Dublin"
- "GMT": Greenwich Mean Time
- "NYC": US/Eastern
- "UTC": (not a timezone, but accepted as an alias for GMT)
to_tz : str
The timezone the data is displayed in. If `arr` is `Date` objects, the default
`to_tz` is "UTC". For other `arr` types, the default `to_tz` is "NYC".
from_matlab : bool, default False
When set to `True`, indicates that `arr` contains Matlab datenums (the number
of days since 0-Jan-0000). Because Matlab datenums may also include a fraction
of a day, be sure to specify `from_tz` for accurate time data.
format : str
Specify a format for string `arr` input. For format codes, see the `Python
strptime cheatsheet <https://strftime.org/>`_. This parameter is ignored for
non-string `arr` input.
start_date : `str` or array of `Date`
- Required if constructing a `DateTimeNano` from a `TimeSpan`.
- If `arr` is strings, the values in `arr` are parsed as `TimeSpan` objects
before `start_date` is applied. See how this affects output in the Examples
section below. Otherwise, `start_date` is added (as nanos) to dates in `arr`.
- If `start_date` is a string, use YYYYMMDD format.
- If `start_date` is a `Date` array, it is broadcast to `arr` if possible;
otherwise an error is raised.
- A `start_date` before the Unix epoch is converted to the Unix epoch.
Notes
-----
- The constructor does not attempt to preserve NaN times from Python
:py:class:`~datetime.datetime` objects.
- If the integer data in a `DateTimeNano` object is extracted, it is in the
`from_tz` timezone. To initialize another `DateTimeNano` with the same underlying
array, use the same `from_tz`.
- `DateTimeNano` objects have no knowledge of timezones. All timezone operations
are handled by the `TimeZone` class.
Math Operations
The following math operations can be performed:
+----------------------------------------+
| Date + TimeSpan = DateTimeNano |
+----------------------------------------+
| Date - DateTimeNano = TimeSpan |
+----------------------------------------+
| Date - TimeSpan = DateTimeNano |
+----------------------------------------+
| DateTimeNano - DateTimeNano = TimeSpan |
+----------------------------------------+
| DateTimeNano - Date = TimeSpan |
+----------------------------------------+
| DateTimeNano - TimeSpan = DateTimeNano |
+----------------------------------------+
| DateTimeNano + TimeSpan = DateTimeNano |
+----------------------------------------+
String Parsing Differences Between NumPy and Riptable
- Riptable `DateTimeNano` string parsing is generally more forgiving than NumPy's
`numpy.datetime64` array parsing.
- In some cases where NumPy raises an error, Riptable returns an object.
- The lower limit for `DateTimeNano` string parsing is Unix epoch time.
- You can always guarantee that Riptable and NumPy get the same results by using
the full `ISO 8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ datetime format
(YYYY-MM-DDTHH:MM:SS.fffffffff).
Riptable parses strings without leading zeros:
>>> import numpy as np
>>> rt.DateTimeNano(["2018-1-1"], from_tz="NYC")
DateTimeNano(['20180101 00:00:00.000000000'], to_tz='NYC')
>>> np.array(["2018-1-1"], dtype="datetime64[ns]")
ValueError: Error parsing datetime string "2018-1-1" at position 5
Riptable handles extra trailing spaces; NumPy incorrectly treats them as a
timezone whose parsing will be deprecated soon:
>>> rt.DateTimeNano(["2018-10-11 10:11:00.123 "], from_tz="NYC")
DateTimeNano(['20181011 10:11:00.123000000'], to_tz='NYC')
>>> np.array(["2018-10-11 10:11:00.123 "], dtype="datetime64[ns]")
DeprecationWarning: parsing timezone aware datetimes is deprecated; this will
raise an error in the future
array(['2018-10-11T10:11:00.123000000'], dtype='datetime64[ns]')
Riptable correctly parses dates without delimiters:
>>> rt.DateTimeNano(["20181231"], from_tz="NYC")
DateTimeNano(['20181231 00:00:00.000000000'], to_tz='NYC')
>>> np.array(["20181231"], dtype="datetime64[ns]")
array(['1840-08-31T19:51:12.568664064'], dtype='datetime64[ns]')
To ensure that Riptable and NumPy get the same results, use the full
`ISO 8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ datetime format:
>>> rt.DateTimeNano(["2018-12-31T12:34:56.789123456"], from_tz="NYC")
DateTimeNano(['20181231 12:34:56.789123456'], to_tz='NYC')
>>> np.array(["2018-12-31T12:34:56.789123456"], dtype="datetime64[ns]")
array(['2018-12-31T12:34:56.789123456'], dtype='datetime64[ns]')
See Also
--------
DateTimeNano.info : See timezone info for a `DateTimeNano` object.
Date : Riptable's `Date` class.
DateSpan : Riptable's `DateSpan` class.
TimeSpan : Riptable's `TimeSpan` class.
.TimeZone : Riptable's `.TimeZone` class.
Examples
--------
Create a `DateTimeNano` from an integer representing the nanoseconds since 00:00:00
UTC on 01-01-1970:
>>> rt.DateTimeNano([1514828730123456000], from_tz="UTC")
DateTimeNano(['20180101 12:45:30.123456000'], to_tz='NYC')
From a datetime string in NYC time:
>>> rt.DateTimeNano(["2018-01-01 12:45:30.123456000"], from_tz="NYC")
DateTimeNano(['20180101 12:45:30.123456000'], to_tz='NYC')
From `numpy.datetime64` array (note that NumPy has less precision):
>>> dt = np.array(["2018-11-02 09:30:00.002201", "2018-11-02 09:30:00.004212"], dtype="datetime64[ns]")
>>> rt.DateTimeNano(dt, from_tz="NYC")
DateTimeNano(['20181102 09:30:00.002201000', '20181102 09:30:00.004212000'], to_tz='NYC')
If your datetime strings are nonstandard, specify the format using ``format`` with
`Python strptime codes <https://strftime.org/>`_.
>>> rt.DateTimeNano(["12/31/19 08:05:01", "6/30/19 14:20:35"], format="%m/%d/%y %H:%M:%S", from_tz="NYC")
DateTimeNano(['20191231 08:05:01.000000000', '20190630 14:20:35.000000000'], to_tz='NYC')
Convert Matlab datenums:
>>> rt.DateTimeNano([737426, 738251.75], from_matlab=True, from_tz="NYC")
DateTimeNano(['20190101 00:00:00.000000000', '20210405 18:00:00.000000000'], to_tz='NYC')
Note that if you create a `DateTimeNano` by adding a `Date` and a `TimeSpan` without
using the `DateTimeNano` constructor, `from_tz` and `to_tz` will be "GMT":
>>> d = rt.Date("20230305")
>>> ts = rt.TimeSpan("05:00")
>>> dtn = d + ts
>>> dtn.info()
DateTimeNano(['20230305 05:00:00.000000000'], to_tz='GMT')
Displaying in timezone: GMT
Origin: GMT
Offset: 0 hours
Create a `DateTimeNano` from a list of Python :py:class:`~datetime.datetime`
objects:
>>> from datetime import datetime as dt
>>> pdt = [dt(2018, 7, 2, 14, 30), dt(2019, 6, 8, 8, 30)]
>>> rt.DateTimeNano(pdt)
UserWarning: FastArray contains an unsupported type 'object'. Problems may occur.
Consider categoricals.
warnings.warn(warning_string)
DateTimeNano(['20180702 10:30:00.000000000', '20190608 04:30:00.000000000', to_tz='NYC')
If you specify a `start_date` with an `arr` of strings, the strings are parsed as
`TimeSpan` objects before `start_date` is applied. Note the first two examples in
``arr`` result in NaN TimeSpans, which are silently treated as zeros:
>>> arr = ["20180205", "20180205 14:30", "14:30"]
>>> rt.DateTimeNano(arr, from_tz="UTC", to_tz="UTC", start_date="20230601")
DateTimeNano(['20230601 00:00:00.000000000', '20230601 00:00:00.000000000', '20230601 14:30:00.000000000'], to_tz='UTC')
`.GetNanoTime` gets the current Unix epoch time:
>>> rt.DateTimeNano([rt.GetNanoTime()], from_tz="UTC")
DateTimeNano(['20230615 18:36:58.378020700'], to_tz='NYC')
"""
MetaVersion = 0
MetaDefault = {
"name": "DateTimeNano",
"typeid": TypeId.DateTimeNano,
"ncols": 0,
"version": 0,
"instance_vars": {"_display_length": DisplayLength.Long, "_to_tz": "NYC"},
}
# TODO: add more intervals here and to DateTimeNano quarters
# need to interact with the business calendar class
# maybe merge these with TimeSpan unit conversion dict?
FrequencyStrings = {
"H": "h",
"T": "m",
"MIN": "m",
"S": "s",
"L": "ms",
"MS": "ms",
"U": "us",
"US": "us",
"N": "ns",
"NS": "ns",
}
_INVALID_FREQ_ERROR = "Invalid frequency: {}"
# ------------------------------------------------------------
def __new__(cls, arr, from_tz=None, to_tz=None, from_matlab=False, format=None, start_date=None, gmt=None):
"""
Date and timezone-aware time information, stored to nanosecond precision.
`DateTimeNano` arrays have an underlying `~riptable.int64` array representing the
number of nanoseconds since the Unix epoch (00:00:00 UTC on 01-01-1970). Dates
before the Unix epoch are invalid.
In most cases, `DateTimeNano` objects default to display in Eastern/NYC time,
accounting for Daylight Saving Time. The exception is when `arr` is an array of
`Date` objects, in which case the default display timezone is UTC.
Parameters
----------
arr : array of `int`, `str`, `Date`, `TimeSpan`, :py:class:`~datetime.datetime`, `numpy.datetime64`
Datetimes to store in the `DateTimeNano` array.
- Integers represent nanoseconds since the Unix epoch (00:00:00 UTC on
01-01-1970).
- Datetime strings can generally be in YYYYMMDD HH:MM:SS.fffffffff format
without ``format`` codes needing to be specified. Bytestrings, unicode
strings, and strings in `ISO 8601 <https://en.wikipedia.org/wiki/ISO_8601>`_
format are supported. If your strings are in another format (for example,
MMDDYY), specify it with ``format``. Other notes for string input:
- `from_tz` is required.
- If `start_date` is provided, strings are parsed as `TimeSpan`
objects before `start_date` is applied. See how this affects output in the
Examples section below.
- For NumPy vs. Riptable string parsing differences, see the Notes section
below.
- For `Date` objects, both `from_tz` and `to_tz` are "UTC" by
default.
- For `TimeSpan` objects, `start_date` needs to be specified.
- Using the `DateTimeNano` constructor is recommended for
`Date` + `TimeSpan` operations.
- `numpy.datetime64` values are converted to nanoseconds.
from_tz : str
The timezone the data in `arr` is stored in. Required if the `DateTimeNano` is
created from strings, and recommended in other cases to ensure expected results.
The default `from_tz` is "UTC" for all `arr` types except strings, for which a
`from_tz` must be specified.
Timezones supported (Daylight Saving Time is accounted for):
- "America/New_York"
- "Australia/Sydney"
- "Europe/Dublin"
- "DUBLIN": alias for "Europe/Dublin"
- "GMT": Greenwich Mean Time
- "NYC": US/Eastern
- "UTC": (not a timezone, but accepted as an alias for GMT)
to_tz : str
The timezone the data is displayed in. If `arr` is `Date` objects, the default
`to_tz` is "UTC". For other `arr` types, the default `to_tz` is "NYC".
from_matlab : bool, default False
When set to `True`, indicates that `arr` contains Matlab datenums (the number
of days since 0-Jan-0000). Because Matlab datenums may also include a fraction
of a day, be sure to specify `from_tz` for accurate time data.
format : str
Specify a format for string `arr` input. For format codes, see the `Python
strptime cheatsheet <https://strftime.org/>`_. This parameter is ignored for
non-string `arr` input.
start_date : `str` or array of `Date`
- Required if constructing a `DateTimeNano` from a `TimeSpan`.
- If `arr` is strings, the values in `arr` are parsed as `TimeSpan` objects
before `start_date` is applied. See how this affects output in the Examples
section below. Otherwise, `start_date` is added (as nanos) to dates in `arr`.
- If `start_date` is a string, use YYYYMMDD format.
- If `start_date` is a `Date` array, it is broadcast to `arr` if possible;
otherwise an error is raised.
- A `start_date` before the Unix epoch is converted to the Unix epoch.
Notes
-----
- The constructor does not attempt to preserve NaN times from Python
:py:class:`~datetime.datetime` objects.
- If the integer data in a `DateTimeNano` object is extracted, it is in the
`from_tz` timezone. To initialize another `DateTimeNano` with the same underlying
array, use the same `from_tz`.
- `DateTimeNano` objects have no knowledge of timezones. All timezone operations
are handled by the `TimeZone` class.
Math Operations
The following math operations can be performed:
+----------------------------------------+
| Date + TimeSpan = DateTimeNano |
+----------------------------------------+
| Date - DateTimeNano = TimeSpan |
+----------------------------------------+
| Date - TimeSpan = DateTimeNano |
+----------------------------------------+
| DateTimeNano - DateTimeNano = TimeSpan |
+----------------------------------------+
| DateTimeNano - Date = TimeSpan |
+----------------------------------------+
| DateTimeNano - TimeSpan = DateTimeNano |
+----------------------------------------+
| DateTimeNano + TimeSpan = DateTimeNano |
+----------------------------------------+
String Parsing Differences Between NumPy and Riptable
- Riptable `DateTimeNano` string parsing is generally more forgiving than NumPy's
`numpy.datetime64` array parsing.
- In some cases where NumPy raises an error, Riptable returns an object.
- The lower limit for `DateTimeNano` string parsing is Unix epoch time.
- You can always guarantee that Riptable and NumPy get the same results by using
the full `ISO 8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ datetime format
(YYYY-MM-DDTHH:MM:SS.fffffffff).
Riptable parses strings without leading zeros:
>>> import numpy as np
>>> rt.DateTimeNano(["2018-1-1"], from_tz="NYC")
DateTimeNano(['20180101 00:00:00.000000000'], to_tz='NYC')
>>> np.array(["2018-1-1"], dtype="datetime64[ns]")
ValueError: Error parsing datetime string "2018-1-1" at position 5
Riptable handles extra trailing spaces; NumPy incorrectly treats them as a
timezone whose parsing will be deprecated soon:
>>> rt.DateTimeNano(["2018-10-11 10:11:00.123 "], from_tz="NYC")
DateTimeNano(['20181011 10:11:00.123000000'], to_tz='NYC')
>>> np.array(["2018-10-11 10:11:00.123 "], dtype="datetime64[ns]")
DeprecationWarning: parsing timezone aware datetimes is deprecated; this will
raise an error in the future
array(['2018-10-11T10:11:00.123000000'], dtype='datetime64[ns]')
Riptable correctly parses dates without delimiters:
>>> rt.DateTimeNano(["20181231"], from_tz="NYC")
DateTimeNano(['20181231 00:00:00.000000000'], to_tz='NYC')
>>> np.array(["20181231"], dtype="datetime64[ns]")
array(['1840-08-31T19:51:12.568664064'], dtype='datetime64[ns]')
To ensure that Riptable and NumPy get the same results, use the full
`ISO 8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ datetime format:
>>> rt.DateTimeNano(["2018-12-31T12:34:56.789123456"], from_tz="NYC")
DateTimeNano(['20181231 12:34:56.789123456'], to_tz='NYC')
>>> np.array(["2018-12-31T12:34:56.789123456"], dtype="datetime64[ns]")
array(['2018-12-31T12:34:56.789123456'], dtype='datetime64[ns]')
See Also
--------
DateTimeNano.info : See timezone info for a `DateTimeNano` object.
Date : Riptable's `Date` class.
DateSpan : Riptable's `DateSpan` class.
TimeSpan : Riptable's `TimeSpan` class.
.TimeZone : Riptable's `.TimeZone` class.
Examples
--------
Create a `DateTimeNano` from an integer representing the nanoseconds since 00:00:00
UTC on 01-01-1970:
>>> rt.DateTimeNano([1514828730123456000], from_tz="UTC")
DateTimeNano(['20180101 12:45:30.123456000'], to_tz='NYC')
From a datetime string in NYC time:
>>> rt.DateTimeNano(["2018-01-01 12:45:30.123456000"], from_tz="NYC")
DateTimeNano(['20180101 12:45:30.123456000'], to_tz='NYC')
From `numpy.datetime64` array (note that NumPy has less precision):
>>> dt = np.array(["2018-11-02 09:30:00.002201", "2018-11-02 09:30:00.004212"], dtype="datetime64[ns]")
>>> rt.DateTimeNano(dt, from_tz="NYC")
DateTimeNano(['20181102 09:30:00.002201000', '20181102 09:30:00.004212000'], to_tz='NYC')
If your datetime strings are nonstandard, specify the format using ``format`` with
`Python strptime codes <https://strftime.org/>`_.
>>> rt.DateTimeNano(["12/31/19 08:05:01", "6/30/19 14:20:35"], format="%m/%d/%y %H:%M:%S", from_tz="NYC")
DateTimeNano(['20191231 08:05:01.000000000', '20190630 14:20:35.000000000'], to_tz='NYC')
Convert Matlab datenums:
>>> rt.DateTimeNano([737426, 738251.75], from_matlab=True, from_tz="NYC")
DateTimeNano(['20190101 00:00:00.000000000', '20210405 18:00:00.000000000'], to_tz='NYC')
Note that if you create a `DateTimeNano` by adding a `Date` and a `TimeSpan` without
using the `DateTimeNano` constructor, `from_tz` and `to_tz` will be "GMT":
>>> d = rt.Date("20230305")
>>> ts = rt.TimeSpan("05:00")
>>> dtn = d + ts
>>> dtn.info()
DateTimeNano(['20230305 05:00:00.000000000'], to_tz='GMT')
Displaying in timezone: GMT
Origin: GMT
Offset: 0 hours
Create a `DateTimeNano` from a list of Python :py:class:`~datetime.datetime`
objects:
>>> from datetime import datetime as dt
>>> pdt = [dt(2018, 7, 2, 14, 30), dt(2019, 6, 8, 8, 30)]
>>> rt.DateTimeNano(pdt)
UserWarning: FastArray contains an unsupported type 'object'. Problems may occur.
Consider categoricals.
warnings.warn(warning_string)
DateTimeNano(['20180702 10:30:00.000000000', '20190608 04:30:00.000000000'], to_tz='NYC')
If you specify a `start_date` with an `arr` of strings, the strings are parsed as
`TimeSpan` objects before `start_date` is applied. Note the first two examples in
``arr`` result in NaN TimeSpans, which are silently treated as zeros:
>>> arr = ["20180205", "20180205 14:30", "14:30"]
>>> rt.DateTimeNano(arr, from_tz="UTC", to_tz="UTC", start_date="20230601")
DateTimeNano(['20230601 00:00:00.000000000', '20230601 00:00:00.000000000', '20230601 14:30:00.000000000'], to_tz='UTC')
`.GetNanoTime` gets the current Unix epoch time:
>>> rt.DateTimeNano([rt.GetNanoTime()], from_tz="UTC")
DateTimeNano(['20230615 18:36:58.378020700'], to_tz='NYC')
"""
# changing defaults / requirements based on constructor
# non-string constructors don't require from_tz keyword to be set
# need to store original keyword values to check in the funnel (saving all in case we add more)
_orig_from_tz = from_tz
if from_tz is None:
from_tz = "UTC"
_from_matlab = from_matlab
_format = format
_start_date = start_date
# check for categorical of string or dates
arr, cat = _possibly_convert_cat(arr)
if isinstance(arr, TypeRegister.Date):
if to_tz is None:
to_tz = "UTC"
# will automatically flip to int64, send through as nanosecond integer array
arr = arr._fa * NANOS_PER_DAY
else:
if to_tz is None:
to_tz = "NYC"
# create a timezone object to handle daylight savings, any necessary conversion, etc.
_timezone = TypeRegister.TimeZone(from_tz=from_tz, to_tz=to_tz)
if from_matlab:
instance = cls._convert_matlab_days(arr, _timezone)
else:
if start_date is not None:
if not isinstance(arr, np.ndarray):
arr = FastArray(arr)
# if array was strings, interpret as timespan
# numeric arrays will also be interpreted as timespan
if arr.dtype.char in "US":
arr = TimeSpan(arr)
# interpret as start date in nanoseconds
if isinstance(start_date, (str, bytes)):
start_date = FastArray(start_date)
start_date = rc.DateStringToNanos(start_date)[0]
elif isinstance(start_date, Date):
if len(start_date) == len(arr):
# user has passed in multiple start dates
start_date = start_date._fa * NANOS_PER_DAY
elif len(start_date) == 1:
start_date = start_date[0] * NANOS_PER_DAY
else:
raise ValueError(
f"start_date Date array must be either length 1 or the same length as arr. Got arr of length {len(arr)} and start_date of length {len(start_date)}."
)
else:
raise TypeError(
f"Start date must be string in format YYYYMMDD or Date object. Got type {type(start_date)}"
)
instance = None
if isinstance(arr, list) or np.isscalar(arr):
arr = FastArray(arr)
if isinstance(arr, np.ndarray):
if arr.dtype.char == "O":
# possibly python datetime object
if isinstance(arr[0], dt):
# warn if it will take more than 1 second
if len(arr) > 750_000:
warnings.warn(f"Python is converting {len(arr)} datetime objects. Performance may suffer.")
arr = np.array([t.isoformat() for t in arr], dtype="datetime64[ns]")
# string
if arr.dtype.char in "US":
if _orig_from_tz is None:
raise ValueError(TypeRegister.TimeZone.tz_error_msg)
# if format specified, use our strptime
if format is not None:
instance = strptime_to_nano(arr, format, from_tz=from_tz, to_tz=to_tz)
else:
# otherwise assume ISO-8601 format
instance = datetimestring_to_nano(arr, from_tz=from_tz, to_tz=to_tz)
# check for categorical of string
if cat is not None:
# re-expand since it came in as a categorical
instance = cat.expand_any(instance)
return instance
# flip numpy datetime64 array
elif arr.dtype.char == "M":
arr = arr.astype("datetime64[ns]", copy=False).view(np.int64)
# don't allow timespan arrays without start date
elif isinstance(arr, TimeSpan) and start_date is None:
raise TypeError(f"Cannot create DateTimeNano from TimeSpan array unless start_date is provided.")
elif arr.dtype.char in NumpyCharTypes.AllInteger + NumpyCharTypes.AllFloat:
pass
else:
raise TypeError(f"Cannot create DateTimeNano object from {arr.dtype}")
# only flip to int64 if necessary
# TODO: for uint64 do we want a .view() so we dont have to convert?
instance = arr.astype(np.int64, copy=False)
if start_date is not None:
instance = instance + start_date
# match stored utc nano to desired display
instance = _timezone.to_utc(instance)
else:
raise TypeError(f"Cannot initialize DateTimeNano with type {type(arr)}, must be list or array.")
# check for categorical of string
if cat is not None:
# re-expand since it came in as a categorical
instance = cat.expand_any(instance)
instance = instance.view(cls)
instance._display_length = DisplayLength.Long
instance._timezone = _timezone
return instance
# ------------------------------------------------------------
def __init__(self, arr, from_matlab=False, from_tz=None, to_tz=None, format=None, start_date=None, gmt=None):
    """
    Intentionally empty initializer.

    The arguments are accepted but none of them are used here; the instance
    is already fully built by the time this runs.
    """
# ------------------------------------------------------------
# DateTimeNano
[docs]
def strftime(self, format, dtype="O"):
    """
    Render every `DateTimeNano` element as a formatted string.

    Parameters
    ----------
    format : str
        One or more format codes understood by
        :py:meth:`datetime.datetime.strftime`. For the list of codes, see
        :ref:`python:strftime-strptime-behavior`.
    dtype : {"O", "S", "U"}, default "O"
        Data type of the returned array:

        - "O": object string
        - "S": byte string
        - "U": unicode string

    Returns
    -------
    `ndarray`
        An `ndarray` of strings.

    See Also
    --------
    DateTimeNanoScalar.strftime, Date.strftime, DateScalar.strftime,
    TimeSpan.strftime, TimeSpanScalar.strftime

    Notes
    -----
    This routine has not been sped up yet. It also raises an error on NaNs.

    Examples
    --------
    >>> dtn = rt.DateTimeNano(['20210101 09:31:15', '20210519 05:21:17'], from_tz='NYC')
    >>> dtn
    DateTimeNano(['20210101 09:31:15.000000000', '20210519 05:21:17.000000000'], to_tz='NYC')
    >>> dtn.strftime('%c')
    array(['Fri Jan  1 09:31:15 2021', 'Wed May 19 05:21:17 2021'], dtype=object)
    """
    # all of the work is done by the shared ``_strftime`` helper
    formatted = self._strftime(format, dtype=dtype)
    return formatted
# ------------------------------------------------------------
[docs]
def get_classname(self):
    """
    Return the class name used as the prefix of the array repr.

    Returns
    -------
    str
        The name of the class in which this method is defined.
    """
    # ``__class__`` (without ``self.``) is the implicit reference to the class that
    # defines this method, so the result does not follow the runtime type of ``self``.
    return __class__.__name__
# ------------------------------------------------------------
[docs]
def get_scalar(self, scalarval):
    """
    Wrap a single element value in a `DateTimeNanoScalar`.

    Parameters
    ----------
    scalarval
        The value to wrap; forwarded unchanged to `DateTimeNanoScalar`.

    Returns
    -------
    DateTimeNanoScalar
        Scalar built with this array passed as its ``_from`` argument.
    """
    wrapped = DateTimeNanoScalar(scalarval, _from=self)
    return wrapped
# ------------------------------------------------------------
[docs]
@classmethod
def _convert_matlab_days(cls, arr, timezone):
    """
    Convert Matlab datenums to an int64 array of UTC nanoseconds.

    Parameters
    ----------
    arr : array of matlab datenums (1 is 1-Jan-0000)
        Anything that is not already an `ndarray` is wrapped in a `FastArray`.
    timezone : TimeZone object from DateTimeNano constructor
        Its ``to_utc`` method is applied to the converted nanoseconds.

    Returns
    -------
    array of int64
        UTC nanoseconds, with ``cls.NAN_TIME`` wherever the input was NaN.
    """
    datenums = arr if isinstance(arr, np.ndarray) else FastArray(arr)

    # remember the NaN positions before any arithmetic or casting touches them
    invalid = isnan(datenums)

    # matlab dates come in as float; work in float64 so no precision is lost.
    # The subtraction allocates a new array, so scaling in place never
    # modifies the caller's data.
    shifted = datenums.astype(np.float64, copy=False) - MATLAB_EPOCH_DATENUM
    shifted *= NANOS_PER_DAY

    nanos = timezone.to_utc(shifted.astype(np.int64), inv_mask=invalid)
    putmask(nanos, invalid, cls.NAN_TIME)
    return nanos
# ------------------------------------------------------------
[docs]
def set_timezone(self, tz):
    """
    Change the timezone in which the times are displayed.

    A different lookup array will be used for daylight savings fixups.
    The underlying array is not modified.

    Parameters
    ----------
    tz : str
        Abbreviated name of desired timezone. See rt.TimeZone.valid_timezones

    Examples
    --------
    Normal:

    >>> dtn = DateTimeNano(['2019-01-07 10:36'], from_tz='NYC', to_tz='NYC')
    >>> dtn.set_timezone('DUBLIN')  # 10:36 in NYC is now displayed as 15:36

    NYC is in daylight savings time, Dublin is not:

    >>> dtn = DateTimeNano(['2019-03-15 10:36'], from_tz='NYC', to_tz='NYC')
    >>> dtn.set_timezone('DUBLIN')  # 10:36 in NYC is now displayed as 14:36
    """
    # only the attached timezone object changes; the stored values stay as they are
    zone = self._timezone
    zone._set_timezone(tz)
# ------------------------------------------------------------
[docs]
def astimezone(self, tz):
    """
    Return a new DateTimeNano object displayed in a different timezone.

    The new object is built from this object's underlying array (``self._fa``),
    which is passed to the constructor as GMT values.

    Parameters
    ----------
    tz : str
        Abbreviated name of desired timezone. See rt.TimeZone.valid_timezones

    Returns
    -------
    obj:`DateTimeNano`

    Notes
    -----
    Unlike Python's datetime.datetime.astimezone(), accepts strings, not timezone objects.
    """
    utc_nanos = self._fa
    return DateTimeNano(utc_nanos, from_tz="GMT", to_tz=tz)
# ------------------------------------------------------------
[docs]
def to_iso(self):
    """
    Generate a FastArray of ISO-8601 timestamp bytestrings.

    The string will match the time +/- timezone offset displayed in the output
    of the DateTimeNano object. Invalid elements are written as NaT.

    Returns
    -------
    obj:`FastArray`

    Examples
    --------
    >>> dtn = DateTimeNano(['2019-01-22 12:34'], from_tz='NYC')
    >>> dtn
    DateTimeNano(['20190122 12:34:00.000000000'], to_tz='NYC')
    >>> dtn.to_iso()
    FastArray([b'2019-01-22T12:34:00.000000000'], dtype='|S48')

    >>> dtn = DateTimeNano(['2019-01-22'], from_tz='GMT', to_tz='NYC')
    >>> dtn
    DateTimeNano(['20190121 19:00:00.000000000'], to_tz='NYC')
    >>> dtn.to_iso()
    FastArray([b'2019-01-21T19:00:00.000000000'], dtype='|S48')
    """
    # capture invalids first: the timezone fixup below changes the raw values
    invalid = self.isnan()
    display_nanos = self._timezone.fix_dst(self._fa)
    stamps = display_nanos.astype("datetime64[ns]")
    putmask(stamps, invalid, np.datetime64("nat"))
    return stamps.astype("S")
@property
def display_length(self):
    """
    Display length setting for this array.

    If the instance has no ``_display_length`` attribute yet, it is set to
    ``DisplayLength.Long`` before being returned.
    """
    if hasattr(self, "_display_length"):
        return self._display_length
    self._display_length = DisplayLength.Long
    return self._display_length
# TODO uncomment when starfish is implemented and imported
# def _sf_display_query_properties(self):
# itemformat = sf.ItemFormat({'length':self.display_length,
# 'align':sf.DisplayAlign.Right,
# 'timezone_str':self._timezone._timezone_str})
# return itemformat, self.display_convert_func
# ------------------------------------------------------------
[docs]
def display_query_properties(self):
    """
    Callback for display functions to get the formatting function and style for timestrings.

    Each instance knows how to format its time strings. The formatter is specified in TIME_FORMATS.
    The length property of item_format stores the index into TIME_FORMATS for the display_convert_func.

    Returns
    -------
    obj:`ItemFormat`
        See riptable.Utils.rt_display_properties
    function
        Callback function for formatting the timestring
    """
    # if TypeRegister.DisplayOptions.STARFISH:
    #     return self._sf_display_query_properties()
    style = ItemFormat(
        length=self.display_length,
        justification=DisplayJustification.Right,
        can_have_spaces=True,
        decoration=None,
        timezone_str=self._timezone._timezone_str,
    )
    return style, self.display_convert_func
# ------------------------------------------------------------
[docs]
@staticmethod
def display_convert_func(utcnano, itemformat: ItemFormat):
    """
    Turn one UTC nanosecond timestamp into its display string.

    Parameters
    ----------
    utcnano : int
        Timestamp in nanoseconds, a single value from a DateTimeNano array
    itemformat : obj:`ItemFormat`
        Style object retrieved from display callback.

    Returns
    -------
    str
        Timestamp as string.

    See Also
    --------
    DateTimeNano.display_query_properties
    riptable.Utils.rt_display_properties
    """
    # TODO: apply ItemFormat options that were passed in
    text = DateTimeNano.format_nano_time(utcnano, itemformat)
    return text
# ------------------------------------------------------------
[docs]
def display_item(self, utcnano):
    """
    Turn one UTC nanosecond timestamp into the string used in the array repr.

    Parameters
    ----------
    utcnano : int
        Timestamp in nanoseconds, a single value from a DateTimeNano array

    Returns
    -------
    str
        Timestamp as string.
    """
    # only the style half of the (style, callback) pair is needed here
    style, _callback = self.display_query_properties()
    return self.format_nano_time(utcnano, style)
# -----------------------------------------------------------
# -----------------------------------------------------------
# ------------------------------------------------------------
# ------------------------------------------------------------
# ------------------------------------------------------------
[docs]
@classmethod
def newclassfrominstance(cls, instance, origin):
    """
    View `instance` as `cls` and restore timezone/length info from `origin`.

    Parameters
    ----------
    instance
        Object whose ``view(cls)`` becomes the result.
    origin
        Object supplying ``_timezone`` (copied) and ``_display_length``.

    Returns
    -------
    The viewed object with ``_timezone`` and ``_display_length`` set.
    """
    restored = instance.view(cls)
    # copy the timezone so the result does not share it with ``origin``
    restored._timezone = origin._timezone.copy()
    restored._display_length = origin._display_length
    return restored
# ------------------------------------------------------------
[docs]
def info(self):
    """
    Print the verbose array repr, which includes timezone information.

    Returns
    -------
    None
        The text is written with ``print``; nothing is returned.
    """
    verbose_text = self.__repr__(verbose=True)
    print(verbose_text)
# -------------------------------------------------------
def diff(self, periods=1):
    """
    Difference of the underlying values, returned as a `TimeSpan`.

    NOTE(review): ``diff`` is defined a second time further down in this class.
    Python binds the later definition, so this copy is dead code and is a
    candidate for removal. Until then it is kept in step with the effective
    definition, including the float64 cast it applies before building the
    `TimeSpan`.

    Parameters
    ----------
    periods : int, default 1
        Forwarded unchanged to ``diff`` on the underlying `FastArray`.

    Returns
    -------
    TimeSpan
    """
    deltas = self._fa.diff(periods=periods)
    return TimeSpan(deltas.astype(np.float64))
# ------------------------------------------------------------
[docs]
def __repr__(self, verbose=False):
    """
    Build the array repr, optionally followed by timezone details.

    Parameters
    ----------
    verbose : bool, default False
        When this is exactly ``False`` only the one-line repr is returned.
        Any other value adds the display timezone, origin and offset lines.

    Returns
    -------
    str
    """
    tz_suffix = f", to_tz='{self._timezone._to_tz}'"
    lines = [self.get_classname() + "([" + self._build_string() + "]" + tz_suffix + ")"]
    if verbose is not False:
        zone = self._timezone
        lines.append(f"Displaying in timezone: {zone._timezone_str}")
        lines.append(f"Origin: {zone._from_tz}")
        lines.append(f"Offset: {zone._offset} hours")
    return "\n".join(lines)
# ------------------------------------------------------------
[docs]
@classmethod
def hstack(cls, dtlist):
    """
    Stack a list of DateTimeNano objects end to end.

    All items in list must have their display set to the same timezone.

    Parameters
    ----------
    dtlist : obj:`list` of obj:`DateTimeNano`
        DateTimeNano objects to be stacked.

    Returns
    -------
    obj:`DateTimeNano`

    Examples
    --------
    >>> dtn1 = DateTimeNano(['2019-01-01', '2019-01-02'], from_tz='NYC')
    >>> dtn2 = DateTimeNano(['2019-01-03', '2019-01-04'], from_tz='NYC')
    >>> DateTimeNano.hstack([dtn1, dtn2])
    DateTimeNano(['20190101 00:00:00.000000000', '20190102 00:00:00.000000000', '20190103 00:00:00.000000000', '20190104 00:00:00.000000000'], to_tz='NYC')
    """
    stacked = hstack_any(dtlist, cls, DateTimeNano)
    return stacked
# ------------------------------------------------------------
[docs]
def shift(self, periods=1):
    """
    Shift the values by `periods` positions. Modeled on pandas.shift.

    Values in the array will be shifted to the right if periods is positive, to the left if negative.
    Spaces at either end will be filled with invalid.
    If abs(periods) >= the length of the array, the result will be full of invalid.

    Parameters
    ----------
    periods : int
        Number of periods to move, can be positive or negative

    Returns
    -------
    obj:`DateTimeNano`
    """
    shifted = FastArray.shift(self, periods=periods)
    # re-attach this array's timezone and display length to the shifted result
    return self.newclassfrominstance(shifted, self)
# -------------------------------------------------------------
[docs]
def cut_time(
    self,
    buckets: Union[int, "TimeSpan", List],
    start_time: Optional[Tuple] = None,
    end_time: Optional[Tuple] = None,
    add_pre_bucket: bool = False,
    add_post_bucket: bool = False,
    label: str = "left",
    label_fmt: Optional[str] = None,
    nyc: bool = False,
) -> TypeRegister.Categorical:
    """
    Analogous to rt.cut() but for times. The date part is ignored and the cut
    is based on the time-of-day component only.

    Parameters
    ----------
    buckets : int or rt.TimeSpan or list
        Bucket size or explicit buckets. An int is the common case of equally
        sized minute buckets. A list gives custom cut points as tuples
        ``(h, m)``, ``(h, m, s)`` or ``(h, m, s, ms)``; missing trailing fields
        are taken as 0.
    start_time : tuple, optional
        ``(h, m)``, ``(h, m, s)`` or ``(h, m, s, ms)``; left end point of the
        first bucket. Not used when `buckets` is a list. This type may change
        in future.
    end_time : tuple, optional
        Same format as `start_time`; right end point of the last bucket.
    add_pre_bucket : bool
        Add a cut point at 00:00 and label the first bucket "pre".
    add_post_bucket : bool
        Add a cut point at 24:00 and label the last bucket "post".
    label : {"left", "right"}
        Label buckets by their left or right end points.
    label_fmt : str, optional
        strftime format for the labels. When omitted, the format is taken from
        ``clock_format_short()`` of the cut points.
    nyc : bool, default False
        Convenience shortcut: fill a missing start/end time with (9, 30) and
        (16, 15). Ignored if `buckets` is a list.

    Returns
    -------
    rt.Categorical

    Raises
    ------
    ValueError
        If `buckets` has an unsupported type, a time tuple has the wrong
        length, the start/end bounds are missing or over-specified, the bucket
        size is not positive, or `label` is not "left"/"right".

    See Also
    --------
    inspired from pandas TimeGrouper

    Examples
    --------
    TODO - sanitize - add cut_time examples
    See the version history for structure of older examples.
    """

    def timetuple_to_nsm(tup) -> int:
        """Convert an (h, m[, s[, ms]]) tuple to nanoseconds since midnight."""
        if not 2 <= len(tup) <= 4:
            raise ValueError("Expected (h,m), (h,m,s) or (h,m,s,ms)")
        h, m, s, ms = tup + (0,) * (4 - len(tup))
        return 1_000_000 * (ms + 1000 * (s + 60 * (m + 60 * h)))

    def scalar(arr_or_scalar):
        """Unwrap a length-1 array; pass through anything that has no length."""
        try:
            length = len(arr_or_scalar)
        except Exception:
            return arr_or_scalar
        if length == 1:
            return arr_or_scalar[0]
        raise ValueError("not a length 1 array")

    if isinstance(buckets, int):
        buckets = TimeSpan(buckets, "m")
    elif not isinstance(buckets, (TimeSpan, list)):
        raise ValueError(f"Unknown bucket_size type, got: {type(buckets)}")

    if isinstance(buckets, list):
        # explicit cut points: start/end/nyc play no part
        bucket_cut_points = [timetuple_to_nsm(point) for point in sorted(buckets)]
    else:
        if nyc and (start_time is not None and end_time is not None):
            raise ValueError("If nyc is True then you can't set both start and end time bounds")
        if nyc:
            if start_time is None:
                start_time = (9, 30)
            if end_time is None:
                end_time = (16, 15)
        if start_time is None or end_time is None:
            raise ValueError("Need to specify start and end times")

        now_nsm = timetuple_to_nsm(start_time)
        end_time_nsm = timetuple_to_nsm(end_time)
        bucket_size_nsm = buckets.nanoseconds
        # a zero or negative step never reaches end_time and would loop forever
        if scalar(bucket_size_nsm) <= 0:
            raise ValueError(f"Bucket size must be positive, got: {buckets}")

        bucket_cut_points = []
        while now_nsm < end_time_nsm:
            # after the first addition now_nsm may be a length-1 array
            bucket_cut_points.append(scalar(now_nsm))
            now_nsm += bucket_size_nsm
        bucket_cut_points.append(end_time_nsm)

    if add_pre_bucket:
        # list.insert takes (index, value): put midnight at the front
        bucket_cut_points.insert(0, timetuple_to_nsm((0, 0)))
    if add_post_bucket:
        bucket_cut_points.append(timetuple_to_nsm((24, 0)))

    if label_fmt is None:
        label_fmt = TimeSpan(bucket_cut_points).clock_format_short()

    if label == "right":
        bucket_cut_labels = TimeSpan(bucket_cut_points[1:]).strftime(label_fmt)
    elif label == "left":
        bucket_cut_labels = TimeSpan(bucket_cut_points[:-1]).strftime(label_fmt)
    else:
        raise ValueError(f"Unknown label, got {label}")

    if add_pre_bucket:
        bucket_cut_labels[0] = "pre"
    if add_post_bucket:
        bucket_cut_labels[-1] = "post"

    cat = searchsorted(bucket_cut_points, self.nanos_since_midnight())
    # map right side invalid to 0
    cat[cat >= len(bucket_cut_points)] = 0
    return TypeRegister.Categorical(cat, bucket_cut_labels, base_index=1, ordered=False)
# -------------------------------------------------------------
[docs]
def fill_invalid(self, shape=None, dtype=None, inplace=True):
    """
    Fill with the invalid time value (``self.NAN_TIME``).

    Parameters
    ----------
    shape : optional
        Forwarded to ``_fill_invalid_internal``.
    dtype : optional
        Accepted but not used; this array's own dtype is always forwarded.
    inplace : bool, default True
        Forwarded to ``_fill_invalid_internal``.

    Returns
    -------
    DateTimeNano or None
        None when the internal fill returns None; otherwise a `DateTimeNano`
        built from the filled array with this array's ``to_tz``.
    """
    filled = self._fill_invalid_internal(shape=shape, dtype=self.dtype, fill_val=self.NAN_TIME, inplace=inplace)
    if filled is not None:
        return DateTimeNano(filled, from_tz="GMT", to_tz=self._timezone._to_tz)
    return
# -------------------------------------------------------------
[docs]
def isnan(self):
    """
    Return a boolean array that's True for each `DateTimeNano` element
    that's a NaN (Not a Number), False otherwise.

    Both the DateTime NaN (0) and Riptable's int64 sentinel value are
    considered to be NaN.

    Returns
    -------
    `FastArray`
        A `FastArray` of booleans that's True for each NaN element, False
        otherwise.

    See Also
    --------
    DateTimeNano.isnotnan, Date.isnan, Date.isnotnan, riptable.isnan,
    riptable.isnotnan, riptable.isnanorzero, FastArray.isnan,
    FastArray.isnotnan, FastArray.notna, FastArray.isnanorzero,
    Categorical.isnan, Categorical.isnotnan, Categorical.notna
    Dataset.mask_or_isnan :
        Return a boolean array that's True for each `Dataset` row that contains
        at least one NaN.
    Dataset.mask_and_isnan :
        Return a boolean array that's True for each all-NaN `Dataset` row.

    Notes
    -----
    Riptable currently uses 0 for the DateTime NaN value. This constant is
    held in the `DateTimeBase` class.

    Examples
    --------
    >>> dtn = rt.DateTimeNano(['20210101 09:31:15', '20210519 05:21:17',
    ...                        '20210713 02:44:19'], from_tz = 'NYC')
    >>> dtn[0] = 0
    >>> dtn[1] = dtn.inv
    >>> dtn
    DateTimeNano(['Inv', 'Inv', '20210712 22:44:19.000000000'], to_tz='NYC')
    >>> dtn.isnan()
    FastArray([ True,  True, False])
    """
    # zero counts as invalid for times, hence the nan-or-zero test
    raw = self._fa
    return raw.isnanorzero()
# -------------------------------------------------------------
[docs]
def isnotnan(self):
    """
    Return a boolean array that's True for each `DateTimeNano` element
    that's not a NaN (Not a Number), False otherwise.

    Both the DateTime NaN (0) and Riptable's int64 sentinel value are
    considered to be NaN.

    Returns
    -------
    `FastArray`
        A `FastArray` of booleans that's True for each non-NaN element,
        False otherwise.

    See Also
    --------
    DateTimeNano.isnan, Date.isnan, Date.isnotnan, riptable.isnan,
    riptable.isnotnan, riptable.isnanorzero, FastArray.isnan,
    FastArray.isnotnan, FastArray.notna, FastArray.isnanorzero,
    Categorical.isnan, Categorical.isnotnan, Categorical.notna
    Dataset.mask_or_isnan :
        Return a boolean array that's True for each `Dataset` row that
        contains at least one NaN.
    Dataset.mask_and_isnan :
        Return a boolean array that's True for each all-NaN `Dataset` row.

    Notes
    -----
    Riptable currently uses 0 for the DateTime NaN value. This constant is
    held in the `DateTimeBase` class.

    Examples
    --------
    >>> dtn = rt.DateTimeNano(['20210101 09:31:15', '20210519 05:21:17',
    ...                        '20210713 02:44:19'], from_tz = 'NYC')
    >>> dtn[0] = 0
    >>> dtn[1] = dtn.inv
    >>> dtn
    DateTimeNano(['Inv', 'Inv', '20210712 22:44:19.000000000'], to_tz='NYC')
    >>> dtn.isnotnan()
    FastArray([False, False,  True])
    """
    invalid = self.isnan()
    return ~invalid
# -------------------------------------------------------------
[docs]
def isfinite(self):
    """
    Return a boolean array that's True for each `DateTimeNano` element
    that's not a NaN (Not a Number), False otherwise.

    Both the DateTime NaN (0) and Riptable's int64 sentinel value are
    considered to be NaN. The result is the inverse of `isnan`.

    Returns
    -------
    `FastArray`
        A `FastArray` of booleans that's True for each non-NaN element,
        False otherwise.

    See Also
    --------
    DateTimeNano.isnan, Date.isnan, Date.isnotnan, riptable.isnan,
    riptable.isnotnan, riptable.isnanorzero, FastArray.isnan,
    FastArray.isnotnan, FastArray.notna, FastArray.isnanorzero,
    Categorical.isnan, Categorical.isnotnan, Categorical.notna
    Dataset.mask_or_isnan :
        Return a boolean array that's True for each `Dataset` row that
        contains at least one NaN.
    Dataset.mask_and_isnan :
        Return a boolean array that's True for each all-NaN `Dataset` row.

    Notes
    -----
    Riptable currently uses 0 for the DateTime NaN value. This constant is
    held in the `DateTimeBase` class.

    Examples
    --------
    >>> dtn = rt.DateTimeNano(['20210101 09:31:15', '20210519 05:21:17',
    ...                        '20210713 02:44:19'], from_tz = 'NYC')
    >>> dtn[0] = 0
    >>> dtn[1] = dtn.inv
    >>> dtn
    DateTimeNano(['Inv', 'Inv', '20210712 22:44:19.000000000'], to_tz='NYC')
    >>> dtn.isfinite()
    FastArray([False, False,  True])
    """
    invalid = self.isnan()
    return ~invalid
# -------------------------------------------------------------
[docs]
def isnotfinite(self):
    """
    Return a boolean array that's True for each `DateTimeNano` element
    that's a NaN (Not a Number), False otherwise.

    Both the DateTime NaN (0) and Riptable's int64 sentinel value are
    considered to be NaN.

    Returns
    -------
    `FastArray`
        A `FastArray` of booleans that's True for each NaN element, False
        otherwise.

    See Also
    --------
    DateTimeNano.isnotnan, Date.isnan, Date.isnotnan, riptable.isnan,
    riptable.isnotnan, riptable.isnanorzero, FastArray.isnan,
    FastArray.isnotnan, FastArray.notna, FastArray.isnanorzero,
    Categorical.isnan, Categorical.isnotnan, Categorical.notna
    Dataset.mask_or_isnan :
        Return a boolean array that's True for each `Dataset` row that contains
        at least one NaN.
    Dataset.mask_and_isnan :
        Return a boolean array that's True for each all-NaN `Dataset` row.

    Notes
    -----
    Riptable currently uses 0 for the DateTime NaN value. This constant is
    held in the `DateTimeBase` class.

    Examples
    --------
    >>> dtn = rt.DateTimeNano(['20210101 09:31:15', '20210519 05:21:17',
    ...                        '20210713 02:44:19'], from_tz = 'NYC')
    >>> dtn[0] = 0
    >>> dtn[1] = dtn.inv
    >>> dtn
    DateTimeNano(['Inv', 'Inv', '20210712 22:44:19.000000000'], to_tz='NYC')
    >>> dtn.isnotfinite()
    FastArray([ True,  True, False])
    """
    # same test as isnan(): zero and the int64 sentinel both count as invalid
    raw = self._fa
    return raw.isnanorzero()
# -------------------------------------------------------------
[docs]
def _datetimenano_compare_check(self, funcname, other):
    """
    Prepare `other` for a comparison and run it on the underlying array.

    Parameters
    ----------
    funcname : str
        Name of the comparison dunder, e.g. ``"__eq__"``.
    other
        Right-hand operand. `DateTimeNano` operands displayed in a different
        timezone trigger a warning. `Date` operands are converted to
        `DateTimeNano` in this array's display timezone. Span types are rejected.
        Anything else is passed through unchanged.

    Returns
    -------
    Result of ``_preserve_invalid_comparison`` on the underlying array.

    Raises
    ------
    TypeError
        If `other` is a `TimeSpan`, `DateSpan` or one of their scalars.
    """
    if isinstance(other, (DateTimeNano, DateTimeNanoScalar)):
        same_display = other._timezone._to_tz == self._timezone._to_tz
        if not same_display:
            warnings.warn(
                f"DateTimeNano objects are being displayed in different timezones. Results may not appear to be correct for {funcname}"
            )
    elif isinstance(other, (Date, DateScalar)):
        display_tz = self._timezone._to_tz
        other = DateTimeNano(other._fa * NANOS_PER_DAY, from_tz=display_tz, to_tz=display_tz)
    elif isinstance(other, (TimeSpan, DateSpan, TimeSpanScalar, DateSpanScalar)):
        raise TypeError(f"Cannot compare DateTimeNano with {type(other)}")

    # let everything else fall through to fast array
    # restore invalids
    return self._preserve_invalid_comparison(self._fa, other, funcname)
# -------------------COMPARISONS------------------------------
# ------------------------------------------------------------
[docs]
def __ne__(self, other):
    """Element-wise ``!=``, routed through ``_datetimenano_compare_check``."""
    opname = "__ne__"
    return self._datetimenano_compare_check(opname, other)
[docs]
def __eq__(self, other):
    """Element-wise ``==``, routed through ``_datetimenano_compare_check``."""
    opname = "__eq__"
    return self._datetimenano_compare_check(opname, other)
[docs]
def __ge__(self, other):
    """Element-wise ``>=``, routed through ``_datetimenano_compare_check``."""
    opname = "__ge__"
    return self._datetimenano_compare_check(opname, other)
[docs]
def __gt__(self, other):
    """Element-wise ``>``, routed through ``_datetimenano_compare_check``."""
    opname = "__gt__"
    return self._datetimenano_compare_check(opname, other)
[docs]
def __le__(self, other):
    """Element-wise ``<=``, routed through ``_datetimenano_compare_check``."""
    opname = "__le__"
    return self._datetimenano_compare_check(opname, other)
[docs]
def __lt__(self, other):
    """Element-wise ``<``, routed through ``_datetimenano_compare_check``."""
    opname = "__lt__"
    return self._datetimenano_compare_check(opname, other)
# -------------------------------------------------------------
[docs]
def min(self, **kwargs):
    """
    The earliest `DateTimeNano` in an array.

    Note that until a reported bug is fixed, this method is not NaN-aware.
    Keyword arguments are accepted but not used.

    Returns
    -------
    `DateTimeNano`
        A `DateTimeNano` array containing the earliest `DateTimeNano`
        from the input array.

    See Also
    --------
    DateTimeNano.max, Date.min, Date.max, DateSpan.min, Datespan.max

    Notes
    -----
    This returns an array, not a scalar. However, broadcasting rules will
    apply to operations with it.

    Examples
    --------
    >>> dtn = rt.DateTimeNano(['20210101 09:31:15', '20210519 05:21:17'],
    ...                       from_tz='NYC', to_tz='NYC')
    >>> dtn.min()
    DateTimeNano(['20210101 09:31:15.000000000'], to_tz='NYC')
    """
    earliest = self._fa.min()
    # the stored value is passed as GMT; only the display timezone is carried over
    return DateTimeNano([earliest], from_tz="GMT", to_tz=self._timezone._to_tz)
# return DateTimeNanoScalar(self._fa.min(), timezone=self._timezone)
# -------------------------------------------------------------
[docs]
def max(self, **kwargs):
    """
    The latest `DateTimeNano` in an array.

    Keyword arguments are accepted but not used.

    Returns
    -------
    `DateTimeNano`
        A `DateTimeNano` array containing the latest `DateTimeNano`
        from the input array.

    See Also
    --------
    DateTimeNano.min, Date.min, Date.max, DateSpan.min, Datespan.max

    Notes
    -----
    This returns an array, not a scalar. However, broadcasting rules will
    apply to operations with it.

    Examples
    --------
    >>> dtn = rt.DateTimeNano(['20210101 09:31:15', '20210519 05:21:17'],
    ...                       from_tz='NYC', to_tz='NYC')
    >>> dtn.max()
    DateTimeNano(['20210519 05:21:17.000000000'], to_tz='NYC')
    """
    latest = self._fa.max()
    # the stored value is passed as GMT; only the display timezone is carried over
    return DateTimeNano([latest], from_tz="GMT", to_tz=self._timezone._to_tz)
# return DateTimeNanoScalar(self._fa.max(), timezone=self._timezone)
# -------------------------------------------------------------
[docs]
def diff(self, periods=1):
    """
    Calculate the discrete difference of the underlying values.

    Parameters
    ----------
    periods : int, optional
        Forwarded unchanged to ``diff`` on the underlying `FastArray`.
        NOTE(review): the earlier docstring described this as "the number of
        times values are differenced"; confirm against `FastArray.diff`.

    Returns
    -------
    obj:`TimeSpan`
        Built from the differences cast to float64.
    """
    deltas = self._fa.diff(periods=periods)
    as_float = deltas.astype(np.float64)
    return TimeSpan(as_float)
# -------------------------------------------------------------
[docs]
def __radd__(self, other):
    """Reflected addition; handled exactly like ``self + other``."""
    total = self.__add__(other)
    return total
# -------------------------------------------------------------
[docs]
def __iadd__(self, other):
    """In-place addition; forwards to ``__add__`` with ``inplace=True``."""
    # warnings.warn(f'Currently allowing inplace operation __iadd__ on DateTimeNano. May change in the future.')
    updated = self.__add__(other, inplace=True)
    return updated
# -------------------------------------------------------------
[docs]
def __add__(self, other, inplace=False):
    """
    Add a span or plain numbers to this `DateTimeNano`.

    Parameters
    ----------
    other
        - `TimeSpan` / `TimeSpanScalar`: added as int64 nanoseconds.
        - `DateSpan` / `DateSpanScalar`: converted to int64 and multiplied by
          ``NANOS_PER_DAY``.
        - any other `ndarray`: viewed as `FastArray` and cast to int64.
        - anything else (Python scalars, lists, ...): converted to ``np.int64``
          or an int64 `FastArray`.
    inplace : bool, default False
        When True the operation is dispatched as ``__iadd__``.

    Returns
    -------
    DateTimeNano
        Whatever ``_build_mathops_result`` produces for a `DateTimeNano`
        return type.

    Raises
    ------
    TypeError
        If `other` is a `DateTimeNano`, a `Date`, or one of their scalars.
    """
    other_inv_mask = None
    scalar_types = (DateTimeNanoScalar, DateScalar, TimeSpanScalar, DateSpanScalar)

    if not isinstance(other, np.ndarray) and not isinstance(other, scalar_types):
        # plain Python / NumPy input: treat as raw nanoseconds
        if np.isscalar(other):
            other = np.int64(other)
        else:
            other = FastArray(other, dtype=np.int64)
    elif isinstance(other, (DateTimeNano, DateTimeNanoScalar, Date, DateScalar)):
        raise TypeError(f"Cannot add two objects {type(self)} and {type(other)}")
    elif isinstance(other, (TimeSpan, TimeSpanScalar)):
        # the NaN mask has to be taken before the int64 cast discards the NaNs
        other_inv_mask = isnan(other)
        other = other.astype(np.int64)
    elif isinstance(other, (DateSpan, DateSpanScalar)):
        other_inv_mask = isnan(other)
        other = other.astype(np.int64) * NANOS_PER_DAY
    else:
        other = other.view(FastArray)
        other = other.astype(np.int64, copy=False)

    funcname = "__iadd__" if inplace else "__add__"
    # every non-raising path goes through the parent-class operation (call_super=True)
    # with no MATH_OPERATION fast track (op=None) and a DateTimeNano result
    return self._build_mathops_result(other, funcname, True, other_inv_mask, inplace, None, DateTimeNano)
# -------------------------------------------------------------
[docs]
def __rsub__(self, other):
    """
    Reflected subtraction (``other - self``).

    Only `Date` / `DateScalar` left operands are supported; the work is handed
    to their own ``__sub__``.

    Raises
    ------
    TypeError
        For any other left operand.
    """
    if not isinstance(other, (Date, DateScalar)):
        raise TypeError("DateTimeNano can only be subtracted from DateTimeNano or Date.")
    return other.__sub__(self)
# -------------------------------------------------------------
[docs]
def __isub__(self, other):
    """In-place subtraction; warns, then forwards to ``__sub__`` with ``inplace=True``."""
    notice = "Currently allowing inplace operation __isub__ on DateTimeNano. May change in the future."
    warnings.warn(notice)
    return self.__sub__(other, inplace=True)
# -------------------------------------------------------------
[docs]
def __sub__(self, other, inplace=False):
    """
    Subtract another time, a span, or plain numbers from this `DateTimeNano`.

    Parameters
    ----------
    other
        - `DateTimeNano` / `DateTimeNanoScalar`: result is a `TimeSpan`.
        - `Date` / `DateScalar`: converted to int64 and multiplied by
          ``NANOS_PER_DAY``; result is a `TimeSpan`.
        - `TimeSpan` / `TimeSpanScalar`: subtracted as int64 nanoseconds;
          result is a `DateTimeNano`.
        - `DateSpan` / `DateSpanScalar`: converted to int64 and multiplied by
          ``NANOS_PER_DAY``; result is a `DateTimeNano`.
        - any other `ndarray`: viewed as `FastArray` and cast to int64.
        - anything else (Python scalars, lists, ...): converted to ``np.int64``
          or an int64 `FastArray`.
    inplace : bool, default False
        When True the operation is dispatched as ``__isub__``.

    Returns
    -------
    DateTimeNano or TimeSpan
        Depending on the type of `other`, as listed above.

    Raises
    ------
    TypeError
        If `inplace` is True and the result would be a `TimeSpan`.
    """
    call_super = False
    other_inv_mask = None
    op = None
    return_type = DateTimeNano
    scalar_types = (DateTimeNanoScalar, DateScalar, TimeSpanScalar, DateSpanScalar)

    if not isinstance(other, np.ndarray) and not isinstance(other, scalar_types):
        # plain Python / NumPy input: treat as raw nanoseconds
        if np.isscalar(other):
            other = np.int64(other)
        else:
            other = FastArray(other, dtype=np.int64)
        call_super = True
    elif isinstance(other, (DateTimeNano, DateTimeNanoScalar, Date, DateScalar)):
        # time minus time gives a duration, which cannot be stored back in place
        if inplace:
            raise TypeError("__sub__ returns TimeSpan. Cannot perform inplace.")
        return_type = TimeSpan
        op = MATH_OPERATION.SUBDATETIMES
        if isinstance(other, (Date, DateScalar)):
            # upcast Date
            other = other.astype(np.int64) * NANOS_PER_DAY
    elif isinstance(other, (TimeSpan, TimeSpanScalar)):
        # Take the NaN mask before the upcast, as __add__ does: the int64
        # result of casting a float NaN is not a dependable invalid marker.
        other_inv_mask = isnan(other)
        # upcast TimeSpan to preserve precision
        other = other.astype(np.int64)
        call_super = True
    elif isinstance(other, (DateSpan, DateSpanScalar)):
        # need to get mask before upcasting
        other_inv_mask = isnan(other)
        other = other.astype(np.int64) * NANOS_PER_DAY
        call_super = True
    else:
        # user fastarray operation
        other = other.view(FastArray)
        other = other.astype(np.int64, copy=False)
        call_super = True

    funcname = "__isub__" if inplace else "__sub__"
    return self._build_mathops_result(other, funcname, call_super, other_inv_mask, inplace, op, return_type)
[docs]
def __matmul__(self, other):
    # ``self @ other`` is not supported; always raises NotImplementedError.
    raise NotImplementedError
# need to check properties to see if division is happening
# def __truediv__(self, other): raise NotImplementedError
# def __floordiv__(self, other): raise NotImplementedError
# def __mod__(self, other): raise NotImplementedError
# def __divmod__(self, other): raise NotImplementedError
[docs]
def __pow__(self, other, modulo=None):
    # ``self ** other`` / ``pow(self, other, modulo)`` is not supported; always raises.
    raise NotImplementedError
[docs]
def __lshift__(self, other):
    # ``self << other`` is not supported; always raises NotImplementedError.
    raise NotImplementedError
[docs]
def __rshift__(self, other):
    # ``self >> other`` is not supported; always raises NotImplementedError.
    raise NotImplementedError
[docs]
def __and__(self, other):
    # ``self & other`` is not supported; always raises NotImplementedError.
    raise NotImplementedError
[docs]
def __xor__(self, other):
    # ``self ^ other`` is not supported; always raises NotImplementedError.
    raise NotImplementedError
[docs]
def __or__(self, other):
    # ``self | other`` is not supported; always raises NotImplementedError.
    raise NotImplementedError
[docs]
def __rmul__(self, other):
    # Reflected multiplication (``other * self``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __rmatmul__(self, other):
    # Reflected matrix multiplication (``other @ self``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __rtruediv__(self, other):
    # Reflected true division (``other / self``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __rfloordiv__(self, other):
    # Reflected floor division (``other // self``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __rmod__(self, other):
    # Reflected modulo (``other % self``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __rdivmod__(self, other):
    # Reflected ``divmod(other, self)`` is not supported; always raises.
    raise NotImplementedError
[docs]
def __rpow__(self, other):
    # Reflected exponentiation (``other ** self``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __rlshift__(self, other):
    # Reflected left shift (``other << self``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __rrshift__(self, other):
    # Reflected right shift (``other >> self``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __rand__(self, other):
    # Reflected bitwise and (``other & self``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __rxor__(self, other):
    # Reflected bitwise xor (``other ^ self``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __ror__(self, other):
    # Reflected bitwise or (``other | self``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __imul__(self, other):
    # In-place multiplication (``self *= other``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __imatmul__(self, other):
    # In-place matrix multiplication (``self @= other``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __itruediv__(self, other):
    # In-place true division (``self /= other``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __ifloordiv__(self, other):
    # In-place floor division (``self //= other``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __imod__(self, other):
    # In-place modulo (``self %= other``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __ipow__(self, other, modulo=None):
    # In-place exponentiation (``self **= other``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __ilshift__(self, other):
    # In-place left shift (``self <<= other``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __irshift__(self, other):
    # In-place right shift (``self >>= other``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __iand__(self, other):
    # In-place bitwise and (``self &= other``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __ixor__(self, other):
    # In-place bitwise xor (``self ^= other``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __ior__(self, other):
    # In-place bitwise or (``self |= other``) is not supported; always raises.
    raise NotImplementedError
[docs]
def __neg__(self):
    # Unary negation (``-self``) is not supported; always raises NotImplementedError.
    raise NotImplementedError
[docs]
def __pos__(self):
    # Unary plus (``+self``) is not supported; always raises NotImplementedError.
    raise NotImplementedError
[docs]
def __invert__(self):
    # Bitwise inversion (``~self``) is not supported; always raises NotImplementedError.
    raise NotImplementedError
[docs]
def __complex__(self):
    # ``complex(self)`` is not supported; always raises NotImplementedError.
    raise NotImplementedError
[docs]
def __int__(self):
    # ``int(self)`` is not supported; always raises NotImplementedError.
    raise NotImplementedError
[docs]
def __float__(self):
    # ``float(self)`` is not supported; always raises NotImplementedError.
    raise NotImplementedError
[docs]
def __round__(self, ndigits=0):
    # ``round(self, ndigits)`` is not supported; always raises NotImplementedError.
    raise NotImplementedError
[docs]
def __trunc__(self):
    # ``math.trunc(self)`` is not supported; always raises NotImplementedError.
    raise NotImplementedError
[docs]
def __floor__(self):
    # ``math.floor(self)`` is not supported; always raises NotImplementedError.
    raise NotImplementedError
[docs]
def __ceil__(self):
    # ``math.ceil(self)`` is not supported; always raises NotImplementedError.
    raise NotImplementedError
# -------------------------------------------------------------
# ----raise error on certain math operations-------------------
# def __radd__(self, value):
# return self.__add__(value)
[docs]
def __mul__(self, value):
    """Multiply by `value` through ``_guard_math_op`` (operator name ``"__mul__"``)."""
    op_name = "__mul__"
    return self._guard_math_op(value, op_name)
[docs]
def __floordiv__(self, value):
    """Floor-divide by `value` through ``_guard_math_op`` (operator name ``"__floordiv__"``)."""
    op_name = "__floordiv__"
    return self._guard_math_op(value, op_name)
[docs]
def __truediv__(self, value):
    """True-divide by `value` through ``_guard_math_op`` (operator name ``"__truediv__"``)."""
    op_name = "__truediv__"
    return self._guard_math_op(value, op_name)
[docs]
def __abs__(self):
    """Absolute value is rejected for this type; always raises TypeError."""
    message = "Cannot perform absolute value on DateTimeNano object."
    raise TypeError(message)
[docs]
def _guard_math_op(self, value, op_name):
    """
    Run the arithmetic method `op_name` of the underlying ``_fa`` array.

    Parameters
    ----------
    value : object
        Right-hand operand.
    op_name : str
        Name of the method looked up on ``self._fa`` (for example ``"__mul__"``).

    Raises
    ------
    TypeError
        If `value` is any ``DateTimeBase`` instance.
    """
    if not isinstance(value, DateTimeBase):
        return getattr(self._fa, op_name)(value)
    raise TypeError(f"Cannot perform operation {op_name} between DateTimeNano and {type(value)}")
# -------------------------------------------------------------
[docs]
@classmethod
def _random(cls, sz, to_tz="NYC", from_tz="NYC", inv=None, start=None, end=None):
    """
    Internal routine for random(), random_invalid().

    Draws `sz` int64 values with ``np.random.randint`` and wraps them in a
    ``DateTimeNano``. The bounds are year offsets from 1970 multiplied by
    ``NANOS_PER_YEAR`` (a 365-day year):

    - `start` is None: from 1 year to 50 years after 1970; `end` is ignored.
    - `start` given, `end` None: one ``NANOS_PER_YEAR`` beginning at `start`.
    - both given: from `start` up to (not including) `end`.

    Where `inv` is True the drawn value is overwritten with 0.
    """
    if start is None:
        lower, upper = NANOS_PER_YEAR, NANOS_PER_YEAR * 50
    else:
        lower = (start - 1970) * NANOS_PER_YEAR
        # maybe test if leap year?
        upper = lower + NANOS_PER_YEAR if end is None else (end - 1970) * NANOS_PER_YEAR

    stamps = np.random.randint(lower, upper, sz, dtype=np.int64)
    if inv is not None:
        putmask(stamps, inv, 0)
    return DateTimeNano(stamps, to_tz=to_tz, from_tz=from_tz)
[docs]
@classmethod
def random(cls, sz, to_tz="NYC", from_tz="NYC", inv=None, start=None, end=None):
    """
    Return an array of randomly generated `DateTimeNano` values.

    If `start` and `end` are not provided, years range from 1971 to 2020.

    Parameters
    ----------
    sz : int
        The length of the generated array.
    to_tz : str, default 'NYC'
        The timezone for display. For valid timezone options, see
        :py:attr:`.TimeZone.valid_timezones`.
    from_tz : str, default 'NYC'
        The timezone of origin. For valid timezone options, see
        :py:attr:`.TimeZone.valid_timezones`.
    inv : array of bool, optional
        Where True, an invalid `DateTimeNano` is in the returned array.
    start : int, optional
        The start year for the range. If no end year is provided, all times
        are within the start year.
    end : int, optional
        The end year for the range. Used only if `start` is provided.

    Returns
    -------
    DateTimeNano
        A `DateTimeNano` with randomly generated values.

    See Also
    --------
    DateTimeNano.random_invalid :
        Return a randomly generated `DateTimeNano` array with randomly placed invalid values.
    Date.range : Return a `Date` object of dates within a given interval, spaced by `step`.
    .riptable.arange :
        Return an array of evenly spaced values within a given interval.

    Examples
    --------
    >>> rt.DateTimeNano.random(3)
    DateTimeNano([19980912 15:31:08.025189457, 19931121 15:48:32.855425859, 19930915 14:58:31.376750294])  # random

    If `start` is provided but `end` is not, all times are within the start year:

    >>> rt.DateTimeNano.random(3, start=2015)
    DateTimeNano(['20151011 12:15:45.588049363', '20150207 14:54:33.649991888', '20150131 18:58:13.543792210'], to_tz='NYC')  # random

    With an `inv` mask. Where True, an invalid `DateTimeNano` is in the returned
    array:

    >>> i = rt.FastArray([True, False, True])
    >>> rt.DateTimeNano.random(3, inv=i)
    DateTimeNano(['Inv', '19930915 02:39:29.621051630', 'Inv'], to_tz='NYC')
    """
    options = dict(to_tz=to_tz, from_tz=from_tz, inv=inv, start=start, end=end)
    return cls._random(sz, **options)
[docs]
@classmethod
def random_invalid(cls, sz, to_tz="NYC", from_tz="NYC", start=None, end=None):
    """
    Return a randomly generated `DateTimeNano` object with randomly placed
    invalid values.

    This method is the same as `DateTimeNano.random`, except that a mask is
    randomly generated to place the invalid values.

    If `start` and `end` are not provided, years for valid `DateTimeNano`
    values range from 1971 to 2020.

    Parameters
    ----------
    sz : int
        The length of the generated array.
    to_tz : str, default 'NYC'
        The timezone for display. For valid timezone options, see
        :py:attr:`.TimeZone.valid_timezones`.
    from_tz : str, default 'NYC'
        The timezone of origin. For valid timezone options, see
        :py:attr:`.TimeZone.valid_timezones`.
    start : int, optional
        The start year for the range. If no end year is provided, all times
        are within the start year.
    end : int, optional
        The end year for the range. Used only if `start` is provided.

    Returns
    -------
    DateTimeNano
        A `DateTimeNano` with randomly placed invalid values.

    See Also
    --------
    DateTimeNano.random : Return an array of randomly generated `DateTimeNano` values.
    Date.range : Return a `Date` object of dates within a given interval, spaced by `step`.
    .riptable.arange : Return an array of evenly spaced values within a specified interval.

    Examples
    --------
    >>> rt.DateTimeNano.random_invalid(3)
    DateTimeNano(['Inv', '19830405 15:24:01.815771855', 'Inv'], to_tz='NYC')  # random
    """
    # TODO: Use np.random.default_rng() here instead.
    # Each position is independently drawn as True (invalid) or False.
    invalid_mask = np.random.randint(low=0, high=2, size=sz, dtype=bool)
    return cls._random(sz, to_tz=to_tz, from_tz=from_tz, inv=invalid_mask, start=start, end=end)
# -------------------------------------------------------------
[docs]
def resample(self, rule: str, dropna: bool = False):
    """
    Convenience method for frequency conversion and resampling of
    DateTimeNano arrays.

    Parameters
    ----------
    rule : str
        The offset string representing the target conversion. The unit code
        may be preceded by a number, e.g. '3H'. Currently supported:

        - H : hour
        - T, min : minute
        - S : second
        - L, ms : millisecond
        - U, us : microsecond
        - N, ns : nanosecond
    dropna : bool, default False
        If True, returns a DateTimeNano the same length as the caller, with
        all values rounded down to the specified frequency.
        If False, returns a DateTimeNano range from the caller's min to max
        with a value at every step of the specified frequency.

    Returns
    -------
    dtn : `DateTimeNano`

    Raises
    ------
    TypeError
        If `rule` is not a string.
    ValueError
        If `rule` has no unit code, an unknown unit code, a malformed numeric
        prefix, or resolves to a step smaller than one nanosecond.
    NotImplementedError
        If the unit code is recognized but has no nanosecond conversion factor.

    Examples
    --------
    >>> dtn = DateTimeNano(['2015-04-15 14:26:54.735321368',
    ...                     '2015-04-20 07:30:00.858219615',
    ...                     '2015-04-23 13:15:24.526871083',
    ...                     '2015-04-21 02:25:11.768548100',
    ...                     '2015-04-24 07:47:54.737776979',
    ...                     '2015-04-10 23:59:59.376589955'],
    ...                     from_tz='UTC', to_tz='UTC')
    >>> dtn.resample('L', dropna=True)
    DateTimeNano(['20150415 14:26:54.735000000', '20150420 07:30:00.858000000', '20150423 13:15:24.526000000', '20150421 02:25:11.768000000', '20150424 07:47:54.737000000', '20150410 23:59:59.376000000'], to_tz='UTC')

    >>> dtn = DateTimeNano(['20190417 17:47:00.000001',
    ...                     '20190417 17:47:00.000003',
    ...                     '20190417 17:47:00.000005'],
    ...                     from_tz='NYC')
    >>> dtn.resample('1us')
    DateTimeNano(['20190417 17:47:00.000001000', '20190417 17:47:00.000002000', '20190417 17:47:00.000003000', '20190417 17:47:00.000004000', '20190417 17:47:00.000005000'], to_tz='NYC')
    """

    # -------------------------------------------------------
    def parse_rule(rule):
        # Split the rule into (amount, unit): amount is an int or float
        # multiplier, unit is the value looked up in self.FrequencyStrings.
        split_at = None
        for i, c in enumerate(rule):
            if not c.isnumeric() and c != ".":
                split_at = i
                break

        # never hit a string interval code (empty rule or digits only)
        if split_at is None:
            raise ValueError(self._INVALID_FREQ_ERROR.format(rule))

        if split_at == 0:
            amount = 1
        else:
            prefix = rule[:split_at]
            try:
                amount = int(prefix)
            except ValueError:
                try:
                    amount = float(prefix)
                except ValueError:
                    # e.g. '1..5H' or '.H': report it as an invalid frequency
                    # rather than leaking the float-conversion error
                    raise ValueError(self._INVALID_FREQ_ERROR.format(rule)) from None

        unit = self.FrequencyStrings.get(rule[split_at:].upper(), None)
        if unit is None:
            raise ValueError(self._INVALID_FREQ_ERROR.format(rule))
        return amount, unit

    # -------------------------------------------------------
    def get_step(amount, unit):
        # Convert amount * unit into an int64 step in nanoseconds.
        if unit not in TimeSpan.unit_convert_factors:
            raise NotImplementedError(f"Currently supports frequency strings {[*self.FrequencyStrings]}")
        step = np.int64(amount * TimeSpan.unit_convert_factors[unit])
        if step <= 0:
            # a zero step would mean modulo-by-zero and an arange with step 0
            raise ValueError(f"Resample rule {rule!r} resolves to a step smaller than one nanosecond.")
        return step

    # -------------------------------------------------------
    def time_interval(amount, unit):
        # Build a regular grid covering [min, max] of the caller.
        # TODO: check for nan times
        # should these be included in any min/max calculation?
        # TJD note this needs to be reviewed... min max should return a scalar not an array of 1
        start = np.int64(self.min()[0])
        end = np.int64(self.max()[0])
        step = get_step(amount, unit)

        # snap start down to a multiple of step; push end to the next multiple
        start = start - (start % step)
        # should this include both ends?
        end = end - (end % step) + step

        stamps = arange(start, end, step=step)
        return DateTimeNano(stamps, to_tz=self._timezone._to_tz)

    # -------------------------------------------------------
    def as_time_interval(amount, unit):
        # Same length as the original: every value floored to a multiple of
        # step. May contain repeats; empty intervals do not appear.
        step = get_step(amount, unit)
        return self - (self._fa % step)

    # -------------------------------------------------------
    if not isinstance(rule, str):
        raise TypeError(f"Rule must be a string. Got {type(rule)}.")

    amount, unit = parse_rule(rule)
    if dropna:
        return as_time_interval(amount, unit)
    return time_interval(amount, unit)
[docs]
@staticmethod
def _from_arrow(
    arr: Union["pa.Array", "pa.ChunkedArray"], zero_copy_only: bool = True, writable: bool = False
) -> "DateTimeNano":
    """
    Create a `DateTimeNano` instance from a "timestamp"-typed `pyarrow.Array`.

    Parameters
    ----------
    arr : pyarrow.Array or pyarrow.ChunkedArray
        Must be a "timestamp"-typed pyarrow array with a timezone.
    zero_copy_only : bool, optional, defaults to True
        When True, raise instead of converting whenever the conversion needs
        a unit cast (unit other than 'ns') or null filling.
    writable : bool, optional, defaults to False
        Passed to ``pyarrow.Array.to_numpy``. The numpy conversion is asked to
        be zero-copy only when `writable` is False.

    Returns
    -------
    DateTimeNano

    Raises
    ------
    TypeError
        If `arr` is not a ``pyarrow.Array`` or ``pyarrow.ChunkedArray``.
    ValueError
        If `arr` is not timestamp-typed, is timezone-naive, or
        `zero_copy_only` is set and the unit is not 'ns'.
    RuntimeError
        If `zero_copy_only` is set and `arr` contains nulls.
    """
    import pyarrow as pa
    import pyarrow.types as pat

    # Only support converting from timestamp-typed arrays.
    if not isinstance(arr, (pa.Array, pa.ChunkedArray)):
        raise TypeError("The array is not an instance of `pyarrow.Array` or `pyarrow.ChunkedArray`.")
    elif not pat.is_timestamp(arr.type):
        raise ValueError(
            f"rt.DateTimeNano arrays can only be created from pyarrow arrays of type 'timestamp', not '{arr.type}'."
        )

    # A unit other than 'ns' needs a cast, which can never be zero-copy.
    if zero_copy_only and arr.type.unit != "ns":
        raise ValueError(
            f"Unable to perform a zero-copy conversion for an timestamp-typed array with the unit '{arr.type.unit}'."
        )

    # ChunkedArrays need special handling.
    if isinstance(arr, pa.ChunkedArray):
        # A single chunk is converted directly by recursing on that chunk.
        if arr.num_chunks == 1:
            return DateTimeNano._from_arrow(arr.chunk(0), zero_copy_only=zero_copy_only, writable=writable)

        # TODO: Benchmark this vs. using ChunkedArray.combine_chunks() then converting.
        # TODO: Look at `zero_copy_only` and `writable` -- the converted arrays could be destroyed while
        #       hstacking since they were just created; this could reduce peak memory utilization.
        return hstack(
            [
                DateTimeNano._from_arrow(arr_chunk, zero_copy_only=zero_copy_only, writable=writable)
                for arr_chunk in arr.iterchunks()
            ]
        )

    # TEMP: scale other units to nanoseconds, the representation used by DateTimeNano.
    # This could be fused with the logic below for better CPU/memory efficiency.
    if arr.type.unit != "ns":
        ns_timestamp_type = pa.timestamp("ns", tz=arr.type.tz)
        arr = arr.cast(ns_timestamp_type)

    # TODO: Also need to check if this is one of the timezones supported by riptable.
    if arr.type.tz is None:
        raise ValueError("The input array is timezone-naive, which is not supported by riptable.")

    # Convert the pyarrow timezone (tz database id) to the string passed as `from_tz`.
    from_tz_str = TypeRegister.TimeZone.normalize_tz_to_tzdb_name(arr.type.tz)

    # View the underlying data as int64 epoch-nanoseconds.
    arr_int64 = arr.view(pa.int64())

    if arr.null_count != 0:
        if zero_copy_only:
            raise RuntimeError("Unable to perform zero-copy conversion from an input array containing nulls.")

        # Nulls force a copy, so do it in pyarrow with fill_null: this writes the
        # riptable int64 invalid into the null slots and keeps pyarrow from
        # converting to floating point with NaN. DateTimeNano treats both the int64
        # invalid and zero as NA; the choice of the int64 invalid here is arbitrary.
        int64_inv_pa = pa.scalar(INVALID_DICT[np.dtype(np.int64).num], type=pa.int64())
        arr_int64 = arr_int64.fill_null(int64_inv_pa)

    # Without nulls this step can be zero-copy, depending on `writable`.
    # TODO: If writable=True after fill_null, it seems a 2nd copy is made; can that be avoided?
    arr_int64_np = arr_int64.to_numpy(zero_copy_only=not writable, writable=writable)
    return DateTimeNano(arr_int64_np, from_tz=from_tz_str)
[docs]
def to_arrow(
    self,
    type: Optional["pa.DataType"] = None,
    *,
    preserve_fixed_bytes: bool = False,
    empty_strings_to_null: bool = True,
) -> Union["pa.Array", "pa.ChunkedArray"]:
    """
    Convert this `DateTimeNano` to a `pyarrow.Array`.

    The result is a nanosecond "timestamp" array whose timezone is derived
    from this array's ``_timezone._from_tz``. Elements reported by
    ``isnan()`` are passed to pyarrow as the ``mask`` argument.

    Parameters
    ----------
    type : pyarrow.DataType, optional, defaults to None
        Unused.
    preserve_fixed_bytes : bool, optional, defaults to False
        Unused.
    empty_strings_to_null : bool, optional, defaults to True
        Unused.

    Returns
    -------
    pyarrow.Array or pyarrow.ChunkedArray
    """
    import pyarrow as pa

    # tz db / ICU timezone name -> matching pyarrow timestamp type
    tzdb_name = TypeRegister.TimeZone.normalize_tz_to_tzdb_name(self._timezone._from_tz)
    timestamp_type = pa.timestamp("ns", tz=tzdb_name)

    # Only hand pyarrow a mask when at least one element is invalid.
    null_mask = self.isnan()
    mask_arg = null_mask if null_mask.any() else None

    # TODO: If the caller specified the `type` parameter (e.g. to have the array output
    #       as a different timezone), create the array as below, then cast it to the target type.
    return pa.array(self._np, mask=mask_arg, type=timestamp_type)
[docs]
def __arrow_array__(self, type: Optional["pa.DataType"] = None) -> Union["pa.Array", "pa.ChunkedArray"]:
    """Return ``self.to_arrow(type=type)``."""
    converted = self.to_arrow(type=type)
    return converted
# ========================================================
class TimeSpanBase:
"""Parent class for TimeSpan"""
ReduceFuncs = False
unit_convert_factors = {
"Y": NANOS_PER_YEAR,
"W": NANOS_PER_DAY * 7,
"D": NANOS_PER_DAY,
"h": NANOS_PER_HOUR,
"m": NANOS_PER_MINUTE,
"s": NANOS_PER_SECOND,
"ms": NANOS_PER_MILLISECOND,
"us": NANOS_PER_MICROSECOND,
"ns": 1,
}
# ------------------------------------------------------------
def get_classname(self):
return __class__.__name__
# ------------------------------------------------------------
# TimeSpanBase
# For TimeSpan and TimeSpanScalar
def _strftime(self, format, dtype="U"):
"""
Convert each `TimeSpan` or `TimeSpanScalar` to a formatted
string representation.
Parameters
----------
format : str
One or more format codes supported by the
:py:meth:`datetime.datetime.strftime` function of the standard
Python distribution. For codes, see
:ref:`python:strftime-strptime-behavior`.
dtype : {"U", "S", "O"}, default "U"
For `TimeSpan` input, the data type of the returned array:
- "U": unicode string
- "S": byte string
- "O": object string
Returns
-------
`ndarray` or str
For `TimeSpan` input, returns an `ndarray` of strings. For
`TimeSpanScalar` input, returns a scalar string.
See Also
--------
TimeSpan.strftime, TimeSpanScalar.strftime, Date.strftime,
DateScalar.strftime, DateTimeNano.strftime, DateTimeNanoScalar.strftime
Notes
-----
This routine has not been sped up yet. It also raises an error on NaNs.
Examples
--------
>>> ts = rt.TimeSpan(['09:00', '10:45', '02:30'])
>>> ts
TimeSpan(['09:00:00.000000000', '10:45:00.000000000', '02:30:00.000000000'])
>>> ts.strftime('%X')
array(['09:00:00', '10:45:00', '02:30:00'], dtype='<U8')
>>> ts[0].strftime('%X')
'09:00:00'
"""
# get negative mask since strftime does not handle negative
isnegative = self._fa < 0
if isinstance(self, np.ndarray):
result = np.asarray(
[dt.utcfromtimestamp(timestamp).strftime(format) for timestamp in self._fa.abs() / 1_000_000_000.0],
dtype=dtype,
)
if isnegative.sum() > 0:
if dtype == "S":
negcol = zeros(result.shape, dtype="S1")
negcol[isnegative] = b"-"
else:
negcol = zeros(result.shape, dtype="U1")
negcol[isnegative] = "-"
result = negcol + result
else:
result = dt.strftime(dt.utcfromtimestamp(abs(self) / 1_000_000_000.0), format)
if isnegative:
# check dtype 'S'
if dtype == "S":
result = b"-" + result
else:
result = "-" + result
return result
# ------------------------------------------------------------
# --------RETURN FLOAT ARRAY AT DIFFERENT RESOLUTIONS---------
@property
def days(self):
"""Timespan as float64 array of days.
Note
----
Loss of nanosecond precision at ~52 days.
"""
return self._fa / NANOS_PER_DAY
@property
def hours(self):
"""Timespan as float64 array of hours."""
return self._fa / NANOS_PER_HOUR
@property
def minutes(self):
"""Timespan as float64 array of minutes."""
return self._fa / NANOS_PER_MINUTE
@property
def seconds(self):
"""Timespan as float64 array of seconds."""
return self._fa / NANOS_PER_SECOND
@property
def milliseconds(self):
"""Timespan as float64 array of milliseconds."""
return self._fa / NANOS_PER_MILLISECOND
@property
def microseconds(self):
"""Timespan as float64 array of microseconds."""
return self._fa / NANOS_PER_MICROSECOND
@property
def nanoseconds(self):
"""Timespan as float64 array of nanoseconds (same as underlying array)."""
return self._fa
@property
def hhmmss(self):
"""Timespan as int64 array in format HHMMSS."""
SEC_PER_MIN = 60
hour, remainder = divmod(self.astype(np.int64) // NANOS_PER_SECOND, 3600)
minutes, seconds = divmod(remainder, SEC_PER_MIN)
return (10_000 * hour + 100 * minutes + seconds).astype(np.int64)
# ------------------------------------------------------------
@classmethod
def _unit_to_nano_span(cls, values, unit):
"""
:param values: FastArray from calling constructor
:param unit: unit string (see numpy's timedelta64 dtype)
"""
if isinstance(unit, bytes):
unit = unit.decode()
try:
mult = cls.unit_convert_factors[unit]
except:
raise ValueError(f"Cannot initialize span with {unit} units.")
if mult != 1:
values = values * mult
return values
# ------------------------------------------------------------
@staticmethod
def display_item(nanosecs, itemformat=None):
if itemformat is not None:
length = itemformat.length
else:
length = DisplayLength.Short
if length == DisplayLength.Medium:
return TimeSpan.display_item_unit(nanosecs)
else:
return TimeSpan.display_item_clock(nanosecs)
# ------------------------------------------------------------
@staticmethod
def display_item_unit(nanosecs):
"""
For each item, finds the highest unit to express it in amounts between 1 and 1000 or standard time measure.
e.g. 59.123m, 678.823ms, 30ns
"""
if np.isnan(nanosecs):
return "Inv"
# TODO add different formatting for large time spans (> 1 day)
divisor, unit_str = TimeSpan._display_resolution(nanosecs)
if divisor == 1:
delta = str(nanosecs)
else:
delta = nanosecs / divisor
delta = "{0:.3f}".format(delta)
return delta + unit_str
@staticmethod
def _display_resolution(nanosecs):
"""
Get extension and divisor for display_item_unit() (see above)
"""
nanosecs = abs(nanosecs)
divisor = NANOS_PER_HOUR
unit_str = "h"
if nanosecs < 1_000:
divisor = 1
unit_str = "ns"
elif nanosecs < 1_000_000:
divisor = 1_000
unit_str = "us"
elif nanosecs < NANOS_PER_SECOND:
divisor = 1_000_000
unit_str = "ms"
elif nanosecs < NANOS_PER_MINUTE:
divisor = NANOS_PER_SECOND
unit_str = "s"
elif nanosecs < NANOS_PER_HOUR:
divisor = NANOS_PER_MINUTE
unit_str = "m"
# we should probably use a different format past this point
# maybe a formatting string with more info
# elif max_time < NANOS_PER_DAY:
# divisor = NANOS_PER_HOUR
# unit_str = 'h'
return divisor, unit_str
# ------------------------------------------------------------
@staticmethod
def display_item_clock(nanosecs):
"""
Long clock format (default) HH:MM:SS.<nano-decimal>
"""
format_str = "%H:%M:%S"
item = abs(nanosecs)
if isnan(item):
timestr = "Inv"
else:
gmt_time = time.gmtime(item / NANOS_PER_SECOND)
timestr = DateTimeBase.DEFAULT_FORMATTER(format_str, gmt_time)
days = np.int64(item) // NANOS_PER_DAY
if days > 0:
timestr = str(days) + "d " + timestr
if nanosecs < 0:
timestr = "-" + timestr
timestr = DateTimeBase._add_nano_ext(item, timestr)
return timestr
# ------------------------------------------------------------
def clock_format_short(self) -> str:
"""
Gets the minimal clock format that will properly represent the time spans.
"""
if (self.seconds % 1).any():
return "%H:%M:%S.%f"
elif (self.minutes % 1).any():
return "%H:%M:%S"
else:
return "%H:%M"
# ------------------------------------------------------------
def display_clock_short(self) -> "TimeSpan":
"""
Gets the minimal clock display that will properly represent the time spans.
"""
fmt = self.clock_format_short()
return self.strftime(fmt)
# ------------------------------------------------------------
@staticmethod
def display_convert_func(nanosecs, itemformat: ItemFormat):
return TimeSpan.display_item(nanosecs, itemformat=itemformat)
# TODO uncomment when starfish is implemented and imported
# def _sf_display_query_properties(self):
# itemformat = sf.ItemFormat({'length':self.display_length,
# 'align':sf.DisplayAlign.Right})
# return itemformat, self.display_convert_func
# ------------------------------------------------------------
def display_query_properties(self):
# if TypeRegister.DisplayOptions.STARFISH:
# return self._sf_display_query_properties()
item_format = ItemFormat(
length=self.display_length, justification=DisplayJustification.Right, can_have_spaces=True, decoration=None
)
convert_func = self.display_convert_func
return item_format, convert_func
# --BINARY OPERATIONS------------------------------------------
# -------------------------------------------------------------
def __add__(self, value):
other_inv_mask = None
# TimeSpan add
if not isinstance(value, np.ndarray):
value = FastArray(value).astype(np.float64)
else:
# DateTimeNano / Date will fix up this operation
if isinstance(value, (DateTimeNano, DateTimeNanoScalar, Date, DateScalar)):
return value.__add__(self)
elif isinstance(value, (DateSpan, DateSpanScalar)):
other_inv_mask = isnan(value)
value = value._fa * NANOS_PER_DAY
else:
other_inv_mask = isnan(value)
value = value.view(FastArray)
value = value.astype(np.float64, copy=False)
return self._fix_binary_ops(value, "__add__", other_inv_mask=other_inv_mask)
# -------------------------------------------------------------
def __radd__(self, value):
return self.__add__(value)
# -------------------------------------------------------------
def __sub__(self, value):
if isinstance(value, (DateTimeNano, DateTimeNanoScalar, Date, DateScalar)):
return value.__rsub__(self)
return self._fix_binary_ops(value, "__sub__")
# -------------------------------------------------------------
def __rsub__(self, value):
if not isinstance(value, np.ndarray):
value = FastArray(value).astype(np.float64)
else:
if isinstance(value, (DateTimeNano, DateTimeNanoScalar, Date, DateScalar)):
return value.__sub__(self)
elif isinstance(value, (DateSpan, DateSpanScalar)):
other_inv_mask = isnan(value)
value = value._fa * NANOS_PER_DAY
# interpret everything else as nanosecond timespan values
else:
other_inv_mask = isnan(value)
value = value.view(FastArray)
value = value.astype(np.float64, copy=False)
return self._fix_binary_ops(value, "__rsub__")
# -------------------------------------------------------------
def __mul__(self, value):
if isinstance(
value,
(TimeSpan, DateSpan, Date, DateTimeNano, TimeSpanScalar, DateSpanScalar, DateScalar, DateTimeNanoScalar),
):
raise TypeError(f"Cannot multiply TimeSpan by {type(value)} object.")
if not isinstance(value, np.ndarray):
value = FastArray(value).astype(np.float64)
return self._fix_binary_ops(value, "__mul__")
# -------------------------------------------------------------
def __rmul__(self, other):
return self.__mul__(other)
# -------------------------------------------------------------
def __floordiv__(self, value):
if isinstance(value, (TimeSpan, TimeSpanScalar)):
result = self._fa.__floordiv__(value)
return result
else:
raise TypeError(
f"Can only floor divide TimeSpan objects with other timespan objects not type {type(value)}."
)
# -------------------------------------------------------------
def __truediv__(self, value):
# handle TimeSpan('00:30:00') / TimeSpan('01:00:00') with truediv
if isinstance(value, (TimeSpan, TimeSpanScalar)):
return self._fa.__truediv__(value)
return self._fix_binary_ops(value, "__truediv__")
# -------------------------------------------------------------
def _fix_binary_ops(self, value, op_name, other_inv_mask=None):
"""
Preserves invalids from integer arrays. If valid, wraps result fastarray in TimeSpan object.
"""
# print("binary", type(self), type(value), op_name)
if np.isscalar(self):
op = getattr(np.float64, op_name)
result = op(self, value)
else:
# get the array version
op = getattr(FastArray, op_name)
result = op(self, value)
if np.isscalar(result):
result = TimeSpanScalar(result)
elif isinstance(result, np.ndarray):
if other_inv_mask is None:
# this shouldn't get hit, test
if result.dtype.char in NumpyCharTypes.AllInteger:
inv_mask = value == INVALID_DICT[result.dtype.num]
result[inv_mask] = np.nan
else:
# possible nan fill
if len(other_inv_mask) == 1:
if isnan(other_inv_mask)[0]:
result = TimeSpan(full(len(self), np.nan, dtype=np.float64))
else:
result[other_inv_mask] = np.nan
result = TimeSpan(result)
return result
def __pow__(self, other, modulo=None):
raise NotImplementedError
def __lshift__(self, other):
raise NotImplementedError
def __rshift__(self, other):
raise NotImplementedError
def __and__(self, other):
raise NotImplementedError
def __xor__(self, other):
raise NotImplementedError
def __or__(self, other):
raise NotImplementedError
# def __rmul__(self, other): raise NotImplementedError
def __rmatmul__(self, other):
raise NotImplementedError
def __rtruediv__(self, other):
raise NotImplementedError
def __rfloordiv__(self, other):
raise NotImplementedError
def __rmod__(self, other):
raise NotImplementedError
def __rdivmod__(self, other):
raise NotImplementedError
def __rpow__(self, other):
raise NotImplementedError
def __rlshift__(self, other):
raise NotImplementedError
def __rrshift__(self, other):
raise NotImplementedError
def __rand__(self, other):
raise NotImplementedError
def __rxor__(self, other):
raise NotImplementedError
def __ror__(self, other):
raise NotImplementedError
def __imul__(self, other):
raise NotImplementedError
def __imatmul__(self, other):
raise NotImplementedError
def __itruediv__(self, other):
raise NotImplementedError
def __ifloordiv__(self, other):
raise NotImplementedError
def __imod__(self, other):
raise NotImplementedError
def __ipow__(self, other, modulo=None):
raise NotImplementedError
def __ilshift__(self, other):
raise NotImplementedError
def __irshift__(self, other):
raise NotImplementedError
def __iand__(self, other):
raise NotImplementedError
def __ixor__(self, other):
raise NotImplementedError
def __ior__(self, other):
raise NotImplementedError
# def __neg__(self): raise NotImplementedError
# def __pos__(self): raise NotImplementedError
# def __abs__(self): raise NotImplementedError
def __invert__(self):
raise NotImplementedError
def __complex__(self):
raise NotImplementedError
def __int__(self):
raise NotImplementedError
# def __float__(self): raise NotImplementedError
def __round__(self, ndigits=0):
raise NotImplementedError
def __trunc__(self):
raise NotImplementedError
def __floor__(self):
raise NotImplementedError
def __ceil__(self):
raise NotImplementedError
# --UNARY OPERATIONS-------------------------------------------
# -------------------------------------------------------------
def __abs__(self):
return self._unary_ufunc_builder("__abs__")
def __neg__(self):
return self._unary_ufunc_builder("__neg__")
def __pos__(self):
return self._unary_ufunc_builder("__pos__")
def abs(self):
return self.__abs__()
def _unary_ufunc_builder(self, op_name):
if np.isscalar(self):
func = getattr(np.float64, op_name)
return TimeSpanScalar(func(self))
else:
# call the fastarray version of the function
return TimeSpan(getattr(self._fa, op_name)())
# ------------------------------------------------------------
@classmethod
def _reduce_func_builder(cls):
"""
Generates all reduce functions - which return a single value (in nanoseconds).
The value will be flipped to float64 (we don't need higher precision than nanoseconds), and put in a
new TimeSpan.
"""
for name in [
"sum",
"mean",
"std",
"var",
"min",
"max",
"median",
"nansum",
"nanmean",
"nanstd",
"nanvar",
"nanmin",
"nanmax",
"nanmedian",
]:
func_string = []
func_string.append("def " + name + "(self, **kwargs):")
func_string.append(" r = self._fa." + name + "()")
func_string.append(" r = FastArray(r, dtype=np.float64)")
func_string.append(" return TimeSpan(r)")
func_string.append("setattr(cls, '" + name + "', " + name + ")")
exec("\n".join(func_string))
# ------------------------------------------------------------
# -------------------------------------------------------------
def _timespan_compare_check(self, funcname, other):
    """
    Shared implementation behind the rich-comparison operators.

    Parameters
    ----------
    funcname : str
        Name of the comparison method to look up on ``self._fa`` (e.g. ``"__lt__"``).
    other : object
        Right-hand operand. Strings / bytes are parsed with `TimeSpan` and the first
        element is used; `DateSpan` operands are scaled by ``NANOS_PER_DAY`` and cast to float64.

    Returns
    -------
    The result of calling the looked-up comparison with the (possibly converted) operand.

    Raises
    ------
    TypeError
        If `other` is a `DateTimeNano` or a `Date`.
    """
    compare = getattr(self._fa, funcname)

    if isinstance(other, (str, bytes)):
        other = TimeSpan(other)[0]

    if isinstance(other, (DateTimeNano, Date)):
        raise TypeError(f"Cannot compare TimeSpan with {type(other)}")

    if isinstance(other, DateSpan):
        # Upcast day counts to nanoseconds so both operands share a unit.
        day_counts = other._fa
        other = (day_counts * NANOS_PER_DAY).astype(np.float64)

    # Anything else falls through unchanged; invalids need no special handling
    # because the underlying data is float.
    return compare(other)
# -------------------COMPARISONS------------------------------
# ------------------------------------------------------------
def __ne__(self, other):
    """Handle ``!=`` by delegating to ``_timespan_compare_check`` with ``"__ne__"``."""
    op_name = "__ne__"
    return self._timespan_compare_check(op_name, other)
def __eq__(self, other):
    """Handle ``==`` by delegating to ``_timespan_compare_check`` with ``"__eq__"``."""
    op_name = "__eq__"
    return self._timespan_compare_check(op_name, other)
def __ge__(self, other):
    """Handle ``>=`` by delegating to ``_timespan_compare_check`` with ``"__ge__"``."""
    op_name = "__ge__"
    return self._timespan_compare_check(op_name, other)
def __gt__(self, other):
    """Handle ``>`` by delegating to ``_timespan_compare_check`` with ``"__gt__"``."""
    op_name = "__gt__"
    return self._timespan_compare_check(op_name, other)
def __le__(self, other):
    """Handle ``<=`` by delegating to ``_timespan_compare_check`` with ``"__le__"``."""
    op_name = "__le__"
    return self._timespan_compare_check(op_name, other)
def __lt__(self, other):
    """Handle ``<`` by delegating to ``_timespan_compare_check`` with ``"__lt__"``."""
    op_name = "__lt__"
    return self._timespan_compare_check(op_name, other)
# ========================================================
[docs]
class TimeSpan(TimeSpanBase, DateTimeBase):
    """Array of time delta in nanoseconds, held in float64.

    Parameters
    ----------
    values : numeric or string array or scalar
        If string, interpreted as HH:MM:SS.ffffff (seconds / second fractions optional).
        If numeric, a single number or an array / list of numbers, interpreted as
        nanoseconds unless `unit` is provided.
    unit : str, optional
        Precision of data in the constructor. All will be converted to nanoseconds.
        When omitted, numeric values are taken to be nanoseconds already.
        Valid units: 'Y', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns'

    Examples
    --------
    From single string:

    >>> dts = TimeSpan('12:34')
    >>> dts
    TimeSpan([12:34:00.000000000])

    From milliseconds since midnight:

    >>> dts = TimeSpan(FA([34500000., 36500000., 38500000.,]), unit='ms')
    >>> dts
    TimeSpan([09:35:00.000000000, 10:08:20.000000000, 10:41:40.000000000])

    From the result of DateTimeNano subtraction:

    >>> dtn1 = DateTimeNano(['2018-01-01 09:35:00'], from_tz='NYC')
    >>> dtn2 = DateTimeNano(['2018-01-01 07:15:00'], from_tz='NYC')
    >>> dtn1 - dtn2
    TimeSpan([02:20:00.000000000])

    Certain DateTimeNano properties can return a TimeSpan:

    >>> dtn = DateTimeNano(['2018-01-01 09:35:00'], from_tz='NYC')
    >>> dtn.hour_span
    TimeSpan([09:35:00.000000000])

    Can be added to DateTimeNano objects:

    >>> dtn = DateTimeNano(['2018-01-01 09:35:00'], from_tz='NYC')
    >>> ts = TimeSpan(FA([8400000000000.0]))
    >>> dtn + ts
    DateTimeNano([20180101 11:55:00.000000000])

    Can be multiplied / divided by scalars:

    >>> ts = TimeSpan(FA([8400000000000.0]))
    >>> ts
    TimeSpan([02:20:00.000000000])
    >>> ts / 2
    TimeSpan([01:10:00.000000000])
    >>> ts * 5.6
    TimeSpan([13:04:00.000000000])
    """

    # ------------------------------------------------------------
    def __new__(cls, values, unit=None):
        # handle all input as array, scalars -> array of one item
        if not isinstance(values, np.ndarray):
            values = FastArray(values)

        # strings must be in format HH:MM / HH:MM:SS / HH:MM:SS.ffffff
        if values.dtype.char in "US":
            # send to wrapper for strptime
            return timestring_to_nano(values)

        # init class math funcs (reduction methods are attached once, on first numeric construction)
        if cls.ReduceFuncs is False:
            cls._reduce_func_builder()
            cls.ReduceFuncs = True

        # handle all others as numeric
        instance = values.astype(np.float64, copy=False)
        if unit is not None:
            instance = cls._unit_to_nano_span(instance, unit)

        # wrap in class
        instance = instance.view(cls)
        instance._display_length = DisplayLength.Short
        return instance

    # ------------------------------------------------------------
    # TimeSpan
    def strftime(self, format, dtype="U"):
        """
        Convert each `TimeSpan` element to a formatted string representation.

        Parameters
        ----------
        format : str
            One or more format codes supported by the
            :py:meth:`datetime.datetime.strftime` function of the standard
            Python distribution. For codes, see
            :ref:`python:strftime-strptime-behavior`.
        dtype : {"U", "S", "O"}, default "U"
            The data type of the returned array:

            - "U": unicode string
            - "S": byte string
            - "O": object string

        Returns
        -------
        `ndarray`
            An `ndarray` of strings.

        See Also
        --------
        TimeSpanScalar.strftime, Date.strftime, DateScalar.strftime,
        DateTimeNano.strftime, DateTimeNanoScalar.strftime

        Notes
        -----
        This routine has not been sped up yet. It also raises an error on NaNs.

        Examples
        --------
        >>> ts = rt.TimeSpan(['09:00', '10:45', '02:30'])
        >>> ts
        TimeSpan(['09:00:00.000000000', '10:45:00.000000000', '02:30:00.000000000'])
        >>> ts.strftime('%X')
        array(['09:00:00', '10:45:00', '02:30:00'], dtype='<U8')
        """
        return self._strftime(format, dtype=dtype)

    # ------------------------------------------------------------
    def get_classname(self):
        """Return the name of this class (``__class__`` is the defining class, not ``type(self)``)."""
        return __class__.__name__

    # ------------------------------------------------------------
    def get_scalar(self, scalarval):
        """Wrap `scalarval` in a `TimeSpanScalar`, passing this array as ``_from``."""
        return TimeSpanScalar(scalarval, _from=self)

    # ------------------------------------------------------------
    @classmethod
    def newclassfrominstance(cls, instance, origin):
        """View `instance` as `cls` and copy the display length from `origin`."""
        result = instance.view(cls)
        result._display_length = origin.display_length
        return result

    # ------------------------------------------------------------
    @classmethod
    def hstack(cls, tspans):
        """
        Stack `tspans` via `hstack_any` and re-wrap the result in this class.

        TODO: maybe add type checking?
        """
        return hstack_any(tspans, cls, TimeSpan)

    # ------------------------------------------------------------
    def fill_invalid(self, shape=None, dtype=None, inplace=True):
        """
        Fill with invalid values via ``_fill_invalid_internal``.

        The `dtype` argument is ignored; this array's own dtype is always used.
        Returns ``None`` when the internal call returns ``None``, otherwise a new
        `TimeSpan` wrapping the returned array.
        """
        arr = self._fill_invalid_internal(shape=shape, dtype=self.dtype, inplace=inplace)
        if arr is None:
            return
        return TimeSpan(arr)

    # ------------------------------------------------------------
    @staticmethod
    def _from_arrow(
        arr: Union["pa.Array", "pa.ChunkedArray"], zero_copy_only: bool = True, writable: bool = False
    ) -> "TimeSpan":
        """
        Create a `TimeSpan` instance from a "duration"-typed `pyarrow.Array`.

        Parameters
        ----------
        arr : pyarrow.Array or pyarrow.ChunkedArray
            Must be a "duration"-typed pyarrow array.
        zero_copy_only : bool, optional, defaults to True
            When True, raise instead of performing a conversion that requires a copy.
        writable : bool, optional, defaults to False
            Passed through to ``pyarrow.Array.to_numpy``.

        Returns
        -------
        TimeSpan

        Raises
        ------
        TypeError
            If `arr` is not a `pyarrow.Array` or `pyarrow.ChunkedArray`.
        ValueError
            If `arr` is not duration-typed, or if `zero_copy_only` is True and the
            conversion cannot be done without copying.

        Notes
        -----
        The lossless zero-copy path is currently disabled (see the N.B. in the body), so
        with ``zero_copy_only=True`` every otherwise-valid input raises ``ValueError``.
        """
        import pyarrow as pa
        import pyarrow.types as pat

        # import pyarrow.compute as pc

        # Only support converting from duration-typed arrays.
        if not isinstance(arr, (pa.Array, pa.ChunkedArray)):
            raise TypeError("The array is not an instance of `pyarrow.Array` or `pyarrow.ChunkedArray`.")
        elif not pat.is_duration(arr.type):
            raise ValueError(
                f"rt.TimeSpan arrays can only be created from pyarrow arrays of type 'duration', not '{arr.type}'."
            )

        # If the input array's type specifies a unit other than 'ns',
        # we need to convert it to nanoseconds, because rt.TimeSpan always uses nanoseconds as the unit.
        if arr.type.unit != "ns":
            if zero_copy_only:
                raise ValueError(
                    "Cannot perform a zero-copy conversion of a pyarrow duration array with units other than 'ns'."
                )
            else:
                pa_ns_duration_ty = pa.duration("ns")
                arr = arr.cast(pa_ns_duration_ty)

        # ChunkedArrays need special handling.
        if isinstance(arr, pa.ChunkedArray):
            # A single-chunk ChunkedArray can be handled by just extracting that chunk
            # and recursively processing it.
            if arr.num_chunks == 1:
                return TimeSpan._from_arrow(arr.chunk(0), zero_copy_only=zero_copy_only, writable=writable)
            else:
                # TODO: Benchmark this vs. using ChunkedArray.combine_chunks() then converting.
                # TODO: Look at `zero_copy_only` and `writable` -- the converted arrays could be destroyed while hstacking
                #       since we know they'll have just been created; this could reduce peak memory utilization.
                return hstack(
                    [
                        TimeSpan._from_arrow(arr_chunk, zero_copy_only=zero_copy_only, writable=writable)
                        for arr_chunk in arr.iterchunks()
                    ]
                )

        # Detect whether the values in the array are in the range [0, 2 ^ 53], in which case we *can* perform a zero-copy conversion
        # of the data (assuming writable=False), since the integer and float representation will be the same.
        # 2 ^ 53 nanoseconds is approx. 104 days + 6 hours.
        # N.B. This doesn't work as expected -- seems like it's due to differences in integer vs. FP endianness in memory?
        arr_int64_view = arr.view(pa.int64())
        # min_max_result: pa.StructScalar = pc.min_max(arr_int64_view)
        # min_value = min_max_result['min']
        # max_value = min_max_result['max']
        # in_lossless_float64_int_range = min_value.as_py() >= 0 and max_value.as_py() <= (2 ** 53)
        in_lossless_float64_int_range = False

        # Can we perform a lossless conversion to float64?
        if in_lossless_float64_int_range:
            f64_view_pa = arr_int64_view.view(pa.float64())
            f64_view_np = f64_view_pa.to_numpy(zero_copy_only=not writable, writable=writable)
            return f64_view_np.view(type=TimeSpan)
        elif zero_copy_only:
            raise ValueError(
                "Cannot perform a zero-copy conversion of a pyarrow duration array containing negative values, or values larger than 2^53 nanoseconds."
            )
        else:
            # Convert from int64 ns representation to float64 representation, which in this case requires creating a new array.
            f64_copy_pa = arr_int64_view.cast(pa.float64())
            f64_view_np = f64_copy_pa.to_numpy(zero_copy_only=not writable, writable=writable)
            return f64_view_np.view(type=TimeSpan)

    def to_arrow(
        self,
        type: Optional["pa.DataType"] = None,
        *,
        preserve_fixed_bytes: bool = False,
        empty_strings_to_null: bool = True,
    ) -> Union["pa.Array", "pa.ChunkedArray"]:
        """
        Convert this `TimeSpan` to a `pyarrow.Array`.

        Parameters
        ----------
        type : pyarrow.DataType, optional, defaults to None
            Unused.
        preserve_fixed_bytes : bool, optional, defaults to False
            Unused.
        empty_strings_to_null : bool, optional, defaults to True
            Unused.

        Returns
        -------
        pyarrow.Array or pyarrow.ChunkedArray
            An array of type ``duration('ns')``; elements that are NaN in this
            `TimeSpan` are masked out.
        """
        import pyarrow as pa

        # Create the corresponding pyarrow type.
        arr_type = pa.duration("ns")

        # Get the invalid mask for this array.
        # If all values are valid, don't bother passing an all-False mask when creating the arrow array.
        invalids_mask = self.isnan()
        if not invalids_mask.any():
            invalids_mask = None

        # Convert the float64 backing array (for this TimeSpan instance) to int64,
        # since that's what pyarrow will use for its internal representation.
        int_ns_arr = self.astype(dtype=np.int64)

        # TODO: If the `type` parameter is specified, should we create the array below
        #       then try to cast it to the caller-specified type?
        return pa.array(int_ns_arr._np, mask=invalids_mask, type=arr_type)

    def __arrow_array__(self, type: Optional["pa.DataType"] = None) -> Union["pa.Array", "pa.ChunkedArray"]:
        """Arrow conversion protocol hook; forwards to `to_arrow`."""
        return self.to_arrow(type=type)
# ==========================================================
# Scalars
# ==========================================================
[docs]
class DateScalar(np.int32):
    """
    Derived from np.int32
    days since unix epoch in 1970
    TODO: need to inherit math functions
    """

    __slots__ = "_display_length"

    # ------------------------------------------------------------
    def __new__(cls, arr, **kwargs):
        # Keyword arguments (e.g. ``_from``) are consumed by __init__, not by np.int32.
        return super().__new__(cls, arr)

    # ------------------------------------------------------------
    def __init__(*args, **kwargs):
        self = args[0]
        _from = kwargs.get("_from", None)
        # Inherit the display length from the originating object when it has one.
        self._display_length = getattr(_from, "_display_length", DisplayLength.Long)

    # ------------------------------------------------------------
    @property
    def _fa(self):
        return self

    # ------------------------------------------------------------
    def get_classname(self):
        """Return the name of this class."""
        return __class__.__name__

    # NOTE(review): `get_item_format` is not defined in this class body or by np.int32;
    # confirm it is provided elsewhere before relying on __repr__ / __str__.
    def __repr__(self):
        itemformat = self.get_item_format()
        return f"{self.get_classname()}('{Date.format_date_num(self._np, itemformat)}')"

    def __str__(self):
        itemformat = self.get_item_format()
        return Date.format_date_num(self._np, itemformat)

    # ------------------------------------------------------------
    # DateScalar
    def strftime(self, format):
        """
        Convert a `DateScalar` to a formatted string representation.

        Parameters
        ----------
        format : str
            One or more format codes supported by the
            :py:meth:`datetime.date.strftime` function of the standard
            Python distribution. For codes, see
            :ref:`python:strftime-strptime-behavior`.

        Returns
        -------
        str
            A string representation of the reformatted `DateScalar`.

        See Also
        --------
        Date.strftime, DateTimeNano.strftime, DateTimeNanoScalar.strftime,
        TimeSpan.strftime, TimeSpanScalar.strftime

        Notes
        -----
        This routine has not been sped up yet. Values whose day count falls outside
        the range supported by :py:class:`datetime.datetime` raise an error.

        Examples
        --------
        >>> d = rt.Date(['20210101', '20210519', '20220308'])
        >>> d[0].strftime('%D')
        '01/01/21'
        """
        # Build a naive midnight datetime from the day count using ordinal arithmetic.
        # This replaces the deprecated ``datetime.utcfromtimestamp`` and yields the same
        # naive datetime without going through platform timestamp conversion.
        epoch_ordinal = dt(1970, 1, 1).toordinal()
        return dt.fromordinal(epoch_ordinal + int(self)).strftime(format)

    # ------------------------------------------------------------
    @property
    def _np(self):
        return self.view(np.int32)

    # used in adding a scalar to a Dataset
    def repeat(self, repeats, axis=None):
        """Return a `Date` array holding this value repeated `repeats` times."""
        return Date(self._np.repeat(repeats, axis=axis))

    def tile(self, repeats):
        """Return a `Date` array holding this value tiled `repeats` times."""
        # NumPy scalars have no ``tile`` method; use the module-level function.
        return Date(np.tile(self._np, repeats))
# ==========================================================
[docs]
class DateSpanScalar(np.int32):
    """
    Derived from np.int32
    Number of days between two dates
    """

    __slots__ = "_display_length"
    NAN_DATESPANSCALAR = INVALID_DICT[np.dtype(np.int32).num]  # int32 sentinel

    # ------------------------------------------------------------
    def __new__(cls, arr, **kwargs):
        # Keyword arguments (e.g. ``_from``) are consumed by __init__, not by np.int32.
        return super().__new__(cls, arr)

    # ------------------------------------------------------------
    def __init__(*args, **kwargs):
        self = args[0]
        _from = kwargs.get("_from", None)
        # Inherit the display length from the originating object when it has one;
        # otherwise fall back to the long form instead of raising AttributeError.
        self._display_length = getattr(_from, "_display_length", DisplayLength.Long)

    # ------------------------------------------------------------
    def get_classname(self):
        """Return the name of this class."""
        return __class__.__name__

    # NOTE(review): `get_item_format` is not defined in this class body or by np.int32;
    # confirm it is provided elsewhere before relying on __repr__ / __str__.
    def __repr__(self):
        itemformat = self.get_item_format()
        return f"{self.get_classname()}('{DateSpan.format_date_span(self._np, itemformat)}')"

    def __str__(self):
        itemformat = self.get_item_format()
        return DateSpan.format_date_span(self._np, itemformat)

    # ------------------------------------------------------------
    @property
    def _np(self):
        return self.view(np.int32)

    # ------------------------------------------------------------
    def isnan(self):
        """True when this value equals the int32 invalid sentinel."""
        return self == DateSpanScalar.NAN_DATESPANSCALAR

    # ------------------------------------------------------------
    def isnotnan(self):
        """True when this value differs from the int32 invalid sentinel."""
        return self != DateSpanScalar.NAN_DATESPANSCALAR

    # ------------------------------------------------------------
    def isfinite(self):
        """Same test as `isnotnan`: the sentinel is the only non-finite value."""
        return self != DateSpanScalar.NAN_DATESPANSCALAR

    # ------------------------------------------------------------
    def isnotfinite(self):
        """Same test as `isnan`: the sentinel is the only non-finite value."""
        return self == DateSpanScalar.NAN_DATESPANSCALAR

    # ------------------------------------------------------------
    @property
    def _fa(self):
        return self.view(np.int32)

    # used in adding a scalar to a Dataset
    def repeat(self, repeats, axis=None):
        """Return a `DateSpan` array holding this value repeated `repeats` times."""
        return DateSpan(self._np.repeat(repeats, axis=axis))

    def tile(self, repeats):
        """Return a `DateSpan` array holding this value tiled `repeats` times."""
        # NumPy scalars have no ``tile`` method; use the module-level function.
        return DateSpan(np.tile(self._np, repeats))
# ==========================================================
[docs]
class DateTimeNanoScalar(np.int64, DateTimeCommon, TimeStampBase):
    """
    Derived from np.int64
    NOTE: np.int64 is a SLOT wrapper and does not have a __dict__
    Number of nanoseconds since unix epoch 1970 in UTC
    """

    __slots__ = "_display_length", "_timezone"

    # ------------------------------------------------------------
    def __new__(cls, arr, **kwargs):
        # Keyword arguments (``_from``, ``to_tz``, ``from_tz``) are consumed by __init__.
        return super().__new__(cls, arr)

    # ------------------------------------------------------------
    def __init__(*args, **kwargs):
        # This needs more work, especially when init with a string
        self = args[0]
        _from = kwargs.get("_from", None)
        if _from is not None and hasattr(_from, "_timezone"):
            # Share the timezone object of the originating object.
            self._timezone = _from._timezone
        else:
            to_tz = kwargs.get("to_tz", None)
            from_tz = kwargs.get("from_tz", None)
            if from_tz is None:
                from_tz = "UTC"

            # NOTE(review): this branch rebinds the local name `self` to a plain
            # np.int64 product before `_timezone` is assigned below -- confirm
            # whether it is reachable / intended for a scalar instance.
            if isinstance(self, TypeRegister.Date):
                if to_tz is None:
                    to_tz = "UTC"
                # will automatically flip to int64, send through as nanosecond integer array
                self = np.int64(self) * NANOS_PER_DAY
            else:
                if to_tz is None:
                    to_tz = "NYC"

            # create a timezone object to handle daylight savings, any necessary conversion, etc.
            _timezone = TypeRegister.TimeZone(from_tz=from_tz, to_tz=to_tz)
            self._timezone = _timezone

        self._display_length = DisplayLength.Long
        if _from is not None and hasattr(_from, "_display_length"):
            self._display_length = _from._display_length

    # ------------------------------------------------------------
    # DateTimeNanoScalar
    def strftime(self, format):
        """
        Convert a `DateTimeNanoScalar` to a formatted string representation.

        Parameters
        ----------
        format : str
            One or more format codes supported by the
            :py:meth:`datetime.datetime.strftime` function of the standard
            Python distribution. For codes, see
            :ref:`python:strftime-strptime-behavior`.

        Returns
        -------
        str
            A string representation of the reformatted `DateTimeNanoScalar`.

        See Also
        --------
        DateTimeNano.strftime, Date.strftime, DateScalar.strftime,
        TimeSpan.strftime, TimeSpanScalar.strftime

        Notes
        -----
        This routine has not been sped up yet. It also raises an error on NaNs.

        Examples
        --------
        >>> dtn = rt.DateTimeNano(['20210101 09:31:15', '20210519 05:21:17'], from_tz='NYC')
        >>> dtn
        DateTimeNano(['20210101 09:31:15.000000000', '20210519 05:21:17.000000000'], to_tz='NYC')
        >>> dtn[0].strftime('%c')
        'Fri Jan  1 09:31:15 2021'
        """
        return self._strftime(format)

    # ------------------------------------------------------------
    def isnan(self):
        """True when the value is zero or negative (treated as invalid)."""
        return self <= 0

    # ------------------------------------------------------------
    def isnotnan(self):
        """True when the value is strictly positive."""
        return self > 0

    # ------------------------------------------------------------
    def isfinite(self):
        """Same test as `isnotnan`."""
        return self > 0

    # ------------------------------------------------------------
    def isnotfinite(self):
        """Same test as `isnan`."""
        return self <= 0

    # ------------------------------------------------------------
    @property
    def _np(self):
        return self.view(np.int64)

    # ------------------------------------------------------------
    @property
    def _fa(self):
        return self.view(np.int64)

    # ------------------------------------------------------------
    def get_classname(self):
        """Return the name of this class."""
        return __class__.__name__

    # ------------------------------------------------------------
    def __repr__(self):
        itemformat = self.get_item_format()
        # return DateTimeNano.format_nano_time(self._np, itemformat)
        return f"{self.get_classname()}('{DateTimeNano.format_nano_time(self._np, itemformat)}')"

    def __str__(self):
        itemformat = self.get_item_format()
        return DateTimeNano.format_nano_time(self._np, itemformat)

    # --BINARY OPERATIONS------------------------------------------
    # -------------------------------------------------------------
    def __add__(self, value):
        # reroute this back to the nonscalar
        return DateTimeNano.__add__(self, value)

    def __sub__(self, value):
        # reroute this back to the nonscalar
        return DateTimeNano.__sub__(self, value)

    # used in adding a scalar to a Dataset
    def repeat(self, repeats, axis=None):
        """Return a `DateTimeNano` array of this value repeated, keeping this scalar's timezones."""
        return DateTimeNano(
            self._np.repeat(repeats, axis=axis), to_tz=self._timezone._to_tz, from_tz=self._timezone._from_tz
        )

    def tile(self, repeats):
        """Return a `DateTimeNano` array of this value tiled, keeping this scalar's timezones."""
        # NumPy scalars have no ``tile`` method; use the module-level function.
        return DateTimeNano(
            np.tile(self._np, repeats), to_tz=self._timezone._to_tz, from_tz=self._timezone._from_tz
        )
# ==========================================================
[docs]
class TimeSpanScalar(np.float64, TimeSpanBase):
    """
    Derived from np.float64
    Holds single float values for TimeSpan arrays.
    These will be returned from operations that currently return a TimeSpan of a single item.
    """

    __slots__ = "_display_length"

    # ------------------------------------------------------------
    def __new__(cls, scalar, **kwargs):
        # strings must be in format HH:MM / HH:MM:SS / HH:MM:SS.ffffff
        if isinstance(scalar, (str, bytes, np.bytes_, np.str_)):
            # send to wrapper for strptime
            scalar = timestring_to_nano(np.asarray([scalar]))[0]
        return super(TimeSpanScalar, cls).__new__(cls, scalar, **kwargs)

    def __init__(*args, **kwargs):
        self = args[0]
        _from = kwargs.get("_from", None)
        # TimeSpan has no timezone
        self._display_length = getattr(_from, "_display_length", DisplayLength.Long)

    # ------------------------------------------------------------
    @property
    def _fa(self):
        # must go to numpy or it will flip back to an array
        return self.view(np.float64)

    # ------------------------------------------------------------
    @property
    def _np(self):
        return self.view(np.float64)

    # ------------------------------------------------------------
    # TimeSpanScalar
    def strftime(self, format):
        """
        Convert a `TimeSpanScalar` to a formatted string representation.

        Parameters
        ----------
        format : str
            One or more format codes supported by the
            :py:meth:`datetime.datetime.strftime` function of the standard
            Python distribution. For codes, see
            :ref:`python:strftime-strptime-behavior`.

        Returns
        -------
        str
            A string representation of the reformatted `TimeSpanScalar`.

        See Also
        --------
        TimeSpan.strftime, Date.strftime, DateScalar.strftime,
        DateTimeNano.strftime, DateTimeNanoScalar.strftime

        Notes
        -----
        This routine has not been sped up yet. It also raises an error on NaNs.

        Examples
        --------
        >>> ts = rt.TimeSpan(['09:00', '10:45', '02:30'])
        >>> ts
        TimeSpan(['09:00:00.000000000', '10:45:00.000000000', '02:30:00.000000000'])
        >>> ts[0].strftime('%X')
        '09:00:00'
        """
        return self._strftime(format)

    # ------------------------------------------------------------
    def isnan(self):
        """True when the value is NaN."""
        return math.isnan(self)

    # ------------------------------------------------------------
    def isnotnan(self):
        """True when the value is not NaN."""
        return not math.isnan(self)

    # ------------------------------------------------------------
    def isfinite(self):
        """True when the value is neither NaN nor infinite."""
        return math.isfinite(self)

    # ------------------------------------------------------------
    def isnotfinite(self):
        """True when the value is NaN or infinite."""
        return not math.isfinite(self)

    # ------------------------------------------------------------
    def get_classname(self):
        """Return the name of this class."""
        return __class__.__name__

    # ------------------------------------------------------------
    def __repr__(self):
        return f"{self.get_classname()}('{TimeSpan.display_item_clock(self._np)}')"

    def __str__(self):
        return TimeSpan.display_item_clock(self._np)

    # because np.float64 is first, it hooks these before TimeSpanBase
    def __abs__(self):
        return self._unary_ufunc_builder("__abs__")

    def __neg__(self):
        return self._unary_ufunc_builder("__neg__")

    def __pos__(self):
        return self._unary_ufunc_builder("__pos__")

    def abs(self):
        """Method spelling of ``abs(x)``."""
        return self.__abs__()

    # --BINARY OPERATIONS------------------------------------------
    # -------------------------------------------------------------
    # Each operator is rerouted explicitly to TimeSpanBase, since np.float64
    # comes first in the bases and would otherwise supply its own version.
    def __add__(self, value):
        return TimeSpanBase.__add__(self, value)

    def __radd__(self, value):
        return TimeSpanBase.__radd__(self, value)

    def __sub__(self, value):
        return TimeSpanBase.__sub__(self, value)

    def __rsub__(self, value):
        return TimeSpanBase.__rsub__(self, value)

    def __mul__(self, value):
        return TimeSpanBase.__mul__(self, value)

    def __rmul__(self, value):
        return TimeSpanBase.__rmul__(self, value)

    def __floordiv__(self, value):
        return TimeSpanBase.__floordiv__(self, value)

    def __truediv__(self, value):
        return TimeSpanBase.__truediv__(self, value)

    def __eq__(self, other):
        return self._timespan_compare_check("__eq__", other)

    # used in adding a scalar to a Dataset
    def repeat(self, repeats, axis=None):
        """Return a `TimeSpan` array holding this value repeated `repeats` times."""
        return TimeSpan(self._np.repeat(repeats, axis=axis))

    def tile(self, repeats):
        """Return a `TimeSpan` array holding this value tiled `repeats` times."""
        # NumPy scalars have no ``tile`` method; use the module-level function.
        return TimeSpan(np.tile(self._np, repeats))
# -----------------------------------------------------
# keep this at end of file
# Publish the datetime classes on TypeRegister, in the same order as before.
for _register_name, _registered_type in (
    ("DateTimeBase", DateTimeBase),
    ("DateTimeNano", DateTimeNano),
    ("TimeSpan", TimeSpan),
    ("DateBase", DateBase),
    ("Date", Date),
    ("DateSpan", DateSpan),
    ("DateTimeNanoScalar", DateTimeNanoScalar),
    ("TimeSpanScalar", TimeSpanScalar),
    ("DateScalar", DateScalar),
    ("DateSpanScalar", DateSpanScalar),
):
    setattr(TypeRegister, _register_name, _registered_type)
# Do not leave the loop variables behind in the module namespace.
del _register_name, _registered_type