├── .github └── workflows │ └── main.yml ├── .gitignore ├── CHANGELOG.md ├── MANIFEST.in ├── README.md ├── README_HELPERS.md ├── README_HELPERS.rst ├── _config.yml ├── docs ├── README.rst ├── README_HELPERS.rst └── index.html ├── pyproject.toml ├── requirements.txt ├── src └── thymus │ ├── __init__.py │ ├── constants.py │ ├── freq_conversions.py │ ├── point.py │ ├── timeseries.py │ ├── tsproto.py │ ├── tssdict.py │ └── tsslist.py ├── tests ├── __init__.py ├── test_freq_conversions.py ├── test_point.py ├── test_timeseries.py ├── test_tsproto.py ├── test_tssdict.py └── test_tsslist.py └── tox.ini /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ develop, master ] 9 | pull_request: 10 | branches: [ develop, master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: ["3.10", "3.11"] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v1 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install . 30 | - name: Lint with flake8 31 | run: | 32 | pip install flake8 33 | # stop the build if there are Python syntax errors or undefined names 34 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 35 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 36 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 37 | - name: Test with pytest 38 | run: | 39 | pip install pytest 40 | pytest 41 | - name: Test with unittest 42 | run: | 43 | python -m unittest 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | dist 3 | *.egg* 4 | *.pyc 5 | *.pyo 6 | venv 7 | 8 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | ## (0.3.5) 3 | ## Changed 4 | * Corrected erroneous home page url. 5 | 6 | ## (0.3.4) 7 | ## Changed 8 | * Updated packaging. Removed setup files. Now it only uses pyproject.toml. 9 | 10 | ## (0.3.1) 11 | ## Changed 12 | * Corrected mistakes in packaging 13 | 14 | ## (0.3.0) 15 | ## Changed 16 | * Updated packaging 17 | 18 | ## (0.2.6) 19 | ### Changed 20 | * With a frequency conversion, if the asking frequency is the same as the current frequency, the return harmlessly returns the current timeseries. 21 | ### Added 22 | * Added a parameter to TssDict for initializing with a parameter `split`, which can be timeseries with multiple columns. It then becomes a tssdict with each row becoming a separate timeseries, keyed on the column name. 23 | 24 | ## (0.2.5) 25 | ### Added 26 | * Added a Point class 27 | This class enables a combined look at a specific point in a time series. 28 | 29 | * Added `get_point` function to Timeseries to enable easy access to the Point class described above. 30 | 31 | * Added `point_class` to the Timeseries object. 
This feature holds the default Point class used for the `get_point` function described above. However, if a subclass of Point is created, substituting in your own subclass will cause your custom Point class to be output.
32 | 
33 | * Added `timeseries_class` to both `TssList` and `TssDict`. When using subclassed `Timeseries`, both `TssList` and `TssDict` will automatically load.
34 | 
35 | ### Changed
36 | * Increased test coverage. Total coverage shows 93%.
37 | 
38 | ## (0.2.4)
39 | ### Changed
40 | * Changed the function `TsProto.common_length`. Previously, it accepted two timeseries and returned those two timeseries trimmed to their common length. Now, it accepts a list of timeseries via `*ts`.
41 | * Corrected some wording in function docs.
42 | 
43 | ## (0.2.3)
44 | ### Added
45 | * Added a minor improvement to how headers are treated when using `Timeseries.to_dict()`. Outputting a dictionary version of a Timeseries consists of header and data. The header previously consisted of the elements key, columns, frequency, and end_of_period. Now, the header function outputs any elements of self.__dict__ that are not tseries and dseries. Any subclasses of Timeseries with extra fields will now be included automatically.
46 | 
47 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include LICENSE.txt
 2 | include README.md
 3 | include README.html
 4 | 
 5 | 
 6 | recursive-include src/thymus *
 7 | recursive-include tests *
 8 | 
 9 | recursive-exclude thymus .coverage
10 | 
11 | exclude thymus/exclude
12 | 
13 | prune __pycache__
14 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Thymus-Timeseries
 2 | 
 3 | An intuitive library tracking dates and timeseries in common, using numpy
 4 | arrays.
 5 | 
 6 | When working with arrays of timeseries, the manipulation process can easily produce sets of arrays that are mismatched in time or in the wrong order, slow down the analysis, and generally require extra effort to ensure consistency.
 7 | 
 8 | This library attempts to address the problem in a way that enables ready access to the current date range, but stays out of your way most of the time. Essentially, this library is a wrapper around numpy arrays.
 9 | 
10 | This library grew out of the use of market and trading data. The timeseries is typically composed of regular intervals but with gaps such as weekends and holidays. In the case of intra-day data, there are interruptions due to periods when the market is closed or gaps in trading.
11 | 
12 | While the library grew from addressing issues associated with market data, the implementation does not preclude use in other venues. Direct access to the numpy arrays is expected and is part of the point of using the library.
13 | 
14 | ## Dependencies
15 | 
16 | Other than NumPy, there are no requirements.
17 | 
18 | ## Installation
19 | 
20 | ```pip install thymus-timeseries```
21 | 
22 | ## A Brief Look at Capabilities
23 | 
24 | ### Creating a Small Sample Timeseries Object
25 | 
26 | As a first look, we will create a small timeseries object and show a few ways
27 | that it can be used. For this example, we will use daily data.
28 | ```
29 | from datetime import datetime
30 | import numpy as np
31 | 
32 | from thymus.timeseries import Timeseries
33 | 
34 | ts = Timeseries()
35 | ```
36 | #### Elements of Timeseries()
37 | 
38 | * **key:** An optional identifier for the timeseries.
39 | * **columns:** Defaults to None, but is an optional list of column names for the data.
40 | * **frequency:** Defaults to `d`; the **d** in this case refers to daily data. The frequencies currently supported are `sec`, `min`, `h`, `d`, `w`, `m`, `q`, `y`.
41 | 
42 | * **dseries:** This is a numpy array of dates in numeric format.
43 | 
44 | * **tseries:** This is a numpy array of data. Most of the work takes place here.
45 | 
46 | * **end-of-period:** Defaults to True, indicating that the data is as of the end of the period. This only comes into play when converting from one frequency to another and will be ignored for the moment.
47 | 
48 | While normal usage of the timeseries object would involve pulling data from a database and inserting it into the timeseries object, we will use a quick-and-dirty method of inputting some data. Dates are stored as either ordinals or timestamps, avoiding clogging up memory with large sets of datetime objects. Because this is daily data, ordinals will be used for this example.
49 | 
50 | ```
51 | ts = Timeseries()
52 | 
53 | start_date = datetime(2015, 12, 31).toordinal()
54 | 
55 | ts.dseries = start_date + np.arange(10)
56 | ts.tseries = np.arange(10)
57 | 
58 | ts.make_arrays()
59 | ```
60 | We created an initial timeseries object. It starts at the end of 2015 and continues for 10 days. Setting the values in **dseries** and **tseries** can be somewhat sloppy. For example, a list could be assigned initially to **dseries** (the dates) and a numpy array to **tseries** (the values).
61 | 
62 | The use of the **make_arrays()** function converts the date series to an int32 array (because they are ordinal values) and **tseries** to a float64 array. The idea is that the data might often enter the timeseries object as lists, but then be converted to arrays of the appropriate format for use.
63 | 
64 | The completed timeseries object is:
65 | ```
66 | print(ts)
67 | 
68 | 
69 | key: 
70 | columns: None
71 | frequency: d
72 | daterange: ('2015-12-31', '2016-01-09')
73 | end-of-period: True
74 | shape: (10,)
75 | ```
76 | You can see the date range contained in the date series. The shape refers to the shape of the **tseries** array. **key** and **columns** are free-form, available to update as appropriate to identify the timeseries and the content of the columns. Again, the **end-of-period** flag can be ignored right now.
77 | 
78 | ## Selection
79 | 
80 | Selection of elements works the same way as with numpy arrays. Currently, our sample has
81 | 10 elements.
82 | ```
83 | print(ts[:5])
84 | 
85 | 
86 | key: 
87 | columns: []
88 | frequency: d
89 | daterange: ('2015-12-31', '2016-01-04')
90 | end-of-period: True
91 | shape: (5,)
92 | ```
93 | Note how the date range above reflects the selected elements.
94 | ```
95 | ts1 = ts % 2 == 0
96 | ts1.tseries
97 | [True False True False True False True False True False]
98 | ```
99 | We can isolate the dates of even numbers. Note that `tseries`, not the timeseries object, is explicitly used with `np.argwhere`. More on when to operate directly on tseries later.
100 | ``` 101 | evens = np.argwhere((ts % 2 == 0).tseries) 102 | 103 | ts_even = ts[evens] 104 | ``` 105 | This just prints a list of date and value pairs only useful with very small sets (or examples like this) 106 | ``` 107 | print(ts_even.items('str')) 108 | 109 | ('2015-12-31', '[0.0]') 110 | ('2016-01-02', '[2.0]') 111 | ('2016-01-04', '[4.0]') 112 | ('2016-01-06', '[6.0]') 113 | ('2016-01-08', '[8.0]') 114 | ``` 115 | 116 | ### Date-based Selection 117 | 118 | So let us use a slightly larger timeseries. 1000 rows 2 columns of data. And, 119 | use random values to ensure uselessness. 120 | ``` 121 | ts = Timeseries() 122 | 123 | start_date = datetime(2015, 12, 31).toordinal() 124 | 125 | ts.dseries = start_date + np.arange(1000) 126 | ts.tseries = np.random.random((1000, 2)) 127 | 128 | ts.make_arrays() 129 | 130 | print(ts) 131 | 132 | 133 | key: 134 | columns: [] 135 | frequency: d 136 | daterange: ('2015-12-31', '2018-09-25') 137 | end-of-period: True 138 | shape: (1000, 2) 139 | ``` 140 | 141 | You can select on the basis of date ranges, but first we will use a row number 142 | technique that is based on slicing. This function is called **trunc()** for 143 | truncation. 144 | 145 | #### Normal Truncation 146 | You will end up with a timeseries with row 100 through 499. This provides in-place execution. 147 | ``` 148 | ts.trunc(start=100, finish=500) 149 | 150 | # this version returns a new timeseries, effective for chaining. 151 | ts1 = ts.trunc(start=100, finish=500, new=True) 152 | ``` 153 | #### Truncation by Date Range 154 | But suppose you want to select a specific date range? This leads to the next 155 | function, **truncdate()**. 156 | ``` 157 | # select using datetime objects 158 | ts1 = ts.truncdate( 159 | start=datetime(2017, 1, 1), 160 | finish=datetime(2017, 12, 31), 161 | new=True) 162 | 163 | print(ts1) 164 | 165 | 166 | key: 167 | columns: [] 168 | frequency: d 169 | daterange: ('2017-01-01', '2017-12-31') 170 | end-of-period: True 171 | shape: (365, 2) 172 | ``` 173 | As you might expect, the timeseries object has a date range of all the days 174 | during 2017. But see how this is slightly different than slicing. When you use 175 | `truncdate()` it selects everything within the date range *inclusive* of the 176 | ending date as well. The idea is to avoid having to always find one day after 177 | the date range that you want to select to accommodate slicing behavior. This 178 | way is more convenient in this context. 179 | 180 | You can also convert data from a higher frequency to a lower frequency. Suppose we needed monthly data for 2017 from our timeseries. 
181 | 182 | ``` 183 | start = datetime(2017, 1, 1) 184 | finish = datetime(2017, 12, 31) 185 | ts1 = ts.truncdate(start=start, finish=finish, new=True).convert('m') 186 | 187 | print(ts1.items('str')) 188 | 189 | ('2017-01-31', '[0.1724835781570483, 0.9856812220255055]') 190 | ('2017-02-28', '[0.3855043513164875, 0.30697511661843124]') 191 | ('2017-03-31', '[0.7067982987769881, 0.7680886691626396]') 192 | ('2017-04-30', '[0.07770763295126926, 0.04697651222041588]') 193 | ('2017-05-31', '[0.4473657194650975, 0.49443624153533783]') 194 | ('2017-06-30', '[0.3793816656495891, 0.03646544387811124]') 195 | ('2017-07-31', '[0.2783335012003322, 0.5144979569785825]') 196 | ('2017-08-31', '[0.9261879195281345, 0.6980224313957553]') 197 | ('2017-09-30', '[0.09531834159018227, 0.5435208082899813]') 198 | ('2017-10-31', '[0.6865842769906441, 0.7951735180348887]') 199 | ('2017-11-30', '[0.34901775001111657, 0.7014208950555662]') 200 | ('2017-12-31', '[0.4731393617405252, 0.630488855197775]') 201 | ``` 202 | 203 | Or yearly. In this case, we use a flag that governs whether to include the partial period leading up to the last year. The default includes it. However, when unwanted the flag, **include_partial** can be set to False. 204 | ``` 205 | ts1 = ts.convert('y', include_partial=True) 206 | 207 | print(ts1.items('str')) 208 | 209 | ('2015-12-31', '[0.2288539210230056, 0.288320541664724]') 210 | ('2016-12-31', '[0.5116274142615629, 0.21680312154651182]') 211 | ('2017-12-31', '[0.4731393617405252, 0.630488855197775]') 212 | ('2018-09-25', '[0.7634145837512148, 0.32026411425902257]') 213 | 214 | ts2 = ts.convert('y', include_partial=False) 215 | 216 | print(ts2.items('str')) 217 | 218 | ('2015-12-31', '[[0.2288539210230056, 0.288320541664724]]') 219 | ('2016-12-31', '[[0.5116274142615629, 0.21680312154651182]]') 220 | ('2017-12-31', '[[0.4731393617405252, 0.630488855197775]]') 221 | ``` 222 | 223 | ## Points 224 | Sometimes when examining a `tseries`, a particular point stands out and you want to investigate it further. When was it? Since this package separates dates and values by design, there needs to be a quick way to find this out. 225 | 226 | There are two ways to do this. Suppose the value in question is row 100. 227 | ``` 228 | row = 100 229 | # would give you the ordinal/timestamp date 230 | ts.dseries[row] 231 | 232 | # gives a datetime object. 233 | datetime.fromordinal(ts.dseries[row]) 234 | ``` 235 | This is not particularly difficult, but you do enough times, it feels laborious. To cut down on the typing, there is another way. 236 | ``` 237 | Usage: 238 | get_point(rowdate=None, row_no=None) 239 | 240 | row = 100 241 | point = ts.get_point(row_no=100) 242 | 243 | print(point) 244 | 245 | 246 | 247 | ``` 248 | This gives all the information in one place, the row number, a meaningful date, and the values of interest. 249 | 250 | The point object created contains attributes: 251 | * **ts:** The originating timeseries. 252 | * **row_no:** The location within the data. 253 | * **date:** This ordinal/timestamp in the data 254 | * **date_str:** This method shows the date in string format. 255 | * **datetime:** This method shows the date as datetime object. 256 | * **values:** The values contained in the row. 257 | 258 | Note that the `Point` class is designed to be an active window into your data. Changing an item in values is a direct change to the timeseries. 259 | 260 | Changing the `row_no` shifts contents of `values` to reflect the data in the new row. 
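
As a minimal sketch of that live-window behavior, continuing with the example timeseries above (your values will differ):
```
# a Point is a view, not a copy: edits flow back to the timeseries
point = ts.get_point(row_no=100)
print(point.row_no, point.values)

# writing through the point updates ts.tseries[100] directly
point.values[0] = 0.5
print(ts.tseries[100][0])     # 0.5

# moving the window: the same Point object now reflects row 101
point.row_no = 101
print(point.values)
```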
261 | 
262 | ### Columns
263 | If you use columns in your timeseries, you can also improve your output.
264 | ```
265 | ts.columns = ["dog", "cat", "squirrel", "cow", "monkeys"]
266 | 
267 | print(point)
268 | 
269 | 
275 | ```
276 | The point object uses the columns of the timeseries to create attributes.
277 | 
278 | The point object has now created the following attributes:
279 | * **ts:** The originating timeseries.
280 | * **row_no:** The location within the data.
281 | * **date:** This ordinal/timestamp in the data
282 | * **date_str:** This method shows the date in string format.
283 | * **datetime:** This method shows the date as datetime object.
284 | * **values:** The values contained in the row.
285 | 
286 | New Attributes:
287 | 
288 | * **dog:** Column 0
289 | * **cat:** Column 1
290 | * **squirrel:** Column 2
291 | * **cow:** Column 3
292 | * **monkeys:** Column 4
293 | 
294 | Just as `values` is a direct window, these attributes are also a direct window. Changing `point.dog` affects the `tseries[row_no][0]` value.
295 | 
296 | With just a few columns of data, it is not hard to remember which is which. However, with more columns it becomes increasingly unwieldy.
297 | 
298 | ### Iteration
299 | Because the `Point` class automatically changes as the row number changes, it can also be used for iteration. A subclassed Point can provide easy programmatic access for calculations and updates with meaningful variable names.
300 | 
301 | ## Combining Timeseries
302 | 
303 | Suppose you want to combine multiple timeseries that are of different lengths. In this case we assume that the two timeseries end on the same date, but one has a longer tail than the other. However, the operation that you need requires common dates.
304 | 
305 | By **combine** we mean that instead of two timeseries we make one timeseries
306 | that has the columns of both.
307 | ```
308 | from datetime import timedelta
309 | 
310 | ts_short = Timeseries()
311 | ts_long = Timeseries()
312 | end_date = datetime(2016, 12, 31)
313 | 
314 | ts_short.dseries = [
315 |     (end_date + timedelta(days=-i)).toordinal()
316 |     for i in range(5)]
317 | 
318 | ts_long.dseries = [
319 |     (end_date + timedelta(days=-i)).toordinal()
320 |     for i in range(10)]
321 | 
322 | ts_short.tseries = np.zeros((5))
323 | ts_long.tseries = np.ones((10))
324 | 
325 | ts_short.make_arrays()
326 | ts_long.make_arrays()
327 | 
328 | ts_combine = ts_short.combine(ts_long)
329 | 
330 | print(ts_combine.items('str'))
331 | ('2016-12-31', '[0.0, 1.0]')
332 | ('2016-12-30', '[0.0, 1.0]')
333 | ('2016-12-29', '[0.0, 1.0]')
334 | ('2016-12-28', '[0.0, 1.0]')
335 | ('2016-12-27', '[0.0, 1.0]')
336 | ```
337 | The combine function has a couple of variations. While it can be helpful to automatically discard the unwanted rows, you can also enforce that combining does not take place if the numbers of rows do not match. Also, you can build out the missing information with padding to create a timeseries that has
338 | the length of the longest timeseries.
339 | ```
340 | # this would raise an error -- the two are different lengths
341 | ts_combine = ts_short.combine(ts_long, discard=False)
342 | 
343 | # this combines, and fills in 99 as the missing value
344 | ts_combine = ts_short.combine(ts_long, discard=False, pad=99)
345 | 
346 | print(ts_combine.items('str'))
347 | ('2016-12-31', '[0.0, 1.0]')
348 | ('2016-12-30', '[0.0, 1.0]')
349 | ('2016-12-29', '[0.0, 1.0]')
350 | ('2016-12-28', '[0.0, 1.0]')
351 | ('2016-12-27', '[0.0, 1.0]')
352 | ('2016-12-26', '[99.0, 1.0]')
353 | ('2016-12-25', '[99.0, 1.0]')
354 | ('2016-12-24', '[99.0, 1.0]')
355 | ('2016-12-23', '[99.0, 1.0]')
356 | ('2016-12-22', '[99.0, 1.0]')
357 | ```
358 | The combining can also receive multiple timeseries.
359 | ```
360 | ts_combine = ts_short.combine([ts_long, ts_long, ts_long])
361 | 
362 | print(ts_combine.items('str'))
363 | ('2016-12-31', '[0.0, 1.0, 1.0, 1.0]')
364 | ('2016-12-30', '[0.0, 1.0, 1.0, 1.0]')
365 | ('2016-12-29', '[0.0, 1.0, 1.0, 1.0]')
366 | ('2016-12-28', '[0.0, 1.0, 1.0, 1.0]')
367 | ('2016-12-27', '[0.0, 1.0, 1.0, 1.0]')
368 | ```
369 | ## Splitting Timeseries
370 | 
371 | In some ways it would make sense to mirror the **combine()** function
372 | with a **split()** function from an aesthetic standpoint. However, splitting is very
373 | straightforward without such a function. For example, suppose you want a
374 | timeseries that only has the first two columns from our previous example.
375 | As you can see in the ts_split tseries, the first two columns were taken.
376 | ```
377 | ts_split = ts_combine[:, :2]
378 | 
379 | print(ts_split.items('str'))
380 | ('2016-12-31', '[0.0, 1.0]')
381 | ('2016-12-30', '[0.0, 1.0]')
382 | ('2016-12-29', '[0.0, 1.0]')
383 | ('2016-12-28', '[0.0, 1.0]')
384 | ('2016-12-27', '[0.0, 1.0]')
385 | ```
386 | 
387 | ## Arithmetic Operations
388 | 
389 | We have combined timeseries together to stack up rows in common. In
390 | addition, we looked at the issue of mismatched lengths. Now we will look at
391 | arithmetic approaches and some of the design decisions and tradeoffs associated
392 | with mathematical operations.
393 | 
394 | We will start with the **add()** function. First, if we assume that all we are
395 | adding together are arrays that have exactly the same dateseries, and
396 | therefore the same length, and we assume they have exactly the same number of
397 | columns, then the whole question becomes trivial. If we relax those
398 | constraints, then some choices need to be made.
399 | 
400 | We will use the long and short timeseries from the previous example.
401 | ```
402 | # this will fail due to dissimilar lengths
403 | ts_added = ts_short.add(ts_long, match=True)
404 | 
405 | # this will work
406 | ts_added = ts_short.add(ts_long, match=False)
407 | 
408 | [ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
409 | ```
410 | The **add()** function checks to see whether the number of columns match. If they do
411 | not, an error is raised. If the **match** flag is True, then it also checks
412 | that all the dates in both timeseries match prior to the operation.
413 | 
414 | If **match** is False, then as long as the columns are compatible, the
415 | operation can take place. It also supports the concept of sparse arrays.
416 | For example, suppose you have a timeseries that is primary, but you would
417 | like to add in values from a timeseries that covers only a few dates within the range. This
418 | function will find the appropriate dates, adding in the values at just those
419 | rows.
420 | 421 | To summarize, all dates in common to both timeseries will be included in the 422 | new timeseries if **match** is False. 423 | 424 | Because the previous function is somewhat specialized, you can assume that the 425 | checking of common dates and creating the new timeseries can be somewhat slower 426 | than other approaches. 427 | 428 | If we assume some commonalities about our timeseries, then we can do our work 429 | in a more intuitive fashion. 430 | 431 | ### Assumptions of Commonality 432 | 433 | Let us assume that our timeseries might be varying in length, but we absolutely 434 | know what either our starting date or ending date is. And, let us assume that 435 | all the dates for the periods in common to the timeseries match. 436 | 437 | If we accept those assumptions, then a number of operations become quite easy. 438 | 439 | The timeseries object can accept simple arithmetic as if it is an array. It 440 | automatically passes the values on to the **tseries** array. If the two arrays 441 | are not the same length the longer array is truncated to the shorter length. So 442 | if you were add two arrays together that end at the same date, you would want 443 | to sort them latest date to earliest date using the function 444 | **sort_by_date()**. 445 | 446 | ### Examples 447 | ``` 448 | # starting tseries 449 | ts.tseries 450 | [ 0. 1. 2. 3. 4. 5. 6. 7. 8. 9.] 451 | 452 | (ts + 3).tseries 453 | [ 3. 4. 5. 6. 7. 8. 9. 10. 11. 12.] 454 | 455 | # Also, reverse (__radd__) 456 | (3 + ts).tseries 457 | [ 3. 4. 5. 6. 7. 8. 9. 10. 11. 12.] 458 | 459 | # of course not just addition 460 | 5 * ts.tseries 461 | [ 0. 5. 10. 15. 20. 25. 30. 35. 40. 45.] 462 | ``` 463 | Also, in-place operations. But first, we will make a copy. 464 | ``` 465 | ts1 = ts.clone() 466 | ts1.tseries /= 3 467 | print(ts1.tseries) 468 | [0.0 469 | 0.3333333333333333 470 | 0.6666666666666666 471 | 1.0 472 | 1.3333333333333333 473 | 1.6666666666666667 474 | 2.0 475 | 2.3333333333333335 476 | 2.6666666666666665 477 | 3.0] 478 | 479 | ts1 = ts ** 3 480 | ts1.tseries 481 | 0.0 482 | 1.0 483 | 8.0 484 | 27.0 485 | 64.0 486 | 125.0 487 | 216.0 488 | 343.0 489 | 512.0 490 | 729.0 491 | 492 | ts1 = 10 ** ts 493 | ts1.tseries 494 | [1.0 495 | 10.0 496 | 100.0 497 | 1000.0 498 | 10000.0 499 | 100000.0 500 | 1000000.0 501 | 10000000.0 502 | 100000000.0 503 | 1000000000.0] 504 | ``` 505 | 506 | In other words, the normal container functions you can use with numpy arrays 507 | are available to the timeseries objects. The following container functions for 508 | arrays are supported. 509 | ``` 510 | __pow__ __add__ __rsub__ __sub__ __eq__ __ge__ __gt__ __le__ 511 | __lt__ __mod__ __mul__ __ne__ __radd__ __rmod__ __rmul__ __rpow__ 512 | __abs__ __pos__ __neg__ __invert__ __rdivmod__ __rfloordiv__ 513 | __floordiv__ __truediv__ 514 | __rtruediv__ __divmod__ 515 | 516 | __and__ __or__ __ror__ __rand__ __rxor__ __xor__ __rshift__ 517 | __rlshift__ __lshift__ __rrshift__ 518 | 519 | __iadd__ __ifloordiv__ __imod__ __imul__ __ipow__ __isub__ 520 | __itruediv__] 521 | 522 | __iand__ __ilshift__ __ior__ __irshift__ __ixor__ 523 | ``` 524 | ### Functions of Arrays Not Supported 525 | 526 | The purpose the timeseries objects is to implement an intuitive usage of 527 | timeseries objects in a fashion that is consistent with NumPy. However, it is 528 | not intended to replace functions that are better handled explicitly with 529 | the **dseries** and **tseries** arrays directly. 
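
For example, here is a short sketch of that division of labor, using only calls shown elsewhere in this README:
```
# summary statistics and transforms come from numpy itself, via tseries
print(ts.tseries.mean())
print(ts.tseries.std())

# date-aware steps stay on the timeseries object, keeping dseries and
# tseries coordinated
ts.sort_by_date()
print(ts.daterange('str'))
```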
The difference will be clear 530 | by comparing the list of functions for the timeseries object versus a numpy array. Most of the functions of the timeseries object is related to handling the commonality of date series with time series. You can see that the bulk of the thymus functions relate to maintenance of the coordination betwee the date series and timeseries. The meat of the functions still lie with the 531 | numpy arrays by design. 532 | 533 | ``` 534 | # timeseries members and functions: 535 | ts.add ts.daterange ts.get_pcdiffs ts.series_direction 536 | ts.as_dict ts.datetime_series ts.header ts.set_ones 537 | ts.as_json ts.dseries ts.if_dseries_match ts.set_zeros 538 | ts.as_list ts.end_date ts.if_tseries_match ts.shape 539 | ts.clone ts.end_of_period ts.items ts.sort_by_date 540 | ts.closest_date ts.extend ts.key ts.start_date 541 | ts.columns ts.fmt_date ts.lengths ts.trunc 542 | ts.combine ts.frequency ts.make_arrays ts.truncdate 543 | ts.common_length ts.get_date_series_type ts.months ts.tseries 544 | ts.convert ts.get_datetime ts.replace ts.years 545 | ts.date_native ts.get_diffs ts.reverse 546 | ts.date_string_series ts.get_duped_dates ts.row_no 547 | 548 | # numpy functions in the arrays 549 | ts.tseries.T ts.tseries.cumsum ts.tseries.min ts.tseries.shape 550 | ts.tseries.all ts.tseries.data ts.tseries.nbytes ts.tseries.size 551 | ts.tseries.any ts.tseries.diagonal ts.tseries.ndim ts.tseries.sort 552 | ts.tseries.argmax ts.tseries.dot ts.tseries.newbyteorder ts.tseries.squeeze 553 | ts.tseries.argmin ts.tseries.dtype ts.tseries.nonzero ts.tseries.std 554 | ts.tseries.argpartition ts.tseries.dump ts.tseries.partition ts.tseries.strides 555 | ts.tseries.argsort ts.tseries.dumps ts.tseries.prod ts.tseries.sum 556 | ts.tseries.astype ts.tseries.fill ts.tseries.ptp ts.tseries.swapaxes 557 | ts.tseries.base ts.tseries.flags ts.tseries.put ts.tseries.take 558 | ts.tseries.byteswap ts.tseries.flat ts.tseries.ravel ts.tseries.tobytes 559 | ts.tseries.choose ts.tseries.flatten ts.tseries.real ts.tseries.tofile 560 | ts.tseries.clip ts.tseries.getfield ts.tseries.repeat ts.tseries.tolist 561 | ts.tseries.compress ts.tseries.imag ts.tseries.reshape ts.tseries.tostring 562 | ts.tseries.conj ts.tseries.item ts.tseries.resize ts.tseries.trace 563 | ts.tseries.conjugate ts.tseries.itemset ts.tseries.round ts.tseries.transpose 564 | ts.tseries.copy ts.tseries.itemsize ts.tseries.searchsorted ts.tseries.var 565 | ts.tseries.ctypes ts.tseries.max ts.tseries.setfield ts.tseries.view 566 | ts.tseries.cumprod ts.tseries.mean ts.tseries.setflags 567 | ``` 568 | ### Other Date Functions 569 | 570 | Variations on a theme: 571 | ``` 572 | # truncation 573 | ts.truncdate( 574 | start=datetime(2017, 1, 1), 575 | finish=datetime(2017, 12, 31)) 576 | 577 | # just start date etc. 578 | ts.truncdate( 579 | start=datetime(2017, 1, 1)) 580 | 581 | # this was in date order but suppose it was in reverse order? 
582 | # this result will give the same answer 583 | ts1 = ts.truncdate( 584 | start=datetime(2017, 1, 1), 585 | new=True) 586 | 587 | ts.reverse() 588 | 589 | ts1 = ts.truncdate( 590 | start=datetime(2017, 1, 1), 591 | new=True) 592 | 593 | # use the date format native to the dateseries (ordinal / timestamp) 594 | ts1 = ts.truncdate( 595 | start=datetime(2017, 1, 1).toordinal(), 596 | new=True) 597 | 598 | # suppose you start with a variable that represents a date range 599 | # date range can be either a list or tuple 600 | ts.truncdate( 601 | [datetime(2017, 1, 1), datetime(2017, 12, 31)]) 602 | ``` 603 | ## Assorted Date Functions 604 | ``` 605 | # native format 606 | ts.daterange() 607 | (735963, 735972) 608 | 609 | # str format 610 | ts.daterange('str') 611 | ('2015-12-31', '2016-01-09') 612 | 613 | # datetime format 614 | ts.daterange('datetime') 615 | (datetime.datetime(2015, 12, 31, 0, 0), datetime.datetime(2016, 1, 9, 0, 0)) 616 | 617 | # native format 618 | ts.start_date(); ts.end_date() 619 | 735963 735972 620 | 621 | # str format 622 | ts.start_date('str'); ts.end_date('str') 623 | 2015-12-31 2016-01-09 624 | 625 | # datetime format 626 | ts.start_date('datetime'); ts.end_date('datetime') 627 | 2015-12-31 00:00:00 2016-01-09 00:00:00 628 | ``` 629 | Sometimes it is helpful to find a particular row based on the date. Also, that date might not be in the dateseries, and so, the closest date will suffice. 630 | 631 | We will create a sample timeseries to illustrate. 632 | ``` 633 | ts = Timeseries() 634 | ts.dseries = [] 635 | ts.tseries = [] 636 | 637 | start_date = datetime(2015, 12, 31) 638 | for i in range(40): 639 | date = start_date + timedelta(days=i) 640 | if date.weekday() not in [5, 6]: # skipping weekends 641 | 642 | ts.dseries.append(date.toordinal()) 643 | ts.tseries.append(i) 644 | 645 | ts.make_arrays() 646 | 647 | # row_no, date 648 | (0, '2015-12-31') 649 | (1, '2016-01-01') 650 | (2, '2016-01-04') 651 | (3, '2016-01-05') 652 | (4, '2016-01-06') 653 | (5, '2016-01-07') 654 | (6, '2016-01-08') 655 | (7, '2016-01-11') 656 | (8, '2016-01-12') 657 | (9, '2016-01-13') 658 | (10, '2016-01-14') 659 | (11, '2016-01-15') 660 | (12, '2016-01-18') 661 | (13, '2016-01-19') 662 | (14, '2016-01-20') 663 | (15, '2016-01-21') 664 | (16, '2016-01-22') 665 | (17, '2016-01-25') 666 | (18, '2016-01-26') 667 | (19, '2016-01-27') 668 | (20, '2016-01-28') 669 | (21, '2016-01-29') 670 | (22, '2016-02-01') 671 | (23, '2016-02-02') 672 | (24, '2016-02-03') 673 | (25, '2016-02-04') 674 | (26, '2016-02-05') 675 | (27, '2016-02-08') 676 | 677 | date1 = datetime(2016, 1, 7) # existing date within date series 678 | date2 = datetime(2016, 1, 16) # date falling on a weekend 679 | date3 = datetime(2015, 6, 16) # date prior to start of date series 680 | date4 = datetime(2016, 3, 8) # date after to end of date series 681 | 682 | # as datetime and in the series 683 | existing_row = ts.row_no(rowdate=date1, closest=1) 684 | 5 685 | 686 | existing_date = ts.closest_date(rowdate=date1, closest=1) 687 | print(datetime.fromordinal(existing_date)) 688 | 2016-01-07 00:00:00 689 | 690 | # as datetime but date not in series 691 | next_row = ts.row_no(rowdate=date2, closest=1) 692 | 12 693 | 694 | next_date = ts.closest_date(rowdate=date2, closest=1) 695 | print(datetime.fromordinal(next_date)) 696 | 2016-01-18 00:00:00 697 | 698 | prev_row = ts.row_no(rowdate=date2, closest=-1) 699 | 11 700 | 701 | prev_date = ts.closest_date(rowdate=date2, closest=-1) 702 | print(datetime.fromordinal(prev_date)) 703 | 
2016-01-15 00:00:00 704 | 705 | # this will fail -- date is outside the date series 706 | # as datetime but date not in series, look for earlier date 707 | ts.closest_date(rowdate=date3, closest=-1) 708 | 709 | # this will fail -- date is outside the date series 710 | ts.closest_date(rowdate=date4, closest=1) 711 | ``` 712 | ## Functions by Category 713 | 714 | ### Output 715 | 716 | #### Timeseries 717 | ##### **ts.to_dict()** 718 | 719 | Returns the time series as a dict with the date as the key. 720 | 721 | Usage: 722 | self.to_dict(dt_fmt=None, data_list=False) 723 | 724 | This has been reworked to include all fields of the timeseries 725 | rather than just dates and times, so header informtion is now included. 726 | 727 | For flexibility, the date can be formatted in various ways: 728 | * dt_fmt=None Native format depending on frequency but converted to string. 729 | * dt_fmt='datetime' Datetime objects. 730 | * dt_fmt='str' Converts dates to string using constants `timeseries.FMT_DATE` or `timeseries.FMT_IDATE`, depending on the timeseries type. 731 | 732 | * data_list A boolean that signals whether dates should be used as keys in a dict for the values, or whether the dates and values are output as a list. 733 | 734 | This matters because some operations are necessary to target 735 | specific dates, but it does not preserve order. Or, if data_list 736 | is True, then the combination of dates and values are output as 737 | a list and order is maintained. 738 | 739 | ##### **ts.to_json()** 740 | 741 | This function returns the timeseries in JSON format. 742 | 743 | Usage: 744 | self.as_json(indent=2, dt_fmt=str, data_list=True) 745 | 746 | dt_fmt options are the same as for to_dict 747 | 748 | ##### **ts.to_list()** 749 | 750 | Returns the timeseries as a list. 751 | 752 | #### Point 753 | ##### **point.to_dict()** 754 | This function returns a dict of the point variables. 755 | 756 | Usage: 757 | to_dict(dt_fmt=None) 758 | 759 | Parameters: 760 | dt_fmt: (None|str) : Format choice is "str" or "datetime" 761 | 762 | Returns: 763 | point (dict) 764 | 765 | Typical output: 766 | ``` 767 | point.to_dict(dt_fmt="str") 768 | 769 | { 770 | "row_no": 100, 771 | "date": "2020-04-10", 772 | "dog": 48.38865769863544, 773 | "cat": 48.48543501403271, 774 | "squirrel": 48.58221232942998, 775 | "cow": 48.678989644827254, 776 | "monkeys": 48.77576696022452 777 | } 778 | 779 | ``` 780 | 781 | ### Miscellaneous 782 | #### ts.header() 783 | 784 | This function returns a dict of the non-timeseries data. 785 | 786 | #### ts.items(fmt=None) 787 | 788 | This function returns the date series and the time series as if it 789 | is in one list. The term items used to suggest the iteration of dicts 790 | where items are the key, value combination. 791 | 792 | if fmt == 'str': 793 | the dates are output as strings 794 | 795 | #### ts.months(include_partial=True) 796 | 797 | This function provides a quick way to summarize daily (or less) 798 | as monthly data. 799 | 800 | It is basically a pass-through to the convert function with more 801 | decoration of the months. 802 | 803 | Usage: 804 | 805 | months(include_partial=True) 806 | 807 | returns a dict with year-month as keys 808 | 809 | #### ts.years(include_partial=True) 810 | 811 | This function provides a quick way to summarize daily (or less) 812 | as yearly data. 813 | 814 | It is basically a pass-through to the convert function with more 815 | decoration of the years. 
816 | 817 | Usage: 818 | 819 | years(include_partial=True) 820 | 821 | returns a dict with year as keys 822 | 823 | #### ts.datetime_series() 824 | 825 | This function returns the dateseries converted to a list of 826 | datetime objects. 827 | 828 | #### ts.date_string_series(dt_fmt=None) 829 | 830 | This function returns a list of the dates in the timeseries as 831 | strings. 832 | 833 | Usage: 834 | self.date_string_series(dt_fmt=None) 835 | 836 | dt_fmt is a datetime mask to alter the default formatting. 837 | 838 | ### Array Manipulation 839 | 840 | #### ts.add(ts, match=True) 841 | 842 | Adds two timeseries together. 843 | 844 | if match is True: 845 | means there should be a one to one corresponding date in each time 846 | series. If not raise error. 847 | else: 848 | means that timeseries with sporadic or missing dates can be added 849 | 850 | Note: this does not evaluate whether both timeseries have the same 851 | number of columns. It will fail if they do not. 852 | 853 | Returns the timeseries. Not in-place. 854 | 855 | #### ts.clone() 856 | 857 | This function returns a copy of the timeseries. 858 | 859 | #### ts.combine(tss, discard=True, pad=None) 860 | 861 | This function combines timeseries into a single array. Combining in 862 | this case means accumulating additional columns of information. 863 | 864 | Truncation takes place at the end of rows. So if the timeseries is 865 | sorted from latest dates to earliest dates, the older values would be 866 | removed. 867 | 868 | Usage: 869 | self.combine(tss, discard=True, pad=None) 870 | 871 | Think of tss as the plural of timeseries. 872 | 873 | If discard: 874 | Will truncate all timeseries lengths down to the shortest 875 | timeseries. 876 | 877 | if discard is False: 878 | An error will be raised if the all the lengths do not match 879 | 880 | unless: 881 | if pad is not None: 882 | the shorter timeseries will be padded with the value pad. 883 | 884 | Returns the new ts. 885 | 886 | #### ts.common_length(*ts) 887 | 888 | This static method trims the lengths of timeseries and returns the 889 | timeseries trimmed to the same length. 890 | 891 | The idea is that in order to do array operations there must be a 892 | common length for each timeseries. 893 | 894 | Reflecting the bias for using timeseries sorted from latest info to 895 | earlier info, truncation takes place at the end of the array. That 896 | way older less important values are removed if necessary. 897 | 898 | Usage: 899 | ts1_new, ts2_new = self.common_length(ts1, ts2) 900 | [ts1, ts2, ..., ts_n] = self.common_length(*ts) 901 | 902 | #### ts.convert(new_freq, include_partial=True, **kwargs) 903 | 904 | This function returns the timeseries converted to another frequency, 905 | such as daily to monthly. 906 | 907 | Usage: 908 | convert(new_freq, include_partial=True, **kwargs) 909 | 910 | The only kwarg is 911 | weekday= 912 | 913 | This is used when converting to weekly data. The weekday number 914 | corresponds to the the datetime.weekday() function. 915 | 916 | #### ts.extend(ts, overlay=True) 917 | 918 | This function combines a timeseries to another, taking into account the 919 | possibility of overlap. 920 | 921 | This assumes that the frequency is the same. 922 | 923 | This function is chiefly envisioned to extend a timeseries with 924 | additional dates. 925 | 926 | Usage: 927 | self.extend(ts, overlay=True) 928 | 929 | If overlay is True then the incoming timeseries will overlay 930 | any values that are duplicated. 
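
A hedged sketch of the intended call pattern, assuming (as the usage line suggests) that the extension is applied to `self`; the dates and values below are invented for illustration.
```
# existing history: ten days ending 2016-01-09
ts_history = Timeseries()
ts_history.dseries = datetime(2015, 12, 31).toordinal() + np.arange(10)
ts_history.tseries = np.arange(10)
ts_history.make_arrays()

# a newer batch that overlaps the last three dates of ts_history
ts_update = Timeseries()
ts_update.dseries = datetime(2016, 1, 7).toordinal() + np.arange(10)
ts_update.tseries = np.arange(10) + 100.0
ts_update.make_arrays()

# with overlay=True the incoming values win on the duplicated dates
ts_history.extend(ts_update, overlay=True)
```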
931 | 932 | #### ts.trunc(start=None, finish=None, new=False) 933 | 934 | This function truncates in place, typically. 935 | 936 | truncate from (start:finish) 937 | remember start is lowest number, latest date 938 | 939 | This truncation works on the basis of slicing, so 940 | finish is not inclusive. 941 | 942 | Usage: 943 | self.trunc(start=None, finish=None, new=False) 944 | 945 | #### ts.truncdate(start=None, finish=None, new=False) 946 | 947 | This function truncates in place on the basis of dates. 948 | 949 | Usage: 950 | self.truncdate(start=None, finish=None, new=False) 951 | 952 | start and finish are dates, input as either datetime or the actual 953 | internal format of the **dseries** (ordinals or timestamps). 954 | 955 | If the dates are not actually in the list, the starting date will 956 | be the next viable date after the start date requested. If the finish 957 | date is not available, the previous date from the finish date will be 958 | the last. 959 | 960 | If new is True, the timeseries will not be modified in place. Rather 961 | a new timeseries will be returned instead. 962 | 963 | #### ts.replace(ts, match=True) 964 | 965 | This function replaces values where the dates match an incoming 966 | timeseries. So if the incoming date on the timeseries matches, the 967 | value in the current timeseries will be replaced by the incoming 968 | timeseries. 969 | 970 | Usage: 971 | self.replace(ts, match=True) 972 | 973 | If match is False, the incoming timseries may have dates not found in 974 | the self timeseries. 975 | 976 | Returns the modified timeseries. Not in place. 977 | 978 | #### ts.reverse() 979 | 980 | This function does in-place reversal of the timeseries and dateseries. 981 | 982 | #### ts.get_diffs() 983 | 984 | This function gets the differences between values from date to date in 985 | the timeseries. 986 | 987 | #### ts.get_pcdiffs() 988 | 989 | This function gets the percent differences between values in the 990 | timeseries. 991 | 992 | No provision for dividing by zero here. 993 | 994 | #### ts.set_ones(fmt=None, new=False) 995 | 996 | This function converts an existing timeseries to ones using the same 997 | shape as the existing timeseries. 998 | 999 | It is used as a convenience to create an empty timeseries with a 1000 | specified date range. 1001 | 1002 | if fmt use as shape 1003 | 1004 | usage: 1005 | set_ones(self, fmt=None, new=False) 1006 | 1007 | #### ts.set_zeros(fmt=None, new=False) 1008 | 1009 | This function converts an existing timeseries to zeros using the same 1010 | shape as the existing timeseries. 1011 | 1012 | It is used as a convenience to create an empty timeseries with a 1013 | specified date range. 1014 | 1015 | if fmt use as shape 1016 | 1017 | usage: 1018 | set_zeros(self, fmt=None, new=False) 1019 | 1020 | #### ts.sort_by_date(reverse=False, force=False) 1021 | 1022 | This function converts a timeseries to either date order or reverse 1023 | date order. 1024 | 1025 | Usage: 1026 | sort_by_date(self, reverse=False, force=False) 1027 | 1028 | If reverse is True, then order will be newest to oldest. 1029 | If force is False, the assumption is made that comparing the first 1030 | and last date will determine the current order of the timeseries. That 1031 | would mean that unnecessary sorting can be avoided. Also, if the order 1032 | needs to be reversed, the sort is changed via the less expensive 1033 | reverse function. 1034 | 1035 | If dates and values are in no particular order, with force=True, the 1036 | actual sort takes place. 
1037 | 1038 | This function changes the data in-place. 1039 | 1040 | ### Evaluation 1041 | 1042 | #### ts.daterange(fmt=None) 1043 | 1044 | This function returns the starting and ending dates of the timeseries. 1045 | 1046 | Usage: 1047 | 1048 | self.daterange() 1049 | (735963, 735972) 1050 | 1051 | self.daterange('str') 1052 | ('2015-12-31', '2016-01-09') 1053 | 1054 | self.daterange('datetime') 1055 | (datetime(2015, 12, 31, 0, 0), 1056 | datetime.datetime(2016, 1, 9, 0, 0)) 1057 | 1058 | #### ts.start_date(fmt=None) 1059 | 1060 | This function returns the starting date of the timeseries in its 1061 | native value, timestamp or ordinal. 1062 | 1063 | If fmt is 'str' returns in string format 1064 | If fmt is 'datetime' returns in string format 1065 | 1066 | #### ts.end_date(fmt=None) 1067 | 1068 | This funtcion returns the ending date of the timeseries in its native 1069 | value, timestamp or ordinal. 1070 | 1071 | If fmt is 'str' returns in string format 1072 | If fmt is 'datetime' returns in string format 1073 | 1074 | #### ts.get_duped_dates() 1075 | 1076 | This function pulls dates that are duplicated. This is to be used to 1077 | locate timeseries that are faulty. 1078 | 1079 | Usage: 1080 | get_duped_dates() 1081 | 1082 | returns [[odate1, count], [odate2, count]] 1083 | 1084 | #### ts.series_direction() 1085 | 1086 | if a lower row is a lower date, then 1 for ascending 1087 | if a lower row is a higher date then -1 for descending 1088 | 1089 | #### ts.get_date_series_type() 1090 | 1091 | This function returns the date series type associated with the 1092 | timeseries. The choices are TS_ORDINAL or TS_TIMESTAMP. 1093 | 1094 | #### ts.if_dseries_match(ts) 1095 | 1096 | This function returns True if the date series are the same. 1097 | 1098 | #### ts.if_tseries_match(ts) 1099 | 1100 | This function returns True if the time series are the same. 1101 | 1102 | ### Utilities 1103 | 1104 | #### ts.date_native(date) 1105 | 1106 | This awkwardly named function returns a date in the native format of the timeseries, namely ordinal or timestamp. 1107 | 1108 | #### ts.row_no(rowdate, closest=0, no_error=False) 1109 | 1110 | Shows the row in the timeseries 1111 | 1112 | Usage: 1113 | ts.row(rowdate=) 1114 | ts.row(rowdate=) 1115 | 1116 | Returns an error if the date is not found in the index 1117 | 1118 | if closest is invoked: 1119 | closest = 1 1120 | find the closest date after the rowdate 1121 | closest = -1 1122 | find the closest date before the rowdate 1123 | 1124 | If no_error 1125 | returns -1 instead of raising an error if the date was 1126 | outside of the timeseries. 1127 | 1128 | #### ts.get_datetime(date) 1129 | 1130 | This function returns a date as a datetime object. This takes into account the type of date stored in **dseries**. 1131 | 1132 | Usage: 1133 | self.get_datetime(date) 1134 | 1135 | #### ts.lengths() 1136 | 1137 | This function returns the lengths of both the date series and time series. Both numbers are included in case a mismatch has occurred. 1138 | 1139 | #### ts.shape() 1140 | 1141 | This function return the shape of the timeseries. This is a shortcut 1142 | to putting in ts.tseries.shape. 1143 | 1144 | #### ts.fmt_date(numericdate, dt_type, dt_fmt=None) 1145 | 1146 | This static method accepts a date and converts it to the format used in the timeseries. 
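
A short, hedged sketch pulling together a few of the utility calls above (it assumes the business-day example timeseries built earlier; exact return types, such as tuple versus list from `lengths()`, may differ):
```
# shape() is a shortcut for ts.tseries.shape
print(ts.shape())

# lengths of the date series and time series; useful for spotting a mismatch
print(ts.lengths())

# find a row by date, falling back to the next available date
row = ts.row_no(rowdate=datetime(2016, 1, 16), closest=1)

# and convert the stored ordinal/timestamp back to a datetime object
print(ts.get_datetime(ts.dseries[row]))
```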
1147 | 1148 | #### ts.make_arrays() 1149 | 1150 | Convert the date and time series lists (if so) to numpy arrays 1151 | 1152 | #### ts.get_fromDB(**kwargs) 1153 | 1154 | This is just a stub to suggest a viable name for getting data from a database. 1155 | 1156 | #### ts.save_toDB(**kwargs): 1157 | 1158 | This is just a stub to suggest a viable name for saving data to a database. 1159 | -------------------------------------------------------------------------------- /README_HELPERS.md: -------------------------------------------------------------------------------- 1 | 2 | # Helpers 3 | 4 | A brief explanation of some helper classes. 5 | 6 | When dealing with a set of timeseries, it can be useful to gather them into 7 | groups that have some commonality, such as lists and dicts. Both classes are 8 | derived from the list and dict objects with additional features added. 9 | 10 | ## TssList 11 | 12 | This class starts with the list class and adds some features to smooth the 13 | path for using sets of timeseries. It is called TssList to suggest the 14 | plural of timeseries (ts). 15 | 16 | To use it we will start with three sample timeseries. 17 | ``` 18 | start_date = datetime(2015, 12, 31).toordinal() 19 | 20 | ts = Timeseries() 21 | ts.key = 'One' 22 | ts.dseries = start_date + np.arange(10) 23 | ts.tseries = np.arange(10) 24 | ts.make_arrays() 25 | 26 | # longer timeseries 27 | ts_long = Timeseries() 28 | ts_long.key = 'Two' 29 | ts_long.dseries = start_date + np.arange(20) 30 | ts_long.tseries = np.arange(20) 31 | ts_long.make_arrays() 32 | 33 | # shorter timeseries 34 | ts_short = Timeseries() 35 | ts_short.key = 'Three' 36 | ts_short.dseries = start_date + np.arange(5) 37 | ts_short.tseries = np.arange(5) 38 | ts_short.make_arrays() 39 | ``` 40 | Having created the timeseries' for our set, now we create our aggregate list. 41 | ``` 42 | 43 | tsslist = TssList([ 44 | ts, 45 | ts_long, 46 | ts_short 47 | ]) 48 | ``` 49 | Having created our list we will see some features available to us that are not 50 | found with a normal list. 51 | 52 | ## min_date() and max_date() 53 | 54 | With disparate timeseries, it can be helpful to know where the timeframes 55 | start and end, particularly when truncating or doing operations on them as a 56 | group. 57 | 58 | ### combine(discard=True, pad=None) 59 | 60 | This function operates essentially in the same fashion as the combine function 61 | for an individual timeseries. 62 | 63 | For our example: 64 | ``` 65 | # discard is True, truncate to match the shortest timeseries 66 | ts = tsslist.combine(discard=True, pad=None) 67 | 68 | # the shape matches the shortest timeseries but with all three columns 69 | print(ts.tseries.shape) 70 | (5, 3) 71 | 72 | # do not discard, instead pad with zeros 73 | ts = tsslist.combine(discard=False, pad=0) 74 | 75 | # the shape matches the longest timeseries 76 | print(ts.tseries.shape) 77 | (20, 3) 78 | ``` 79 | 80 | ### get_values(date, notify=False) 81 | 82 | This function finds the values as of a particular date and so does not require 83 | either explicitly walking the list of timeseries, or combining the timeseries 84 | into a common timeseries to get the values. 85 | 86 | The notify flag indicates whether an error is thrown if a date is not present 87 | in one of the timeseries. If notify is false, a None is inserted where the 88 | value would be. 89 | 90 | ### clone() 91 | 92 | This function operates in the same fashion as the clone function for an 93 | individual timeseries. 
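
A hedged sketch, assuming (as the wording above implies) that each contained timeseries is itself copied:
```
# work on a copy so the originals stay untouched
tss_copy = tsslist.clone()
tss_copy[0].tseries *= 100

# the first timeseries in the original list is unaffected
print(tsslist[0].tseries[:3])   # [0. 1. 2.]
```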
94 | 95 | ### as_dict() 96 | 97 | Sometimes it is helpful to flip a list to a dict to work with specific 98 | timeseries directly. as_dict() returns a dict with the keys being the key for 99 | each timeseries. 100 | 101 | ## TssDict 102 | 103 | This class provides similar features to TssList and is designed to be used 104 | when using specific timeseries or groups of timeseries. 105 | 106 | We will create a sample version. 107 | ``` 108 | tssdict = TssDict( 109 | [ 110 | ts, 111 | ts_long, 112 | ts_short 113 | ]) 114 | ``` 115 | The keys for the dict will be the key for the timeseries. You can also do have 116 | lists of timeseries. For example we can add to our tssdict: 117 | ``` 118 | tssdict['our_list'] = [ 119 | Timeseries(), 120 | Timeseries(), 121 | Timeseries()] 122 | 123 | anew_tssdict = TssDict() 124 | 125 | # a dict in the dict 126 | tssdict['anew'] = anew_tssdict 127 | ``` 128 | Why the last one? Not really sure, but it could be done. But when you start 129 | doing turtles all the way the down, it could get problematical. 130 | 131 | As with TssList, there are similar functions available aside from the usual 132 | functions associated with a dict. 133 | 134 | ### min_date(), max_date() 135 | 136 | These functions operate in the same way as TssList with an important 137 | difference. 138 | ``` 139 | min_date, key = tssdict.min_date() 140 | 141 | max_date, key = tssdict.max_date() 142 | ``` 143 | With this version you can know which key has the value. 144 | 145 | ### get_values(date, keys=None, notify=False) 146 | 147 | This version of get_values, like the TssList returns values found in the 148 | timeseries as of a particular date. However, you can pass in a list of 149 | which keys to select as well as the order returned of the values. 150 | ``` 151 | values, keys = tssdict.get_values(date, keys=None, notify=False) 152 | ``` 153 | If the keys option is None, all the keys returned in whatever order the dict 154 | decides to use. 155 | 156 | ### combine(keys=None, discard=True, pad=None) 157 | 158 | This version of combine, like the TssList combines all the timeseries into one 159 | timeseries. It uses the same options, except as in the previous function, you 160 | can pass in a list of keys to govern the order of the columns in the 161 | timeseries. 162 | 163 | If the value portion of the dict is a list, TssList, or TssDict, that value 164 | portion will be combined into a timeseries before being appended to the 165 | timeseries in common. 166 | 167 | ### clone() 168 | 169 | This function returns a copy of the object as the others do. -------------------------------------------------------------------------------- /README_HELPERS.rst: -------------------------------------------------------------------------------- 1 | Helpers 2 | ======= 3 | 4 | A brief explanation of some helper classes. 5 | 6 | When dealing with a set of timeseries, it can be useful to gather them 7 | into groups that have some commonality, such as lists and dicts. Both 8 | classes are derived from the list and dict objects with additional 9 | features added. 10 | 11 | TssList 12 | ------- 13 | 14 | This class starts with the list class and adds some features to smooth 15 | the path for using sets of timeseries. It is called TssList to suggest 16 | the plural of timeseries (ts). 17 | 18 | To use it we will start with three sample timeseries. 
19 | 20 | :: 21 | 22 | start_date = datetime(2015, 12, 31).toordinal() 23 | 24 | ts = Timeseries() 25 | ts.key = 'One' 26 | ts.dseries = start_date + np.arange(10) 27 | ts.tseries = np.arange(10) 28 | ts.make_arrays() 29 | 30 | # longer timeseries 31 | ts_long = Timeseries() 32 | ts_long.key = 'Two' 33 | ts_long.dseries = start_date + np.arange(20) 34 | ts_long.tseries = np.arange(20) 35 | ts_long.make_arrays() 36 | 37 | # shorter timeseries 38 | ts_short = Timeseries() 39 | ts_short.key = 'Three' 40 | ts_short.dseries = start_date + np.arange(5) 41 | ts_short.tseries = np.arange(5) 42 | ts_short.make_arrays() 43 | 44 | Having created the timeseries' for our set, now we create our aggregate 45 | list. 46 | 47 | :: 48 | 49 | 50 | tsslist = TssList([ 51 | ts, 52 | ts_long, 53 | ts_short 54 | ]) 55 | 56 | Having created our list we will see some features available to us that 57 | are not found with a normal list. 58 | 59 | min\_date() and max\_date() 60 | --------------------------- 61 | 62 | With disparate timeseries, it can be helpful to know where the 63 | timeframes start and end, particularly when truncating or doing 64 | operations on them as a group. 65 | 66 | combine(discard=True, pad=None) 67 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 68 | 69 | This function operates essentially in the same fashion as the combine 70 | function for an individual timeseries. 71 | 72 | For our example: 73 | 74 | :: 75 | 76 | # discard is True, truncate to match the shortest timeseries 77 | ts = tsslist.combine(discard=True, pad=None) 78 | 79 | # the shape matches the shortest timeseries but with all three columns 80 | print(ts.tseries.shape) 81 | (5, 3) 82 | 83 | # do not discard, instead pad with zeros 84 | ts = tsslist.combine(discard=False, pad=0) 85 | 86 | # the shape matches the longest timeseries 87 | print(ts.tseries.shape) 88 | (20, 3) 89 | 90 | get\_values(date, notify=False) 91 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 92 | 93 | This function finds the values as of a particular date and so does not 94 | require either explicitly walking the list of timeseries, or combining 95 | the timeseries into a common timeseries to get the values. 96 | 97 | The notify flag indicates whether an error is thrown if a date is not 98 | present in one of the timeseries. If notify is false, a None is inserted 99 | where the value would be. 100 | 101 | clone() 102 | ~~~~~~~ 103 | 104 | This function operates in the same fashion as the clone function for an 105 | individual timeseries. 106 | 107 | as\_dict() 108 | ~~~~~~~~~~ 109 | 110 | Sometimes it is helpful to flip a list to a dict to work with specific 111 | timeseries directly. as\_dict() returns a dict with the keys being the 112 | key for each timeseries. 113 | 114 | TssDict 115 | ------- 116 | 117 | This class provides similar features to TssList and is designed to be 118 | used when using specific timeseries or groups of timeseries. 119 | 120 | We will create a sample version. 121 | 122 | :: 123 | 124 | tssdict = TssDict( 125 | [ 126 | ts, 127 | ts_long, 128 | ts_short 129 | ]) 130 | 131 | The keys for the dict will be the key for the timeseries. You can also 132 | do have lists of timeseries. For example we can add to our tssdict: 133 | 134 | :: 135 | 136 | tssdict['our_list'] = [ 137 | Timeseries(), 138 | Timeseries(), 139 | Timeseries()] 140 | 141 | anew_tssdict = TssDict() 142 | 143 | # a dict in the dict 144 | tssdict['anew'] = anew_tssdict 145 | 146 | Why the last one? Not really sure, but it could be done. 
But once you 147 | start stacking turtles all the way down, it could get problematic. 148 | 149 | As with TssList, there are similar functions available aside from the 150 | usual functions associated with a dict. 151 | 152 | min\_date(), max\_date() 153 | ~~~~~~~~~~~~~~~~~~~~~~~~ 154 | 155 | These functions operate in the same way as TssList with an important 156 | difference. 157 | 158 | :: 159 | 160 | min_date, key = tssdict.min_date() 161 | 162 | max_date, key = tssdict.max_date() 163 | 164 | With this version you can know which key has the value. 165 | 166 | get\_values(date, keys=None, notify=False) 167 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 168 | 169 | This version of get\_values, like the TssList version, returns values found in 170 | the timeseries as of a particular date. However, you can pass in a list 171 | of keys to select, which also governs the order in which the values are returned. 172 | 173 | :: 174 | 175 | values, keys = tssdict.get_values(date, keys=None, notify=False) 176 | 177 | If the keys option is None, all the keys are returned in whatever order the 178 | dict decides to use. 179 | 180 | combine(keys=None, discard=True, pad=None) 181 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 182 | 183 | This version of combine, like the TssList version, combines all the timeseries 184 | into one timeseries. It uses the same options, except that, as in the previous 185 | function, you can pass in a list of keys to govern the order of the 186 | columns in the timeseries. 187 | 188 | If the value portion of the dict is a list, TssList, or TssDict, that 189 | value portion will be combined into a timeseries before being appended 190 | to the timeseries in common. 191 | 192 | clone() 193 | ~~~~~~~ 194 | 195 | This function returns a copy of the object as the others do. 196 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /docs/README_HELPERS.rst: -------------------------------------------------------------------------------- 1 | Helpers 2 | ======= 3 | 4 | A brief explanation of some helper classes. 5 | 6 | When dealing with a set of timeseries, it can be useful to gather them 7 | into groups that have some commonality, such as lists and dicts. Both 8 | classes are derived from the list and dict objects with additional 9 | features added. 10 | 11 | TssList 12 | ------- 13 | 14 | This class starts with the list class and adds some features to smooth 15 | the path for using sets of timeseries. It is called TssList to suggest 16 | the plural of timeseries (ts). 17 | 18 | To use it we will start with three sample timeseries. 19 | 20 | :: 21 | 22 | start_date = datetime(2015, 12, 31).toordinal() 23 | 24 | ts = Timeseries() 25 | ts.key = 'One' 26 | ts.dseries = start_date + np.arange(10) 27 | ts.tseries = np.arange(10) 28 | ts.make_arrays() 29 | 30 | # longer timeseries 31 | ts_long = Timeseries() 32 | ts_long.key = 'Two' 33 | ts_long.dseries = start_date + np.arange(20) 34 | ts_long.tseries = np.arange(20) 35 | ts_long.make_arrays() 36 | 37 | # shorter timeseries 38 | ts_short = Timeseries() 39 | ts_short.key = 'Three' 40 | ts_short.dseries = start_date + np.arange(5) 41 | ts_short.tseries = np.arange(5) 42 | ts_short.make_arrays() 43 | 44 | Having created the timeseries' for our set, now we create our aggregate 45 | list.
46 | 47 | :: 48 | 49 | 50 | tsslist = TssList([ 51 | ts, 52 | ts_long, 53 | ts_short 54 | ]) 55 | 56 | Having created our list we will see some features available to us that 57 | are not found with a normal list. 58 | 59 | min\_date() and max\_date() 60 | --------------------------- 61 | 62 | With disparate timeseries, it can be helpful to know where the 63 | timeframes start and end, particularly when truncating or doing 64 | operations on them as a group. 65 | 66 | combine(discard=True, pad=None) 67 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 68 | 69 | This function operates essentially in the same fashion as the combine 70 | function for an individual timeseries. 71 | 72 | For our example: 73 | 74 | :: 75 | 76 | # discard is True, truncate to match the shortest timeseries 77 | ts = tsslist.combine(discard=True, pad=None) 78 | 79 | # the shape matches the shortest timeseries but with all three columns 80 | print(ts.tseries.shape) 81 | (5, 3) 82 | 83 | # do not discard, instead pad with zeros 84 | ts = tsslist.combine(discard=False, pad=0) 85 | 86 | # the shape matches the longest timeseries 87 | print(ts.tseries.shape) 88 | (20, 3) 89 | 90 | get\_values(date, notify=False) 91 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 92 | 93 | This function finds the values as of a particular date and so does not 94 | require either explicitly walking the list of timeseries, or combining 95 | the timeseries into a common timeseries to get the values. 96 | 97 | The notify flag indicates whether an error is thrown if a date is not 98 | present in one of the timeseries. If notify is false, a None is inserted 99 | where the value would be. 100 | 101 | clone() 102 | ~~~~~~~ 103 | 104 | This function operates in the same fashion as the clone function for an 105 | individual timeseries. 106 | 107 | as\_dict() 108 | ~~~~~~~~~~ 109 | 110 | Sometimes it is helpful to flip a list to a dict to work with specific 111 | timeseries directly. as\_dict() returns a dict with the keys being the 112 | key for each timeseries. 113 | 114 | TssDict 115 | ------- 116 | 117 | This class provides similar features to TssList and is designed to be 118 | used when using specific timeseries or groups of timeseries. 119 | 120 | We will create a sample version. 121 | 122 | :: 123 | 124 | tssdict = TssDict( 125 | [ 126 | ts, 127 | ts_long, 128 | ts_short 129 | ]) 130 | 131 | The keys for the dict will be the key for the timeseries. You can also 132 | do have lists of timeseries. For example we can add to our tssdict: 133 | 134 | :: 135 | 136 | tssdict['our_list'] = [ 137 | Timeseries(), 138 | Timeseries(), 139 | Timeseries()] 140 | 141 | anew_tssdict = TssDict() 142 | 143 | # a dict in the dict 144 | tssdict['anew'] = anew_tssdict 145 | 146 | Why the last one? Not really sure, but it could be done. But when you 147 | start doing turtles all the way the down, it could get problematical. 148 | 149 | As with TssList, there are similar functions available aside from the 150 | usual functions associated with a dict. 151 | 152 | min\_date(), max\_date() 153 | ~~~~~~~~~~~~~~~~~~~~~~~~ 154 | 155 | These functions operate in the same way as TssList with an important 156 | difference. 157 | 158 | :: 159 | 160 | min_date, key = tssdict.min_date() 161 | 162 | max_date, key = tssdict.max_date() 163 | 164 | With this version you can know which key has the value. 
165 | 166 | get\_values(date, keys=None, notify=False) 167 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 168 | 169 | This version of get\_values, like the TssList returns values found in 170 | the timeseries as of a particular date. However, you can pass in a list 171 | of which keys to select as well as the order returned of the values. 172 | 173 | :: 174 | 175 | values, keys = tssdict.get_values(date, keys=None, notify=False) 176 | 177 | If the keys option is None, all the keys returned in whatever order the 178 | dict decides to use. 179 | 180 | combine(keys=None, discard=True, pad=None) 181 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 182 | 183 | This version of combine, like the TssList combines all the timeseries 184 | into one timeseries. It uses the same options, except as in the previous 185 | function, you can pass in a list of keys to govern the order of the 186 | columns in the timeseries. 187 | 188 | If the value portion of the dict is a list, TssList, or TssDict, that 189 | value portion will be combined into a timeseries before being appended 190 | to the timeseries in common. 191 | 192 | clone() 193 | ~~~~~~~ 194 | 195 | This function returns a copy of the object as the others do. 196 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 |

Thymus-timeseries

2 |

An intuitive library tracking dates and timeseries in common using numpy 3 | arrays.

4 |

When working with arrays of timeseries, the manipulation process can easily 5 | cause mismatching sets of arrays in time, arrays in the wrong order, slow down 6 | the analysis, and lead to generally spending more time to ensure consistency.

7 |

This library attempts to address the problem in a way that enables ready access 8 | to the current date range, but stays out of your way most of the time. 9 | Essentially, this library is a wrapper around numpy arrays.

10 |

This library grew out of the use of market and trading data. The 11 | timeseries is typically composed of regular intervals but with gaps 12 | such as weekends and holidays. In the case of intra-day data, there are 13 | interuptions due to periods when the market is closed or gaps in trading.

14 |

While the library grew from addressing issues associated with market 15 | data, the implementation does not preclude use in other venues. Direct 16 | access to the numpy arrays is expected and the point of being able to use the 17 | library.

18 |

Dependencies

19 |

Other than NumPy being installed, there are no other requirements.

20 |

Installation

21 |

pip install thymus-timeseries

22 |

A Brief Look at Capabilities.

23 |

Creating a Small Sample Timeseries Object

24 |

As a first look, we will create a small timeseries object and show a few ways 25 | that it can used. For this example, we will use daily data. 26 | ``` 27 | from datetime import datetime 28 | import numpy as np

29 |
from thymus.timeseries import Timeseries
 30 | 
 31 | ts = Timeseries()
 32 | 
 33 | # elements of Timeseries()
 34 | key:                (an optional identifier for the timeseries)
 35 | columns: []         (an optional list of column names for the data)
 36 | frequency: d        (the d in this case refers to the default daily data.
 37 |                      current frequencies supported are sec, min, h, d, w,
 38 |                      m, q, y)
 39 | 
 40 | dseries:            (this is a numpy array of dates in numeric format)
 41 | 
 42 | tseries:            (this is a numpy array of data. most of the work takes
 43 |                         place here.)
 44 | 
 45 | end-of-period: True (this is a default indicating that the data is as of
 46 |                         the end of the period. This only comes into play when
 47 |                         converting from one frequency to another and will
 48 |                         be ignored for the moment.)
 49 | 
50 |

While normal usage of the timeseries object would involve pulling data from a 51 | database and inserting data into the timeseries object, we will use a 52 | quick-and-dirty method of inputting some data. Dates are stored as either 53 | ordinals or timestamps, avoiding clogging up memory with large sets of datetime 54 | objects. Because it is daily data, ordinals will be used for this example. 55 | ts = Timeseries()

56 |
start_date = datetime(2015, 12, 31).toordinal()
 57 | 
 58 | ts.dseries = start_date + np.arange(10)
 59 | ts.tseries = np.arange(10)
 60 | 
 61 | ts.make_arrays()
 62 | 
63 |

``` 64 | We created an initial timeseries object. It starts at the end of 65 | 2015 and continues for 10 days. Setting the values in dseries and 66 | tseries 67 | can be somewhat sloppy. For example, a list could be assigned initially to 68 | dseries (the dates) and a numpy array to tseries (the values).

69 |

The use of the make_arrays() function converts the date series to an int32 70 | array (because they are ordinal values) and tseries to a float64 array. The 71 | idea is that the data might often enter the timeseries object as lists, but 72 | then be converted to arrays of appropriate format for use.
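As a small sketch of that workflow (assuming the int32/float64 conversion described above), plain lists go in and numpy arrays come out:
```
from datetime import datetime
import numpy as np
from thymus.timeseries import Timeseries

ts = Timeseries()

# plain Python lists are fine on input
start_date = datetime(2015, 12, 31).toordinal()
ts.dseries = [start_date + i for i in range(10)]
ts.tseries = [float(i) for i in range(10)]

# make_arrays() converts both series to numpy arrays
ts.make_arrays()

print(ts.dseries.dtype)   # expected: int32 (ordinal dates)
print(ts.tseries.dtype)   # expected: float64 (values)
```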

73 |

The completed timeseries object is: 74 | ``` 75 | print(ts)

76 |
<Timeseries>
 77 | key: 
 78 | columns: []
 79 | frequency: d
 80 | daterange: ('2015-12-31', '2016-01-09')
 81 | end-of-period: True
 82 | shape: (10,)
 83 | 
84 |

``` 85 | You can see the date range contained in the date series. The shape refers 86 | to the shape of the tseries array. key and columns are free-form, 87 | available to update as appropriate to identify the timeseries and content of 88 | the columns. Again, the end-of-period flag can be ignored right now.
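For instance, a short sketch of attaching identifying metadata; the key and column names below are made up for illustration:
```
ts.key = 'closing_prices'
ts.columns = ['close']

print(ts.key)       # 'closing_prices'
print(ts.columns)   # ['close']
```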

89 |

Selection

90 |

Selection of elements is the same as numpy arrays. Currently, our sample has 91 | 10 elements. 92 | print(ts[:5]) 93 | <Timeseries> 94 | key: 95 | columns: [] 96 | frequency: d 97 | daterange: ('2015-12-31', '2016-01-04') 98 | end-of-period: True 99 | shape: (5,) 100 | Note how the date range above reflects the selected elements. 101 | ts1 = ts % 2 == 0 102 | ts1.tseries 103 | [ True False True False True False True False True False] 104 | We can isolate the dates of even numbers: 105 | ``` 106 | # note that tseries, not the timeseries obj, is explicitly used with 107 | # np.argwhere. More on when to operate directly on tseries later. 108 | evens = np.argwhere((ts % 2 == 0).tseries)

109 |
ts_even = ts[evens]
110 | 
111 | # this just prints a list of date and value pairs only useful with
112 | # very small sets (or examples like this)
113 | print(ts_even.items('str'))
114 | ('2015-12-31', '[0.0]')
115 | ('2016-01-02', '[2.0]')
116 | ('2016-01-04', '[4.0]')
117 | ('2016-01-06', '[6.0]')
118 | ('2016-01-08', '[8.0]')
119 | 
120 |

```
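As a rule of thumb suggested by the comment above: arithmetic and comparisons can go through the timeseries object itself, which keeps the dates attached, while numpy functions that expect a plain array (such as np.argwhere) are pointed at ts.tseries directly. A small sketch:
```
# stays a Timeseries -- the dates ride along with the values
ts_half = ts / 2
print(ts_half.tseries)

# numpy functions that return plain arrays work on tseries directly
rows = np.argwhere((ts % 2 == 0).tseries)
```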

121 |

Date-based Selection

122 |

So let us use a slightly larger timeseries: 1000 rows, 2 columns of data. And, 123 | use random values to ensure uselessness. 124 | ``` 125 | ts = Timeseries()

126 |
start_date = datetime(2015, 12, 31).toordinal()
127 | 
128 | ts.dseries = start_date + np.arange(1000)
129 | ts.tseries = np.random.random((1000, 2))
130 | 
131 | ts.make_arrays()
132 | 
133 | print(ts)
134 | 
135 | <Timeseries>
136 | key: 
137 | columns: []
138 | frequency: d
139 | daterange: ('2015-12-31', '2018-09-25')
140 | end-of-period: True
141 | shape: (1000, 2)
142 | 
143 |

```

144 |

You can select on the basis of date ranges, but first we will use a row number 145 | technique that is based on slicing. This function is called trunc() for 146 | truncation. 147 | ``` 148 | # normal truncation -- you will end up with a timeseries with rows 100 149 | # through 499. This provides in-place execution. 150 | ts.trunc(start=100, finish=500)

151 |
# this version returns a new timeseries, effective for chaining.
152 | ts1 = ts.trunc(start=100, finish=500, new=True)
153 | 
154 |

But suppose you want to select a specific date range? This leads to the next 155 | function, **truncdate()**. 156 | # select using datetime objects 157 | ts1 = ts.truncdate( 158 | start=datetime(2017, 1, 1), 159 | finish=datetime(2017, 12, 31), 160 | new=True)

161 |
print(ts1)
162 | 
163 | <Timeseries>
164 | key: 
165 | columns: []
166 | frequency: d
167 | daterange: ('2017-01-01', '2017-12-31')
168 | end-of-period: True
169 | shape: (365, 2)
170 | 
171 |

``` 172 | As you might expect, the timeseries object has a date range of all the days 173 | during 2017. But see how this is slightly different from slicing. When you use 174 | truncdate() it selects everything within the date range inclusive of the 175 | ending date as well. The idea is to avoid having to always find one day after 176 | the date range that you want to select to accommodate slicing behavior. This 177 | way is more convenient.
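To make the contrast concrete, here is a brief sketch of both selection styles side by side (new=True returns a copy rather than truncating in place):
```
# row-based: slicing rules, so the finish row is excluded
ts_rows = ts.trunc(start=100, finish=500, new=True)

# date-based: both endpoint dates are included
ts_2017 = ts.truncdate(
    start=datetime(2017, 1, 1),
    finish=datetime(2017, 12, 31),
    new=True)
```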

178 |

You can also convert data from a higher frequency to a lower frequency. Suppose 179 | we needed monthly data for 2017 from our timeseries. 180 | ``` 181 | start = datetime(2017, 1, 1) 182 | finish = datetime(2017, 12, 31) 183 | ts1 = ts.truncdate(start=start, finish=finish, new=True).convert('m')

184 |
print(ts1.items('str'))
185 | 
186 | ('2017-01-31', '[0.1724835781570483, 0.9856812220255055]')
187 | ('2017-02-28', '[0.3855043513164875, 0.30697511661843124]')
188 | ('2017-03-31', '[0.7067982987769881, 0.7680886691626396]')
189 | ('2017-04-30', '[0.07770763295126926, 0.04697651222041588]')
190 | ('2017-05-31', '[0.4473657194650975, 0.49443624153533783]')
191 | ('2017-06-30', '[0.3793816656495891, 0.03646544387811124]')
192 | ('2017-07-31', '[0.2783335012003322, 0.5144979569785825]')
193 | ('2017-08-31', '[0.9261879195281345, 0.6980224313957553]')
194 | ('2017-09-30', '[0.09531834159018227, 0.5435208082899813]')
195 | ('2017-10-31', '[0.6865842769906441, 0.7951735180348887]')
196 | ('2017-11-30', '[0.34901775001111657, 0.7014208950555662]')
197 | ('2017-12-31', '[0.4731393617405252, 0.630488855197775]')
198 | 
199 |

Or yearly. In this case, we use a flag that governs whether to include the partial period 200 | leading up to the last year. The default includes it. However, when that is unwanted, the flag 201 | **include_partial** can be set to False. 202 | ts1 = ts.convert('y', include_partial=True)

203 |
print(ts1.items('str'))
204 | 
205 | ('2015-12-31', '[0.2288539210230056, 0.288320541664724]')
206 | ('2016-12-31', '[0.5116274142615629, 0.21680312154651182]')
207 | ('2017-12-31', '[0.4731393617405252, 0.630488855197775]')
208 | ('2018-09-25', '[0.7634145837512148, 0.32026411425902257]')
209 | 
210 | ts2 = ts.convert('y', include_partial=False)
211 | 
212 | print(ts2.items('str'))
213 | 
214 | ('2015-12-31', '[[0.2288539210230056, 0.288320541664724]]')
215 | ('2016-12-31', '[[0.5116274142615629, 0.21680312154651182]]')
216 | ('2017-12-31', '[[0.4731393617405252, 0.630488855197775]]')
217 | 
218 |

```
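Conversion is not limited to months and years. As a sketch, and assuming the weekday keyword described in the function reference later in this document (the number follows datetime.weekday(), so 4 means Friday):
```
# weekly data anchored on Fridays
ts_weekly = ts.convert('w', weekday=4)

# quarterly data
ts_quarterly = ts.convert('q')
```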

219 |

Combining Timeseries

220 |

Suppose you want to combine multiple timeseries together that are of different 221 | lengths? In this case we assume that the two timeseries end on the same date, 222 | but one has a longer tail than the other. However, the operation that you need 223 | requires common dates.

224 |

By combine, we mean that instead of two timeseries we make one timeseries that has 225 | the columns of both. 226 | ``` 227 | ts_short = Timeseries() 228 | ts_long = Timeseries()

229 |
end_date = datetime(2016, 12, 31)
230 | 
231 | ts_short.dseries = [
232 |         (end_date + timedelta(days=-i)).toordinal()
233 |         for i in range(5)]
234 | 
235 | ts_long.dseries = [
236 |         (end_date + timedelta(days=-i)).toordinal()
237 |         for i in range(10)]
238 | 
239 | ts_short.tseries = np.zeros((5))
240 | ts_long.tseries = np.ones((10))
241 | 
242 | ts_short.make_arrays()
243 | ts_long.make_arrays()
244 | 
245 | ts_combine = ts_short.combine(ts_long)
246 | 
 247 | print(ts_combine.items('str'))
248 | 
249 | ('2016-12-31', '[0.0, 1.0]')
250 | ('2016-12-30', '[0.0, 1.0]')
251 | ('2016-12-29', '[0.0, 1.0]')
252 | ('2016-12-28', '[0.0, 1.0]')
253 | ('2016-12-27', '[0.0, 1.0]')
254 | 
255 |

The combine function has a couple of variations. While it can be helpful to automatically discard the 256 | unwanted rows, you can also enforce that combining does not take place if the number of rows does not 257 | match. Also, you can build out the missing information with padding to create a timeseries that has 258 | the length of the longest timeseries. 259 | # this would raise an error -- the two are different lengths 260 | ts_combine = ts_short.combine(ts_long, discard=False)

261 |
# this combines, and fills 99 as a missing value
 262 | ts_combine = ts_short.combine(ts_long, discard=False, pad=99)
263 | 
264 | print(ts_combine.items('str'))
265 | ('2016-12-31', '[0.0, 1.0]')
266 | ('2016-12-30', '[0.0, 1.0]')
267 | ('2016-12-29', '[0.0, 1.0]')
268 | ('2016-12-28', '[0.0, 1.0]')
269 | ('2016-12-27', '[0.0, 1.0]')
270 | ('2016-12-26', '[99.0, 1.0]')
271 | ('2016-12-25', '[99.0, 1.0]')
272 | ('2016-12-24', '[99.0, 1.0]')
273 | ('2016-12-23', '[99.0, 1.0]')
274 | ('2016-12-22', '[99.0, 1.0]')
275 | 
276 |

The combining can also receive multiple timeseries. 277 | ts_combine = ts_short.combine([ts_long, ts_long, ts_long])

278 |
print(ts_combine.items('str'))
279 | ('2016-12-31', '[0.0, 1.0, 1.0, 1.0]')
280 | ('2016-12-30', '[0.0, 1.0, 1.0, 1.0]')
281 | ('2016-12-29', '[0.0, 1.0, 1.0, 1.0]')
282 | ('2016-12-28', '[0.0, 1.0, 1.0, 1.0]')
283 | ('2016-12-27', '[0.0, 1.0, 1.0, 1.0]')
284 | 
285 |

```

286 |

Splitting Timeseries

287 |

In some ways it would make sense to mirror the combine() function 288 | with a split() from an aesthetic standpoint. However, splitting is very 289 | straightforward without such a function. For example, suppose you want a 290 | timeseries that only has the first two columns from our previous example. 291 | As you can see in the ts_split tseries, the first two columns were taken. 292 | ``` 293 | ts_split = ts_combine[:, :2]

294 |
print(ts_split.items('str'))
295 | ('2016-12-31', '[0.0, 1.0]')
296 | ('2016-12-30', '[0.0, 1.0]')
297 | ('2016-12-29', '[0.0, 1.0]')
298 | ('2016-12-28', '[0.0, 1.0]')
299 | ('2016-12-27', '[0.0, 1.0]')
300 | 
301 |

```

302 |

Arithmetic Operations

303 |

We have combined timeseries together, stacking up additional columns for the dates in common. In 304 | addition, we looked at the issue of mismatched lengths. Now we will look at 305 | arithmetic approaches and some of the design decisions and tradeoffs associated 306 | with mathematical operations.

307 |

We will start with the add() function. First, if we assume that all we are 308 | adding together are arrays that have exactly the same dateseries, and 309 | therefore the same length, and we assume they have exactly the same number of 310 | columns, then the whole question becomes trivial. If we relax those 311 | constraints, then some choices need to be made.

312 |

We will use the long and short timeseries from the previous example. 313 | ``` 314 | # this will fail due to dissimilar lengths 315 | ts_added = ts_short.add(ts_long, match=True)

316 |
# this will work
317 | ts_added = ts_short.add(ts_long, match=False)
318 | 
319 | [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
320 | 
321 |

``` 322 | The add() function checks to see if the number of columns match. If they do 323 | not, an error is raised. If the match flag is True, then it also checks 324 | that all the dates in both timeseries match prior to the operation.

325 |

If match is False, then as long as the columns are compatible, the 326 | operation can take place. It supports the concept of sparse arrays as 327 | well. For example, suppose you have a primary timeseries, and you would 328 | like to add in values from another timeseries for only a few dates within the range. This 329 | function will find the appropriate dates, adding in the values at just those 330 | rows.

331 |

To summarize, all dates in common to both timeseries will be included in the 332 | new timeseries if match is False.
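A sketch of that sparse usage, with made-up dates and values:
```
from datetime import datetime
import numpy as np
from thymus.timeseries import Timeseries

# a primary timeseries: one column of daily data for 2017
ts_main = Timeseries()
ts_main.dseries = datetime(2017, 1, 1).toordinal() + np.arange(365)
ts_main.tseries = np.zeros(365)
ts_main.make_arrays()

# a sparse timeseries: adjustments on just two dates in that range
ts_adj = Timeseries()
ts_adj.dseries = [
    datetime(2017, 3, 15).toordinal(),
    datetime(2017, 9, 1).toordinal()]
ts_adj.tseries = [100.0, 250.0]
ts_adj.make_arrays()

# match=False: only the rows with matching dates receive the additions
ts_new = ts_main.add(ts_adj, match=False)
```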

333 |

Because the previous function is somewhat specialized, you can assume that the 334 | checking of common dates and creating the new timeseries can be somewhat slower 335 | than other approaches.

336 |

If we assume some commonalities about our timeseries, then we can do our work 337 | in a more intuitive fashion.

338 |

Assumptions of Commonality

339 |

Let us assume that our timeseries might be varying in length, but we absolutely 340 | know what either our starting date or ending date is. And, let us assume that 341 | all the dates for the periods in common to the timeseries match.

342 |

If we accept those assumptions, then a number of operations become quite easy.

343 |

The timeseries object can accept simple arithmetic as if it is an array. It 344 | automatically passes the values on to the tseries array. If the two arrays 345 | are not the same length, the longer array is truncated to the shorter length. So 346 | if you were to add two arrays together that end at the same date, you would want 347 | to sort them latest date to earliest date using the function 348 | sort_by_date().
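A quick sketch of that convention, reusing the short and long series from the combine example and assuming both end on the same date:
```
# newest to oldest, so the common dates line up at row 0
ts_short.sort_by_date(reverse=True)
ts_long.sort_by_date(reverse=True)

# the longer series is truncated to the shorter length before adding
ts_sum = ts_short + ts_long
print(ts_sum.tseries.shape)
```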

349 |

Examples

350 |

``` 351 | # starting tseries 352 | ts.tseries 353 | [ 0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]

354 |
(ts + 3).tseries
355 | [  3.   4.   5.   6.   7.   8.   9.  10.  11.  12.]
356 | 
357 | # Also, reverse (__radd__)
358 | (3 + ts).tseries
359 | [  3.   4.   5.   6.   7.   8.   9.  10.  11.  12.]
360 | 
361 | # of course not just addition
362 | 5 * ts.tseries
363 | [  0.   5.  10.  15.  20.  25.  30.  35.  40.  45.]
364 | 
365 |

Also, in-place operations. But first, we will make a copy. 366 | ts1 = ts.clone() 367 | ts1.tseries /= 3 368 | print(ts1.tseries) 369 | [0.0 370 | 0.3333333333333333 371 | 0.6666666666666666 372 | 1.0 373 | 1.3333333333333333 374 | 1.6666666666666667 375 | 2.0 376 | 2.3333333333333335 377 | 2.6666666666666665 378 | 3.0]

379 |
ts1 = ts ** 3
380 | ts1.tseries
381 | 0.0
382 | 1.0
383 | 8.0
384 | 27.0
385 | 64.0
386 | 125.0
387 | 216.0
388 | 343.0
389 | 512.0
390 | 729.0
391 | 
392 | ts1 = 10 ** ts
393 | ts1.tseries
394 | [1.0
395 | 10.0
396 | 100.0
397 | 1000.0
398 | 10000.0
399 | 100000.0
400 | 1000000.0
401 | 10000000.0
402 | 100000000.0
403 | 1000000000.0]
404 | 
405 |

```

406 |

In other words, the normal container functions you can use with numpy arrays 407 | are available to the timeseries objects. The following container functions for 408 | arrays are supported. 409 | ``` 410 | pow add rsub sub eq ge gt le 411 | lt mod mul ne radd rmod rmul rpow 412 | abs pos neg invert rdivmod rfloordiv 413 | floordiv truediv 414 | rtruediv divmod

415 |
__and__ __or__ __ror__ __rand__ __rxor__ __xor__ __rshift__
416 | __rlshift__ __lshift__ __rrshift__
417 | 
418 | __iadd__ __ifloordiv__ __imod__ __imul__ __ipow__ __isub__
 419 | __itruediv__
420 | 
421 | __iand__ __ilshift__ __ior__ __irshift__ __ixor__
422 | 
423 |

```

424 |

Functions of Arrays Not Supported

425 |

The purpose of the timeseries objects is to implement an intuitive usage of 426 | timeseries objects in a fashion that is consistent with NumPy. However, it is 427 | not intended to replace functions that are better handled explicitly with 428 | the dseries and tseries arrays directly. The difference will be clear 429 | by 430 | comparing the list of functions for the timeseries object versus a numpy array. Most of the 431 | functions of the timeseries object are related to handling the commonality of date series with 432 | time series. You can see that the bulk of the thymus functions relate to maintenance of the 433 | coordination between the date series and timeseries. The meat of the functions still lies with the 434 | numpy arrays. 435 | ```

436 |

timeseries members and functions:

437 |

ts.add ts.daterange ts.get_pcdiffs ts.series_direction 438 | ts.as_dict ts.datetime_series ts.header ts.set_ones 439 | ts.as_json ts.dseries ts.if_dseries_match ts.set_zeros 440 | ts.as_list ts.end_date ts.if_tseries_match ts.shape 441 | ts.clone ts.end_of_period ts.items ts.sort_by_date 442 | ts.closest_date ts.extend ts.key ts.start_date 443 | ts.columns ts.fmt_date ts.lengths ts.trunc 444 | ts.combine ts.frequency ts.make_arrays ts.truncdate 445 | ts.common_length ts.get_date_series_type ts.months ts.tseries 446 | ts.convert ts.get_datetime ts.replace ts.years 447 | ts.date_native ts.get_diffs ts.reverse 448 | ts.date_string_series ts.get_duped_dates ts.row_no

449 |

numpy functions in the arrays

450 |

ts.tseries.T ts.tseries.cumsum ts.tseries.min ts.tseries.shape 451 | ts.tseries.all ts.tseries.data ts.tseries.nbytes ts.tseries.size 452 | ts.tseries.any ts.tseries.diagonal ts.tseries.ndim ts.tseries.sort 453 | ts.tseries.argmax ts.tseries.dot ts.tseries.newbyteorder ts.tseries.squeeze 454 | ts.tseries.argmin ts.tseries.dtype ts.tseries.nonzero ts.tseries.std 455 | ts.tseries.argpartition ts.tseries.dump ts.tseries.partition ts.tseries.strides 456 | ts.tseries.argsort ts.tseries.dumps ts.tseries.prod ts.tseries.sum 457 | ts.tseries.astype ts.tseries.fill ts.tseries.ptp ts.tseries.swapaxes 458 | ts.tseries.base ts.tseries.flags ts.tseries.put ts.tseries.take 459 | ts.tseries.byteswap ts.tseries.flat ts.tseries.ravel ts.tseries.tobytes 460 | ts.tseries.choose ts.tseries.flatten ts.tseries.real ts.tseries.tofile 461 | ts.tseries.clip ts.tseries.getfield ts.tseries.repeat ts.tseries.tolist 462 | ts.tseries.compress ts.tseries.imag ts.tseries.reshape ts.tseries.tostring 463 | ts.tseries.conj ts.tseries.item ts.tseries.resize ts.tseries.trace 464 | ts.tseries.conjugate ts.tseries.itemset ts.tseries.round ts.tseries.transpose 465 | ts.tseries.copy ts.tseries.itemsize ts.tseries.searchsorted ts.tseries.var 466 | ts.tseries.ctypes ts.tseries.max ts.tseries.setfield ts.tseries.view 467 | ts.tseries.cumprod ts.tseries.mean ts.tseries.setflags
468 | ```

469 |

Other Date Functions

470 |

Variations on a theme: 471 | ``` 472 | # truncation 473 | ts.truncdate( 474 | start=datetime(2017, 1, 1), 475 | finish=datetime(2017, 12, 31))

476 |
# just start date etc.
477 | ts.truncdate(
478 |     start=datetime(2017, 1, 1))
479 | 
480 | # this was in date order but suppose it was in reverse order?
481 | # this result will give the same answer
482 | ts1 = ts.truncdate(
483 |     start=datetime(2017, 1, 1),
484 |     new=True)
485 | 
486 | ts.reverse()
487 | 
488 | ts1 = ts.truncdate(
489 |     start=datetime(2017, 1, 1),
490 |     new=True)
491 | 
492 | # use the date format native to the dateseries (ordinal / timestamp)
493 | ts1 = ts.truncdate(
494 |     start=datetime(2017, 1, 1).toordinal(),
495 |     new=True)
496 | 
497 | # suppose you start with a variable that represents a date range
498 | # date range can be either a list or tuple
499 | ts.truncdate(
500 |     [datetime(2017, 1, 1), datetime(2017, 12, 31)])
501 | 
502 |

```

503 |

Assorted Date Functions

504 |

``` 505 | # native format 506 | ts.daterange() 507 | (735963, 735972)

508 |
# str format
509 | ts.daterange('str')
510 | ('2015-12-31', '2016-01-09')
511 | 
512 | # datetime format
513 | ts.daterange('datetime')
514 | (datetime.datetime(2015, 12, 31, 0, 0), datetime.datetime(2016, 1, 9, 0, 0))
515 | 
516 | # native format
517 | ts.start_date(); ts.end_date()
518 | 735963  735972
519 | 
520 | # str format
521 | ts.start_date('str'); ts.end_date('str')
522 | 2015-12-31  2016-01-09
523 | 
524 | # datetime format
525 | ts.start_date('datetime'); ts.end_date('datetime')
526 | 2015-12-31 00:00:00  2016-01-09 00:00:00
527 | 
528 |

``` 529 | Sometimes it is helpful to find a particular row based on the date. Also, that date might not be in 530 | the dateseries, and so the closest date will suffice.

531 |

We will create a sample timeseries to illustrate. 532 | ``` 533 | ts = Timeseries() 534 | ts.dseries = [] 535 | ts.tseries = []

536 |
start_date = datetime(2015, 12, 31)
537 | for i in range(40):
538 |     date = start_date + timedelta(days=i)
539 |     if date.weekday() not in [5, 6]:   # skipping weekends
540 | 
541 |         ts.dseries.append(date.toordinal())
542 |         ts.tseries.append(i)
543 | 
544 | ts.make_arrays()
545 | 
546 | # row_no, date
547 | (0, '2015-12-31')
548 | (1, '2016-01-01')
549 | (2, '2016-01-04')
550 | (3, '2016-01-05')
551 | (4, '2016-01-06')
552 | (5, '2016-01-07')
553 | (6, '2016-01-08')
554 | (7, '2016-01-11')
555 | (8, '2016-01-12')
556 | (9, '2016-01-13')
557 | (10, '2016-01-14')
558 | (11, '2016-01-15')
559 | (12, '2016-01-18')
560 | (13, '2016-01-19')
561 | (14, '2016-01-20')
562 | (15, '2016-01-21')
563 | (16, '2016-01-22')
564 | (17, '2016-01-25')
565 | (18, '2016-01-26')
566 | (19, '2016-01-27')
567 | (20, '2016-01-28')
568 | (21, '2016-01-29')
569 | (22, '2016-02-01')
570 | (23, '2016-02-02')
571 | (24, '2016-02-03')
572 | (25, '2016-02-04')
573 | (26, '2016-02-05')
574 | (27, '2016-02-08')
575 | 
576 | date1 = datetime(2016, 1, 7)    # existing date within date series
577 | date2 = datetime(2016, 1, 16)   # date falling on a weekend
578 | date3 = datetime(2015, 6, 16)   # date prior to start of date series
 579 | date4 = datetime(2016, 3, 8)    # date after the end of date series
580 | 
581 | # as datetime and in the series
582 | existing_row = ts.row_no(rowdate=date1, closest=1)
583 | 5
584 | 
585 | existing_date = ts.closest_date(rowdate=date1, closest=1)
586 | print(datetime.fromordinal(existing_date))
587 | 2016-01-07 00:00:00
588 | 
589 | # as datetime but date not in series
590 | next_row = ts.row_no(rowdate=date2, closest=1)
591 | 12
592 | 
593 | next_date = ts.closest_date(rowdate=date2, closest=1)
594 | print(datetime.fromordinal(next_date))
595 | 2016-01-18 00:00:00
596 | 
597 | prev_row = ts.row_no(rowdate=date2, closest=-1)
598 | 11
599 | 
600 | prev_date = ts.closest_date(rowdate=date2, closest=-1)
601 | print(datetime.fromordinal(prev_date))
602 | 2016-01-15 00:00:00
603 | 
604 | # this will fail -- date is outside the date series
605 | # as datetime but date not in series, look for earlier date
606 | ts.closest_date(rowdate=date3, closest=-1)
607 | 
608 | # this will fail -- date is outside the date series
609 | ts.closest_date(rowdate=date4, closest=1)
610 | 
611 |

```

612 |

Functions by Category

613 |

Output

614 |

ts.as_dict()

615 |
    Returns the time series as a dict with the date as the key and without
616 |     the header information.
617 | 
618 |

ts.as_json(indent=2)

619 |
    This function returns the timeseries in JSON format and includes the
620 |     header information.
621 | 
622 |

ts.as_list()

623 |
    Returns the timeseries as a list.
624 | 
625 |

ts.header()

626 |
    This function returns a dict of the non-timeseries data.
627 | 
628 |
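A quick sketch of how these output helpers relate to one another:
```
data = ts.as_dict()          # dict keyed by date, no header information
text = ts.as_json(indent=2)  # JSON string that includes the header
meta = ts.header()           # just the non-timeseries fields
```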

ts.items(fmt=None)

629 |
    This function returns the date series and the time series as if they
 630 |     were in one list. The term items is used to suggest the iteration of dicts
 631 |     where items are the key, value combination.
632 | 
633 |     if fmt == 'str':
634 |         the dates are output as strings
635 | 
636 |

ts.months(include_partial=True)

637 |
    This function provides a quick way to summarize daily (or finer)
 638 |     data as monthly data.
639 | 
640 |     It is basically a pass-through to the convert function with more
641 |     decoration of the months.
642 | 
643 |     Usage:
644 | 
645 |         months(include_partial=True)
646 | 
647 |         returns a dict with year-month as keys
648 | 
649 |

ts.years(include_partial=True)

650 |
    This function provides a quick way to summarize daily (or finer)
 651 |     data as yearly data.
652 | 
653 |     It is basically a pass-through to the convert function with more
654 |     decoration of the years.
655 | 
656 |     Usage:
657 | 
658 |         years(include_partial=True)
659 | 
660 |         returns a dict with year as keys
661 | 
662 |
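A brief sketch of both convenience wrappers; the key formats follow the descriptions above (year-month keys for months(), year keys for years()):
```
by_month = ts.months()                      # dict keyed by year-month
by_year = ts.years(include_partial=False)   # dict keyed by year
```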

ts.datetime_series()

663 |
    This function returns the dateseries converted to a list of
664 |     datetime objects.
665 | 
666 |

ts.date_string_series(dt_fmt=None)

667 |
    This function returns a list of the dates in the timeseries as
668 |     strings.
669 | 
670 |     Usage:
671 |         self.date_string_series(dt_fmt=None)
672 | 
673 |     dt_fmt is a datetime mask to alter the default formatting.
674 | 
675 |

Array Manipulation

676 |

ts.add(ts, match=True)

677 |
    Adds two timeseries together.
678 | 
679 |     if match is True:
680 |         means there should be a one to one corresponding date in each time
681 |         series.  If not raise error.
682 |     else:
683 |         means that timeseries with sporadic or missing dates can be added
684 | 
685 |     Note: this does not evaluate whether both timeseries have the same
686 |             number of columns. It will fail if they do not.
687 | 
688 |     Returns the timeseries. Not in-place.
689 | 
690 |

ts.clone()

691 |
    This function returns a copy of the timeseries.
692 | 
693 |

ts.combine(tss, discard=True, pad=None)

694 |
    This function combines timeseries into a single array. Combining in
695 |     this case means accumulating additional columns of information.
696 | 
697 |     Truncation takes place at the end of rows. So if the timeseries is
698 |     sorted from latest dates to earliest dates, the older values would be
699 |     removed.
700 | 
701 |     Usage:
702 |         self.combine(tss, discard=True, pad=None)
703 | 
704 |     Think of tss as the plural of timeseries.
705 | 
706 |     If discard:
707 |         Will truncate all timeseries lengths down to the shortest
708 |         timeseries.
709 | 
710 |     if discard is False:
711 |         An error will be raised if the all the lengths do not match
712 | 
713 |         unless:
714 |             if pad is not None:
715 |                 the shorter timeseries will be padded with the value pad.
716 | 
717 |     Returns the new ts.
718 | 
719 |

ts.common_length(ts1, ts2)

720 |
    This static method trims the lengths of two timeseries and returns two
721 |     timeseries with the same length.
722 | 
723 |     The idea is that in order to do array operations there must be a
724 |     common length for each timeseries.
725 | 
726 |     Reflecting the bias for using timeseries sorted from latest info to
727 |     earlier info, truncation takes place at the end of the array. That
728 |     way older less important values are removed if necessary.
729 | 
730 |     Usage:
731 |         ts1_new, ts2_new = self.common_length(ts1, ts2)
732 | 
733 |

ts.convert(new_freq, include_partial=True, **kwargs)

734 |
    This function returns the timeseries converted to another frequency,
735 |     such as daily to monthly.
736 | 
737 |     Usage:
738 |         convert(new_freq, include_partial=True, **kwargs)
739 | 
740 |     The only kwarg is
741 |         weekday=<some value>
742 | 
743 |     This is used when converting to weekly data. The weekday number
 744 |     corresponds to the datetime.weekday() function.
745 | 
746 |

ts.extend(ts, overlay=True)

747 |
    This function combines a timeseries to another, taking into account the
748 |     possibility of overlap.
749 | 
750 |     This assumes that the frequency is the same.
751 | 
752 |     This function is chiefly envisioned to extend a timeseries with
753 |     additional dates.
754 | 
755 |     Usage:
756 |         self.extend(ts, overlay=True)
757 | 
758 |     If overlay is True then the incoming timeseries will overlay
759 |     any values that are duplicated.
760 | 
761 |
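A sketch of the intended call pattern; the variable names are hypothetical, and the sketch only shows the call itself:
```
# ts_history holds older dates, ts_update holds newer dates that overlap
# its tail; with overlay=True the incoming values win on duplicated dates
ts_history.extend(ts_update, overlay=True)
```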

ts.trunc(start=None, finish=None, new=False)

762 |
    This function truncates in place, typically.
763 | 
764 |     truncate from (start:finish)
765 |     remember start is lowest number, latest date
766 | 
767 |     This truncation works on the basis of slicing, so
768 |     finish is not inclusive.
769 | 
770 |     Usage:
771 |         self.trunc(start=None, finish=None, new=False)
772 | 
773 |

ts.truncdate(start=None, finish=None, new=False)

774 |
    This function truncates in place on the basis of dates.
775 | 
776 |     Usage:
777 |         self.truncdate(start=None, finish=None, new=False)
778 | 
779 |     start and finish are dates, input as either datetime or the actual
780 |     internal format of the **dseries** (ordinals or timestamps).
781 | 
782 |     If the dates are not actually in the list, the starting date will
783 |     be the next viable date after the start date requested. If the finish
784 |     date is not available, the previous date from the finish date will be
785 |     the last.
786 | 
787 |     If new is True, the timeseries will not be modified in place. Rather
788 |     a new timeseries will be returned instead.
789 | 
790 |

ts.replace(ts, match=True)

791 |
    This function replaces values where the dates match an incoming
792 |     timeseries. So if the incoming date on the timeseries matches, the
793 |     value in the current timeseries will be replaced by the incoming
794 |     timeseries.
795 | 
796 |     Usage:
797 |         self.replace(ts, match=True)
798 | 
 799 |     If match is False, the incoming timeseries may have dates not found in
800 |     the self timeseries.
801 | 
802 |     Returns the modified timeseries. Not in place.
803 | 
804 |

ts.reverse()

805 |
    This function does in-place reversal of the timeseries and dateseries.
806 | 
807 |

ts.get_diffs()

808 |
    This function gets the differences between values from date to date in
809 |     the timeseries.
810 | 
811 |

ts.get_pcdiffs()

812 |
    This function gets the percent differences between values in the
813 |     timeseries.
814 | 
815 |     No provision for dividing by zero here.
816 | 
817 |

ts.set_ones(fmt=None, new=False)

818 |
    This function converts an existing timeseries to ones using the same
819 |     shape as the existing timeseries.
820 | 
821 |     It is used as a convenience to create an empty timeseries with a
822 |     specified date range.
823 | 
824 |     if fmt use as shape
825 | 
826 |     usage:
827 |         set_ones(self, fmt=None, new=False)
828 | 
829 |

ts.set_zeros(fmt=None, new=False)

830 |
    This function converts an existing timeseries to zeros using the same
831 |     shape as the existing timeseries.
832 | 
833 |     It is used as a convenience to create an empty timeseries with a
834 |     specified date range.
835 | 
836 |     if fmt use as shape
837 | 
838 |     usage:
839 |         set_zeros(self, fmt=None, new=False)
840 | 
841 |
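A small sketch of using these as scaffolding for an empty series over an existing date range, assuming new=True returns a copy as with the other functions that take a new flag:
```
# zero-filled copy that keeps the original date range
ts_blank = ts.set_zeros(new=True)

# or ones, e.g. as a starting point for building masks
ts_mask = ts.set_ones(new=True)
```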

ts.sort_by_date(reverse=False, force=False)

842 |
    This function converts a timeseries to either date order or reverse
843 |     date order.
844 | 
845 |     Usage:
846 |         sort_by_date(self, reverse=False, force=False)
847 | 
848 |     If reverse is True, then order will be newest to oldest.
849 |     If force is False, the assumption is made that comparing the first
850 |     and last date will determine the current order of the timeseries. That
851 |     would mean that unnecessary sorting can be avoided. Also, if the order
852 |     needs to be reversed, the sort is changed via the less expensive
853 |     reverse function.
854 | 
855 |     If dates and values are in no particular order, with force=True, the
856 |     actual sort takes place.
857 | 
858 |     This function changes the data in-place.
859 | 
860 |

Evaluation

861 |

ts.daterange(fmt=None)

862 |
    This function returns the starting and ending dates of the timeseries.
863 | 
864 |     Usage:
865 | 
866 |         self.daterange()
867 |             (735963, 735972)
868 | 
869 |         self.daterange('str')
870 |             ('2015-12-31', '2016-01-09')
871 | 
872 |         self.daterange('datetime')
873 |             (datetime(2015, 12, 31, 0, 0),
874 |              datetime.datetime(2016, 1, 9, 0, 0))
875 | 
876 |

ts.start_date(fmt=None)

877 |
    This function returns the starting date of the timeseries in its
878 |     native value, timestamp or ordinal.
879 | 
880 |     If fmt is 'str' returns in string format
 881 |     If fmt is 'datetime' returns in datetime format
882 | 
883 |

ts.end_date(fmt=None)

884 |
    This function returns the ending date of the timeseries in its native
885 |     value, timestamp or ordinal.
886 | 
887 |     If fmt is 'str' returns in string format
 888 |     If fmt is 'datetime' returns in datetime format
889 | 
890 |

ts.get_duped_dates()

891 |
    This function pulls dates that are duplicated. This is to be used to
892 |     locate timeseries that are faulty.
893 | 
894 |     Usage:
895 |         get_duped_dates()
896 | 
897 |         returns [[odate1, count], [odate2, count]]
898 | 
899 |

ts.series_direction()

900 |
    if a lower row is a lower date, then 1 for ascending
901 |     if a lower row is a higher date then -1 for descending
902 | 
903 |

ts.get_date_series_type()

904 |
    This function returns the date series type associated with the
905 |     timeseries.  The choices are TS_ORDINAL or TS_TIMESTAMP.
906 | 
907 |

ts.if_dseries_match(ts)

908 |
    This function returns True if the date series are the same.
909 | 
910 |

ts.if_tseries_match(ts)

911 |
    This function returns True if the time series are the same.
912 | 
913 |

Utilities

914 |

ts.date_native(date)

915 |
    This awkwardly named function returns a date in the native format of
 916 |     the timeseries, namely ordinal or timestamp.
917 | 
918 |

ts.row_no(rowdate, closest=0, no_error=False)

919 |
    Shows the row in the timeseries
920 | 
921 |     Usage:
 922 |         ts.row_no(rowdate=<datetime>)
 923 |         ts.row_no(rowdate=<date as either ordinal or timestamp>)
924 | 
925 |     Returns an error if the date is not found in the index
926 | 
927 |     if closest is invoked:
928 |         closest = 1
929 |             find the closest date after the rowdate
930 |         closest = -1
931 |             find the closest date before the rowdate
932 | 
933 |     If no_error
934 |         returns -1 instead of raising an error if the date was
935 |         outside of the timeseries.
936 | 
937 |

ts.get_datetime(date)

938 |
    This function returns a date as a datetime object.
939 |     This takes into account the type of date stored in **dseries**.
940 | 
941 |     Usage:
942 |         self.get_datetime(date)
943 | 
944 |

ts.lengths()

945 |
    This function returns the lengths of both the date series and time
946 |     series. Both numbers are included in case a mismatch has occurred.
947 | 
948 |

ts.shape()

949 |
    This function returns the shape of the timeseries. This is a shortcut
950 |     to putting in ts.tseries.shape.
951 | 
952 |

ts.fmt_date(numericdate, dt_type, dt_fmt=None)

953 |
    This static method accepts a date and converts it to
954 |     the format used in the timeseries.
955 | 
956 |

ts.make_arrays()

957 |
    Convert the date and time series (if they are lists) to numpy arrays
958 | 
959 |

ts.get_fromDB(**kwargs)

960 |
    This is just a stub to suggest a viable name for getting data from a
961 |     database.
962 | 
963 |

ts.save_toDB(**kwargs):

964 |
    This is just a stub to suggest a viable name for saving data to a
965 |     database.
966 | 
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "Thymus-timeseries" 3 | description = "An intuitive library tracking dates and timeseries in common using NumPy arrays." 4 | readme = {file = "README.md", content-type="text/markdown"} 5 | version = "0.3.5" 6 | authors = [{name="Don Smiley", email="dsmiley@sidorof.com"}] 7 | dependencies = [ 8 | "numpy", 9 | ] 10 | license = {text = "MIT License"} 11 | requires-python = ">= 3.10" 12 | 13 | [project.urls] 14 | Homepage = "https://github.com/sidorof/Thymus-timeseries" 15 | Doc_URL = "https://sidorof.github.io/Thymus-timeseries/" 16 | 17 | [build-system] 18 | requires = ["setuptools", "wheel"] 19 | build-backend = "setuptools.build_meta" 20 | 21 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | 3 | -------------------------------------------------------------------------------- /src/thymus/__init__.py: -------------------------------------------------------------------------------- 1 | # thymus/__init__.py 2 | from .timeseries import Timeseries 3 | from .tsslist import TssList 4 | from .tssdict import TssDict 5 | -------------------------------------------------------------------------------- /src/thymus/constants.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module holds the constants used in the other modules. 3 | 4 | """ 5 | 6 | CACHE_PATH = "cache" 7 | TS_ORDINAL = "ordinal" 8 | TS_TIMESTAMP = "timestamp" 9 | 10 | FREQ_D = "d" 11 | FREQ_W = "w" 12 | FREQ_M = "m" 13 | FREQ_Q = "q" 14 | FREQ_Y = "y" 15 | 16 | FREQ_H = "h" 17 | FREQ_MIN = "min" 18 | FREQ_SEC = "sec" 19 | 20 | FREQ_DAYTYPES = (FREQ_D, FREQ_W, FREQ_M, FREQ_Q, FREQ_Y) 21 | FREQ_IDAYTYPES = (FREQ_H, FREQ_MIN, FREQ_SEC) 22 | -------------------------------------------------------------------------------- /src/thymus/freq_conversions.py: -------------------------------------------------------------------------------- 1 | # freq_conversions.py 2 | """ 3 | This module converts timeseries from one type to another. 4 | 5 | """ 6 | 7 | import numpy as np 8 | 9 | from .constants import FREQ_D, FREQ_M, FREQ_Q, FREQ_Y 10 | from .constants import FREQ_H, FREQ_MIN, FREQ_SEC 11 | from .constants import TS_ORDINAL, TS_TIMESTAMP 12 | 13 | HIERARCHY = (FREQ_SEC, FREQ_MIN, FREQ_H, FREQ_D, FREQ_M, FREQ_Q, FREQ_Y) 14 | 15 | 16 | def _q_test(date, kwargs): 17 | """ 18 | Computes quarterly indicators. 19 | 20 | No expectation of using kwargs in this. It is for consistency. 21 | """ 22 | 23 | return date.month % 3 == 0 24 | 25 | 26 | def _weekday_test(date, kwargs): 27 | """ 28 | Computes weekly indicators. 29 | """ 30 | 31 | if "weekday" in kwargs: 32 | weekday = kwargs["weekday"] 33 | 34 | if date.weekday() == weekday: 35 | return 1 36 | else: 37 | return 0 38 | else: 39 | return date.weekday() 40 | 41 | 42 | def _filter_dates(dates, freq, kwargs): 43 | """ 44 | This function filters dates to indicate end of periods for ordinals. 
45 | """ 46 | 47 | indicator = DATETIME_DICT[freq] 48 | 49 | if isinstance(indicator, str): 50 | # no special behavior 51 | indicators = np.fromiter( 52 | [date.__getattribute__(indicator) for date in dates], 53 | dtype=np.int32, 54 | ) 55 | 56 | return np.argwhere(indicators[1:] - indicators[:-1] > 0) 57 | 58 | else: 59 | # apply a function 60 | indicators = np.fromiter( 61 | [indicator(date, kwargs) for date in dates], dtype=np.int32 62 | ) 63 | 64 | return np.argwhere(indicators[1:] - indicators[:-1] > 0) 65 | 66 | 67 | def _filter_idates(dates, freq, end_of_period, **kwargs): 68 | """ 69 | This function filters dates to indicate end of periods for timestamps. 70 | """ 71 | 72 | indicator = DATETIME_DICT[freq] 73 | 74 | if isinstance(indicator, str): 75 | # no special behavior 76 | indicators = np.fromiter( 77 | [date.__getattribute__(indicator) for date in dates], 78 | dtype=np.int32, 79 | ) 80 | 81 | selected = np.argwhere(indicators[1:] - indicators[:-1] > 0) 82 | 83 | # check special case of start date 84 | 85 | if end_of_period is False: 86 | if dates[-1].__getattribute__(indicator) == 0: 87 | selected = np.append(selected, len(dates) - 1) 88 | 89 | return selected 90 | 91 | else: 92 | # apply a function -- here for completeness at the moment 93 | # could apply to 5 minute data for example 94 | return np.fromiter( 95 | [indicator(date, kwargs) for date in dates], dtype=np.int32 96 | ) 97 | 98 | 99 | DATETIME_DICT = { 100 | # 'sec': 'second', 101 | "min": "second", 102 | "h": "minute", 103 | "d": "hour", 104 | "w": _weekday_test, 105 | "m": "day", 106 | "q": _q_test, 107 | "y": "month", 108 | } 109 | 110 | 111 | def convert(ts, new_freq, include_partial=True, **kwargs): 112 | """ 113 | This function converts a timeseries to another frequency. Conversion only 114 | works from a higher frequency to a lower frequency, for example daily to 115 | monthly. 116 | 117 | NOTE: add a gatekeeper for invalid kwargs. 118 | """ 119 | 120 | new_ts = ts.clone() 121 | series_dir = ts.series_direction() 122 | new_ts.sort_by_date(reverse=True) 123 | 124 | freq_idx = HIERARCHY.index(ts.frequency) 125 | new_idx = HIERARCHY.index(FREQ_Q) 126 | daily_idx = HIERARCHY.index(FREQ_D) 127 | 128 | if freq_idx > new_idx: 129 | raise ValueError( 130 | "Cannot convert from %s to %s." 
% (ts.frequency, new_freq) 131 | ) 132 | 133 | dates = new_ts.datetime_series() 134 | 135 | date_series_type = ts.get_date_series_type() 136 | if date_series_type == TS_ORDINAL: 137 | selected = _filter_dates(dates, new_freq, kwargs) 138 | elif date_series_type == TS_TIMESTAMP: 139 | selected = _filter_idates( 140 | dates, new_freq, end_of_period=ts.end_of_period 141 | ) 142 | else: 143 | raise ValueError("Invalid date series type: %s" % (date_series_type)) 144 | 145 | if selected.shape[0] > 0: 146 | if new_ts.end_of_period: 147 | selected += 1 # shift to start of next period 148 | 149 | if include_partial or freq_idx > daily_idx: 150 | if selected[0] != 0: 151 | # insert most recent date 152 | # selected = np.insert(selected, 0, 0) 153 | # np.insert(arr, obj, values, axis=None) 154 | selected = np.insert(selected, 0, 0) 155 | 156 | if freq_idx > daily_idx: 157 | # already processed (probably) 158 | if selected[-1] != len(dates) - 1: 159 | selected = np.append(selected, len(dates) - 1) 160 | 161 | new_ts.tseries = new_ts.tseries[selected.flatten()] 162 | 163 | new_ts.frequency = new_freq 164 | 165 | if new_freq == FREQ_D: 166 | # convert dates from timestamp to ordinal 167 | new_ts.dseries = np.fromiter( 168 | [date.toordinal() for date in np.array(dates)[selected]], 169 | dtype=np.int32, 170 | ) 171 | else: 172 | new_ts.dseries = new_ts.dseries[selected] 173 | 174 | new_ts.dseries = new_ts.dseries.flatten() 175 | 176 | if series_dir != new_ts.series_direction(): 177 | new_ts.reverse() 178 | 179 | return new_ts 180 | -------------------------------------------------------------------------------- /src/thymus/point.py: -------------------------------------------------------------------------------- 1 | # thymus/point.py 2 | """ 3 | This module implements the Point class. 4 | """ 5 | 6 | 7 | class Point(object): 8 | """ 9 | This class enables a combined look at a specific point in a 10 | time series. While the purpose of Thymus-Timeseries is the 11 | separation of dates and values, there are times when having 12 | both together can be very useful. 13 | 14 | Usage: 15 | point = Point(ts, row) 16 | 17 | This produces an object: 18 | 19 | point.ts a reference to the parent time series 20 | point.date ordinal/timestamp 21 | point.values an array of values 22 | 23 | If columns are being used for the object, properties 24 | are created for each column / value pair. 25 | 26 | For example: 27 | 28 | ts.columns = ["dog", "cat", "squirrel"]} 29 | 30 | Properties will have 31 | point.dog 32 | point.cat 33 | point.squirrel 34 | 35 | as well as values, as referenced above. 36 | 37 | Setting point.dog to a new value will update the column in 38 | ts.tseries[row]. 39 | 40 | print(point) 41 | 45 | 46 | """ 47 | 48 | def __init__(self, ts, row_no): 49 | self.ts = ts 50 | self.row_no = row_no 51 | 52 | if len(self.ts.tseries.shape) == 2: 53 | shape = self.ts.tseries.shape 54 | else: 55 | shape = (self.ts.tseries.shape, 1) 56 | 57 | if ts.columns and len(ts.columns) == shape[1]: 58 | for idx, column in enumerate(ts.columns): 59 | 60 | def get_value(self, idx=idx): 61 | return self.values[idx] 62 | 63 | get_value.__doc__ = f"Gets value from column {idx}." 64 | 65 | def set_value(self, value, idx=idx): 66 | self.values[idx] = value 67 | 68 | set_value.__doc__ = f"Sets value in column {idx}." 69 | 70 | setattr(self.__class__, column, property(get_value, set_value)) 71 | 72 | @property 73 | def values(self): 74 | """ 75 | The values found at ts.tseries[row_no]. 76 | 77 | These values cannot be changed directly. 
However, that is 78 | possible using the generated column variables. 79 | """ 80 | return self.ts.tseries[self.row_no] 81 | 82 | @property 83 | def date(self): 84 | """The date found at ts.dseries[row_no].""" 85 | return self.ts.dseries[self.row_no] 86 | 87 | def __repr__(self, line_break=60): 88 | """ 89 | If the line of column/values will be over 60 characters, the 90 | output will be vertical for easier viewing. 91 | """ 92 | if len(self.ts.tseries.shape) == 1: 93 | # wrong shape 94 | if self.ts.columns: 95 | values = f"{self.ts.columns[0]}: {self.values}" 96 | else: 97 | values = self.values 98 | 99 | elif self.ts.columns: 100 | values = ", ".join( 101 | [ 102 | f"{column}: {value}" 103 | for column, value in zip(self.ts.columns, self.values) 104 | ] 105 | ) 106 | if len(values) > line_break: 107 | values = "\n " + values.replace(", ", "\n ") 108 | else: 109 | values = self.values 110 | 111 | sdate = self.date_str() 112 | name = self.__class__.__name__ 113 | row_no = self.row_no 114 | return f"<{name}: row_no: {row_no}, date: {sdate}, {values} />" 115 | 116 | def date_str(self): 117 | """ 118 | Returns the date in string format. 119 | """ 120 | return self.ts.fmt_date(self.date, self.ts.get_date_series_type()) 121 | 122 | def datetime(self): 123 | """ 124 | Returns the date as a date/datetime object. 125 | """ 126 | return self.ts.get_datetime(self.date) 127 | 128 | def to_dict(self, dt_fmt=None): 129 | """ 130 | This function returns a dict of the point variables. 131 | 132 | Usage: 133 | to_dict(dt_fmt=None) 134 | 135 | Parameters: 136 | dt_fmt: (None|str) : Format choice is "str" or "datetime" 137 | 138 | Returns: 139 | point (dict) 140 | """ 141 | 142 | pdict = {"row_no": self.row_no} 143 | 144 | if dt_fmt is None: 145 | pdict["date"] = self.date 146 | elif dt_fmt == "str": 147 | pdict["date"] = self.date_str() 148 | elif dt_fmt == "datetime": 149 | pdict["date"] = self.datetime() 150 | else: 151 | raise ValueError("Invalid dt_fmt: choices(None,str, datetime)") 152 | if self.ts.columns: 153 | tmp = dict( 154 | [ 155 | [column, value] 156 | for column, value in zip(self.ts.columns, self.values) 157 | ] 158 | ) 159 | 160 | pdict.update(tmp) 161 | return pdict 162 | -------------------------------------------------------------------------------- /src/thymus/tsproto.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements the class Timeseries 3 | 4 | """ 5 | 6 | from copy import deepcopy 7 | import numpy as np 8 | 9 | from .constants import TS_ORDINAL, TS_TIMESTAMP, FREQ_DAYTYPES 10 | from .constants import FREQ_IDAYTYPES, FREQ_D 11 | 12 | 13 | class TsProto(object): 14 | """ 15 | This class implements the low-level functions of the Timeseries class. 16 | 17 | By separating the these functions the code base can be broken into more 18 | managable chunks. 19 | 20 | Most of these functions are handed off the container functions in arrays. 
21 | """ 22 | 23 | def __init__(self): 24 | self.frequency = FREQ_D 25 | 26 | self.tseries = None 27 | self.dseries = None 28 | self.end_of_period = True 29 | 30 | self.key = "" 31 | self.columns = [] 32 | 33 | def __iadd__(self, other): 34 | """ts += other""" 35 | return self._proc_func(new=False, func="__iadd__", other=other) 36 | 37 | def __irshift__(self, other): 38 | """'Return self>>=value.""" 39 | return self._proc_func(new=False, func="__irshift__", other=other) 40 | 41 | def __iand__(self, other): 42 | """Return self&=value.""" 43 | return self._proc_func(new=False, func="__iand__", other=other) 44 | 45 | def __isub__(self, other): 46 | """ts -= other""" 47 | return self._proc_func(new=False, func="__isub__", other=other) 48 | 49 | def __ifloordiv__(self, other): 50 | """ts /= other""" 51 | return self._proc_func(new=False, func="__ifloordiv__", other=other) 52 | 53 | def __ilshift__(self, other): 54 | """Return self<<=value.""" 55 | return self._proc_func(new=False, func="__ilshift__", other=other) 56 | 57 | def __itruediv__(self, other): 58 | """ts /= other""" 59 | return self._proc_func(new=False, func="__itruediv__", other=other) 60 | 61 | def __imod__(self, other): 62 | """ts %= other""" 63 | return self._proc_func(new=False, func="__imod__", other=other) 64 | 65 | def __ior__(self, other): 66 | """Return self|=value.""" 67 | return self._proc_func(new=False, func="__ior__", other=other) 68 | 69 | def __ixor__(self, other): 70 | """Return self^=value.""" 71 | return self._proc_func(new=False, func="__ixor__", other=other) 72 | 73 | def __imul__(self, other): 74 | """ts *= other""" 75 | return self._proc_func(new=False, func="__imul__", other=other) 76 | 77 | def __ipow__(self, other): 78 | """ts **= other""" 79 | return self._proc_func(new=False, func="__ipow__", other=other) 80 | 81 | def __radd__(self, other): 82 | """Return other + ts.""" 83 | return self._proc_func(new=True, func="__radd__", other=other) 84 | 85 | def __rtruediv__(self, other): 86 | """Return other / ts.""" 87 | return self._proc_func(new=True, func="__rtruediv__", other=other) 88 | 89 | def __rand__(self, other): 90 | """Return other & ts.""" 91 | return self._proc_func(new=True, func="__rand__", other=other) 92 | 93 | def __rxor__(self, other): 94 | """Return value^self.""" 95 | return self._proc_func(new=True, func="__rxor__", other=other) 96 | 97 | def __rdivmod__(self, other): 98 | """Return divmod(value, self)""" 99 | return self._proc_func(new=True, func="__rdivmod__", other=other) 100 | 101 | def __rmod__(self, other): 102 | """Return other % ts.""" 103 | return self._proc_func(new=True, func="__rmod__", other=other) 104 | 105 | def __rmul__(self, other): 106 | """Return other * ts.""" 107 | return self._proc_func(new=True, func="__rmul__", other=other) 108 | 109 | def __rfloordiv__(self, other): 110 | """Return value//self.""" 111 | return self._proc_func(new=True, func="__rfloordiv__", other=other) 112 | 113 | def __rlshift__(self, other): 114 | """Return self<>self.""" 131 | return self._proc_func(new=True, func="__rrshift__", other=other) 132 | 133 | def __abs__(self): 134 | """Return abs(ts).""" 135 | return self._proc_func(new=True, func="__abs__", other=None) 136 | 137 | def __pos__(self): 138 | """Return +ts.""" 139 | return self._proc_func(new=True, func="__pos__", other=None) 140 | 141 | def __neg__(self): 142 | """Return -ts.""" 143 | return self._proc_func(new=True, func="__neg__", other=None) 144 | 145 | def __invert__(self): 146 | """not ts.""" 147 | return self._proc_func(new=True, 
func="__invert__", other=None) 148 | 149 | def __add__(self, other): 150 | """ts + other. 151 | 152 | Related functions: 153 | add() 154 | """ 155 | return self._proc_func(new=True, func="__add__", other=other) 156 | 157 | def __pow__(self, other): 158 | """Return ts ** other.""" 159 | return self._proc_func(new=True, func="__pow__", other=other) 160 | 161 | def __and__(self, other): 162 | """Return ts & other.""" 163 | return self._proc_func(new=True, func="__and__", other=other) 164 | 165 | def __divmod__(self, other): 166 | """Return other + ts.""" 167 | return self._proc_func(new=True, func="__divmod__", other=other) 168 | 169 | def __eq__(self, other): 170 | """Return other + ts.""" 171 | return self._proc_func(new=True, func="__eq__", other=other) 172 | 173 | def __floordiv__(self, other): 174 | """Return other + ts.""" 175 | return self._proc_func(new=True, func="__floordiv__", other=other) 176 | 177 | def __ge__(self, other): 178 | """Return other + ts.""" 179 | return self._proc_func(new=True, func="__ge__", other=other) 180 | 181 | def __sub__(self, other): 182 | """Return other + ts.""" 183 | return self._proc_func(new=True, func="__sub__", other=other) 184 | 185 | def __truediv__(self, other): 186 | """Return other + ts.""" 187 | return self._proc_func(new=True, func="__truediv__", other=other) 188 | 189 | def __gt__(self, other): 190 | """Return other + ts.""" 191 | return self._proc_func(new=True, func="__gt__", other=other) 192 | 193 | def __xor__(self, other): 194 | """Return other + ts.""" 195 | return self._proc_func(new=True, func="__xor__", other=other) 196 | 197 | def __le__(self, other): 198 | """Return other + ts.""" 199 | return self._proc_func(new=True, func="__le__", other=other) 200 | 201 | def __rshift__(self, other): 202 | """Return self>>value.""" 203 | return self._proc_func(new=True, func="__rshift__", other=other) 204 | 205 | def __lshift__(self, other): 206 | """Return other + ts.""" 207 | return self._proc_func(new=True, func="__lshift__", other=other) 208 | 209 | def __lt__(self, other): 210 | """Return other + ts.""" 211 | return self._proc_func(new=True, func="__lt__", other=other) 212 | 213 | def __mod__(self, other): 214 | """Return other + ts.""" 215 | return self._proc_func(new=True, func="__mod__", other=other) 216 | 217 | def __mul__(self, other): 218 | """Return other + ts.""" 219 | return self._proc_func(new=True, func="__mul__", other=other) 220 | 221 | def __ne__(self, other): 222 | """Return other + ts.""" 223 | return self._proc_func(new=True, func="__ne__", other=other) 224 | 225 | def __or__(self, other): 226 | """Return other + ts.""" 227 | return self._proc_func(new=True, func="__or__", other=other) 228 | 229 | def _proc_func(self, new, func, other=None): 230 | """Processes container function. 231 | 232 | Reorganize this. 233 | """ 234 | 235 | if new: 236 | tmp_ts = self.clone() 237 | 238 | if isinstance(other, TsProto): 239 | self._column_checks(tmp_ts, other) 240 | tmp_ts, other = self.common_length(tmp_ts, other) 241 | 242 | if other is None: 243 | # there may be others that should be in this list. 244 | # at the moment, fixing a problem. 
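                # Unary funcs (__abs__, __neg__, ...) are called with no
                # argument here, while __eq__/__ne__ still receive the None
                # so the comparison is delegated to the numpy array.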
245 | if func in ("__eq__", "__ne__"): 246 | tmp_ts.tseries = getattr(tmp_ts.tseries, func)(other) 247 | else: 248 | tmp_ts.tseries = getattr(tmp_ts.tseries, func)() 249 | else: 250 | if isinstance(other, TsProto): 251 | tmp_ts.tseries = getattr(tmp_ts.tseries, func)( 252 | other.tseries 253 | ) 254 | else: 255 | tmp_ts.tseries = getattr(tmp_ts.tseries, func)(other) 256 | 257 | return tmp_ts 258 | else: 259 | if other is None: 260 | self.tseries = (getattr(self.tseries, func)(),) 261 | return self 262 | else: 263 | if isinstance(other, TsProto): 264 | self._column_checks(self, other) 265 | self, other = self.common_length(self, other) 266 | self.tseries = getattr(self.tseries, func)(other.tseries) 267 | else: 268 | self.tseries = getattr(self.tseries, func)(other) 269 | return self 270 | 271 | @staticmethod 272 | def _column_checks(ts1, ts2): 273 | """ 274 | This function raises an error if the columns on the two timeseries 275 | do not match. 276 | """ 277 | status = False 278 | if len(ts1.tseries.shape) == 2 and len(ts2.tseries.shape) != 2: 279 | status = True 280 | elif len(ts1.tseries.shape) != 2 and len(ts2.tseries.shape) == 2: 281 | status = True 282 | 283 | elif len(ts1.tseries.shape) == 2 and len(ts2.tseries.shape) == 2: 284 | if ts1.tseries.shape[1] != ts2.tseries.shape[1]: 285 | status = True 286 | 287 | else: 288 | pass 289 | 290 | if status: 291 | raise ValueError( 292 | " ".join( 293 | ["Both timeseries must have the same columns:", "%s vs %s"] 294 | ) 295 | % (ts1.tseries.shape, ts2.tseries.shape) 296 | ) 297 | 298 | @staticmethod 299 | def _array_size_check(ts1, ts2): 300 | """Raise error if timeseries' shapes do not match.""" 301 | if np.array_equal(ts1, ts2): 302 | return True 303 | else: 304 | raise ValueError( 305 | " ".join( 306 | ["Sizes of the timeseries are different:", "%s vs %s"] 307 | ) 308 | % (ts1.tseries.shape, ts2.tseries.shape) 309 | ) 310 | 311 | def make_arrays(self): 312 | """ 313 | Convert the date and time series lists (if so) to numpy arrays 314 | """ 315 | self.tseries = self._make_array(self.tseries, np.float64) 316 | 317 | if self.get_date_series_type() == TS_ORDINAL: 318 | self.dseries = self._make_array(self.dseries, np.int32).flatten() 319 | else: 320 | self.dseries = self._make_array(self.dseries, np.float64).flatten() 321 | 322 | @staticmethod 323 | def _make_array(convert_list, numtype): 324 | """ 325 | Converts a list to numpy array 326 | """ 327 | return np.array(convert_list, numtype) 328 | 329 | def lengths(self): 330 | """ 331 | This function returns the lengths of both the date series and time 332 | series. Both numbers are included in case a mismatch has occurred. 333 | """ 334 | return (len(self.dseries), len(self.tseries)) 335 | 336 | def get_date_series_type(self): 337 | """ 338 | This function returns the date series type associated with the 339 | timeseries. The choices are TS_ORDINAL or TS_TIMESTAMP. 340 | 341 | """ 342 | 343 | if self.frequency in FREQ_DAYTYPES: 344 | return TS_ORDINAL 345 | elif self.frequency in FREQ_IDAYTYPES: 346 | return TS_TIMESTAMP 347 | else: 348 | raise ValueError("Unknown frequency: %s" % self.frequency) 349 | 350 | def __getitem__(self, key): 351 | """ 352 | This function returns a timeseries where both the date and values are 353 | sliced. 354 | 355 | Usage: 356 | ts[:10] 357 | 358 | returns a timeseries with dseries and tseries of length 10. 
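        A tuple key slices rows and columns; the date series is sliced by
        the row portion only, e.g. ts[:10, 0] keeps the first ten rows of
        column 0.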
359 | 360 | """ 361 | 362 | if isinstance(key, tuple): 363 | dkey = key[0] 364 | else: 365 | dkey = key 366 | 367 | tmp_ts = self.clone() 368 | tmp_ts.dseries = tmp_ts.dseries[dkey] 369 | tmp_ts.tseries = tmp_ts.tseries[key] 370 | 371 | return tmp_ts 372 | 373 | def clone(self): 374 | """This function returns a copy of the timeseries.""" 375 | return deepcopy(self) 376 | 377 | @staticmethod 378 | def common_length(*ts): 379 | """ 380 | This function trims the lengths of timeseries and returns all 381 | timeseries with the same length. 382 | 383 | The idea is that in order to do array operations there must be a 384 | common length for each timeseries. 385 | 386 | Reflecting the bias for using timeseries sorted from latest info to 387 | earlier info, truncation takes place at the end of the array. That 388 | way older less important values are removed if necessary. 389 | 390 | This function does not alter the timeseries passed in. The list of 391 | timeseries returned are clones of the originals. 392 | 393 | Changed: 394 | 395 | Usage: 396 | [ts1, ts2, ..., ts_n] = self.common_length(*ts) 397 | 398 | Formerly: 399 | ts1_new, ts2_new = self.common_length(ts1, ts2) 400 | 401 | 402 | """ 403 | 404 | min_length = min([len(ts_tmp.tseries) for ts_tmp in ts]) 405 | 406 | return [ts_tmp[:min_length] for ts_tmp in ts] 407 | 408 | def shape(self): 409 | """ 410 | This function return the shape of the timeseries. This is a shortcut 411 | to putting in ts.tseries.shape. 412 | """ 413 | 414 | if self.tseries is None: 415 | return None 416 | elif isinstance(self.tseries, list): 417 | return np.array(self.tseries).shape 418 | else: 419 | return self.tseries.shape 420 | 421 | def if_dseries_match(self, ts): 422 | """ 423 | This function returns True if the date series are the same. 424 | """ 425 | 426 | return np.array_equal(self.dseries, ts.dseries) 427 | 428 | def if_tseries_match(self, ts): 429 | """ 430 | This function returns True if the time series are the same. 431 | """ 432 | 433 | return np.array_equal(self.tseries, ts.tseries) 434 | -------------------------------------------------------------------------------- /src/thymus/tssdict.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements a class derived from dicts for working with timeseries. 3 | 4 | """ 5 | 6 | from copy import deepcopy 7 | import json 8 | 9 | from .tsslist import TssList 10 | from .timeseries import Timeseries 11 | 12 | 13 | class TssDict(dict): 14 | """ 15 | This class is a way of handling some of the routine tasks for groups 16 | of timeseries. 17 | 18 | Assumption: 19 | This is a dict of timeseries that are keyed by tickers, etc. Or, it 20 | could be a dict of keys that hold lists of timeseries with some 21 | commonality. 22 | 23 | Usage: 24 | tssdict = TssDict(values=None) 25 | 26 | values can be a dict, list, using the key from each timeseries as the 27 | dict key. 28 | 29 | 30 | """ 31 | 32 | timeseries_class = Timeseries 33 | 34 | def __init__(self, values=None, split=None): 35 | dict.__init__(self) # only did this to satisfy pylint 36 | 37 | if isinstance(values, dict): 38 | for key, values in values.items(): 39 | self[key] = values 40 | elif isinstance(values, list): 41 | for i, ts_tmp in enumerate(values): 42 | ts_tmp = values[i] 43 | self[ts_tmp.key] = ts_tmp 44 | else: 45 | # nothing to do. 
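            # values is None (or an unrecognized type); start empty.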
46 | pass 47 | 48 | if split: 49 | if isinstance(split, Timeseries): 50 | for key, values in self.split_timeseries(split): 51 | self[key] = values 52 | 53 | @staticmethod 54 | def split_timeseries(ts): 55 | """ 56 | Splits up a timeseries so that each column is a separate timeseries 57 | within a tssdict. 58 | 59 | The only caveat is that there must be a column in ts.columns for each 60 | column in the timeseries. Since that is discretionary, it must be 61 | checked. 62 | """ 63 | error = "The number of column names must match tseries.shape[1]." 64 | if ts.columns is None: 65 | raise ValueError(error) 66 | if len(ts.columns) != ts.tseries.shape[1]: 67 | raise ValueError(error) 68 | 69 | tmp_list = [] 70 | for col in range(len(ts.columns)): 71 | tmp_ts = Timeseries() 72 | tmp_ts.dseries = ts.dseries 73 | tmp_ts.tseries = ts.tseries[:, col] 74 | tmp_ts.columns = [ts.columns[col]] 75 | 76 | tmp_list.append((ts.columns[col], tmp_ts)) 77 | 78 | return tmp_list 79 | 80 | def min_date(self): 81 | """ 82 | Returns the earliest date as a tuple(datetime, key in the group). 83 | """ 84 | min_date = None 85 | min_key = None 86 | 87 | for key, values in self.items(): 88 | if isinstance(values, Timeseries): 89 | date = values.start_date("datetime") 90 | if min_date is not None: 91 | if date < min_date: 92 | min_date = date 93 | min_key = key 94 | else: 95 | min_date = date 96 | min_key = key 97 | 98 | else: 99 | # what is it? 100 | raise ValueError("Unsupported values in dict") 101 | 102 | return (min_date, min_key) 103 | 104 | def max_date(self): 105 | """ 106 | Returns the latest date, key in the group 107 | 108 | If more than one has the same max date, simply one of them is 109 | returned. 110 | """ 111 | max_date = None 112 | max_key = None 113 | 114 | for key, values in self.items(): 115 | if isinstance(values, Timeseries): 116 | date = values.end_date("datetime") 117 | if max_date is not None: 118 | date = date 119 | if date > max_date: 120 | max_date = date 121 | max_key = key 122 | else: 123 | max_date = date 124 | max_key = key 125 | 126 | else: 127 | # what is it? 128 | raise ValueError("Unsupported values in dict") 129 | 130 | return (max_date, max_key) 131 | 132 | def longest_ts(self): 133 | """ 134 | This function returns item with the longest timeseries. 135 | 136 | """ 137 | max_length = 0 138 | max_key = None 139 | 140 | for key, ts in self.items(): 141 | if isinstance(ts, Timeseries): 142 | if ts.tseries is not None: 143 | length = ts.tseries.shape[0] 144 | if length > max_length: 145 | max_length = length 146 | max_key = key 147 | 148 | else: 149 | # what is it? 150 | raise ValueError("Unsupported values in dict") 151 | 152 | return (max_length, max_key) 153 | 154 | def shortest_ts(self): 155 | """ 156 | This function returns item with the shortest timeseries. 157 | 158 | """ 159 | min_length = None 160 | min_key = None 161 | 162 | for key, ts in self.items(): 163 | if isinstance(ts, Timeseries): 164 | if ts.tseries is None: 165 | return None 166 | 167 | length = ts.tseries.shape[0] 168 | if min_length is None: 169 | min_length = length 170 | else: 171 | if length < min_length: 172 | min_length = length 173 | min_key = key 174 | 175 | else: 176 | # what is it? 177 | raise ValueError("Unsupported values in dict") 178 | 179 | return (min_length, min_key) 180 | 181 | def get_values(self, date, keys=None, notify=False): 182 | """ 183 | This function finds the values as the date. 
If keys come in as a list 184 | the order of the values can be controlled or to limit the timeseries 185 | selected. 186 | 187 | The point of notify is to not fail gracefully if necessary. 188 | """ 189 | 190 | if keys is None: 191 | keys = self.keys() 192 | 193 | all_values = [] 194 | 195 | for key in keys: 196 | tmp = self[key] 197 | 198 | if isinstance(tmp, Timeseries): 199 | try: 200 | all_values.append(tmp.tseries[tmp.row_no(date)]) 201 | except ValueError: 202 | if notify: 203 | raise ValueError( 204 | "ts %s does not have a value on %s" % (key, date) 205 | ) 206 | else: 207 | all_values.append(None) 208 | 209 | else: 210 | raise ValueError("Unsupported values in dict") 211 | 212 | return (tuple(all_values), tuple(keys)) 213 | 214 | def combine(self, keys=None, discard=True, pad=None): 215 | """ 216 | This function combines all timeseries into one. The point of keys is 217 | that you have the ability to control the order of the columns. 218 | 219 | Usage: 220 | combine(self, keys=None, pad=None) 221 | 222 | returns ts, keys 223 | 224 | """ 225 | 226 | def iter_combine(ts1, item, discard=discard, pad=pad): 227 | """This function combines an item with an existing timeseries.""" 228 | if isinstance(item, TssList): 229 | if ts1 is None: 230 | ts1 = item.combine(discard=discard, pad=pad) 231 | else: 232 | ts1.combine(item, discard=discard, pad=pad) 233 | 234 | elif isinstance(item, list): 235 | if ts1 is None: 236 | ts1 = TssList(item).combine(discard=discard, pad=pad) 237 | else: 238 | ts1.combine(item, discard=discard, pad=pad) 239 | 240 | elif isinstance(item, Timeseries): 241 | if ts1 is None: 242 | ts1 = item.clone() 243 | else: 244 | ts1 = ts1.combine(item, discard=discard, pad=pad) 245 | 246 | elif isinstance(item, TssDict): 247 | if ts1 is None: 248 | ts1, _ = item.combine(discard=discard, pad=pad) 249 | else: 250 | ts1.combine( 251 | item.combine(discard=discard, pad=pad), 252 | discard=discard, 253 | pad=pad, 254 | ) 255 | 256 | else: 257 | raise ValueError("Unsupported type in for \n%s" % (item)) 258 | 259 | return ts1 260 | 261 | if keys is None: 262 | keys = self.keys() 263 | 264 | if len(keys) == 0: 265 | return None 266 | 267 | ts1 = None 268 | for key in keys: 269 | ts1 = iter_combine(ts1, self[key], discard=discard, pad=pad) 270 | 271 | return ts1, tuple(keys) 272 | 273 | def clone(self): 274 | """ 275 | Returns a new copy of the object. 276 | """ 277 | 278 | return deepcopy(self) 279 | 280 | def to_dict(self, dt_fmt="str", data_list=True): 281 | """ 282 | This function outputs the entirety of the object as a dict with 283 | the timeseries components as a dict as well. 284 | 285 | This enables building JSON formatted files from objects that include 286 | TssDict objects. 287 | 288 | Usage: 289 | self.to_dict(dt_fmt='str', data_list=True) 290 | """ 291 | outdict = {} 292 | 293 | for key, ts in self.items(): 294 | outdict[key] = ts.to_dict(dt_fmt=dt_fmt, data_list=data_list) 295 | 296 | return outdict 297 | 298 | def from_dict(self, tssdict): 299 | """ 300 | This function loads from a dict. 301 | 302 | The format of the dict of timeseries is assumed to use the form from 303 | Timeseries.to_dict(dt_fmt='str') 304 | """ 305 | self.clear() 306 | 307 | for key, value in tssdict.items(): 308 | self[key] = self.timeseries_class().from_dict(value) 309 | 310 | return self 311 | 312 | def to_json(self, indent=2, dt_fmt="str", data_list=True): 313 | """ 314 | This function returns the timeseries dict in JSON format. 
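        The resulting string can be round-tripped through from_json to
        rebuild the TssDict.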
315 | 316 | Usage: 317 | self.to_json(indent=2, dt_fmt='str', data_list=True) 318 | 319 | indent: indenting in the JSON output 320 | dt_fmt: formatting of the dates. Look at help for 321 | Timeseries.to_dict 322 | data_list: Whether data uses a dict for dates as keys or 323 | simply a list. 324 | Default is for a list. Otherwise, sorting the 325 | timeseries in the list would be required. 326 | 327 | """ 328 | return json.dumps( 329 | self.to_dict(dt_fmt=dt_fmt, data_list=data_list), indent=indent 330 | ) 331 | 332 | def from_json(self, json_str): 333 | """ 334 | This function loads a JSON string and applies it to the object. 335 | """ 336 | self.clear() 337 | 338 | tss_tmp = json.loads(json_str) 339 | 340 | if isinstance(tss_tmp, dict): 341 | for key, value in tss_tmp.items(): 342 | self[key] = self.timeseries_class().from_dict(value) 343 | 344 | else: 345 | raise ValueError( 346 | "Incoming JSON string does not start with a dict." 347 | ) 348 | 349 | return self 350 | -------------------------------------------------------------------------------- /src/thymus/tsslist.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements a class derived from lists for working with timeseries. 3 | 4 | """ 5 | 6 | from copy import deepcopy 7 | import json 8 | 9 | from .timeseries import Timeseries 10 | 11 | 12 | class TssList(list): 13 | """ 14 | This class is a way of handling some the routine tasks for groups 15 | of timeseries. 16 | 17 | Assumption: 18 | This is a list of timeseries of common interest 19 | 20 | Usage: 21 | tss = TssList(tss=None) # where tss is a list of timeseries 22 | 23 | 24 | """ 25 | 26 | timeseries_class = Timeseries 27 | 28 | def __init__(self, tss=None): 29 | list.__init__(self) # only did this to satisfy pylint 30 | if tss is None: 31 | pass 32 | elif isinstance(tss, list): 33 | self.extend(tss) 34 | elif isinstance(tss, tuple): 35 | self.extend(tss) 36 | else: 37 | raise ValueError("Initialization requires either a list or None") 38 | 39 | def min_date(self): 40 | """ 41 | Returns the earliest date as a datetime. 42 | 43 | By using a datetime, there is no need to distinguish whether time 44 | series are ordinal or timestamp or mixed. 45 | 46 | """ 47 | 48 | dates = [ 49 | ts.start_date("datetime") for ts in self if ts.dseries is not None 50 | ] 51 | 52 | if dates: 53 | return min(dates) 54 | else: 55 | return None 56 | 57 | def max_date(self): 58 | """ 59 | Returns the latest date 60 | 61 | """ 62 | 63 | dates = [ 64 | ts.end_date("datetime") for ts in self if ts.dseries is not None 65 | ] 66 | 67 | if dates: 68 | return max(dates) 69 | else: 70 | return None 71 | 72 | def get_values(self, date, notify=False): 73 | """ 74 | This function finds the values current to the date for the tickers. 75 | 76 | The point of notify is not fail gracefully if necessary. 77 | """ 78 | 79 | values = [] 80 | for idx, ts_tmp in enumerate(self): 81 | try: 82 | values.append(ts_tmp.tseries[ts_tmp.row_no(rowdate=date)]) 83 | except ValueError: 84 | if notify: 85 | raise ValueError( 86 | "ts %s does not have a value on %s" % (idx, date) 87 | ) 88 | else: 89 | values.append(None) 90 | 91 | return tuple(values) 92 | 93 | def combine(self, discard=True, pad=None): 94 | """ 95 | This function combines all timeseries into one by applying the combine 96 | function to each timeseries. 
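        If the list holds a single timeseries, a clone of that timeseries
        is returned unchanged.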
97 | 98 | Usage: 99 | combine(self, discard=True, pad=None) 100 | 101 | returns ts 102 | 103 | """ 104 | 105 | if len(self) > 1: 106 | return self[0].combine(self[1:], discard=discard, pad=pad) 107 | else: 108 | return self[0].clone() 109 | 110 | def clone(self): 111 | """ 112 | Returns a new copy of the object. 113 | """ 114 | 115 | return deepcopy(self) 116 | 117 | def as_dict(self): 118 | """ 119 | This function returns a dict with keys being the timeseries key. 120 | 121 | If keys are missing, an error is raised. 122 | """ 123 | 124 | for ts_tmp in self: 125 | if ts_tmp.key == "" or ts_tmp.key is None: 126 | raise ValueError("There must be a key for each timeseries.") 127 | 128 | return dict([(ts_tmp.key, ts_tmp) for ts_tmp in self]) 129 | 130 | def to_list(self, dt_fmt="str", data_list=True): 131 | """ 132 | This function builds a list of timeseries that are in the form of 133 | dict objects for use when build JSON formatted file. 134 | """ 135 | 136 | outlist = [] 137 | 138 | for ts in self: 139 | outlist.append(ts.to_dict(dt_fmt=dt_fmt, data_list=data_list)) 140 | 141 | return outlist 142 | 143 | def to_json(self, indent=2, dt_fmt="str", data_list=True): 144 | """ 145 | This function returns the timeseries list in JSON format. 146 | 147 | Usage: 148 | self.to_json(indent=2, dt_fmt='str', data_list=True) 149 | 150 | indent: indenting in the JSON output 151 | dt_fmt: formatting of the dates. Look at help for 152 | Timeseries.to_dict 153 | data_list: Whether data uses a dict for dates as keys or 154 | simply a list. 155 | Default is for a list. Otherwise, sorting the 156 | timeseries in the list would be required. 157 | 158 | """ 159 | return json.dumps( 160 | self.to_list(dt_fmt=dt_fmt, data_list=data_list), indent=indent 161 | ) 162 | 163 | def from_dict(self, tsslist): 164 | """ 165 | This function loads a list of dicts that will be converted to 166 | time series objects. 167 | """ 168 | self.clear() 169 | 170 | for item in tsslist: 171 | self.append(self.timeseries_class().from_dict(item)) 172 | 173 | return self 174 | 175 | def from_json(self, json_str): 176 | """ 177 | This function loads a JSON string and applies it to the object. 178 | """ 179 | self.clear() 180 | 181 | tss_tmp = json.loads(json_str) 182 | 183 | for item in tss_tmp: 184 | self.append(self.timeseries_class().from_dict(item)) 185 | 186 | return self 187 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidorof/Thymus-timeseries/3a5ef17084fe3f0970a7ff2163dc372148d62ee5/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_freq_conversions.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module tests the conversions from one frequency to another. 3 | 4 | """ 5 | 6 | from datetime import datetime, timedelta 7 | import numpy as np 8 | 9 | import unittest 10 | 11 | from thymus.timeseries import Timeseries 12 | from thymus.constants import FREQ_D, FREQ_W, FREQ_M, FREQ_Q, FREQ_Y 13 | from thymus.constants import FREQ_H, FREQ_MIN 14 | 15 | from thymus.freq_conversions import convert 16 | 17 | 18 | class TestFreqConversions(unittest.TestCase): 19 | """ 20 | This class tests conversions of timeseries. 
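    Two fixtures are built in setUp: a daily, ordinal-dated series running
    from 2015-12-31 to 2018-01-15 with weekends skipped, and a
    second-by-second timestamp series covering 2016-01-01 to 2016-01-04.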
21 | """ 22 | 23 | def setUp(self): 24 | # sample timeseries 25 | self.ts_ord = Timeseries() 26 | start_date = datetime(2015, 12, 31) 27 | 28 | # sloppily ends slightly more than two years 29 | end_date = datetime(2018, 1, 15) 30 | 31 | # set up two years of data with weekends skipped 32 | date = start_date 33 | self.ts_ord.dseries = [] 34 | while date <= end_date: 35 | if date.weekday() not in [5, 6]: 36 | self.ts_ord.dseries.append(date.toordinal()) 37 | date += timedelta(days=1) 38 | 39 | self.ts_ord.tseries = np.arange(len(self.ts_ord.dseries)) 40 | self.ts_ord.make_arrays() 41 | 42 | # timestamp based timeseries 43 | self.ts_seconds = Timeseries(frequency="sec") 44 | start_date = datetime(2016, 1, 1, 0, 0) 45 | end_date = datetime(2016, 1, 4, 0, 0) 46 | 47 | length = (end_date - start_date).total_seconds() 48 | 49 | self.ts_seconds.dseries = start_date.timestamp() + np.arange(length) 50 | self.ts_seconds.tseries = np.arange(length) 51 | self.ts_seconds.make_arrays() 52 | 53 | def test_convweekly_period_start(self): 54 | """Test timeseries conversion to weekly with start-of-period data.""" 55 | ts = self.ts_ord.clone() 56 | ts.end_of_period = False 57 | 58 | # conv_weekly with defaults 59 | ts1 = convert(ts, new_freq=FREQ_W) 60 | 61 | self.assertEqual(ts1.frequency, FREQ_W) 62 | 63 | self.assertEqual(ts1.dseries[0], datetime(2016, 1, 4).toordinal()) 64 | self.assertEqual(ts1.dseries[1], datetime(2016, 1, 11).toordinal()) 65 | self.assertEqual(ts1.dseries[2], datetime(2016, 1, 18).toordinal()) 66 | self.assertEqual(ts1.dseries[3], datetime(2016, 1, 25).toordinal()) 67 | self.assertEqual(ts1.dseries[4], datetime(2016, 2, 1).toordinal()) 68 | self.assertEqual(ts1.dseries[5], datetime(2016, 2, 8).toordinal()) 69 | 70 | # ending values 71 | self.assertEqual(ts1.dseries[-2], datetime(2018, 1, 8).toordinal()) 72 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 73 | 74 | # conv_weekly with include_partial=True 75 | ts1 = convert(ts, new_freq=FREQ_W, include_partial=True) 76 | 77 | self.assertEqual(ts1.dseries[0], datetime(2016, 1, 4).toordinal()) 78 | self.assertEqual(ts1.dseries[1], datetime(2016, 1, 11).toordinal()) 79 | self.assertEqual(ts1.dseries[2], datetime(2016, 1, 18).toordinal()) 80 | self.assertEqual(ts1.dseries[3], datetime(2016, 1, 25).toordinal()) 81 | self.assertEqual(ts1.dseries[4], datetime(2016, 2, 1).toordinal()) 82 | self.assertEqual(ts1.dseries[5], datetime(2016, 2, 8).toordinal()) 83 | 84 | # ending values 85 | self.assertEqual(ts1.dseries[-2], datetime(2018, 1, 8).toordinal()) 86 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 87 | 88 | ts1 = convert(ts, new_freq=FREQ_W, include_partial=False) 89 | 90 | self.assertEqual(ts1.dseries[0], datetime(2016, 1, 4).toordinal()) 91 | self.assertEqual(ts1.dseries[1], datetime(2016, 1, 11).toordinal()) 92 | self.assertEqual(ts1.dseries[2], datetime(2016, 1, 18).toordinal()) 93 | self.assertEqual(ts1.dseries[3], datetime(2016, 1, 25).toordinal()) 94 | self.assertEqual(ts1.dseries[4], datetime(2016, 2, 1).toordinal()) 95 | self.assertEqual(ts1.dseries[5], datetime(2016, 2, 8).toordinal()) 96 | 97 | # ending values 98 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 99 | 100 | ts1 = convert(ts, new_freq=FREQ_W, weekday=2) 101 | 102 | self.assertEqual(ts1.dseries[0], datetime(2016, 1, 7).toordinal()) 103 | self.assertEqual(ts1.dseries[1], datetime(2016, 1, 14).toordinal()) 104 | self.assertEqual(ts1.dseries[2], datetime(2016, 1, 21).toordinal()) 105 | 
self.assertEqual(ts1.dseries[3], datetime(2016, 1, 28).toordinal()) 106 | self.assertEqual(ts1.dseries[4], datetime(2016, 2, 4).toordinal()) 107 | self.assertEqual(ts1.dseries[5], datetime(2016, 2, 11).toordinal()) 108 | 109 | # ending values 110 | self.assertEqual(ts1.dseries[-2], datetime(2018, 1, 11).toordinal()) 111 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 112 | 113 | # test lower frequency data 114 | self.assertRaises( 115 | ValueError, 116 | convert, 117 | convert(self.ts_ord, new_freq=FREQ_Y), 118 | new_freq=FREQ_W, 119 | ) 120 | 121 | def test_convweekly_period_end(self): 122 | """ 123 | Test timeseries conversion to weekly with end-of-period data. 124 | """ 125 | ts = self.ts_ord.clone() 126 | 127 | # conv_weekly with defaults 128 | ts1 = convert(ts, new_freq=FREQ_W) 129 | 130 | self.assertEqual(ts1.frequency, FREQ_W) 131 | 132 | self.assertEqual(ts1.dseries[0], datetime(2016, 1, 1).toordinal()) 133 | self.assertEqual(ts1.dseries[1], datetime(2016, 1, 8).toordinal()) 134 | self.assertEqual(ts1.dseries[2], datetime(2016, 1, 15).toordinal()) 135 | self.assertEqual(ts1.dseries[3], datetime(2016, 1, 22).toordinal()) 136 | self.assertEqual(ts1.dseries[4], datetime(2016, 1, 29).toordinal()) 137 | self.assertEqual(ts1.dseries[5], datetime(2016, 2, 5).toordinal()) 138 | 139 | # ending values 140 | self.assertEqual(ts1.dseries[-2], datetime(2018, 1, 12).toordinal()) 141 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 142 | 143 | # conv_weekly with include_partial=True 144 | ts1 = convert(ts, new_freq=FREQ_W, include_partial=True) 145 | 146 | self.assertEqual(ts1.dseries[0], datetime(2016, 1, 1).toordinal()) 147 | self.assertEqual(ts1.dseries[1], datetime(2016, 1, 8).toordinal()) 148 | self.assertEqual(ts1.dseries[2], datetime(2016, 1, 15).toordinal()) 149 | self.assertEqual(ts1.dseries[3], datetime(2016, 1, 22).toordinal()) 150 | self.assertEqual(ts1.dseries[4], datetime(2016, 1, 29).toordinal()) 151 | self.assertEqual(ts1.dseries[5], datetime(2016, 2, 5).toordinal()) 152 | 153 | # ending values 154 | self.assertEqual(ts1.dseries[-2], datetime(2018, 1, 12).toordinal()) 155 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 156 | 157 | # conv_weekly with include_partial=False 158 | ts1 = convert(ts, new_freq=FREQ_W, include_partial=False) 159 | 160 | self.assertEqual(ts1.dseries[0], datetime(2016, 1, 1).toordinal()) 161 | self.assertEqual(ts1.dseries[1], datetime(2016, 1, 8).toordinal()) 162 | self.assertEqual(ts1.dseries[2], datetime(2016, 1, 15).toordinal()) 163 | self.assertEqual(ts1.dseries[3], datetime(2016, 1, 22).toordinal()) 164 | self.assertEqual(ts1.dseries[4], datetime(2016, 1, 29).toordinal()) 165 | self.assertEqual(ts1.dseries[5], datetime(2016, 2, 5).toordinal()) 166 | 167 | # ending values 168 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 12).toordinal()) 169 | 170 | # conv_weekly with weekday=2 171 | ts1 = convert(ts, new_freq=FREQ_W, weekday=2) 172 | 173 | self.assertEqual(ts1.dseries[0], datetime(2016, 1, 6).toordinal()) 174 | self.assertEqual(ts1.dseries[1], datetime(2016, 1, 13).toordinal()) 175 | self.assertEqual(ts1.dseries[2], datetime(2016, 1, 20).toordinal()) 176 | self.assertEqual(ts1.dseries[3], datetime(2016, 1, 27).toordinal()) 177 | self.assertEqual(ts1.dseries[4], datetime(2016, 2, 3).toordinal()) 178 | self.assertEqual(ts1.dseries[5], datetime(2016, 2, 10).toordinal()) 179 | 180 | # ending values 181 | self.assertEqual(ts1.dseries[-2], datetime(2018, 1, 10).toordinal()) 182 | 
self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 183 | 184 | # test lower frequency data 185 | self.assertRaises( 186 | ValueError, 187 | convert, 188 | convert(self.ts_ord, new_freq=FREQ_Y), 189 | new_freq=FREQ_W, 190 | ) 191 | 192 | def test_convmonthly_period_start(self): 193 | """ 194 | This function tests converting timeseries to monthly data that have 195 | starting period data. 196 | 197 | """ 198 | 199 | # daily_monthly(ts, new_freq): 200 | ts = self.ts_ord.clone() 201 | ts.end_of_period = False 202 | 203 | # daily_monthly with defaults 204 | ts1 = convert(ts, new_freq=FREQ_M) 205 | 206 | self.assertEqual(ts1.frequency, FREQ_M) 207 | 208 | self.assertEqual(ts1.dseries[0], datetime(2016, 1, 1).toordinal()) 209 | self.assertEqual(ts1.dseries[1], datetime(2016, 2, 1).toordinal()) 210 | self.assertEqual(ts1.dseries[2], datetime(2016, 3, 1).toordinal()) 211 | self.assertEqual(ts1.dseries[3], datetime(2016, 4, 1).toordinal()) 212 | self.assertEqual(ts1.dseries[4], datetime(2016, 5, 2).toordinal()) 213 | self.assertEqual(ts1.dseries[5], datetime(2016, 6, 1).toordinal()) 214 | self.assertEqual(ts1.dseries[6], datetime(2016, 7, 1).toordinal()) 215 | 216 | # ending values 217 | self.assertEqual(ts1.dseries[-2], datetime(2018, 1, 1).toordinal()) 218 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 219 | 220 | # conv_monthly with include_partial=True 221 | ts1 = convert(ts, new_freq=FREQ_M, include_partial=True) 222 | 223 | self.assertEqual(ts1.dseries[0], datetime(2016, 1, 1).toordinal()) 224 | self.assertEqual(ts1.dseries[1], datetime(2016, 2, 1).toordinal()) 225 | self.assertEqual(ts1.dseries[2], datetime(2016, 3, 1).toordinal()) 226 | self.assertEqual(ts1.dseries[3], datetime(2016, 4, 1).toordinal()) 227 | self.assertEqual(ts1.dseries[4], datetime(2016, 5, 2).toordinal()) 228 | self.assertEqual(ts1.dseries[5], datetime(2016, 6, 1).toordinal()) 229 | self.assertEqual(ts1.dseries[6], datetime(2016, 7, 1).toordinal()) 230 | 231 | # ending values 232 | self.assertEqual(ts1.dseries[-2], datetime(2018, 1, 1).toordinal()) 233 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 234 | 235 | # conv_monthly with include_partial=False 236 | ts1 = convert(ts, new_freq=FREQ_M, include_partial=False) 237 | 238 | self.assertEqual(ts1.dseries[0], datetime(2016, 1, 1).toordinal()) 239 | self.assertEqual(ts1.dseries[1], datetime(2016, 2, 1).toordinal()) 240 | self.assertEqual(ts1.dseries[2], datetime(2016, 3, 1).toordinal()) 241 | self.assertEqual(ts1.dseries[3], datetime(2016, 4, 1).toordinal()) 242 | self.assertEqual(ts1.dseries[4], datetime(2016, 5, 2).toordinal()) 243 | self.assertEqual(ts1.dseries[5], datetime(2016, 6, 1).toordinal()) 244 | self.assertEqual(ts1.dseries[6], datetime(2016, 7, 1).toordinal()) 245 | 246 | # ending values 247 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 1).toordinal()) 248 | 249 | # test lower frequency data 250 | self.assertRaises( 251 | ValueError, 252 | convert, 253 | convert(self.ts_ord, new_freq="y"), 254 | new_freq=FREQ_M, 255 | ) 256 | 257 | # timestamp conversion goes here 258 | 259 | def test_convmonthly_period_end(self): 260 | """ 261 | This function tests converting timeseries to monthly data that have 262 | ending period data. 
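        With the fixture's default end_of_period=True, each selected date
        is the last weekday of its month in the weekend-skipping data.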
263 | 264 | """ 265 | ts = self.ts_ord.clone() 266 | 267 | # daily_monthly with defaults 268 | ts1 = convert(ts, new_freq=FREQ_M) 269 | 270 | self.assertEqual(ts1.frequency, FREQ_M) 271 | 272 | self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 273 | self.assertEqual(ts1.dseries[1], datetime(2016, 1, 29).toordinal()) 274 | self.assertEqual(ts1.dseries[2], datetime(2016, 2, 29).toordinal()) 275 | self.assertEqual(ts1.dseries[3], datetime(2016, 3, 31).toordinal()) 276 | self.assertEqual(ts1.dseries[4], datetime(2016, 4, 29).toordinal()) 277 | self.assertEqual(ts1.dseries[5], datetime(2016, 5, 31).toordinal()) 278 | self.assertEqual(ts1.dseries[6], datetime(2016, 6, 30).toordinal()) 279 | 280 | # ending values 281 | self.assertEqual(ts1.dseries[-2], datetime(2017, 12, 29).toordinal()) 282 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 283 | 284 | # conv_monthly with include_partial=True 285 | ts1 = convert(ts, new_freq=FREQ_M, include_partial=True) 286 | 287 | self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 288 | self.assertEqual(ts1.dseries[1], datetime(2016, 1, 29).toordinal()) 289 | self.assertEqual(ts1.dseries[2], datetime(2016, 2, 29).toordinal()) 290 | self.assertEqual(ts1.dseries[3], datetime(2016, 3, 31).toordinal()) 291 | self.assertEqual(ts1.dseries[4], datetime(2016, 4, 29).toordinal()) 292 | self.assertEqual(ts1.dseries[5], datetime(2016, 5, 31).toordinal()) 293 | self.assertEqual(ts1.dseries[6], datetime(2016, 6, 30).toordinal()) 294 | 295 | # ending values 296 | self.assertEqual(ts1.dseries[-2], datetime(2017, 12, 29).toordinal()) 297 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 298 | 299 | # conv_monthly with include_partial=False 300 | ts1 = convert(ts, new_freq=FREQ_M, include_partial=False) 301 | 302 | self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 303 | self.assertEqual(ts1.dseries[1], datetime(2016, 1, 29).toordinal()) 304 | self.assertEqual(ts1.dseries[2], datetime(2016, 2, 29).toordinal()) 305 | self.assertEqual(ts1.dseries[3], datetime(2016, 3, 31).toordinal()) 306 | self.assertEqual(ts1.dseries[4], datetime(2016, 4, 29).toordinal()) 307 | self.assertEqual(ts1.dseries[5], datetime(2016, 5, 31).toordinal()) 308 | self.assertEqual(ts1.dseries[6], datetime(2016, 6, 30).toordinal()) 309 | 310 | # ending values 311 | self.assertEqual(ts1.dseries[-1], datetime(2017, 12, 29).toordinal()) 312 | 313 | # test lower frequency data 314 | self.assertRaises( 315 | ValueError, 316 | convert, 317 | convert(self.ts_ord, new_freq=FREQ_Y), 318 | new_freq=FREQ_M, 319 | ) 320 | 321 | # timestamp conversion goes here 322 | 323 | def test_convquarterly_period_start(self): 324 | """ 325 | This function tests converting timeseries to quarterly data that have 326 | starting period data. 
327 | 328 | """ 329 | 330 | # conv_quarterly(ts, new_freq): 331 | ts = self.ts_ord.clone() 332 | ts.end_of_period = False 333 | 334 | # conv_quarterly with defaults 335 | ts1 = convert(ts, new_freq=FREQ_Q) 336 | 337 | self.assertEqual(ts1.frequency, FREQ_Q) 338 | 339 | self.assertEqual(ts1.dseries[0], datetime(2016, 1, 1).toordinal()) 340 | self.assertEqual(ts1.dseries[1], datetime(2016, 4, 1).toordinal()) 341 | self.assertEqual(ts1.dseries[2], datetime(2016, 7, 1).toordinal()) 342 | self.assertEqual(ts1.dseries[3], datetime(2016, 10, 3).toordinal()) 343 | self.assertEqual(ts1.dseries[4], datetime(2017, 1, 2).toordinal()) 344 | self.assertEqual(ts1.dseries[5], datetime(2017, 4, 3).toordinal()) 345 | self.assertEqual(ts1.dseries[6], datetime(2017, 7, 3).toordinal()) 346 | 347 | # ending values 348 | self.assertEqual(ts1.dseries[-2], datetime(2018, 1, 1).toordinal()) 349 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 350 | 351 | # conv_quarterly with include_partial=True 352 | ts1 = convert(ts, new_freq=FREQ_Q, include_partial=True) 353 | 354 | self.assertEqual(ts1.dseries[0], datetime(2016, 1, 1).toordinal()) 355 | self.assertEqual(ts1.dseries[1], datetime(2016, 4, 1).toordinal()) 356 | self.assertEqual(ts1.dseries[2], datetime(2016, 7, 1).toordinal()) 357 | self.assertEqual(ts1.dseries[3], datetime(2016, 10, 3).toordinal()) 358 | self.assertEqual(ts1.dseries[4], datetime(2017, 1, 2).toordinal()) 359 | self.assertEqual(ts1.dseries[5], datetime(2017, 4, 3).toordinal()) 360 | self.assertEqual(ts1.dseries[6], datetime(2017, 7, 3).toordinal()) 361 | 362 | # ending values 363 | self.assertEqual(ts1.dseries[-2], datetime(2018, 1, 1).toordinal()) 364 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 365 | 366 | # conv_quarterly with include_partial=False 367 | ts1 = convert(ts, new_freq=FREQ_Q, include_partial=False) 368 | 369 | self.assertEqual(ts1.dseries[0], datetime(2016, 1, 1).toordinal()) 370 | self.assertEqual(ts1.dseries[1], datetime(2016, 4, 1).toordinal()) 371 | self.assertEqual(ts1.dseries[2], datetime(2016, 7, 1).toordinal()) 372 | self.assertEqual(ts1.dseries[3], datetime(2016, 10, 3).toordinal()) 373 | self.assertEqual(ts1.dseries[4], datetime(2017, 1, 2).toordinal()) 374 | self.assertEqual(ts1.dseries[5], datetime(2017, 4, 3).toordinal()) 375 | self.assertEqual(ts1.dseries[6], datetime(2017, 7, 3).toordinal()) 376 | 377 | # ending values 378 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 1).toordinal()) 379 | 380 | # unresolved design decision 381 | # with monthly data 382 | # ts1 = convert( 383 | # convert(ts, new_freq=FREQ_M, include_partial=False), 384 | # new_freq=FREQ_Q, 385 | # include_partial=False) 386 | 387 | # self.assertEqual(ts1.dseries[0], datetime(2016, 1, 1).toordinal()) 388 | # self.assertEqual(ts1.dseries[1], datetime(2016, 4, 1).toordinal()) 389 | # self.assertEqual(ts1.dseries[2], datetime(2016, 7, 1).toordinal()) 390 | # self.assertEqual(ts1.dseries[3], datetime(2016, 10, 3).toordinal()) 391 | # self.assertEqual(ts1.dseries[4], datetime(2017, 1, 2).toordinal()) 392 | # self.assertEqual(ts1.dseries[5], datetime(2017, 4, 3).toordinal()) 393 | # self.assertEqual(ts1.dseries[6], datetime(2017, 7, 3).toordinal()) 394 | 395 | # ending values 396 | # self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 1).toordinal()) 397 | 398 | # test lower frequency data 399 | self.assertRaises( 400 | ValueError, 401 | convert, 402 | convert(self.ts_ord, new_freq=FREQ_Y), 403 | new_freq=FREQ_Q, 404 | ) 405 | 406 | # timestamp 
conversion goes here 407 | 408 | def test_convquarterly_period_end(self): 409 | """ 410 | This function tests converting timeseries to quarterly data that have 411 | ending period data. 412 | 413 | """ 414 | 415 | ts = self.ts_ord.clone() 416 | 417 | ts1 = convert(ts, new_freq=FREQ_Q) 418 | 419 | self.assertEqual(ts1.frequency, FREQ_Q) 420 | 421 | self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 422 | self.assertEqual(ts1.dseries[1], datetime(2016, 3, 31).toordinal()) 423 | self.assertEqual(ts1.dseries[2], datetime(2016, 6, 30).toordinal()) 424 | self.assertEqual(ts1.dseries[3], datetime(2016, 9, 30).toordinal()) 425 | self.assertEqual(ts1.dseries[4], datetime(2016, 12, 30).toordinal()) 426 | self.assertEqual(ts1.dseries[5], datetime(2017, 3, 31).toordinal()) 427 | self.assertEqual(ts1.dseries[6], datetime(2017, 6, 30).toordinal()) 428 | 429 | # ending values 430 | self.assertEqual(ts1.dseries[-2], datetime(2017, 12, 29).toordinal()) 431 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 432 | 433 | # conv_quarterly with include_partial=True 434 | ts1 = convert(ts, new_freq=FREQ_Q, include_partial=True) 435 | 436 | self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 437 | self.assertEqual(ts1.dseries[1], datetime(2016, 3, 31).toordinal()) 438 | self.assertEqual(ts1.dseries[2], datetime(2016, 6, 30).toordinal()) 439 | self.assertEqual(ts1.dseries[3], datetime(2016, 9, 30).toordinal()) 440 | self.assertEqual(ts1.dseries[4], datetime(2016, 12, 30).toordinal()) 441 | self.assertEqual(ts1.dseries[5], datetime(2017, 3, 31).toordinal()) 442 | self.assertEqual(ts1.dseries[6], datetime(2017, 6, 30).toordinal()) 443 | 444 | # ending values 445 | self.assertEqual(ts1.dseries[-2], datetime(2017, 12, 29).toordinal()) 446 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 447 | 448 | # conv_quarterly with include_partial=False 449 | ts1 = convert(ts, new_freq=FREQ_Q, include_partial=False) 450 | 451 | self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 452 | self.assertEqual(ts1.dseries[1], datetime(2016, 3, 31).toordinal()) 453 | self.assertEqual(ts1.dseries[2], datetime(2016, 6, 30).toordinal()) 454 | self.assertEqual(ts1.dseries[3], datetime(2016, 9, 30).toordinal()) 455 | self.assertEqual(ts1.dseries[4], datetime(2016, 12, 30).toordinal()) 456 | self.assertEqual(ts1.dseries[5], datetime(2017, 3, 31).toordinal()) 457 | self.assertEqual(ts1.dseries[6], datetime(2017, 6, 30).toordinal()) 458 | 459 | # ending values 460 | self.assertEqual(ts1.dseries[-1], datetime(2017, 12, 29).toordinal()) 461 | 462 | # resolve design decision 463 | # ts1 = convert( 464 | # convert(ts, new_freq=FREQ_M, include_partial=False), 465 | # new_freq=FREQ_Q, 466 | # include_partial=False) 467 | 468 | # self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 469 | # self.assertEqual(ts1.dseries[1], datetime(2016, 3, 31).toordinal()) 470 | # self.assertEqual(ts1.dseries[2], datetime(2016, 6, 30).toordinal()) 471 | # self.assertEqual(ts1.dseries[3], datetime(2016, 9, 30).toordinal()) 472 | # self.assertEqual(ts1.dseries[4], datetime(2016, 12, 30).toordinal()) 473 | # self.assertEqual(ts1.dseries[5], datetime(2017, 3, 31).toordinal()) 474 | # self.assertEqual(ts1.dseries[6], datetime(2017, 6, 30).toordinal()) 475 | 476 | # ending values 477 | # self.assertEqual(ts1.dseries[-1], datetime(2017, 12, 29).toordinal()) 478 | 479 | # test lower frequency data 480 | # test lower frequency data 481 | self.assertRaises( 482 | 
ValueError, 483 | convert, 484 | convert(self.ts_ord, new_freq=FREQ_Y), 485 | new_freq=FREQ_Q, 486 | ) 487 | 488 | # timestamp conversion goes here 489 | 490 | def test_convyearly_period_start(self): 491 | """ 492 | This function tests converting timeseries to yearly data that have 493 | starting period data. 494 | 495 | """ 496 | 497 | ts = self.ts_ord.clone() 498 | 499 | # conv_yearly with defaults 500 | ts1 = convert(ts, new_freq=FREQ_Y) 501 | 502 | self.assertEqual(ts1.frequency, FREQ_Y) 503 | 504 | self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 505 | self.assertEqual(ts1.dseries[1], datetime(2016, 12, 30).toordinal()) 506 | self.assertEqual(ts1.dseries[2], datetime(2017, 12, 29).toordinal()) 507 | 508 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 509 | 510 | # conv_yearly with include_partial=True 511 | ts1 = convert(ts, new_freq=FREQ_Y, include_partial=True) 512 | 513 | self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 514 | self.assertEqual(ts1.dseries[1], datetime(2016, 12, 30).toordinal()) 515 | self.assertEqual(ts1.dseries[2], datetime(2017, 12, 29).toordinal()) 516 | 517 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 518 | 519 | # conv_yearly with include_partial=False 520 | ts1 = convert(ts, new_freq=FREQ_Y, include_partial=False) 521 | 522 | self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 523 | self.assertEqual(ts1.dseries[1], datetime(2016, 12, 30).toordinal()) 524 | self.assertEqual(ts1.dseries[2], datetime(2017, 12, 29).toordinal()) 525 | 526 | # resolve design decision 527 | # with monthly data 528 | # ts1 = convert( 529 | # convert(ts, new_freq=FREQ_M, include_partial=False), 530 | # new_freq=FREQ_Y, 531 | # include_partial=False) 532 | 533 | # self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 534 | # self.assertEqual(ts1.dseries[1], datetime(2016, 12, 30).toordinal()) 535 | # self.assertEqual(ts1.dseries[2], datetime(2017, 12, 29).toordinal()) 536 | 537 | # timestamp conversion goes here 538 | 539 | def test_convyearly_period_end(self): 540 | """ 541 | This function tests converting timeseries to yearly data that have 542 | ending period data. 
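        The selected dates are the last weekday of each calendar year, plus
        the partial 2018 stub unless include_partial is False.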
543 | 544 | """ 545 | 546 | ts = self.ts_ord.clone() 547 | 548 | # conv_yearly with defaults 549 | ts1 = convert(ts, new_freq=FREQ_Y) 550 | 551 | self.assertEqual(ts1.frequency, FREQ_Y) 552 | 553 | self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 554 | self.assertEqual(ts1.dseries[1], datetime(2016, 12, 30).toordinal()) 555 | self.assertEqual(ts1.dseries[2], datetime(2017, 12, 29).toordinal()) 556 | 557 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 558 | 559 | # conv_yearly with include_partial=True 560 | ts1 = convert(ts, new_freq=FREQ_Y, include_partial=True) 561 | 562 | self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 563 | self.assertEqual(ts1.dseries[1], datetime(2016, 12, 30).toordinal()) 564 | self.assertEqual(ts1.dseries[2], datetime(2017, 12, 29).toordinal()) 565 | 566 | self.assertEqual(ts1.dseries[-1], datetime(2018, 1, 15).toordinal()) 567 | 568 | # conv_yearly with include_partial=False 569 | ts1 = convert(ts, new_freq=FREQ_Y, include_partial=False) 570 | 571 | self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 572 | self.assertEqual(ts1.dseries[1], datetime(2016, 12, 30).toordinal()) 573 | self.assertEqual(ts1.dseries[2], datetime(2017, 12, 29).toordinal()) 574 | 575 | # resolve design decision 576 | # with monthly data 577 | # ts1 = convert( 578 | # convert(ts, new_freq=FREQ_M, include_partial=False), 579 | # new_freq=FREQ_Y, 580 | # include_partial=False) 581 | 582 | # self.assertEqual(ts1.dseries[0], datetime(2015, 12, 31).toordinal()) 583 | # self.assertEqual(ts1.dseries[1], datetime(2016, 12, 30).toordinal()) 584 | # self.assertEqual(ts1.dseries[2], datetime(2017, 12, 29).toordinal()) 585 | 586 | # timestamp conversion goes here 587 | 588 | def test_convminutes_period_start(self): 589 | """ 590 | This function tests converting timeseries to minute data that have 591 | starting period data. 592 | 593 | """ 594 | 595 | ts = self.ts_seconds.clone() 596 | ts.end_of_period = False 597 | 598 | ts1 = convert(ts, new_freq=FREQ_MIN) 599 | 600 | self.assertEqual( 601 | ts1.dseries[0], datetime(2016, 1, 1, 0, 0, 0).timestamp() 602 | ) 603 | self.assertEqual( 604 | ts1.dseries[1], datetime(2016, 1, 1, 0, 1, 0).timestamp() 605 | ) 606 | self.assertEqual( 607 | ts1.dseries[2], datetime(2016, 1, 1, 0, 2, 0).timestamp() 608 | ) 609 | self.assertEqual( 610 | ts1.dseries[3], datetime(2016, 1, 1, 0, 3, 0).timestamp() 611 | ) 612 | 613 | @unittest.skip 614 | def test_convminutes_period_end(self): 615 | """ 616 | This function tests converting timeseries to minute data that have 617 | ending period data. 618 | 619 | Needs design decision. 620 | """ 621 | 622 | ts = self.ts_seconds.clone() 623 | 624 | ts1 = convert(ts, new_freq=FREQ_MIN) 625 | 626 | self.assertEqual( 627 | ts1.dseries[0], datetime(2016, 1, 1, 0, 0, 59).timestamp() 628 | ) 629 | self.assertEqual( 630 | ts1.dseries[1], datetime(2016, 1, 1, 0, 1, 59).timestamp() 631 | ) 632 | self.assertEqual( 633 | ts1.dseries[2], datetime(2016, 1, 1, 0, 2, 59).timestamp() 634 | ) 635 | self.assertEqual( 636 | ts1.dseries[3], datetime(2016, 1, 1, 0, 3, 59).timestamp() 637 | ) 638 | self.assertEqual( 639 | ts1.dseries[4], datetime(2016, 1, 1, 0, 4, 59).timestamp() 640 | ) 641 | self.assertEqual( 642 | ts1.dseries[5], datetime(2016, 1, 1, 0, 5, 59).timestamp() 643 | ) 644 | 645 | def test_convhours_period_start(self): 646 | """ 647 | This function tests conversion to hours. 648 | 649 | Currently there is a problem with this. 
At the moment it a design 650 | decision needs to be made on how to handle end-of-period conversions. 651 | 652 | """ 653 | ts = self.ts_seconds.clone() 654 | ts.end_of_period = False 655 | ts1 = convert(ts, new_freq=FREQ_H) 656 | 657 | self.assertEqual( 658 | ts1.dseries[0], datetime(2016, 1, 1, 0, 0, 0).timestamp() 659 | ) 660 | self.assertEqual( 661 | ts1.dseries[1], datetime(2016, 1, 1, 1, 0, 0).timestamp() 662 | ) 663 | self.assertEqual( 664 | ts1.dseries[2], datetime(2016, 1, 1, 2, 0, 0).timestamp() 665 | ) 666 | self.assertEqual( 667 | ts1.dseries[3], datetime(2016, 1, 1, 3, 0, 0).timestamp() 668 | ) 669 | self.assertEqual( 670 | ts1.dseries[4], datetime(2016, 1, 1, 4, 0, 0).timestamp() 671 | ) 672 | self.assertEqual( 673 | ts1.dseries[5], datetime(2016, 1, 1, 5, 0, 0).timestamp() 674 | ) 675 | 676 | @unittest.skip 677 | def test_convhours_period_end(self): 678 | """ 679 | This function tests conversion to hours. 680 | 681 | Currently there is a problem with this. At the moment a design 682 | decision needs to be made on how to handle end-of-period conversions. 683 | 684 | """ 685 | 686 | ts = self.ts_seconds.clone() 687 | ts.end_of_period = False 688 | ts1 = convert(ts, new_freq=FREQ_H) 689 | 690 | ts = self.ts_seconds.clone() 691 | ts.end_of_period = True 692 | ts1 = convert(ts, new_freq=FREQ_H) 693 | 694 | self.assertEqual( 695 | ts1.dseries[0], datetime(2016, 1, 1, 0, 0, 0).timestamp() 696 | ) 697 | self.assertEqual( 698 | ts1.dseries[1], datetime(2016, 1, 1, 1, 0, 0).timestamp() 699 | ) 700 | self.assertEqual( 701 | ts1.dseries[2], datetime(2016, 1, 1, 2, 0, 0).timestamp() 702 | ) 703 | self.assertEqual( 704 | ts1.dseries[3], datetime(2016, 1, 1, 3, 0, 0).timestamp() 705 | ) 706 | self.assertEqual( 707 | ts1.dseries[4], datetime(2016, 1, 1, 4, 0, 0).timestamp() 708 | ) 709 | self.assertEqual( 710 | ts1.dseries[5], datetime(2016, 1, 1, 5, 0, 0).timestamp() 711 | ) 712 | 713 | @unittest.skip 714 | def test_conv_days(self): 715 | """ 716 | This function converts timestamp data to a daily frequency with 717 | ordinal dates. 718 | 719 | Currently, there is a problem with this. 
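        The test is skipped until the timestamp-to-ordinal conversion path
        is settled.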
720 | """ 721 | 722 | ts = self.ts_seconds.clone() 723 | 724 | ts.end_of_period = False 725 | 726 | ts1 = convert(ts, new_freq=FREQ_D) 727 | 728 | self.assertEqual( 729 | ts1.dseries[0], datetime(2016, 1, 1, 0, 0, 0).toordinal() 730 | ) 731 | self.assertEqual( 732 | ts1.dseries[1], datetime(2016, 1, 2, 0, 0, 0).toordinal() 733 | ) 734 | self.assertEqual( 735 | ts1.dseries[2], datetime(2016, 1, 3, 0, 0, 0).toordinal() 736 | ) 737 | self.assertEqual( 738 | ts1.dseries[3], datetime(2016, 1, 4, 0, 0, 0).toordinal() 739 | ) 740 | 741 | ts = self.ts_seconds.clone() 742 | ts.end_of_period = True 743 | 744 | ts1 = convert(ts, new_freq=FREQ_D) 745 | 746 | self.assertEqual( 747 | ts1.dseries[0], datetime(2016, 1, 1, 0, 0, 0).toordinal() 748 | ) 749 | self.assertEqual( 750 | ts1.dseries[1], datetime(2016, 1, 2, 0, 0, 0).toordinal() 751 | ) 752 | self.assertEqual( 753 | ts1.dseries[2], datetime(2016, 1, 3, 0, 0, 0).toordinal() 754 | ) 755 | self.assertEqual( 756 | ts1.dseries[3], datetime(2016, 1, 4, 0, 0, 0).toordinal() 757 | ) 758 | 759 | 760 | if __name__ == "__main__": 761 | unittest.main() 762 | -------------------------------------------------------------------------------- /tests/test_point.py: -------------------------------------------------------------------------------- 1 | # tests/test_point.py 2 | """ 3 | This module tests the Point class 4 | """ 5 | import unittest 6 | 7 | from datetime import date, datetime 8 | import json 9 | import numpy as np 10 | 11 | from thymus.timeseries import Timeseries 12 | from thymus.point import Point 13 | 14 | 15 | class TestPoint(unittest.TestCase): 16 | """This class tests the class Point.""" 17 | 18 | def setUp(self): 19 | # three timeseries 20 | self.ts = Timeseries() 21 | self.ts.key = "Test Key" 22 | self.ts.columns = ["dog", "cat", "squirrel"] 23 | 24 | self.start_date = datetime(2021, 1, 29).toordinal() 25 | self.ts.dseries = self.start_date + np.arange(5) 26 | self.ts.tseries = np.arange(15).reshape((5, 3)) / 10.33 27 | self.ts.make_arrays() 28 | 29 | def test_class_init_(self): 30 | """Test class initialization.""" 31 | point = Point(self.ts, 3) 32 | 33 | self.assertTrue(hasattr(point, "dog")) 34 | self.assertTrue(hasattr(point, "cat")) 35 | self.assertTrue(hasattr(point, "squirrel")) 36 | 37 | self.assertEqual(point.dog, point.values[0]) 38 | self.assertEqual(point.cat, point.values[1]) 39 | self.assertEqual(point.squirrel, point.values[2]) 40 | self.assertEqual(point.date, point.ts.dseries[3]) 41 | self.assertEqual(point.row_no, 3) 42 | 43 | point.dog = 1 44 | point.cat = 2 45 | point.squirrel = 3 46 | 47 | self.assertEqual(point.dog, point.values[0]) 48 | self.assertEqual(point.cat, point.values[1]) 49 | self.assertEqual(point.squirrel, point.values[2]) 50 | 51 | def test__repr__(self): 52 | """Test the appearance.""" 53 | point = Point(self.ts, 3) 54 | 55 | output = repr(point) 56 | 57 | self.assertTrue(output.startswith("")) 59 | 60 | for column in self.ts.columns: 61 | with self.subTest(column=column): 62 | self.assertTrue(output.find(column) > -1) 63 | 64 | # no columns 65 | self.ts.columns = None 66 | point = Point(self.ts, 3) 67 | 68 | output = repr(point) 69 | 70 | # has values 71 | self.assertTrue(output.find("[") > -1) 72 | 73 | # should show new name 74 | class NewPoint(Point): 75 | def __init__(self, ts, row_no): 76 | super().__init__(ts, row_no) 77 | 78 | output = repr(NewPoint(self.ts, 3)) 79 | 80 | self.assertTrue(output.startswith("")) 82 | 83 | # format question 84 | ts = self.ts[:, 1] 85 | point = Point(ts, 3) 86 | 87 | 
self.assertEqual( 88 | repr(point), 89 | "", 90 | ) 91 | 92 | ts.columns = ["test"] 93 | point = Point(ts, 3) 94 | 95 | self.assertEqual( 96 | repr(point), 97 | "", 98 | ) 99 | 100 | def test_to_dict(self): 101 | """Test formatting for a dictionary.""" 102 | 103 | # native date format 104 | self.assertDictEqual( 105 | Point(self.ts, 3).to_dict(), 106 | { 107 | "row_no": 3, 108 | "date": datetime(2021, 1, 29).toordinal() + 3, 109 | "dog": 0.8712487899322362, 110 | "cat": 0.968054211035818, 111 | "squirrel": 1.0648596321393997, 112 | }, 113 | ) 114 | 115 | # str date format 116 | self.assertDictEqual( 117 | Point(self.ts, 0).to_dict(dt_fmt="str"), 118 | { 119 | "row_no": 0, 120 | "date": datetime(2021, 1, 29).strftime("%F"), 121 | "dog": 0.0, 122 | "cat": 0.0968054211035818, 123 | "squirrel": 0.1936108422071636, 124 | }, 125 | ) 126 | 127 | # str date format 128 | self.assertDictEqual( 129 | Point(self.ts, 0).to_dict(dt_fmt="datetime"), 130 | { 131 | "row_no": 0, 132 | "date": datetime(2021, 1, 29).date(), 133 | "dog": 0.0, 134 | "cat": 0.0968054211035818, 135 | "squirrel": 0.1936108422071636, 136 | }, 137 | ) 138 | 139 | # invalid date format 140 | self.assertRaises(ValueError, Point(self.ts, 0).to_dict, dt_fmt="test") 141 | 142 | 143 | if __name__ == "__main__": 144 | unittest.main() 145 | -------------------------------------------------------------------------------- /tests/test_tsproto.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module tests the prototype timeseries class. 3 | """ 4 | import unittest 5 | 6 | from datetime import datetime 7 | import numpy as np 8 | 9 | from thymus.constants import FREQ_D, FREQ_SEC 10 | from thymus.tsproto import TsProto 11 | from thymus.tsslist import TssList 12 | 13 | 14 | class TestTsProto(unittest.TestCase): 15 | """ 16 | This class tests the base class TsProto. 
17 | """ 18 | 19 | def setUp(self): 20 | # three timeseries 21 | self.ts = TsProto() 22 | start_date = datetime(2015, 12, 31).toordinal() 23 | self.ts.dseries = start_date + np.arange(10) 24 | self.ts.tseries = np.arange(10) 25 | self.ts.make_arrays() 26 | 27 | # longer timeseries 28 | self.ts_long = TsProto() 29 | start_date = datetime(2015, 12, 31).toordinal() 30 | self.ts_long.dseries = start_date + np.arange(20) 31 | self.ts_long.tseries = np.arange(20) 32 | self.ts_long.make_arrays() 33 | 34 | # shorter timeseries 35 | self.ts_short = TsProto() 36 | start_date = datetime(2015, 12, 31).toordinal() 37 | self.ts_short.dseries = start_date + np.arange(5) 38 | self.ts_short.tseries = np.arange(5) 39 | self.ts_short.make_arrays() 40 | 41 | def test_class_init_(self): 42 | """Test class initialization.""" 43 | tmp_ts = TsProto() 44 | 45 | # defaults for dseries, tseries might get changed back to list 46 | # self.assertIsInstance(tmp_ts.dseries, list) 47 | # self.assertIsInstance(tmp_ts.tseries, list) 48 | self.assertIsInstance(tmp_ts.columns, list) 49 | self.assertEqual(tmp_ts.frequency, FREQ_D) 50 | self.assertTrue(tmp_ts.end_of_period) 51 | 52 | def test_container_functions(self): 53 | """Tests the ability to pass through container functions to array.""" 54 | 55 | ts = self.ts.clone() 56 | 57 | flist = [ 58 | "__pow__", 59 | "__add__", 60 | "__rsub__", 61 | "__rtruediv__", 62 | "__divmod__", 63 | "__sub__", 64 | "__eq__", 65 | "__floordiv__", 66 | "__truediv__", 67 | "__ge__", 68 | "__gt__", 69 | "__le__", 70 | "__lt__", 71 | "__mod__", 72 | "__mul__", 73 | "__ne__", 74 | "__radd__", 75 | "__rdivmod__", 76 | "__rfloordiv__", 77 | "__rmod__", 78 | "__rmul__", 79 | "__rpow__", 80 | ] 81 | 82 | unary_flist = ["__abs__", "__pos__", "__neg__"] 83 | 84 | special_flist = ["__invert__"] 85 | 86 | flist1 = [ 87 | "__and__", 88 | "__or__", 89 | "__ror__", 90 | "__rand__", 91 | "__rxor__", 92 | "__xor__", 93 | "__rshift__", 94 | "__rlshift__", 95 | "__lshift__", 96 | "__rrshift__", 97 | ] 98 | 99 | iflist = [ 100 | "__iadd__", 101 | "__ifloordiv__", 102 | "__imod__", 103 | "__imul__", 104 | "__ipow__", 105 | "__isub__", 106 | "__itruediv__", 107 | ] 108 | 109 | iflist1 = [ 110 | "__iand__", 111 | "__ilshift__", 112 | "__ior__", 113 | "__irshift__", 114 | "__ixor__", 115 | ] 116 | 117 | ts_other = self.ts.clone() * 4 + 3 118 | for func in flist: 119 | for other in [3, ts_other]: 120 | ts = self.ts.clone() 121 | ts.tseries += 1 122 | 123 | ts_a = getattr(ts, func)(other) 124 | a_series = ts_a.tseries 125 | 126 | if isinstance(other, TsProto): 127 | b_series = getattr(ts.tseries, func)(other.tseries) 128 | else: 129 | b_series = getattr(ts.tseries, func)(other) 130 | 131 | self.assertTrue(np.array_equal(a_series, b_series)) 132 | self.assertTrue(ts.if_dseries_match(ts_a)) 133 | 134 | # specific tests 135 | # decide what to do here. 
future warning that it will 136 | # be eventually elementwise 137 | self.assertIsNotNone(ts_other) 138 | 139 | for func in unary_flist: 140 | ts = self.ts.clone() 141 | ts_a = getattr(ts, func)() 142 | a_series = ts_a.tseries 143 | 144 | b_series = getattr(ts.tseries, func)() 145 | 146 | self.assertTrue(np.array_equal(a_series, b_series)) 147 | self.assertTrue(ts.if_dseries_match(ts_a)) 148 | 149 | for func in special_flist: 150 | ts = self.ts.clone() 151 | ts.tseries = np.array(ts.tseries, np.int32) 152 | self.assertTrue( 153 | np.array_equal( 154 | getattr(ts, func)().tseries, 155 | getattr(ts.tseries, func)(), 156 | ) 157 | ) 158 | 159 | ts_other = self.ts.clone() * 4 160 | ts_other.tseries = np.array(ts_other.tseries, np.int64) 161 | for func in flist1: 162 | for other in [3, ts_other]: 163 | ts = self.ts.clone() 164 | ts.tseries = np.array(ts.tseries, np.int64) 165 | 166 | ts_a = getattr(ts, func)(other) 167 | a_series = ts_a.tseries 168 | 169 | if isinstance(other, TsProto): 170 | b_series = getattr(ts.tseries, func)(other.tseries) 171 | else: 172 | b_series = getattr(ts.tseries, func)(other) 173 | 174 | self.assertTrue(np.array_equal(a_series, b_series)) 175 | self.assertTrue(ts.if_dseries_match(ts_a)) 176 | 177 | for func in iflist: 178 | for other in [3, self.ts.clone() + 2.5]: 179 | ts = self.ts.clone() + 4.0 180 | ts1 = self.ts.clone() + 4.0 181 | 182 | ts_a = getattr(ts, func)(other) 183 | a_series = ts_a.tseries 184 | 185 | if isinstance(other, TsProto): 186 | b_series = getattr(ts.tseries, func)(other.tseries) 187 | else: 188 | b_series = getattr(ts.tseries, func)(other) 189 | 190 | self.assertTrue(np.array_equal(a_series, b_series)) 191 | self.assertTrue(ts.if_dseries_match(ts_a)) 192 | 193 | ts_other = self.ts.clone() * 4 194 | ts_other.tseries = np.array(ts_other.tseries, np.int64) 195 | for func in iflist1: 196 | for other in [3, ts_other]: 197 | ts = self.ts.clone() 198 | ts.tseries = np.array(ts.tseries, np.int64) 199 | ts1 = self.ts.clone() 200 | ts1.tseries = np.array(ts1.tseries, np.int64) 201 | 202 | if other is None: 203 | other = self.ts.clone() * 4 204 | other.tseries = np.array(other.tseries, np.int64) 205 | 206 | ts_a = getattr(ts, func)(other) 207 | a_series = ts_a.tseries 208 | 209 | if isinstance(other, TsProto): 210 | b_series = getattr(ts.tseries, func)(other.tseries) 211 | else: 212 | b_series = getattr(ts.tseries, func)(other) 213 | 214 | self.assertTrue(np.array_equal(a_series, b_series)) 215 | self.assertTrue(ts.if_dseries_match(ts_a)) 216 | 217 | def test_timeseries__add__lengths(self): 218 | """Tests adding two timeseries with mismatched lengths""" 219 | # 220 | # silently truncate 221 | ts1 = self.ts.clone() 222 | ts1.dseries = self.ts.dseries.tolist()[:5] 223 | ts1.tseries = self.ts.tseries.tolist()[:5] 224 | ts1.make_arrays() 225 | 226 | ts = self.ts + ts1 227 | 228 | self.assertEqual(ts.tseries[0], 0) 229 | self.assertEqual(ts.tseries[1], 2) 230 | self.assertEqual(ts.tseries[2], 4) 231 | self.assertEqual(ts.tseries[3], 6) 232 | self.assertEqual(ts.tseries[4], 8) 233 | 234 | self.assertEqual(len(ts.dseries), 5) 235 | self.assertEqual(len(ts.tseries), 5) 236 | 237 | def test_timeseries__add__columns(self): 238 | """Tests adding two timeseries mismatched columns""" 239 | ts1 = self.ts.clone() 240 | ts1.tseries = np.arange(10).reshape((-1, 1)) 241 | ts1.make_arrays() 242 | 243 | # self ts shape (10) incoming (10, 1) 244 | self.assertRaises(ValueError, self.ts.__add__, ts1) 245 | 246 | # self ts shape (10, 1) incoming (10, 2) 247 | ts2 = ts1.clone() 
248 | ts2.tseries = np.arange(20).reshape((-1, 2)) 249 | ts2.make_arrays() 250 | 251 | self.assertRaises(ValueError, ts1.__add__, ts2) 252 | 253 | def test_timeseries__iadd__lengths(self): 254 | """Tests in-place adding two timeseries mismatched lengths""" 255 | # 256 | # silently truncate 257 | ts1 = self.ts.clone() 258 | ts1.dseries = self.ts.dseries.tolist()[:5] 259 | ts1.tseries = self.ts.tseries.tolist()[:5] 260 | ts1.make_arrays() 261 | 262 | self.ts += ts1 263 | 264 | self.assertEqual(self.ts.tseries[0], 0) 265 | self.assertEqual(self.ts.tseries[1], 2) 266 | self.assertEqual(self.ts.tseries[2], 4) 267 | self.assertEqual(self.ts.tseries[3], 6) 268 | self.assertEqual(self.ts.tseries[4], 8) 269 | 270 | self.assertEqual(len(self.ts.dseries), 5) 271 | self.assertEqual(len(self.ts.tseries), 5) 272 | 273 | def test_timeseries__iadd__columns(self): 274 | """Tests in-place adding two timeseries with mismatched columns""" 275 | ts1 = self.ts.clone() 276 | ts1.tseries = np.arange(10).reshape((-1, 1)) 277 | ts1.make_arrays() 278 | 279 | # self ts shape (10) incoming (10, 1) 280 | self.assertRaises(ValueError, self.ts.__iadd__, ts1) 281 | 282 | # self ts shape (10, 1) incoming (10, 2) 283 | ts2 = ts1.clone() 284 | ts2.tseries = np.arange(20).reshape((-1, 2)) 285 | ts2.make_arrays() 286 | 287 | self.assertRaises(ValueError, ts1.__iadd__, ts2) 288 | 289 | def test_column_checks(self): 290 | """Tests verification of similar columns""" 291 | ts1 = self.ts.clone() 292 | ts1.tseries = np.arange(10).reshape((-1, 1)) 293 | ts1.make_arrays() 294 | 295 | # self ts shape (10) incoming (10, 1) 296 | self.assertRaises(ValueError, self.ts._column_checks, self.ts, ts1) 297 | 298 | # self ts shape (10, 1) incoming (10, 2) 299 | ts2 = ts1.clone() 300 | ts2.tseries = np.arange(20).reshape((-1, 2)) 301 | ts2.make_arrays() 302 | 303 | self.assertRaises(ValueError, self.ts._column_checks, ts1, ts2) 304 | 305 | # now one with columns ok 306 | ts2 = ts1.clone() 307 | 308 | self.assertIsNone(self.ts._column_checks(ts1, ts2)) 309 | 310 | def test_if_dseries_match(self): 311 | """Tests comparing two date series.""" 312 | 313 | ts = self.ts.clone() 314 | 315 | self.assertTrue(self.ts.if_dseries_match(ts)) 316 | 317 | ts = self.ts.clone() 318 | ts.dseries[0] += 1 319 | 320 | self.assertFalse(self.ts.if_dseries_match(ts)) 321 | 322 | def test_if_tseries_match(self): 323 | """Tests comparing two series of values.""" 324 | 325 | ts = self.ts.clone() 326 | 327 | self.assertTrue(self.ts.if_tseries_match(ts)) 328 | 329 | ts = self.ts.clone() 330 | ts.tseries[0] += 1 331 | 332 | self.assertFalse(self.ts.if_tseries_match(ts)) 333 | 334 | def test___getitem__(self): 335 | """This function tests selection.""" 336 | 337 | ts = self.ts.clone() 338 | 339 | ts1 = ts[:2] 340 | 341 | self.assertTrue(np.array_equal(ts1.dseries, ts.dseries[:2])) 342 | self.assertTrue(np.array_equal(ts1.tseries, ts.tseries[:2])) 343 | 344 | # test separate slicing for dseries and tseries 345 | ts1 = ts.clone() 346 | 347 | ts1.tseries = np.arange(len(ts.tseries) * 4).reshape((-1, 4)) 348 | 349 | ts2 = ts1[:5, 1] 350 | self.assertTrue(np.array_equal(ts2.dseries, ts1.dseries[:5])) 351 | self.assertTrue(np.array_equal(ts2.tseries, ts1.tseries[:5, 1])) 352 | 353 | # test separate slicing with more dimensions in tseries 354 | ts1 = TsProto() 355 | ts1.dseries = datetime(2016, 1, 1).toordinal() + np.arange(1000) 356 | 357 | ts1.tseries = np.arange(9000).reshape((1000, 3, 3)) 358 | 359 | ts2 = ts1[:500, 1] 360 | 361 | self.assertTrue(np.array_equal(ts2.dseries, 
ts1.dseries[:500])) 362 | self.assertTrue(np.array_equal(ts2.tseries, ts1.tseries[:500, 1])) 363 | 364 | ts2 = ts1[:500, :, 1] 365 | 366 | self.assertTrue(np.array_equal(ts2.dseries, ts1.dseries[:500])) 367 | self.assertTrue(np.array_equal(ts2.tseries, ts1.tseries[:500, :, 1])) 368 | 369 | ts2 = ts1[:500, 1, :2] 370 | 371 | self.assertTrue(np.array_equal(ts2.dseries, ts1.dseries[:500])) 372 | self.assertTrue(np.array_equal(ts2.tseries, ts1.tseries[:500, 1, :2])) 373 | 374 | def test_timeseries_common_length(self): 375 | """Tests truncating timeseries to a common length.""" 376 | 377 | ts1, ts2 = self.ts.common_length(self.ts, self.ts_long) 378 | self.assertEqual(len(ts1.tseries), len(ts2.tseries)) 379 | 380 | ts1, ts2, ts3 = self.ts.common_length( 381 | self.ts, self.ts_short, self.ts_long 382 | ) 383 | self.assertEqual(len(ts1.tseries), len(ts2.tseries)) 384 | self.assertEqual(len(ts1.tseries), len(ts3.tseries)) 385 | 386 | def test_timeseries_shape(self): 387 | """Tests returning the shape of the tseries.""" 388 | 389 | self.assertTupleEqual(self.ts.shape(), self.ts.tseries.shape) 390 | 391 | ts = TsProto() 392 | 393 | # test blank TsProto 394 | ts = TsProto() 395 | self.assertIsNone(ts.shape(), None) 396 | 397 | # now with data 398 | ts.dseries = np.arange(100) 399 | ts.tseries = np.arange(200).reshape(100, 2) 400 | 401 | self.assertTupleEqual(ts.shape(), (ts.tseries.shape)) 402 | 403 | def test_lengths(self): 404 | """Tests returning the lengths of the dseries and tseries.""" 405 | 406 | lengths = self.ts.lengths() 407 | 408 | self.assertEqual(lengths[0], len(self.ts.dseries)) 409 | self.assertEqual(lengths[1], len(self.ts.tseries)) 410 | 411 | self.ts.tseries = self.ts.tseries[:3] 412 | 413 | lengths = self.ts.lengths() 414 | 415 | self.assertEqual(lengths[0], len(self.ts.dseries)) 416 | self.assertEqual(lengths[1], len(self.ts.tseries)) 417 | 418 | def test_timeseries_make_arrays(self): 419 | """Tests converting lists to arrays for both dseries and tseries.""" 420 | 421 | # daily, so ordinal 422 | ts = TsProto() 423 | 424 | ts.frequency = FREQ_D 425 | 426 | ts.dseries = [i for i in range(100)] 427 | ts.tseries = [i for i in range(100)] 428 | 429 | ts.make_arrays() 430 | 431 | self.assertTrue(np.array_equal(ts.dseries, np.arange(100))) 432 | 433 | self.assertTrue(isinstance(ts.dseries[0], np.int32)) 434 | self.assertTrue(isinstance(ts.tseries[0], np.float64)) 435 | 436 | # seconds, so timestamp 437 | ts = TsProto() 438 | 439 | ts.frequency = FREQ_SEC 440 | 441 | ts.dseries = [i for i in range(100)] 442 | ts.tseries = [i for i in range(100)] 443 | 444 | ts.make_arrays() 445 | 446 | self.assertTrue(np.array_equal(ts.dseries, np.arange(100))) 447 | 448 | self.assertTrue(isinstance(ts.dseries[0], np.float64)) 449 | self.assertTrue(isinstance(ts.tseries[0], np.float64)) 450 | 451 | # verify that dseries is flattened 452 | ts.dseries = [[i] for i in range(100)] 453 | ts.tseries = [i for i in range(100)] 454 | 455 | ts.make_arrays() 456 | self.assertEqual(len(ts.dseries.shape), 1) 457 | 458 | def test_timeseries__make_array(self): 459 | """Tests making a numpy array to a specific type.""" 460 | 461 | ts = TsProto() 462 | 463 | convert_list = [i for i in range(100)] 464 | 465 | new_array = ts._make_array(convert_list, numtype=np.float64) 466 | 467 | # verify structure, does not verify type 468 | self.assertTrue(np.array_equal(new_array, np.array(convert_list))) 469 | 470 | self.assertTrue(isinstance(new_array[0], np.float64)) 471 | 472 | new_array = ts._make_array(convert_list, 
numtype=np.int32) 473 | 474 | self.assertTrue(np.array_equal(new_array, np.array(convert_list))) 475 | 476 | self.assertTrue(isinstance(new_array[0], np.int32)) 477 | 478 | def test_timeseries_clone(self): 479 | """Tests creation of a duplicate timeseries.""" 480 | 481 | ts = self.ts.clone() 482 | 483 | # is it a separate object 484 | self.assertNotEqual(ts.__str__(), self.ts.__str__()) 485 | 486 | # do the characteristics match up? 487 | self.assertEqual(ts.key, self.ts.key) 488 | self.assertEqual(ts.frequency, self.ts.frequency) 489 | self.assertTrue(np.array_equal(ts.tseries, self.ts.tseries)) 490 | self.assertTrue(np.array_equal(ts.dseries, self.ts.dseries)) 491 | self.assertListEqual(ts.columns, self.ts.columns) 492 | self.assertEqual(ts.end_of_period, self.ts.end_of_period) 493 | 494 | 495 | if __name__ == "__main__": 496 | unittest.main() 497 | -------------------------------------------------------------------------------- /tests/test_tssdict.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module tests the TssDict class 3 | """ 4 | import unittest 5 | 6 | from datetime import date, datetime, timedelta 7 | import json 8 | import numpy as np 9 | 10 | 11 | from thymus.timeseries import Timeseries 12 | from thymus.tsslist import TssList 13 | from thymus.tssdict import TssDict 14 | 15 | 16 | class TestTssDict(unittest.TestCase): 17 | """This class tests the class TssDict.""" 18 | 19 | def setUp(self): 20 | # three timeseries 21 | self.ts = Timeseries() 22 | self.ts.key = "Main" 23 | self.ts.columns = ["F1"] 24 | 25 | start_date = datetime(2015, 12, 31).toordinal() 26 | self.ts.dseries = start_date + np.arange(10) 27 | self.ts.tseries = np.arange(10) 28 | self.ts.make_arrays() 29 | 30 | # longer timeseries 31 | self.ts_long = Timeseries() 32 | self.ts_long.key = "Long" 33 | start_date = datetime(2015, 12, 31).toordinal() 34 | self.ts_long.dseries = start_date + np.arange(20) 35 | self.ts_long.tseries = np.arange(20) 36 | self.ts_long.make_arrays() 37 | 38 | # shorter timeseries 39 | self.ts_short = Timeseries() 40 | self.ts_short.key = "Short" 41 | start_date = datetime(2015, 12, 31).toordinal() 42 | self.ts_short.dseries = start_date + np.arange(5) 43 | self.ts_short.tseries = np.arange(5) 44 | self.ts_short.make_arrays() 45 | 46 | self.tssdict = TssDict([self.ts, self.ts_long, self.ts_short]) 47 | 48 | def test_class_init_(self): 49 | """Test class initialization.""" 50 | 51 | self.assertEqual(len(self.tssdict), 3) 52 | 53 | tmp_ts0 = Timeseries() 54 | tmp_ts1 = Timeseries() 55 | tmp_ts2 = Timeseries() 56 | 57 | tmp_ts0.key = "ts0" 58 | tmp_ts1.key = "ts1" 59 | tmp_ts2.key = "ts2" 60 | 61 | tssdict = TssDict([tmp_ts0, tmp_ts1, tmp_ts2]) 62 | 63 | self.assertEqual(len(tssdict), 3) 64 | 65 | tssdict = TssDict() 66 | 67 | tssdict["ts0"] = tmp_ts0 68 | tssdict["ts1"] = tmp_ts1 69 | tssdict["ts2"] = tmp_ts2 70 | 71 | self.assertEqual(len(tssdict), 3) 72 | 73 | tssdict = TssDict( 74 | { 75 | tmp_ts0.key: tmp_ts0, 76 | tmp_ts1.key: tmp_ts1, 77 | tmp_ts2.key: tmp_ts2, 78 | } 79 | ) 80 | 81 | self.assertEqual(len(tssdict), 3) 82 | 83 | def test_tssdict_from_split_ts(self): 84 | ts = Timeseries() 85 | 86 | ts.tseries = np.arange(100).reshape((10, 10)) 87 | ts.dseries = date.today().toordinal() + np.arange(10) 88 | 89 | self.assertRaises(ValueError, TssDict.split_timeseries, ts) 90 | 91 | ts.columns = [f"Col{i}" for i in range(5)] 92 | 93 | self.assertRaises(ValueError, TssDict.split_timeseries, ts) 94 | 95 | ts.columns = [f"Col{i}" for i in
range(10)] 96 | 97 | tssdict = TssDict(split=ts) 98 | 99 | self.assertListEqual(list(tssdict.keys()), ts.columns) 100 | 101 | for idx, (key, ts_tmp) in enumerate(tssdict.items()): 102 | self.assertListEqual(ts.dseries.tolist(), ts_tmp.dseries.tolist()) 103 | 104 | self.assertListEqual( 105 | ts.tseries[:, idx].tolist(), ts_tmp.tseries.flatten().tolist() 106 | ) 107 | 108 | self.assertEqual(ts.columns[idx], ts_tmp.columns[0]) 109 | 110 | def test_tssdict_min_date(self): 111 | """Tests min date""" 112 | 113 | # First add a timeseries that is earlier than the others 114 | tmp_ts0 = Timeseries() 115 | tmp_ts0.key = "First" 116 | 117 | tmp_ts0.dseries = datetime(2014, 12, 31).toordinal() - np.arange(10) 118 | tmp_ts0.tseries = np.arange(10) 119 | tmp_ts0.make_arrays() 120 | 121 | self.tssdict[tmp_ts0.key] = tmp_ts0 122 | 123 | self.assertTupleEqual( 124 | self.tssdict.min_date(), (date(2014, 12, 22), "First") 125 | ) 126 | 127 | tmp_nodata = Timeseries() 128 | tmp_nodata.key = "nothing" 129 | tssdict = TssDict() 130 | tssdict[tmp_nodata.key] = tmp_nodata 131 | 132 | self.assertTupleEqual(tssdict.min_date(), (None, "nothing")) 133 | 134 | tssdict = TssDict() 135 | 136 | # none timeseries list 137 | tssdict["test"] = [ 138 | date(2014, 12, 31) + timedelta(days=i) for i in range(10) 139 | ] 140 | tssdict["test1"] = [ 141 | date(2013, 12, 31) + timedelta(days=i) for i in range(10) 142 | ] 143 | self.assertRaises(ValueError, tssdict.min_date) 144 | 145 | def test_tssdict_max_date(self): 146 | """Tests max date""" 147 | 148 | self.assertTupleEqual( 149 | self.tssdict.max_date(), (date(2016, 1, 19), "Long") 150 | ) 151 | 152 | tssdict = TssDict() 153 | 154 | # none timeseries list 155 | tssdict["test"] = [ 156 | date(2014, 12, 31) + timedelta(days=i) for i in range(10) 157 | ] 158 | tssdict["test1"] = [ 159 | date(2013, 12, 31) + timedelta(days=i) for i in range(10) 160 | ] 161 | self.assertRaises(ValueError, tssdict.max_date) 162 | 163 | def test_tssdict_longest_ts(self): 164 | """ 165 | This test tests for the longest timeseries. 166 | """ 167 | length, key = self.tssdict.longest_ts() 168 | 169 | self.assertTupleEqual( 170 | (length, key), (self.ts_long.tseries.shape[0], "Long") 171 | ) 172 | 173 | self.tssdict["test"] = "something else" 174 | self.assertRaises(ValueError, self.tssdict.longest_ts) 175 | 176 | def test_tssdict_shortest_ts(self): 177 | """ 178 | This test tests for the shortest timeseries. 179 | """ 180 | length, key = self.tssdict.shortest_ts() 181 | 182 | self.assertTupleEqual( 183 | (length, key), (self.ts_short.tseries.shape[0], "Short") 184 | ) 185 | 186 | # zero length 187 | self.tssdict["nothing"] = Timeseries() 188 | self.assertIsNone(self.tssdict.shortest_ts()) 189 | 190 | del self.tssdict["nothing"] 191 | 192 | # bad data 193 | self.tssdict["bad"] = "something else" 194 | self.assertRaises(ValueError, self.tssdict.shortest_ts) 195 | 196 | def test_tssdict_combine(self): 197 | """ 198 | A batch of tests combining columns to one timeseries. 199 | 200 | Tests check to see whether the parameters are passed down properly to 201 | each timeseries. 
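        As exercised below, discard=True is expected to trim to the shortest
        member, while discard=False with pad=0 pads out to the longest.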
202 | """ 203 | 204 | # combine(self, discard=True, pad=None) 205 | ts_new, _ = self.tssdict.combine(discard=True, pad=None) 206 | 207 | # shape corresponds to the shortest length 208 | self.assertEqual( 209 | ts_new.tseries.shape[0], self.ts_short.tseries.shape[0] 210 | ) 211 | 212 | self.assertEqual(ts_new.tseries.shape[1], 3) 213 | 214 | # combine(self, discard=False, pad=0) 215 | ts_new, _ = self.tssdict.combine(discard=False, pad=0) 216 | 217 | # shape corresponds to the longest length 218 | self.assertEqual( 219 | ts_new.tseries.shape[0], self.ts_long.tseries.shape[0] 220 | ) 221 | 222 | self.assertEqual(ts_new.tseries.shape[1], 3) 223 | 224 | # test with TssList 225 | tmp_ts0 = Timeseries() 226 | tmp_ts0.key = "First" 227 | 228 | tmp_ts0.dseries = datetime(2014, 12, 31).toordinal() - np.arange(10) 229 | tmp_ts0.tseries = np.arange(10) 230 | tmp_ts0.make_arrays() 231 | 232 | tmp_ts1 = Timeseries() 233 | tmp_ts1.key = "Second" 234 | 235 | tmp_ts1.dseries = datetime(2014, 12, 31).toordinal() - np.arange(10) 236 | tmp_ts1.tseries = np.arange(10) 237 | tmp_ts1.make_arrays() 238 | 239 | tssdict = TssDict(TssList([tmp_ts0, tmp_ts1])) 240 | 241 | ts, _ = tssdict.combine() 242 | 243 | self.assertTupleEqual(ts.tseries.shape, (10, 2)) 244 | 245 | # test with TssDict 246 | tssdict = TssDict(TssDict([tmp_ts0, tmp_ts1])) 247 | 248 | ts, _ = tssdict.combine() 249 | self.assertTupleEqual(ts.tseries.shape, (10, 2)) 250 | 251 | def test_tssdict_get_values(self): 252 | """Tests the ability to locate the correct row of data.""" 253 | 254 | date1 = datetime(2016, 1, 4) # existing date within date series 255 | date2 = datetime(2016, 1, 16) # date falling on a weekend 256 | 257 | # get data from existing date 258 | self.assertTupleEqual( 259 | self.tssdict.get_values( 260 | date=date1, keys=["Main", "Long", "Short"] 261 | ), 262 | ((4.0, 4.0, 4.0), ("Main", "Long", "Short")), 263 | ) 264 | 265 | # attempt to get data from date not present, with notify 266 | self.assertRaises( 267 | ValueError, self.tssdict.get_values, date2, notify=True 268 | ) 269 | 270 | # attempt to get data from date not present, no notify 271 | self.assertTupleEqual( 272 | self.tssdict.get_values( 273 | date=date2, keys=["Main", "Long", "Short"] 274 | ), 275 | ((None, 16.0, None), ("Main", "Long", "Short")), 276 | ) 277 | 278 | def test_clone(self): 279 | """Verifies that a copy is made.""" 280 | tssdict = self.tssdict.clone() 281 | 282 | # is it a separate object 283 | for key, ts_new in tssdict.items(): 284 | ts_orig = self.tssdict[key] 285 | self.assertNotEqual(ts_new, ts_orig) 286 | 287 | # do the characteristics match up? 288 | self.assertEqual(len(tssdict), 3) 289 | 290 | def test_to_json(self): 291 | """ 292 | This function tests sending a TssList to a json format. 293 | 294 | Using a cheap assumption that since it is simply a dict, that as long 295 | as the timeseries are converted, the list is what is needed to check. 296 | 297 | More needs to be checked. 298 | """ 299 | 300 | json_str = self.tssdict.to_json() 301 | 302 | self.assertIsInstance(json.loads(json_str), dict) 303 | 304 | def test_from_dict(self): 305 | """ 306 | This function tests creating a TssDict instance from a dict of timeseries. 
307 | 308 | The format of the incoming timeseries is to_dict(dt_fmt='str') 309 | """ 310 | tssdict = TssDict().from_dict( 311 | { 312 | self.ts.key: self.ts.to_dict(dt_fmt="str"), 313 | self.ts_long.key: self.ts_long.to_dict(dt_fmt="str"), 314 | self.ts_short.key: self.ts_short.to_dict(dt_fmt="str"), 315 | } 316 | ) 317 | self.assertListEqual( 318 | list(tssdict.keys()), 319 | [self.ts.key, self.ts_long.key, self.ts_short.key], 320 | ) 321 | 322 | def test_from_json(self): 323 | """ 324 | This function tests building back a TssDict from a json fmt string. 325 | 326 | This relies heavily on the test for Timeseries.from_json. 327 | """ 328 | json_str = self.tssdict.to_json() 329 | 330 | tssdict = TssDict() 331 | 332 | tssdict.from_json(json_str) 333 | 334 | self.assertEqual(len(tssdict), 3) 335 | 336 | self.assertTupleEqual(tssdict["Main"].shape(), self.ts.shape()) 337 | self.assertTupleEqual(tssdict["Long"].shape(), self.ts_long.shape()) 338 | self.assertTupleEqual(tssdict["Short"].shape(), self.ts_short.shape()) 339 | 340 | test = json.dumps(["test"]) 341 | 342 | self.assertRaises(ValueError, tssdict.from_json, test) 343 | 344 | def test_tssdict_do_func(self): 345 | """Placeholder for future function.""" 346 | pass 347 | 348 | 349 | if __name__ == "__main__": 350 | unittest.main() 351 | -------------------------------------------------------------------------------- /tests/test_tsslist.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module tests the TssList class 3 | """ 4 | import unittest 5 | 6 | from datetime import date, datetime 7 | import json 8 | import numpy as np 9 | 10 | from thymus.timeseries import Timeseries 11 | from thymus.tsslist import TssList 12 | 13 | 14 | class TestTssList(unittest.TestCase): 15 | """This class tests the class TssList.""" 16 | 17 | def setUp(self): 18 | # three timeseries 19 | self.ts = Timeseries() 20 | self.ts.key = "Test Key" 21 | self.ts.columns = ["F1"] 22 | 23 | start_date = datetime(2015, 12, 31).toordinal() 24 | self.ts.dseries = start_date + np.arange(10) 25 | self.ts.tseries = np.arange(10) 26 | self.ts.make_arrays() 27 | 28 | # longer timeseries 29 | self.ts_long = Timeseries() 30 | self.ts_long.columns = ["F1"] 31 | start_date = datetime(2015, 12, 31).toordinal() 32 | self.ts_long.dseries = start_date + np.arange(20) 33 | self.ts_long.tseries = np.arange(20) 34 | self.ts_long.make_arrays() 35 | 36 | # shorter timeseries with no columns 37 | self.ts_short = Timeseries() 38 | start_date = datetime(2015, 12, 31).toordinal() 39 | self.ts_short.dseries = start_date + np.arange(5) 40 | self.ts_short.tseries = np.arange(5) 41 | self.ts_short.make_arrays() 42 | 43 | self.tss = TssList([self.ts, self.ts_long, self.ts_short]) 44 | 45 | def test_class_init_(self): 46 | """Test class initialization.""" 47 | 48 | self.assertEqual(len(self.tss), 3) 49 | 50 | tss = TssList() 51 | 52 | self.assertEqual(len(tss), 0) 53 | 54 | tss.append(Timeseries()) 55 | tss.append(Timeseries()) 56 | tss.append(Timeseries()) 57 | 58 | self.assertEqual(len(tss), 3) 59 | 60 | # initialize with something other than a list 61 | # could expand this to verifying the contents of the list 62 | self.assertRaises(ValueError, TssList, 3) 63 | 64 | # from tuple 65 | tss = TssList((Timeseries(), Timeseries(), Timeseries())) 66 | 67 | def test_tsslist_min_date(self): 68 | """Tests min date""" 69 | self.assertEqual(self.tss.min_date(), self.ts.start_date("datetime")) 70 | 71 | tmp_ts0 = Timeseries() 72 | 73 |
tmp_ts0.dseries = datetime(2014, 12, 31).toordinal() + np.arange(10) 74 | tmp_ts0.tseries = np.arange(10) 75 | tmp_ts0.make_arrays() 76 | 77 | self.tss.append(tmp_ts0) 78 | 79 | self.assertEqual(self.tss.min_date(), date(2014, 12, 31)) 80 | 81 | tss = TssList() 82 | 83 | self.assertIsNone(tss.min_date()) 84 | 85 | def test_tsslist_max_date(self): 86 | """Tests max date""" 87 | 88 | self.assertEqual(self.tss.max_date(), date(2016, 1, 19)) 89 | 90 | tmp_ts0 = Timeseries() 91 | 92 | tmp_ts0.dseries = datetime(2018, 12, 31).toordinal() - np.arange(10) 93 | tmp_ts0.tseries = np.arange(10) 94 | tmp_ts0.make_arrays() 95 | 96 | self.tss.append(tmp_ts0) 97 | 98 | self.assertEqual(self.tss.max_date(), date(2018, 12, 31)) 99 | 100 | tss = TssList() 101 | 102 | self.assertIsNone(tss.max_date()) 103 | 104 | def test_tsslist_combine(self): 105 | """ 106 | A batch of tests combining columns to one timeseries. 107 | 108 | Tests check to see whether the parameters are passed down properly to 109 | each timeseries. 110 | """ 111 | 112 | # combine(self, discard=True, pad=None) 113 | ts_new = self.tss.combine(discard=True, pad=None) 114 | 115 | # shape corresponds to the shortest length 116 | self.assertEqual( 117 | ts_new.tseries.shape[0], self.ts_short.tseries.shape[0] 118 | ) 119 | 120 | self.assertEqual(ts_new.tseries.shape[1], 3) 121 | 122 | # combine(self, discard=False, pad=0) 123 | ts_new = self.tss.combine(discard=False, pad=0) 124 | 125 | # shape corresponds to the longest length 126 | self.assertEqual( 127 | ts_new.tseries.shape[0], self.ts_long.tseries.shape[0] 128 | ) 129 | 130 | self.assertEqual(ts_new.tseries.shape[1], 3) 131 | 132 | # test instance of single timeseries in list, should return a clone 133 | tsslist = TssList([self.ts]) 134 | 135 | ts_new = tsslist.combine() 136 | self.assertNotEqual(ts_new, self.ts) 137 | self.assertListEqual(ts_new.tseries.tolist(), self.ts.tseries.tolist()) 138 | self.assertListEqual(ts_new.dseries.tolist(), self.ts.dseries.tolist()) 139 | 140 | def test_tsslist_get_values(self): 141 | """Tests the ability to locate the correct row of data.""" 142 | 143 | date1 = datetime(2016, 1, 4) # existing date within date series 144 | date2 = datetime(2016, 1, 16) # date falling on a weekend 145 | 146 | # get data from existing date 147 | self.assertTupleEqual(self.tss.get_values(date=date1), (4.0, 4.0, 4.0)) 148 | 149 | # attempt to get data from date not present, with notify 150 | self.assertRaises(ValueError, self.tss.get_values, date2, notify=True) 151 | 152 | # attempt to get data from date not present, no notify 153 | self.assertTupleEqual( 154 | self.tss.get_values(date=date2), (None, 16.0, None) 155 | ) 156 | 157 | def test_clone(self): 158 | """Verifies that a copy is made.""" 159 | tss = self.tss.clone() 160 | 161 | # is it a separate object 162 | for i, ts_new in enumerate(tss): 163 | ts_orig = self.tss[i] 164 | self.assertIsInstance(ts_orig, Timeseries) 165 | self.assertNotEqual(ts_new, ts_orig) 166 | 167 | # do the characteristics match up? 
168 | self.assertEqual(len(tss), 3) 169 | 170 | ts_orig = self.tss[0] 171 | ts_copy = tss[0] 172 | 173 | self.assertEqual(ts_copy.key, ts_orig.key) 174 | self.assertEqual(ts_copy.frequency, ts_orig.frequency) 175 | self.assertTrue(np.array_equal(ts_copy.tseries, ts_orig.tseries)) 176 | self.assertTrue(np.array_equal(ts_copy.dseries, ts_orig.dseries)) 177 | self.assertListEqual(ts_copy.columns, ts_orig.columns) 178 | self.assertEqual(ts_copy.end_of_period, ts_orig.end_of_period) 179 | 180 | ts_orig = self.tss[1] 181 | ts_copy = tss[1] 182 | 183 | self.assertEqual(ts_copy.key, ts_orig.key) 184 | self.assertEqual(ts_copy.frequency, ts_orig.frequency) 185 | self.assertTrue(np.array_equal(ts_copy.tseries, ts_orig.tseries)) 186 | self.assertTrue(np.array_equal(ts_copy.dseries, ts_orig.dseries)) 187 | self.assertEqual(ts_copy.columns, ts_orig.columns) 188 | self.assertEqual(ts_copy.end_of_period, ts_orig.end_of_period) 189 | 190 | ts_orig = self.tss[2] 191 | ts_copy = tss[2] 192 | 193 | self.assertEqual(ts_copy.key, ts_orig.key) 194 | self.assertEqual(ts_copy.frequency, ts_orig.frequency) 195 | self.assertTrue(np.array_equal(ts_copy.tseries, ts_orig.tseries)) 196 | self.assertTrue(np.array_equal(ts_copy.dseries, ts_orig.dseries)) 197 | self.assertEqual(ts_copy.columns, ts_orig.columns) 198 | self.assertEqual(ts_copy.end_of_period, ts_orig.end_of_period) 199 | 200 | def test_as_dict(self): 201 | "Can it return a dict from the list?" 202 | 203 | self.assertRaises(ValueError, self.tss.as_dict) 204 | 205 | test_dict = {} 206 | for i in range(len(self.tss)): 207 | ts = self.tss[i] 208 | ts.key = "key_%i" % (i) 209 | test_dict[ts.key] = ts 210 | 211 | self.assertDictEqual(self.tss.as_dict(), test_dict) 212 | 213 | tss = TssList() 214 | # no key 215 | ts = Timeseries() 216 | ts.tseries = np.arange(5) 217 | ts.dseries = [date.today().toordinal() + i for i in range(5)] 218 | ts.make_arrays() 219 | 220 | tss.append(ts) 221 | 222 | self.assertRaises(ValueError, tss.as_dict) 223 | 224 | def test_to_json(self): 225 | """ 226 | This function tests sending a TssList to a json format. 227 | 228 | This uses the cheap assumption that, since it is simply a list, as long 229 | as the timeseries are converted, checking the list itself is enough. 230 | 231 | More needs to be checked. 232 | """ 233 | 234 | json_str = self.tss.to_json() 235 | 236 | self.assertIsInstance(json.loads(json_str), list) 237 | 238 | def test_from_json(self): 239 | """ 240 | This function tests building back a TssList from a json fmt string. 241 | 242 | This relies heavily on the test for Timeseries.from_json. 243 | """ 244 | json_str = self.tss.to_json() 245 | 246 | tsslist = TssList() 247 | 248 | tsslist.from_json(json_str) 249 | 250 | self.assertEqual(len(tsslist), 3) 251 | 252 | # did it come back in the right order?
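        # the three series have different lengths, so matching shapes in this
        # order also confirms the original ordering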
253 | self.assertTupleEqual(tsslist[0].shape(), self.ts.shape()) 254 | self.assertTupleEqual(tsslist[1].shape(), self.ts_long.shape()) 255 | self.assertTupleEqual(tsslist[2].shape(), self.ts_short.shape()) 256 | 257 | def test_tsslist_do_func(self): 258 | """Placeholder for future function.""" 259 | pass 260 | 261 | 262 | if __name__ == "__main__": 263 | unittest.main() 264 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py310,py311 3 | 4 | [testenv] 5 | usedevelop = true 6 | deps = 7 | pytest>=7.4.2 8 | pytest-cov>=4.1.0 9 | pytest-datadir>=1.3.1 10 | filetype>=1.0.7 11 | numpy 12 | commands = 13 | pytest --cov=src --cov-report=html {posargs} 14 | 15 | [flake8] 16 | extend-ignore=E203 17 | per-file-ignores=__init__.py:F401 18 | exclude=.tox,venv 19 | 20 | [testenv:lint] 21 | deps = flake8>=6.0.0 22 | # skip_install = true 23 | commands = 24 | flake8 . --max-line-length=100 --max-complexity 10 25 | --------------------------------------------------------------------------------