├── test
│   ├── test_real
│   └── experiments
│       ├── functions
│       │   └── files
│       │       ├── signature.py
│       │       └── compressed_pickle.py
│       ├── abstractions
│       │   └── search
│       │       └── general_search.py
│       └── notebooks
│           └── File Command Logic.ipynb
├── examples
│   ├── sample_env.py
│   ├── user_settings.py
│   ├── user_handler.py
│   └── sample_env_refactor.py
├── jamboree
│   ├── base
│   │   ├── __init__.py
│   │   ├── old
│   │   │   └── __init__.py
│   │   ├── processors
│   │   │   ├── legacy.py
│   │   │   ├── __init__.py
│   │   │   ├── abstracts
│   │   │   │   ├── __init__.py
│   │   │   │   ├── search.py
│   │   │   │   ├── files.py
│   │   │   │   ├── main.py
│   │   │   │   ├── legacy.py
│   │   │   │   └── event.py
│   │   │   ├── search.py
│   │   │   ├── main.py
│   │   │   └── files.py
│   │   ├── core.py
│   │   └── handler.py
│   ├── storage
│   │   ├── __init__.py
│   │   ├── files
│   │   │   ├── __init__.py
│   │   │   ├── redisify
│   │   │   │   ├── __init__.py
│   │   │   │   └── core.py
│   │   │   └── core.py
│   │   ├── databases
│   │   │   ├── __init__.py
│   │   │   ├── database.py
│   │   │   └── jmongo.py
│   │   └── README.md
│   ├── utils
│   │   ├── settings.py
│   │   ├── support
│   │   │   ├── storage
│   │   │   │   ├── checksums.py
│   │   │   │   ├── __init__.py
│   │   │   │   └── cereal.py
│   │   │   ├── search
│   │   │   │   ├── assistance
│   │   │   │   │   ├── cache.py
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── keystore.py
│   │   │   │   │   └── inserter.py
│   │   │   │   ├── __init__.py
│   │   │   │   ├── querying.py
│   │   │   │   ├── validation.py
│   │   │   │   └── core.py
│   │   │   ├── __init__.py
│   │   │   └── events
│   │   │       ├── feature.py
│   │   │       ├── clock.py
│   │   │       └── cereal.py
│   │   ├── __init__.py
│   │   ├── context
│   │   │   ├── __init__.py
│   │   │   └── main.py
│   │   ├── core
│   │   │   ├── __init__.py
│   │   │   ├── ordefault.py
│   │   │   └── fhash.py
│   │   └── caches.py
│   ├── middleware
│   │   ├── __init__.py
│   │   ├── procedures
│   │   │   ├── management
│   │   │   │   ├── __init__.py
│   │   │   │   └── strategy.py
│   │   │   ├── models
│   │   │   │   ├── __init__.py
│   │   │   │   ├── README.md
│   │   │   │   ├── _flow.py
│   │   │   │   ├── _sklearn.py
│   │   │   │   ├── _creme.py
│   │   │   │   └── _torch.py
│   │   │   ├── __init__.py
│   │   │   ├── README.md
│   │   │   └── core.py
│   │   └── processors
│   │       ├── __init__.py
│   │       ├── base.py
│   │       └── resample.py
│   ├── handlers
│   │   ├── abstracted
│   │   │   ├── __init__.py
│   │   │   ├── features.py
│   │   │   ├── search
│   │   │   │   ├── __init__.py
│   │   │   │   ├── meta.py
│   │   │   │   └── updated.py
│   │   │   └── datasets
│   │   │       ├── __init__.py
│   │   │       ├── economic.py
│   │   │       ├── price.py
│   │   │       └── orderbook.py
│   │   ├── complex
│   │   │   ├── __init__.py
│   │   │   ├── engines
│   │   │   │   └── __init__.py
│   │   │   ├── backtestable
│   │   │   │   ├── default
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── files.py
│   │   │   │   │   └── db.py
│   │   │   │   ├── __init__.py
│   │   │   │   ├── files.py
│   │   │   │   └── db.py
│   │   │   ├── README.md
│   │   │   ├── model.py
│   │   │   ├── meta.py
│   │   │   └── metric.py
│   │   ├── processors
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   └── resample.py
│   │   ├── __init__.py
│   │   ├── default
│   │   │   ├── __init__.py
│   │   │   ├── access.py
│   │   │   └── blob.py
│   │   └── base.py
│   └── __init__.py
├── scripts
│   └── search
│       └── meta_search_handler.py
├── docs
│   ├── .DS_Store
│   ├── event_sourcing.png
│   ├── jamboree_logo.png
│   ├── readme
│   │   ├── .DS_Store
│   │   ├── How Jamboree Works.md
│   │   └── Insert No Duplicates.md
│   ├── imgs
│   │   ├── event_sourcing.png
│   │   ├── jamboree_logo.png
│   │   ├── jamboree_long.png
│   │   ├── event-sourcing_long.png
│   │   └── jamboree-long-new.png
│   ├── redis_event_source.png
│   ├── redis_mongo_layer.png
│   ├── Event Source Redis Key System.png
│   ├── JIP
│   │   └── SchemaDesign.md
│   └── notebooks
│       ├── Untitled.ipynb
│       └── Verification.ipynb
├── .dockerignore
├── PKG-INFO
├── pyproject.toml
├── .gitignore
├── setup.py
└── README.md

/test/test_real:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/sample_env.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/base/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/storage/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/utils/settings.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/base/old/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/middleware/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/base/processors/legacy.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/handlers/abstracted/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/handlers/abstracted/features.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/utils/support/storage/checksums.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/handlers/complex/__init__.py:
--------------------------------------------------------------------------------
1 | from .meta import MetaHandler
--------------------------------------------------------------------------------
/jamboree/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .caches import memoized_method, omit
--------------------------------------------------------------------------------
/scripts/search/meta_search_handler.py:
--------------------------------------------------------------------------------
1 | 
2 | def main():
3 |     pass
--------------------------------------------------------------------------------
/jamboree/storage/files/__init__.py:
--------------------------------------------------------------------------------
1 | from .core import FileStorageConnection
--------------------------------------------------------------------------------
/jamboree/handlers/complex/engines/__init__.py:
--------------------------------------------------------------------------------
1 | from .files import FileEngine
--------------------------------------------------------------------------------
/jamboree/utils/support/search/assistance/cache.py:
--------------------------------------------------------------------------------
1 | """
2 | Cache
3 | """
--------------------------------------------------------------------------------
/docs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/.DS_Store
--------------------------------------------------------------------------------
/jamboree/utils/support/search/assistance/__init__.py:
--------------------------------------------------------------------------------
1 | from .keystore import Keystore
--------------------------------------------------------------------------------
/jamboree/utils/support/__init__.py:
--------------------------------------------------------------------------------
1 | from .storage.cereal import serialize, deserialize
--------------------------------------------------------------------------------
/jamboree/utils/support/events/feature.py:
--------------------------------------------------------------------------------
1 | """
2 | Here for feature conversions
3 | """
--------------------------------------------------------------------------------
/jamboree/utils/support/storage/__init__.py:
--------------------------------------------------------------------------------
1 | from .cereal import serialize, deserialize
--------------------------------------------------------------------------------
/docs/event_sourcing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/event_sourcing.png
--------------------------------------------------------------------------------
/docs/jamboree_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/jamboree_logo.png
--------------------------------------------------------------------------------
/docs/readme/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/readme/.DS_Store
--------------------------------------------------------------------------------
/jamboree/middleware/procedures/management/__init__.py:
--------------------------------------------------------------------------------
1 | # from .models import ModelProcedureManagement
--------------------------------------------------------------------------------
/jamboree/middleware/procedures/management/strategy.py:
--------------------------------------------------------------------------------
1 | """
2 | Strategy procedure managements
3 | """
--------------------------------------------------------------------------------
/jamboree/storage/files/redisify/__init__.py:
--------------------------------------------------------------------------------
1 | from .core import RedisFileProcessor, RedisFileConnection
--------------------------------------------------------------------------------
/docs/imgs/event_sourcing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/imgs/event_sourcing.png
--------------------------------------------------------------------------------
/docs/imgs/jamboree_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/imgs/jamboree_logo.png
--------------------------------------------------------------------------------
/docs/imgs/jamboree_long.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/imgs/jamboree_long.png
--------------------------------------------------------------------------------
/docs/redis_event_source.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/redis_event_source.png
--------------------------------------------------------------------------------
/docs/redis_mongo_layer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/redis_mongo_layer.png
--------------------------------------------------------------------------------
/jamboree/utils/context/__init__.py:
--------------------------------------------------------------------------------
1 | from .main import example_space, timecontext, watch_loop, watch_loop_callback
--------------------------------------------------------------------------------
/docs/imgs/event-sourcing_long.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/imgs/event-sourcing_long.png
--------------------------------------------------------------------------------
/docs/imgs/jamboree-long-new.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/imgs/jamboree-long-new.png
--------------------------------------------------------------------------------
/jamboree/handlers/processors/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import DataProcessorsAbstract
2 | from .resample import DynamicResample
--------------------------------------------------------------------------------
/test/experiments/functions/files/signature.py:
--------------------------------------------------------------------------------
1 | """
2 | Create a signature for a complex data type
3 | """
4 | 
5 | 
6 | 
--------------------------------------------------------------------------------
/docs/Event Source Redis Key System.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/Event Source Redis Key System.png
--------------------------------------------------------------------------------
/jamboree/middleware/processors/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import DataProcessorsAbstract
2 | from .resample import DynamicResample
--------------------------------------------------------------------------------
/jamboree/utils/support/events/clock.py:
--------------------------------------------------------------------------------
1 | """
2 | # Clock
3 | 
4 | - All of the time functions you need for zsets
5 | """
--------------------------------------------------------------------------------
/test/experiments/abstractions/search/general_search.py:
--------------------------------------------------------------------------------
1 | class GeneralSearch(object):
2 |     def __init__(self):
3 |         pass
--------------------------------------------------------------------------------
/jamboree/handlers/abstracted/search/__init__.py:
--------------------------------------------------------------------------------
1 | from .updated import ParameterizedSearch
2 | from .meta import MetadataSearchHandler
--------------------------------------------------------------------------------
/jamboree/handlers/complex/backtestable/default/__init__.py:
--------------------------------------------------------------------------------
1 | from .db import BacktestDBHandler
2 | from .files import BlobStorageHandler
--------------------------------------------------------------------------------
/jamboree/utils/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .fhash import consistent_hash, consistent_unhash, omit
2 | from .ordefault import dict_validation
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | .git
2 | .gitignore
3 | LICENSE
4 | VERSION
5 | Changelog.md
6 | Makefile
7 | docker-compose.yml
8 | .gitlab-ci.yml
9 | README.*
--------------------------------------------------------------------------------
/jamboree/handlers/complex/backtestable/__init__.py:
--------------------------------------------------------------------------------
1 | from .default.db import BacktestDBHandler
2 | from .default.files import BacktestBlobHandler
--------------------------------------------------------------------------------
/jamboree/handlers/abstracted/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .economic import EconomicData
2 | from .price import PriceData
3 | from .orderbook import OrderbookData
--------------------------------------------------------------------------------
/jamboree/handlers/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BaseHandler
2 | from .default import DBHandler, TimeHandler, DataHandler, MultiDataManagement
3 | # from .complex.model import ModelEngine
--------------------------------------------------------------------------------
/jamboree/base/processors/__init__.py:
--------------------------------------------------------------------------------
1 | from .abstracts.legacy import LegacyProcessor
2 | from .abstracts.event import EventProcessor
3 | from .abstracts.files import FileProcessor
4 | from .abstracts.search import SearchProcessor
--------------------------------------------------------------------------------
/jamboree/base/processors/abstracts/__init__.py:
--------------------------------------------------------------------------------
1 | from .legacy import LegacyProcessor
2 | from .event import EventProcessor
3 | from .files import FileProcessor
4 | from .search import SearchProcessor
5 | from .main import Processor
--------------------------------------------------------------------------------
/jamboree/base/processors/abstracts/search.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import List
3 | 
4 | class SearchProcessor(ABC):
5 |     """ All of the common search queries will go here """
6 |     def search(self):
7 |         pass
--------------------------------------------------------------------------------
/jamboree/middleware/procedures/models/__init__.py:
--------------------------------------------------------------------------------
1 | from ._creme import CremeProcedure
2 | from ._sklearn import SklearnProcedure, CustomSklearnGaussianProcedure
3 | from ._torch import TorchProcedure
4 | from ._flow import TFKerasProcedure
--------------------------------------------------------------------------------
/jamboree/handlers/default/__init__.py:
--------------------------------------------------------------------------------
1 | from .db import DBHandler
2 | from .access import Access
3 | from .time import TimeHandler
4 | from .data import DataHandler
5 | from .multi import MultiDataManagement
6 | from .blob import BlobStorageHandler
7 | 
--------------------------------------------------------------------------------
/jamboree/storage/databases/__init__.py:
--------------------------------------------------------------------------------
1 | from .database import DatabaseConnection
2 | from .jmongo import MongoDatabaseConnection
3 | from .jredis import RedisDatabaseConnection
4 | from .jredis_zset import RedisDatabaseZSetsConnection as ZRedisDatabaseConnection
--------------------------------------------------------------------------------
/jamboree/middleware/procedures/__init__.py:
--------------------------------------------------------------------------------
1 | from .core import ProcedureAbstract, ProcedureManagement, ModelProcedureAbstract
2 | # from .models import CremeProcedure, SklearnProcedure, TFKerasProcedure, TorchProcedure
3 | # from .management import ModelProcedureManagement
--------------------------------------------------------------------------------
/jamboree/__init__.py:
--------------------------------------------------------------------------------
1 | from .base.core import Jamboree
2 | from .base.processors.main import Jamboree as JamboreeNew
3 | from .handlers.base import BaseHandler
4 | from .handlers.default.db import DBHandler
5 | from .handlers.default.data import DataHandler
6 | from .handlers.default.time import TimeHandler
--------------------------------------------------------------------------------
/jamboree/handlers/abstracted/datasets/economic.py:
--------------------------------------------------------------------------------
1 | from jamboree.handlers.default import DataHandler
2 | 
3 | class EconomicData(DataHandler):
4 |     """
5 |     # Economic Data
6 | 
7 |     A way to browse economic data. It is an extension of DataHandler and includes basic searches.
8 | """ 9 | pass -------------------------------------------------------------------------------- /jamboree/utils/core/ordefault.py: -------------------------------------------------------------------------------- 1 | def dict_validation(obj:dict) -> bool: 2 | obj_keys = list(obj.keys()) 3 | for x in ['subcategories', 'entity', 'submetatype', 'name', 'metatype', 'category', "abbreviation"]: 4 | if x not in obj_keys: 5 | return False 6 | return True 7 | 8 | def default(obj): 9 | pass -------------------------------------------------------------------------------- /jamboree/utils/support/search/__init__.py: -------------------------------------------------------------------------------- 1 | from .validation import is_nested, is_gen_type, name_match, is_generic, is_geo, to_str, to_field, is_queryable_dict 2 | from .validation import is_valid_geo, is_valid_bool, is_valid_numeric, is_valid_tags, is_valid_text 3 | from .builders import InsertBuilder, QueryBuilder 4 | from .core import BaseSearchHandlerSupport -------------------------------------------------------------------------------- /jamboree/utils/support/events/cereal.py: -------------------------------------------------------------------------------- 1 | """ 2 | # Serialization commands 3 | 4 | JSON serialization functions specically tailored to the events segment of the code base 5 | """ 6 | 7 | def single_one(): 8 | pass 9 | 10 | def bulk_serialize(): 11 | pass 12 | 13 | def bulk_unserialize(): 14 | pass 15 | 16 | def serialize_df(): 17 | pass -------------------------------------------------------------------------------- /jamboree/base/processors/search.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from typing import List 3 | 4 | class SearchProcessor(ABC): 5 | """ All of the common search queries will go here """ 6 | def search(self): 7 | pass 8 | 9 | 10 | def search_specific(self): 11 | pass 12 | 13 | def search_all(self): 14 | """ Search all of the tags""" 15 | pass -------------------------------------------------------------------------------- /docs/JIP/SchemaDesign.md: -------------------------------------------------------------------------------- 1 | ```py 2 | class RequirementsSchema(Schema): 3 | __metadata__ = DescriptionObject(**parameters) 4 | 5 | field = FieldType(name,**parameters) # parameters here describe how the data will be used 6 | field = FieldType(**parameters) 7 | field = FieldType(**parameters) 8 | field = FieldType(**parameters) 9 | field = FieldType(**parameters) 10 | ``` -------------------------------------------------------------------------------- /jamboree/utils/support/storage/cereal.py: -------------------------------------------------------------------------------- 1 | import lz4.frame 2 | import dill 3 | 4 | """ 5 | # COMPRESSED SERIALIZATION LIBRARY 6 | 7 | Simply compress and serialize 8 | """ 9 | 10 | def serialize(obj): 11 | """ Should take a complex object and pickle it""" 12 | pickled = dill.dumps(obj, byref=False) 13 | compressed = lz4.frame.compress(pickled) 14 | return compressed 15 | 16 | def deserialize(obj): 17 | """ Should take a serialized object and pickle""" 18 | decompressed = lz4.frame.decompress(obj) 19 | unpickled = dill.loads(decompressed) 20 | return unpickled -------------------------------------------------------------------------------- /jamboree/middleware/processors/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from typing import Any 3 
/jamboree/middleware/processors/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import Any
3 | 
4 | 
5 | class DataProcessorsAbstract(ABC):
6 |     """ DataProcessor is used to transform data """
7 |     def __init__(self, name, **kwargs):
8 |         self._name = name
9 |         self.set_settings(**kwargs)
10 | 
11 |     def set_settings(self, **kwargs):
12 |         raise NotImplementedError(
13 |             "Need to set the settings you're expecting for this preprocessor"
14 |         )
15 | 
16 |     def process(self, data:Any) -> Any:
17 |         raise NotImplementedError(
18 |             "A command to preprocess information and return that info."
19 |         )
20 | 
--------------------------------------------------------------------------------
/jamboree/middleware/procedures/models/README.md:
--------------------------------------------------------------------------------
1 | # Machine Learning Interaction Procedures
2 | 
3 | 
4 | Forces all ML libraries to be accessed using the exact same sklearn-like API.
5 | 
6 | 
7 | * `predict(X, **params)`
8 | * `pred_proba(X, y, **params)`
9 | * `fit(X, y, **params)`
10 | * `partial_fit(X, y, **params)`
11 | * `adjust(X, y, **params)`
12 | * `.metrics` - Gets the metrics of the model. `X` and `y` are supposed to be taken from the adjust column.
13 | * `get_params()`
14 | * `set_params(**params)`
15 | * `extract()` - Gets the full model in a storable format
16 | 
17 | Will need to run through some walk-forward testing examples after the backtest is done.
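18 | 
19 | As a minimal sketch of that contract, a procedure might wrap an sklearn estimator like this (the class name and the `SGDClassifier` choice are illustrative only, not the library's real `ModelProcedureAbstract`):
20 | 
21 | ```py
22 | from sklearn.linear_model import SGDClassifier
23 | 
24 | 
25 | class SketchProcedure:
26 |     def __init__(self, **params):
27 |         self.model = SGDClassifier(**params)
28 | 
29 |     def fit(self, X, y, **params):
30 |         self.model.fit(X, y, **params)
31 | 
32 |     def partial_fit(self, X, y, **params):
33 |         self.model.partial_fit(X, y, **params)
34 | 
35 |     def predict(self, X, **params):
36 |         return self.model.predict(X, **params)
37 | 
38 |     def get_params(self):
39 |         return self.model.get_params()
40 | 
41 |     def set_params(self, **params):
42 |         self.model.set_params(**params)
43 | 
44 |     def extract(self):
45 |         # Hand the raw estimator back in a storable form
46 |         return self.model
47 | ```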
--------------------------------------------------------------------------------
/jamboree/handlers/processors/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import Any
3 | 
4 | 
5 | class DataProcessorsAbstract(ABC):
6 |     """ DataProcessor is used to transform data """
7 |     def __init__(self, name, **kwargs):
8 |         self._name = name
9 |         self.set_settings(**kwargs)
10 | 
11 |     def set_settings(self, **kwargs):
12 |         raise NotImplementedError(
13 |             "Need to set the settings you're expecting for this preprocessor"
14 |         )
15 | 
16 |     def process(self, data:Any) -> Any:
17 |         raise NotImplementedError(
18 |             "A command to preprocess information and return that info."
19 |         )
20 | 
--------------------------------------------------------------------------------
/jamboree/utils/core/fhash.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import ujson
3 | import orjson
4 | from cytoolz import keyfilter
5 | 
6 | 
7 | def consistent_hash(query: dict) -> str:
8 |     _hash = ujson.dumps(query, sort_keys=True)
9 |     _hash = base64.b64encode(str.encode(_hash))
10 |     _hash = _hash.decode('utf-8')
11 |     return _hash
12 | 
13 | def consistent_unhash(_hash:str) -> dict:
14 |     """ Take a consistent hash (sorted) and turn it back into a dictionary"""
15 |     decoded_hash = base64.b64decode(_hash).decode('utf-8')
16 |     _hash_dict = ujson.loads(decoded_hash)
17 |     return _hash_dict
18 | 
19 | 
20 | def omit(blacklist, d):
21 |     return keyfilter(lambda k: k not in blacklist, d)
22 | 
--------------------------------------------------------------------------------
/test/experiments/functions/files/compressed_pickle.py:
--------------------------------------------------------------------------------
1 | import cloudpickle as clp
2 | import lz4.frame
3 | 
4 | 
5 | def serialize(obj):
6 |     """ Should take a complex object and pickle it"""
7 |     pickled = clp.dumps(obj)
8 |     compressed = lz4.frame.compress(pickled)
9 |     return compressed
10 | 
11 | def deserialize(obj):
12 |     """ Should take a serialized object and unpickle it"""
13 |     decompressed = lz4.frame.decompress(obj)
14 |     unpickled = clp.loads(decompressed)
15 |     return unpickled
16 | 
17 | class SampleObject(object):
18 |     def __init__(self) -> None:
19 |         self.one = "IAHSUALKS"
20 |         self.two = "AYVUKASAVS"
21 | 
22 | def main():
23 |     sample = SampleObject()
24 |     ssample = serialize(sample)
25 |     dsample = deserialize(ssample)
26 |     assert sample.one == dsample.one
27 | 
28 | if __name__ == "__main__":
29 |     main()
--------------------------------------------------------------------------------
/jamboree/storage/README.md:
--------------------------------------------------------------------------------
1 | # Storage Models
2 | 
3 | The storage model will present a common interface for all common queries and write commands. They will be separated into two parts:
4 | 
5 | 1. Files (TBA)
6 |     * This will be for everything related to file management. The central idea behind it is that we'll be able to store gigabyte to terabyte sized files into cloud platforms, such as S3 & DataLake.
7 |     * We'll also have procedures to store information into memory, such as redis. We'll split the files at a higher level so they can be better handled.
8 | 2. Database Connection
9 |     * Since the main Jamboree object is starting to become bloated, the main goal here is to create something that would allow us to run through different datastores with little to no problem.
10 |     * We're starting with `mongodb` and `redis`, but with the abstracts available we'll be able to move into other data stores as well.
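11 | 
12 | A minimal sketch of that swappable-datastore idea (the method names here are hypothetical, not the real abstracts):
13 | 
14 | ```py
15 | from abc import ABC, abstractmethod
16 | 
17 | 
18 | class ConnectionSketch(ABC):
19 |     """Common interface every datastore connection implements."""
20 | 
21 |     @abstractmethod
22 |     def save(self, query: dict, data: dict):
23 |         ...
24 | 
25 | 
26 | class RedisConnectionSketch(ConnectionSketch):
27 |     def save(self, query: dict, data: dict):
28 |         ...  # write into the in-memory store
29 | 
30 | 
31 | class MongoConnectionSketch(ConnectionSketch):
32 |     def save(self, query: dict, data: dict):
33 |         ...  # write into long-term storage
34 | 
35 | 
36 | def persist(conn: ConnectionSketch, query: dict, data: dict):
37 |     conn.save(query, data)  # callers never care which store is behind it
38 | ```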
--------------------------------------------------------------------------------
/jamboree/utils/caches.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import weakref
3 | 
4 | from cytoolz import keyfilter
5 | 
6 | 
7 | def omit(blacklist, d):
8 |     return keyfilter(lambda k: k not in blacklist, d)
9 | 
10 | def memoized_method(*lru_args, **lru_kwargs):
11 |     def decorator(func):
12 |         @functools.wraps(func)
13 |         def wrapped_func(self, *args, **kwargs):
14 |             # We're storing the wrapped method inside the instance. If we had
15 |             # a strong reference to self the instance would never die.
16 |             self_weak = weakref.ref(self)
17 |             @functools.wraps(func)
18 |             @functools.lru_cache(*lru_args, **lru_kwargs)
19 |             def cached_method(*args, **kwargs):
20 |                 return func(self_weak(), *args, **kwargs)
21 |             setattr(self, func.__name__, cached_method)
22 |             return cached_method(*args, **kwargs)
23 |         return wrapped_func
24 |     return decorator
--------------------------------------------------------------------------------
/jamboree/base/processors/main.py:
--------------------------------------------------------------------------------
1 | from redis import Redis
2 | from jamboree.base.processors.abstracts import Processor
3 | from jamboree.base.processors.event import JamboreeEvents
4 | from jamboree.base.processors.files import JamboreeFileProcessor
5 | class Jamboree(Processor):
6 |     def __init__(self, **kwargs) -> None:
7 |         super().__init__()
8 | 
9 |         redis_host = kwargs.get("REDIS_HOST", "localhost")
10 |         redis_port = int(kwargs.get("REDIS_PORT", "6379"))
11 |         mongo_host = kwargs.get("MONGO_HOST", "localhost")
12 |         rconn = Redis(host=redis_host, port=redis_port)
13 |         # redis.Redis(redis_host, port=redis_port)
14 | 
15 |         self.event = JamboreeEvents(
16 |             mongodb_host=mongo_host,
17 |             redis_host=redis_host,
18 |             redis_port=redis_port
19 |         )
20 | 
21 |         # Set the files management here
22 |         self.storage = JamboreeFileProcessor()
23 |         self.storage.rconn = rconn
24 |         self.event.rconn = rconn
25 |         self.event.initialize()
26 |         self.storage.initialize()
27 | 
--------------------------------------------------------------------------------
/jamboree/base/core.py:
--------------------------------------------------------------------------------
1 | from redis import Redis
2 | from jamboree.base.processors.abstracts import Processor
3 | from jamboree.base.processors.event import JamboreeEvents
4 | from jamboree.base.processors.files import JamboreeFileProcessor
5 | 
6 | class Jamboree(Processor):
7 |     def __init__(self, **kwargs) -> None:
8 |         super().__init__()
9 | 
10 |         redis_host = kwargs.get("REDIS_HOST", "localhost")
11 |         redis_port = int(kwargs.get("REDIS_PORT", "6379"))
12 |         mongo_host = kwargs.get("MONGO_HOST", "localhost")
13 |         rconn = Redis(host=redis_host, port=redis_port)
14 |         # redis.Redis(redis_host, port=redis_port)
15 | 
16 |         self.event = JamboreeEvents(
17 |             mongodb_host=mongo_host,
18 |             redis_host=redis_host,
19 |             redis_port=redis_port
20 |         )
21 | 
22 |         # Set the files management here
23 |         self.storage = JamboreeFileProcessor()
24 |         self.storage.rconn = rconn
25 |         self.event.rconn = rconn
26 |         self.event.initialize()
27 |         self.storage.initialize()
28 |         self.rconn = rconn
--------------------------------------------------------------------------------
/jamboree/handlers/complex/README.md:
--------------------------------------------------------------------------------
1 | # Complex Handlers
2 | 
3 | Complex handlers have multiple types included inside that need to be synced. They're more common inside of Linkkt's proprietary systems. These will be systems we'll leave a bit more exposed. A good set of examples:
4 | 
5 | 1. MetaHandler
6 |     1. Will keep track of all metadata for our system
7 |     2. Metadata will be overwritable records and also include the future search handler
8 |     3. The search handler will help us find associated data
9 | 2. Metric
10 |     1. The metric handler will extend from the DBHandler, though it'll also
11 |         1. Be searchable by including our MetaHandler
12 |         2. Attach our TimeHandler so we can dynamically backtest and see a model perform over time
13 | 3. ModelHandler
14 |     1. Will extend from the BlobHandler
15 |     2. It'll also include a `MetricHandler` and `MetaHandler`
16 |     3. The `MetricHandler` will allow us to monitor how predictions are progressing over time
17 |         1. Since this has a `TimeHandler` included we'll be able to track metrics for a given model
18 |         2. Since this also has its own designated `MetaHandler` we'll be able to search for a model's effectiveness over time from a different system.
19 |     4. The `MetaHandler` will also help us find the model later, as the sketch below shows
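20 | 
21 | A minimal, self-contained sketch of that composition (all class names below are illustrative stand-ins, not the real handlers):
22 | 
23 | ```py
24 | class MetaHandlerSketch:
25 |     """Keeps searchable metadata so records can be found later."""
26 | 
27 | 
28 | class TimeHandlerSketch:
29 |     """Steps through episode time for dynamic backtests."""
30 | 
31 | 
32 | class MetricHandlerSketch:
33 |     """DB-style handler that is both searchable and time-aware."""
34 | 
35 |     def __init__(self):
36 |         self.meta = MetaHandlerSketch()
37 |         self.time = TimeHandlerSketch()
38 | 
39 | 
40 | class ModelHandlerSketch:
41 |     """Blob-style handler that also tracks its own metrics and metadata."""
42 | 
43 |     def __init__(self):
44 |         self.metrics = MetricHandlerSketch()  # how predictions progress over time
45 |         self.meta = MetaHandlerSketch()       # makes the model findable later
46 | ```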
--------------------------------------------------------------------------------
/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 2.1
2 | Name: jamboree
3 | Version: 0.5.2
4 | Summary: A multi-layer event sourcing and general data library
5 | Author: Kevin Hill
6 | Author-email: kah.kevin.hill@gmail.com
7 | Requires-Python: >=3.7,<4.0
8 | Classifier: Programming Language :: Python :: 3
9 | Classifier: Programming Language :: Python :: 3.7
10 | Classifier: Programming Language :: Python :: 3.8
11 | Requires-Dist: addict (>=2.2.1,<3.0.0)
12 | Requires-Dist: crayons (>=0.3.0,<0.4.0)
13 | Requires-Dist: cytoolz (>=0.10.1,<0.11.0)
14 | Requires-Dist: dill (>=0.3.1,<0.4.0)
15 | Requires-Dist: funtime (>=0.4.7,<0.5.0)
16 | Requires-Dist: gym (>=0.17.1,<0.18.0)
17 | Requires-Dist: json-tricks (>=3.14.0,<4.0.0)
18 | Requires-Dist: loguru (>=0.4.1,<0.5.0)
19 | Requires-Dist: lz4 (>=3.0.2,<4.0.0)
20 | Requires-Dist: maya (>=0.6.1,<0.7.0)
21 | Requires-Dist: numpy
22 | Requires-Dist: pandas (>=1.0.3,<2.0.0)
23 | Requires-Dist: pandas_datareader (>=0.8.1,<0.9.0)
24 | Requires-Dist: pebble (>=4.5.1,<5.0.0)
25 | Requires-Dist: pytest (>=5.4.1,<6.0.0)
26 | Requires-Dist: redis (==3.3.11)
27 | Requires-Dist: sklearn (>=0.0,<0.1)
28 | Requires-Dist: torch (>=1.4.0,<2.0.0)
29 | Requires-Dist: torchvision (>=0.5.0,<0.6.0)
30 | Requires-Dist: ujson (>=2.0.2,<3.0.0)
31 | Requires-Dist: version_query (>=1.1.0,<2.0.0)
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "jamboree"
3 | version = "0.8.8"
4 | description = "A multi-layer event sourcing and general data library"
5 | authors = ["Kevin Hill <kah.kevin.hill@gmail.com>"]
6 | 
7 | [tool.poetry.dependencies]
8 | python = "^3.6.8"
9 | numpy = "*"
10 | pandas = "^1.0.3"
11 | loguru = "^0.4.1"
12 | pebble = "^4.5.1"
13 | maya = "^0.6.1"
14 | ujson = "^2.0.2"
15 | gym = "^0.17.1"
16 | lz4 = "^3.0.2"
17 | cytoolz = "^0.10.1"
18 | pytest = "^5.4.1"
19 | addict = "^2.2.1"
20 | version_query = "^1.1.0"
21 | redis = "3.3.11"
22 | pandas_datareader = "^0.8.1"
23 | dill = "^0.3.1"
24 | json-tricks = "^3.14.0"
25 | sklearn = "^0.0"
26 | crayons = "^0.3.0"
27 | skorch = "^0.7.0"
28 | creme = "^0.5.0"
29 | jupyter = "^1.0.0"
30 | cerberus = "^1.3.2"
31 | hiredis = "^1.0.1"
32 | eliot = "^1.12.0"
33 | eliot-tree = "^19.0.1"
34 | yfinance = "^0.1.54"
35 | anycache = "^2.0.7"
36 | tqdm = "^4.45.0"
37 | orjson = "^3.0.2"
38 | pydantic = "^1.5.1"
39 | redisearch = "^0.9.0"
40 | matplotlib = "^3.2.1"
41 | pillow = "^7.2.0"
42 | 
43 | 
44 | 
45 | [tool.poetry.dev-dependencies]
46 | pylint = "^2.5.2"
47 | black = {version = "^19.10b0", allow-prereleases = true}
48 | flake8 = "^3.8.3"
49 | mypy = "^0.782"
50 | yapf = "^0.30.0"
51 | [build-system]
52 | requires = ["poetry>=0.12"]
53 | build-backend = "poetry.masonry.api"
54 | 
--------------------------------------------------------------------------------
/jamboree/middleware/procedures/README.md:
--------------------------------------------------------------------------------
1 | # Procedures
2 | 
3 | Procedures are abstracts that we use to call things in consistent ways. The core example is a model procedure. Multiple different models are called in multiple different ways yet they can look extremely consistent on the surface. For example, calling a `fit` and `partial_fit` can look the same for all major machine learning libraries.
4 | 
5 | Let's compare using `sklearn` and `creme-ml` for a basic fit example.
6 | 
7 | 
8 | ```py
9 | class SKLearnProcedure(object):
10 |     def __init__(self, *args, **kwargs):
11 |         pass
12 | 
13 |     def partial_fit(self, data):
14 |         pass
15 | 
16 |     def fit(self, data):
17 |         pass
18 | 
19 | 
20 | 
21 | class CremeProcedure(object):
22 |     def __init__(self, *args, **kwargs):
23 |         pass
24 | 
25 |     def partial_fit(self, data):
26 |         # All steps go here
27 |         pass
28 | 
29 |     def fit(self, data):
30 |         # All steps go here
31 |         pass
32 | ```
33 | 
34 | 
35 | We can make the exact same calls using the exact same data and get exactly what we need.
36 | 
37 | 
38 | ```py
39 | data = pd.DataFrame()
40 | 
41 | creme_model = CremeProcedure()
42 | sklearn_model = SKLearnProcedure()
43 | 
44 | 
45 | sklearn_model.fit(data)
46 | creme_model.fit(data)
47 | ```
48 | 
49 | Each one of these models will have procedures to handle what's inputted into them.
--------------------------------------------------------------------------------
/jamboree/base/processors/abstracts/files.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import List
3 | 
4 | class FileProcessor(ABC):
5 |     """
6 |     # File processor abstract.
7 | 
8 |     Deals with all blobs and files.
9 |     """
10 | 
11 |     def initialize(self):
12 |         pass
13 | 
14 |     def save(self, query: dict, data: dict, **kwargs):
15 |         """ Save a single blob of data. """
16 |         raise NotImplementedError
17 | 
18 |     def save_version(self, query, **kwargs):
19 |         """ Save a single blob of data at a given version. """
20 |         raise NotImplementedError
21 | 
22 |     def query(self, query, **kwargs):
23 |         """ Query a blob of data. Get the latest """
24 |         raise NotImplementedError
25 | 
26 |     def query_version(self, query, **kwargs):
27 |         """ Query an explicit version of data """
28 |         raise NotImplementedError
29 | 
30 |     def delete(self, query, **kwargs):
31 |         """ Delete the latest version of data """
32 |         raise NotImplementedError
33 | 
34 |     def delete_version(self, query:dict, **kwargs):
35 |         """ Delete a given version of data if it exists """
36 |         raise NotImplementedError
37 | 
38 |     def delete_all(self, query:dict, **kwargs):
39 |         """ Purge everything """
40 |         raise NotImplementedError
41 | 
42 |     def absolute_exists(self, query:dict, **kwargs):
43 |         raise NotImplementedError
--------------------------------------------------------------------------------
/jamboree/handlers/abstracted/search/meta.py:
--------------------------------------------------------------------------------
1 | from jamboree.handlers.abstracted.search import ParameterizedSearch
2 | from jamboree.handlers.default.search import BaseSearchHandler
3 | 
4 | 
5 | class MetadataSearchHandler(ParameterizedSearch):
6 |     """
7 |     # Metatypes
8 | 
9 |     The metatypes are:
10 | 
11 |     1. Strategy
12 |     2. Data
13 |     3. Model
14 |     4. Meta
15 | 
16 |     """
17 |     def __init__(self):
18 |         super().__init__()
19 |         self.entity = "metadata"
20 |         self.dreq = {
21 |             "name": str,
22 |             # The type of metadata we're positioning
23 |             # strategy, data, model, metainfo (metadata about metadata) are the clear items in mind
24 |             "metatype": str,
25 |             # another identifiable metatype to narrow down results
26 |             # pricing(data), economic, weather (data), social(data), political (data), features (data)
27 |             # batch (models), online (models), micro (strategies), macro (strategies), supporting_group (complex)
28 |             "submetatype": str,
29 |             "category": str,
30 |             "subcategories": dict,
31 |             "description": str,
32 |             "info": dict,
33 |             # The location about the information involved
34 |             "location": "GEO",
35 |             "abbreviation": str
36 |         }
37 |         self.must_have = ['name', 'metatype', 'category', 'submetatype', 'abbreviation']
38 | 
--------------------------------------------------------------------------------
/jamboree/utils/support/search/assistance/keystore.py:
--------------------------------------------------------------------------------
1 | """
2 | Temporarily stores all keys that we'll possibly be using later.
3 | 
4 | Entirely used to get the subdocuments by id.
5 | Store all subdocuments by super_id
6 | 
7 | 
8 | """
9 | 
10 | from addict import Dict
11 | 
12 | class Keystore(object):
13 |     def __init__(self):
14 |         self.store = Dict()
15 | 
16 | 
17 |     def add_by_superid(self, superid:str, key:str, _dict:dict):
18 |         # _dict.pop("super_id", None)
19 |         _dict.pop("id", None)
20 |         _dict.pop("payload", None)
21 | 
22 |         item = {
23 |             key: _dict
24 |         }
25 |         super_item = {
26 |             str(superid): item
27 |         }
28 |         self.store.update(super_item)
29 | 
30 |     def get_by_superid(self, superid:str):
31 |         if superid in self.store:
32 |             return self.store[superid]
33 |         return {}
34 | 
35 | 
36 |     def add(self, superid:str, key:str, _dict:dict):
37 |         _dict.pop("super_id", None)
38 |         _dict.pop("id", None)
39 |         _dict.pop("payload", None)
40 | 
41 |         item = {
42 |             key: _dict
43 |         }
44 |         super_item = {
45 |             str(superid): item
46 |         }
47 |         self.store.update(super_item)
48 | 
49 |     def get(self, superid:str):
50 |         if superid in self.store:
51 |             return self.store[superid]
52 |         return {}
53 | 
54 |     def reset(self):
55 |         self.store = Dict()
--------------------------------------------------------------------------------
") 28 | return self._storage 29 | 30 | @storage.setter 31 | def storage(self, _storage:FileProcessor): 32 | self._storage = _storage 33 | 34 | @property 35 | def search(self) -> SearchProcessor: 36 | if not isinstance(self._search, SearchProcessor): 37 | raise AttributeError("SearchProcessor not added yet ... ") 38 | return self._search 39 | 40 | def search(self, _search:SearchProcessor): 41 | self._search = self.search -------------------------------------------------------------------------------- /jamboree/utils/support/search/querying.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | class text(object): 3 | @staticmethod 4 | def exact(term): 5 | return { 6 | "type": "TEXT", 7 | "is_filter": True, 8 | "values": { 9 | "term": term, 10 | "is_exact": True 11 | } 12 | } 13 | 14 | @staticmethod 15 | def fuzzy(term): 16 | return { 17 | "type": "TEXT", 18 | "is_filter": True, 19 | "values": { 20 | "term": f"%{term}%", 21 | "is_exact": False 22 | } 23 | } 24 | 25 | @staticmethod 26 | def orlist(terms:List[str], is_bundle=False): 27 | _term = text.orliststr(terms, is_bundle) 28 | return { 29 | "type": "TEXT", 30 | "is_filter": True, 31 | "values": { 32 | "term": _term, 33 | "is_exact": False 34 | } 35 | } 36 | 37 | @staticmethod 38 | def orliststr(terms:List[str], is_bundle=False): 39 | if len(terms) == 0: 40 | return "" 41 | _term = "|".join(terms) 42 | if is_bundle: 43 | _temp = f"({_term})" 44 | _term = _temp 45 | return _term 46 | 47 | 48 | class tags(object): 49 | 50 | @staticmethod 51 | def andfieldstr(field, items:List[str]): 52 | if len(items) == 0: 53 | return "" 54 | 55 | and_fields_str = [f"{field}:{item} " for item in items] 56 | return and_fields_str -------------------------------------------------------------------------------- /jamboree/middleware/procedures/models/_flow.py: -------------------------------------------------------------------------------- 1 | from addict import Dict 2 | from sklearn.base import BaseEstimator 3 | from jamboree.middleware.procedures import ModelProcedureAbstract 4 | from sklearn.datasets import make_friedman2 5 | from sklearn.gaussian_process import GaussianProcessRegressor 6 | from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel 7 | from loguru import logger 8 | 9 | 10 | """ TODO: FIX THIS CRAP!!!""" 11 | 12 | 13 | class TFKerasProcedure(ModelProcedureAbstract): 14 | def __init__(self, *args, **kwargs) -> None: 15 | super().__init__() 16 | self.requirements.model = True 17 | self.requirements.criterion = False 18 | self.requirements.optimizer = False 19 | 20 | # types = Dict() 21 | # types.model = BaseEstimator 22 | 23 | self.types.model = BaseEstimator 24 | 25 | @logger.catch 26 | def get_params(self): 27 | self.verify() 28 | return self.dictionary.model.get_params() 29 | 30 | @logger.catch 31 | def predict(self, X, **kwargs): 32 | self.verify() 33 | return self.dictionary.model.predict(X, **kwargs) 34 | 35 | @logger.catch 36 | def predict_prob(self, X, **kwargs): 37 | self.verify() 38 | return self.dictionary.model.predict_prob(X, **kwargs) 39 | 40 | @logger.catch 41 | def partial_fit(self, X, y, **kwargs): 42 | self.verify() 43 | self.dictionary.model.partial_fit(X, y, **kwargs) 44 | 45 | def fit(self, X, y, **kwargs): 46 | self.verify() 47 | self.dictionary.model.fit(X, y, **kwargs) 48 | # print(self.mdict.model.predict(X[:2,:], return_std=True)) -------------------------------------------------------------------------------- 
/jamboree/base/processors/abstracts/legacy.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import List
3 | 
4 | 
5 | class LegacyProcessor(ABC):
6 |     """ Use to make the main jamboree object still function without a problem. We have a few ways to explore this concept."""
7 |     def save(self, query: dict, data: dict, abs_rel="absolute"):
8 |         raise NotImplementedError
9 | 
10 | 
11 |     def save_many(self, query: dict, data: List[dict], abs_rel="absolute"):
12 |         raise NotImplementedError
13 | 
14 | 
15 |     def get_latest(self, query, abs_rel="absolute") -> dict:
16 |         raise NotImplementedError
17 | 
18 | 
19 |     def get_latest_many(self, query, abs_rel="absolute", limit=1000):
20 |         raise NotImplementedError
21 | 
22 | 
23 |     def get_between(self, query:dict, min_epoch:float, max_epoch:float, abs_rel:str="absolute") -> list:
24 |         raise NotImplementedError
25 | 
26 | 
27 |     def get_latest_by(self, query:dict, max_epoch, abs_rel="absolute", limit:int=10) -> dict:
28 |         raise NotImplementedError
29 | 
30 | 
31 |     def count(self, query: dict) -> int:
32 |         raise NotImplementedError
33 | 
34 | 
35 |     def remove_first(self, query: dict):
36 |         raise NotImplementedError
37 | 
38 | 
39 |     def pop_multiple(self, query: dict, limit: int):
40 |         raise NotImplementedError
41 | 
42 | 
43 |     def _bulk_save(self, query: dict, data: list):
44 |         raise NotImplementedError
45 | 
46 | 
47 |     def single_get(self, query:dict):
48 |         raise NotImplementedError
49 | 
50 | 
51 |     def single_set(self, query:dict, data:dict):
52 |         raise NotImplementedError
53 | 
54 | 
55 |     def single_delete(self, query:dict):
56 |         raise NotImplementedError
--------------------------------------------------------------------------------
/jamboree/handlers/default/access.py:
--------------------------------------------------------------------------------
1 | import uuid
2 | from typing import Any, AnyStr, Dict
3 | from jamboree.handlers.default.db import DBHandler
4 | 
5 | 
6 | 
7 | class Access(DBHandler):
8 |     # ---------------------------------------------------------------------------------
9 |     # Simple Accessor Properties
10 |     # ---------------------------------------------------------------------------------
11 | 
12 |     @property
13 |     def name(self):
14 |         return self['name']
15 | 
16 |     @name.setter
17 |     def name(self, __name: str):
18 |         self['name'] = __name
19 | 
20 | 
21 |     @property
22 |     def category(self) -> str:
23 |         return self['category']
24 | 
25 |     @category.setter
26 |     def category(self, _category: str):
27 |         self['category'] = _category
28 | 
29 |     @property
30 |     def subcategories(self) -> str:
31 |         return self['subcategories']
32 | 
33 |     @subcategories.setter
34 |     def subcategories(self, __subcategories: Dict[AnyStr, Any]):
35 |         self['subcategories'] = __subcategories
36 | 
37 |     @property
38 |     def metatype(self) -> str:
39 |         return self['metatype']
40 | 
41 |     @metatype.setter
42 |     def metatype(self, __metatype: str):
43 |         self['metatype'] = __metatype
44 | 
45 |     @property
46 |     def submetatype(self) -> str:
47 |         return self['submetatype']
48 | 
49 |     @submetatype.setter
50 |     def submetatype(self, __submetatype: str):
51 |         self['submetatype'] = __submetatype
52 | 
53 |     @property
54 |     def abbreviation(self) -> str:
55 |         return self['abbreviation']
56 | 
57 |     @abbreviation.setter
58 |     def abbreviation(self, __abb: str):
59 |         self['abbreviation'] = __abb
--------------------------------------------------------------------------------
/jamboree/base/processors/abstracts/event.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import List
3 | 
4 | 
5 | class EventProcessor(ABC):
6 |     def initialize(self):
7 |         raise NotImplementedError
8 | 
9 |     def save(self, query: dict, data: dict, abs_rel="absolute"):
10 |         raise NotImplementedError
11 | 
12 | 
13 |     def save_many(self, query: dict, data: List[dict], abs_rel="absolute"):
14 |         raise NotImplementedError
15 | 
16 | 
17 |     def get_latest(self, query, abs_rel="absolute") -> dict:
18 |         raise NotImplementedError
19 | 
20 | 
21 |     def get_latest_many(self, query, abs_rel="absolute", limit=1000):
22 |         raise NotImplementedError
23 | 
24 | 
25 |     def get_between(self, query:dict, min_epoch:float, max_epoch:float, abs_rel:str="absolute") -> list:
26 |         raise NotImplementedError
27 | 
28 | 
29 |     def get_latest_by(self, query:dict, max_epoch, abs_rel="absolute", limit:int=10) -> dict:
30 |         raise NotImplementedError
31 | 
32 | 
33 |     def get_all(self, query:dict, abs_rel:str="relative"):
34 |         raise NotImplementedError
35 | 
36 |     def count(self, query: dict) -> int:
37 |         raise NotImplementedError
38 | 
39 | 
40 |     def remove_first(self, query: dict):
41 |         raise NotImplementedError
42 | 
43 | 
44 |     def pop_multiple(self, query: dict, limit: int):
45 |         raise NotImplementedError
46 | 
47 | 
48 |     def _bulk_save(self, query: dict, data: list):
49 |         raise NotImplementedError
50 | 
51 | 
52 |     def single_get(self, query:dict):
53 |         raise NotImplementedError
54 | 
55 | 
56 |     def single_set(self, query:dict, data:dict):
57 |         raise NotImplementedError
58 | 
59 | 
60 |     def single_delete(self, query:dict):
61 |         raise NotImplementedError
62 | 
63 |     def lock(self, query):
64 |         raise NotImplementedError
--------------------------------------------------------------------------------
/examples/user_settings.py:
--------------------------------------------------------------------------------
1 | 
2 | import random
3 | import time
4 | import uuid
5 | from contextlib import ContextDecorator
6 | from copy import copy
7 | from pprint import pprint
8 | from random import randint
9 | 
10 | import maya
11 | import numpy as np
12 | import pandas as pd
13 | from loguru import logger
14 | from toolz.itertoolz import pluck
15 | 
16 | import vaex
17 | from jamboree import DBHandler, Jamboree
18 | 
19 | 
20 | class UserSettingsHandler(DBHandler):
21 |     """Abstract handler that we use to keep track of information.
22 | """ 23 | 24 | def __init__(self, **kwargs): 25 | super().__init__() 26 | self.entity = "user_settings" 27 | self.required = { 28 | "email": str, 29 | "episode": str 30 | } 31 | self._limit = 100 32 | self._settings_handler = None 33 | 34 | @property 35 | def limit(self): 36 | """ The maximum number of records we intend to get when calling the many function.""" 37 | return self._limit 38 | 39 | @limit.setter 40 | def limit(self, limit): 41 | self._limit = limit 42 | 43 | def is_authenticated(self): 44 | return True 45 | 46 | def is_active(self): 47 | return True 48 | 49 | def is_anonymous(self): 50 | return False 51 | 52 | def register(self, password:str, confirm:str): 53 | pass 54 | 55 | def login(self, password:str): 56 | pass 57 | 58 | def logout(self): 59 | pass 60 | 61 | def session(self): 62 | pass 63 | 64 | 65 | def deactivate(self): 66 | pass 67 | 68 | def reactivate(self): 69 | pass 70 | 71 | 72 | def latest_user(self): 73 | pass 74 | 75 | def save_user(self): 76 | pass 77 | 78 | 79 | 80 | def flip(n=0.02): 81 | if n >= random.uniform(0, 1): 82 | return True 83 | return False 84 | -------------------------------------------------------------------------------- /jamboree/utils/context/main.py: -------------------------------------------------------------------------------- 1 | from contextlib import ContextDecorator, contextmanager 2 | 3 | import maya 4 | import redis 5 | from loguru import logger 6 | from redis.exceptions import WatchError 7 | 8 | 9 | class timecontext(ContextDecorator): 10 | def __enter__(self): 11 | self.start = maya.now()._epoch 12 | return self 13 | 14 | def __exit__(self, *exc): 15 | self.end = maya.now()._epoch 16 | delta = self.end - self.start 17 | logger.success(f"It took {delta}s") 18 | logger.success(f"It took {(delta*1000)}ms") 19 | return False 20 | 21 | 22 | @contextmanager 23 | def watch_loop(): 24 | while True: 25 | try: 26 | yield 27 | break 28 | except WatchError: 29 | continue 30 | 31 | 32 | def watch_loop_callback(callback): 33 | while True: 34 | try: 35 | callback() 36 | break 37 | except WatchError: 38 | continue 39 | 40 | 41 | class example_space(ContextDecorator): 42 | def __init__(self, name) -> None: 43 | self.name = name 44 | self.is_pass = True 45 | self.start = maya.now()._epoch 46 | 47 | def __enter__(self): 48 | 49 | return self 50 | 51 | def failed(self): 52 | self.is_pass = False 53 | 54 | def __exit__(self, type, value, traceback): 55 | self.end = maya.now()._epoch 56 | delta = self.end - self.start 57 | if value is not None or self.is_pass == False: 58 | logger.error( 59 | "----------------------------------------- Example didn't pass --------------------------------------------" 60 | ) 61 | else: 62 | logger.success( 63 | "------------------------------------------ Example did pass ----------------------------------------------" 64 | ) 65 | logger.info(f"It took {delta}ms") 66 | return False 67 | 68 | 69 | if __name__ == "__main__": 70 | with example_space("Printing") as example: 71 | print("Don't want to kill my vibe") 72 | # example.failed() 73 | -------------------------------------------------------------------------------- /jamboree/handlers/base.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from abc import ABC, ABCMeta 3 | from typing import Dict, Any, List 4 | 5 | 6 | class BaseHandler(object, metaclass=ABCMeta): 7 | """ 8 | A way to handle reads and writes consistently without having to write every single variable: 9 | """ 10 | 11 | def __init__(self): 12 | 
12 |         pass
13 | 
14 |     def check(self):
15 |         raise NotImplementedError
16 | 
17 |     def save(self, data: dict):
18 |         raise NotImplementedError
19 | 
20 |     def _bulk_save(self, query: dict, data: list):
21 |         raise NotImplementedError
22 | 
23 |     def _get_many(self, limit: int, ar: str, alt={}):
24 |         raise NotImplementedError
25 | 
26 |     def last(self):
27 |         raise NotImplementedError
28 | 
29 |     def many(self, limit: int = 100):
30 |         raise NotImplementedError
31 | 
32 |     def save_many(self, query: dict, data: list):
33 |         raise NotImplementedError
34 | 
35 |     def pop_multiple(self, query, _limit: int = 1):
36 |         raise NotImplementedError
37 | 
38 |     def swap(self, query, alt: dict = {}):
39 |         """ Swap between the first and last item """
40 |         raise NotImplementedError
41 | 
42 |     def query_mix(self, query: dict, alt: dict = {}):
43 |         raise NotImplementedError
44 | 
45 | 
46 | class BaseFileHandler(object, metaclass=ABCMeta):
47 |     """
48 |     A way to handle reads and writes consistently without having to write every single variable:
49 |     """
50 | 
51 |     def __init__(self):
52 |         pass
53 | 
54 |     def check(self):
55 |         raise NotImplementedError
56 | 
57 |     def save(self, data: dict):
58 |         raise NotImplementedError
59 | 
60 |     def save_version(self, query:dict, data):
61 |         pass
62 | 
63 |     def last(self):
64 |         raise NotImplementedError
65 | 
66 |     def many(self, limit: int = 100):
67 |         raise NotImplementedError
68 | 
69 |     def save_many(self, query: dict, data: list):
70 |         raise NotImplementedError
71 | 
72 |     def delete(self, query:dict):
73 |         raise NotImplementedError
74 | 
75 |     def delete_version(self, query:dict, version:str):
76 |         raise NotImplementedError
--------------------------------------------------------------------------------
/docs/readme/How Jamboree Works.md:
--------------------------------------------------------------------------------
1 | # Key Generation
2 | Here we test how we're going to generate a key for redis so we can query for information later.
3 | 
4 | 
5 | ```python
6 | %pwd
7 | ```
8 | 
9 | 
10 | 
11 | 
12 |     '/home/skywalker/PycharmProjects/jamboree/test/notebooks'
13 | 
14 | 
15 | 
16 | 
17 | ```python
18 | %cd ../..
19 | ```
20 | 
21 |     /home/skywalker/PycharmProjects/jamboree
22 | 
23 | 
24 | 
25 | ```python
26 | import orjson
27 | import maya
28 | import random
29 | ```
30 | 
31 | 
32 | ```python
33 | from jamboree.utils.helper import Helpers
34 | ```
35 | 
36 | 
37 | ```python
38 | helpers = Helpers()
39 | ```
40 | 
41 | 
42 | ```python
43 | sample_key = helpers.generate_hash({"type":"hello_world", "name": "Jamboree", "count": 0})
44 | ```
45 | 
46 | 
47 | ```python
48 | print(f"The sample key is: '{sample_key}'")
49 | ```
50 | 
51 |     The sample key is: 'eyJjb3VudCI6MCwibmFtZSI6IkphbWJvcmVlIiwidHlwZSI6ImhlbGxvX3dvcmxkIn0='
52 | 
53 | 
54 | 
55 | ## How we use the generated key to create an event source
56 | 
57 | We'd then store that key into redis and start appending other serialized variables into a list. It looks like the following diagram.
58 | 
59 | 
60 | ```python
61 | orjson.dumps({"hello": "world", "number": random.uniform(0, 100), "timestamp": maya.now()._epoch})
62 | ```
63 | 
64 | 
65 | 
66 | 
67 |     b'{"hello":"world","number":20.127252760805113,"timestamp":1579529262.278698}'
68 | 
69 | 
70 | 
71 | # Event Diagram
72 | 
73 | Here's a diagram of an event source.
74 | 
75 | ![redis picture](../../docs/redis_event_source.png)
76 | 
77 | We continuously push information into a key, at the tail end of the list. A more recent version of the event sourcing system uses `zsets` (sorted sets) to query timing. This actually allows us to have better time indexing.
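78 | 
79 | As a minimal sketch (assuming a local Redis instance and reusing `sample_key` from above; the `:zset` key suffix is only illustrative), the append looks like this:
80 | 
81 | ```python
82 | import maya
83 | import orjson
84 | import redis
85 | 
86 | r = redis.Redis()
87 | record = orjson.dumps({"hello": "world", "timestamp": maya.now()._epoch})
88 | 
89 | # Append the serialized event to the tail of the list at the hash key ...
90 | r.rpush(sample_key, record)
91 | # ... or score it by epoch inside a sorted set for time-range queries
92 | r.zadd(f"{sample_key}:zset", {record: maya.now()._epoch})
93 | ```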
91 | 
92 | # Double layer storage system
93 | 
94 | The Jamboree system uses a double-layer storage system. The general idea: the top layer stores information largely in memory for fast access, while the bottom layer stores it in long-term storage. We periodically pull records from the hard-storage database and repopulate the in-memory database.
95 | 
96 | ![redis picture](../../docs/redis_mongo_layer.png)
97 | 
98 | A minimal sketch of that read-through flow, with hypothetical `hot` and `cold` stores standing in for the Redis and MongoDB layers:
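99 | 
100 | ```python
101 | # Hypothetical sketch; the `hot` (Redis-like) and `cold` (Mongo-like) stores
102 | # and the helper names are illustrative, not Jamboree's actual API.
103 | def query_latest(key, hot, cold, limit=100):
104 |     records = hot.get(key)           # top layer: in-memory, fast
105 |     if records:
106 |         return records[-limit:]
107 |     records = cold.find(key, limit)  # bottom layer: long-term storage
108 |     hot.set(key, records)            # repopulate the in-memory layer
109 |     return records
110 | ```
111 | 
-------------------------------------------------------------------------------- /jamboree/handlers/complex/backtestable/files.py: --------------------------------------------------------------------------------
1 | import uuid
2 | from loguru import logger
3 | 
4 | import maya
5 | import pandas as pd
6 | 
7 | from jamboree import Jamboree
8 | from jamboree import JamboreeNew
9 | 
10 | from jamboree.handlers.complex.meta import MetaHandler
11 | # from jamboree.handlers.complex.metric import MetricHandler
12 | 
13 | from jamboree.handlers.default.time import TimeHandler
14 | from jamboree.handlers.default.db import DBHandler
15 | from jamboree.handlers.default import BlobStorageHandler
16 | 
17 | class BacktestBlobHandler(BlobStorageHandler):
18 |     def __init__(self):
19 |         super().__init__()
20 | 
21 | 
22 |         # Other objects to consider
23 |         self._time:TimeHandler = TimeHandler()
24 |         self._meta:MetaHandler = MetaHandler()
25 |         self._episode = uuid.uuid4().hex
26 | 
27 | 
28 |         self._is_live = False
29 |         self.is_event = False # use to make sure there's absolutely no duplicate data
30 | 
31 |         # Create a context for this to remove complete dependency from the model handler.
32 | 
33 |     @property
34 |     def episode(self) -> str:
35 |         return self._episode
36 | 
37 |     @episode.setter
38 |     def episode(self, _episode:str):
39 |         self._episode = _episode
40 | 
41 |     @property
42 |     def live(self) -> bool:
43 |         return self._is_live
44 | 
45 |     @live.setter
46 |     def live(self, _live:bool):
47 |         self._is_live = _live
48 | 
49 |     @property
50 |     def time(self) -> 'TimeHandler':
51 |         # self._time.event = self.event
52 |         self._time.processor = self.processor
53 |         self._time['episode'] = self.episode
54 |         self._time['live'] = self.live
55 |         return self._time
56 | 
57 |     @time.setter
58 |     def time(self, _time:'TimeHandler'):
59 |         self._time = _time
60 | 
61 | 
62 |     def reset(self):
63 |         """ Reset the data we're querying for.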
""" 64 | self.time.reset() 65 | 66 | 67 | 68 | def __str__(self) -> str: 69 | name = self["name"] 70 | category = self["category"] 71 | subcategories = self["subcategories"] 72 | jscat = self.main_helper.generate_hash(subcategories) 73 | return f"{name}:{category}:{jscat}" -------------------------------------------------------------------------------- /jamboree/handlers/complex/backtestable/default/files.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from loguru import logger 3 | 4 | import maya 5 | import pandas as pd 6 | 7 | from jamboree import Jamboree 8 | from jamboree import JamboreeNew 9 | 10 | from jamboree.handlers.complex.meta import MetaHandler 11 | # from jamboree.handlers.complex.metric import MetricHandler 12 | 13 | from jamboree.handlers.default.time import TimeHandler 14 | from jamboree.handlers.default.db import DBHandler 15 | from jamboree.handlers.default import BlobStorageHandler 16 | 17 | class BacktestBlobHandler(BlobStorageHandler): 18 | def __init__(self): 19 | super().__init__() 20 | 21 | 22 | # Other objects to consider 23 | self._time:TimeHandler = TimeHandler() 24 | self._meta:MetaHandler = MetaHandler() 25 | self._episode = uuid.uuid4().hex 26 | 27 | 28 | self._is_live = False 29 | self.is_event = False # use to make sure there's absolutely no duplicate data 30 | 31 | # Create a context for this to remove complete dependency from the model handler. 32 | 33 | @property 34 | def episode(self) -> str: 35 | return self._episode 36 | 37 | @episode.setter 38 | def episode(self, _episode:str): 39 | self._episode = _episode 40 | 41 | @property 42 | def live(self) -> bool: 43 | return self._is_live 44 | 45 | @live.setter 46 | def live(self, _live:bool): 47 | self._is_live = _live 48 | 49 | @property 50 | def time(self) -> 'TimeHandler': 51 | # self._time.event = self.event 52 | self._time.processor = self.processor 53 | self._time['episode'] = self.episode 54 | self._time['live'] = self.live 55 | return self._time 56 | 57 | @time.setter 58 | def time(self, _time:'TimeHandler'): 59 | self._time = _time 60 | 61 | 62 | def reset(self): 63 | """ Reset the data we're querying for. 
""" 64 | self.time.reset() 65 | 66 | 67 | 68 | def __str__(self) -> str: 69 | name = self["name"] 70 | category = self["category"] 71 | subcategories = self["subcategories"] 72 | jscat = self.main_helper.generate_hash(subcategories) 73 | return f"{name}:{category}:{jscat}" -------------------------------------------------------------------------------- /jamboree/middleware/procedures/models/_sklearn.py: -------------------------------------------------------------------------------- 1 | from addict import Dict 2 | from loguru import logger 3 | from sklearn.base import BaseEstimator 4 | from sklearn.datasets import make_friedman2 5 | from sklearn.gaussian_process import GaussianProcessRegressor 6 | from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel 7 | from jamboree.middleware.procedures import ModelProcedureAbstract 8 | 9 | 10 | 11 | class SklearnProcedure(ModelProcedureAbstract): 12 | def __init__(self, *args, **kwargs) -> None: 13 | super().__init__() 14 | self.requirements.model = True 15 | self.requirements.criterion = False 16 | self.requirements.optimizer = False 17 | self.types.model = BaseEstimator 18 | self.changed = False 19 | 20 | 21 | 22 | @property 23 | def model(self) -> BaseEstimator: 24 | self.verify() 25 | return self.dictionary.model 26 | 27 | def set_params(self, **params): 28 | self.changed = True 29 | self.model.set_params(**params) 30 | 31 | @logger.catch 32 | def get_params(self): 33 | return self.model.get_params() 34 | 35 | @logger.catch 36 | def predict(self, X, **kwargs): 37 | return self.model.predict(X, **kwargs) 38 | 39 | @logger.catch 40 | def predict_proba(self, X, **kwargs): 41 | prediction = self.model.predict_proba(X, **kwargs) 42 | return prediction 43 | 44 | @logger.catch 45 | def partial_fit(self, X, y, **kwargs): 46 | self.changed = True 47 | self.model.partial_fit(X, y, **kwargs) 48 | 49 | def fit(self, X, y, **kwargs): 50 | self.changed = True 51 | self.model.fit(X, y, **kwargs) 52 | 53 | class CustomSklearnGaussianProcedure(SklearnProcedure): 54 | def __init__(self, *args, **kwargs) -> None: 55 | super().__init__(*args, **kwargs) 56 | kernel = DotProduct() + WhiteKernel() 57 | gpr = GaussianProcessRegressor(kernel=kernel, random_state=0) 58 | self.dictionary.model = gpr 59 | 60 | 61 | if __name__ == "__main__": 62 | general_procedure = CustomSklearnGaussianProcedure() 63 | X, y = make_friedman2(n_samples=500, noise=0, random_state=0) 64 | general_procedure.fit(X, y) 65 | print(general_procedure.predict(X[:2,:], return_std=True)) 66 | # print(general_procedure.get_params()) 67 | print(general_procedure.extract()) 68 | 69 | -------------------------------------------------------------------------------- /jamboree/storage/files/core.py: -------------------------------------------------------------------------------- 1 | from copy import copy 2 | from abc import ABC 3 | from typing import Any, List 4 | from addict import Dict 5 | from version_query import VersionComponent 6 | import hashlib 7 | 8 | class FileStorageConnection(ABC): 9 | def __init__(self, **kwargs) -> None: 10 | self._connection = None 11 | self._settings = Dict() 12 | self._settings.overwrite = False 13 | self._settings.sig_key = kwargs.get("signature", "basic_key") 14 | 15 | self._settings.preferences.by = "latest" 16 | self._settings.preferences.limit = 500 17 | self._settings.preferences.version = None 18 | self._settings.default.version = "0.0.1" 19 | self._settings.default.increment = VersionComponent.Patch 20 | 21 | 22 | 23 | 24 | @property 25 | def conn(self): 
26 |         if self._connection is None:
27 |             raise AttributeError("You haven't added a main database connection as of yet.")
28 |         return self._connection
29 | 
30 |     @conn.setter
31 |     def conn(self, _conn):
32 |         self._connection = _conn
33 | 
34 |     @property
35 |     def settings(self):
36 |         return self._settings
37 | 
38 |     @settings.setter
39 |     def settings(self, _settings:Dict):
40 |         copied = copy(self._settings)  # work on a copy so a failed validation doesn't mutate the live settings
41 |         copied.update(_settings)
42 |         self.valid_settings(copied)
43 |         self._settings = copied
44 | 
45 |     def valid_settings(self, _settings):
46 |         if _settings.preferences.by not in ["latest", "many", "all", "version"]:
47 |             raise ValueError("Query preference must be one of: 'latest', 'many', 'all', 'version'")
48 | 
49 |         if _settings.preferences.by == "version" and _settings.preferences.version is None:
50 |             raise AttributeError("If you're querying by version, you have to include a version number (string_format)")
51 | 
52 |     @property
53 |     def is_overwrite(self) -> bool:
54 |         return self._settings.overwrite
55 | 
56 | 
57 |     """ Main Commands """
58 | 
59 |     def save(self, query:dict, obj:Any, **kwargs):
60 |         raise NotImplementedError("save not implemented")
61 | 
62 |     def query(self, query, **kwargs):
63 |         raise NotImplementedError("query not implemented")
64 | 
65 |     def delete(self, query, **kwargs):
66 |         raise NotImplementedError("delete not implemented")
67 | 
68 |     def absolute_exists(self, query, **kwargs):
69 |         raise NotImplementedError
-------------------------------------------------------------------------------- /.gitignore: --------------------------------------------------------------------------------
1 | Pipfile
2 | # Byte-compiled / optimized / DLL files
3 | __pycache__/
4 | *.py[cod]
5 | *$py.class
6 | 
7 | # C extensions
8 | *.so
9 | 
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | pip-wheel-metadata/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 | 
31 | # PyInstaller
32 | #  Usually these files are written by a python script from a template
33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 | 
37 | # Testing and examples
38 | test/
39 | 
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 | 
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .nox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | *.py,cover
55 | .hypothesis/
56 | .pytest_cache/
57 | 
58 | # Translations
59 | *.mo
60 | *.pot
61 | 
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 | 
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 | 
72 | # Scrapy stuff:
73 | .scrapy
74 | 
75 | # Sphinx documentation
76 | docs/_build/
77 | 
78 | # PyBuilder
79 | target/
80 | 
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 | 
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 | 
88 | # pyenv
89 | .python-version
90 | 
91 | # pipenv
92 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | #   install all needed dependencies.
96 | Pipfile.lock
97 | 
98 | # pyflow
99 | __pypackages__/
100 | 
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 | 
105 | # SageMath parsed files
106 | *.sage.py
107 | 
108 | # Environments
109 | .env
110 | .venv
111 | env/
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 | 
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 | 
121 | # Rope project settings
122 | .ropeproject
123 | 
124 | # mkdocs documentation
125 | /site
126 | 
127 | # mypy
128 | .mypy_cache/
129 | .dmypy.json
130 | dmypy.json
131 | 
132 | # Pyre type checker
133 | .pyre/
134 | 
135 | 
136 | .vscode/
-------------------------------------------------------------------------------- /jamboree/handlers/complex/backtestable/default/db.py: --------------------------------------------------------------------------------
1 | import uuid
2 | from loguru import logger
3 | 
4 | import maya
5 | import pandas as pd
6 | 
7 | from jamboree import Jamboree
8 | from jamboree import JamboreeNew
9 | 
10 | from jamboree.handlers.complex.meta import MetaHandler
11 | # from jamboree.handlers.complex.metric import MetricHandler
12 | 
13 | from jamboree.handlers.default.time import TimeHandler
14 | from jamboree.handlers.default.db import DBHandler
15 | from jamboree.handlers.default import BlobStorageHandler
16 | 
17 | class BacktestDBHandler(DBHandler):
18 |     """
19 |         # BACKTEST HANDLER
20 |         ---
21 | 
22 |         A way to load time and metadata information into classes that already use the DB handler.
23 | 
24 |         If you're working with blob storage instead, use the BacktestBlobHandler in files.py.
25 | 
26 | 
27 |     """
28 | 
29 |     def __init__(self):
30 |         super().__init__()
31 | 
32 | 
33 |         # Other objects to consider
34 |         self._time:TimeHandler = TimeHandler()
35 |         self._meta: MetaHandler = MetaHandler()
36 |         # self._metrics: MetricHandler = MetricHandler()
37 |         self._episode = uuid.uuid4().hex
38 | 
39 | 
40 |         self._is_live = False
41 |         self.is_event = False # use to make sure there's absolutely no duplicate data
42 | 
43 |     @property
44 |     def episode(self) -> str:
45 |         return self._episode
46 | 
47 |     @episode.setter
48 |     def episode(self, _episode:str):
49 |         self._episode = _episode
50 | 
51 |     @property
52 |     def live(self) -> bool:
53 |         return self._is_live
54 | 
55 |     @live.setter
56 |     def live(self, _live:bool):
57 |         self._is_live = _live
58 | 
59 |     @property
60 |     def time(self) -> 'TimeHandler':
61 |         # self._time.event = self.event
62 |         self._time.processor = self.processor
63 |         self._time['episode'] = self.episode
64 |         self._time['live'] = self.live
65 |         return self._time
66 | 
67 |     @time.setter
68 |     def time(self, _time:'TimeHandler'):
69 |         self._time = _time
70 | 
71 |     def reset(self):
72 |         """ Reset the data we're querying for.
""" 73 | # self.reset_current_metric() 74 | # self.metadata.reset() 75 | self.time.reset() 76 | 77 | 78 | 79 | def __str__(self) -> str: 80 | name = self["name"] 81 | category = self["category"] 82 | subcategories = self["subcategories"] 83 | jscat = self.main_helper.generate_hash(subcategories) 84 | return f"{name}:{category}:{jscat}" -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import codecs 4 | import sys 5 | from shutil import rmtree 6 | from setuptools import setup, find_packages, Command 7 | 8 | 9 | here = os.path.abspath(os.path.dirname(__file__)) 10 | 11 | 12 | with open("README.md", "r") as fh: 13 | long_description = fh.read() 14 | 15 | 16 | class UploadCommand(Command): 17 | """Support setup.py publish.""" 18 | 19 | description = "Build and publish the package." 20 | user_options = [] 21 | 22 | @staticmethod 23 | def status(s): 24 | """Prints things in bold.""" 25 | print("\033[1m{0}\033[0m".format(s)) 26 | 27 | def initialize_options(self): 28 | pass 29 | 30 | def finalize_options(self): 31 | pass 32 | 33 | def run(self): 34 | try: 35 | self.status("Removing previous builds…") 36 | rmtree(os.path.join(here, "dist")) 37 | except FileNotFoundError: 38 | pass 39 | self.status("Building Source distribution…") 40 | os.system("{0} setup.py sdist bdist_wheel".format(sys.executable)) 41 | self.status("Uploading the package to PyPi via Twine…") 42 | os.system("twine upload dist/*") 43 | sys.exit() 44 | 45 | 46 | 47 | setup( 48 | name="jamboree", 49 | version="0.9.5", 50 | author="Kevin Hill", 51 | author_email="kah.kevin.hill@gmail.com", 52 | description="A multi-layer event sourcing and general data library. 
SQL, Search, Event Sourcing, and File/Model storage combined into one.",
53 |     long_description=long_description,
54 |     long_description_content_type="text/markdown",
55 |     py_modules=["jamboree"],
56 |     install_requires=[
57 |         "numpy",
58 |         "scipy",
59 |         'maya',
60 |         "pandas-datareader",
61 |         "json-tricks",
62 |         "ujson",
63 |         'gym',
64 |         "orjson",
65 |         "pebble",
66 |         "cytoolz",
67 |         "loguru",
68 |         'redisearch',
69 |         "lz4",
70 |         "anycache",
71 |         "hiredis",
72 |         "eliot",
73 |         "eliot-tree",
74 |         "matplotlib",
75 |         "pandas",
76 |         "scikit-learn",  # the "sklearn" package on PyPI is a deprecated alias for scikit-learn
77 |         "crayons",
78 |         "creme",
79 |         'pydantic',
80 |         "yfinance",
81 |         "version_query",
82 |         "cerberus",
83 |         "addict",
84 |     ],
85 |     packages=find_packages(),
86 |     classifiers=[
87 |         "Programming Language :: Python :: 3",
88 |         "License :: OSI Approved :: MIT License",
89 |         "Operating System :: OS Independent",
90 |     ],
91 |     cmdclass={"upload": UploadCommand},
92 | )
-------------------------------------------------------------------------------- /docs/notebooks/Untitled.ipynb: --------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Dataframe examples\n",
8 |     "---"
9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": 16,
14 |    "metadata": {},
15 |    "outputs": [],
16 |    "source": [
17 |     "import pandas as pd\n",
18 |     "import numpy as np"
19 |    ]
20 |   },
21 |   {
22 |    "cell_type": "code",
23 |    "execution_count": 17,
24 |    "metadata": {},
25 |    "outputs": [],
26 |    "source": [
27 |     "frame = pd.DataFrame([[1, 2.0, \"a\", \"c\", 0]], columns=[\"A\",\"B\",\"C\",\"D\",\"E\"])"
28 |    ]
29 |   },
30 |   {
31 |    "cell_type": "code",
32 |    "execution_count": 23,
33 |    "metadata": {},
34 |    "outputs": [],
35 |    "source": [
36 |     "types = frame.dtypes"
37 |    ]
38 |   },
39 |   {
40 |    "cell_type": "code",
41 |    "execution_count": 27,
42 |    "metadata": {},
43 |    "outputs": [
44 |     {
45 |      "name": "stdout",
46 |      "output_type": "stream",
47 |      "text": [
48 |       "FLOAT\n",
49 |       "FLOAT\n",
50 |       "FLOAT\n"
51 |      ]
52 |     }
53 |    ],
54 |    "source": [
55 |     "for k, v in types.items():\n",
56 |     "    if np.issubdtype(v, np.number):\n",
57 |     "        print(\"FLOAT\")\n",
58 |     "        continue\n",
59 |     "    \n",
60 |     "#     print(k)\n",
61 |     "#     print(v)"
62 |    ]
63 |   },
64 |   {
65 |    "cell_type": "code",
66 |    "execution_count": null,
67 |    "metadata": {},
68 |    "outputs": [],
69 |    "source": []
70 |   },
71 |   {
72 |    "cell_type": "code",
73 |    "execution_count": null,
74 |    "metadata": {},
75 |    "outputs": [],
76 |    "source": []
77 |   }
78 |  ],
79 |  "metadata": {
80 |   "kernelspec": {
81 |    "display_name": "Python 3",
82 |    "language": "python",
83 |    "name": "python3"
84 |   },
85 |   "language_info": {
86 |    "codemirror_mode": {
87 |     "name": "ipython",
88 |     "version": 3
89 |    },
90 |    "file_extension": ".py",
91 |    "mimetype": "text/x-python",
92 |    "name": "python",
93 |    "nbconvert_exporter": "python",
94 |    "pygments_lexer": "ipython3",
95 |    "version": "3.6.8"
96 |   },
97 |   "varInspector": {
98 |    "cols": {
99 |     "lenName": 16,
100 |     "lenType": 16,
101 |     "lenVar": 40
102 |    },
103 |    "kernels_config": {
104 |     "python": {
105 |      "delete_cmd_postfix": "",
106 |      "delete_cmd_prefix": "del ",
107 |      "library": "var_list.py",
108 |      "varRefreshCmd": "print(var_dic_list())"
109 |     },
110 |     "r": {
111 |      "delete_cmd_postfix": ") ",
112 |      "delete_cmd_prefix": "rm(",
113 |      "library": "var_list.r",
114 |      "varRefreshCmd": "cat(var_dic_list()) "
115 |     }
116 |    },
117 |    "types_to_exclude": [
118 |     "module",
119 |     "function",
120 | 
"builtin_function_or_method", 121 | "instance", 122 | "_Feature" 123 | ], 124 | "window_display": false 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 4 129 | } 130 | -------------------------------------------------------------------------------- /docs/notebooks/Verification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 33, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from cerberus import Validator" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from cerberus import Validator\n", 19 | "\n", 20 | "class JamboreeValidator(Validator):\n", 21 | " def _validate_isodd(self, isodd, field, value):\n", 22 | " \"\"\" Test the oddity of a value.\n", 23 | "\n", 24 | " The rule's arguments are validated against this schema:\n", 25 | " {'type': 'boolean'}\n", 26 | " \"\"\"\n", 27 | " if isodd and not bool(value & 1):\n", 28 | " self._error(field, \"Must be an odd number\")" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 28, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "_global_validator = Validator(allow_unknown=True)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 29, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "def check_for_sub_query_dict(_dict:dict):\n", 47 | " for v in _dict.values():\n", 48 | " if isinstance(v, dict):\n", 49 | " pass" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 36, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "# Check to see if the subkeys are of the given types. \n", 59 | "# If it's a dict. 
Make sure to check to see\n", 60 | "example_all_of_schema = {\n", 61 | " \"keyrules\": {\"type\": \"string\"},\n", 62 | " \"valuesrules\": {\n", 63 | " \"type\": ['string', 'boolean', 'number', 'dict', 'list']\n", 64 | " }\n", 65 | "}" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 37, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/plain": [ 76 | "True" 77 | ] 78 | }, 79 | "execution_count": 37, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "_global_validator.validate({\"1\":\"two\"},example_all_of_schema)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "Python 3", 99 | "language": "python", 100 | "name": "python3" 101 | }, 102 | "language_info": { 103 | "codemirror_mode": { 104 | "name": "ipython", 105 | "version": 3 106 | }, 107 | "file_extension": ".py", 108 | "mimetype": "text/x-python", 109 | "name": "python", 110 | "nbconvert_exporter": "python", 111 | "pygments_lexer": "ipython3", 112 | "version": "3.7.3" 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 4 117 | } 118 | -------------------------------------------------------------------------------- /jamboree/storage/databases/database.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from jamboree.utils.helper import Helpers 3 | from pebble.pool import ThreadPool 4 | from multiprocessing import cpu_count 5 | from typing import Union, Optional 6 | from redis import Redis 7 | from redis.client import Pipeline 8 | 9 | 10 | class DatabaseConnection(ABC): 11 | def __init__(self) -> None: 12 | self._connection: Optional[Union[Redis, Pipeline]] = None 13 | self.helpers = Helpers() 14 | self._pool = ThreadPool(max_workers=(cpu_count() * 2)) 15 | 16 | @property 17 | def connection(self) -> Union[Redis, Pipeline]: 18 | if self._connection is None: 19 | raise AttributeError( 20 | "You haven't added a main database connection as of yet." 
21 |             )
22 |         return self._connection
23 | 
24 |     @connection.setter
25 |     def connection(self, _conn: Union[Redis, Pipeline]):
26 |         self._connection = _conn
27 | 
28 |     @property
29 |     def pool(self) -> ThreadPool:
30 |         return self._pool
31 | 
32 |     @pool.setter
33 |     def pool(self, _pool: ThreadPool):
34 |         self._pool = _pool  # assign the backing attribute (self.pool here would recurse into this setter)
35 | 
36 |     """ Save commands """
37 | 
38 |     def save(self, query):
39 |         raise NotImplementedError("save not implemented")
40 | 
41 |     def save_many(self, query):
42 |         raise NotImplementedError("save_many not implemented")
43 | 
44 |     """
45 |     Update commands
46 |     """
47 | 
48 |     def update_single(self, query):
49 |         raise NotImplementedError("update_single not implemented")
50 | 
51 |     def update_many(self, query):
52 |         raise NotImplementedError("update_many not implemented")
53 | 
54 |     """
55 |     Delete Commands
56 |     """
57 | 
58 |     def delete(self, query):
59 |         raise NotImplementedError("delete function not implemented yet.")
60 | 
61 |     def delete_many(self, query):
62 |         raise NotImplementedError("delete_many function not implemented yet.")
63 | 
64 |     def delete_all(self, query):
65 |         raise NotImplementedError("delete_all not implemented")
66 | 
67 |     """
68 |     Query commands
69 |     """
70 | 
71 |     def query_latest(self):
72 |         raise NotImplementedError("query_latest not implemented")
73 | 
74 |     def query_latest_many(self):
75 |         raise NotImplementedError("query_latest_many not implemented")
76 | 
77 |     def query_between(self):
78 |         raise NotImplementedError("query_between not implemented")
79 | 
80 |     def query_before(self):
81 |         raise NotImplementedError("query_before not implemented")
82 | 
83 |     def query_after(self):
84 |         raise NotImplementedError("query_after not implemented")
85 | 
86 |     def query_all(self):
87 |         pass
88 | 
89 |     """ Other Functions """
90 | 
91 |     def reset(self, query):
92 |         raise NotImplementedError("reset not implemented")
93 | 
94 |     def count(self):
95 |         raise NotImplementedError("count not implemented")
96 | 
97 |     def general_lock(self, query: dict):
98 |         raise NotImplementedError("general_lock not implemented")
99 | 
100 | 
-------------------------------------------------------------------------------- /jamboree/middleware/procedures/models/_creme.py: --------------------------------------------------------------------------------
1 | from addict import Dict
2 | from sklearn.base import BaseEstimator
3 | from jamboree.middleware.procedures import ModelProcedureAbstract
4 | from sklearn.datasets import make_friedman2
5 | from sklearn.gaussian_process import GaussianProcessRegressor
6 | from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
7 | from loguru import logger
8 | 
9 | 
10 | 
11 | 
12 | class CremeProcedure(ModelProcedureAbstract):
13 |     def __init__(self, *args, **kwargs) -> None:
14 |         super().__init__()
15 |         self.requirements.model = True
16 |         self.requirements.criterion = False
17 |         self.requirements.optimizer = False
18 | 
19 |         # types = Dict()
20 |         # types.model = BaseEstimator
21 | 
22 |         self.types.model = BaseEstimator
23 | 
24 |     @logger.catch
25 |     def get_params(self):
26 |         self.verify()
27 |         return self.dictionary.model.get_params()
28 | 
29 |     @logger.catch
30 |     def predict(self, X, **kwargs):
31 |         self.verify()
32 |         return self.dictionary.model.predict(X, **kwargs)
33 | 
34 |     @logger.catch
35 |     def predict_prob(self, X, **kwargs):
36 |         self.verify()
37 |         return self.dictionary.model.predict_prob(X, **kwargs)
38 | 
39 |     @logger.catch
40 |     def partial_fit(self, X, y, **kwargs):
41 |         self.verify()
42 |         self.dictionary.model.partial_fit(X, y, **kwargs)
43 | 
44
| def fit(self, X, y, **kwargs): 45 | self.verify() 46 | self.dictionary.model.fit(X, y, **kwargs) 47 | # print(self.mdict.model.predict(X[:2,:], return_std=True)) 48 | 49 | 50 | 51 | def main(): 52 | import datetime as dt 53 | from creme import compose 54 | from creme import datasets 55 | from creme import feature_extraction 56 | from creme import linear_model 57 | from creme import metrics as metricss 58 | from creme import preprocessing 59 | from creme import stats 60 | from creme import stream 61 | 62 | 63 | X_y = datasets.Bikes() 64 | X_y = stream.simulate_qa(X_y, moment='moment', delay=dt.timedelta(minutes=30)) 65 | 66 | def add_time_features(x): 67 | return { 68 | **x, 69 | 'hour': x['moment'].hour, 70 | 'day': x['moment'].weekday() 71 | } 72 | 73 | model = add_time_features 74 | model |= ( 75 | compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind') + 76 | feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean()) + 77 | feature_extraction.TargetAgg(by='station', how=stats.EWMean()) 78 | ) 79 | model |= preprocessing.StandardScaler() 80 | model |= linear_model.LinearRegression() 81 | 82 | metric = metricss.MAE() 83 | 84 | questions = {} 85 | 86 | for i, x, y in X_y: 87 | # Question 88 | is_question = y is None 89 | if is_question: 90 | y_pred = model.predict_one(x) 91 | questions[i] = y_pred 92 | 93 | # Answer 94 | else: 95 | metric.update(y, questions[i]) 96 | model = model.fit_one(x, y) 97 | 98 | if i >= 30000 and i % 30000 == 0: 99 | print(i, metric) 100 | 101 | if __name__ == "__main__": 102 | main() -------------------------------------------------------------------------------- /jamboree/base/processors/files.py: -------------------------------------------------------------------------------- 1 | import redis 2 | from redis import Redis 3 | from typing import Optional, Any 4 | from jamboree.utils.helper import Helpers 5 | from jamboree.storage.files.redisify import RedisFileProcessor, RedisFileConnection 6 | from jamboree.base.processors.abstracts import FileProcessor 7 | 8 | 9 | class JamboreeFileProcessor(FileProcessor): 10 | def __init__(self) -> None: 11 | self._redis:Optional[Redis] = None 12 | self._redis_conn = RedisFileConnection() 13 | self.helpers = Helpers() 14 | 15 | @property 16 | def rconn(self) -> redis.client.Redis: 17 | if self._redis is None: 18 | raise AttributeError("You've yet to add a redis connection") 19 | return self._redis 20 | 21 | @rconn.setter 22 | def rconn(self, _redis:redis.client.Redis): 23 | self._redis = _redis 24 | 25 | @property 26 | def redis_conn(self) -> RedisFileConnection: 27 | if self._redis_conn is None: 28 | raise AttributeError("Redis connection hasn't been set") 29 | return self._redis_conn 30 | 31 | @redis_conn.setter 32 | def redis_conn(self, _rconn: RedisFileConnection): 33 | self._redis_conn = _rconn 34 | 35 | def initialize(self): 36 | """ Initialize database connections. Use this so we can use the same connections for search, files, and events. """ 37 | self.redis_conn = RedisFileConnection() 38 | self.redis_conn.conn = self.rconn 39 | 40 | def _validate_query(self, query: dict): 41 | """ Validates a query. Must have `type` and a second identifier at least""" 42 | if 'type' not in query: 43 | return False 44 | if not isinstance(query['type'], str): 45 | return False 46 | if len(query) < 2: 47 | return False 48 | return True 49 | 50 | 51 | """ 52 | These are the basic functions. We'll create more functions to handle the different scenarios. 
53 | 54 | NOTE: 55 | * Eventually we'll add rocksdb and a check_local=False flag to explain that we want to search rocksdb first. 56 | * We can add a changed flag into redis. 57 | * If we've changed a model on a different machine we can just say check_local=False 58 | * Otherwise we can set check_local=True and force retrieval from redis. 59 | * If we were to do that, we'd have to separate all of the setup functions inside of the redis handler 60 | """ 61 | 62 | 63 | def save(self, query:dict, obj:Any, **kwargs): 64 | if not self._validate_query(query): 65 | raise ValueError("Query isn't valid") 66 | 67 | self.redis_conn.save(query, obj, **kwargs) 68 | 69 | def query(self, query:dict, **kwargs): 70 | if not self._validate_query(query): 71 | raise ValueError("Query isn't valid") 72 | data = self.redis_conn.query(query, **kwargs) 73 | return data 74 | 75 | def delete(self, query:dict, **kwargs): 76 | if not self._validate_query(query): 77 | raise ValueError("Query isn't valid") 78 | self.redis_conn.delete(query, **kwargs) 79 | 80 | def absolute_exists(self, query: dict, **kwargs): 81 | if not self._validate_query(query): 82 | raise ValueError("Query isn't valid") 83 | return self.redis_conn.absolute_exists(query, **kwargs) 84 | 85 | 86 | -------------------------------------------------------------------------------- /test/experiments/notebooks/File Command Logic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Making These Commands Easier To Handle\n", 8 | "\n", 9 | "I realized in my last file versioning notebook that it was hard to track the logic from the main commands:\n", 10 | "\n", 11 | "1. Save\n", 12 | "2. Query\n", 13 | "3. Latest\n", 14 | "\n", 15 | "It was obfuscated by the version control logic. Therefore I'm moving 100% of that to a second place without adding actual versioning. I think I have the incrementing logic working, and the capacity to save and load data. I'll be adding information.\n", 16 | "\n", 17 | "\n", 18 | "I'll have:\n", 19 | "\n", 20 | "1. Save\n", 21 | "2. Load\n", 22 | "3. Delete\n", 23 | "\n", 24 | "All of them here, as well as a nice projection of the outcomes of each possible activity." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "from redis import Redis\n", 34 | "from jamboree.utils.core import consistent_hash\n", 35 | "from jamboree.utils.support import serialize, deserialize, create_checksum\n", 36 | "from jamboree.utils.core import consistent_hash, consistent_unhash" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Key things we'd be looking for with saving\n", 44 | "\n", 45 | "1. Are we overwriting the latest version if it exist\n", 46 | " * We'd do this to resemble online learning\n", 47 | "2. If we aren't doing that, are we updating the version number with this\n", 48 | "3. Are we saving anything redundant\n", 49 | " * This can't technically be done yet. Need to come up with a consistent way to pickle information." 
50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "# How to determine if we should overwrite?\n", 59 | "# is_overwrite enabled" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [] 68 | } 69 | ], 70 | "metadata": { 71 | "kernelspec": { 72 | "display_name": "Python 3", 73 | "language": "python", 74 | "name": "python3" 75 | }, 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython", 79 | "version": 3 80 | }, 81 | "file_extension": ".py", 82 | "mimetype": "text/x-python", 83 | "name": "python", 84 | "nbconvert_exporter": "python", 85 | "pygments_lexer": "ipython3", 86 | "version": "3.7.3" 87 | }, 88 | "varInspector": { 89 | "cols": { 90 | "lenName": 16, 91 | "lenType": 16, 92 | "lenVar": 40 93 | }, 94 | "kernels_config": { 95 | "python": { 96 | "delete_cmd_postfix": "", 97 | "delete_cmd_prefix": "del ", 98 | "library": "var_list.py", 99 | "varRefreshCmd": "print(var_dic_list())" 100 | }, 101 | "r": { 102 | "delete_cmd_postfix": ") ", 103 | "delete_cmd_prefix": "rm(", 104 | "library": "var_list.r", 105 | "varRefreshCmd": "cat(var_dic_list()) " 106 | } 107 | }, 108 | "types_to_exclude": [ 109 | "module", 110 | "function", 111 | "builtin_function_or_method", 112 | "instance", 113 | "_Feature" 114 | ], 115 | "window_display": false 116 | } 117 | }, 118 | "nbformat": 4, 119 | "nbformat_minor": 2 120 | } 121 | -------------------------------------------------------------------------------- /jamboree/middleware/processors/resample.py: -------------------------------------------------------------------------------- 1 | from jamboree.handlers.processors import DataProcessorsAbstract 2 | import pandas as pd 3 | import numpy as np 4 | import scipy.stats as stats 5 | 6 | 7 | class DynamicResample(DataProcessorsAbstract): 8 | def __init__(self, name, **kwargs) -> None: 9 | self.time_info = { 10 | "years": 0, 11 | "months": 0, 12 | "weeks":0, 13 | "days": 0, 14 | "hours": 0, 15 | "minutes": 0, 16 | "seconds": 0 17 | } 18 | self.time_formatting = { 19 | "seconds": "S", 20 | "minutes": "T", 21 | "hours": "H", 22 | "days": "D", 23 | "weeks":"W", 24 | "months": "M", 25 | "years": "Y" 26 | } 27 | 28 | self.base = 0 29 | super().__init__(name, **kwargs) 30 | 31 | 32 | def set_settings(self, **kwargs): 33 | """ Updates the time information""" 34 | self.time_info['seconds'] = kwargs.get("seconds",self.time_info['seconds']) 35 | self.time_info['minutes'] = kwargs.get("minutes",self.time_info['minutes']) 36 | self.time_info['hours'] = kwargs.get("hours",self.time_info['hours']) 37 | self.time_info['days'] = kwargs.get("days",self.time_info['days']) 38 | self.time_info['weeks'] = kwargs.get("weeks",self.time_info['weeks']) 39 | self.time_info['months'] = kwargs.get("months",self.time_info['months']) 40 | self.time_info['years'] = kwargs.get("years",self.time_info['years']) 41 | self.base = kwargs.get("base", self.base) 42 | 43 | def validate_existing_times(self): 44 | checkable_list = self.time_info.values() 45 | all_zero = all(x==0 for x in checkable_list) 46 | if all_zero: 47 | self.time_info['hours'] = 1 48 | 49 | def generate_time_string(self): 50 | self.validate_existing_times() 51 | final_string = "" 52 | for name, time_amount in self.time_info.items(): 53 | if time_amount == 0: 54 | continue 55 | elif time_amount == 1: 56 | final_string = final_string + self.time_formatting.get(name) 57 | continue 58 | 59 | time_format = 
self.time_formatting.get(name) 60 | final_string = final_string + f"{time_amount}{time_format}" 61 | 62 | return final_string 63 | 64 | def process(self, data: pd.DataFrame) -> pd.DataFrame: 65 | if not (isinstance(data, pd.DataFrame)): 66 | return pd.DataFrame() 67 | 68 | if data.empty: 69 | return pd.DataFrame() 70 | # Do preprocessing here 71 | 72 | dtypes = data.dtypes 73 | aggregate_command = {} 74 | for k, v in dtypes.items(): 75 | if np.issubdtype(v, np.number): 76 | command = {k:'mean'} 77 | aggregate_command.update(command) 78 | continue 79 | else: 80 | command = {k: lambda x: stats.mode(x)[0]} 81 | aggregate_command.update(command) 82 | continue 83 | 84 | rule = self.generate_time_string() 85 | 86 | if self.base == 0: 87 | resampled = data.resample(rule).apply(aggregate_command) 88 | return resampled 89 | resampled = data.resample(rule, base=self.base).apply(aggregate_command) 90 | return resampled 91 | 92 | if __name__ == "__main__": 93 | import pandas_datareader.data as web 94 | data_msft = web.DataReader('MSFT','yahoo',start='2008/1/1',end='2020/3/8').round(2) 95 | mrsample = DynamicResample("modin", days=7) 96 | 97 | remicro = mrsample.process(data_msft) -------------------------------------------------------------------------------- /jamboree/handlers/processors/resample.py: -------------------------------------------------------------------------------- 1 | from jamboree.handlers.processors import DataProcessorsAbstract 2 | import pandas as pd 3 | import numpy as np 4 | import scipy.stats as stats 5 | 6 | 7 | class DynamicResample(DataProcessorsAbstract): 8 | def __init__(self, name, **kwargs) -> None: 9 | self.time_info = { 10 | "years": 0, 11 | "months": 0, 12 | "weeks":0, 13 | "days": 0, 14 | "hours": 0, 15 | "minutes": 0, 16 | "seconds": 0 17 | } 18 | self.time_formatting = { 19 | "seconds": "S", 20 | "minutes": "T", 21 | "hours": "H", 22 | "days": "D", 23 | "weeks":"W", 24 | "months": "M", 25 | "years": "Y" 26 | } 27 | 28 | self.base = 0 29 | super().__init__(name, **kwargs) 30 | 31 | 32 | def set_settings(self, **kwargs): 33 | """ Updates the time information""" 34 | self.time_info['seconds'] = kwargs.get("seconds",self.time_info['seconds']) 35 | self.time_info['minutes'] = kwargs.get("minutes",self.time_info['minutes']) 36 | self.time_info['hours'] = kwargs.get("hours",self.time_info['hours']) 37 | self.time_info['days'] = kwargs.get("days",self.time_info['days']) 38 | self.time_info['weeks'] = kwargs.get("weeks",self.time_info['weeks']) 39 | self.time_info['months'] = kwargs.get("months",self.time_info['months']) 40 | self.time_info['years'] = kwargs.get("years",self.time_info['years']) 41 | self.base = kwargs.get("base", self.base) 42 | 43 | def validate_existing_times(self): 44 | checkable_list = self.time_info.values() 45 | all_zero = all(x==0 for x in checkable_list) 46 | if all_zero: 47 | self.time_info['hours'] = 1 48 | 49 | def generate_time_string(self): 50 | self.validate_existing_times() 51 | final_string = "" 52 | for name, time_amount in self.time_info.items(): 53 | if time_amount == 0: 54 | continue 55 | elif time_amount == 1: 56 | final_string = final_string + self.time_formatting.get(name) 57 | continue 58 | 59 | time_format = self.time_formatting.get(name) 60 | final_string = final_string + f"{time_amount}{time_format}" 61 | 62 | return final_string 63 | 64 | def process(self, data: pd.DataFrame) -> pd.DataFrame: 65 | if not (isinstance(data, pd.DataFrame)): 66 | return pd.DataFrame() 67 | 68 | if data.empty: 69 | return pd.DataFrame() 70 | # Do 
preprocessing here 71 | 72 | dtypes = data.dtypes 73 | aggregate_command = {} 74 | for k, v in dtypes.items(): 75 | if np.issubdtype(v, np.number): 76 | command = {k:'mean'} 77 | aggregate_command.update(command) 78 | continue 79 | else: 80 | command = {k: lambda x: stats.mode(x)[0]} 81 | aggregate_command.update(command) 82 | continue 83 | 84 | rule = self.generate_time_string() 85 | 86 | if self.base == 0: 87 | resampled = data.resample(rule).apply(aggregate_command) 88 | return resampled 89 | resampled = data.resample(rule, base=self.base).apply(aggregate_command) 90 | return resampled 91 | 92 | if __name__ == "__main__": 93 | import pandas_datareader.data as web 94 | data_msft = web.DataReader('MSFT','yahoo',start='2008/1/1',end='2020/3/8').round(2) 95 | mrsample = DynamicResample("modin", days=7) 96 | 97 | remicro = mrsample.process(data_msft) 98 | print(remicro) 99 | # time_str = mrsample.generate_time_string() 100 | # print(time_str) -------------------------------------------------------------------------------- /examples/user_handler.py: -------------------------------------------------------------------------------- 1 | import maya 2 | from jamboree import Jamboree, DBHandler 3 | import random 4 | from copy import copy 5 | from loguru import logger 6 | 7 | 8 | class UserHandler(DBHandler): 9 | """Abstract handler that we use to keep track of information. 10 | """ 11 | 12 | def __init__(self, **kwargs): 13 | super().__init__() 14 | self.entity = "user" 15 | self.required = { 16 | "user_id": str 17 | } 18 | self._balance = 0 19 | self._limit = 500 20 | self._settings_handler = None 21 | 22 | @property 23 | def limit(self): 24 | """ The maximum number of records we intend to get when calling the many function.""" 25 | return self._limit 26 | 27 | @limit.setter 28 | def limit(self, limit): 29 | self._limit = limit 30 | 31 | @property 32 | def settings(self): 33 | if self._settings_handler is None: 34 | raise AttributeError 35 | return self._settings_handler 36 | 37 | @settings.setter 38 | def settings(self, _settings): 39 | self._settings_handler = _settings 40 | self._settings_handler.limit = self.limit 41 | 42 | def is_authenticated(self): 43 | return True 44 | 45 | def is_active(self): 46 | return True 47 | 48 | def is_anonymous(self): 49 | return False 50 | 51 | def _check_password_register(self, password: str, confirm: str): 52 | """ Run through a set of password conditions""" 53 | return password == confirm 54 | 55 | def register(self, password: str, confirm: str, first: str, middle: str, last: str): 56 | first = str.capitalize(first) 57 | middle = str.capitalize(middle) 58 | last = str.capitalize(last) 59 | 60 | is_match = self._check_password_register(password, confirm) 61 | if is_match: 62 | logger.debug("Passwords are valid") 63 | 64 | def login(self, password: str): 65 | pass 66 | 67 | def logout(self): 68 | pass 69 | 70 | def session(self): 71 | pass 72 | 73 | def deactivate(self): 74 | pass 75 | 76 | def reactivate(self): 77 | pass 78 | 79 | # -------------------------------------------------------- 80 | # --------------------- Counting ------------------------- 81 | # -------------------------------------------------------- 82 | 83 | # Use to get counts inside of the database 84 | 85 | def user_record_count(self) -> int: 86 | count = self.count() 87 | return count 88 | 89 | # -------------------------------------------------------- 90 | # --------------------- Querying ------------------------- 91 | # -------------------------------------------------------- 92 | 93 | 
def latest_user(self):
94 |         """ Get the latest user record """
95 |         last_state = self.last()
96 |         return last_state
97 | 
98 |     def many_user(self):
99 |         latest_user_records = self.many(self.limit)
100 |         return latest_user_records
101 | 
102 |     # --------------------------------------------------------
103 |     # ----------------------- Saving -------------------------
104 |     # --------------------------------------------------------
105 | 
106 |     def save_user(self, data: dict):
107 |         query = copy(self._query)  # copy is imported as a bare function (from copy import copy)
108 |         query.update(data)
109 |         query['time'] = maya.now()._epoch
110 |         query['type'] = self.entity
111 |         query['timestamp'] = maya.now()._epoch
112 |         self.save(query)  # save the merged record, not the raw input, so the fields above are kept
113 | 
114 | 
115 | def flip(n=0.02):
116 |     if n >= random.uniform(0, 1):
117 |         return True
118 |     return False
119 | 
120 | 
121 | if __name__ == "__main__":
122 |     user_handler = UserHandler()
123 |     user_handler['user_id'] = "mygeneralemail@gmail.com"
124 |     user_handler.register("password1", "password1", "kevin", "andrew", "hill")
125 |     user_handler.login("password1")
126 | 
-------------------------------------------------------------------------------- /jamboree/middleware/procedures/models/_torch.py: --------------------------------------------------------------------------------
1 | from addict import Dict
2 | from loguru import logger
3 | from typing import Optional
4 | 
5 | from sklearn.base import BaseEstimator
6 | from sklearn.datasets import make_friedman2
7 | from sklearn.gaussian_process import GaussianProcessRegressor
8 | from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
9 | from jamboree.middleware.procedures import ModelProcedureAbstract
10 | 
11 | from skorch.net import NeuralNet
12 | 
13 | from torch.nn import Module
14 | from torch.nn.modules.loss import _Loss
15 | from torch.optim import Optimizer
16 | from torch.optim import Adam
17 | import numpy as np
18 | from sklearn.datasets import make_classification
19 | from torch import nn
20 | import torch.nn.functional as F
21 | 
22 | from skorch import NeuralNetClassifier
23 | 
24 | 
25 | class TorchProcedure(ModelProcedureAbstract):
26 |     def __init__(self, *args, **kwargs) -> None:
27 |         super().__init__()
28 |         self.requirements.model = True
29 |         self.requirements.criterion = True
30 |         self.requirements.optimizer = True
31 |         self.types.model = nn.Module
32 |         self.types.criterion = _Loss
33 |         self.types.optimizer = Optimizer
34 |         self._compiled_model:Optional[NeuralNet] = None
35 | 
36 |     @property
37 |     def model(self) -> NeuralNet:
38 |         self.verify()
39 |         if self._compiled_model is None:
40 |             _compiled = NeuralNet(
41 |                 module=self.dictionary.model,
42 |                 criterion=self.dictionary.criterion,
43 |                 optimizer=self.dictionary.optimizer,
44 |                 max_epochs=10,
45 |                 lr=0.1,
46 |                 # Shuffle training data on each epoch
47 |                 iterator_train__shuffle=True,
48 |             )
49 |             self._compiled_model = _compiled
50 |         return self._compiled_model
51 | 
52 |     @logger.catch
53 |     def set_params(self, **params):
54 |         self.changed = True
55 |         self.model.set_params(**params)
56 | 
57 |     @logger.catch
58 |     def get_params(self):
59 |         return self.model.get_params()
60 | 
61 |     @logger.catch
62 |     def predict(self, X, **kwargs):
63 |         return self.model.predict(X, **kwargs)
64 | 
65 |     @logger.catch
66 |     def predict_proba(self, X, **kwargs):
67 |         return self.model.predict_proba(X, **kwargs)
68 | 
69 |     @logger.catch
70 |     def partial_fit(self, X, y, **kwargs):
71 |         self.changed = True
72 |         self.model.partial_fit(X, y, **kwargs)
73 | 
74 |     def fit(self, X, y, **kwargs):
75 |         self.changed = True
76 | 
self.model.fit(X, y, **kwargs)
77 | 
78 | 
79 | class MyModule(nn.Module):
80 |     def __init__(self, num_units=10, nonlin=F.relu):
81 |         super(MyModule, self).__init__()
82 | 
83 |         self.dense0 = nn.Linear(20, num_units)
84 |         self.nonlin = nonlin
85 |         self.dropout = nn.Dropout(0.5)
86 |         self.dense1 = nn.Linear(num_units, 10)
87 |         self.output = nn.Linear(10, 2)
88 | 
89 |     def forward(self, X, **kwargs):
90 |         X = self.nonlin(self.dense0(X))
91 |         X = self.dropout(X)
92 |         X = F.relu(self.dense1(X))
93 |         X = F.log_softmax(self.output(X), dim=-1)  # NLLLoss (used below) expects log-probabilities, not softmax probabilities
94 |         return X
95 | 
96 | 
97 | class TestCustomTorchClassifier(TorchProcedure):
98 |     def __init__(self, *args, **kwargs):
99 |         super().__init__(*args, **kwargs)
100 |         self.dictionary.model = MyModule
101 |         self.dictionary.optimizer = Adam
102 |         self.dictionary.criterion = nn.NLLLoss
103 | 
104 | 
105 | 
106 | 
107 | def main():
108 | 
109 | 
110 | 
111 |     X, y = make_classification(1000, 20, n_informative=10, random_state=0)
112 |     X = X.astype(np.float32)
113 |     y = y.astype(np.int64)
114 | 
115 | 
116 |     net = TestCustomTorchClassifier()
117 | 
118 |     net.fit(X, y)
119 |     for _ in range(10):
120 |         y_proba = net.predict_proba(X)
121 |         print(y_proba)
122 | 
123 | 
124 | if __name__ == "__main__":
125 |     main()
-------------------------------------------------------------------------------- /jamboree/handlers/complex/model.py: --------------------------------------------------------------------------------
1 | # import random
2 | # import time
3 | # import uuid
4 | # from pprint import pprint
5 | # from typing import Any, List, Optional
6 | 
7 | # import maya
8 | # from addict import Dict
9 | # from loguru import logger
10 | 
11 | # from jamboree import Jamboree
12 | # from jamboree.handlers.complex.engines import FileEngine
13 | # from jamboree.middleware.procedures import (ModelProcedureAbstract,
14 | #                                             ModelProcedureManagement,
15 | #                                             ProcedureAbstract,
16 | #                                             ProcedureManagement)
17 | # from jamboree.utils.support.search import querying
18 | 
19 | # logger.disable(__name__)
20 | 
21 | # class ModelEngine(FileEngine):
22 | #     """ """
23 | 
24 | #     def __init__(self, processor, **kwargs):
25 | #         super().__init__(processor=processor, **kwargs)
26 | #         self.pm = ModelProcedureManagement()
27 | #         self.current_procedure = None
28 | 
29 | #     def init_specialized(self, **kwargs):
30 | #         super().init_specialized(**kwargs)
31 | #         self.model_type = kwargs.get("model_type", "sklearn")
32 | 
33 | #     def open_context(self):
34 | #         if not self.file_reset:
35 | #             self.reset()
36 | 
37 | 
38 | #     def close_context(self):
39 | #         current_model = self.model
40 | #         if current_model.changed:
41 | #             extracted = current_model.extract()
42 | #             self.save_file(extracted)
43 | 
44 | #         # probably do some sort of metrics stuff :)
45 | 
46 | 
47 | #     def enterable(self):
48 | #         """ Return the object we want to enter into """
49 | #         return self.model
50 | 
51 | #     def custom_post_load(self, item):
52 | #         proc = self.procedure
53 | #         proc.dictionary = item
54 | #         proc.verify()
55 | #         self.current_procedure = proc
56 | 
57 | 
58 | #     @property
59 | #     def procedure(self) -> 'ModelProcedureAbstract':
60 | #         if not self.current_procedure:
61 | #             self.current_procedure = self.pm.access(self.model_type)
62 | #             logger.success(f"Successfully accessed a procedure: {self.current_procedure}")
63 | #         return self.current_procedure
64 | 
65 | #     @procedure.setter
66 | #     def procedure(self, _procedure:'ModelProcedureAbstract'):
67 | #         self.current_procedure = _procedure
68 | 
69 | #     @property
70 | #     def model(self):
71 | #         if self.current_procedure:
72 | # 
self.procedure.verify()
73 | #             return self.procedure
74 | #         raise AttributeError("You haven't added a procedure yet")
75 | 
76 | #     def file_from_dict(self, item:Dict):
77 | #         reloaded = ModelEngine(
78 | #             processor=self.processor,
79 | #             name=item.name,
80 | #             category=item.category,
81 | #             subcategories=item.subcategories,
82 | #             submetatype=item.submetatype,
83 | #             abbreviation=item.abbreviation,
84 | #             model_type=item.submetatype
85 | #         )
86 | #         return reloaded
87 | 
88 | 
89 | #     def reset(self):
90 | #         super().reset()
91 | 
92 | 
93 | # def file_engine_main():
94 | #     """
95 | #         Creating a generic usage of the file engine instead of only model storage.
96 | 
97 | #         To test, we're going to entirely duplicate the prior test.
98 | #         Only we're going to use generic functions and variables. In essence, rebuild the `ModelEngine` starting with the file handler
99 | 
100 | #     """
101 | 
102 | #     from jamboree.middleware.procedures.models import CustomSklearnGaussianProcedure
103 | #     file_name = uuid.uuid4().hex
104 | #     logger.info("Starting file engine experiment")
105 | #     logger.info(f"The file name is: {file_name}")
106 | #     jamboree_processor = Jamboree()
107 | #     with logger.catch(message="There should be no reason for this to fail"):
108 | #         # Initialize a file engine
109 | #         model_engine = ModelEngine(
110 | #             processor=jamboree_processor,
111 | #             name=file_name,
112 | #             category="machine",
113 | #             subcategories={"ml_type": "gaussian"},
114 | #             abbreviation="GAUSS",
115 | #             submetatype="sklearn",
116 | #             blobfile=CustomSklearnGaussianProcedure(),
117 | #         )
118 | #         model_engine.reset()
119 | 
120 | #         reloaded = model_engine.first(name=file_name)
121 | #         while True:
122 | #             with reloaded as model:
123 | #                 logger.debug(model)
124 | 
125 | 
126 | # if __name__ == "__main__":
127 | #     logger.enable(__name__)
128 | #     file_engine_main()
-------------------------------------------------------------------------------- /jamboree/handlers/complex/meta.py: --------------------------------------------------------------------------------
1 | import time
2 | import uuid
3 | from typing import Optional
4 | 
5 | import maya
6 | from jamboree import Jamboree
7 | from jamboree.handlers.abstracted.search import MetadataSearchHandler
8 | from jamboree.handlers.default import DBHandler, Access
9 | from loguru import logger
10 | 
11 | 
12 | class MetaHandler(Access):
13 |     """
14 |         # MetaDataHandler
15 |         ---
16 |         Metadata is "data that provides information about other data".
17 | 
18 |         The MetaHandler is a way to interact with metadata on each data source we have.
19 | 
20 | 
21 |         It should be used with both the DataHandler and MultDataHandler, as well as any other form of common data we're looking for.
22 |         It exists so that we can figure out properties of data without being forced to open the data directly.
23 |         It should also give us the capacity to search for various bits of information (redis_search) in the near future.
24 | 
25 |         Some use cases of the metadata include:
26 | 
27 |         1. Knowing the type of data we're looking at given some information.
28 |             - Time-series
29 |             - Machine Learning Model
30 |             - Network/Graph Data
31 |             - Events
32 |             - Log Data
33 |             - Meta Record
34 |                 - A metarecord is a JSON representation of a complex datatype.
35 |         2. Time the data was initiated
36 |         3.
Time the data was last modified 37 | - Modifications can be as simple as: 38 | - Adding a new ticker or bar for price information 39 | - Partial-Fitting a machine learning model 40 | - Adjusting a weight to a variable 41 | 4. Getting the number of records for a given piece of information 42 | - Very useful if we're trying to plan around how much we're going to do for a piece of information 43 | 5. Determining if such data exist 44 | - We would simply create a complex hash function that's pulled from all dbhandlers representing that data type. 45 | 6. Start and End Time for a given set of records 46 | 7. Location information 47 | - There can be different location information for each piece of information. 48 | - Examples: 49 | - Image weather data 50 | - Market location data 51 | - Social interaction location data 52 | - Login, logout location data 53 | - Creating something flexible for this would probably be a good idea. 54 | """ 55 | def __init__(self): 56 | super().__init__() 57 | self.entity = "metadata" 58 | self.required = { 59 | "name": str, 60 | "category": str, 61 | "metatype": str, 62 | "submetatype": str, 63 | "abbreviation": str, 64 | "subcategories": dict 65 | } 66 | self._search = MetadataSearchHandler() 67 | self._settings = {} 68 | self.is_auto = False 69 | self.description: Optional[str] = None 70 | 71 | @property 72 | def search(self): 73 | metatype = self.metatype 74 | submetatype = self.submetatype 75 | self._search.entity = self.entity 76 | self._search['metatype'] = { 77 | "type": "TEXT", 78 | "is_filter": True, 79 | "values": { 80 | "is_exact": True, 81 | "term": metatype 82 | } 83 | } 84 | self._search['submetatype'] = { 85 | "type": "TEXT", 86 | "is_filter": True, 87 | "values": { 88 | "is_exact": True, 89 | "term": submetatype 90 | } 91 | } 92 | self._search['name'] = self.name 93 | self._search['category'] = self.category 94 | self._search['subcategories'] = self.subcategories 95 | self._search['abbreviation'] = self.abbreviation 96 | self._search.processor = self.processor 97 | return self._search 98 | 99 | def reset(self): 100 | self.check() 101 | qo = self.setup_query() 102 | qo.pop("mtype", None) 103 | if self.description is not None: 104 | qo['description'] = self.description 105 | 106 | return self.search.Create(allow_duplicates=False, 107 | no_overwrite_must_have=True, 108 | **qo) 109 | 110 | 111 | if __name__ == "__main__": 112 | jambo = Jamboree() 113 | metahandler = MetaHandler() 114 | metahandler.event = jambo 115 | metahandler.reset() 116 | -------------------------------------------------------------------------------- /jamboree/utils/support/search/validation.py: -------------------------------------------------------------------------------- 1 | 2 | from cerberus import Validator 3 | from redisearch import TextField, NumericField, TagField, GeoField 4 | # from jamboree.utils.support.search import filtration_schemas 5 | 6 | _global_validator = Validator(require_all=True, allow_unknown=True) 7 | # _filtration_schemas = filtration_schemas() 8 | 9 | class Geo(type): 10 | """ A geolocational type for """ 11 | def __call__(cls): 12 | return cls.__new__(cls) 13 | def __repr__(self): 14 | return "GEO" 15 | 16 | def __str__(self): 17 | return "GEO" 18 | 19 | 20 | def is_nested(d): 21 | return any(isinstance(i,dict) for i in d.values()) 22 | 23 | def is_gen_type(item, _type): 24 | try: 25 | return isinstance(item, _type) or issubclass(item, _type) or item == _type 26 | except: 27 | return False 28 | 29 | def name_match(item:str, name:str): 30 | return 
item.lower() == name.lower() 31 | 32 | 33 | def is_generic(_k): 34 | return _k in [str, float, int, list, bool] 35 | 36 | 37 | def is_geo(k) -> bool: 38 | if is_gen_type(k, Geo): 39 | return True 40 | 41 | if is_gen_type(k, str): 42 | if name_match(k, "geo"): 43 | return True 44 | return False 45 | 46 | def to_str(i): 47 | """Converts the item to a string version of it""" 48 | if i == bool: 49 | # This will be text that we'll force exact queries on 50 | return "BOOL" 51 | elif i == float or i == int: 52 | return "NUMERIC" 53 | elif i == str: 54 | return "TEXT" 55 | elif i == list: 56 | return "TAG" 57 | 58 | 59 | def to_field(k, v): 60 | if v == "BOOL": 61 | return TextField(k, sortable=True) 62 | elif v == "NUMERIC": 63 | return NumericField(k, sortable=True) 64 | elif v == "TEXT": 65 | return TextField(k) 66 | elif v == "TAG": 67 | return TagField(k) 68 | else: 69 | return GeoField(k) 70 | 71 | 72 | 73 | """ 74 | Dictionary Validation 75 | """ 76 | 77 | 78 | 79 | def is_valid_geo(_dict:dict): 80 | """ That we have the appropiate values """ 81 | schema = { 82 | "long": {"type": "number"}, 83 | "lat": {"type": "number"}, 84 | "distance": {"type": "number", "required":False}, 85 | "metric": {"type": "string", "allowed": ["m","km","mi","ft"], "required":False} 86 | } 87 | return _global_validator.validate(_dict, schema) 88 | 89 | def is_valid_bool(_dict:dict): 90 | """ That we have the appropiate values to create a query function for a boolean """ 91 | schema = { 92 | "toggle": {"type": "boolean"}, 93 | } 94 | return _global_validator.validate(_dict, schema) 95 | 96 | def is_valid_numeric(_dict:dict): 97 | """ That we have the appropiate values to do a numeric query """ 98 | schema = { 99 | "operation": {"type": "string", "allowed": ['greater', 'lesser', 'between', 'exact']}, 100 | "upper": {"type": "number"}, 101 | "lower": {"type": "number"} 102 | } 103 | return _global_validator.validate(_dict, schema) 104 | 105 | def is_valid_tags(_dict:dict): 106 | schema = { 107 | "operation": {"type": "string", "allowed": ['and', 'or']}, 108 | "tags": {"type": "list", "schema": {"type": "string"}}, 109 | } 110 | return _global_validator.validate(_dict, schema) 111 | 112 | def is_valid_text(_dict:dict): 113 | schema = { 114 | "term": {"type": "string"}, 115 | "is_exact": {"type": "boolean", "required":False}, 116 | } 117 | return _global_validator.validate(_dict, schema) 118 | 119 | def is_queryable_dict(_dict:dict): 120 | """ """ 121 | schema = { 122 | "type": { 123 | "type": "string", 124 | "allowed": ["GEO", "TEXT", "BOOL", "NUMERIC", "TAG"] 125 | }, 126 | "is_filter": { 127 | "type": "boolean" 128 | }, 129 | "values": { 130 | "type": "dict" 131 | } 132 | } 133 | return _global_validator.validate(_dict, schema) 134 | 135 | # Specific queryable information 136 | 137 | 138 | def main(): 139 | _search_item = { 140 | "type": "GEO", 141 | "is_filter": False, 142 | "values": { 143 | "long": 33, 144 | "lat": -10, 145 | "distance": 1, 146 | "metric": "km" 147 | } 148 | } 149 | 150 | _numeric_search_item = { 151 | "operation": "between", 152 | "upper": 0, 153 | "lower": 0 154 | } 155 | 156 | _bool_search_values = { 157 | "toggle": True 158 | } 159 | 160 | print(is_queryable_dict(_search_item)) 161 | print(is_valid_numeric(_numeric_search_item)) 162 | print(is_valid_bool(_bool_search_values)) 163 | 164 | 165 | if __name__ == "__main__": 166 | main() -------------------------------------------------------------------------------- /examples/sample_env_refactor.py: 
--------------------------------------------------------------------------------
 1 | import time
 2 | import maya
 3 | import uuid
 4 | import random
 5 | import vaex
 6 | import pandas as pd
 7 | from copy import copy
 8 | from loguru import logger
 9 | from jamboree import DBHandler
 10 | from jamboree import Jamboree
 11 | from contextlib import ContextDecorator
 12 | from pprint import pprint
 13 | 
 14 | class timecontext(ContextDecorator):
 15 |     def __enter__(self):
 16 |         self.start = maya.now()._epoch
 17 |         return self
 18 | 
 19 |     def __exit__(self, *exc):
 20 |         self.end = maya.now()._epoch
 21 |         delta = self.end - self.start
 22 |         print(f"It took {delta}s")
 23 |         return False
 24 | 
 25 | 
 26 | class SampleEnvHandler(DBHandler):
 27 |     """Abstract handler that we use to keep track of information.
 28 |     """
 29 | 
 30 |     def __init__(self):
 31 |         # mongodb_host= "localhost", redis_host="localhost", redis_port=6379
 32 |         super().__init__()
 33 |         self.entity = "sample"
 34 |         self.required = {
 35 |             "episode": str
 36 |         }
 37 |         self._balance = 0
 38 |         self._limit = 100
 39 |         self['opt_type'] = "live"
 40 | 
 41 |     @property
 42 |     def limit(self):
 43 |         return self._limit
 44 | 
 45 |     @limit.setter
 46 |     def limit(self, limit):
 47 |         self._limit = limit
 48 | 
 49 |     @property
 50 |     def count(self):
 51 |         return super().count()
 52 | 
 53 |     @property
 54 |     def balance(self):
 55 |         """ Gets the current balance: the sum of the most recently popped values """
 56 |         return self._balance
 57 | 
 58 |     @property
 59 |     def transactions(self) -> vaex.dataframe.DataFrame:
 60 |         """ Get the last 100 transactions """
 61 |         many_records = self.many(self.limit)
 62 | 
 63 |         if isinstance(many_records, dict):
 64 |             frame = pd.DataFrame(many_records)
 65 |             transactions_frame = vaex.from_pandas(frame)
 66 |             return transactions_frame.sort('timestamp', ascending=False)
 67 | 
 68 |         if len(many_records) > 0:
 69 |             frame = pd.DataFrame(many_records)
 70 |             transactions_frame = vaex.from_pandas(frame)
 71 |             return transactions_frame.sort('timestamp', ascending=False)
 72 | 
 73 |         return vaex.from_pandas(pd.DataFrame())
 74 | 
 75 |     def save_update_recent(self, data: dict):
 76 |         transactions = self.transactions
 77 |         count = transactions.count()
 78 |         new_value = data['value'] + count
 79 |         data['value'] = int(new_value)
 80 |         super().save(data)
 81 | 
 82 |     def pop_many(self, _limit: int = 1, alt: dict = {}):
 83 |         return super().pop_many(_limit, alt)
 84 | 
 85 |     def copy(self):
 86 |         new_sample = SampleEnvHandler()
 87 |         new_sample.data = copy(self.data)
 88 |         new_sample.required = copy(self.required)
 89 |         new_sample._required = copy(self._required)
 90 |         new_sample.limit = copy(self.limit)
 91 |         new_sample.event_proc = self.event_proc
 92 |         return new_sample
 93 | 
 94 | 
 95 | def flip(n=0.02):
 96 |     if n >= random.uniform(0, 1):
 97 |         return True
 98 |     return False
 99 | 
 100 | 
 101 | def main():
 102 |     jambo = Jamboree()
 103 |     sample_env_handler = SampleEnvHandler()
 104 |     sample_env_handler.limit = 250
 105 |     sample_env_handler.processor = jambo
 106 |     # with timecontext():
 107 |     current_time = maya.now()._epoch
 108 |     mult = 60
 109 | 
 110 |     # Create a new set of records and swap to another location to be acted on.
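    # `save` appends a single record under the current episode, `save_many`
    # bulk-appends, and `last_by` fetches the record closest to a given epoch.
    # A minimal sketch of the same round trip, assuming a configured handler
    # like the one above (the names here are illustrative):
    #
    #   handler = SampleEnvHandler()
    #   handler.processor = Jamboree()
    #   handler['episode'] = uuid.uuid1().hex
    #   handler.save({"value": 1.0, "time": maya.now()._epoch})
    #   assert handler.last()["value"] == 1.0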
111 | sample_env_handler['episode'] = uuid.uuid1().hex 112 | with timecontext(): 113 | super_index = 0 114 | for _ in range(100): 115 | v1 = random.uniform(0, 12) 116 | sample_env_handler.save({"value": v1, "time": (current_time + (mult * super_index))}) 117 | super_index += 1 118 | 119 | many_list = [] 120 | catch_index_1 = random.randint(super_index-10, super_index+3) 121 | catch_index_2 = random.randint(super_index-10, super_index+3) 122 | last_by_time = (current_time + (mult * catch_index_1)) 123 | last_by_time_2 = (current_time + (mult * catch_index_2)) 124 | for _ in range(10): 125 | item = {"valuesssssss": random.uniform(0, 12), "time": (current_time + (mult * super_index))} 126 | many_list.append(item) 127 | super_index += 1 128 | 129 | sample_env_handler.save_many(many_list) 130 | latest = sample_env_handler.last() 131 | last_by = sample_env_handler.last_by(last_by_time, ar="relative") 132 | last_by_2 = sample_env_handler.last_by(last_by_time_2, ar="relative") 133 | 134 | 135 | t1 = last_by.get('time', time.time()) 136 | t2 = last_by_2.get('time', time.time()) 137 | 138 | logger.info(latest) 139 | logger.success(t1) 140 | logger.error(t2) 141 | 142 | 143 | if __name__ == "__main__": 144 | main() 145 | -------------------------------------------------------------------------------- /jamboree/handlers/complex/backtestable/db.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from loguru import logger 3 | 4 | import maya 5 | import pandas as pd 6 | 7 | from jamboree import Jamboree 8 | from jamboree import JamboreeNew 9 | 10 | from jamboree.handlers.complex.meta import MetaHandler 11 | # from jamboree.handlers.complex.metric import MetricHandler 12 | 13 | from jamboree.handlers.default.time import TimeHandler 14 | from jamboree.handlers.default.db import DBHandler 15 | from jamboree.handlers.default import BlobStorageHandler, Access 16 | 17 | class BacktestDBHandler(DBHandler): 18 | """ 19 | # BACKTEST HANDLER 20 | --- 21 | 22 | A way to load in time and metadata information into classes that already use DB handler. 23 | 24 | If we're using blobhandler use object below 25 | 26 | 27 | """ 28 | 29 | def __init__(self): 30 | super().__init__() 31 | 32 | 33 | # Other objects to consider 34 | self._time:TimeHandler = TimeHandler() 35 | self._meta: MetaHandler = MetaHandler() 36 | # self._metrics: MetricHandler = MetricHandler() 37 | self._episode = uuid.uuid4().hex 38 | 39 | 40 | self._is_live = False 41 | self.is_event = False # use to make sure there's absolutely no duplicate data 42 | 43 | @property 44 | def episode(self) -> str: 45 | return self._episode 46 | 47 | @episode.setter 48 | def episode(self, _episode:str): 49 | self._episode = _episode 50 | 51 | @property 52 | def live(self) -> bool: 53 | return self._is_live 54 | 55 | @live.setter 56 | def live(self, _live:bool): 57 | self._is_live = _live 58 | 59 | @property 60 | def time(self) -> 'TimeHandler': 61 | # self._time.event = self.event 62 | self._time.processor = self.processor 63 | self._time['episode'] = self.episode 64 | self._time['live'] = self.live 65 | return self._time 66 | 67 | @time.setter 68 | def time(self, _time:'TimeHandler'): 69 | self._time = _time 70 | 71 | def reset(self): 72 | """ Reset the data we're querying for. 
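Currently only the time handler is reset here; the metadata and metric resets just below are commented out, so callers that need them must invoke them explicitly.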
""" 73 | # self.reset_current_metric() 74 | # self.metadata.reset() 75 | self.time.reset() 76 | 77 | 78 | 79 | def __str__(self) -> str: 80 | name = self["name"] 81 | category = self["category"] 82 | subcategories = self["subcategories"] 83 | jscat = self.main_helper.generate_hash(subcategories) 84 | return f"{name}:{category}:{jscat}" 85 | 86 | 87 | class BacktestDBHandlerWithAccess(Access): 88 | """ 89 | # BACKTEST HANDLER With Metadata Accessors 90 | --- 91 | 92 | A way to load in time and metadata information into classes that already use DB handler. 93 | 94 | If we're using blobhandler use object below. 95 | 96 | 97 | """ 98 | 99 | def __init__(self): 100 | super().__init__() 101 | self.required = { 102 | "name": str, 103 | "category": str, 104 | "subcategories": dict, 105 | "metatype": str, 106 | "submetatype": str, 107 | "abbreviation": str, 108 | } 109 | 110 | # Other objects to consider 111 | self._time:TimeHandler = TimeHandler() 112 | self._meta: MetaHandler = MetaHandler() 113 | # self._metrics: MetricHandler = MetricHandler() 114 | self._episode = uuid.uuid4().hex 115 | 116 | 117 | self._is_live = False 118 | self.is_event = False # use to make sure there's absolutely no duplicate data 119 | 120 | @property 121 | def episode(self) -> str: 122 | return self._episode 123 | 124 | @episode.setter 125 | def episode(self, _episode:str): 126 | self._episode = _episode 127 | 128 | @property 129 | def live(self) -> bool: 130 | return self._is_live 131 | 132 | @live.setter 133 | def live(self, _live:bool): 134 | self._is_live = _live 135 | 136 | @property 137 | def time(self) -> 'TimeHandler': 138 | # self._time.event = self.event 139 | self._time.processor = self.processor 140 | self._time['episode'] = self.episode 141 | self._time['live'] = self.live 142 | return self._time 143 | 144 | @time.setter 145 | def time(self, _time:'TimeHandler'): 146 | self._time = _time 147 | 148 | def reset(self): 149 | """ Reset the data we're querying for. 
""" 150 | # self.reset_current_metric() 151 | # self.metadata.reset() 152 | self.time.reset() 153 | 154 | 155 | 156 | def __str__(self) -> str: 157 | name = self["name"] 158 | category = self["category"] 159 | subcategories = self["subcategories"] 160 | jscat = self.main_helper.generate_hash(subcategories) 161 | return f"{name}:{category}:{jscat}" -------------------------------------------------------------------------------- /jamboree/utils/support/search/assistance/inserter.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import List 3 | from jamboree.utils.support.search import ( is_gen_type, is_generic, is_geo, is_valid_geo, 4 | is_nested, name_match, to_field, 5 | to_str, is_valid_text, is_valid_numeric, is_queryable_dict) 6 | 7 | 8 | 9 | def boolean_process(field, is_true=False): 10 | """ Return a dicionary that has a TEXT value to represent a boolean """ 11 | bstring = "FALSE" 12 | if is_true: 13 | bstring = "TRUE" 14 | return { 15 | field: bstring 16 | } 17 | 18 | def list_process(field, item_list:List[str]): 19 | """ Return a dictionary representing a list of tags""" 20 | # if isinstance(item_list, list): 21 | if len(item_list) == 0: 22 | return { 23 | 24 | } 25 | saved_list = [] 26 | 27 | for i in item_list: 28 | saved_list.append(f"{i}") 29 | return { 30 | field: ",".join(saved_list) 31 | } 32 | 33 | def text_process(field:str, term:str, is_exact=False): 34 | if is_exact: 35 | return { 36 | field: term 37 | } 38 | return { 39 | field:term 40 | } 41 | 42 | def number_process(field, number): 43 | return { 44 | field: number 45 | } 46 | 47 | 48 | 49 | def geo_process_dict(field:str, dictionary:dict): 50 | """ Converts a dictionary into a dictionary string""" 51 | vals = dictionary['values'] 52 | if not is_valid_geo(vals): 53 | return {} 54 | lon = vals.get("long") 55 | lat = vals.get("lat") 56 | return { 57 | field: f"{lon},{lat}" 58 | } 59 | 60 | def num_process_dict(field:str, dictionary:dict): 61 | d_vals = dictionary['values'] 62 | if is_valid_numeric(d_vals): 63 | _operation = d_vals.get("operation") 64 | _upper = d_vals.get("upper") 65 | _lower = d_vals.get("lower") 66 | 67 | if _operation == "greater": 68 | return number_process(field, _upper) 69 | elif _operation == "lesser": 70 | return number_process(field, _lower) 71 | elif _operation == "between": 72 | return number_process(field, _upper) 73 | elif _operation == "exact": 74 | _is_exact = (_upper == _lower) 75 | if _is_exact: 76 | return number_process(field, _upper) 77 | return {} 78 | 79 | def text_process_dict(field, dictionary:dict): 80 | """ Create a simple text field from the dictionary""" 81 | values = dictionary.get("values") 82 | if is_valid_text(values): 83 | is_exact = values.get("is_exact", False) 84 | _term = values.get("term", False) 85 | filtered_term = re.sub('[^a-zA-Z0-9\n\.|\*|\@|\|\_]', ' ', _term) 86 | return text_process(field, filtered_term, is_exact=is_exact) 87 | return { 88 | 89 | } 90 | 91 | 92 | def create_insertable(example:dict): 93 | insertable = {} 94 | for k, v in example.items(): 95 | if isinstance(v, list): 96 | insertable.update(list_process(k, v)) 97 | elif isinstance(v, str): 98 | insertable.update(text_process(k, v)) 99 | elif isinstance(v, bool): 100 | insertable.update(boolean_process(k, v)) 101 | elif isinstance(v, float) or isinstance(v, int): 102 | insertable.update(number_process(k, v)) 103 | elif isinstance(v, dict): 104 | if not is_queryable_dict(v): 105 | continue 106 | if v['type'] == "NUMERIC": 107 | 
insertable.update(num_process_dict(k, v)) 108 | 109 | if v['type'] == "GEO": 110 | insertable.update(geo_process_dict(k, v)) 111 | 112 | if v['type'] == "TEXT": 113 | insertable.update(text_process_dict(k, v)) 114 | return insertable 115 | 116 | 117 | def main(): 118 | """ Convert a dictionary into an insertable dictionary""" 119 | example = { 120 | "maybe": True, 121 | "gtags": ["one", "two", "three"], 122 | "current": { 123 | "type": "NUMERIC", 124 | "is_filter": True, 125 | "values": { 126 | "lower": 33, 127 | "upper": 0, 128 | "operation": "between" 129 | } 130 | }, 131 | "loc": { 132 | "type": "GEO", 133 | "is_filter": True, 134 | "values": { 135 | "long": 33, 136 | "lat": -10, 137 | "distance": 1.2, 138 | "metric": "km" 139 | } 140 | }, 141 | "exact_text": { 142 | "type": "TEXT", 143 | "is_filter": True, 144 | "values": { 145 | "term": "hello world" 146 | } 147 | }, 148 | } 149 | 150 | 151 | for _ in range(100): 152 | insertable = create_insertable(example) 153 | 154 | print(insertable) 155 | pass 156 | 157 | 158 | if __name__ == "__main__": 159 | main() -------------------------------------------------------------------------------- /jamboree/storage/databases/jmongo.py: -------------------------------------------------------------------------------- 1 | import maya 2 | from loguru import logger 3 | from typing import Dict, List, Any 4 | from jamboree.storage.databases import DatabaseConnection 5 | 6 | 7 | class MongoDatabaseConnection(DatabaseConnection): 8 | def __init__(self) -> None: 9 | super().__init__() 10 | 11 | """ Save commands """ 12 | 13 | def save(self, query: dict, data: dict): 14 | if not self.helpers.validate_query(query): 15 | # Log a warning here instead 16 | return 17 | timestamp = maya.now()._epoch 18 | query.update(data) 19 | query['timestamp'] = timestamp 20 | self.connection.store(query) 21 | 22 | def save_many(self, query: Dict[str, Any], data: List[Dict]): 23 | if not self.helpers.validate_query(query) or len(data) == 0: 24 | return 25 | 26 | first_item = data[0] 27 | first_item.update(query) 28 | updated_list = [self.helpers.update_dict(query, x) for x in data] 29 | self.connection.bulk_upsert(updated_list, _column_first=first_item.keys(), _in=['timestamp']) 30 | 31 | """ 32 | Update commands 33 | """ 34 | 35 | def update_single(self, query): 36 | pass 37 | 38 | def update_many(self, query): 39 | pass 40 | 41 | """ 42 | Delete Commands 43 | """ 44 | 45 | def delete(self, query: dict, details: dict): 46 | if not self.helpers.validate_query(query): 47 | return 48 | 49 | query.update(details) 50 | self.connection.delete(query) 51 | 52 | def delete_many(self, query: dict, details: dict = {}): 53 | if not self.helpers.validate_query(query): 54 | return 55 | 56 | query.update(details) 57 | self.connection.delete_many(query) 58 | 59 | def delete_all(self, query: dict): 60 | logger.info("Same as `delete_many`") 61 | self.delete_many(query) 62 | 63 | """ 64 | Query commands 65 | """ 66 | 67 | def query_latest(self, query: dict, abs_rel="absolute"): 68 | if not self.helpers.validate_query(query): 69 | return {} 70 | latest_items = self.connection.query_last(query) 71 | return latest_items 72 | 73 | def query_latest_many(self, query: dict): 74 | if not self.helpers.validate_query(query): 75 | return [] 76 | latest_items = list(self.connection.query_latest(query)) 77 | return latest_items 78 | 79 | def query_all(self, query: dict): 80 | if not self.helpers.validate_query(query): 81 | return [] 82 | mongo_data = list(self.connection.query(query)) 83 | return mongo_data 
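    # NOTE: the time-based queries below accept bounds such as `min_epoch`,
    # `max_epoch`, `limit`, and `abs_rel`, but as written they are not all
    # forwarded to the underlying connection (`query_latest_by_time`, for
    # example, never uses `max_epoch` or `limit`). Callers should treat those
    # bounds as advisory until the connection calls are wired up.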
84 | 85 | 86 | def query_latest_by_time(self, query:dict, max_epoch:float, abs_rel:str="absolute", limit:int=10): 87 | if not self.helpers.validate_query(query): 88 | return {} 89 | latest_items = self.connection.query_closest(query) 90 | return latest_items 91 | 92 | def query_between(self, query:dict, min_epoch:float, max_epoch:float, abs_rel:str="absolute"): 93 | if not self.helpers.validate_query(query): 94 | return {} 95 | 96 | latest_items = list(self.connection.query_time(query, time_type="window", start=min_epoch)) 97 | if len(latest_items) == 0: 98 | return [] 99 | return latest_items 100 | 101 | def query_before(self, query): 102 | if not self.helpers.validate_query(query): 103 | return [] 104 | mongo_data = list(self.connection.query(query)) 105 | return mongo_data 106 | 107 | def query_after(self, query): 108 | if not self.helpers.validate_query(query): 109 | return [] 110 | mongo_data = list(self.connection.query(query)) 111 | return mongo_data 112 | 113 | """ Swap focused commands""" 114 | 115 | def query_mix_swap(self): 116 | pass 117 | 118 | def swap(self): 119 | pass 120 | 121 | """ 122 | Pop commands 123 | """ 124 | 125 | def pop(self, query: dict): 126 | if not self.helpers.validate_query(query): 127 | return [] 128 | 129 | query['limit'] = 1 130 | item = list(self.connection.query_latest(query)) 131 | if item is not None: 132 | self.connection.delete(item) 133 | return item 134 | 135 | def pop_many(self, query: dict, limit: int = 10): 136 | if not self.helpers.validate_query(query): 137 | return [] 138 | 139 | query['limit'] = limit 140 | items = list(self.connection.query_latest(query)) 141 | if len(items) == 0: 142 | return [] 143 | for item in items: 144 | self.connection.delete(item) 145 | return items 146 | 147 | def get_latest_many_swap(self): 148 | pass 149 | 150 | """ Other Functions """ 151 | 152 | def reset(self, query: dict): 153 | pass 154 | 155 | def count(self, query: dict): 156 | if not self.helpers.validate_query(query): 157 | return 0 158 | query.pop('limit', None) 159 | records = list(self.connection.query(query)) 160 | record_len = len(records) 161 | return record_len 162 | -------------------------------------------------------------------------------- /jamboree/handlers/complex/metric.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from loguru import logger 3 | import maya 4 | import pandas as pd 5 | 6 | 7 | from jamboree import JamboreeNew 8 | from jamboree.handlers.complex.backtestable import BacktestDBHandler 9 | from jamboree.handlers.processors import DynamicResample, DataProcessorsAbstract 10 | from jamboree.utils.core import omit 11 | 12 | 13 | 14 | class MetricHandler(BacktestDBHandler): 15 | """ 16 | # METRIC HANDLER 17 | --- 18 | 19 | A simple metric handler. To be used with all machine learning related functions. 20 | 21 | Given an episode and other crucial information, we'll give all information regarding how something has progressed. 
22 | 23 | Some considerations: 24 | 25 | * We'll load the most recent metrics into the metadata handler (which will let us search for the metric) 26 | * We want to know how a batch model has been doing between episodes 27 | * Something that sends aggregation commands for all models that have been touched for a given category, subcategory, name set 28 | * That would also require us to know how to preprocess that information prior to adding it into the database 29 | * We'd also want to not do this too often to reduce CPU initialization load 30 | * A form or rate limiter 31 | 32 | 33 | """ 34 | 35 | def __init__(self): 36 | super().__init__() 37 | self.entity = "metric" 38 | self.required = { 39 | "category": str, 40 | "subcategories": dict, 41 | "name": str 42 | } 43 | 44 | self._preprocessor: DataProcessorsAbstract = DynamicResample("data") 45 | 46 | 47 | @property 48 | def preprocessor(self) -> DataProcessorsAbstract: 49 | return self._preprocessor 50 | 51 | @preprocessor.setter 52 | def preprocessor(self, _preprocessor: DataProcessorsAbstract): 53 | self._preprocessor = _preprocessor 54 | 55 | def log(self, metric_dict:dict): 56 | """ Logs a metrics at the current time """ 57 | current_time = self.time.head 58 | metric_dict['time'] = current_time 59 | metric_dict['timestamp'] = maya.now()._epoch 60 | # Add something here to make this searchable as well. 61 | self.save(metric_dict) 62 | 63 | def latest(self): 64 | """ Get the latest """ 65 | _latest = self.last(ar='relative') 66 | omitted = omit(['episode', 'mtype', 'live', 'category', 'subcategories', 'type', 'name'], _latest) 67 | return omitted 68 | 69 | def reset_current_metric(self): 70 | self['episode'] = self.episode 71 | self['live'] = self.live 72 | 73 | def reset(self): 74 | """ Reset the data we're querying for. """ 75 | super().reset() 76 | self.reset_current_metric() 77 | 78 | def step_time(self): 79 | """ """ 80 | self.time.step() 81 | pass 82 | 83 | 84 | 85 | def __str__(self) -> str: 86 | name = self["name"] 87 | category = self["category"] 88 | subcategories = self["subcategories"] 89 | jscat = self.main_helper.generate_hash(subcategories) 90 | return f"{name}:{category}:{jscat}" 91 | 92 | def metric_test(): 93 | """ Test monitoring an online learning algorithm (using creme). """ 94 | import random 95 | 96 | jambo = JamboreeNew() 97 | metric_log = MetricHandler() 98 | metric_log['category'] = "model" 99 | metric_log['subcategories'] = {} 100 | metric_log['name'] = "general_regressor" 101 | metric_log.processor = jambo 102 | metric_log.reset() 103 | metric_log.time.change_stepsize(hours=0, microseconds=10) 104 | while True: 105 | metric_log.reset_current_metric() 106 | metric_schema = { 107 | "accuracy": random.uniform(0, 1), 108 | "f1": random.uniform(0, 1) 109 | } 110 | metric_log.log(metric_schema) 111 | saved_metric = metric_log.latest() 112 | metric_log.step_time() 113 | print(saved_metric) 114 | 115 | 116 | if __name__ == "__main__": 117 | metric_test() 118 | # import pandas_datareader.data as web 119 | # data_msft = web.DataReader('MSFT','yahoo',start='2010/1/1',end='2020/1/30').round(2) 120 | # data_apple = web.DataReader('AAPL','yahoo',start='2010/1/1',end='2020/1/30').round(2) 121 | # print(data_apple) 122 | # episode_id = uuid.uuid4().hex 123 | # jambo = Jamboree() 124 | # jam_processor = JamboreeNew() 125 | # data_hander = DataHandler() 126 | # data_hander.event = jambo 127 | # data_hander.processor = jam_processor 128 | # # The episode and live parameters are probably not good for the scenario. 
Will probably need to switch to something else to identify data 129 | # data_hander.episode = episode_id 130 | # data_hander.live = False 131 | # data_hander['category'] = "markets" 132 | # data_hander['subcategories'] = { 133 | # "market": "stock", 134 | # "country": "US", 135 | # "sector": "techologyyyyyyyy" 136 | # } 137 | # data_hander['name'] = "MSFT" 138 | # data_hander.reset() 139 | # data_hander.store_time_df(data_msft, is_bar=True) 140 | 141 | 142 | # data_hander['name'] = "AAPL" 143 | # data_hander.store_time_df(data_apple, is_bar=True) 144 | # data_hander.reset() 145 | 146 | # data_hander.time.head = maya.now().subtract(weeks=200, hours=14)._epoch 147 | # data_hander.time.change_stepsize(microseconds=0, days=1, hours=0) 148 | # data_hander.time.change_lookback(microseconds=0, weeks=4, hours=0) 149 | 150 | 151 | # while data_hander.is_next: 152 | # logger.info(magenta(data_hander.time.head, bold=True)) 153 | # print(data_hander.dataframe_from_head()) 154 | # data_hander.time.step() -------------------------------------------------------------------------------- /jamboree/handlers/abstracted/datasets/price.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import warnings 3 | warnings.simplefilter(action='ignore', category=FutureWarning) 4 | import maya 5 | import pprint 6 | from loguru import logger 7 | from typing import List 8 | from jamboree.handlers.default import DataHandler 9 | from jamboree import Jamboree 10 | from jamboree.utils.support.search import querying 11 | 12 | class PriceData(DataHandler): 13 | """ 14 | # Price Data 15 | 16 | A way to browse and interact with price data. Is an extension of DataHandler and includes basic searches. 17 | 18 | """ 19 | def __init__(self): 20 | super().__init__() 21 | self['category'] = "markets" 22 | self['submetatype'] = "price" 23 | self.sc = "subcategories" # storing the placeholder key to prevent misspelling 24 | # self.cat = "category" # storing variable placeholder key to prevent misspelling 25 | 26 | @property 27 | def markets(self) -> List[str]: 28 | return [ 29 | 'crypto', 'stock', 'commodities', 'forex', 'simulation' 30 | ] 31 | 32 | 33 | 34 | def by_market(self, market_type:str): 35 | """ 36 | # Find All Datasets By Market 37 | 38 | market_type: ['crypto', 'stock', 'commodities', 'forex', 'simulation'] 39 | """ 40 | if market_type not in self.markets: 41 | logger.error(f"Not the correct type: {market_type} must be {self.markets}") 42 | return [] 43 | 44 | _search = self.search 45 | _search[self.sc] = { 46 | "market": market_type 47 | } 48 | # print(_search.query_builder.build()) 49 | return _search.find() 50 | 51 | def by_country(self, country:str): 52 | 53 | """ 54 | # Find All Datasets By Country 55 | 56 | """ 57 | 58 | if not isinstance(country, str): 59 | logger.error("The country is not the string.") 60 | return [] 61 | _search = self.search 62 | _search[self.sc] = { 63 | "country": country, 64 | # "data" 65 | } 66 | return _search.find() 67 | 68 | def by_sector(self, sector:str): 69 | """ 70 | # Find All Datasets By Sector 71 | 72 | """ 73 | 74 | 75 | if not isinstance(sector, str): 76 | logger.error("The sector should be a string.") 77 | return [] 78 | _search = self.search 79 | _search[self.sc] = { 80 | "sector": sector 81 | } 82 | return _search.find() 83 | 84 | def by_name(self, name:str): 85 | """ 86 | # Find All Datasets By Sector 87 | 88 | """ 89 | 90 | 91 | if not isinstance(name, str): 92 | logger.error("The sector should be a string.") 93 | return [] 94 
| _search = self.search 95 | _search["name"] = name 96 | return _search.find() 97 | 98 | 99 | def by_exchange(self, name:str): 100 | if not isinstance(name, str): 101 | logger.error("The sector should be a string.") 102 | return [] 103 | _search = self.search 104 | _search[self.sc] = { 105 | "exchange": name 106 | } 107 | return _search.find() 108 | 109 | 110 | def multi_search(self, name=None, country=None, sector=None, market=None, exchange=None, is_exact_subcategory=False): 111 | """ Search with our conventional parameters for our pricing datasets """ 112 | all_variables = {"name": name, "country": country, "sector": sector, "market": market, "exchange": exchange} 113 | _name = None 114 | _subcat_dict = {} 115 | for k, v in all_variables.items(): 116 | if v is None: 117 | continue 118 | if k == "name": 119 | _name = v 120 | continue 121 | if k == "market": 122 | if v not in self.markets: 123 | continue 124 | 125 | if is_exact_subcategory: 126 | _subcat_dict[k] = querying.text.exact(v) 127 | else: 128 | _subcat_dict[k] = v 129 | 130 | is_size = (len(_subcat_dict) == 0) 131 | is_name = (_name is None) 132 | if is_size and is_name: 133 | return [] 134 | 135 | _search = self.search 136 | 137 | if not is_name: 138 | _search["name"] = name 139 | if not is_size: 140 | _search[self.sc] = _subcat_dict 141 | _search.processor = self.processor 142 | return _search.find() 143 | 144 | 145 | def build(self, name:str, abbv:str, country:str="US", sector:str="tech", market:str="stock", exchange:str="binance"): 146 | self['name'] = name 147 | self['abbreviation'] = abbv 148 | self['subcategories'] = { 149 | "market": market, 150 | "country": country, 151 | "sector": sector, 152 | "exchange": exchange, 153 | } 154 | return self 155 | 156 | # def get(self, name=None, country=None, sector=None, ) 157 | 158 | def main(): 159 | import pandas_datareader.data as web 160 | # data_msft = web.DataReader('MSFT','yahoo',start='2019/9/1',end='2020/1/30').round(2) 161 | # data_apple = web.DataReader('AAPL','yahoo',start='2019/9/1',end='2020/1/30').round(2) 162 | episode_id = uuid.uuid4().hex 163 | jambo = Jamboree() 164 | jam_processor = Jamboree() 165 | data_hander = PriceData() 166 | data_hander.processor = jam_processor 167 | trx_tron = data_hander.build("Tron", "TRX", country="Japan", sector="oil", market="commodities", exchange="binance") 168 | # The episode and live parameters are probably not good for the scenario. Will probably need to switch to something else to identify data 169 | trx_tron.episode = episode_id 170 | trx_tron.live = False 171 | trx_tron.reset() 172 | 173 | 174 | res = trx_tron.multi_search(country="jap") 175 | pprint.pprint(res) 176 | 177 | 178 | if __name__ == "__main__": 179 | main() -------------------------------------------------------------------------------- /jamboree/handlers/abstracted/datasets/orderbook.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import warnings 3 | warnings.simplefilter(action='ignore', category=FutureWarning) 4 | import maya 5 | import pprint 6 | from loguru import logger 7 | from typing import List 8 | from jamboree.handlers.default import DataHandler 9 | from jamboree import Jamboree 10 | from jamboree.utils.support.search import querying 11 | # from jamboree.handlers.abstracted.search.meta import MetaDataSearchHandler 12 | class OrderbookData(DataHandler): 13 | """ 14 | # Orderbook Data 15 | 16 | A way to browse and interact with price data. Is an extension of DataHandler and includes basic searches. 
17 | 18 | """ 19 | def __init__(self): 20 | super().__init__() 21 | self['subtype'] = "orderbook" 22 | self['category'] = "markets" 23 | 24 | self.sc = "subcategories" # storing the placeholder key to prevent misspelling 25 | self.cat = "category" # storing variable placeholder key to prevent misspelling 26 | 27 | @property 28 | def markets(self) -> List[str]: 29 | return [ 30 | 'crypto', 'stock', 'commodities', 'forex', 'simulation' 31 | ] 32 | 33 | 34 | def by_market(self, market_type:str): 35 | """ 36 | # Find All Datasets By Market 37 | 38 | market_type: ['crypto', 'stock', 'commodities', 'forex', 'simulation'] 39 | """ 40 | if market_type not in self.markets: 41 | logger.error(f"Not the correct type: {market_type} must be {self.markets}") 42 | return [] 43 | 44 | _search = self.search 45 | _search[self.cat] = "markets" 46 | _search[self.sc] = { 47 | "market": market_type 48 | } 49 | # print(_search.query_builder.build()) 50 | return _search.find() 51 | 52 | def by_country(self, country:str): 53 | 54 | """ 55 | # Find All Datasets By Country 56 | 57 | """ 58 | 59 | if not isinstance(country, str): 60 | logger.error("The country is not the string.") 61 | return [] 62 | _search = self.search 63 | _search[self.cat] = "markets" 64 | _search[self.sc] = { 65 | "country": country 66 | } 67 | return _search.find() 68 | 69 | def by_sector(self, sector:str): 70 | """ 71 | # Find All Datasets By Sector 72 | 73 | """ 74 | 75 | 76 | if not isinstance(sector, str): 77 | logger.error("The sector should be a string.") 78 | return [] 79 | _search = self.search 80 | _search[self.cat] = "markets" 81 | _search[self.sc] = { 82 | "sector": sector 83 | } 84 | return _search.find() 85 | 86 | def by_name(self, name:str): 87 | """ 88 | # Find All Datasets By Sector 89 | 90 | """ 91 | 92 | 93 | if not isinstance(name, str): 94 | logger.error("The sector should be a string.") 95 | return [] 96 | _search = self.search 97 | _search[self.cat] = "markets" 98 | _search["name"] = name 99 | return _search.find() 100 | 101 | 102 | 103 | def multi_part_search(self, name=None, country=None, sector=None, market=None, exchange=None): 104 | """ """ 105 | all_variables = {"name": name, "country": country, "sector": sector, "market": market, "exchange": exchange} 106 | _name = None 107 | _subcat_dict = {} 108 | for k, v in all_variables.items(): 109 | if v is None: 110 | continue 111 | if k == "name": 112 | _name = v 113 | continue 114 | 115 | _subcat_dict[k] = querying.text.exact(v) 116 | 117 | is_size = (len(_subcat_dict) == 0) 118 | is_name = (_name is None) 119 | if is_size and is_name: 120 | return [] 121 | 122 | _search = self.search 123 | # _search.reset() 124 | 125 | if not is_name: 126 | logger.warning(name) 127 | _search["name"] = name 128 | _search[self.cat] = "markets" 129 | if not is_size: 130 | _search[self.sc] = _subcat_dict 131 | _search.processor = self.processor 132 | return _search.find() 133 | 134 | def main(): 135 | import pandas_datareader.data as web 136 | # data_msft = web.DataReader('MSFT','yahoo',start='2019/9/1',end='2020/1/30').round(2) 137 | # data_apple = web.DataReader('AAPL','yahoo',start='2019/9/1',end='2020/1/30').round(2) 138 | episode_id = uuid.uuid4().hex 139 | jambo = Jamboree() 140 | jam_processor = Jamboree() 141 | data_hander = OrderbookData() 142 | data_hander.processor = jam_processor 143 | data_hander.event = jambo 144 | # The episode and live parameters are probably not good for the scenario. 
Will probably need to switch to something else to identify data 145 | data_hander.episode = episode_id 146 | data_hander.live = False 147 | data_hander['subcategories'] = { 148 | "market": "stock", 149 | "country": "Mexico", 150 | "sector": "tech", 151 | "exchange": "binance", 152 | } 153 | data_hander['name'] = "ETH Ethereum" 154 | data_hander.reset() 155 | # data_hander.store_time_df(data_msft, is_bar=True) 156 | 157 | 158 | data_hander['name'] = "BTC Bitcoin" 159 | # data_hander.reset() 160 | # data_hander.store_time_df(data_apple, is_bar=True) 161 | 162 | start = maya.now()._epoch 163 | # res1 = data_hander.by_name("Bitcoin") 164 | 165 | # logger.debug(res1) 166 | end = maya.now()._epoch 167 | logger.info(end-start) 168 | 169 | res = data_hander.multi_part_search(market="stock", exchange="binance", country="Mexico") 170 | logger.warning(res) 171 | 172 | # _search = data_hander.search 173 | # _search['subcategories'] = { 174 | # "market": "stock" 175 | # } 176 | # _search.remove() 177 | # data_hander.time.head = maya.now().subtract(weeks=200, hours=14)._epoch 178 | # data_hander.time.change_stepsize(microseconds=0, days=1, hours=0) 179 | # data_hander.time.change_lookback(microseconds=0, weeks=4, hours=0) 180 | 181 | 182 | # while data_hander.is_next: 183 | # logger.debug(data_hander.time.head) 184 | # print(data_hander.closest_head()) 185 | # data_hander.time.step() 186 | 187 | 188 | if __name__ == "__main__": 189 | main() -------------------------------------------------------------------------------- /jamboree/utils/support/search/core.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import time 4 | import warnings 5 | from copy import copy 6 | from pprint import pprint 7 | from typing import Any, Dict, List 8 | warnings.simplefilter(action='ignore', category=FutureWarning) 9 | 10 | 11 | from addict import Dict as ADict 12 | from cerberus import Validator 13 | from eliot import log_call, to_file 14 | from loguru import logger 15 | from redis.exceptions import ResponseError 16 | from redisearch import Client, Query 17 | 18 | from jamboree.utils.core import consistent_hash 19 | from jamboree.utils.support.search import (InsertBuilder, QueryBuilder, 20 | is_gen_type, is_generic, is_geo, 21 | is_nested, is_queryable_dict, 22 | name_match, to_field, to_str) 23 | from jamboree.utils.support.search.assistance import Keystore 24 | 25 | 26 | logger.disable(__name__) 27 | """ 28 | 29 | # NOTE 30 | 31 | Basic CRUD operations for the search handler. 
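Each searchable record set gets an index key derived from a consistent hash of its stringified requirements, and nested (SUB) fields live under `<index>:<fieldname>` subkeys; see `process_requirements` and `process_subfields` below.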
32 | """ 33 | 34 | REDIS_PORT = int(os.getenv("REDIS_PORT", "6379")) 35 | REDIS_HOST = str(os.getenv("REDIS_HOST", "localhost")) 36 | 37 | def split_doc(doc): 38 | return doc.id, ADict(**doc.__dict__) 39 | 40 | def dictify(doc): 41 | item = ADict(**doc.__dict__) 42 | item.pop("super_id", None) 43 | item.pop("payload", None) 44 | return item 45 | 46 | class BaseSearchHandlerSupport(object): 47 | def __init__(self): 48 | self._requirements_str = { 49 | 50 | } 51 | self._subkey_names = set() 52 | self._indexable = set() 53 | self.__indexable = [] 54 | self._index_key:str = "" 55 | self._sub_fields = {} 56 | self.insert_builder = InsertBuilder() 57 | self.query_builder = QueryBuilder() 58 | self.keystore = Keystore() 59 | self.added = set() 60 | # Boolean explaining if this is a subquery 61 | self.is_sub_key = False 62 | 63 | 64 | @property 65 | def indexable(self): 66 | return self.__indexable 67 | 68 | @property 69 | def subnames(self): 70 | return self._subkey_names 71 | @property 72 | def index(self): 73 | """Index key for the requirements""" 74 | return self._index_key 75 | 76 | @index.setter 77 | def index(self, _index): 78 | """Index key for the requirements""" 79 | self._index_key = _index 80 | 81 | @property 82 | def subfields(self): 83 | return self._sub_fields 84 | 85 | def process_subfields(self): 86 | for key in self.subnames: 87 | self._sub_fields[key] = f"{self.index}:{key}" 88 | 89 | def process_requirements(self, _requirements:dict): 90 | """ 91 | Process the required fields. That includes: 92 | 93 | 1. Creating a requirements string. That's so we can create a key representing the field that exist. 94 | 2. Listing all of the subkeys that we'd need to take in consideration. 95 | 3. Creating an index hash to locate all relavent documents 96 | 4. Creation of a list of fields so we can create a schema at that index hash 97 | 5. 
Creation of all subkeys so we can quickly access them by name later 98 | 99 | """ 100 | for k, v in _requirements.items(): 101 | if is_generic(v): 102 | sval = to_str(v) 103 | _agg = f"{k}:{sval}" 104 | if _agg not in self.added: 105 | self.added.add(_agg) 106 | self._requirements_str[k] = sval 107 | field = to_field(k, sval) 108 | 109 | self.__indexable.append(field) 110 | continue 111 | 112 | if v == dict: 113 | _agg = f"{k}:SUB" 114 | if _agg not in self.added: 115 | self.added.add(_agg) 116 | self._requirements_str[k] = "SUB" 117 | self.subnames.add(k) 118 | continue 119 | 120 | if is_geo(v): 121 | _agg = f"{k}:GEO" 122 | if _agg not in self.added: 123 | self.added.add(_agg) 124 | self._requirements_str[k] = "GEO" 125 | self.__indexable.append(to_field(k, "GEO")) 126 | 127 | continue 128 | 129 | # self._indexable = set(unique(self._indexable, key=lambda x: x.redis_args()[0])) 130 | if not self.is_sub_key: 131 | self._index_key = consistent_hash(self._requirements_str) 132 | self.process_subfields() 133 | 134 | def is_sub(self, name:str) -> bool: 135 | """ Check to see if this is a subfield """ 136 | return name in self.subnames 137 | 138 | def is_queryable(self, _dict): 139 | if isinstance(_dict, dict): 140 | if is_queryable_dict(_dict): 141 | return True 142 | return False 143 | 144 | def is_valid_sub_key_information(self, subkey_dict:dict): 145 | """ Check to see if the subkey is valid""" 146 | 147 | if len(subkey_dict) == 0: 148 | return False 149 | 150 | 151 | # Run validation to see if all of the keys are reducible to a type and base type 152 | for k, v in subkey_dict.items(): 153 | if is_generic(v): 154 | continue 155 | if isinstance(v, dict): 156 | if not is_queryable_dict(v): 157 | logger.error(f"{k} is not valid") 158 | return False 159 | return True 160 | 161 | def queryable_to_type(self, _dict:dict): 162 | """ Converts a queryable dictionary into a type""" 163 | dtype = _dict['type'] 164 | if dtype == "GEO": 165 | return "GEO" 166 | elif dtype == "TEXT": 167 | return str 168 | elif dtype == "BOOL": 169 | return bool 170 | elif dtype == "NUMERIC": 171 | return float 172 | elif dtype == "TAG": 173 | return list 174 | 175 | def loaded_dict_to_requirements(self, _dict:dict): 176 | """ 177 | # Loaded Dict To Requirements 178 | 179 | Convert a dictionary into a requirements dict. 180 | 181 | Use to create a requirements 182 | 183 | Returns an empty dict if nothing is there. 184 | """ 185 | req = {} 186 | for k, v in _dict.items(): 187 | _ktype = type(v) 188 | if is_generic(_ktype): 189 | req[k] = _ktype 190 | if self.is_queryable(v): 191 | req[k] = self.queryable_to_type(v) 192 | 193 | return req 194 | 195 | 196 | def reset_builders(self): 197 | self.insert_builder = InsertBuilder() 198 | self.query_builder = QueryBuilder() -------------------------------------------------------------------------------- /jamboree/handlers/default/blob.py: -------------------------------------------------------------------------------- 1 | """ 2 | Basic storage handler 3 | --- 4 | 5 | """ 6 | 7 | import copy 8 | from typing import Any, Dict, Optional 9 | 10 | import ujson 11 | from addict import Dict as ADict 12 | 13 | from jamboree.base.processors.abstracts import EventProcessor, Processor 14 | from jamboree.handlers.base import BaseFileHandler, BaseHandler 15 | from jamboree.utils.helper import Helpers 16 | 17 | 18 | class BlobStorageHandler(BaseHandler): 19 | """ 20 | A simple event store using a variation of databases. 
21 | --- 22 | 23 | Currently uses zadd to work 24 | """ 25 | def __init__(self): 26 | # print("DBHandler") 27 | self._entity = "" 28 | self._meta_type = "storage" 29 | self._required = {} 30 | self._query = {} 31 | self._data = {} 32 | self._is_event = True 33 | self._processor: Optional[Processor] = None 34 | self.event_proc: Optional[EventProcessor] = None 35 | self.main_helper: Helpers = Helpers() 36 | self.changed_since_command: bool = False 37 | self.is_skip_check: bool = False 38 | self.call_params = {} 39 | 40 | def __setitem__(self, key, value): 41 | if bool(self.required): 42 | 43 | if key in self.required: 44 | self._query[key] = value 45 | return self._query 46 | self._data[key] = value 47 | self.changed_since_command = True 48 | 49 | return self._data 50 | 51 | def __getitem__(self, key): 52 | if key in self._query.keys(): 53 | return self._query.get(key, None) 54 | else: 55 | if key in self._data.keys(): 56 | return self._data.get(key, None) 57 | return None 58 | 59 | def setup_query(self, alt={}): 60 | query = copy.copy(self._query) 61 | query["type"] = self.entity 62 | query["mtype"] = self._meta_type 63 | query.update(alt) 64 | query.update(self._data) 65 | return query 66 | 67 | @property 68 | def is_event(self) -> bool: 69 | """ Determines if we're going to add event ids to what we're doing. We can essentially set certain conditions""" 70 | return self._is_event 71 | 72 | @is_event.setter 73 | def is_event(self, is_true: bool = False): 74 | self._is_event = is_true 75 | 76 | @property 77 | def processor(self) -> "Processor": 78 | if self._processor is None: 79 | raise AttributeError("The Processor is missing") 80 | return self._processor 81 | 82 | @processor.setter 83 | def processor(self, _processor: "Processor"): 84 | self._processor = _processor 85 | 86 | def clear_event(self) -> None: 87 | self._processor = None 88 | 89 | @property 90 | def entity(self): 91 | return self._entity 92 | 93 | @entity.setter 94 | def entity(self, _entity: str): 95 | self._entity = str(_entity) 96 | 97 | @property 98 | def required(self): 99 | return self._required 100 | 101 | @required.setter 102 | def required(self, _required: Dict[str, Any]): 103 | # check to make sure it's not empty 104 | self._required = _required 105 | 106 | @property 107 | def query(self): 108 | return self._query 109 | 110 | @query.setter 111 | def query(self, _query: Dict[str, Any]): 112 | if len(_query.keys()) > 0: 113 | self._query = _query 114 | 115 | def check(self): 116 | if ((not bool(self._entity)) or (not bool(self._required)) 117 | or (not bool(self._query))): 118 | raise AttributeError(f"One of the key variables is missing.") 119 | 120 | for req in self._required.keys(): 121 | _type = self._required[req] 122 | if req not in self._query: 123 | raise AttributeError(f"{req} is not in the requirements") 124 | if not isinstance(self._query[req], _type): 125 | raise AttributeError(f"{req} is not a {_type}") 126 | return True 127 | 128 | def save(self, data: dict, alt={}, is_overwrite=False): 129 | self.check() 130 | query = self.setup_query(alt) 131 | # Put settings here 132 | current_settings = ADict() 133 | current_settings.overwrite = is_overwrite 134 | self.processor.storage.save(query, data, **current_settings.to_dict()) 135 | self.changed_since_command = False 136 | 137 | def save_version( 138 | self, data: dict, version: str, alt={}, is_overwrite=False 139 | ): 140 | self.check() 141 | query = self.setup_query(alt) 142 | # Put settings here 143 | current_settings = ADict() 144 | 
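        # NOTE: `version` is accepted but never attached to the query or to
        # `current_settings`, so this currently behaves exactly like `save`.
        # A minimal fix (an assumption; the storage layer's versioning
        # contract isn't shown here) would be to set it before the call below:
        #     current_settings.version = version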
self.processor.storage.save(query, data, **current_settings.to_dict()) 145 | 146 | self.changed_since_command = False 147 | 148 | def absolute_exists(self, alt={}): 149 | self.check() 150 | query = self.setup_query(alt) 151 | # Put settings here 152 | current_settings = ADict() 153 | current_settings.is_force = self.changed_since_command 154 | avs = self.processor.storage.absolute_exists( 155 | query, **current_settings.to_dict() 156 | ) 157 | self.changed_since_command = False 158 | return avs 159 | 160 | def last(self, alt={}): 161 | self.check() 162 | query = self.setup_query(alt) 163 | current_settings = ADict() 164 | self.changed_since_command = False 165 | obj = self.processor.storage.query(query, **current_settings.to_dict()) 166 | return obj 167 | 168 | def by_version(self, version: str, alt={}): 169 | """ Get the data by version. """ 170 | self.check() 171 | query = self.setup_query(alt) 172 | current_settings = ADict() 173 | self.processor.storage.query(query, **current_settings.to_dict()) 174 | self.changed_since_command = False 175 | 176 | def delete(self, query: dict, alt={}): 177 | self.check() 178 | query = self.setup_query(alt) 179 | current_settings = ADict() 180 | 181 | self.processor.storage.delete(query, **current_settings) 182 | self.changed_since_command = False 183 | 184 | def lock(self, alt={}): 185 | self.check() 186 | query = self.setup_query(alt) 187 | self.changed_since_command = False 188 | return self.processor.event.lock(query) 189 | 190 | def clear(self): 191 | """ Clear in-memory cache. Will likely port to rocksdb for many of these parts. """ 192 | self.changed_since_command = True 193 | self.is_skip_check = True 194 | self.call_params = {} 195 | 196 | def __call__(self, **kwargs): 197 | if "alt" in kwargs: 198 | alt = kwargs.get("alt") 199 | if alt is isinstance(alt, dict): 200 | self.call_params["alt"] 201 | 202 | def __enter__(self): 203 | self.check() 204 | self.is_skip_check = True 205 | 206 | return self 207 | 208 | def __exit__(self, exc_type, exc_val, exc_tb): 209 | self.clear() 210 | -------------------------------------------------------------------------------- /jamboree/base/handler.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, ABCMeta 2 | import copy 3 | 4 | from typing import Dict, Any, List 5 | from loguru import logger 6 | from .processor import EventProcessor 7 | 8 | 9 | class BaseHandler(object, metaclass=ABCMeta): 10 | """ 11 | A way to handle reads and writes consistently without having to write every single variable: 12 | """ 13 | 14 | def __init__(self): 15 | pass 16 | 17 | def check(self): 18 | raise NotImplementedError 19 | 20 | def save(self, data: dict): 21 | raise NotImplementedError 22 | 23 | def _bulk_save(self, query: dict, data: list): 24 | raise NotImplementedError 25 | 26 | def _get_many(self): 27 | raise NotImplementedError 28 | 29 | def last(self): 30 | raise NotImplementedError 31 | 32 | def many(self, limit: int = 100): 33 | raise NotImplementedError 34 | 35 | def save_many(self, query: dict, data: list): 36 | raise NotImplementedError 37 | 38 | def pop_multiple(self, query, _limit: int = 1): 39 | raise NotImplementedError 40 | 41 | def swap(self, query, alt: dict = {}): 42 | """ Swap betwen the first and last item """ 43 | raise NotImplementedError 44 | 45 | def query_mix(self, query: dict, alt: dict = {}): 46 | raise NotImplementedError 47 | 48 | 49 | class DBHandler(BaseHandler): 50 | """ 51 | A way to handle reads and writes consistently without having 
to write every single variable:
 52 |     """
 53 | 
 54 |     def __init__(self):
 55 |         # print("DBHandler")
 56 |         self._entity = ""
 57 |         self._required = {}
 58 |         self._query = {}
 59 |         self.data = {}
 60 |         self.event_proc = None
 61 | 
 62 |     def __setitem__(self, key, value):
 63 |         if bool(self.required):
 64 |             if key in self.required:
 65 |                 self._query[key] = value
 66 |                 return self._query
 67 |         self.data[key] = value
 68 |         return self.data
 69 | 
 70 |     def __getitem__(self, key):
 71 |         if key in self._query.keys():
 72 |             return self._query.get(key, None)
 73 |         else:
 74 |             if key in self.data.keys():
 75 |                 return self.data.get(key, None)
 76 |             return None
 77 | 
 78 |     @property
 79 |     def event(self):
 80 |         return self.event_proc
 81 | 
 82 |     @event.setter
 83 |     def event(self, _event: EventProcessor):
 84 |         # Use to process events
 85 |         self.event_proc = _event
 86 | 
 87 |     @property
 88 |     def entity(self):
 89 |         return self._entity
 90 | 
 91 |     @entity.setter
 92 |     def entity(self, _entity: str):
 93 |         self._entity = str(_entity)
 94 | 
 95 |     @property
 96 |     def required(self):
 97 |         return self._required
 98 | 
 99 |     @required.setter
 100 |     def required(self, _required: Dict[str, Any]):
 101 |         # check to make sure it's not empty
 102 |         self._required = _required
 103 | 
 104 |     @property
 105 |     def query(self):
 106 |         return self._query
 107 | 
 108 |     @query.setter
 109 |     def query(self, _query: Dict[str, Any]):
 110 |         if len(_query.keys()) > 0:
 111 |             self._query = _query
 112 | 
 113 |     def check(self):
 114 |         if self.event_proc is None:
 115 |             raise AttributeError("Event processor isn't available.")
 116 | 
 117 |         if (not bool(self._entity)) or (not bool(self._required)) or (not bool(self._query)):
 118 |             raise AttributeError("One of the key variables is missing.")
 119 | 
 120 |         for req in self._required.keys():
 121 |             _type = self._required[req]
 122 |             if req not in self._query:
 123 |                 raise AttributeError(f"{req} is not in the requirements")
 124 |             if not isinstance(self._query[req], _type):
 125 |                 raise AttributeError(f"{req} is not a {_type}")
 126 |         return True
 127 | 
 128 |     def save(self, data: dict, alt={}):
 129 |         self.check()
 130 | 
 131 |         query = copy.copy(self._query)
 132 |         query['type'] = self.entity
 133 |         query.update(alt)
 134 |         query.update(self.data)
 135 |         self.event_proc.save(query, data)
 136 | 
 137 |     def save_many(self, data: list, alt={}):
 138 |         self.check()
 139 | 
 140 |         query = copy.copy(self._query)
 141 |         query['type'] = self.entity
 142 |         # logger.info(query)
 143 |         query.update(alt)
 144 |         query.update(self.data)
 145 |         self.event_proc._bulk_save(query, data)
 146 | 
 147 |     def _get_many(self, limit: int, alt={}):
 148 |         """ Aims to get many variables """
 149 |         self.check()
 150 |         query = copy.copy(self._query)
 151 |         query['type'] = self.entity
 152 |         query.update(alt)
 153 |         query.update(self.data)
 154 |         latest_many = self.event_proc.get_latest_many(query, limit=limit)
 155 |         return latest_many
 156 | 
 157 |     def _get_latest(self, alt={}):
 158 |         self.check()
 159 |         query = copy.copy(self._query)
 160 |         query['type'] = self.entity
 161 |         query.update(alt)
 162 |         query.update(self.data)
 163 |         latest = self.event_proc.get_latest(query)
 164 |         return latest
 165 | 
 166 |     def last(self, alt={}):
 167 |         alt.update(self.data)
 168 |         return self._get_latest(alt)
 169 | 
 170 |     def many(self, limit=1000, alt={}):
 171 |         alt.update(self.data)
 172 |         return self._get_many(limit, alt=alt)
 173 | 
 174 |     def pop(self, alt={}):
 175 |         query = copy.copy(self._query)
 176 |         query['type'] = self.entity
 177 |         query.update(alt)
 178 |         query.update(self.data)
 179 |         self.event_proc.remove_first(query)
 180 | 
 181 |     def pop_many(self, _limit, alt={}):
 182 |         query = copy.copy(self._query)
 183 |         query['type'] = self.entity
 184 |         query.update(alt)
 185 |         query.update(self.data)
 186 |         return self.event_proc.pop_multiple(query, _limit)
 187 | 
 188 |     def count(self, alt={}):
 189 |         """ Counts the records matching the current query """
 190 |         self.check()
 191 |         query = copy.copy(self._query)
 192 |         query['type'] = self.entity
 193 |         query.update(alt)
 194 |         query.update(self.data)
 195 |         return self.event_proc.count(query)
 196 | 
 197 |     def swap_many(self, limit: int = 10, alt={}):
 198 |         """ Move items from the main list to a swapped list. """
 199 |         self.check()
 200 |         query = copy.copy(self._query)
 201 |         query['type'] = self.entity
 202 |         query.update(alt)
 203 |         query.update(self.data)
 204 |         return self.event_proc.multi_swap(query, limit)
 205 | 
 206 |     def query_mix(self, limit: int = 10, alt: dict = {}):
 207 |         self.check()
 208 |         query = copy.copy(self._query)
 209 |         query['type'] = self.entity
 210 |         query.update(alt)
 211 |         query.update(self.data)
 212 |         return self.event_proc.query_mix(query, limit)
 213 | 
 214 |     def query_many_swap(self, limit: int = 10, alt: dict = {}):
 215 |         self.check()
 216 |         query = copy.copy(self._query)
 217 |         query['type'] = self.entity
 218 |         query.update(alt)
 219 |         query.update(self.data)
 220 |         return self.event_proc.get_latest_many_swap(query, limit)
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
 1 | # Jamboree: A Fast General Data Engineering Library
 2 | 
 3 | 
 4 | ![Logo](docs/imgs/jamboree-long-new.png)
 5 | 
 6 | **`Jamboree` is extremely early, meaning it should be used with caution. There are plans to improve the system and many components are subject to change. If you look at the improvement plans linked at the bottom, you'll be able to see what's coming.**
 7 | 
 8 | The goal of `jamboree` is to manage the complexities of data engineering.
 9 | 
 10 | ## Install
 11 | 
 12 | The library requires `redis` to operate for the time being.
 13 | 
 14 | ```bash
 15 | pip install jamboree
 16 | ```
 17 | 
 18 | ## Install Redis
 19 | 
 20 | All of the Redis installation instructions are [here](https://redis.io/topics/quickstart). However, because the current setup uses RediSearch (and will likely use many other Redis modules in the future), and installing modules manually is a bit more complex than necessary right now, it's best to use Docker:
 21 | 
 22 | ```bash
 23 | $ docker run \
 24 |     -p 6379:6379 \
 25 |     -v /home/{PUTNAMEHERE}/data:/data \
 26 |     redislabs/redismod \
 27 |     --dir /data
 28 | ```
 29 | 
 30 | **The output should look like the following:**
 31 | 
 32 | ```bash
 33 | 1:C 24 Apr 2019 21:46:40.382 # oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo
 34 | ...
 35 | 1:M 24 Apr 2019 21:46:40.474 * Module 'ai' loaded from /usr/lib/redis/modules/redisai.so
 36 | 1:M 24 Apr 2019 21:46:40.474 * RediSearch version 1.4.7 (Git=)
 37 | 1:M 24 Apr 2019 21:46:40.474 * concurrency: ON, gc: ON, prefix min length: 2, prefix max expansions: 200, query timeout (ms): 500, timeout policy: return, cursor read size: 1000, cursor max idle (ms): 300000, max doctable size: 1000000, search pool size: 20, index pool size: 8,
 38 | 1:M 24 Apr 2019 21:46:40.475 * Initialized thread pool!
 39 | 1:M 24 Apr 2019 21:46:40.475 * Module 'ft' loaded from /usr/lib/redis/modules/redisearch.so
 40 | 1:M 24 Apr 2019 21:46:40.476 * Thread pool created, using 8 threads.
 41 | 1:M 24 Apr 2019 21:46:40.476 * Module 'graph' loaded from /usr/lib/redis/modules/redisgraph.so
 42 | loaded default MAX_SAMPLE_PER_CHUNK policy: 360
 43 | 1:M 24 Apr 2019 21:46:40.476 * Module 'timeseries' loaded from /usr/lib/redis/modules/redistimeseries.so
 44 | 1:M 24 Apr 2019 21:46:40.476 # JSON data type for Redis v1.0.4 [encver 0]
 45 | 1:M 24 Apr 2019 21:46:40.476 * Module 'ReJSON' loaded from /usr/lib/redis/modules/rejson.so
 46 | 1:M 24 Apr 2019 21:46:40.476 * Module 'bf' loaded from /usr/lib/redis/modules/rebloom.so
 47 | 1:M 24 Apr 2019 21:46:40.477 * RedisGears version 0.2.1, git_sha=fb97ad757eb7238259de47035bdd582735b5c81b
 48 | 1:M 24 Apr 2019 21:46:40.477 * PythonHomeDir:/usr/lib/redis/modules/deps/cpython/
 49 | 1:M 24 Apr 2019 21:46:40.477 * MaxExecutions:1000
 50 | 1:M 24 Apr 2019 21:46:40.477 * RedisAI api loaded successfully.
 51 | 1:M 24 Apr 2019 21:46:40.477 # RediSearch api loaded successfully.
 52 | 1:M 24 Apr 2019 21:46:40.521 * Module 'rg' loaded from /usr/lib/redis/modules/redisgears.so
 53 | 1:M 24 Apr 2019 21:46:40.521 * Ready to accept connections
 54 | ```
 55 | 
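Once the container is up, you can sanity-check that Redis is reachable and that the modules loaded. Below is a minimal sketch using `redis-py`, assuming the default host and port from the commands above:

```py
import redis

r = redis.Redis(host="localhost", port=6379)
assert r.ping()  # raises ConnectionError if the container isn't running

# MODULE LIST reports the loaded modules (ft/search, graph, timeseries, ...)
for module in r.execute_command("MODULE", "LIST"):
    print(module)
```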
41 | 1:M 24 Apr 2019 21:46:40.476 * Module 'graph' loaded from /usr/lib/redis/modules/redisgraph.so
42 | loaded default MAX_SAMPLE_PER_CHUNK policy: 360
43 | 1:M 24 Apr 2019 21:46:40.476 * Module 'timeseries' loaded from /usr/lib/redis/modules/redistimeseries.so
44 | 1:M 24 Apr 2019 21:46:40.476 # JSON data type for Redis v1.0.4 [encver 0]
45 | 1:M 24 Apr 2019 21:46:40.476 * Module 'ReJSON' loaded from /usr/lib/redis/modules/rejson.so
46 | 1:M 24 Apr 2019 21:46:40.476 * Module 'bf' loaded from /usr/lib/redis/modules/rebloom.so
47 | 1:M 24 Apr 2019 21:46:40.477 * RedisGears version 0.2.1, git_sha=fb97ad757eb7238259de47035bdd582735b5c81b
48 | 1:M 24 Apr 2019 21:46:40.477 * PythonHomeDir:/usr/lib/redis/modules/deps/cpython/
49 | 1:M 24 Apr 2019 21:46:40.477 * MaxExecutions:1000
50 | 1:M 24 Apr 2019 21:46:40.477 * RedisAI api loaded successfully.
51 | 1:M 24 Apr 2019 21:46:40.477 # RediSearch api loaded successfully.
52 | 1:M 24 Apr 2019 21:46:40.521 * Module 'rg' loaded from /usr/lib/redis/modules/redisgears.so
53 | 1:M 24 Apr 2019 21:46:40.521 * Ready to accept connections
54 | ```
55 | 
56 | To run it in the background and have it start when the computer boots:
57 | 
58 | ```bash
59 | $ docker run \
60 |     -p 6379:6379 -d \
61 |     --restart=always \
62 |     -v /home/{PUTNAMEHERE}/data:/data \
63 |     redislabs/redismod \
64 |     --dir /data
65 | ```
66 | 
67 | ## What is Event State Carrying?
68 | 
69 | State carrying is a roundabout way of saying that we track information through its interactions over time, more so than through exact states. It helps us construct a story of all the things that have happened in a system over time. It looks like the image below.
70 | 
71 | ![Event Sourcing](docs/imgs/event-sourcing_long.png)
72 | 
73 | State carrying is dragging the current state along over time.
74 | 
75 | The ultimate result is that you get traceability in your system. This is great when you're trying to see how interactions happen through time.
76 | 
77 | ## How The Library Works
78 | 
79 | The Jamboree library is split in two parts:
80 | 
81 | 1. Jamboree Event Sourcing
82 | 2. Object Handler
83 | 
84 | The `Jamboree` object is rather simple. It only saves, reads, and deletes records in both `redis` and `mongodb`: redis gives fast read times, and mongodb backs up the data. `Handlers` have very explicit storage procedures that interact with the Jamboree object. A good example is the code below.
85 | 
86 | The idea is straightforward:
87 | 
88 | 1. We create a `Jamboree` object. The Jamboree object manages connections to databases with high speed and low latency.
89 | 2. After we create the handler object and set the limit (the max number of records we want to look at), we start adding records until we stop. At the end, we get the amount of time it took to push the records.
90 |    * Periodically, we do a small calculation on older information prior to adding a record.
91 | 
92 | ## Creating a Handler
93 | The snippets below assume the usual imports (`uuid`, `vaex`, `pandas as pd`, `random`/`randint`) along with `DBHandler`, `Jamboree`, and a `timecontext` timing helper from this library.
94 | ```py
95 | class SampleEnvHandler(DBHandler):
96 |     """Abstract handler that we use to keep track of information.
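    Tracks `value` records per episode; `save_update_recent` recomputes a value from recent records before saving.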
97 | """ 98 | 99 | def __init__(self, **kwargs): 100 | super().__init__() 101 | self.entity = "sample" 102 | self.required = { 103 | "episode": str 104 | } 105 | self._balance = 0 106 | self._limit = 100 107 | 108 | @property 109 | def limit(self): 110 | return self._limit 111 | 112 | @limit.setter 113 | def limit(self, limit): 114 | self._limit = limit 115 | 116 | @property 117 | def balance(self): 118 | """ Gets the sum of the last three values at set the value """ 119 | return self._balance 120 | 121 | @property 122 | def transactions(self)->vaex.dataframe: 123 | """ Get the last 100 transactions """ 124 | many_records = self.many(self.limit) 125 | 126 | if isinstance(many_records, dict): 127 | frame = pd.DataFrame(many_records) 128 | transactions_frame = vaex.from_pandas(frame) 129 | return transactions_frame.sort('timestamp', ascending=False) 130 | 131 | if len(many_records) > 0: 132 | frame = pd.DataFrame(many_records) 133 | transactions_frame = vaex.from_pandas(frame) 134 | return transactions_frame.sort('timestamp', ascending=False) 135 | 136 | return vaex.from_pandas(pd.DataFrame()) 137 | 138 | def save_update_recent(self, data:dict): 139 | transactions = self.transactions 140 | count = transactions.count() 141 | new_value = data['value'] + count 142 | data['value'] = int(new_value) 143 | super().save(data) 144 | 145 | def flip(n=0.02): 146 | if n >= random.uniform(0, 1): 147 | return True 148 | return False 149 | 150 | if __name__ == "__main__": 151 | main() 152 | ``` 153 | 154 | 155 | ## Timing The Handler 156 | 157 | ```py 158 | jambo = Jamboree() 159 | sample_env_handler = SampleEnvHandler() 160 | sample_env_handler.limit = 250 161 | sample_env_handler.event = jambo 162 | sample_env_handler['episode'] = uuid.uuid1().hex 163 | with timecontext(): 164 | for i in range(10000): 165 | v1 = randint(0, 12) 166 | sample_env_handler.save({"value": v1}) 167 | if flip(0.05): 168 | sample_env_handler.save_update_recent({"value": v1}) 169 | ``` 170 | 171 | ## Improvement Plans 172 | 173 | Jamboree currently has a list of improvements that 174 | 175 | https://trello.com/b/9vwpc5C6 -------------------------------------------------------------------------------- /jamboree/handlers/abstracted/search/updated.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import maya 3 | 4 | from typing import Optional 5 | from jamboree import Jamboree 6 | from jamboree.base.processors.abstracts import Processor 7 | from jamboree.handlers.default.search import BaseSearchHandler 8 | 9 | from loguru import logger 10 | 11 | class ParameterizedSearch(BaseSearchHandler): 12 | """ 13 | An abstract over the base search handler. 14 | 15 | Use to avoid using the normal formatting. 16 | 17 | Example: 18 | 19 | Normally you'd have to use the following: 20 | 21 | :: 22 | >>> search['item1'] = 'value' 23 | >>> search['item2'] = 'value' 24 | >>> search['item3'] = 'value' 25 | >>> search['item4'] = 'value' 26 | >>> search['item5'] = 'value' 27 | >>> search.insert(allow_duplicates=False) 28 | 29 | Instead you'll use the pattern: 30 | 31 | :: 32 | >>> id_of_insert = search.Create( 33 | >>> allow_duplicates=False, 34 | >>> no_overwrite_reqs=False, 35 | >>> item1='value', item2='value', 36 | >>> item3='value', item4='value', 37 | >>> item5='value') 38 | # The record's id is set here 39 | '249fabf229374715ae7e65b7061c0faf' 40 | 41 | 42 | To define a schema we set the variable `self.allrequirements`. 
43 |     (Illustrative; assuming a field-to-type mapping, as with `required` elsewhere in the library:)
44 | 
45 |     ::
46 |         >>> self.allrequirements = {'category': str, 'name': str, 'abbreviation': str}
47 |     """
48 | 
49 |     def __init__(self):
50 |         """ Initialize the handler. Pulls from the existing SearchHandler.
51 | 
52 |         Set `must_have` in inherited classes. Use it to make certain variable names mandatory.
53 | 
54 |         Example:
55 | 
56 |         ::
57 |             >>> def __init__(self):
58 |             >>>     self.must_have = ["category", "name", "abbreviation"]
59 | 
60 | 
61 | 
62 |             >>> id_of_insert = search.Create(
63 |             >>>     allow_duplicates=True,
64 |             >>>     no_overwrite_must_have=True)
65 | 
66 |         Would immediately break because `category`, `name`, and `abbreviation` were never provided.
67 | 
68 | 
69 |         """
70 |         super().__init__()
71 |         self.must_have = []  # Forced fields
72 | 
73 |     def check_requirements(self, items: dict):
74 |         """ Checks that the fields inside of `must_have` are present in the dictionary we're going to be adding. """
75 |         for _abs in self.must_have:
76 |             if _abs not in items:
77 |                 raise AttributeError(
78 |                     f"{_abs} has to be added. The absolute required variables are the following: {self.must_have}"
79 |                 )
80 |     # """
81 |     # Insert a document. All fields defined inside of **kwargs.
82 | 
83 |     # Parameters:
84 |     #     allow_duplicates (bool): Determines if we want to allow duplicates of the exact same document inside of the search database.
85 |     #     no_overwrite_must_have (bool): Determines if we're only checking for a small range of fields. Identified inside of `self.must_have`
86 |     #     kwargs (Any): Any field we want to add to the database. Its key and value.
87 |     # """
88 | 
89 | 
90 | 
91 | 
92 |     def Create(self,
93 |                allow_duplicates=False,
94 |                no_overwrite_must_have=False,
95 |                **kwargs) -> str:
96 |         """Insert a new record into redisearch.
97 | 
98 |         Args:
99 |             allow_duplicates (bool, optional): Determines if we allow duplicates of the exact same document inside of the database. Defaults to False.
100 |             no_overwrite_must_have (bool, optional): Determines if we allow more than one record that matches `must_have`. Defaults to False.
101 | 
102 |         Returns:
103 |             str: The inserted record's id
104 |         """
105 |         self.reset()
106 |         self.check_requirements(kwargs)
107 | 
108 |         if no_overwrite_must_have and len(self.must_have) > 0:
109 | 
110 |             _all = self.FForced(**kwargs)
111 |             if len(_all) > 0:
112 |                 return _all[0].id
113 | 
114 |         for k, v in kwargs.items():
115 |             self[k] = v
116 | 
117 |         identity = self.insert(allow_duplicates=allow_duplicates)
118 |         return identity
119 | 
120 |     def UpdateID(self, identity: str, **kwargs):
121 |         """ Updates a record by ID. Take care when changing a `must_have` variable. """
122 |         self.reset()
123 |         for k, v in kwargs.items():
124 |             self.replacement[k] = v
125 |         self.update_id(identity)
126 | 
127 |     def UpdateMany(self, search_dict: dict, force_must_have=False, **replacements):
128 |         """Replaces many records for the user.
129 | 
130 |         Args:
131 |             search_dict (dict): The query that selects the records we're replacing.
132 |             force_must_have (bool, optional): Checks that the `search_dict` has all of the `must_have` variables. Defaults to False.
133 | 
134 |         Raises:
135 |             ValueError: If our search parameters or replacement dictionaries are empty.
136 |         """
137 |         self.reset()
138 |         if not bool(search_dict) or not bool(replacements):
139 |             raise ValueError(
140 |                 "You need to have query information AND something to replace it with."
141 |             )
142 |         if force_must_have:
143 |             self.check_requirements(search_dict)
144 | 
145 |         for k, v in search_dict.items():
146 |             self[k] = v
147 | 
148 |         for k, v in replacements.items():
149 |             self.replacement[k] = v
150 |         self.update()
151 | 
152 |     def Find(self, general=None, force_must=False, **fields):
153 |         """Search through the database for our records.
154 | 
155 |         Runs a generalized search through the database.
156 | 
157 |         Keyword Arguments:
158 |             general {str} -- A general term that lets us find records with a fuzzy search (default: {None})
159 |             force_must {bool} -- Checks that the search fields contain everything declared as important in `must_have` (default: {False})
160 | 
161 |         Raises:
162 |             ValueError: If there's nothing to search with (both `fields` and `general` are empty).
163 | 
164 |         Returns:
165 |             [list] -- A list of descriptions.
166 |         """
167 |         self.reset()
168 |         if general is not None:
169 |             self.general = general
170 |         if not bool(fields):
171 |             if general is not None:
172 |                 return self.find()
173 |             raise ValueError("You have to search using something")
174 |         if force_must:
175 |             self.check_requirements(fields)
176 | 
177 |         for k, v in fields.items():
178 |             self[k] = v
179 |         return self.find()
180 | 
181 |     def FindById(self, identity: str):
182 |         self.reset()
183 |         remainder = self.pick(identity)
184 |         return remainder
185 | 
186 |     def FindForced(self, **kwargs):
187 |         self.reset()
188 |         self.check_requirements(kwargs)
189 |         for k in self.must_have:
190 |             self[k] = kwargs.get(k)
191 |         _all = self.find()
192 |         return _all
193 | 
194 |     def FForced(self, **kwargs):
195 |         for k in self.must_have:
196 |             self[k] = kwargs.get(k)
197 |         _all = self.find()
198 |         return _all
199 | 
200 |     def Remove(self, **kwargs):
201 |         self.reset()
202 |         for k, v in kwargs.items():
203 |             self[k] = v
204 |         self.remove()
-------------------------------------------------------------------------------- /jamboree/middleware/procedures/core.py: --------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import List, Dict, Any
3 | from addict import Dict as ADict
4 | # from addict import Dict
5 | 
6 | class ProcedureAbstract(ABC):
7 |     """
8 |     Procedures ensure everything is consistent.
9 |     """
10 | 
11 |     def verify(self):
12 |         """ Ensures we have all of the required variables in place."""
13 |         raise NotImplementedError("Verify function not implemented")
14 | 
15 |     def extract(self):
16 |         """ The item that will be serialized. """
17 |         raise NotImplementedError("Extract function not implemented")
18 | 
19 | 
20 | class NamedModelMetric(ABC):
21 |     def __init__(self, name: str):
22 |         self.name = name
23 | 
24 | 
25 |     def get_metric(self, y_pred, y_actual) -> Dict:
26 |         raise NotImplementedError("You need to have a way to get a metric")
27 | 
28 | 
29 | class NamedModelMetricSet:
30 |     """ A single place to hold all of the model metrics (in a set)"""
31 |     def __init__(self):
32 |         self.metric_set: List[NamedModelMetric] = []
33 | 
34 | 
35 |     def metrics(self, y_, y) -> Dict:
36 |         if len(self.metric_set) == 0:
37 |             return {}
38 |         metric_listing = {}
39 |         for metric in self.metric_set:
40 |             name = metric.name
41 |             metric_output = metric.get_metric(y_, y)
42 |             metric_listing[name] = metric_output
43 |         return metric_listing
44 | 
45 | 
46 | 
47 | class ProcedureManagement(ABC):
48 |     """ A way to interact with procedures. Use it to embed things like:
49 | 
50 |     1. Accessing procedures for a given code base.
51 |     2. Checking for certain attributes within the class.
52 |     3. Declaring what's acceptable.
53 |     """
54 |     def __init__(self):
55 |         self.required_attributes: List[str] = []
56 | 
57 | 
58 |     @property
59 |     def allowed(self) -> List[str]:
60 |         raise NotImplementedError("You need to set the allowed keys we'll take.")
61 | 
62 | 
63 |     def check_allowed(self, key: str):
64 |         if key not in self.allowed:
65 |             raise ValueError(f"{key} has to be one of the allowed keys: {self.allowed}")
66 | 
67 | 
68 |     def access(self, key: str) -> 'ProcedureAbstract':
69 |         """ Access the procedure we need. Returns the procedure registered under the given key."""
70 |         raise NotImplementedError("You need to create an access procedure")
71 | 
72 | 
73 |     def isattr(self, parentinstance: Any):
74 |         """ Checks to see if all of the required attributes are on the parent class instance. """
75 |         if len(self.required_attributes) > 0:
76 |             for attr in self.required_attributes:
77 |                 if not hasattr(parentinstance, attr):
78 |                     cls_name = parentinstance.__class__.__name__
79 |                     msg = f"{cls_name} does not have the attribute {attr}"
80 |                     raise AttributeError(msg)
81 | 
82 | 
83 | 
84 | class ModelProcedureAbstract(ProcedureAbstract):
85 |     # _dict = None
86 |     def __init__(self):
87 |         self._mod = None
88 |         self._opt = None
89 |         self._crit = None
90 | 
91 |         self._model_dict = ADict()
92 |         self._model_dict.model = None
93 |         self._model_dict.optimizer = None
94 |         self._model_dict.criteria = None
95 | 
96 |         self._model_typing = ADict()
97 |         self._model_typing.model = None
98 |         self._model_typing.optimizer = None
99 |         self._model_typing.criteria = None
100 | 
101 |         self._model_requirements = ADict()
102 |         self._model_requirements.model = True
103 |         self._model_requirements.optimizer = False
104 |         self._model_requirements.criteria = False
105 | 
106 |         self.changed = False
107 |         self.named_metric_set = NamedModelMetricSet()
108 | 
109 | 
110 |     @property
111 |     def dictionary(self):
112 |         """ A dictionary with all of the model information contained inside. """
113 |         return self._model_dict
114 | 
115 |     @dictionary.setter
116 |     def dictionary(self, _md: ADict):
117 |         """ Load in raw model dict information """
118 |         self._model_dict.update(_md)
119 |         # self.verify()
120 | 
121 |     @property
122 |     def requirements(self) -> ADict:
123 |         """ The requirements dictionary used when verifying the model dictionary. """
124 |         return self._model_requirements
125 | 
126 |     @requirements.setter
127 |     def requirements(self, _md: ADict):
128 |         """ Load in raw model requirement information """
129 |         self._model_requirements.update(_md)
130 | 
131 |     @property
132 |     def types(self) -> ADict:
133 |         return self._model_typing
134 | 
135 |     @types.setter
136 |     def types(self, _mt: ADict):
137 |         self._model_typing.update(_mt)
138 | 
139 |     """
140 |         Verification
141 |     """
142 | 
143 |     def verify_model_typing(self):
144 |         """Check that none of the required model types are None """
145 |         for k, v in self.requirements.items():
146 |             if not isinstance(v, bool):
147 |                 raise ValueError(f"Model Requirement '{k}' must be a boolean value")
148 |             if v:
149 |                 if self.types[k] is None:
150 |                     raise ValueError(f"'{k}' cannot be None in typing declarations")
151 |                 if self.dictionary[k] is None:
152 |                     raise ValueError(f"'{k}' cannot be None inside of the main model dictionary")
153 | 
154 |     def verify_model_dict(self):
155 |         """ Verify that each declared item matches its declared type. """
156 |         for name, _type in self.types.items():
157 |             if name is None or _type is None:
158 |                 continue
159 |             current_item = self.dictionary[name]
160 |             if not isinstance(current_item, _type) and not (isinstance(current_item, type) and issubclass(current_item, _type)):
161 |                 raise TypeError(f"{name} is not an instance of {_type}")
162 | 
163 |     def verify(self):
164 |         self.verify_model_typing()
165 |         self.verify_model_dict()
166 | 
167 |     def is_valid_data(self, _data) -> bool:
168 |         """ Determines if the data we're about to use is valid"""
169 |         raise NotImplementedError("Data validation not implemented yet")
170 | 
171 |     def split(self, X, y, **params):
172 |         raise NotImplementedError
173 | 
174 |     def fit(self, X, y, **params):
175 |         raise NotImplementedError
176 | 
177 |     def partial_fit(self, X, y, **params):
178 |         raise NotImplementedError
179 | 
180 |     def predict(self, X, **params):
181 |         raise NotImplementedError
182 | 
183 |     def predict_proba(self, X, **params):
184 |         raise NotImplementedError
185 | 
186 |     def score(self, X, y, **params):
187 |         raise NotImplementedError
188 | 
189 |     def get_params(self, **params):
190 |         raise NotImplementedError
191 | 
192 |     def set_params(self, **params):
193 |         raise NotImplementedError
194 | 
195 |     def extract(self):
196 |         """ Get a dictionary to save the model. Should be called in close """
197 |         return self.dictionary
198 | 
199 |     @property
200 |     def metrics(self):
201 |         """ Given the information we have, return a set of metrics"""
202 |         metric_set = self.named_metric_set.metrics(0, 0)
203 |         return metric_set
204 | 
205 | 
206 | 
207 | 
208 | 
209 | if __name__ == "__main__":
210 |     model_types = ADict()
211 |     model_vals = ADict()
212 |     model_types.model = bool
213 |     model_types.optimizer = str
214 |     model_types.criteria = str
215 | 
216 |     model_vals.model = False
217 |     model_vals.optimizer = "str"
218 |     model_vals.criteria = "str"
219 | 
220 |     base_model_procedure = ModelProcedureAbstract()
221 |     base_model_procedure.types = model_types
222 |     base_model_procedure.dictionary = model_vals
223 |     base_model_procedure.verify()
224 | 
225 |     print(base_model_procedure)
-------------------------------------------------------------------------------- /docs/readme/Insert No Duplicates.md: --------------------------------------------------------------------------------
1 | # Inserting Data Without Duplicates
2 | 
3 | Here we test inserting data without duplicates. Afterwards, we'll test for missing data inside of the databases.
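In short, the strategy below serializes every record with `orjson` and uses a redis set to filter out anything that was already inserted. Here is that idea in miniature (a minimal sketch; the key name and records are illustrative, and a local redis is assumed):

```python
import orjson
import redis

r = redis.Redis()  # assumes a local redis instance

records = [{"close": 1.0}, {"close": 1.0}, {"close": 2.0}]  # contains one exact duplicate
serialized = {orjson.dumps(rec) for rec in records}         # serializing into a set drops exact copies

set_key = "demo:inserted"                # illustrative key name
existing = set(r.smembers(set_key))      # records we've already inserted
addable = serialized - existing          # only genuinely new records remain
if addable:
    r.sadd(set_key, *addable)
```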
4 | 5 | 6 | ```python 7 | from jamboree import Jamboree 8 | import pandas as pd 9 | import datetime 10 | import pandas_datareader.data as web 11 | from pandas import Series, DataFrame 12 | ``` 13 | 14 | 15 | ```python 16 | from maya import MayaDT 17 | import maya 18 | import copy 19 | ``` 20 | 21 | 22 | ```python 23 | import random 24 | import orjson 25 | ``` 26 | 27 | 28 | ```python 29 | from typing import List, Dict, Any 30 | ``` 31 | 32 | 33 | ```python 34 | jam_session = Jamboree() 35 | ``` 36 | 37 | Unable to create library with name: events 38 | 39 | 40 | 41 | --------------------------------------------------------------------------- 42 | 43 | LibraryNotFoundException Traceback (most recent call last) 44 | 45 | in 46 | ----> 1 jam_session = Jamboree() 47 | 48 | 49 | ~/PycharmProjects/jamboree/jamboree/base/main.py in __init__(self, mongodb_host, redis_host, redis_port) 50 | 55 def __init__(self, mongodb_host="localhost", redis_host="localhost", redis_port=6379): 51 | 56 self.redis = Redis(redis_host, port=redis_port) 52 | ---> 57 self.store = Store(mongodb_host).create_lib('events').get_store()['events'] 53 | 58 self.pool = ThreadPool(max_workers=cpu_count() * 4) 54 | 59 55 | 56 | 57 | ~/.local/lib/python3.6/site-packages/arctic/arctic.py in __getitem__(self, key) 58 | 373 def __getitem__(self, key): 59 | 374 if isinstance(key, string_types): 60 | --> 375 return self.get_library(key) 61 | 376 else: 62 | 377 raise ArcticException("Unrecognised library specification - use [libraryName]") 63 | 64 | 65 | ~/.local/lib/python3.6/site-packages/arctic/arctic.py in get_library(self, library) 66 | 358 if error: 67 | 359 raise LibraryNotFoundException("Library %s was not correctly initialized in %s.\nReason: %r)" % 68 | --> 360 (library, self, error)) 69 | 361 elif not lib_type: 70 | 362 raise LibraryNotFoundException("Library %s was not correctly initialized in %s." % 71 | 72 | 73 | LibraryNotFoundException: Library events was not correctly initialized in . 
74 | Reason: ServerSelectionTimeoutError('localhost:27017: [Errno 111] Connection refused',)) 75 | 76 | 77 | 78 | ```python 79 | start = datetime.datetime(1986, 3, 14) 80 | end = datetime.datetime(2020, 1, 6) 81 | ``` 82 | 83 | 84 | ```python 85 | apple_df = web.DataReader("AAPL", 'yahoo', start, end) 86 | msft_df = web.DataReader("MSFT", 'yahoo', start, end) 87 | ``` 88 | 89 | 90 | ```python 91 | apple_df 92 | ``` 93 | 94 | 95 | ```python 96 | def get_year_month_day(time:MayaDT): 97 | print(f"{time.day}-{time.month}-{time.year}") 98 | ``` 99 | 100 | 101 | ```python 102 | def get_time_dt(df): 103 | indexes = df.index 104 | indexes = [maya.MayaDT.from_datetime(index.to_pydatetime()) for index in indexes] 105 | return indexes 106 | ``` 107 | 108 | 109 | ```python 110 | def df_records(df): 111 | return df.to_dict("records") 112 | ``` 113 | 114 | 115 | ```python 116 | def standardize_record(record): 117 | closing_record = {} 118 | if "Close" in record: 119 | closing_record['close'] = record["Close"] 120 | if "Open" in record: 121 | closing_record['open'] = record["Open"] 122 | if "Low" in record: 123 | closing_record['low'] = record["Low"] 124 | if "High" in record: 125 | closing_record['high'] = record["High"] 126 | if "Volume" in record: 127 | closing_record['volume'] = record["Volume"] 128 | 129 | return closing_record 130 | ``` 131 | 132 | 133 | ```python 134 | def standardize_outputs(records:List[Dict[str, Any]]): 135 | if len(records) == 0: 136 | return [] 137 | _records = [standardize_record(rec) for rec in records] 138 | return _records 139 | ``` 140 | 141 | 142 | ```python 143 | def add_time(records, times): 144 | if len(records) == 0 or (len(records) != len(times)): 145 | return [] 146 | 147 | _records = [] 148 | for index, rec in enumerate(records): 149 | rec['time'] = times[index]._epoch 150 | _records.append(rec) 151 | return _records 152 | ``` 153 | 154 | 155 | ```python 156 | def teardown(df): 157 | """Breaks the dataframe into a bunch of dictionaries""" 158 | indexes = get_time_dt(df) 159 | records = df_records(df) 160 | standardized = standardize_outputs(records) 161 | # print(standardized) 162 | with_time = add_time(standardized, indexes) 163 | return with_time 164 | ``` 165 | 166 | 167 | ```python 168 | dt_time = teardown(apple_df) 169 | ``` 170 | 171 | 172 | ```python 173 | def flip(n=0.05): 174 | if random.uniform(0, 1) < n: 175 | return True 176 | return False 177 | ``` 178 | 179 | 180 | ```python 181 | def create_duplicates(frame_dict_list:List[Dict]): 182 | if len(frame_dict_list) == 0: 183 | return [] 184 | 185 | final_list = [] 186 | for item in frame_dict_list: 187 | final_list.append(item) 188 | if flip(0.1): 189 | final_list.append(item) 190 | return final_list 191 | ``` 192 | 193 | 194 | ```python 195 | 196 | ``` 197 | 198 | 199 | ```python 200 | # len(dups) 201 | ``` 202 | 203 | 204 | ```python 205 | last_200 = dt_time[-200:] 206 | last_300 = dt_time[-300:] 207 | last_200_dups = create_duplicates(last_200) 208 | last_300_dups = create_duplicates(last_300) 209 | ``` 210 | 211 | 212 | ```python 213 | upsert_data_one = jam_session.bulk_upsert_redis({"type": "sample_save", "asset": "AAPL", "label": "duplication"}, last_200) 214 | upsert_data_two = jam_session.bulk_upsert_redis({"type": "sample_save", "asset": "AAPL", "label": "duplication"}, last_300) 215 | upsert_data_one_dups = jam_session.bulk_upsert_redis({"type": "sample_save", "asset": "AAPL", "label": "duplication"}, last_200_dups) 216 | upsert_data_two_dups = jam_session.bulk_upsert_redis({"type": 
"sample_save", "asset": "AAPL", "label": "duplication"}, last_300_dups) 217 | ``` 218 | 219 | 220 | ```python 221 | main_hash = upsert_data_one.get("hash") 222 | ``` 223 | 224 | 225 | ```python 226 | up1 = upsert_data_one.get('updated', []) 227 | up2 = upsert_data_two.get('updated', []) 228 | up3 = upsert_data_one_dups.get('updated', []) 229 | up4 = upsert_data_two_dups.get('updated', []) 230 | ``` 231 | 232 | 233 | ```python 234 | cr1 = [orjson.dumps(x) for x in up1] 235 | cr2 = [orjson.dumps(x) for x in up2] 236 | cr3 = [orjson.dumps(x) for x in up3] 237 | cr4 = [orjson.dumps(x) for x in up4] 238 | ``` 239 | 240 | 241 | ```python 242 | set1 = set(cr1) 243 | set2 = set(cr2) 244 | set3 = set(cr3) 245 | set4 = set(cr4) 246 | ``` 247 | 248 | 249 | ```python 250 | print(len(set1)) 251 | print(len(set2)) 252 | print(len(set3)) 253 | print(len(set4)) 254 | ``` 255 | 256 | 257 | ```python 258 | jam_session.redis.sadd(set_key, *set(cr3)) 259 | ``` 260 | 261 | 262 | ```python 263 | 264 | ``` 265 | 266 | 267 | ```python 268 | def deserialize_list(serialized_list:list): 269 | if len(serialized_list) == 0: 270 | return [] 271 | 272 | return [orjson.loads(x) for x in serialized_list] 273 | ``` 274 | 275 | 276 | ```python 277 | def add_timestamp(item): 278 | item['timestamp'] = maya.now()._epoch 279 | return item 280 | ``` 281 | 282 | 283 | ```python 284 | def get_addable_items(set_key, added_set): 285 | existing = set(jam_session.redis.smembers(set_key)) 286 | addable_items = set(set2 - existing) 287 | if len(addable_items) == 0: 288 | return [] 289 | listified = list(addable_items) 290 | deku = deserialize_list(listified) 291 | timestamped = [add_timestamp(x) for x in deku] 292 | return timestamped 293 | ``` 294 | 295 | 296 | ```python 297 | # updated_set = set(serialized_updated) 298 | ``` 299 | 300 | 301 | ```python 302 | get_addable_items(set_key, set2) 303 | ``` 304 | 305 | 306 | ```python 307 | # jam_session.redis.smembers(set_key, 0, -1) 308 | ``` 309 | 310 | 311 | ```python 312 | set(retrieved - updated_set) 313 | ``` 314 | 315 | 316 | ```python 317 | len(retrieved) 318 | ``` 319 | 320 | 321 | ```python 322 | len(updated_set) 323 | ``` 324 | 325 | 326 | ```python 327 | 328 | ``` 329 | -------------------------------------------------------------------------------- /jamboree/storage/files/redisify/core.py: -------------------------------------------------------------------------------- 1 | import maya 2 | from threading import local 3 | from jamboree.storage.files import FileStorageConnection 4 | from jamboree.utils.core import consistent_hash 5 | from jamboree.utils.support.storage import serialize, deserialize 6 | from jamboree.utils.context import watch_loop 7 | from addict import Dict 8 | import redis 9 | from redis import Redis 10 | from redis.client import Pipeline 11 | import version_query 12 | from loguru import logger 13 | logger.disable(__name__) 14 | 15 | class RedisFileProcessor(object): 16 | def __init__(self, *args, **kwargs): 17 | self._pipe = None 18 | self._conn = None 19 | 20 | @property 21 | def conn(self) -> Redis: 22 | if self._conn is None: 23 | raise AttributeError("Pipe hasn't been set") 24 | return self._conn 25 | @conn.setter 26 | def conn(self, _pipe:Redis): 27 | self._conn = _pipe 28 | 29 | @property 30 | def pipe(self) -> Pipeline: 31 | if self._pipe is None: 32 | raise AttributeError("Pipe hasn't been set") 33 | return self._pipe 34 | @pipe.setter 35 | def pipe(self, _pipe:Pipeline): 36 | self._pipe = _pipe 37 | 38 | 39 | def reset(self): 40 | self.pipe = None 41 | 
42 | 
43 | 
44 | class RedisFileConnection(FileStorageConnection):
45 |     def __init__(self, **kwargs):
46 |         super().__init__(**kwargs)
47 |         # NOTE: Experiment with sorted sets
48 |         self.current_query = {}
49 |         self.current_hash = None
50 |         self.current_query_exist = None
51 |         self.current_pipe = None
52 |         self.current_hash_keys = None
53 |         self.current_version = None
54 |         self.current_version_exist = None
55 |         self.setup_run = None
56 | 
57 |     def gwatch(self):
58 |         sorted_version = self.keys.version.sorted
59 |         set_version = self.keys.version.set
60 |         self.pipe.watch(sorted_version)
61 |         self.pipe.watch(set_version)
62 | 
63 |     @property
64 |     def version(self):
65 |         """ Get the latest version or the default """
66 |         sorted_version = self.keys.version.sorted
67 |         set_version = self.keys.version.set
68 |         # self.pipe.watch(sorted_version)
69 |         # self.pipe.watch(set_version)
70 |         if self.query_exists and self.current_version is None:
71 |             # latest_version = self.connection.zrange(sorted_version, -1, -1)
72 |             # _all_versions = self.connection.zrange(sorted_version, 0, -1)
73 |             latest_version = self.pipe.zrange(sorted_version, -1, -1)
74 | 
75 |             if latest_version is not None and len(latest_version) > 0:
76 |                 latest_version = latest_version[0]
77 |                 self.current_version = latest_version.decode()
78 |         elif self.current_version is not None:
79 |             return self.current_version
80 |         else:
81 |             latest_version = self.settings.default.version
82 |             self.pipe.zadd(sorted_version, {latest_version: maya.now()._epoch})
83 |             self.pipe.sadd(set_version, latest_version)
84 |             self.current_version = latest_version
85 |         return self.current_version
86 | 
87 |     @version.setter
88 |     def version(self, _version: str):
89 |         sorted_version = self.keys.version.sorted
90 |         set_version = self.keys.version.set
91 |         self.pipe.zadd(sorted_version, {_version: maya.now()._epoch})
92 |         self.pipe.sadd(set_version, _version)  # add the whole version string; unpacking it would add individual characters
93 |         self.current_version = _version
94 | 
95 | 
96 | 
97 |     @property
98 |     def hash_query(self):
99 |         if self.current_hash is None:
100 |             self.current_hash = consistent_hash(self.current_query)
101 |         return self.current_hash
102 | 
103 |     @property
104 |     def keys(self):
105 |         """ Build the redis key names derived from the query hash. """
""" 106 | if self.current_hash_keys is None: 107 | self.current_hash_keys = Dict() 108 | self.current_hash_keys.version.set = f"{self.hash_query}:versions" 109 | self.current_hash_keys.version.sorted = f"{self.hash_query}:zversions" 110 | self.current_hash_keys.file.sum = f"{self.hash_query}:sums" 111 | self.current_hash_keys.sum = f"{self.hash_query}:sum" 112 | self.current_hash_keys.version.index = f"{self.hash_query}:incr" 113 | return self.current_hash_keys 114 | 115 | @property 116 | def query_exists(self) -> bool: 117 | if self.current_query_exist is None: 118 | version_set_exist = self.pipe.exists(self.keys.version.set) 119 | sorted_version_exist = self.pipe.exists(self.keys.version.sorted) 120 | 121 | self.current_query_exist = (version_set_exist == 1 and sorted_version_exist == 1) 122 | return self.current_query_exist 123 | 124 | @property 125 | def file_exist(self) -> bool: 126 | """ Does the current file version exist""" 127 | 128 | if self.current_version_exist is None: 129 | vk = self.version_key 130 | self.pipe.watch(vk) 131 | version_set_exist = self.pipe.exists(vk) 132 | self.current_version_exist = (version_set_exist == 1) 133 | return self.current_version_exist 134 | 135 | @property 136 | def pipe(self): 137 | if self.current_pipe is None: 138 | raise AttributeError("Pipe cannot be non-existent") 139 | return self.current_pipe 140 | 141 | @property 142 | def version_key(self) -> str: 143 | return f"{self.hash_query}:{self.version}" 144 | 145 | 146 | def update_version(self): 147 | """ Save version in multiple places to be found later""" 148 | version = self.version 149 | if self.query_exists and not self.is_overwrite: 150 | vs = version_query.Version.from_str(version) 151 | new_vs = vs.increment(self.settings.default.increment) 152 | new_vs_str = new_vs.to_str() 153 | # print(new_vs_str) 154 | self.version = new_vs_str 155 | 156 | 157 | def update_file(self, _file): 158 | """Update the file""" 159 | vkey = self.version_key 160 | self.pipe.set(vkey, _file) 161 | 162 | def update(self, file): 163 | """ Update the file version and update the file. """ 164 | self.update_version() 165 | self.update_file(file) 166 | logger.error(file) 167 | 168 | @property 169 | def garbage_patch(self): 170 | """ 171 | Basically a way to see if both the file and query key exist. 172 | It's pretty janky. It'll be fine though. 
173 | """ 174 | return (self.query_exists and self.file_exist) 175 | 176 | def absolute_exists(self, query, **kwargs): 177 | # self.setup_run = None 178 | self.setup(query, **kwargs) 179 | return (self.query_exists and self.file_exist) 180 | 181 | 182 | @logger.catch 183 | def save(self, query:dict, obj, **kwargs): 184 | self.setup(query, **kwargs) 185 | serial_item = serialize(obj) 186 | self.update(serial_item) 187 | 188 | 189 | 190 | 191 | @logger.catch 192 | def query(self, query:dict, **kwargs): 193 | self.setup(query, **kwargs) 194 | 195 | if self.garbage_patch: 196 | # If the query and file exist 197 | # logger.debug("File exist, we're gonna try pulling it") 198 | # logger.debug(self.version) 199 | item = self.pipe.get(self.version_key) 200 | unpacked = deserialize(item) 201 | if unpacked is None: 202 | raise AttributeError("Pickled Item Not Found") 203 | return unpacked 204 | 205 | 206 | def delete(self, query:dict, **kwargs): 207 | self.setup(query, **kwargs) 208 | if self.query_exists and self.file_exist: 209 | # If the query and file exist 210 | sorted_version = self.keys.version.sorted 211 | set_version = self.keys.version.set 212 | self.pipe.delete(self.version_key) 213 | self.pipe.zrem(sorted_version, self.version) 214 | self.pipe.srem(set_version, self.version) 215 | 216 | def setup(self, query:dict, **kwargs): 217 | is_force = kwargs.pop("is_force", False) 218 | if self.setup_run is None or is_force: 219 | self.reset() 220 | self.settings = Dict(**kwargs) 221 | self.current_query = query 222 | self.gwatch() 223 | self.version 224 | self.setup_run = True 225 | self.file_exist 226 | 227 | def reset(self): 228 | """ Reset all placeholder variables""" 229 | self.current_query = {} 230 | self.current_hash = None 231 | self.current_query_exist = None 232 | self.current_pipe = self.conn.pipeline() 233 | self.current_version = None 234 | self.current_version_exist = None 235 | self.setup_run = None 236 | 237 | 238 | 239 | class SampleObj(object): 240 | def __init__(self) -> None: 241 | self.one = "one" 242 | self.two = "two" 243 | 244 | def main(): 245 | current_settings = Dict() 246 | query_dict = Dict() 247 | current_settings.overwrite = True 248 | current_settings.preferences = query_dict 249 | samp_opt = SampleObj() 250 | redpill = redis.Redis() 251 | redconn = RedisFileConnection() 252 | redconn.conn = redpill 253 | redconn.save({"one": "twoss"}, samp_opt, **current_settings) 254 | item = redconn.query({"one": "twoss"}) 255 | logger.info(item) 256 | 257 | # redconn.pipe.execute() 258 | 259 | if __name__ == "__main__": 260 | main() --------------------------------------------------------------------------------