├── test
│   ├── test_real
│   └── experiments
│       ├── functions
│       │   └── files
│       │       ├── signature.py
│       │       └── compressed_pickle.py
│       ├── abstractions
│       │   └── search
│       │       └── general_search.py
│       └── notebooks
│           └── File Command Logic.ipynb
├── examples
│   ├── sample_env.py
│   ├── user_settings.py
│   ├── user_handler.py
│   └── sample_env_refactor.py
├── jamboree
│   ├── base
│   │   ├── __init__.py
│   │   ├── old
│   │   │   └── __init__.py
│   │   ├── processors
│   │   │   ├── legacy.py
│   │   │   ├── __init__.py
│   │   │   ├── abstracts
│   │   │   │   ├── __init__.py
│   │   │   │   ├── search.py
│   │   │   │   ├── files.py
│   │   │   │   ├── main.py
│   │   │   │   ├── legacy.py
│   │   │   │   └── event.py
│   │   │   ├── search.py
│   │   │   ├── main.py
│   │   │   └── files.py
│   │   ├── core.py
│   │   └── handler.py
│   ├── storage
│   │   ├── __init__.py
│   │   ├── files
│   │   │   ├── __init__.py
│   │   │   ├── redisify
│   │   │   │   ├── __init__.py
│   │   │   │   └── core.py
│   │   │   └── core.py
│   │   ├── databases
│   │   │   ├── __init__.py
│   │   │   ├── database.py
│   │   │   └── jmongo.py
│   │   └── README.md
│   ├── utils
│   │   ├── settings.py
│   │   ├── support
│   │   │   ├── storage
│   │   │   │   ├── checksums.py
│   │   │   │   ├── __init__.py
│   │   │   │   └── cereal.py
│   │   │   ├── search
│   │   │   │   ├── assistance
│   │   │   │   │   ├── cache.py
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── keystore.py
│   │   │   │   │   └── inserter.py
│   │   │   │   ├── __init__.py
│   │   │   │   ├── querying.py
│   │   │   │   ├── validation.py
│   │   │   │   └── core.py
│   │   │   ├── __init__.py
│   │   │   └── events
│   │   │       ├── feature.py
│   │   │       ├── clock.py
│   │   │       └── cereal.py
│   │   ├── __init__.py
│   │   ├── context
│   │   │   ├── __init__.py
│   │   │   └── main.py
│   │   ├── core
│   │   │   ├── __init__.py
│   │   │   ├── ordefault.py
│   │   │   └── fhash.py
│   │   └── caches.py
│   ├── middleware
│   │   ├── __init__.py
│   │   ├── procedures
│   │   │   ├── management
│   │   │   │   ├── __init__.py
│   │   │   │   └── strategy.py
│   │   │   ├── models
│   │   │   │   ├── __init__.py
│   │   │   │   ├── README.md
│   │   │   │   ├── _flow.py
│   │   │   │   ├── _sklearn.py
│   │   │   │   ├── _creme.py
│   │   │   │   └── _torch.py
│   │   │   ├── __init__.py
│   │   │   ├── README.md
│   │   │   └── core.py
│   │   └── processors
│   │       ├── __init__.py
│   │       ├── base.py
│   │       └── resample.py
│   ├── handlers
│   │   ├── abstracted
│   │   │   ├── __init__.py
│   │   │   ├── features.py
│   │   │   ├── search
│   │   │   │   ├── __init__.py
│   │   │   │   ├── meta.py
│   │   │   │   └── updated.py
│   │   │   └── datasets
│   │   │       ├── __init__.py
│   │   │       ├── economic.py
│   │   │       ├── price.py
│   │   │       └── orderbook.py
│   │   ├── complex
│   │   │   ├── __init__.py
│   │   │   ├── engines
│   │   │   │   └── __init__.py
│   │   │   ├── backtestable
│   │   │   │   ├── default
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── files.py
│   │   │   │   │   └── db.py
│   │   │   │   ├── __init__.py
│   │   │   │   ├── files.py
│   │   │   │   └── db.py
│   │   │   ├── README.md
│   │   │   ├── model.py
│   │   │   ├── meta.py
│   │   │   └── metric.py
│   │   ├── processors
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   └── resample.py
│   │   ├── __init__.py
│   │   ├── default
│   │   │   ├── __init__.py
│   │   │   ├── access.py
│   │   │   └── blob.py
│   │   └── base.py
│   └── __init__.py
├── scripts
│   └── search
│       └── meta_search_handler.py
├── docs
│   ├── .DS_Store
│   ├── event_sourcing.png
│   ├── jamboree_logo.png
│   ├── readme
│   │   ├── .DS_Store
│   │   ├── How Jamboree Works.md
│   │   └── Insert No Duplicates.md
│   ├── imgs
│   │   ├── event_sourcing.png
│   │   ├── jamboree_logo.png
│   │   ├── jamboree_long.png
│   │   ├── event-sourcing_long.png
│   │   └── jamboree-long-new.png
│   ├── redis_event_source.png
│   ├── redis_mongo_layer.png
│   ├── Event Source Redis Key System.png
│   ├── JIP
│   │   └── SchemaDesign.md
│   └── notebooks
│       ├── Untitled.ipynb
│       └── Verification.ipynb
├── .dockerignore
├── PKG-INFO
├── pyproject.toml
├── .gitignore
├── setup.py
└── README.md

/test/test_real:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/sample_env.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/base/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/storage/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/utils/settings.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/base/old/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/middleware/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/base/processors/legacy.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/handlers/abstracted/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/handlers/abstracted/features.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/utils/support/storage/checksums.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/jamboree/handlers/complex/__init__.py:
--------------------------------------------------------------------------------
1 | from .meta import MetaHandler
--------------------------------------------------------------------------------
/jamboree/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .caches import memoized_method, omit
--------------------------------------------------------------------------------
/scripts/search/meta_search_handler.py:
--------------------------------------------------------------------------------
1 | 
2 | def main():
3 |     pass
--------------------------------------------------------------------------------
/jamboree/storage/files/__init__.py:
--------------------------------------------------------------------------------
1 | from .core import FileStorageConnection
--------------------------------------------------------------------------------
/jamboree/handlers/complex/engines/__init__.py:
--------------------------------------------------------------------------------
1 | from .files import FileEngine
--------------------------------------------------------------------------------
/jamboree/utils/support/search/assistance/cache.py:
--------------------------------------------------------------------------------
1 | """
2 | Cache
3 | """
--------------------------------------------------------------------------------
/docs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/.DS_Store
--------------------------------------------------------------------------------
/jamboree/utils/support/search/assistance/__init__.py:
--------------------------------------------------------------------------------
1 | from .keystore import Keystore
--------------------------------------------------------------------------------
/jamboree/utils/support/__init__.py:
--------------------------------------------------------------------------------
1 | from .storage.cereal import serialize, deserialize
--------------------------------------------------------------------------------
/jamboree/utils/support/events/feature.py:
--------------------------------------------------------------------------------
1 | """
2 | Here for feature conversions
3 | """
--------------------------------------------------------------------------------
/jamboree/utils/support/storage/__init__.py:
--------------------------------------------------------------------------------
1 | from .cereal import serialize, deserialize
--------------------------------------------------------------------------------
/docs/event_sourcing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/event_sourcing.png
--------------------------------------------------------------------------------
/docs/jamboree_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/jamboree_logo.png
--------------------------------------------------------------------------------
/docs/readme/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/readme/.DS_Store
--------------------------------------------------------------------------------
/jamboree/middleware/procedures/management/__init__.py:
--------------------------------------------------------------------------------
1 | # from .models import ModelProcedureManagement
--------------------------------------------------------------------------------
/jamboree/middleware/procedures/management/strategy.py:
--------------------------------------------------------------------------------
1 | """
2 | Strategy procedure managements
3 | """
--------------------------------------------------------------------------------
/jamboree/storage/files/redisify/__init__.py:
--------------------------------------------------------------------------------
1 | from .core import RedisFileProcessor, RedisFileConnection
--------------------------------------------------------------------------------
/docs/imgs/event_sourcing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/imgs/event_sourcing.png
--------------------------------------------------------------------------------
/docs/imgs/jamboree_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/imgs/jamboree_logo.png
--------------------------------------------------------------------------------
/docs/imgs/jamboree_long.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/imgs/jamboree_long.png
--------------------------------------------------------------------------------
/docs/redis_event_source.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/redis_event_source.png
--------------------------------------------------------------------------------
/docs/redis_mongo_layer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/redis_mongo_layer.png
--------------------------------------------------------------------------------
/jamboree/utils/context/__init__.py:
--------------------------------------------------------------------------------
1 | from .main import example_space, timecontext, watch_loop, watch_loop_callback
--------------------------------------------------------------------------------
/docs/imgs/event-sourcing_long.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/imgs/event-sourcing_long.png
--------------------------------------------------------------------------------
/docs/imgs/jamboree-long-new.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/imgs/jamboree-long-new.png
--------------------------------------------------------------------------------
/jamboree/handlers/processors/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import DataProcessorsAbstract
2 | from .resample import DynamicResample
--------------------------------------------------------------------------------
/test/experiments/functions/files/signature.py:
--------------------------------------------------------------------------------
1 | """
2 | Create a signature for a complex data type
3 | """
4 | 
5 | 
6 | 
--------------------------------------------------------------------------------
/docs/Event Source Redis Key System.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kivo360/jamboree/HEAD/docs/Event Source Redis Key System.png
--------------------------------------------------------------------------------
/jamboree/middleware/processors/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import DataProcessorsAbstract
2 | from .resample import DynamicResample
--------------------------------------------------------------------------------
/jamboree/utils/support/events/clock.py:
--------------------------------------------------------------------------------
1 | """
2 | # Clock
3 | 
4 | - All of the time functions you need for zsets
5 | """
--------------------------------------------------------------------------------
/test/experiments/abstractions/search/general_search.py:
--------------------------------------------------------------------------------
1 | class GeneralSearch(object):
2 |     def __init__(self):
3 |         pass
--------------------------------------------------------------------------------
/jamboree/handlers/abstracted/search/__init__.py:
--------------------------------------------------------------------------------
1 | from .updated import ParameterizedSearch
2 | from .meta import MetadataSearchHandler
--------------------------------------------------------------------------------
/jamboree/handlers/complex/backtestable/default/__init__.py:
--------------------------------------------------------------------------------
1 | from .db import BacktestDBHandler
2 | from .files import BlobStorageHandler
--------------------------------------------------------------------------------
/jamboree/utils/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .fhash import consistent_hash, consistent_unhash, omit
2 | from .ordefault import dict_validation
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | .git
2 | .gitignore
3 | LICENSE
4 | VERSION
5 | Changelog.md
6 | Makefile
7 | docker-compose.yml
8 | .gitlab-ci.yml
9 | README.*
--------------------------------------------------------------------------------
/jamboree/handlers/complex/backtestable/__init__.py:
--------------------------------------------------------------------------------
1 | from .default.db import BacktestDBHandler
2 | from .default.files import BacktestBlobHandler
--------------------------------------------------------------------------------
/jamboree/handlers/abstracted/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .economic import EconomicData
2 | from .price import PriceData
3 | from .orderbook import OrderbookData
--------------------------------------------------------------------------------
/jamboree/handlers/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BaseHandler
2 | from .default import DBHandler, TimeHandler, DataHandler, MultiDataManagement
3 | # from .complex.model import ModelEngine
--------------------------------------------------------------------------------
/jamboree/base/processors/__init__.py:
--------------------------------------------------------------------------------
1 | from .abstracts.legacy import LegacyProcessor
2 | from .abstracts.event import EventProcessor
3 | from .abstracts.files import FileProcessor
4 | from .abstracts.search import SearchProcessor
--------------------------------------------------------------------------------
/jamboree/base/processors/abstracts/__init__.py:
--------------------------------------------------------------------------------
1 | from .legacy import LegacyProcessor
2 | from .event import EventProcessor
3 | from .files import FileProcessor
4 | from .search import SearchProcessor
5 | from .main import Processor
--------------------------------------------------------------------------------
/jamboree/base/processors/abstracts/search.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import List
3 | 
4 | class SearchProcessor(ABC):
5 |     """ All of the common search queries will go here """
6 |     def search(self):
7 |         pass
--------------------------------------------------------------------------------
/jamboree/middleware/procedures/models/__init__.py:
--------------------------------------------------------------------------------
1 | from ._creme import CremeProcedure
2 | from ._sklearn import SklearnProcedure, CustomSklearnGaussianProcedure
3 | from ._torch import TorchProcedure
4 | from ._flow import TFKerasProcedure
--------------------------------------------------------------------------------
/jamboree/handlers/default/__init__.py:
--------------------------------------------------------------------------------
1 | from .db import DBHandler
2 | from .access import Access
3 | from .time import TimeHandler
4 | from .data import DataHandler
5 | from .multi import MultiDataManagement
6 | from .blob import BlobStorageHandler
7 | 
--------------------------------------------------------------------------------
/jamboree/storage/databases/__init__.py:
--------------------------------------------------------------------------------
1 | from .database import DatabaseConnection
2 | from .jmongo import MongoDatabaseConnection
3 | from .jredis import RedisDatabaseConnection
4 | from .jredis_zset import RedisDatabaseZSetsConnection as ZRedisDatabaseConnection
--------------------------------------------------------------------------------
/jamboree/middleware/procedures/__init__.py:
--------------------------------------------------------------------------------
1 | from .core import ProcedureAbstract, ProcedureManagement, ModelProcedureAbstract
2 | # from .models import CremeProcedure, SklearnProcedure, TFKerasProcedure, TorchProcedure
3 | # from .management import ModelProcedureManagement
--------------------------------------------------------------------------------
/jamboree/__init__.py:
--------------------------------------------------------------------------------
1 | from .base.core import Jamboree
2 | from .base.processors.main import Jamboree as JamboreeNew
3 | from .handlers.base import BaseHandler
4 | from .handlers.default.db import DBHandler
5 | from .handlers.default.data import DataHandler
6 | from .handlers.default.time import TimeHandler
--------------------------------------------------------------------------------
/jamboree/handlers/abstracted/datasets/economic.py:
--------------------------------------------------------------------------------
1 | from jamboree.handlers.default import DataHandler
2 | 
3 | class EconomicData(DataHandler):
4 |     """
5 |     # Economic Data
6 | 
7 |     A way to browse economic data. It is an extension of DataHandler and includes basic searches.
8 | """ 9 | pass -------------------------------------------------------------------------------- /jamboree/utils/core/ordefault.py: -------------------------------------------------------------------------------- 1 | def dict_validation(obj:dict) -> bool: 2 | obj_keys = list(obj.keys()) 3 | for x in ['subcategories', 'entity', 'submetatype', 'name', 'metatype', 'category', "abbreviation"]: 4 | if x not in obj_keys: 5 | return False 6 | return True 7 | 8 | def default(obj): 9 | pass -------------------------------------------------------------------------------- /jamboree/utils/support/search/__init__.py: -------------------------------------------------------------------------------- 1 | from .validation import is_nested, is_gen_type, name_match, is_generic, is_geo, to_str, to_field, is_queryable_dict 2 | from .validation import is_valid_geo, is_valid_bool, is_valid_numeric, is_valid_tags, is_valid_text 3 | from .builders import InsertBuilder, QueryBuilder 4 | from .core import BaseSearchHandlerSupport -------------------------------------------------------------------------------- /jamboree/utils/support/events/cereal.py: -------------------------------------------------------------------------------- 1 | """ 2 | # Serialization commands 3 | 4 | JSON serialization functions specically tailored to the events segment of the code base 5 | """ 6 | 7 | def single_one(): 8 | pass 9 | 10 | def bulk_serialize(): 11 | pass 12 | 13 | def bulk_unserialize(): 14 | pass 15 | 16 | def serialize_df(): 17 | pass -------------------------------------------------------------------------------- /jamboree/base/processors/search.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from typing import List 3 | 4 | class SearchProcessor(ABC): 5 | """ All of the common search queries will go here """ 6 | def search(self): 7 | pass 8 | 9 | 10 | def search_specific(self): 11 | pass 12 | 13 | def search_all(self): 14 | """ Search all of the tags""" 15 | pass -------------------------------------------------------------------------------- /docs/JIP/SchemaDesign.md: -------------------------------------------------------------------------------- 1 | ```py 2 | class RequirementsSchema(Schema): 3 | __metadata__ = DescriptionObject(**parameters) 4 | 5 | field = FieldType(name,**parameters) # parameters here describe how the data will be used 6 | field = FieldType(**parameters) 7 | field = FieldType(**parameters) 8 | field = FieldType(**parameters) 9 | field = FieldType(**parameters) 10 | ``` -------------------------------------------------------------------------------- /jamboree/utils/support/storage/cereal.py: -------------------------------------------------------------------------------- 1 | import lz4.frame 2 | import dill 3 | 4 | """ 5 | # COMPRESSED SERIALIZATION LIBRARY 6 | 7 | Simply compress and serialize 8 | """ 9 | 10 | def serialize(obj): 11 | """ Should take a complex object and pickle it""" 12 | pickled = dill.dumps(obj, byref=False) 13 | compressed = lz4.frame.compress(pickled) 14 | return compressed 15 | 16 | def deserialize(obj): 17 | """ Should take a serialized object and pickle""" 18 | decompressed = lz4.frame.decompress(obj) 19 | unpickled = dill.loads(decompressed) 20 | return unpickled -------------------------------------------------------------------------------- /jamboree/middleware/processors/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from typing import Any 3 
/jamboree/middleware/processors/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import Any
3 | 
4 | 
5 | class DataProcessorsAbstract(ABC):
6 |     """ DataProcessor is used to transform data """
7 |     def __init__(self, name, **kwargs):
8 |         self._name = name
9 |         self.set_settings(**kwargs)
10 | 
11 |     def set_settings(self, **kwargs):
12 |         raise NotImplementedError(
13 |             "Need to set the settings you're expecting for this preprocessor"
14 |         )
15 | 
16 |     def process(self, data:Any) -> Any:
17 |         raise NotImplementedError(
18 |             "A command to preprocess information and return that info."
19 |         )
20 | 
--------------------------------------------------------------------------------
/jamboree/middleware/procedures/models/README.md:
--------------------------------------------------------------------------------
1 | # Machine Learning Interaction Procedures
2 | 
3 | 
4 | Forces all ML libraries to be accessed using the exact same sklearn-like API.
5 | 
6 | 
7 | * `predict(X, **params)`
8 | * `pred_proba(X, y, **params)`
9 | * `fit(X, y, **params)`
10 | * `partial_fit(X, y, **params)`
11 | * `adjust(X, y, **params)`
12 | * `.metrics` - Gets the metrics of the model. `X` and `y` are supposed to be taken from the adjust column.
13 | * `get_params()`
14 | * `set_params(**params)`
15 | * `extract()` - Gets the full model in a storable format
16 | 
17 | Will need to run through some walk-forward testing examples after the backtest is done.
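18 | 
19 | As a minimal sketch of that contract, a procedure might wrap an sklearn estimator like this (the class name and the `SGDClassifier` choice are illustrative only, not the library's real `ModelProcedureAbstract`):
20 | 
21 | ```py
22 | from sklearn.linear_model import SGDClassifier
23 | 
24 | 
25 | class SketchProcedure:
26 |     def __init__(self, **params):
27 |         self.model = SGDClassifier(**params)
28 | 
29 |     def fit(self, X, y, **params):
30 |         self.model.fit(X, y, **params)
31 | 
32 |     def partial_fit(self, X, y, **params):
33 |         self.model.partial_fit(X, y, **params)
34 | 
35 |     def predict(self, X, **params):
36 |         return self.model.predict(X, **params)
37 | 
38 |     def get_params(self):
39 |         return self.model.get_params()
40 | 
41 |     def set_params(self, **params):
42 |         self.model.set_params(**params)
43 | 
44 |     def extract(self):
45 |         # Hand the raw estimator back in a storable form
46 |         return self.model
47 | ```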
--------------------------------------------------------------------------------
/jamboree/handlers/processors/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import Any
3 | 
4 | 
5 | class DataProcessorsAbstract(ABC):
6 |     """ DataProcessor is used to transform data """
7 |     def __init__(self, name, **kwargs):
8 |         self._name = name
9 |         self.set_settings(**kwargs)
10 | 
11 |     def set_settings(self, **kwargs):
12 |         raise NotImplementedError(
13 |             "Need to set the settings you're expecting for this preprocessor"
14 |         )
15 | 
16 |     def process(self, data:Any) -> Any:
17 |         raise NotImplementedError(
18 |             "A command to preprocess information and return that info."
19 |         )
20 | 
--------------------------------------------------------------------------------
/jamboree/utils/core/fhash.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import ujson
3 | import orjson
4 | from cytoolz import keyfilter
5 | 
6 | 
7 | def consistent_hash(query: dict) -> str:
8 |     _hash = ujson.dumps(query, sort_keys=True)
9 |     _hash = base64.b64encode(str.encode(_hash))
10 |     _hash = _hash.decode('utf-8')
11 |     return _hash
12 | 
13 | def consistent_unhash(_hash:str) -> dict:
14 |     """ Take a consistent hash (sorted) and turn it back into a dictionary"""
15 |     decoded_hash = base64.b64decode(_hash).decode('utf-8')
16 |     _hash_dict = ujson.loads(decoded_hash)
17 |     return _hash_dict
18 | 
19 | 
20 | def omit(blacklist, d):
21 |     return keyfilter(lambda k: k not in blacklist, d)
22 | 
--------------------------------------------------------------------------------
/test/experiments/functions/files/compressed_pickle.py:
--------------------------------------------------------------------------------
1 | import cloudpickle as clp
2 | import lz4.frame
3 | 
4 | 
5 | def serialize(obj):
6 |     """ Should take a complex object and pickle it"""
7 |     pickled = clp.dumps(obj)
8 |     compressed = lz4.frame.compress(pickled)
9 |     return compressed
10 | 
11 | def deserialize(obj):
12 |     """ Should take a serialized object and unpickle it"""
13 |     decompressed = lz4.frame.decompress(obj)
14 |     unpickled = clp.loads(decompressed)
15 |     return unpickled
16 | 
17 | class SampleObject(object):
18 |     def __init__(self) -> None:
19 |         self.one = "IAHSUALKS"
20 |         self.two = "AYVUKASAVS"
21 | 
22 | def main():
23 |     sample = SampleObject()
24 |     ssample = serialize(sample)
25 |     dsample = deserialize(ssample)
26 |     assert sample.one == dsample.one
27 | 
28 | if __name__ == "__main__":
29 |     main()
--------------------------------------------------------------------------------
/jamboree/storage/README.md:
--------------------------------------------------------------------------------
1 | # Storage Models
2 | 
3 | The storage model will present a common interface for all common queries and write commands. They will be separated into two parts:
4 | 
5 | 1. Files (TBA)
6 |     * This will be for everything related to file management. The central idea behind it is that we'll be able to store gigabyte to terabyte sized files into cloud platforms, such as S3 & DataLake.
7 |     * We'll also have procedures to store information into memory, such as redis. We'll split the files at a higher level so they can be better handled.
8 | 2. Database Connection
9 |     * Since the main Jamboree object is starting to become bloated, the main goal here is to create something that would allow us to run through different datastores with little to no problem.
10 |     * We're starting with `mongodb` and `redis`, but with the abstracts available we'll be able to move into other data stores as well.
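11 | 
12 | A minimal sketch of that swappable-datastore idea (the method names here are hypothetical, not the real abstracts):
13 | 
14 | ```py
15 | from abc import ABC, abstractmethod
16 | 
17 | 
18 | class ConnectionSketch(ABC):
19 |     """Common interface every datastore connection implements."""
20 | 
21 |     @abstractmethod
22 |     def save(self, query: dict, data: dict):
23 |         ...
24 | 
25 | 
26 | class RedisConnectionSketch(ConnectionSketch):
27 |     def save(self, query: dict, data: dict):
28 |         ...  # write into the in-memory store
29 | 
30 | 
31 | class MongoConnectionSketch(ConnectionSketch):
32 |     def save(self, query: dict, data: dict):
33 |         ...  # write into long-term storage
34 | 
35 | 
36 | def persist(conn: ConnectionSketch, query: dict, data: dict):
37 |     conn.save(query, data)  # callers never care which store is behind it
38 | ```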
--------------------------------------------------------------------------------
/jamboree/utils/caches.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import weakref
3 | 
4 | from cytoolz import keyfilter
5 | 
6 | 
7 | def omit(blacklist, d):
8 |     return keyfilter(lambda k: k not in blacklist, d)
9 | 
10 | def memoized_method(*lru_args, **lru_kwargs):
11 |     def decorator(func):
12 |         @functools.wraps(func)
13 |         def wrapped_func(self, *args, **kwargs):
14 |             # We're storing the wrapped method inside the instance. If we had
15 |             # a strong reference to self the instance would never die.
16 |             self_weak = weakref.ref(self)
17 |             @functools.wraps(func)
18 |             @functools.lru_cache(*lru_args, **lru_kwargs)
19 |             def cached_method(*args, **kwargs):
20 |                 return func(self_weak(), *args, **kwargs)
21 |             setattr(self, func.__name__, cached_method)
22 |             return cached_method(*args, **kwargs)
23 |         return wrapped_func
24 |     return decorator
--------------------------------------------------------------------------------
/jamboree/base/processors/main.py:
--------------------------------------------------------------------------------
1 | from redis import Redis
2 | from jamboree.base.processors.abstracts import Processor
3 | from jamboree.base.processors.event import JamboreeEvents
4 | from jamboree.base.processors.files import JamboreeFileProcessor
5 | class Jamboree(Processor):
6 |     def __init__(self, **kwargs) -> None:
7 |         super().__init__()
8 | 
9 |         redis_host = kwargs.get("REDIS_HOST", "localhost")
10 |         redis_port = int(kwargs.get("REDIS_PORT", "6379"))
11 |         mongo_host = kwargs.get("MONGO_HOST", "localhost")
12 |         rconn = Redis(host=redis_host, port=redis_port)
13 |         # redis.Redis(redis_host, port=redis_port)
14 | 
15 |         self.event = JamboreeEvents(
16 |             mongodb_host=mongo_host,
17 |             redis_host=redis_host,
18 |             redis_port=redis_port
19 |         )
20 | 
21 |         # Set the files management here
22 |         self.storage = JamboreeFileProcessor()
23 |         self.storage.rconn = rconn
24 |         self.event.rconn = rconn
25 |         self.event.initialize()
26 |         self.storage.initialize()
27 | 
--------------------------------------------------------------------------------
/jamboree/base/core.py:
--------------------------------------------------------------------------------
1 | from redis import Redis
2 | from jamboree.base.processors.abstracts import Processor
3 | from jamboree.base.processors.event import JamboreeEvents
4 | from jamboree.base.processors.files import JamboreeFileProcessor
5 | 
6 | class Jamboree(Processor):
7 |     def __init__(self, **kwargs) -> None:
8 |         super().__init__()
9 | 
10 |         redis_host = kwargs.get("REDIS_HOST", "localhost")
11 |         redis_port = int(kwargs.get("REDIS_PORT", "6379"))
12 |         mongo_host = kwargs.get("MONGO_HOST", "localhost")
13 |         rconn = Redis(host=redis_host, port=redis_port)
14 |         # redis.Redis(redis_host, port=redis_port)
15 | 
16 |         self.event = JamboreeEvents(
17 |             mongodb_host=mongo_host,
18 |             redis_host=redis_host,
19 |             redis_port=redis_port
20 |         )
21 | 
22 |         # Set the files management here
23 |         self.storage = JamboreeFileProcessor()
24 |         self.storage.rconn = rconn
25 |         self.event.rconn = rconn
26 |         self.event.initialize()
27 |         self.storage.initialize()
28 |         self.rconn = rconn
--------------------------------------------------------------------------------
/jamboree/handlers/complex/README.md:
--------------------------------------------------------------------------------
1 | # Complex Handlers
2 | 
3 | Complex handlers have multiple types included inside that need to be synced. They're more common inside of Linkkt's proprietary systems. These will be systems we'll leave a bit more exposed. A good set of examples:
4 | 
5 | 1. MetaHandler
6 |     1. Will keep track of all metadata for our system
7 |     2. Metadata will be overwritable records and also include the future search handler
8 |     3. The search handler will help us find associated data
9 | 2. Metric
10 |     1. The metric handler will extend from the DBHandler, though it'll also
11 |         1. Be searchable by including our MetaHandler
12 |         2. Attach our TimeHandler so we can dynamically backtest and see a model perform over time
13 | 3. ModelHandler
14 |     1. Will extend from the BlobHandler
15 |     2. It'll also include a `MetricHandler` and `MetaHandler`
16 |     3. The `MetricHandler` will allow us to monitor how predictions are progressing over time
17 |         1. Since this has a `TimeHandler` included we'll be able to track metrics for a given model
18 |         2. Since this also has its own designated `MetaHandler` we'll be able to search for a model's effectiveness over time from a different system.
19 |     4. The `MetaHandler` will also help us find the model later, as the sketch below shows
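20 | 
21 | A minimal, self-contained sketch of that composition (all class names below are illustrative stand-ins, not the real handlers):
22 | 
23 | ```py
24 | class MetaHandlerSketch:
25 |     """Keeps searchable metadata so records can be found later."""
26 | 
27 | 
28 | class TimeHandlerSketch:
29 |     """Steps through episode time for dynamic backtests."""
30 | 
31 | 
32 | class MetricHandlerSketch:
33 |     """DB-style handler that is both searchable and time-aware."""
34 | 
35 |     def __init__(self):
36 |         self.meta = MetaHandlerSketch()
37 |         self.time = TimeHandlerSketch()
38 | 
39 | 
40 | class ModelHandlerSketch:
41 |     """Blob-style handler that also tracks its own metrics and metadata."""
42 | 
43 |     def __init__(self):
44 |         self.metrics = MetricHandlerSketch()  # how predictions progress over time
45 |         self.meta = MetaHandlerSketch()       # makes the model findable later
46 | ```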
--------------------------------------------------------------------------------
/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 2.1
2 | Name: jamboree
3 | Version: 0.5.2
4 | Summary: A multi-layer event sourcing and general data library
5 | Author: Kevin Hill
6 | Author-email: kah.kevin.hill@gmail.com
7 | Requires-Python: >=3.7,<4.0
8 | Classifier: Programming Language :: Python :: 3
9 | Classifier: Programming Language :: Python :: 3.7
10 | Classifier: Programming Language :: Python :: 3.8
11 | Requires-Dist: addict (>=2.2.1,<3.0.0)
12 | Requires-Dist: crayons (>=0.3.0,<0.4.0)
13 | Requires-Dist: cytoolz (>=0.10.1,<0.11.0)
14 | Requires-Dist: dill (>=0.3.1,<0.4.0)
15 | Requires-Dist: funtime (>=0.4.7,<0.5.0)
16 | Requires-Dist: gym (>=0.17.1,<0.18.0)
17 | Requires-Dist: json-tricks (>=3.14.0,<4.0.0)
18 | Requires-Dist: loguru (>=0.4.1,<0.5.0)
19 | Requires-Dist: lz4 (>=3.0.2,<4.0.0)
20 | Requires-Dist: maya (>=0.6.1,<0.7.0)
21 | Requires-Dist: numpy
22 | Requires-Dist: pandas (>=1.0.3,<2.0.0)
23 | Requires-Dist: pandas_datareader (>=0.8.1,<0.9.0)
24 | Requires-Dist: pebble (>=4.5.1,<5.0.0)
25 | Requires-Dist: pytest (>=5.4.1,<6.0.0)
26 | Requires-Dist: redis (==3.3.11)
27 | Requires-Dist: sklearn (>=0.0,<0.1)
28 | Requires-Dist: torch (>=1.4.0,<2.0.0)
29 | Requires-Dist: torchvision (>=0.5.0,<0.6.0)
30 | Requires-Dist: ujson (>=2.0.2,<3.0.0)
31 | Requires-Dist: version_query (>=1.1.0,<2.0.0)
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "jamboree"
3 | version = "0.8.8"
4 | description = "A multi-layer event sourcing and general data library"
5 | authors = ["Kevin Hill <kah.kevin.hill@gmail.com>"]
6 | 
7 | [tool.poetry.dependencies]
8 | python = "^3.6.8"
9 | numpy = "*"
10 | pandas = "^1.0.3"
11 | loguru = "^0.4.1"
12 | pebble = "^4.5.1"
13 | maya = "^0.6.1"
14 | ujson = "^2.0.2"
15 | gym = "^0.17.1"
16 | lz4 = "^3.0.2"
17 | cytoolz = "^0.10.1"
18 | pytest = "^5.4.1"
19 | addict = "^2.2.1"
20 | version_query = "^1.1.0"
21 | redis = "3.3.11"
22 | pandas_datareader = "^0.8.1"
23 | dill = "^0.3.1"
24 | json-tricks = "^3.14.0"
25 | sklearn = "^0.0"
26 | crayons = "^0.3.0"
27 | skorch = "^0.7.0"
28 | creme = "^0.5.0"
29 | jupyter = "^1.0.0"
30 | cerberus = "^1.3.2"
31 | hiredis = "^1.0.1"
32 | eliot = "^1.12.0"
33 | eliot-tree = "^19.0.1"
34 | yfinance = "^0.1.54"
35 | anycache = "^2.0.7"
36 | tqdm = "^4.45.0"
37 | orjson = "^3.0.2"
38 | pydantic = "^1.5.1"
39 | redisearch = "^0.9.0"
40 | matplotlib = "^3.2.1"
41 | pillow = "^7.2.0"
42 | 
43 | 
44 | 
45 | [tool.poetry.dev-dependencies]
46 | pylint = "^2.5.2"
47 | black = {version = "^19.10b0", allow-prereleases = true}
48 | flake8 = "^3.8.3"
49 | mypy = "^0.782"
50 | yapf = "^0.30.0"
51 | [build-system]
52 | requires = ["poetry>=0.12"]
53 | build-backend = "poetry.masonry.api"
54 | 
--------------------------------------------------------------------------------
/jamboree/middleware/procedures/README.md:
--------------------------------------------------------------------------------
1 | # Procedures
2 | 
3 | Procedures are abstracts that we use to call things in consistent ways. The core example is a model procedure. Multiple different models are called in multiple different ways yet they can look extremely consistent on the surface. For example, calling a `fit` and `partial_fit` can look the same for all major machine learning libraries.
4 | 
5 | Let's compare using `sklearn` and `creme-ml` for a basic fit example.
6 | 
7 | 
8 | ```py
9 | class SKLearnProcedure(object):
10 |     def __init__(self, *args, **kwargs):
11 |         pass
12 | 
13 |     def partial_fit(self, data):
14 |         pass
15 | 
16 |     def fit(self, data):
17 |         pass
18 | 
19 | 
20 | 
21 | class CremeProcedure(object):
22 |     def __init__(self, *args, **kwargs):
23 |         pass
24 | 
25 |     def partial_fit(self, data):
26 |         # All steps go here
27 |         pass
28 | 
29 |     def fit(self, data):
30 |         # All steps go here
31 |         pass
32 | ```
33 | 
34 | 
35 | We can make the exact same calls using the exact same data and get exactly what we need.
36 | 
37 | 
38 | ```py
39 | data = pd.DataFrame()
40 | 
41 | creme_model = CremeProcedure()
42 | sklearn_model = SKLearnProcedure()
43 | 
44 | 
45 | sklearn_model.fit(data)
46 | creme_model.fit(data)
47 | ```
48 | 
49 | Each one of these models will have procedures to handle what's inputted into them.
--------------------------------------------------------------------------------
/jamboree/base/processors/abstracts/files.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import List
3 | 
4 | class FileProcessor(ABC):
5 |     """
6 |     # File processor abstract.
7 | 
8 |     Deals with all blobs and files.
9 |     """
10 | 
11 |     def initialize(self):
12 |         pass
13 | 
14 |     def save(self, query: dict, data: dict, **kwargs):
15 |         """ Save a single blob of data. """
16 |         raise NotImplementedError
17 | 
18 |     def save_version(self, query, **kwargs):
19 |         """ Save a single blob of data at a given version. """
20 |         raise NotImplementedError
21 | 
22 |     def query(self, query, **kwargs):
23 |         """ Query a blob of data. Get the latest """
24 |         raise NotImplementedError
25 | 
26 |     def query_version(self, query, **kwargs):
27 |         """ Query an explicit version of data """
28 |         raise NotImplementedError
29 | 
30 |     def delete(self, query, **kwargs):
31 |         """ Delete the latest version of data """
32 |         raise NotImplementedError
33 | 
34 |     def delete_version(self, query:dict, **kwargs):
35 |         """ Delete a given version of data if it exists """
36 |         raise NotImplementedError
37 | 
38 |     def delete_all(self, query:dict, **kwargs):
39 |         """ Purge everything """
40 |         raise NotImplementedError
41 | 
42 |     def absolute_exists(self, query:dict, **kwargs):
43 |         raise NotImplementedError
--------------------------------------------------------------------------------
/jamboree/handlers/abstracted/search/meta.py:
--------------------------------------------------------------------------------
1 | from jamboree.handlers.abstracted.search import ParameterizedSearch
2 | from jamboree.handlers.default.search import BaseSearchHandler
3 | 
4 | 
5 | class MetadataSearchHandler(ParameterizedSearch):
6 |     """
7 |     # Metatypes
8 | 
9 |     The metatypes are:
10 | 
11 |     1. Strategy
12 |     2. Data
13 |     3. Model
14 |     4. Meta
15 | 
16 |     """
17 |     def __init__(self):
18 |         super().__init__()
19 |         self.entity = "metadata"
20 |         self.dreq = {
21 |             "name": str,
22 |             # The type of metadata we're positioning
23 |             # strategy, data, model, metainfo (metadata about metadata) are the clear items in mind
24 |             "metatype": str,
25 |             # another identifiable metatype to narrow down results
26 |             # pricing(data), economic, weather (data), social(data), political (data), features (data)
27 |             # batch (models), online (models), micro (strategies), macro (strategies), supporting_group (complex)
28 |             "submetatype": str,
29 |             "category": str,
30 |             "subcategories": dict,
31 |             "description": str,
32 |             "info": dict,
33 |             # The location about the information involved
34 |             "location": "GEO",
35 |             "abbreviation": str
36 |         }
37 |         self.must_have = ['name', 'metatype', 'category', 'submetatype', 'abbreviation']
38 | 
--------------------------------------------------------------------------------
/jamboree/utils/support/search/assistance/keystore.py:
--------------------------------------------------------------------------------
1 | """
2 | Temporarily stores all keys that we'll possibly be using later.
3 | 
4 | Entirely used to get the subdocuments by id.
5 | Store all subdocuments by super_id
6 | 
7 | 
8 | """
9 | 
10 | from addict import Dict
11 | 
12 | class Keystore(object):
13 |     def __init__(self):
14 |         self.store = Dict()
15 | 
16 | 
17 |     def add_by_superid(self, superid:str, key:str, _dict:dict):
18 |         # _dict.pop("super_id", None)
19 |         _dict.pop("id", None)
20 |         _dict.pop("payload", None)
21 | 
22 |         item = {
23 |             key: _dict
24 |         }
25 |         super_item = {
26 |             str(superid): item
27 |         }
28 |         self.store.update(super_item)
29 | 
30 |     def get_by_superid(self, superid:str):
31 |         if superid in self.store:
32 |             return self.store[superid]
33 |         return {}
34 | 
35 | 
36 |     def add(self, superid:str, key:str, _dict:dict):
37 |         _dict.pop("super_id", None)
38 |         _dict.pop("id", None)
39 |         _dict.pop("payload", None)
40 | 
41 |         item = {
42 |             key: _dict
43 |         }
44 |         super_item = {
45 |             str(superid): item
46 |         }
47 |         self.store.update(super_item)
48 | 
49 |     def get(self, superid:str):
50 |         if superid in self.store:
51 |             return self.store[superid]
52 |         return {}
53 | 
54 |     def reset(self):
55 |         self.store = Dict()
--------------------------------------------------------------------------------
") 28 | return self._storage 29 | 30 | @storage.setter 31 | def storage(self, _storage:FileProcessor): 32 | self._storage = _storage 33 | 34 | @property 35 | def search(self) -> SearchProcessor: 36 | if not isinstance(self._search, SearchProcessor): 37 | raise AttributeError("SearchProcessor not added yet ... ") 38 | return self._search 39 | 40 | def search(self, _search:SearchProcessor): 41 | self._search = self.search -------------------------------------------------------------------------------- /jamboree/utils/support/search/querying.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | class text(object): 3 | @staticmethod 4 | def exact(term): 5 | return { 6 | "type": "TEXT", 7 | "is_filter": True, 8 | "values": { 9 | "term": term, 10 | "is_exact": True 11 | } 12 | } 13 | 14 | @staticmethod 15 | def fuzzy(term): 16 | return { 17 | "type": "TEXT", 18 | "is_filter": True, 19 | "values": { 20 | "term": f"%{term}%", 21 | "is_exact": False 22 | } 23 | } 24 | 25 | @staticmethod 26 | def orlist(terms:List[str], is_bundle=False): 27 | _term = text.orliststr(terms, is_bundle) 28 | return { 29 | "type": "TEXT", 30 | "is_filter": True, 31 | "values": { 32 | "term": _term, 33 | "is_exact": False 34 | } 35 | } 36 | 37 | @staticmethod 38 | def orliststr(terms:List[str], is_bundle=False): 39 | if len(terms) == 0: 40 | return "" 41 | _term = "|".join(terms) 42 | if is_bundle: 43 | _temp = f"({_term})" 44 | _term = _temp 45 | return _term 46 | 47 | 48 | class tags(object): 49 | 50 | @staticmethod 51 | def andfieldstr(field, items:List[str]): 52 | if len(items) == 0: 53 | return "" 54 | 55 | and_fields_str = [f"{field}:{item} " for item in items] 56 | return and_fields_str -------------------------------------------------------------------------------- /jamboree/middleware/procedures/models/_flow.py: -------------------------------------------------------------------------------- 1 | from addict import Dict 2 | from sklearn.base import BaseEstimator 3 | from jamboree.middleware.procedures import ModelProcedureAbstract 4 | from sklearn.datasets import make_friedman2 5 | from sklearn.gaussian_process import GaussianProcessRegressor 6 | from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel 7 | from loguru import logger 8 | 9 | 10 | """ TODO: FIX THIS CRAP!!!""" 11 | 12 | 13 | class TFKerasProcedure(ModelProcedureAbstract): 14 | def __init__(self, *args, **kwargs) -> None: 15 | super().__init__() 16 | self.requirements.model = True 17 | self.requirements.criterion = False 18 | self.requirements.optimizer = False 19 | 20 | # types = Dict() 21 | # types.model = BaseEstimator 22 | 23 | self.types.model = BaseEstimator 24 | 25 | @logger.catch 26 | def get_params(self): 27 | self.verify() 28 | return self.dictionary.model.get_params() 29 | 30 | @logger.catch 31 | def predict(self, X, **kwargs): 32 | self.verify() 33 | return self.dictionary.model.predict(X, **kwargs) 34 | 35 | @logger.catch 36 | def predict_prob(self, X, **kwargs): 37 | self.verify() 38 | return self.dictionary.model.predict_prob(X, **kwargs) 39 | 40 | @logger.catch 41 | def partial_fit(self, X, y, **kwargs): 42 | self.verify() 43 | self.dictionary.model.partial_fit(X, y, **kwargs) 44 | 45 | def fit(self, X, y, **kwargs): 46 | self.verify() 47 | self.dictionary.model.fit(X, y, **kwargs) 48 | # print(self.mdict.model.predict(X[:2,:], return_std=True)) -------------------------------------------------------------------------------- 
/jamboree/base/processors/abstracts/legacy.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import List
3 | 
4 | 
5 | class LegacyProcessor(ABC):
6 |     """ Use to make the main jamboree object still function without a problem. We have a few ways to explore this concept."""
7 |     def save(self, query: dict, data: dict, abs_rel="absolute"):
8 |         raise NotImplementedError
9 | 
10 | 
11 |     def save_many(self, query: dict, data: List[dict], abs_rel="absolute"):
12 |         raise NotImplementedError
13 | 
14 | 
15 |     def get_latest(self, query, abs_rel="absolute") -> dict:
16 |         raise NotImplementedError
17 | 
18 | 
19 |     def get_latest_many(self, query, abs_rel="absolute", limit=1000):
20 |         raise NotImplementedError
21 | 
22 | 
23 |     def get_between(self, query:dict, min_epoch:float, max_epoch:float, abs_rel:str="absolute") -> list:
24 |         raise NotImplementedError
25 | 
26 | 
27 |     def get_latest_by(self, query:dict, max_epoch, abs_rel="absolute", limit:int=10) -> dict:
28 |         raise NotImplementedError
29 | 
30 | 
31 |     def count(self, query: dict) -> int:
32 |         raise NotImplementedError
33 | 
34 | 
35 |     def remove_first(self, query: dict):
36 |         raise NotImplementedError
37 | 
38 | 
39 |     def pop_multiple(self, query: dict, limit: int):
40 |         raise NotImplementedError
41 | 
42 | 
43 |     def _bulk_save(self, query: dict, data: list):
44 |         raise NotImplementedError
45 | 
46 | 
47 |     def single_get(self, query:dict):
48 |         raise NotImplementedError
49 | 
50 | 
51 |     def single_set(self, query:dict, data:dict):
52 |         raise NotImplementedError
53 | 
54 | 
55 |     def single_delete(self, query:dict):
56 |         raise NotImplementedError
--------------------------------------------------------------------------------
/jamboree/handlers/default/access.py:
--------------------------------------------------------------------------------
1 | import uuid
2 | from typing import Any, AnyStr, Dict
3 | from jamboree.handlers.default.db import DBHandler
4 | 
5 | 
6 | 
7 | class Access(DBHandler):
8 |     # ---------------------------------------------------------------------------------
9 |     # Simple Accessor Properties
10 |     # ---------------------------------------------------------------------------------
11 | 
12 |     @property
13 |     def name(self):
14 |         return self['name']
15 | 
16 |     @name.setter
17 |     def name(self, __name: str):
18 |         self['name'] = __name
19 | 
20 | 
21 |     @property
22 |     def category(self) -> str:
23 |         return self['category']
24 | 
25 |     @category.setter
26 |     def category(self, _category: str):
27 |         self['category'] = _category
28 | 
29 |     @property
30 |     def subcategories(self) -> str:
31 |         return self['subcategories']
32 | 
33 |     @subcategories.setter
34 |     def subcategories(self, __subcategories: Dict[AnyStr, Any]):
35 |         self['subcategories'] = __subcategories
36 | 
37 |     @property
38 |     def metatype(self) -> str:
39 |         return self['metatype']
40 | 
41 |     @metatype.setter
42 |     def metatype(self, __metatype: str):
43 |         self['metatype'] = __metatype
44 | 
45 |     @property
46 |     def submetatype(self) -> str:
47 |         return self['submetatype']
48 | 
49 |     @submetatype.setter
50 |     def submetatype(self, __submetatype: str):
51 |         self['submetatype'] = __submetatype
52 | 
53 |     @property
54 |     def abbreviation(self) -> str:
55 |         return self['abbreviation']
56 | 
57 |     @abbreviation.setter
58 |     def abbreviation(self, __abb: str):
59 |         self['abbreviation'] = __abb
--------------------------------------------------------------------------------
/jamboree/base/processors/abstracts/event.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import List
3 | 
4 | 
5 | class EventProcessor(ABC):
6 |     def initialize(self):
7 |         raise NotImplementedError
8 | 
9 |     def save(self, query: dict, data: dict, abs_rel="absolute"):
10 |         raise NotImplementedError
11 | 
12 | 
13 |     def save_many(self, query: dict, data: List[dict], abs_rel="absolute"):
14 |         raise NotImplementedError
15 | 
16 | 
17 |     def get_latest(self, query, abs_rel="absolute") -> dict:
18 |         raise NotImplementedError
19 | 
20 | 
21 |     def get_latest_many(self, query, abs_rel="absolute", limit=1000):
22 |         raise NotImplementedError
23 | 
24 | 
25 |     def get_between(self, query:dict, min_epoch:float, max_epoch:float, abs_rel:str="absolute") -> list:
26 |         raise NotImplementedError
27 | 
28 | 
29 |     def get_latest_by(self, query:dict, max_epoch, abs_rel="absolute", limit:int=10) -> dict:
30 |         raise NotImplementedError
31 | 
32 | 
33 |     def get_all(self, query:dict, abs_rel:str="relative"):
34 |         raise NotImplementedError
35 | 
36 |     def count(self, query: dict) -> int:
37 |         raise NotImplementedError
38 | 
39 | 
40 |     def remove_first(self, query: dict):
41 |         raise NotImplementedError
42 | 
43 | 
44 |     def pop_multiple(self, query: dict, limit: int):
45 |         raise NotImplementedError
46 | 
47 | 
48 |     def _bulk_save(self, query: dict, data: list):
49 |         raise NotImplementedError
50 | 
51 | 
52 |     def single_get(self, query:dict):
53 |         raise NotImplementedError
54 | 
55 | 
56 |     def single_set(self, query:dict, data:dict):
57 |         raise NotImplementedError
58 | 
59 | 
60 |     def single_delete(self, query:dict):
61 |         raise NotImplementedError
62 | 
63 |     def lock(self, query):
64 |         raise NotImplementedError
--------------------------------------------------------------------------------
/examples/user_settings.py:
--------------------------------------------------------------------------------
1 | 
2 | import random
3 | import time
4 | import uuid
5 | from contextlib import ContextDecorator
6 | from copy import copy
7 | from pprint import pprint
8 | from random import randint
9 | 
10 | import maya
11 | import numpy as np
12 | import pandas as pd
13 | from loguru import logger
14 | from toolz.itertoolz import pluck
15 | 
16 | import vaex
17 | from jamboree import DBHandler, Jamboree
18 | 
19 | 
20 | class UserSettingsHandler(DBHandler):
21 |     """Abstract handler that we use to keep track of information.
22 | """ 23 | 24 | def __init__(self, **kwargs): 25 | super().__init__() 26 | self.entity = "user_settings" 27 | self.required = { 28 | "email": str, 29 | "episode": str 30 | } 31 | self._limit = 100 32 | self._settings_handler = None 33 | 34 | @property 35 | def limit(self): 36 | """ The maximum number of records we intend to get when calling the many function.""" 37 | return self._limit 38 | 39 | @limit.setter 40 | def limit(self, limit): 41 | self._limit = limit 42 | 43 | def is_authenticated(self): 44 | return True 45 | 46 | def is_active(self): 47 | return True 48 | 49 | def is_anonymous(self): 50 | return False 51 | 52 | def register(self, password:str, confirm:str): 53 | pass 54 | 55 | def login(self, password:str): 56 | pass 57 | 58 | def logout(self): 59 | pass 60 | 61 | def session(self): 62 | pass 63 | 64 | 65 | def deactivate(self): 66 | pass 67 | 68 | def reactivate(self): 69 | pass 70 | 71 | 72 | def latest_user(self): 73 | pass 74 | 75 | def save_user(self): 76 | pass 77 | 78 | 79 | 80 | def flip(n=0.02): 81 | if n >= random.uniform(0, 1): 82 | return True 83 | return False 84 | -------------------------------------------------------------------------------- /jamboree/utils/context/main.py: -------------------------------------------------------------------------------- 1 | from contextlib import ContextDecorator, contextmanager 2 | 3 | import maya 4 | import redis 5 | from loguru import logger 6 | from redis.exceptions import WatchError 7 | 8 | 9 | class timecontext(ContextDecorator): 10 | def __enter__(self): 11 | self.start = maya.now()._epoch 12 | return self 13 | 14 | def __exit__(self, *exc): 15 | self.end = maya.now()._epoch 16 | delta = self.end - self.start 17 | logger.success(f"It took {delta}s") 18 | logger.success(f"It took {(delta*1000)}ms") 19 | return False 20 | 21 | 22 | @contextmanager 23 | def watch_loop(): 24 | while True: 25 | try: 26 | yield 27 | break 28 | except WatchError: 29 | continue 30 | 31 | 32 | def watch_loop_callback(callback): 33 | while True: 34 | try: 35 | callback() 36 | break 37 | except WatchError: 38 | continue 39 | 40 | 41 | class example_space(ContextDecorator): 42 | def __init__(self, name) -> None: 43 | self.name = name 44 | self.is_pass = True 45 | self.start = maya.now()._epoch 46 | 47 | def __enter__(self): 48 | 49 | return self 50 | 51 | def failed(self): 52 | self.is_pass = False 53 | 54 | def __exit__(self, type, value, traceback): 55 | self.end = maya.now()._epoch 56 | delta = self.end - self.start 57 | if value is not None or self.is_pass == False: 58 | logger.error( 59 | "----------------------------------------- Example didn't pass --------------------------------------------" 60 | ) 61 | else: 62 | logger.success( 63 | "------------------------------------------ Example did pass ----------------------------------------------" 64 | ) 65 | logger.info(f"It took {delta}ms") 66 | return False 67 | 68 | 69 | if __name__ == "__main__": 70 | with example_space("Printing") as example: 71 | print("Don't want to kill my vibe") 72 | # example.failed() 73 | -------------------------------------------------------------------------------- /jamboree/handlers/base.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from abc import ABC, ABCMeta 3 | from typing import Dict, Any, List 4 | 5 | 6 | class BaseHandler(object, metaclass=ABCMeta): 7 | """ 8 | A way to handle reads and writes consistently without having to write every single variable: 9 | """ 10 | 11 | def __init__(self): 12 | 
12 |         pass
13 | 
14 |     def check(self):
15 |         raise NotImplementedError
16 | 
17 |     def save(self, data: dict):
18 |         raise NotImplementedError
19 | 
20 |     def _bulk_save(self, query: dict, data: list):
21 |         raise NotImplementedError
22 | 
23 |     def _get_many(self, limit: int, ar: str, alt={}):
24 |         raise NotImplementedError
25 | 
26 |     def last(self):
27 |         raise NotImplementedError
28 | 
29 |     def many(self, limit: int = 100):
30 |         raise NotImplementedError
31 | 
32 |     def save_many(self, query: dict, data: list):
33 |         raise NotImplementedError
34 | 
35 |     def pop_multiple(self, query, _limit: int = 1):
36 |         raise NotImplementedError
37 | 
38 |     def swap(self, query, alt: dict = {}):
39 |         """ Swap between the first and last item """
40 |         raise NotImplementedError
41 | 
42 |     def query_mix(self, query: dict, alt: dict = {}):
43 |         raise NotImplementedError
44 | 
45 | 
46 | class BaseFileHandler(object, metaclass=ABCMeta):
47 |     """
48 |     A way to handle reads and writes consistently without having to write every single variable:
49 |     """
50 | 
51 |     def __init__(self):
52 |         pass
53 | 
54 |     def check(self):
55 |         raise NotImplementedError
56 | 
57 |     def save(self, data: dict):
58 |         raise NotImplementedError
59 | 
60 |     def save_version(self, query:dict, data):
61 |         pass
62 | 
63 |     def last(self):
64 |         raise NotImplementedError
65 | 
66 |     def many(self, limit: int = 100):
67 |         raise NotImplementedError
68 | 
69 |     def save_many(self, query: dict, data: list):
70 |         raise NotImplementedError
71 | 
72 |     def delete(self, query:dict):
73 |         raise NotImplementedError
74 | 
75 |     def delete_version(self, query:dict, version:str):
76 |         raise NotImplementedError
--------------------------------------------------------------------------------
/docs/readme/How Jamboree Works.md:
--------------------------------------------------------------------------------
1 | # Key Generation
2 | Here we test how we're going to generate a key for redis so we can query for information later.
3 | 
4 | 
5 | ```python
6 | %pwd
7 | ```
8 | 
9 | 
10 | 
11 | 
12 |     '/home/skywalker/PycharmProjects/jamboree/test/notebooks'
13 | 
14 | 
15 | 
16 | 
17 | ```python
18 | %cd ../..
19 | ```
20 | 
21 |     /home/skywalker/PycharmProjects/jamboree
22 | 
23 | 
24 | 
25 | ```python
26 | import orjson
27 | import maya
28 | import random
29 | ```
30 | 
31 | 
32 | ```python
33 | from jamboree.utils.helper import Helpers
34 | ```
35 | 
36 | 
37 | ```python
38 | helpers = Helpers()
39 | ```
40 | 
41 | 
42 | ```python
43 | sample_key = helpers.generate_hash({"type":"hello_world", "name": "Jamboree", "count": 0})
44 | ```
45 | 
46 | 
47 | ```python
48 | print(f"The sample key is: '{sample_key}'")
49 | ```
50 | 
51 |     The sample key is: 'eyJjb3VudCI6MCwibmFtZSI6IkphbWJvcmVlIiwidHlwZSI6ImhlbGxvX3dvcmxkIn0='
52 | 
53 | 
54 | 
55 | ## How we use the generated key to create an event source
56 | 
57 | We'd then store that key into redis and start appending other serialized variables into a list. It looks like the following diagram.
58 | 
59 | 
60 | ```python
61 | orjson.dumps({"hello": "world", "number": random.uniform(0, 100), "timestamp": maya.now()._epoch})
62 | ```
63 | 
64 | 
65 | 
66 | 
67 |     b'{"hello":"world","number":20.127252760805113,"timestamp":1579529262.278698}'
68 | 
69 | 
70 | 
71 | # Event Diagram
72 | 
73 | Here's a diagram of an event source.
74 | 
75 | ![redis picture](../../docs/redis_event_source.png)
76 | 
77 | We continuously push information into a key, at the tail end of the list. A more recent version of the event sourcing system uses `zsets` (sorted sets) to query timing. This actually allows us to have better time indexing.
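78 | 
79 | As a minimal sketch (assuming a local Redis instance and reusing `sample_key` from above; the `:zset` key suffix is only illustrative), the append looks like this:
80 | 
81 | ```python
82 | import maya
83 | import orjson
84 | import redis
85 | 
86 | r = redis.Redis()
87 | record = orjson.dumps({"hello": "world", "timestamp": maya.now()._epoch})
88 | 
89 | # Append the serialized event to the tail of the list at the hash key ...
90 | r.rpush(sample_key, record)
91 | # ... or score it by epoch inside a sorted set for time-range queries
92 | r.zadd(f"{sample_key}:zset", {record: maya.now()._epoch})
93 | ```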
91 | 
92 | # Double layer storage system
93 | 
94 | The Jamboree system uses a double-layer storage system. The general idea: the top layer stores information largely in memory for fast access, while the bottom layer stores it in long-term storage. We periodically pull records from the hard-storage database and repopulate the in-memory database.
95 | 
96 | ![redis picture](../../docs/redis_mongo_layer.png)
97 | 
98 | A minimal sketch of that read-through flow, with hypothetical `hot` and `cold` stores standing in for the Redis and MongoDB layers:
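99 | 
100 | ```python
101 | # Hypothetical sketch; the `hot` (Redis-like) and `cold` (Mongo-like) stores
102 | # and the helper names are illustrative, not Jamboree's actual API.
103 | def query_latest(key, hot, cold, limit=100):
104 |     records = hot.get(key)           # top layer: in-memory, fast
105 |     if records:
106 |         return records[-limit:]
107 |     records = cold.find(key, limit)  # bottom layer: long-term storage
108 |     hot.set(key, records)            # repopulate the in-memory layer
109 |     return records
110 | ```
111 | 
-------------------------------------------------------------------------------- /jamboree/handlers/complex/backtestable/files.py: --------------------------------------------------------------------------------
1 | import uuid
2 | from loguru import logger
3 | 
4 | import maya
5 | import pandas as pd
6 | 
7 | from jamboree import Jamboree
8 | from jamboree import JamboreeNew
9 | 
10 | from jamboree.handlers.complex.meta import MetaHandler
11 | # from jamboree.handlers.complex.metric import MetricHandler
12 | 
13 | from jamboree.handlers.default.time import TimeHandler
14 | from jamboree.handlers.default.db import DBHandler
15 | from jamboree.handlers.default import BlobStorageHandler
16 | 
17 | class BacktestBlobHandler(BlobStorageHandler):
18 |     def __init__(self):
19 |         super().__init__()
20 | 
21 | 
22 |         # Other objects to consider
23 |         self._time:TimeHandler = TimeHandler()
24 |         self._meta:MetaHandler = MetaHandler()
25 |         self._episode = uuid.uuid4().hex
26 | 
27 | 
28 |         self._is_live = False
29 |         self.is_event = False # use to make sure there's absolutely no duplicate data
30 | 
31 |         # Create a context for this to remove complete dependency from the model handler.
32 | 
33 |     @property
34 |     def episode(self) -> str:
35 |         return self._episode
36 | 
37 |     @episode.setter
38 |     def episode(self, _episode:str):
39 |         self._episode = _episode
40 | 
41 |     @property
42 |     def live(self) -> bool:
43 |         return self._is_live
44 | 
45 |     @live.setter
46 |     def live(self, _live:bool):
47 |         self._is_live = _live
48 | 
49 |     @property
50 |     def time(self) -> 'TimeHandler':
51 |         # self._time.event = self.event
52 |         self._time.processor = self.processor
53 |         self._time['episode'] = self.episode
54 |         self._time['live'] = self.live
55 |         return self._time
56 | 
57 |     @time.setter
58 |     def time(self, _time:'TimeHandler'):
59 |         self._time = _time
60 | 
61 | 
62 |     def reset(self):
63 |         """ Reset the data we're querying for.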
""" 64 | self.time.reset() 65 | 66 | 67 | 68 | def __str__(self) -> str: 69 | name = self["name"] 70 | category = self["category"] 71 | subcategories = self["subcategories"] 72 | jscat = self.main_helper.generate_hash(subcategories) 73 | return f"{name}:{category}:{jscat}" -------------------------------------------------------------------------------- /jamboree/handlers/complex/backtestable/default/files.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from loguru import logger 3 | 4 | import maya 5 | import pandas as pd 6 | 7 | from jamboree import Jamboree 8 | from jamboree import JamboreeNew 9 | 10 | from jamboree.handlers.complex.meta import MetaHandler 11 | # from jamboree.handlers.complex.metric import MetricHandler 12 | 13 | from jamboree.handlers.default.time import TimeHandler 14 | from jamboree.handlers.default.db import DBHandler 15 | from jamboree.handlers.default import BlobStorageHandler 16 | 17 | class BacktestBlobHandler(BlobStorageHandler): 18 | def __init__(self): 19 | super().__init__() 20 | 21 | 22 | # Other objects to consider 23 | self._time:TimeHandler = TimeHandler() 24 | self._meta:MetaHandler = MetaHandler() 25 | self._episode = uuid.uuid4().hex 26 | 27 | 28 | self._is_live = False 29 | self.is_event = False # use to make sure there's absolutely no duplicate data 30 | 31 | # Create a context for this to remove complete dependency from the model handler. 32 | 33 | @property 34 | def episode(self) -> str: 35 | return self._episode 36 | 37 | @episode.setter 38 | def episode(self, _episode:str): 39 | self._episode = _episode 40 | 41 | @property 42 | def live(self) -> bool: 43 | return self._is_live 44 | 45 | @live.setter 46 | def live(self, _live:bool): 47 | self._is_live = _live 48 | 49 | @property 50 | def time(self) -> 'TimeHandler': 51 | # self._time.event = self.event 52 | self._time.processor = self.processor 53 | self._time['episode'] = self.episode 54 | self._time['live'] = self.live 55 | return self._time 56 | 57 | @time.setter 58 | def time(self, _time:'TimeHandler'): 59 | self._time = _time 60 | 61 | 62 | def reset(self): 63 | """ Reset the data we're querying for. 
""" 64 | self.time.reset() 65 | 66 | 67 | 68 | def __str__(self) -> str: 69 | name = self["name"] 70 | category = self["category"] 71 | subcategories = self["subcategories"] 72 | jscat = self.main_helper.generate_hash(subcategories) 73 | return f"{name}:{category}:{jscat}" -------------------------------------------------------------------------------- /jamboree/middleware/procedures/models/_sklearn.py: -------------------------------------------------------------------------------- 1 | from addict import Dict 2 | from loguru import logger 3 | from sklearn.base import BaseEstimator 4 | from sklearn.datasets import make_friedman2 5 | from sklearn.gaussian_process import GaussianProcessRegressor 6 | from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel 7 | from jamboree.middleware.procedures import ModelProcedureAbstract 8 | 9 | 10 | 11 | class SklearnProcedure(ModelProcedureAbstract): 12 | def __init__(self, *args, **kwargs) -> None: 13 | super().__init__() 14 | self.requirements.model = True 15 | self.requirements.criterion = False 16 | self.requirements.optimizer = False 17 | self.types.model = BaseEstimator 18 | self.changed = False 19 | 20 | 21 | 22 | @property 23 | def model(self) -> BaseEstimator: 24 | self.verify() 25 | return self.dictionary.model 26 | 27 | def set_params(self, **params): 28 | self.changed = True 29 | self.model.set_params(**params) 30 | 31 | @logger.catch 32 | def get_params(self): 33 | return self.model.get_params() 34 | 35 | @logger.catch 36 | def predict(self, X, **kwargs): 37 | return self.model.predict(X, **kwargs) 38 | 39 | @logger.catch 40 | def predict_proba(self, X, **kwargs): 41 | prediction = self.model.predict_proba(X, **kwargs) 42 | return prediction 43 | 44 | @logger.catch 45 | def partial_fit(self, X, y, **kwargs): 46 | self.changed = True 47 | self.model.partial_fit(X, y, **kwargs) 48 | 49 | def fit(self, X, y, **kwargs): 50 | self.changed = True 51 | self.model.fit(X, y, **kwargs) 52 | 53 | class CustomSklearnGaussianProcedure(SklearnProcedure): 54 | def __init__(self, *args, **kwargs) -> None: 55 | super().__init__(*args, **kwargs) 56 | kernel = DotProduct() + WhiteKernel() 57 | gpr = GaussianProcessRegressor(kernel=kernel, random_state=0) 58 | self.dictionary.model = gpr 59 | 60 | 61 | if __name__ == "__main__": 62 | general_procedure = CustomSklearnGaussianProcedure() 63 | X, y = make_friedman2(n_samples=500, noise=0, random_state=0) 64 | general_procedure.fit(X, y) 65 | print(general_procedure.predict(X[:2,:], return_std=True)) 66 | # print(general_procedure.get_params()) 67 | print(general_procedure.extract()) 68 | 69 | -------------------------------------------------------------------------------- /jamboree/storage/files/core.py: -------------------------------------------------------------------------------- 1 | from copy import copy 2 | from abc import ABC 3 | from typing import Any, List 4 | from addict import Dict 5 | from version_query import VersionComponent 6 | import hashlib 7 | 8 | class FileStorageConnection(ABC): 9 | def __init__(self, **kwargs) -> None: 10 | self._connection = None 11 | self._settings = Dict() 12 | self._settings.overwrite = False 13 | self._settings.sig_key = kwargs.get("signature", "basic_key") 14 | 15 | self._settings.preferences.by = "latest" 16 | self._settings.preferences.limit = 500 17 | self._settings.preferences.version = None 18 | self._settings.default.version = "0.0.1" 19 | self._settings.default.increment = VersionComponent.Patch 20 | 21 | 22 | 23 | 24 | @property 25 | def conn(self): 
26 |         if self._connection is None:
27 |             raise AttributeError("You haven't added a main database connection as of yet.")
28 |         return self._connection
29 | 
30 |     @conn.setter
31 |     def conn(self, _conn):
32 |         self._connection = _conn
33 | 
34 |     @property
35 |     def settings(self):
36 |         return self._settings
37 | 
38 |     @settings.setter
39 |     def settings(self, _settings:Dict):
40 |         copied = copy(self._settings)  # work on a copy so a failed validation doesn't mutate the live settings
41 |         copied.update(_settings)
42 |         self.valid_settings(copied)
43 |         self._settings = copied
44 | 
45 |     def valid_settings(self, _settings):
46 |         if _settings.preferences.by not in ["latest", "many", "all", "version"]:
47 |             raise ValueError("Query preference must be one of: 'latest', 'many', 'all', 'version'")
48 | 
49 |         if _settings.preferences.by == "version" and _settings.preferences.version is None:
50 |             raise AttributeError("If you're querying by version, you have to include a version number (string_format)")
51 | 
52 |     @property
53 |     def is_overwrite(self) -> bool:
54 |         return self._settings.overwrite
55 | 
56 | 
57 |     """ Main Commands """
58 | 
59 |     def save(self, query:dict, obj:Any, **kwargs):
60 |         raise NotImplementedError("save not implemented")
61 | 
62 |     def query(self, query, **kwargs):
63 |         raise NotImplementedError("query not implemented")
64 | 
65 |     def delete(self, query, **kwargs):
66 |         raise NotImplementedError("delete not implemented")
67 | 
68 |     def absolute_exists(self, query, **kwargs):
69 |         raise NotImplementedError
-------------------------------------------------------------------------------- /.gitignore: --------------------------------------------------------------------------------
1 | Pipfile
2 | # Byte-compiled / optimized / DLL files
3 | __pycache__/
4 | *.py[cod]
5 | *$py.class
6 | 
7 | # C extensions
8 | *.so
9 | 
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | pip-wheel-metadata/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 | 
31 | # PyInstaller
32 | #  Usually these files are written by a python script from a template
33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 | 
37 | # Testing and examples
38 | test/
39 | 
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 | 
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .nox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | *.py,cover
55 | .hypothesis/
56 | .pytest_cache/
57 | 
58 | # Translations
59 | *.mo
60 | *.pot
61 | 
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 | 
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 | 
72 | # Scrapy stuff:
73 | .scrapy
74 | 
75 | # Sphinx documentation
76 | docs/_build/
77 | 
78 | # PyBuilder
79 | target/
80 | 
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 | 
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 | 
88 | # pyenv
89 | .python-version
90 | 
91 | # pipenv
92 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | #   install all needed dependencies.
96 | Pipfile.lock
97 | 
98 | # pyflow
99 | __pypackages__/
100 | 
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 | 
105 | # SageMath parsed files
106 | *.sage.py
107 | 
108 | # Environments
109 | .env
110 | .venv
111 | env/
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 | 
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 | 
121 | # Rope project settings
122 | .ropeproject
123 | 
124 | # mkdocs documentation
125 | /site
126 | 
127 | # mypy
128 | .mypy_cache/
129 | .dmypy.json
130 | dmypy.json
131 | 
132 | # Pyre type checker
133 | .pyre/
134 | 
135 | 
136 | .vscode/
-------------------------------------------------------------------------------- /jamboree/handlers/complex/backtestable/default/db.py: --------------------------------------------------------------------------------
1 | import uuid
2 | from loguru import logger
3 | 
4 | import maya
5 | import pandas as pd
6 | 
7 | from jamboree import Jamboree
8 | from jamboree import JamboreeNew
9 | 
10 | from jamboree.handlers.complex.meta import MetaHandler
11 | # from jamboree.handlers.complex.metric import MetricHandler
12 | 
13 | from jamboree.handlers.default.time import TimeHandler
14 | from jamboree.handlers.default.db import DBHandler
15 | from jamboree.handlers.default import BlobStorageHandler
16 | 
17 | class BacktestDBHandler(DBHandler):
18 |     """
19 |         # BACKTEST HANDLER
20 |         ---
21 | 
22 |         A way to load time and metadata information into classes that already use the DB handler.
23 | 
24 |         If you're working with blob storage instead, use the BacktestBlobHandler in files.py.
25 | 
26 | 
27 |     """
28 | 
29 |     def __init__(self):
30 |         super().__init__()
31 | 
32 | 
33 |         # Other objects to consider
34 |         self._time:TimeHandler = TimeHandler()
35 |         self._meta: MetaHandler = MetaHandler()
36 |         # self._metrics: MetricHandler = MetricHandler()
37 |         self._episode = uuid.uuid4().hex
38 | 
39 | 
40 |         self._is_live = False
41 |         self.is_event = False # use to make sure there's absolutely no duplicate data
42 | 
43 |     @property
44 |     def episode(self) -> str:
45 |         return self._episode
46 | 
47 |     @episode.setter
48 |     def episode(self, _episode:str):
49 |         self._episode = _episode
50 | 
51 |     @property
52 |     def live(self) -> bool:
53 |         return self._is_live
54 | 
55 |     @live.setter
56 |     def live(self, _live:bool):
57 |         self._is_live = _live
58 | 
59 |     @property
60 |     def time(self) -> 'TimeHandler':
61 |         # self._time.event = self.event
62 |         self._time.processor = self.processor
63 |         self._time['episode'] = self.episode
64 |         self._time['live'] = self.live
65 |         return self._time
66 | 
67 |     @time.setter
68 |     def time(self, _time:'TimeHandler'):
69 |         self._time = _time
70 | 
71 |     def reset(self):
72 |         """ Reset the data we're querying for.
""" 73 | # self.reset_current_metric() 74 | # self.metadata.reset() 75 | self.time.reset() 76 | 77 | 78 | 79 | def __str__(self) -> str: 80 | name = self["name"] 81 | category = self["category"] 82 | subcategories = self["subcategories"] 83 | jscat = self.main_helper.generate_hash(subcategories) 84 | return f"{name}:{category}:{jscat}" -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import codecs 4 | import sys 5 | from shutil import rmtree 6 | from setuptools import setup, find_packages, Command 7 | 8 | 9 | here = os.path.abspath(os.path.dirname(__file__)) 10 | 11 | 12 | with open("README.md", "r") as fh: 13 | long_description = fh.read() 14 | 15 | 16 | class UploadCommand(Command): 17 | """Support setup.py publish.""" 18 | 19 | description = "Build and publish the package." 20 | user_options = [] 21 | 22 | @staticmethod 23 | def status(s): 24 | """Prints things in bold.""" 25 | print("\033[1m{0}\033[0m".format(s)) 26 | 27 | def initialize_options(self): 28 | pass 29 | 30 | def finalize_options(self): 31 | pass 32 | 33 | def run(self): 34 | try: 35 | self.status("Removing previous builds…") 36 | rmtree(os.path.join(here, "dist")) 37 | except FileNotFoundError: 38 | pass 39 | self.status("Building Source distribution…") 40 | os.system("{0} setup.py sdist bdist_wheel".format(sys.executable)) 41 | self.status("Uploading the package to PyPi via Twine…") 42 | os.system("twine upload dist/*") 43 | sys.exit() 44 | 45 | 46 | 47 | setup( 48 | name="jamboree", 49 | version="0.9.5", 50 | author="Kevin Hill", 51 | author_email="kah.kevin.hill@gmail.com", 52 | description="A multi-layer event sourcing and general data library. 
SQL, Search, Event Sourcing, and File/Model storage combined into one.",
53 |     long_description=long_description,
54 |     long_description_content_type="text/markdown",
55 |     py_modules=["jamboree"],
56 |     install_requires=[
57 |         "numpy",
58 |         "scipy",
59 |         'maya',
60 |         "pandas-datareader",
61 |         "json-tricks",
62 |         "ujson",
63 |         'gym',
64 |         "orjson",
65 |         "pebble",
66 |         "cytoolz",
67 |         "loguru",
68 |         'redisearch',
69 |         "lz4",
70 |         "anycache",
71 |         "hiredis",
72 |         "eliot",
73 |         "eliot-tree",
74 |         "matplotlib",
75 |         "pandas",
76 |         "scikit-learn",  # the "sklearn" package on PyPI is a deprecated alias for scikit-learn
77 |         "crayons",
78 |         "creme",
79 |         'pydantic',
80 |         "yfinance",
81 |         "version_query",
82 |         "cerberus",
83 |         "addict",
84 |     ],
85 |     packages=find_packages(),
86 |     classifiers=[
87 |         "Programming Language :: Python :: 3",
88 |         "License :: OSI Approved :: MIT License",
89 |         "Operating System :: OS Independent",
90 |     ],
91 |     cmdclass={"upload": UploadCommand},
92 | )
-------------------------------------------------------------------------------- /docs/notebooks/Untitled.ipynb: --------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Dataframe examples\n",
8 |     "---"
9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": 16,
14 |    "metadata": {},
15 |    "outputs": [],
16 |    "source": [
17 |     "import pandas as pd\n",
18 |     "import numpy as np"
19 |    ]
20 |   },
21 |   {
22 |    "cell_type": "code",
23 |    "execution_count": 17,
24 |    "metadata": {},
25 |    "outputs": [],
26 |    "source": [
27 |     "frame = pd.DataFrame([[1, 2.0, \"a\", \"c\", 0]], columns=[\"A\",\"B\",\"C\",\"D\",\"E\"])"
28 |    ]
29 |   },
30 |   {
31 |    "cell_type": "code",
32 |    "execution_count": 23,
33 |    "metadata": {},
34 |    "outputs": [],
35 |    "source": [
36 |     "types = frame.dtypes"
37 |    ]
38 |   },
39 |   {
40 |    "cell_type": "code",
41 |    "execution_count": 27,
42 |    "metadata": {},
43 |    "outputs": [
44 |     {
45 |      "name": "stdout",
46 |      "output_type": "stream",
47 |      "text": [
48 |       "FLOAT\n",
49 |       "FLOAT\n",
50 |       "FLOAT\n"
51 |      ]
52 |     }
53 |    ],
54 |    "source": [
55 |     "for k, v in types.items():\n",
56 |     "    if np.issubdtype(v, np.number):\n",
57 |     "        print(\"FLOAT\")\n",
58 |     "        continue\n",
59 |     "    \n",
60 |     "#     print(k)\n",
61 |     "#     print(v)"
62 |    ]
63 |   },
64 |   {
65 |    "cell_type": "code",
66 |    "execution_count": null,
67 |    "metadata": {},
68 |    "outputs": [],
69 |    "source": []
70 |   },
71 |   {
72 |    "cell_type": "code",
73 |    "execution_count": null,
74 |    "metadata": {},
75 |    "outputs": [],
76 |    "source": []
77 |   }
78 |  ],
79 |  "metadata": {
80 |   "kernelspec": {
81 |    "display_name": "Python 3",
82 |    "language": "python",
83 |    "name": "python3"
84 |   },
85 |   "language_info": {
86 |    "codemirror_mode": {
87 |     "name": "ipython",
88 |     "version": 3
89 |    },
90 |    "file_extension": ".py",
91 |    "mimetype": "text/x-python",
92 |    "name": "python",
93 |    "nbconvert_exporter": "python",
94 |    "pygments_lexer": "ipython3",
95 |    "version": "3.6.8"
96 |   },
97 |   "varInspector": {
98 |    "cols": {
99 |     "lenName": 16,
100 |     "lenType": 16,
101 |     "lenVar": 40
102 |    },
103 |    "kernels_config": {
104 |     "python": {
105 |      "delete_cmd_postfix": "",
106 |      "delete_cmd_prefix": "del ",
107 |      "library": "var_list.py",
108 |      "varRefreshCmd": "print(var_dic_list())"
109 |     },
110 |     "r": {
111 |      "delete_cmd_postfix": ") ",
112 |      "delete_cmd_prefix": "rm(",
113 |      "library": "var_list.r",
114 |      "varRefreshCmd": "cat(var_dic_list()) "
115 |     }
116 |    },
117 |    "types_to_exclude": [
118 |     "module",
119 |     "function",
120 | 
"builtin_function_or_method", 121 | "instance", 122 | "_Feature" 123 | ], 124 | "window_display": false 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 4 129 | } 130 | -------------------------------------------------------------------------------- /docs/notebooks/Verification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 33, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from cerberus import Validator" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from cerberus import Validator\n", 19 | "\n", 20 | "class JamboreeValidator(Validator):\n", 21 | " def _validate_isodd(self, isodd, field, value):\n", 22 | " \"\"\" Test the oddity of a value.\n", 23 | "\n", 24 | " The rule's arguments are validated against this schema:\n", 25 | " {'type': 'boolean'}\n", 26 | " \"\"\"\n", 27 | " if isodd and not bool(value & 1):\n", 28 | " self._error(field, \"Must be an odd number\")" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 28, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "_global_validator = Validator(allow_unknown=True)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 29, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "def check_for_sub_query_dict(_dict:dict):\n", 47 | " for v in _dict.values():\n", 48 | " if isinstance(v, dict):\n", 49 | " pass" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 36, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "# Check to see if the subkeys are of the given types. \n", 59 | "# If it's a dict. 
Make sure to check to see\n", 60 | "example_all_of_schema = {\n", 61 | " \"keyrules\": {\"type\": \"string\"},\n", 62 | " \"valuesrules\": {\n", 63 | " \"type\": ['string', 'boolean', 'number', 'dict', 'list']\n", 64 | " }\n", 65 | "}" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 37, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/plain": [ 76 | "True" 77 | ] 78 | }, 79 | "execution_count": 37, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "_global_validator.validate({\"1\":\"two\"},example_all_of_schema)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "Python 3", 99 | "language": "python", 100 | "name": "python3" 101 | }, 102 | "language_info": { 103 | "codemirror_mode": { 104 | "name": "ipython", 105 | "version": 3 106 | }, 107 | "file_extension": ".py", 108 | "mimetype": "text/x-python", 109 | "name": "python", 110 | "nbconvert_exporter": "python", 111 | "pygments_lexer": "ipython3", 112 | "version": "3.7.3" 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 4 117 | } 118 | -------------------------------------------------------------------------------- /jamboree/storage/databases/database.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from jamboree.utils.helper import Helpers 3 | from pebble.pool import ThreadPool 4 | from multiprocessing import cpu_count 5 | from typing import Union, Optional 6 | from redis import Redis 7 | from redis.client import Pipeline 8 | 9 | 10 | class DatabaseConnection(ABC): 11 | def __init__(self) -> None: 12 | self._connection: Optional[Union[Redis, Pipeline]] = None 13 | self.helpers = Helpers() 14 | self._pool = ThreadPool(max_workers=(cpu_count() * 2)) 15 | 16 | @property 17 | def connection(self) -> Union[Redis, Pipeline]: 18 | if self._connection is None: 19 | raise AttributeError( 20 | "You haven't added a main database connection as of yet." 
21 |             )
22 |         return self._connection
23 | 
24 |     @connection.setter
25 |     def connection(self, _conn: Union[Redis, Pipeline]):
26 |         self._connection = _conn
27 | 
28 |     @property
29 |     def pool(self) -> ThreadPool:
30 |         return self._pool
31 | 
32 |     @pool.setter
33 |     def pool(self, _pool: ThreadPool):
34 |         self._pool = _pool  # assign the backing attribute (self.pool here would recurse into this setter)
35 | 
36 |     """ Save commands """
37 | 
38 |     def save(self, query):
39 |         raise NotImplementedError("save not implemented")
40 | 
41 |     def save_many(self, query):
42 |         raise NotImplementedError("save_many not implemented")
43 | 
44 |     """
45 |     Update commands
46 |     """
47 | 
48 |     def update_single(self, query):
49 |         raise NotImplementedError("update_single not implemented")
50 | 
51 |     def update_many(self, query):
52 |         raise NotImplementedError("update_many not implemented")
53 | 
54 |     """
55 |     Delete Commands
56 |     """
57 | 
58 |     def delete(self, query):
59 |         raise NotImplementedError("delete function not implemented yet.")
60 | 
61 |     def delete_many(self, query):
62 |         raise NotImplementedError("delete_many function not implemented yet.")
63 | 
64 |     def delete_all(self, query):
65 |         raise NotImplementedError("delete_all not implemented")
66 | 
67 |     """
68 |     Query commands
69 |     """
70 | 
71 |     def query_latest(self):
72 |         raise NotImplementedError("query_latest not implemented")
73 | 
74 |     def query_latest_many(self):
75 |         raise NotImplementedError("query_latest_many not implemented")
76 | 
77 |     def query_between(self):
78 |         raise NotImplementedError("query_between not implemented")
79 | 
80 |     def query_before(self):
81 |         raise NotImplementedError("query_before not implemented")
82 | 
83 |     def query_after(self):
84 |         raise NotImplementedError("query_after not implemented")
85 | 
86 |     def query_all(self):
87 |         pass
88 | 
89 |     """ Other Functions """
90 | 
91 |     def reset(self, query):
92 |         raise NotImplementedError("reset not implemented")
93 | 
94 |     def count(self):
95 |         raise NotImplementedError("count not implemented")
96 | 
97 |     def general_lock(self, query: dict):
98 |         raise NotImplementedError("general_lock not implemented")
99 | 
100 | 
-------------------------------------------------------------------------------- /jamboree/middleware/procedures/models/_creme.py: --------------------------------------------------------------------------------
1 | from addict import Dict
2 | from sklearn.base import BaseEstimator
3 | from jamboree.middleware.procedures import ModelProcedureAbstract
4 | from sklearn.datasets import make_friedman2
5 | from sklearn.gaussian_process import GaussianProcessRegressor
6 | from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
7 | from loguru import logger
8 | 
9 | 
10 | 
11 | 
12 | class CremeProcedure(ModelProcedureAbstract):
13 |     def __init__(self, *args, **kwargs) -> None:
14 |         super().__init__()
15 |         self.requirements.model = True
16 |         self.requirements.criterion = False
17 |         self.requirements.optimizer = False
18 | 
19 |         # types = Dict()
20 |         # types.model = BaseEstimator
21 | 
22 |         self.types.model = BaseEstimator
23 | 
24 |     @logger.catch
25 |     def get_params(self):
26 |         self.verify()
27 |         return self.dictionary.model.get_params()
28 | 
29 |     @logger.catch
30 |     def predict(self, X, **kwargs):
31 |         self.verify()
32 |         return self.dictionary.model.predict(X, **kwargs)
33 | 
34 |     @logger.catch
35 |     def predict_prob(self, X, **kwargs):
36 |         self.verify()
37 |         return self.dictionary.model.predict_prob(X, **kwargs)
38 | 
39 |     @logger.catch
40 |     def partial_fit(self, X, y, **kwargs):
41 |         self.verify()
42 |         self.dictionary.model.partial_fit(X, y, **kwargs)
43 | 
44
| def fit(self, X, y, **kwargs): 45 | self.verify() 46 | self.dictionary.model.fit(X, y, **kwargs) 47 | # print(self.mdict.model.predict(X[:2,:], return_std=True)) 48 | 49 | 50 | 51 | def main(): 52 | import datetime as dt 53 | from creme import compose 54 | from creme import datasets 55 | from creme import feature_extraction 56 | from creme import linear_model 57 | from creme import metrics as metricss 58 | from creme import preprocessing 59 | from creme import stats 60 | from creme import stream 61 | 62 | 63 | X_y = datasets.Bikes() 64 | X_y = stream.simulate_qa(X_y, moment='moment', delay=dt.timedelta(minutes=30)) 65 | 66 | def add_time_features(x): 67 | return { 68 | **x, 69 | 'hour': x['moment'].hour, 70 | 'day': x['moment'].weekday() 71 | } 72 | 73 | model = add_time_features 74 | model |= ( 75 | compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind') + 76 | feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean()) + 77 | feature_extraction.TargetAgg(by='station', how=stats.EWMean()) 78 | ) 79 | model |= preprocessing.StandardScaler() 80 | model |= linear_model.LinearRegression() 81 | 82 | metric = metricss.MAE() 83 | 84 | questions = {} 85 | 86 | for i, x, y in X_y: 87 | # Question 88 | is_question = y is None 89 | if is_question: 90 | y_pred = model.predict_one(x) 91 | questions[i] = y_pred 92 | 93 | # Answer 94 | else: 95 | metric.update(y, questions[i]) 96 | model = model.fit_one(x, y) 97 | 98 | if i >= 30000 and i % 30000 == 0: 99 | print(i, metric) 100 | 101 | if __name__ == "__main__": 102 | main() -------------------------------------------------------------------------------- /jamboree/base/processors/files.py: -------------------------------------------------------------------------------- 1 | import redis 2 | from redis import Redis 3 | from typing import Optional, Any 4 | from jamboree.utils.helper import Helpers 5 | from jamboree.storage.files.redisify import RedisFileProcessor, RedisFileConnection 6 | from jamboree.base.processors.abstracts import FileProcessor 7 | 8 | 9 | class JamboreeFileProcessor(FileProcessor): 10 | def __init__(self) -> None: 11 | self._redis:Optional[Redis] = None 12 | self._redis_conn = RedisFileConnection() 13 | self.helpers = Helpers() 14 | 15 | @property 16 | def rconn(self) -> redis.client.Redis: 17 | if self._redis is None: 18 | raise AttributeError("You've yet to add a redis connection") 19 | return self._redis 20 | 21 | @rconn.setter 22 | def rconn(self, _redis:redis.client.Redis): 23 | self._redis = _redis 24 | 25 | @property 26 | def redis_conn(self) -> RedisFileConnection: 27 | if self._redis_conn is None: 28 | raise AttributeError("Redis connection hasn't been set") 29 | return self._redis_conn 30 | 31 | @redis_conn.setter 32 | def redis_conn(self, _rconn: RedisFileConnection): 33 | self._redis_conn = _rconn 34 | 35 | def initialize(self): 36 | """ Initialize database connections. Use this so we can use the same connections for search, files, and events. """ 37 | self.redis_conn = RedisFileConnection() 38 | self.redis_conn.conn = self.rconn 39 | 40 | def _validate_query(self, query: dict): 41 | """ Validates a query. Must have `type` and a second identifier at least""" 42 | if 'type' not in query: 43 | return False 44 | if not isinstance(query['type'], str): 45 | return False 46 | if len(query) < 2: 47 | return False 48 | return True 49 | 50 | 51 | """ 52 | These are the basic functions. We'll create more functions to handle the different scenarios. 
53 | 54 | NOTE: 55 | * Eventually we'll add rocksdb and a check_local=False flag to explain that we want to search rocksdb first. 56 | * We can add a changed flag into redis. 57 | * If we've changed a model on a different machine we can just say check_local=False 58 | * Otherwise we can set check_local=True and force retrieval from redis. 59 | * If we were to do that, we'd have to separate all of the setup functions inside of the redis handler 60 | """ 61 | 62 | 63 | def save(self, query:dict, obj:Any, **kwargs): 64 | if not self._validate_query(query): 65 | raise ValueError("Query isn't valid") 66 | 67 | self.redis_conn.save(query, obj, **kwargs) 68 | 69 | def query(self, query:dict, **kwargs): 70 | if not self._validate_query(query): 71 | raise ValueError("Query isn't valid") 72 | data = self.redis_conn.query(query, **kwargs) 73 | return data 74 | 75 | def delete(self, query:dict, **kwargs): 76 | if not self._validate_query(query): 77 | raise ValueError("Query isn't valid") 78 | self.redis_conn.delete(query, **kwargs) 79 | 80 | def absolute_exists(self, query: dict, **kwargs): 81 | if not self._validate_query(query): 82 | raise ValueError("Query isn't valid") 83 | return self.redis_conn.absolute_exists(query, **kwargs) 84 | 85 | 86 | -------------------------------------------------------------------------------- /test/experiments/notebooks/File Command Logic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Making These Commands Easier To Handle\n", 8 | "\n", 9 | "I realized in my last file versioning notebook that it was hard to track the logic from the main commands:\n", 10 | "\n", 11 | "1. Save\n", 12 | "2. Query\n", 13 | "3. Latest\n", 14 | "\n", 15 | "It was obfuscated by the version control logic. Therefore I'm moving 100% of that to a second place without adding actual versioning. I think I have the incrementing logic working, and the capacity to save and load data. I'll be adding information.\n", 16 | "\n", 17 | "\n", 18 | "I'll have:\n", 19 | "\n", 20 | "1. Save\n", 21 | "2. Load\n", 22 | "3. Delete\n", 23 | "\n", 24 | "All of them here, as well as a nice projection of the outcomes of each possible activity." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "from redis import Redis\n", 34 | "from jamboree.utils.core import consistent_hash\n", 35 | "from jamboree.utils.support import serialize, deserialize, create_checksum\n", 36 | "from jamboree.utils.core import consistent_hash, consistent_unhash" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Key things we'd be looking for with saving\n", 44 | "\n", 45 | "1. Are we overwriting the latest version if it exist\n", 46 | " * We'd do this to resemble online learning\n", 47 | "2. If we aren't doing that, are we updating the version number with this\n", 48 | "3. Are we saving anything redundant\n", 49 | " * This can't technically be done yet. Need to come up with a consistent way to pickle information." 
50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "# How to determine if we should overwrite?\n", 59 | "# is_overwrite enabled" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [] 68 | } 69 | ], 70 | "metadata": { 71 | "kernelspec": { 72 | "display_name": "Python 3", 73 | "language": "python", 74 | "name": "python3" 75 | }, 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython", 79 | "version": 3 80 | }, 81 | "file_extension": ".py", 82 | "mimetype": "text/x-python", 83 | "name": "python", 84 | "nbconvert_exporter": "python", 85 | "pygments_lexer": "ipython3", 86 | "version": "3.7.3" 87 | }, 88 | "varInspector": { 89 | "cols": { 90 | "lenName": 16, 91 | "lenType": 16, 92 | "lenVar": 40 93 | }, 94 | "kernels_config": { 95 | "python": { 96 | "delete_cmd_postfix": "", 97 | "delete_cmd_prefix": "del ", 98 | "library": "var_list.py", 99 | "varRefreshCmd": "print(var_dic_list())" 100 | }, 101 | "r": { 102 | "delete_cmd_postfix": ") ", 103 | "delete_cmd_prefix": "rm(", 104 | "library": "var_list.r", 105 | "varRefreshCmd": "cat(var_dic_list()) " 106 | } 107 | }, 108 | "types_to_exclude": [ 109 | "module", 110 | "function", 111 | "builtin_function_or_method", 112 | "instance", 113 | "_Feature" 114 | ], 115 | "window_display": false 116 | } 117 | }, 118 | "nbformat": 4, 119 | "nbformat_minor": 2 120 | } 121 | -------------------------------------------------------------------------------- /jamboree/middleware/processors/resample.py: -------------------------------------------------------------------------------- 1 | from jamboree.handlers.processors import DataProcessorsAbstract 2 | import pandas as pd 3 | import numpy as np 4 | import scipy.stats as stats 5 | 6 | 7 | class DynamicResample(DataProcessorsAbstract): 8 | def __init__(self, name, **kwargs) -> None: 9 | self.time_info = { 10 | "years": 0, 11 | "months": 0, 12 | "weeks":0, 13 | "days": 0, 14 | "hours": 0, 15 | "minutes": 0, 16 | "seconds": 0 17 | } 18 | self.time_formatting = { 19 | "seconds": "S", 20 | "minutes": "T", 21 | "hours": "H", 22 | "days": "D", 23 | "weeks":"W", 24 | "months": "M", 25 | "years": "Y" 26 | } 27 | 28 | self.base = 0 29 | super().__init__(name, **kwargs) 30 | 31 | 32 | def set_settings(self, **kwargs): 33 | """ Updates the time information""" 34 | self.time_info['seconds'] = kwargs.get("seconds",self.time_info['seconds']) 35 | self.time_info['minutes'] = kwargs.get("minutes",self.time_info['minutes']) 36 | self.time_info['hours'] = kwargs.get("hours",self.time_info['hours']) 37 | self.time_info['days'] = kwargs.get("days",self.time_info['days']) 38 | self.time_info['weeks'] = kwargs.get("weeks",self.time_info['weeks']) 39 | self.time_info['months'] = kwargs.get("months",self.time_info['months']) 40 | self.time_info['years'] = kwargs.get("years",self.time_info['years']) 41 | self.base = kwargs.get("base", self.base) 42 | 43 | def validate_existing_times(self): 44 | checkable_list = self.time_info.values() 45 | all_zero = all(x==0 for x in checkable_list) 46 | if all_zero: 47 | self.time_info['hours'] = 1 48 | 49 | def generate_time_string(self): 50 | self.validate_existing_times() 51 | final_string = "" 52 | for name, time_amount in self.time_info.items(): 53 | if time_amount == 0: 54 | continue 55 | elif time_amount == 1: 56 | final_string = final_string + self.time_formatting.get(name) 57 | continue 58 | 59 | time_format = 
self.time_formatting.get(name) 60 | final_string = final_string + f"{time_amount}{time_format}" 61 | 62 | return final_string 63 | 64 | def process(self, data: pd.DataFrame) -> pd.DataFrame: 65 | if not (isinstance(data, pd.DataFrame)): 66 | return pd.DataFrame() 67 | 68 | if data.empty: 69 | return pd.DataFrame() 70 | # Do preprocessing here 71 | 72 | dtypes = data.dtypes 73 | aggregate_command = {} 74 | for k, v in dtypes.items(): 75 | if np.issubdtype(v, np.number): 76 | command = {k:'mean'} 77 | aggregate_command.update(command) 78 | continue 79 | else: 80 | command = {k: lambda x: stats.mode(x)[0]} 81 | aggregate_command.update(command) 82 | continue 83 | 84 | rule = self.generate_time_string() 85 | 86 | if self.base == 0: 87 | resampled = data.resample(rule).apply(aggregate_command) 88 | return resampled 89 | resampled = data.resample(rule, base=self.base).apply(aggregate_command) 90 | return resampled 91 | 92 | if __name__ == "__main__": 93 | import pandas_datareader.data as web 94 | data_msft = web.DataReader('MSFT','yahoo',start='2008/1/1',end='2020/3/8').round(2) 95 | mrsample = DynamicResample("modin", days=7) 96 | 97 | remicro = mrsample.process(data_msft) -------------------------------------------------------------------------------- /jamboree/handlers/processors/resample.py: -------------------------------------------------------------------------------- 1 | from jamboree.handlers.processors import DataProcessorsAbstract 2 | import pandas as pd 3 | import numpy as np 4 | import scipy.stats as stats 5 | 6 | 7 | class DynamicResample(DataProcessorsAbstract): 8 | def __init__(self, name, **kwargs) -> None: 9 | self.time_info = { 10 | "years": 0, 11 | "months": 0, 12 | "weeks":0, 13 | "days": 0, 14 | "hours": 0, 15 | "minutes": 0, 16 | "seconds": 0 17 | } 18 | self.time_formatting = { 19 | "seconds": "S", 20 | "minutes": "T", 21 | "hours": "H", 22 | "days": "D", 23 | "weeks":"W", 24 | "months": "M", 25 | "years": "Y" 26 | } 27 | 28 | self.base = 0 29 | super().__init__(name, **kwargs) 30 | 31 | 32 | def set_settings(self, **kwargs): 33 | """ Updates the time information""" 34 | self.time_info['seconds'] = kwargs.get("seconds",self.time_info['seconds']) 35 | self.time_info['minutes'] = kwargs.get("minutes",self.time_info['minutes']) 36 | self.time_info['hours'] = kwargs.get("hours",self.time_info['hours']) 37 | self.time_info['days'] = kwargs.get("days",self.time_info['days']) 38 | self.time_info['weeks'] = kwargs.get("weeks",self.time_info['weeks']) 39 | self.time_info['months'] = kwargs.get("months",self.time_info['months']) 40 | self.time_info['years'] = kwargs.get("years",self.time_info['years']) 41 | self.base = kwargs.get("base", self.base) 42 | 43 | def validate_existing_times(self): 44 | checkable_list = self.time_info.values() 45 | all_zero = all(x==0 for x in checkable_list) 46 | if all_zero: 47 | self.time_info['hours'] = 1 48 | 49 | def generate_time_string(self): 50 | self.validate_existing_times() 51 | final_string = "" 52 | for name, time_amount in self.time_info.items(): 53 | if time_amount == 0: 54 | continue 55 | elif time_amount == 1: 56 | final_string = final_string + self.time_formatting.get(name) 57 | continue 58 | 59 | time_format = self.time_formatting.get(name) 60 | final_string = final_string + f"{time_amount}{time_format}" 61 | 62 | return final_string 63 | 64 | def process(self, data: pd.DataFrame) -> pd.DataFrame: 65 | if not (isinstance(data, pd.DataFrame)): 66 | return pd.DataFrame() 67 | 68 | if data.empty: 69 | return pd.DataFrame() 70 | # Do 
preprocessing here 71 | 72 | dtypes = data.dtypes 73 | aggregate_command = {} 74 | for k, v in dtypes.items(): 75 | if np.issubdtype(v, np.number): 76 | command = {k:'mean'} 77 | aggregate_command.update(command) 78 | continue 79 | else: 80 | command = {k: lambda x: stats.mode(x)[0]} 81 | aggregate_command.update(command) 82 | continue 83 | 84 | rule = self.generate_time_string() 85 | 86 | if self.base == 0: 87 | resampled = data.resample(rule).apply(aggregate_command) 88 | return resampled 89 | resampled = data.resample(rule, base=self.base).apply(aggregate_command) 90 | return resampled 91 | 92 | if __name__ == "__main__": 93 | import pandas_datareader.data as web 94 | data_msft = web.DataReader('MSFT','yahoo',start='2008/1/1',end='2020/3/8').round(2) 95 | mrsample = DynamicResample("modin", days=7) 96 | 97 | remicro = mrsample.process(data_msft) 98 | print(remicro) 99 | # time_str = mrsample.generate_time_string() 100 | # print(time_str) -------------------------------------------------------------------------------- /examples/user_handler.py: -------------------------------------------------------------------------------- 1 | import maya 2 | from jamboree import Jamboree, DBHandler 3 | import random 4 | from copy import copy 5 | from loguru import logger 6 | 7 | 8 | class UserHandler(DBHandler): 9 | """Abstract handler that we use to keep track of information. 10 | """ 11 | 12 | def __init__(self, **kwargs): 13 | super().__init__() 14 | self.entity = "user" 15 | self.required = { 16 | "user_id": str 17 | } 18 | self._balance = 0 19 | self._limit = 500 20 | self._settings_handler = None 21 | 22 | @property 23 | def limit(self): 24 | """ The maximum number of records we intend to get when calling the many function.""" 25 | return self._limit 26 | 27 | @limit.setter 28 | def limit(self, limit): 29 | self._limit = limit 30 | 31 | @property 32 | def settings(self): 33 | if self._settings_handler is None: 34 | raise AttributeError 35 | return self._settings_handler 36 | 37 | @settings.setter 38 | def settings(self, _settings): 39 | self._settings_handler = _settings 40 | self._settings_handler.limit = self.limit 41 | 42 | def is_authenticated(self): 43 | return True 44 | 45 | def is_active(self): 46 | return True 47 | 48 | def is_anonymous(self): 49 | return False 50 | 51 | def _check_password_register(self, password: str, confirm: str): 52 | """ Run through a set of password conditions""" 53 | return password == confirm 54 | 55 | def register(self, password: str, confirm: str, first: str, middle: str, last: str): 56 | first = str.capitalize(first) 57 | middle = str.capitalize(middle) 58 | last = str.capitalize(last) 59 | 60 | is_match = self._check_password_register(password, confirm) 61 | if is_match: 62 | logger.debug("Passwords are valid") 63 | 64 | def login(self, password: str): 65 | pass 66 | 67 | def logout(self): 68 | pass 69 | 70 | def session(self): 71 | pass 72 | 73 | def deactivate(self): 74 | pass 75 | 76 | def reactivate(self): 77 | pass 78 | 79 | # -------------------------------------------------------- 80 | # --------------------- Counting ------------------------- 81 | # -------------------------------------------------------- 82 | 83 | # Use to get counts inside of the database 84 | 85 | def user_record_count(self) -> int: 86 | count = self.count() 87 | return count 88 | 89 | # -------------------------------------------------------- 90 | # --------------------- Querying ------------------------- 91 | # -------------------------------------------------------- 92 | 93 | 
def latest_user(self):
94 |         """ Get the latest user record """
95 |         last_state = self.last()
96 |         return last_state
97 | 
98 |     def many_user(self):
99 |         latest_user_records = self.many(self.limit)
100 |         return latest_user_records
101 | 
102 |     # --------------------------------------------------------
103 |     # ----------------------- Saving -------------------------
104 |     # --------------------------------------------------------
105 | 
106 |     def save_user(self, data: dict):
107 |         query = copy(self._query)  # copy is imported as a bare function (from copy import copy)
108 |         query.update(data)
109 |         query['time'] = maya.now()._epoch
110 |         query['type'] = self.entity
111 |         query['timestamp'] = maya.now()._epoch
112 |         self.save(query)  # save the merged record, not the raw input, so the fields above are kept
113 | 
114 | 
115 | def flip(n=0.02):
116 |     if n >= random.uniform(0, 1):
117 |         return True
118 |     return False
119 | 
120 | 
121 | if __name__ == "__main__":
122 |     user_handler = UserHandler()
123 |     user_handler['user_id'] = "mygeneralemail@gmail.com"
124 |     user_handler.register("password1", "password1", "kevin", "andrew", "hill")
125 |     user_handler.login("password1")
126 | 
-------------------------------------------------------------------------------- /jamboree/middleware/procedures/models/_torch.py: --------------------------------------------------------------------------------
1 | from addict import Dict
2 | from loguru import logger
3 | from typing import Optional
4 | 
5 | from sklearn.base import BaseEstimator
6 | from sklearn.datasets import make_friedman2
7 | from sklearn.gaussian_process import GaussianProcessRegressor
8 | from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
9 | from jamboree.middleware.procedures import ModelProcedureAbstract
10 | 
11 | from skorch.net import NeuralNet
12 | 
13 | from torch.nn import Module
14 | from torch.nn.modules.loss import _Loss
15 | from torch.optim import Optimizer
16 | from torch.optim import Adam
17 | import numpy as np
18 | from sklearn.datasets import make_classification
19 | from torch import nn
20 | import torch.nn.functional as F
21 | 
22 | from skorch import NeuralNetClassifier
23 | 
24 | 
25 | class TorchProcedure(ModelProcedureAbstract):
26 |     def __init__(self, *args, **kwargs) -> None:
27 |         super().__init__()
28 |         self.requirements.model = True
29 |         self.requirements.criterion = True
30 |         self.requirements.optimizer = True
31 |         self.types.model = nn.Module
32 |         self.types.criterion = _Loss
33 |         self.types.optimizer = Optimizer
34 |         self._compiled_model:Optional[NeuralNet] = None
35 | 
36 |     @property
37 |     def model(self) -> NeuralNet:
38 |         self.verify()
39 |         if self._compiled_model is None:
40 |             _compiled = NeuralNet(
41 |                 module=self.dictionary.model,
42 |                 criterion=self.dictionary.criterion,
43 |                 optimizer=self.dictionary.optimizer,
44 |                 max_epochs=10,
45 |                 lr=0.1,
46 |                 # Shuffle training data on each epoch
47 |                 iterator_train__shuffle=True,
48 |             )
49 |             self._compiled_model = _compiled
50 |         return self._compiled_model
51 | 
52 |     @logger.catch
53 |     def set_params(self, **params):
54 |         self.changed = True
55 |         self.model.set_params(**params)
56 | 
57 |     @logger.catch
58 |     def get_params(self):
59 |         return self.model.get_params()
60 | 
61 |     @logger.catch
62 |     def predict(self, X, **kwargs):
63 |         return self.model.predict(X, **kwargs)
64 | 
65 |     @logger.catch
66 |     def predict_proba(self, X, **kwargs):
67 |         return self.model.predict_proba(X, **kwargs)
68 | 
69 |     @logger.catch
70 |     def partial_fit(self, X, y, **kwargs):
71 |         self.changed = True
72 |         self.model.partial_fit(X, y, **kwargs)
73 | 
74 |     def fit(self, X, y, **kwargs):
75 |         self.changed = True
76 | 
self.model.fit(X, y, **kwargs)
77 | 
78 | 
79 | class MyModule(nn.Module):
80 |     def __init__(self, num_units=10, nonlin=F.relu):
81 |         super(MyModule, self).__init__()
82 | 
83 |         self.dense0 = nn.Linear(20, num_units)
84 |         self.nonlin = nonlin
85 |         self.dropout = nn.Dropout(0.5)
86 |         self.dense1 = nn.Linear(num_units, 10)
87 |         self.output = nn.Linear(10, 2)
88 | 
89 |     def forward(self, X, **kwargs):
90 |         X = self.nonlin(self.dense0(X))
91 |         X = self.dropout(X)
92 |         X = F.relu(self.dense1(X))
93 |         X = F.log_softmax(self.output(X), dim=-1)  # NLLLoss (used below) expects log-probabilities, not softmax probabilities
94 |         return X
95 | 
96 | 
97 | class TestCustomTorchClassifier(TorchProcedure):
98 |     def __init__(self, *args, **kwargs):
99 |         super().__init__(*args, **kwargs)
100 |         self.dictionary.model = MyModule
101 |         self.dictionary.optimizer = Adam
102 |         self.dictionary.criterion = nn.NLLLoss
103 | 
104 | 
105 | 
106 | 
107 | def main():
108 | 
109 | 
110 | 
111 |     X, y = make_classification(1000, 20, n_informative=10, random_state=0)
112 |     X = X.astype(np.float32)
113 |     y = y.astype(np.int64)
114 | 
115 | 
116 |     net = TestCustomTorchClassifier()
117 | 
118 |     net.fit(X, y)
119 |     for _ in range(10):
120 |         y_proba = net.predict_proba(X)
121 |         print(y_proba)
122 | 
123 | 
124 | if __name__ == "__main__":
125 |     main()
-------------------------------------------------------------------------------- /jamboree/handlers/complex/model.py: --------------------------------------------------------------------------------
1 | # import random
2 | # import time
3 | # import uuid
4 | # from pprint import pprint
5 | # from typing import Any, List, Optional
6 | 
7 | # import maya
8 | # from addict import Dict
9 | # from loguru import logger
10 | 
11 | # from jamboree import Jamboree
12 | # from jamboree.handlers.complex.engines import FileEngine
13 | # from jamboree.middleware.procedures import (ModelProcedureAbstract,
14 | #                                             ModelProcedureManagement,
15 | #                                             ProcedureAbstract,
16 | #                                             ProcedureManagement)
17 | # from jamboree.utils.support.search import querying
18 | 
19 | # logger.disable(__name__)
20 | 
21 | # class ModelEngine(FileEngine):
22 | #     """ """
23 | 
24 | #     def __init__(self, processor, **kwargs):
25 | #         super().__init__(processor=processor, **kwargs)
26 | #         self.pm = ModelProcedureManagement()
27 | #         self.current_procedure = None
28 | 
29 | #     def init_specialized(self, **kwargs):
30 | #         super().init_specialized(**kwargs)
31 | #         self.model_type = kwargs.get("model_type", "sklearn")
32 | 
33 | #     def open_context(self):
34 | #         if not self.file_reset:
35 | #             self.reset()
36 | 
37 | 
38 | #     def close_context(self):
39 | #         current_model = self.model
40 | #         if current_model.changed:
41 | #             extracted = current_model.extract()
42 | #             self.save_file(extracted)
43 | 
44 | #         # probably do some sort of metrics stuff :)
45 | 
46 | 
47 | #     def enterable(self):
48 | #         """ Return the object we want to enter into """
49 | #         return self.model
50 | 
51 | #     def custom_post_load(self, item):
52 | #         proc = self.procedure
53 | #         proc.dictionary = item
54 | #         proc.verify()
55 | #         self.current_procedure = proc
56 | 
57 | 
58 | #     @property
59 | #     def procedure(self) -> 'ModelProcedureAbstract':
60 | #         if not self.current_procedure:
61 | #             self.current_procedure = self.pm.access(self.model_type)
62 | #             logger.success(f"Successfully accessed a procedure: {self.current_procedure}")
63 | #         return self.current_procedure
64 | 
65 | #     @procedure.setter
66 | #     def procedure(self, _procedure:'ModelProcedureAbstract'):
67 | #         self.current_procedure = _procedure
68 | 
69 | #     @property
70 | #     def model(self):
71 | #         if self.current_procedure:
72 | # 
self.procedure.verify()
73 | #             return self.procedure
74 | #         raise AttributeError("You haven't added a procedure yet")
75 | 
76 | #     def file_from_dict(self, item:Dict):
77 | #         reloaded = ModelEngine(
78 | #             processor=self.processor,
79 | #             name=item.name,
80 | #             category=item.category,
81 | #             subcategories=item.subcategories,
82 | #             submetatype=item.submetatype,
83 | #             abbreviation=item.abbreviation,
84 | #             model_type=item.submetatype
85 | #         )
86 | #         return reloaded
87 | 
88 | 
89 | #     def reset(self):
90 | #         super().reset()
91 | 
92 | 
93 | # def file_engine_main():
94 | #     """
95 | #         Creating a generic usage of the file engine instead of only model storage.
96 | 
97 | #         To test, we're going to entirely duplicate the prior test.
98 | #         Only we're going to use generic functions and variables. In essence, rebuild the `ModelEngine` starting with the file handler
99 | 
100 | #     """
101 | 
102 | #     from jamboree.middleware.procedures.models import CustomSklearnGaussianProcedure
103 | #     file_name = uuid.uuid4().hex
104 | #     logger.info("Starting file engine experiment")
105 | #     logger.info(f"The file name is: {file_name}")
106 | #     jamboree_processor = Jamboree()
107 | #     with logger.catch(message="There should be no reason for this to fail"):
108 | #         # Initialize a file engine
109 | #         model_engine = ModelEngine(
110 | #             processor=jamboree_processor,
111 | #             name=file_name,
112 | #             category="machine",
113 | #             subcategories={"ml_type": "gaussian"},
114 | #             abbreviation="GAUSS",
115 | #             submetatype="sklearn",
116 | #             blobfile=CustomSklearnGaussianProcedure(),
117 | #         )
118 | #         model_engine.reset()
119 | 
120 | #         reloaded = model_engine.first(name=file_name)
121 | #         while True:
122 | #             with reloaded as model:
123 | #                 logger.debug(model)
124 | 
125 | 
126 | # if __name__ == "__main__":
127 | #     logger.enable(__name__)
128 | #     file_engine_main()
-------------------------------------------------------------------------------- /jamboree/handlers/complex/meta.py: --------------------------------------------------------------------------------
1 | import time
2 | import uuid
3 | from typing import Optional
4 | 
5 | import maya
6 | from jamboree import Jamboree
7 | from jamboree.handlers.abstracted.search import MetadataSearchHandler
8 | from jamboree.handlers.default import DBHandler, Access
9 | from loguru import logger
10 | 
11 | 
12 | class MetaHandler(Access):
13 |     """
14 |         # MetaDataHandler
15 |         ---
16 |         Metadata is "data that provides information about other data".
17 | 
18 |         The MetaHandler is a way to interact with metadata on each data source we have.
19 | 
20 | 
21 |         It should be used with both the DataHandler and MultDataHandler, as well as any other form of common data we're looking for.
22 |         It exists so that we can figure out properties of data without being forced to open the data directly.
23 |         It should also give us the capacity to search for various bits of information (redis_search) in the near future.
24 | 
25 |         Some use cases of the metadata include:
26 | 
27 |         1. Knowing the type of data we're looking at given some information.
28 |             - Time-series
29 |             - Machine Learning Model
30 |             - Network/Graph Data
31 |             - Events
32 |             - Log Data
33 |             - Meta Record
34 |                 - A metarecord is a JSON representation of a complex datatype.
35 |         2. Time the data was initiated
36 |         3.
Time the data was last modified 37 | - Modifications can be as simple as: 38 | - Adding a new ticker or bar for price information 39 | - Partial-Fitting a machine learning model 40 | - Adjusting a weight to a variable 41 | 4. Getting the number of records for a given piece of information 42 | - Very useful if we're trying to plan around how much we're going to do for a piece of information 43 | 5. Determining if such data exist 44 | - We would simply create a complex hash function that's pulled from all dbhandlers representing that data type. 45 | 6. Start and End Time for a given set of records 46 | 7. Location information 47 | - There can be different location information for each piece of information. 48 | - Examples: 49 | - Image weather data 50 | - Market location data 51 | - Social interaction location data 52 | - Login, logout location data 53 | - Creating something flexible for this would probably be a good idea. 54 | """ 55 | def __init__(self): 56 | super().__init__() 57 | self.entity = "metadata" 58 | self.required = { 59 | "name": str, 60 | "category": str, 61 | "metatype": str, 62 | "submetatype": str, 63 | "abbreviation": str, 64 | "subcategories": dict 65 | } 66 | self._search = MetadataSearchHandler() 67 | self._settings = {} 68 | self.is_auto = False 69 | self.description: Optional[str] = None 70 | 71 | @property 72 | def search(self): 73 | metatype = self.metatype 74 | submetatype = self.submetatype 75 | self._search.entity = self.entity 76 | self._search['metatype'] = { 77 | "type": "TEXT", 78 | "is_filter": True, 79 | "values": { 80 | "is_exact": True, 81 | "term": metatype 82 | } 83 | } 84 | self._search['submetatype'] = { 85 | "type": "TEXT", 86 | "is_filter": True, 87 | "values": { 88 | "is_exact": True, 89 | "term": submetatype 90 | } 91 | } 92 | self._search['name'] = self.name 93 | self._search['category'] = self.category 94 | self._search['subcategories'] = self.subcategories 95 | self._search['abbreviation'] = self.abbreviation 96 | self._search.processor = self.processor 97 | return self._search 98 | 99 | def reset(self): 100 | self.check() 101 | qo = self.setup_query() 102 | qo.pop("mtype", None) 103 | if self.description is not None: 104 | qo['description'] = self.description 105 | 106 | return self.search.Create(allow_duplicates=False, 107 | no_overwrite_must_have=True, 108 | **qo) 109 | 110 | 111 | if __name__ == "__main__": 112 | jambo = Jamboree() 113 | metahandler = MetaHandler() 114 | metahandler.event = jambo 115 | metahandler.reset() 116 | -------------------------------------------------------------------------------- /jamboree/utils/support/search/validation.py: -------------------------------------------------------------------------------- 1 | 2 | from cerberus import Validator 3 | from redisearch import TextField, NumericField, TagField, GeoField 4 | # from jamboree.utils.support.search import filtration_schemas 5 | 6 | _global_validator = Validator(require_all=True, allow_unknown=True) 7 | # _filtration_schemas = filtration_schemas() 8 | 9 | class Geo(type): 10 | """ A geolocational type for """ 11 | def __call__(cls): 12 | return cls.__new__(cls) 13 | def __repr__(self): 14 | return "GEO" 15 | 16 | def __str__(self): 17 | return "GEO" 18 | 19 | 20 | def is_nested(d): 21 | return any(isinstance(i,dict) for i in d.values()) 22 | 23 | def is_gen_type(item, _type): 24 | try: 25 | return isinstance(item, _type) or issubclass(item, _type) or item == _type 26 | except: 27 | return False 28 | 29 | def name_match(item:str, name:str): 30 | return 
item.lower() == name.lower() 31 | 32 | 33 | def is_generic(_k): 34 | return _k in [str, float, int, list, bool] 35 | 36 | 37 | def is_geo(k) -> bool: 38 | if is_gen_type(k, Geo): 39 | return True 40 | 41 | if is_gen_type(k, str): 42 | if name_match(k, "geo"): 43 | return True 44 | return False 45 | 46 | def to_str(i): 47 | """Converts the item to a string version of it""" 48 | if i == bool: 49 | # This will be text that we'll force exact queries on 50 | return "BOOL" 51 | elif i == float or i == int: 52 | return "NUMERIC" 53 | elif i == str: 54 | return "TEXT" 55 | elif i == list: 56 | return "TAG" 57 | 58 | 59 | def to_field(k, v): 60 | if v == "BOOL": 61 | return TextField(k, sortable=True) 62 | elif v == "NUMERIC": 63 | return NumericField(k, sortable=True) 64 | elif v == "TEXT": 65 | return TextField(k) 66 | elif v == "TAG": 67 | return TagField(k) 68 | else: 69 | return GeoField(k) 70 | 71 | 72 | 73 | """ 74 | Dictionary Validation 75 | """ 76 | 77 | 78 | 79 | def is_valid_geo(_dict:dict): 80 | """ That we have the appropiate values """ 81 | schema = { 82 | "long": {"type": "number"}, 83 | "lat": {"type": "number"}, 84 | "distance": {"type": "number", "required":False}, 85 | "metric": {"type": "string", "allowed": ["m","km","mi","ft"], "required":False} 86 | } 87 | return _global_validator.validate(_dict, schema) 88 | 89 | def is_valid_bool(_dict:dict): 90 | """ That we have the appropiate values to create a query function for a boolean """ 91 | schema = { 92 | "toggle": {"type": "boolean"}, 93 | } 94 | return _global_validator.validate(_dict, schema) 95 | 96 | def is_valid_numeric(_dict:dict): 97 | """ That we have the appropiate values to do a numeric query """ 98 | schema = { 99 | "operation": {"type": "string", "allowed": ['greater', 'lesser', 'between', 'exact']}, 100 | "upper": {"type": "number"}, 101 | "lower": {"type": "number"} 102 | } 103 | return _global_validator.validate(_dict, schema) 104 | 105 | def is_valid_tags(_dict:dict): 106 | schema = { 107 | "operation": {"type": "string", "allowed": ['and', 'or']}, 108 | "tags": {"type": "list", "schema": {"type": "string"}}, 109 | } 110 | return _global_validator.validate(_dict, schema) 111 | 112 | def is_valid_text(_dict:dict): 113 | schema = { 114 | "term": {"type": "string"}, 115 | "is_exact": {"type": "boolean", "required":False}, 116 | } 117 | return _global_validator.validate(_dict, schema) 118 | 119 | def is_queryable_dict(_dict:dict): 120 | """ """ 121 | schema = { 122 | "type": { 123 | "type": "string", 124 | "allowed": ["GEO", "TEXT", "BOOL", "NUMERIC", "TAG"] 125 | }, 126 | "is_filter": { 127 | "type": "boolean" 128 | }, 129 | "values": { 130 | "type": "dict" 131 | } 132 | } 133 | return _global_validator.validate(_dict, schema) 134 | 135 | # Specific queryable information 136 | 137 | 138 | def main(): 139 | _search_item = { 140 | "type": "GEO", 141 | "is_filter": False, 142 | "values": { 143 | "long": 33, 144 | "lat": -10, 145 | "distance": 1, 146 | "metric": "km" 147 | } 148 | } 149 | 150 | _numeric_search_item = { 151 | "operation": "between", 152 | "upper": 0, 153 | "lower": 0 154 | } 155 | 156 | _bool_search_values = { 157 | "toggle": True 158 | } 159 | 160 | print(is_queryable_dict(_search_item)) 161 | print(is_valid_numeric(_numeric_search_item)) 162 | print(is_valid_bool(_bool_search_values)) 163 | 164 | 165 | if __name__ == "__main__": 166 | main() -------------------------------------------------------------------------------- /examples/sample_env_refactor.py: 
--------------------------------------------------------------------------------
 1 | import time
 2 | import maya
 3 | import uuid
 4 | import random
 5 | import vaex
 6 | import pandas as pd
 7 | from copy import copy
 8 | from loguru import logger
 9 | from jamboree import DBHandler
 10 | from jamboree import Jamboree
 11 | from contextlib import ContextDecorator
 12 | from pprint import pprint
 13 | 
 14 | class timecontext(ContextDecorator):
 15 |     def __enter__(self):
 16 |         self.start = maya.now()._epoch
 17 |         return self
 18 | 
 19 |     def __exit__(self, *exc):
 20 |         self.end = maya.now()._epoch
 21 |         delta = self.end - self.start
 22 |         print(f"It took {delta}s")
 23 |         return False
 24 | 
 25 | 
 26 | class SampleEnvHandler(DBHandler):
 27 |     """Abstract handler that we use to keep track of information.
 28 |     """
 29 | 
 30 |     def __init__(self):
 31 |         # mongodb_host= "localhost", redis_host="localhost", redis_port=6379
 32 |         super().__init__()
 33 |         self.entity = "sample"
 34 |         self.required = {
 35 |             "episode": str
 36 |         }
 37 |         self._balance = 0
 38 |         self._limit = 100
 39 |         self['opt_type'] = "live"
 40 | 
 41 |     @property
 42 |     def limit(self):
 43 |         return self._limit
 44 | 
 45 |     @limit.setter
 46 |     def limit(self, limit):
 47 |         self._limit = limit
 48 | 
 49 |     @property
 50 |     def count(self):
 51 |         return super().count()
 52 | 
 53 |     @property
 54 |     def balance(self):
 55 |         """ Gets the current balance: the sum of the most recently popped values """
 56 |         return self._balance
 57 | 
 58 |     @property
 59 |     def transactions(self) -> vaex.dataframe.DataFrame:
 60 |         """ Get the last 100 transactions """
 61 |         many_records = self.many(self.limit)
 62 | 
 63 |         if isinstance(many_records, dict):
 64 |             frame = pd.DataFrame(many_records)
 65 |             transactions_frame = vaex.from_pandas(frame)
 66 |             return transactions_frame.sort('timestamp', ascending=False)
 67 | 
 68 |         if len(many_records) > 0:
 69 |             frame = pd.DataFrame(many_records)
 70 |             transactions_frame = vaex.from_pandas(frame)
 71 |             return transactions_frame.sort('timestamp', ascending=False)
 72 | 
 73 |         return vaex.from_pandas(pd.DataFrame())
 74 | 
 75 |     def save_update_recent(self, data: dict):
 76 |         transactions = self.transactions
 77 |         count = transactions.count()
 78 |         new_value = data['value'] + count
 79 |         data['value'] = int(new_value)
 80 |         super().save(data)
 81 | 
 82 |     def pop_many(self, _limit: int = 1, alt: dict = {}):
 83 |         return super().pop_many(_limit, alt)
 84 | 
 85 |     def copy(self):
 86 |         new_sample = SampleEnvHandler()
 87 |         new_sample.data = copy(self.data)
 88 |         new_sample.required = copy(self.required)
 89 |         new_sample._required = copy(self._required)
 90 |         new_sample.limit = copy(self.limit)
 91 |         new_sample.event_proc = self.event_proc
 92 |         return new_sample
 93 | 
 94 | 
 95 | def flip(n=0.02):
 96 |     if n >= random.uniform(0, 1):
 97 |         return True
 98 |     return False
 99 | 
 100 | 
 101 | def main():
 102 |     jambo = Jamboree()
 103 |     sample_env_handler = SampleEnvHandler()
 104 |     sample_env_handler.limit = 250
 105 |     sample_env_handler.processor = jambo
 106 |     # with timecontext():
 107 |     current_time = maya.now()._epoch
 108 |     mult = 60
 109 | 
 110 |     # Create a new set of records and swap to another location to be acted on.
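    # `save` appends a single record under the current episode, `save_many`
    # bulk-appends, and `last_by` fetches the record closest to a given epoch.
    # A minimal sketch of the same round trip, assuming a configured handler
    # like the one above (the names here are illustrative):
    #
    #   handler = SampleEnvHandler()
    #   handler.processor = Jamboree()
    #   handler['episode'] = uuid.uuid1().hex
    #   handler.save({"value": 1.0, "time": maya.now()._epoch})
    #   assert handler.last()["value"] == 1.0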
111 | sample_env_handler['episode'] = uuid.uuid1().hex 112 | with timecontext(): 113 | super_index = 0 114 | for _ in range(100): 115 | v1 = random.uniform(0, 12) 116 | sample_env_handler.save({"value": v1, "time": (current_time + (mult * super_index))}) 117 | super_index += 1 118 | 119 | many_list = [] 120 | catch_index_1 = random.randint(super_index-10, super_index+3) 121 | catch_index_2 = random.randint(super_index-10, super_index+3) 122 | last_by_time = (current_time + (mult * catch_index_1)) 123 | last_by_time_2 = (current_time + (mult * catch_index_2)) 124 | for _ in range(10): 125 | item = {"valuesssssss": random.uniform(0, 12), "time": (current_time + (mult * super_index))} 126 | many_list.append(item) 127 | super_index += 1 128 | 129 | sample_env_handler.save_many(many_list) 130 | latest = sample_env_handler.last() 131 | last_by = sample_env_handler.last_by(last_by_time, ar="relative") 132 | last_by_2 = sample_env_handler.last_by(last_by_time_2, ar="relative") 133 | 134 | 135 | t1 = last_by.get('time', time.time()) 136 | t2 = last_by_2.get('time', time.time()) 137 | 138 | logger.info(latest) 139 | logger.success(t1) 140 | logger.error(t2) 141 | 142 | 143 | if __name__ == "__main__": 144 | main() 145 | -------------------------------------------------------------------------------- /jamboree/handlers/complex/backtestable/db.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from loguru import logger 3 | 4 | import maya 5 | import pandas as pd 6 | 7 | from jamboree import Jamboree 8 | from jamboree import JamboreeNew 9 | 10 | from jamboree.handlers.complex.meta import MetaHandler 11 | # from jamboree.handlers.complex.metric import MetricHandler 12 | 13 | from jamboree.handlers.default.time import TimeHandler 14 | from jamboree.handlers.default.db import DBHandler 15 | from jamboree.handlers.default import BlobStorageHandler, Access 16 | 17 | class BacktestDBHandler(DBHandler): 18 | """ 19 | # BACKTEST HANDLER 20 | --- 21 | 22 | A way to load in time and metadata information into classes that already use DB handler. 23 | 24 | If we're using blobhandler use object below 25 | 26 | 27 | """ 28 | 29 | def __init__(self): 30 | super().__init__() 31 | 32 | 33 | # Other objects to consider 34 | self._time:TimeHandler = TimeHandler() 35 | self._meta: MetaHandler = MetaHandler() 36 | # self._metrics: MetricHandler = MetricHandler() 37 | self._episode = uuid.uuid4().hex 38 | 39 | 40 | self._is_live = False 41 | self.is_event = False # use to make sure there's absolutely no duplicate data 42 | 43 | @property 44 | def episode(self) -> str: 45 | return self._episode 46 | 47 | @episode.setter 48 | def episode(self, _episode:str): 49 | self._episode = _episode 50 | 51 | @property 52 | def live(self) -> bool: 53 | return self._is_live 54 | 55 | @live.setter 56 | def live(self, _live:bool): 57 | self._is_live = _live 58 | 59 | @property 60 | def time(self) -> 'TimeHandler': 61 | # self._time.event = self.event 62 | self._time.processor = self.processor 63 | self._time['episode'] = self.episode 64 | self._time['live'] = self.live 65 | return self._time 66 | 67 | @time.setter 68 | def time(self, _time:'TimeHandler'): 69 | self._time = _time 70 | 71 | def reset(self): 72 | """ Reset the data we're querying for. 
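Currently only the time handler is reset here; the metadata and metric resets just below are commented out, so callers that need them must invoke them explicitly.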
""" 73 | # self.reset_current_metric() 74 | # self.metadata.reset() 75 | self.time.reset() 76 | 77 | 78 | 79 | def __str__(self) -> str: 80 | name = self["name"] 81 | category = self["category"] 82 | subcategories = self["subcategories"] 83 | jscat = self.main_helper.generate_hash(subcategories) 84 | return f"{name}:{category}:{jscat}" 85 | 86 | 87 | class BacktestDBHandlerWithAccess(Access): 88 | """ 89 | # BACKTEST HANDLER With Metadata Accessors 90 | --- 91 | 92 | A way to load in time and metadata information into classes that already use DB handler. 93 | 94 | If we're using blobhandler use object below. 95 | 96 | 97 | """ 98 | 99 | def __init__(self): 100 | super().__init__() 101 | self.required = { 102 | "name": str, 103 | "category": str, 104 | "subcategories": dict, 105 | "metatype": str, 106 | "submetatype": str, 107 | "abbreviation": str, 108 | } 109 | 110 | # Other objects to consider 111 | self._time:TimeHandler = TimeHandler() 112 | self._meta: MetaHandler = MetaHandler() 113 | # self._metrics: MetricHandler = MetricHandler() 114 | self._episode = uuid.uuid4().hex 115 | 116 | 117 | self._is_live = False 118 | self.is_event = False # use to make sure there's absolutely no duplicate data 119 | 120 | @property 121 | def episode(self) -> str: 122 | return self._episode 123 | 124 | @episode.setter 125 | def episode(self, _episode:str): 126 | self._episode = _episode 127 | 128 | @property 129 | def live(self) -> bool: 130 | return self._is_live 131 | 132 | @live.setter 133 | def live(self, _live:bool): 134 | self._is_live = _live 135 | 136 | @property 137 | def time(self) -> 'TimeHandler': 138 | # self._time.event = self.event 139 | self._time.processor = self.processor 140 | self._time['episode'] = self.episode 141 | self._time['live'] = self.live 142 | return self._time 143 | 144 | @time.setter 145 | def time(self, _time:'TimeHandler'): 146 | self._time = _time 147 | 148 | def reset(self): 149 | """ Reset the data we're querying for. 
""" 150 | # self.reset_current_metric() 151 | # self.metadata.reset() 152 | self.time.reset() 153 | 154 | 155 | 156 | def __str__(self) -> str: 157 | name = self["name"] 158 | category = self["category"] 159 | subcategories = self["subcategories"] 160 | jscat = self.main_helper.generate_hash(subcategories) 161 | return f"{name}:{category}:{jscat}" -------------------------------------------------------------------------------- /jamboree/utils/support/search/assistance/inserter.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import List 3 | from jamboree.utils.support.search import ( is_gen_type, is_generic, is_geo, is_valid_geo, 4 | is_nested, name_match, to_field, 5 | to_str, is_valid_text, is_valid_numeric, is_queryable_dict) 6 | 7 | 8 | 9 | def boolean_process(field, is_true=False): 10 | """ Return a dicionary that has a TEXT value to represent a boolean """ 11 | bstring = "FALSE" 12 | if is_true: 13 | bstring = "TRUE" 14 | return { 15 | field: bstring 16 | } 17 | 18 | def list_process(field, item_list:List[str]): 19 | """ Return a dictionary representing a list of tags""" 20 | # if isinstance(item_list, list): 21 | if len(item_list) == 0: 22 | return { 23 | 24 | } 25 | saved_list = [] 26 | 27 | for i in item_list: 28 | saved_list.append(f"{i}") 29 | return { 30 | field: ",".join(saved_list) 31 | } 32 | 33 | def text_process(field:str, term:str, is_exact=False): 34 | if is_exact: 35 | return { 36 | field: term 37 | } 38 | return { 39 | field:term 40 | } 41 | 42 | def number_process(field, number): 43 | return { 44 | field: number 45 | } 46 | 47 | 48 | 49 | def geo_process_dict(field:str, dictionary:dict): 50 | """ Converts a dictionary into a dictionary string""" 51 | vals = dictionary['values'] 52 | if not is_valid_geo(vals): 53 | return {} 54 | lon = vals.get("long") 55 | lat = vals.get("lat") 56 | return { 57 | field: f"{lon},{lat}" 58 | } 59 | 60 | def num_process_dict(field:str, dictionary:dict): 61 | d_vals = dictionary['values'] 62 | if is_valid_numeric(d_vals): 63 | _operation = d_vals.get("operation") 64 | _upper = d_vals.get("upper") 65 | _lower = d_vals.get("lower") 66 | 67 | if _operation == "greater": 68 | return number_process(field, _upper) 69 | elif _operation == "lesser": 70 | return number_process(field, _lower) 71 | elif _operation == "between": 72 | return number_process(field, _upper) 73 | elif _operation == "exact": 74 | _is_exact = (_upper == _lower) 75 | if _is_exact: 76 | return number_process(field, _upper) 77 | return {} 78 | 79 | def text_process_dict(field, dictionary:dict): 80 | """ Create a simple text field from the dictionary""" 81 | values = dictionary.get("values") 82 | if is_valid_text(values): 83 | is_exact = values.get("is_exact", False) 84 | _term = values.get("term", False) 85 | filtered_term = re.sub('[^a-zA-Z0-9\n\.|\*|\@|\|\_]', ' ', _term) 86 | return text_process(field, filtered_term, is_exact=is_exact) 87 | return { 88 | 89 | } 90 | 91 | 92 | def create_insertable(example:dict): 93 | insertable = {} 94 | for k, v in example.items(): 95 | if isinstance(v, list): 96 | insertable.update(list_process(k, v)) 97 | elif isinstance(v, str): 98 | insertable.update(text_process(k, v)) 99 | elif isinstance(v, bool): 100 | insertable.update(boolean_process(k, v)) 101 | elif isinstance(v, float) or isinstance(v, int): 102 | insertable.update(number_process(k, v)) 103 | elif isinstance(v, dict): 104 | if not is_queryable_dict(v): 105 | continue 106 | if v['type'] == "NUMERIC": 107 | 
insertable.update(num_process_dict(k, v)) 108 | 109 | if v['type'] == "GEO": 110 | insertable.update(geo_process_dict(k, v)) 111 | 112 | if v['type'] == "TEXT": 113 | insertable.update(text_process_dict(k, v)) 114 | return insertable 115 | 116 | 117 | def main(): 118 | """ Convert a dictionary into an insertable dictionary""" 119 | example = { 120 | "maybe": True, 121 | "gtags": ["one", "two", "three"], 122 | "current": { 123 | "type": "NUMERIC", 124 | "is_filter": True, 125 | "values": { 126 | "lower": 33, 127 | "upper": 0, 128 | "operation": "between" 129 | } 130 | }, 131 | "loc": { 132 | "type": "GEO", 133 | "is_filter": True, 134 | "values": { 135 | "long": 33, 136 | "lat": -10, 137 | "distance": 1.2, 138 | "metric": "km" 139 | } 140 | }, 141 | "exact_text": { 142 | "type": "TEXT", 143 | "is_filter": True, 144 | "values": { 145 | "term": "hello world" 146 | } 147 | }, 148 | } 149 | 150 | 151 | for _ in range(100): 152 | insertable = create_insertable(example) 153 | 154 | print(insertable) 155 | pass 156 | 157 | 158 | if __name__ == "__main__": 159 | main() -------------------------------------------------------------------------------- /jamboree/storage/databases/jmongo.py: -------------------------------------------------------------------------------- 1 | import maya 2 | from loguru import logger 3 | from typing import Dict, List, Any 4 | from jamboree.storage.databases import DatabaseConnection 5 | 6 | 7 | class MongoDatabaseConnection(DatabaseConnection): 8 | def __init__(self) -> None: 9 | super().__init__() 10 | 11 | """ Save commands """ 12 | 13 | def save(self, query: dict, data: dict): 14 | if not self.helpers.validate_query(query): 15 | # Log a warning here instead 16 | return 17 | timestamp = maya.now()._epoch 18 | query.update(data) 19 | query['timestamp'] = timestamp 20 | self.connection.store(query) 21 | 22 | def save_many(self, query: Dict[str, Any], data: List[Dict]): 23 | if not self.helpers.validate_query(query) or len(data) == 0: 24 | return 25 | 26 | first_item = data[0] 27 | first_item.update(query) 28 | updated_list = [self.helpers.update_dict(query, x) for x in data] 29 | self.connection.bulk_upsert(updated_list, _column_first=first_item.keys(), _in=['timestamp']) 30 | 31 | """ 32 | Update commands 33 | """ 34 | 35 | def update_single(self, query): 36 | pass 37 | 38 | def update_many(self, query): 39 | pass 40 | 41 | """ 42 | Delete Commands 43 | """ 44 | 45 | def delete(self, query: dict, details: dict): 46 | if not self.helpers.validate_query(query): 47 | return 48 | 49 | query.update(details) 50 | self.connection.delete(query) 51 | 52 | def delete_many(self, query: dict, details: dict = {}): 53 | if not self.helpers.validate_query(query): 54 | return 55 | 56 | query.update(details) 57 | self.connection.delete_many(query) 58 | 59 | def delete_all(self, query: dict): 60 | logger.info("Same as `delete_many`") 61 | self.delete_many(query) 62 | 63 | """ 64 | Query commands 65 | """ 66 | 67 | def query_latest(self, query: dict, abs_rel="absolute"): 68 | if not self.helpers.validate_query(query): 69 | return {} 70 | latest_items = self.connection.query_last(query) 71 | return latest_items 72 | 73 | def query_latest_many(self, query: dict): 74 | if not self.helpers.validate_query(query): 75 | return [] 76 | latest_items = list(self.connection.query_latest(query)) 77 | return latest_items 78 | 79 | def query_all(self, query: dict): 80 | if not self.helpers.validate_query(query): 81 | return [] 82 | mongo_data = list(self.connection.query(query)) 83 | return mongo_data 
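    # NOTE: the time-based queries below accept bounds such as `min_epoch`,
    # `max_epoch`, `limit`, and `abs_rel`, but as written they are not all
    # forwarded to the underlying connection (`query_latest_by_time`, for
    # example, never uses `max_epoch` or `limit`). Callers should treat those
    # bounds as advisory until the connection calls are wired up.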
84 | 85 | 86 | def query_latest_by_time(self, query:dict, max_epoch:float, abs_rel:str="absolute", limit:int=10): 87 | if not self.helpers.validate_query(query): 88 | return {} 89 | latest_items = self.connection.query_closest(query) 90 | return latest_items 91 | 92 | def query_between(self, query:dict, min_epoch:float, max_epoch:float, abs_rel:str="absolute"): 93 | if not self.helpers.validate_query(query): 94 | return {} 95 | 96 | latest_items = list(self.connection.query_time(query, time_type="window", start=min_epoch)) 97 | if len(latest_items) == 0: 98 | return [] 99 | return latest_items 100 | 101 | def query_before(self, query): 102 | if not self.helpers.validate_query(query): 103 | return [] 104 | mongo_data = list(self.connection.query(query)) 105 | return mongo_data 106 | 107 | def query_after(self, query): 108 | if not self.helpers.validate_query(query): 109 | return [] 110 | mongo_data = list(self.connection.query(query)) 111 | return mongo_data 112 | 113 | """ Swap focused commands""" 114 | 115 | def query_mix_swap(self): 116 | pass 117 | 118 | def swap(self): 119 | pass 120 | 121 | """ 122 | Pop commands 123 | """ 124 | 125 | def pop(self, query: dict): 126 | if not self.helpers.validate_query(query): 127 | return [] 128 | 129 | query['limit'] = 1 130 | item = list(self.connection.query_latest(query)) 131 | if item is not None: 132 | self.connection.delete(item) 133 | return item 134 | 135 | def pop_many(self, query: dict, limit: int = 10): 136 | if not self.helpers.validate_query(query): 137 | return [] 138 | 139 | query['limit'] = limit 140 | items = list(self.connection.query_latest(query)) 141 | if len(items) == 0: 142 | return [] 143 | for item in items: 144 | self.connection.delete(item) 145 | return items 146 | 147 | def get_latest_many_swap(self): 148 | pass 149 | 150 | """ Other Functions """ 151 | 152 | def reset(self, query: dict): 153 | pass 154 | 155 | def count(self, query: dict): 156 | if not self.helpers.validate_query(query): 157 | return 0 158 | query.pop('limit', None) 159 | records = list(self.connection.query(query)) 160 | record_len = len(records) 161 | return record_len 162 | -------------------------------------------------------------------------------- /jamboree/handlers/complex/metric.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from loguru import logger 3 | import maya 4 | import pandas as pd 5 | 6 | 7 | from jamboree import JamboreeNew 8 | from jamboree.handlers.complex.backtestable import BacktestDBHandler 9 | from jamboree.handlers.processors import DynamicResample, DataProcessorsAbstract 10 | from jamboree.utils.core import omit 11 | 12 | 13 | 14 | class MetricHandler(BacktestDBHandler): 15 | """ 16 | # METRIC HANDLER 17 | --- 18 | 19 | A simple metric handler. To be used with all machine learning related functions. 20 | 21 | Given an episode and other crucial information, we'll give all information regarding how something has progressed. 
22 | 23 | Some considerations: 24 | 25 | * We'll load the most recent metrics into the metadata handler (which will let us search for the metric) 26 | * We want to know how a batch model has been doing between episodes 27 | * Something that sends aggregation commands for all models that have been touched for a given category, subcategory, name set 28 | * That would also require us to know how to preprocess that information prior to adding it into the database 29 | * We'd also want to not do this too often to reduce CPU initialization load 30 | * A form or rate limiter 31 | 32 | 33 | """ 34 | 35 | def __init__(self): 36 | super().__init__() 37 | self.entity = "metric" 38 | self.required = { 39 | "category": str, 40 | "subcategories": dict, 41 | "name": str 42 | } 43 | 44 | self._preprocessor: DataProcessorsAbstract = DynamicResample("data") 45 | 46 | 47 | @property 48 | def preprocessor(self) -> DataProcessorsAbstract: 49 | return self._preprocessor 50 | 51 | @preprocessor.setter 52 | def preprocessor(self, _preprocessor: DataProcessorsAbstract): 53 | self._preprocessor = _preprocessor 54 | 55 | def log(self, metric_dict:dict): 56 | """ Logs a metrics at the current time """ 57 | current_time = self.time.head 58 | metric_dict['time'] = current_time 59 | metric_dict['timestamp'] = maya.now()._epoch 60 | # Add something here to make this searchable as well. 61 | self.save(metric_dict) 62 | 63 | def latest(self): 64 | """ Get the latest """ 65 | _latest = self.last(ar='relative') 66 | omitted = omit(['episode', 'mtype', 'live', 'category', 'subcategories', 'type', 'name'], _latest) 67 | return omitted 68 | 69 | def reset_current_metric(self): 70 | self['episode'] = self.episode 71 | self['live'] = self.live 72 | 73 | def reset(self): 74 | """ Reset the data we're querying for. """ 75 | super().reset() 76 | self.reset_current_metric() 77 | 78 | def step_time(self): 79 | """ """ 80 | self.time.step() 81 | pass 82 | 83 | 84 | 85 | def __str__(self) -> str: 86 | name = self["name"] 87 | category = self["category"] 88 | subcategories = self["subcategories"] 89 | jscat = self.main_helper.generate_hash(subcategories) 90 | return f"{name}:{category}:{jscat}" 91 | 92 | def metric_test(): 93 | """ Test monitoring an online learning algorithm (using creme). """ 94 | import random 95 | 96 | jambo = JamboreeNew() 97 | metric_log = MetricHandler() 98 | metric_log['category'] = "model" 99 | metric_log['subcategories'] = {} 100 | metric_log['name'] = "general_regressor" 101 | metric_log.processor = jambo 102 | metric_log.reset() 103 | metric_log.time.change_stepsize(hours=0, microseconds=10) 104 | while True: 105 | metric_log.reset_current_metric() 106 | metric_schema = { 107 | "accuracy": random.uniform(0, 1), 108 | "f1": random.uniform(0, 1) 109 | } 110 | metric_log.log(metric_schema) 111 | saved_metric = metric_log.latest() 112 | metric_log.step_time() 113 | print(saved_metric) 114 | 115 | 116 | if __name__ == "__main__": 117 | metric_test() 118 | # import pandas_datareader.data as web 119 | # data_msft = web.DataReader('MSFT','yahoo',start='2010/1/1',end='2020/1/30').round(2) 120 | # data_apple = web.DataReader('AAPL','yahoo',start='2010/1/1',end='2020/1/30').round(2) 121 | # print(data_apple) 122 | # episode_id = uuid.uuid4().hex 123 | # jambo = Jamboree() 124 | # jam_processor = JamboreeNew() 125 | # data_hander = DataHandler() 126 | # data_hander.event = jambo 127 | # data_hander.processor = jam_processor 128 | # # The episode and live parameters are probably not good for the scenario. 
Will probably need to switch to something else to identify data 129 | # data_hander.episode = episode_id 130 | # data_hander.live = False 131 | # data_hander['category'] = "markets" 132 | # data_hander['subcategories'] = { 133 | # "market": "stock", 134 | # "country": "US", 135 | # "sector": "techologyyyyyyyy" 136 | # } 137 | # data_hander['name'] = "MSFT" 138 | # data_hander.reset() 139 | # data_hander.store_time_df(data_msft, is_bar=True) 140 | 141 | 142 | # data_hander['name'] = "AAPL" 143 | # data_hander.store_time_df(data_apple, is_bar=True) 144 | # data_hander.reset() 145 | 146 | # data_hander.time.head = maya.now().subtract(weeks=200, hours=14)._epoch 147 | # data_hander.time.change_stepsize(microseconds=0, days=1, hours=0) 148 | # data_hander.time.change_lookback(microseconds=0, weeks=4, hours=0) 149 | 150 | 151 | # while data_hander.is_next: 152 | # logger.info(magenta(data_hander.time.head, bold=True)) 153 | # print(data_hander.dataframe_from_head()) 154 | # data_hander.time.step() -------------------------------------------------------------------------------- /jamboree/handlers/abstracted/datasets/price.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import warnings 3 | warnings.simplefilter(action='ignore', category=FutureWarning) 4 | import maya 5 | import pprint 6 | from loguru import logger 7 | from typing import List 8 | from jamboree.handlers.default import DataHandler 9 | from jamboree import Jamboree 10 | from jamboree.utils.support.search import querying 11 | 12 | class PriceData(DataHandler): 13 | """ 14 | # Price Data 15 | 16 | A way to browse and interact with price data. Is an extension of DataHandler and includes basic searches. 17 | 18 | """ 19 | def __init__(self): 20 | super().__init__() 21 | self['category'] = "markets" 22 | self['submetatype'] = "price" 23 | self.sc = "subcategories" # storing the placeholder key to prevent misspelling 24 | # self.cat = "category" # storing variable placeholder key to prevent misspelling 25 | 26 | @property 27 | def markets(self) -> List[str]: 28 | return [ 29 | 'crypto', 'stock', 'commodities', 'forex', 'simulation' 30 | ] 31 | 32 | 33 | 34 | def by_market(self, market_type:str): 35 | """ 36 | # Find All Datasets By Market 37 | 38 | market_type: ['crypto', 'stock', 'commodities', 'forex', 'simulation'] 39 | """ 40 | if market_type not in self.markets: 41 | logger.error(f"Not the correct type: {market_type} must be {self.markets}") 42 | return [] 43 | 44 | _search = self.search 45 | _search[self.sc] = { 46 | "market": market_type 47 | } 48 | # print(_search.query_builder.build()) 49 | return _search.find() 50 | 51 | def by_country(self, country:str): 52 | 53 | """ 54 | # Find All Datasets By Country 55 | 56 | """ 57 | 58 | if not isinstance(country, str): 59 | logger.error("The country is not the string.") 60 | return [] 61 | _search = self.search 62 | _search[self.sc] = { 63 | "country": country, 64 | # "data" 65 | } 66 | return _search.find() 67 | 68 | def by_sector(self, sector:str): 69 | """ 70 | # Find All Datasets By Sector 71 | 72 | """ 73 | 74 | 75 | if not isinstance(sector, str): 76 | logger.error("The sector should be a string.") 77 | return [] 78 | _search = self.search 79 | _search[self.sc] = { 80 | "sector": sector 81 | } 82 | return _search.find() 83 | 84 | def by_name(self, name:str): 85 | """ 86 | # Find All Datasets By Sector 87 | 88 | """ 89 | 90 | 91 | if not isinstance(name, str): 92 | logger.error("The sector should be a string.") 93 | return [] 94 
| _search = self.search 95 | _search["name"] = name 96 | return _search.find() 97 | 98 | 99 | def by_exchange(self, name:str): 100 | if not isinstance(name, str): 101 | logger.error("The sector should be a string.") 102 | return [] 103 | _search = self.search 104 | _search[self.sc] = { 105 | "exchange": name 106 | } 107 | return _search.find() 108 | 109 | 110 | def multi_search(self, name=None, country=None, sector=None, market=None, exchange=None, is_exact_subcategory=False): 111 | """ Search with our conventional parameters for our pricing datasets """ 112 | all_variables = {"name": name, "country": country, "sector": sector, "market": market, "exchange": exchange} 113 | _name = None 114 | _subcat_dict = {} 115 | for k, v in all_variables.items(): 116 | if v is None: 117 | continue 118 | if k == "name": 119 | _name = v 120 | continue 121 | if k == "market": 122 | if v not in self.markets: 123 | continue 124 | 125 | if is_exact_subcategory: 126 | _subcat_dict[k] = querying.text.exact(v) 127 | else: 128 | _subcat_dict[k] = v 129 | 130 | is_size = (len(_subcat_dict) == 0) 131 | is_name = (_name is None) 132 | if is_size and is_name: 133 | return [] 134 | 135 | _search = self.search 136 | 137 | if not is_name: 138 | _search["name"] = name 139 | if not is_size: 140 | _search[self.sc] = _subcat_dict 141 | _search.processor = self.processor 142 | return _search.find() 143 | 144 | 145 | def build(self, name:str, abbv:str, country:str="US", sector:str="tech", market:str="stock", exchange:str="binance"): 146 | self['name'] = name 147 | self['abbreviation'] = abbv 148 | self['subcategories'] = { 149 | "market": market, 150 | "country": country, 151 | "sector": sector, 152 | "exchange": exchange, 153 | } 154 | return self 155 | 156 | # def get(self, name=None, country=None, sector=None, ) 157 | 158 | def main(): 159 | import pandas_datareader.data as web 160 | # data_msft = web.DataReader('MSFT','yahoo',start='2019/9/1',end='2020/1/30').round(2) 161 | # data_apple = web.DataReader('AAPL','yahoo',start='2019/9/1',end='2020/1/30').round(2) 162 | episode_id = uuid.uuid4().hex 163 | jambo = Jamboree() 164 | jam_processor = Jamboree() 165 | data_hander = PriceData() 166 | data_hander.processor = jam_processor 167 | trx_tron = data_hander.build("Tron", "TRX", country="Japan", sector="oil", market="commodities", exchange="binance") 168 | # The episode and live parameters are probably not good for the scenario. Will probably need to switch to something else to identify data 169 | trx_tron.episode = episode_id 170 | trx_tron.live = False 171 | trx_tron.reset() 172 | 173 | 174 | res = trx_tron.multi_search(country="jap") 175 | pprint.pprint(res) 176 | 177 | 178 | if __name__ == "__main__": 179 | main() -------------------------------------------------------------------------------- /jamboree/handlers/abstracted/datasets/orderbook.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import warnings 3 | warnings.simplefilter(action='ignore', category=FutureWarning) 4 | import maya 5 | import pprint 6 | from loguru import logger 7 | from typing import List 8 | from jamboree.handlers.default import DataHandler 9 | from jamboree import Jamboree 10 | from jamboree.utils.support.search import querying 11 | # from jamboree.handlers.abstracted.search.meta import MetaDataSearchHandler 12 | class OrderbookData(DataHandler): 13 | """ 14 | # Orderbook Data 15 | 16 | A way to browse and interact with price data. Is an extension of DataHandler and includes basic searches. 
17 | 18 | """ 19 | def __init__(self): 20 | super().__init__() 21 | self['subtype'] = "orderbook" 22 | self['category'] = "markets" 23 | 24 | self.sc = "subcategories" # storing the placeholder key to prevent misspelling 25 | self.cat = "category" # storing variable placeholder key to prevent misspelling 26 | 27 | @property 28 | def markets(self) -> List[str]: 29 | return [ 30 | 'crypto', 'stock', 'commodities', 'forex', 'simulation' 31 | ] 32 | 33 | 34 | def by_market(self, market_type:str): 35 | """ 36 | # Find All Datasets By Market 37 | 38 | market_type: ['crypto', 'stock', 'commodities', 'forex', 'simulation'] 39 | """ 40 | if market_type not in self.markets: 41 | logger.error(f"Not the correct type: {market_type} must be {self.markets}") 42 | return [] 43 | 44 | _search = self.search 45 | _search[self.cat] = "markets" 46 | _search[self.sc] = { 47 | "market": market_type 48 | } 49 | # print(_search.query_builder.build()) 50 | return _search.find() 51 | 52 | def by_country(self, country:str): 53 | 54 | """ 55 | # Find All Datasets By Country 56 | 57 | """ 58 | 59 | if not isinstance(country, str): 60 | logger.error("The country is not the string.") 61 | return [] 62 | _search = self.search 63 | _search[self.cat] = "markets" 64 | _search[self.sc] = { 65 | "country": country 66 | } 67 | return _search.find() 68 | 69 | def by_sector(self, sector:str): 70 | """ 71 | # Find All Datasets By Sector 72 | 73 | """ 74 | 75 | 76 | if not isinstance(sector, str): 77 | logger.error("The sector should be a string.") 78 | return [] 79 | _search = self.search 80 | _search[self.cat] = "markets" 81 | _search[self.sc] = { 82 | "sector": sector 83 | } 84 | return _search.find() 85 | 86 | def by_name(self, name:str): 87 | """ 88 | # Find All Datasets By Sector 89 | 90 | """ 91 | 92 | 93 | if not isinstance(name, str): 94 | logger.error("The sector should be a string.") 95 | return [] 96 | _search = self.search 97 | _search[self.cat] = "markets" 98 | _search["name"] = name 99 | return _search.find() 100 | 101 | 102 | 103 | def multi_part_search(self, name=None, country=None, sector=None, market=None, exchange=None): 104 | """ """ 105 | all_variables = {"name": name, "country": country, "sector": sector, "market": market, "exchange": exchange} 106 | _name = None 107 | _subcat_dict = {} 108 | for k, v in all_variables.items(): 109 | if v is None: 110 | continue 111 | if k == "name": 112 | _name = v 113 | continue 114 | 115 | _subcat_dict[k] = querying.text.exact(v) 116 | 117 | is_size = (len(_subcat_dict) == 0) 118 | is_name = (_name is None) 119 | if is_size and is_name: 120 | return [] 121 | 122 | _search = self.search 123 | # _search.reset() 124 | 125 | if not is_name: 126 | logger.warning(name) 127 | _search["name"] = name 128 | _search[self.cat] = "markets" 129 | if not is_size: 130 | _search[self.sc] = _subcat_dict 131 | _search.processor = self.processor 132 | return _search.find() 133 | 134 | def main(): 135 | import pandas_datareader.data as web 136 | # data_msft = web.DataReader('MSFT','yahoo',start='2019/9/1',end='2020/1/30').round(2) 137 | # data_apple = web.DataReader('AAPL','yahoo',start='2019/9/1',end='2020/1/30').round(2) 138 | episode_id = uuid.uuid4().hex 139 | jambo = Jamboree() 140 | jam_processor = Jamboree() 141 | data_hander = OrderbookData() 142 | data_hander.processor = jam_processor 143 | data_hander.event = jambo 144 | # The episode and live parameters are probably not good for the scenario. 
Will probably need to switch to something else to identify data 145 | data_hander.episode = episode_id 146 | data_hander.live = False 147 | data_hander['subcategories'] = { 148 | "market": "stock", 149 | "country": "Mexico", 150 | "sector": "tech", 151 | "exchange": "binance", 152 | } 153 | data_hander['name'] = "ETH Ethereum" 154 | data_hander.reset() 155 | # data_hander.store_time_df(data_msft, is_bar=True) 156 | 157 | 158 | data_hander['name'] = "BTC Bitcoin" 159 | # data_hander.reset() 160 | # data_hander.store_time_df(data_apple, is_bar=True) 161 | 162 | start = maya.now()._epoch 163 | # res1 = data_hander.by_name("Bitcoin") 164 | 165 | # logger.debug(res1) 166 | end = maya.now()._epoch 167 | logger.info(end-start) 168 | 169 | res = data_hander.multi_part_search(market="stock", exchange="binance", country="Mexico") 170 | logger.warning(res) 171 | 172 | # _search = data_hander.search 173 | # _search['subcategories'] = { 174 | # "market": "stock" 175 | # } 176 | # _search.remove() 177 | # data_hander.time.head = maya.now().subtract(weeks=200, hours=14)._epoch 178 | # data_hander.time.change_stepsize(microseconds=0, days=1, hours=0) 179 | # data_hander.time.change_lookback(microseconds=0, weeks=4, hours=0) 180 | 181 | 182 | # while data_hander.is_next: 183 | # logger.debug(data_hander.time.head) 184 | # print(data_hander.closest_head()) 185 | # data_hander.time.step() 186 | 187 | 188 | if __name__ == "__main__": 189 | main() -------------------------------------------------------------------------------- /jamboree/utils/support/search/core.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import time 4 | import warnings 5 | from copy import copy 6 | from pprint import pprint 7 | from typing import Any, Dict, List 8 | warnings.simplefilter(action='ignore', category=FutureWarning) 9 | 10 | 11 | from addict import Dict as ADict 12 | from cerberus import Validator 13 | from eliot import log_call, to_file 14 | from loguru import logger 15 | from redis.exceptions import ResponseError 16 | from redisearch import Client, Query 17 | 18 | from jamboree.utils.core import consistent_hash 19 | from jamboree.utils.support.search import (InsertBuilder, QueryBuilder, 20 | is_gen_type, is_generic, is_geo, 21 | is_nested, is_queryable_dict, 22 | name_match, to_field, to_str) 23 | from jamboree.utils.support.search.assistance import Keystore 24 | 25 | 26 | logger.disable(__name__) 27 | """ 28 | 29 | # NOTE 30 | 31 | Basic CRUD operations for the search handler. 
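Each searchable record set gets an index key derived from a consistent hash of its stringified requirements, and nested (SUB) fields live under `<index>:<fieldname>` subkeys; see `process_requirements` and `process_subfields` below.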
32 | """ 33 | 34 | REDIS_PORT = int(os.getenv("REDIS_PORT", "6379")) 35 | REDIS_HOST = str(os.getenv("REDIS_HOST", "localhost")) 36 | 37 | def split_doc(doc): 38 | return doc.id, ADict(**doc.__dict__) 39 | 40 | def dictify(doc): 41 | item = ADict(**doc.__dict__) 42 | item.pop("super_id", None) 43 | item.pop("payload", None) 44 | return item 45 | 46 | class BaseSearchHandlerSupport(object): 47 | def __init__(self): 48 | self._requirements_str = { 49 | 50 | } 51 | self._subkey_names = set() 52 | self._indexable = set() 53 | self.__indexable = [] 54 | self._index_key:str = "" 55 | self._sub_fields = {} 56 | self.insert_builder = InsertBuilder() 57 | self.query_builder = QueryBuilder() 58 | self.keystore = Keystore() 59 | self.added = set() 60 | # Boolean explaining if this is a subquery 61 | self.is_sub_key = False 62 | 63 | 64 | @property 65 | def indexable(self): 66 | return self.__indexable 67 | 68 | @property 69 | def subnames(self): 70 | return self._subkey_names 71 | @property 72 | def index(self): 73 | """Index key for the requirements""" 74 | return self._index_key 75 | 76 | @index.setter 77 | def index(self, _index): 78 | """Index key for the requirements""" 79 | self._index_key = _index 80 | 81 | @property 82 | def subfields(self): 83 | return self._sub_fields 84 | 85 | def process_subfields(self): 86 | for key in self.subnames: 87 | self._sub_fields[key] = f"{self.index}:{key}" 88 | 89 | def process_requirements(self, _requirements:dict): 90 | """ 91 | Process the required fields. That includes: 92 | 93 | 1. Creating a requirements string. That's so we can create a key representing the field that exist. 94 | 2. Listing all of the subkeys that we'd need to take in consideration. 95 | 3. Creating an index hash to locate all relavent documents 96 | 4. Creation of a list of fields so we can create a schema at that index hash 97 | 5. 
Creation of all subkeys so we can quickly access them by name later 98 | 99 | """ 100 | for k, v in _requirements.items(): 101 | if is_generic(v): 102 | sval = to_str(v) 103 | _agg = f"{k}:{sval}" 104 | if _agg not in self.added: 105 | self.added.add(_agg) 106 | self._requirements_str[k] = sval 107 | field = to_field(k, sval) 108 | 109 | self.__indexable.append(field) 110 | continue 111 | 112 | if v == dict: 113 | _agg = f"{k}:SUB" 114 | if _agg not in self.added: 115 | self.added.add(_agg) 116 | self._requirements_str[k] = "SUB" 117 | self.subnames.add(k) 118 | continue 119 | 120 | if is_geo(v): 121 | _agg = f"{k}:GEO" 122 | if _agg not in self.added: 123 | self.added.add(_agg) 124 | self._requirements_str[k] = "GEO" 125 | self.__indexable.append(to_field(k, "GEO")) 126 | 127 | continue 128 | 129 | # self._indexable = set(unique(self._indexable, key=lambda x: x.redis_args()[0])) 130 | if not self.is_sub_key: 131 | self._index_key = consistent_hash(self._requirements_str) 132 | self.process_subfields() 133 | 134 | def is_sub(self, name:str) -> bool: 135 | """ Check to see if this is a subfield """ 136 | return name in self.subnames 137 | 138 | def is_queryable(self, _dict): 139 | if isinstance(_dict, dict): 140 | if is_queryable_dict(_dict): 141 | return True 142 | return False 143 | 144 | def is_valid_sub_key_information(self, subkey_dict:dict): 145 | """ Check to see if the subkey is valid""" 146 | 147 | if len(subkey_dict) == 0: 148 | return False 149 | 150 | 151 | # Run validation to see if all of the keys are reducible to a type and base type 152 | for k, v in subkey_dict.items(): 153 | if is_generic(v): 154 | continue 155 | if isinstance(v, dict): 156 | if not is_queryable_dict(v): 157 | logger.error(f"{k} is not valid") 158 | return False 159 | return True 160 | 161 | def queryable_to_type(self, _dict:dict): 162 | """ Converts a queryable dictionary into a type""" 163 | dtype = _dict['type'] 164 | if dtype == "GEO": 165 | return "GEO" 166 | elif dtype == "TEXT": 167 | return str 168 | elif dtype == "BOOL": 169 | return bool 170 | elif dtype == "NUMERIC": 171 | return float 172 | elif dtype == "TAG": 173 | return list 174 | 175 | def loaded_dict_to_requirements(self, _dict:dict): 176 | """ 177 | # Loaded Dict To Requirements 178 | 179 | Convert a dictionary into a requirements dict. 180 | 181 | Use to create a requirements 182 | 183 | Returns an empty dict if nothing is there. 184 | """ 185 | req = {} 186 | for k, v in _dict.items(): 187 | _ktype = type(v) 188 | if is_generic(_ktype): 189 | req[k] = _ktype 190 | if self.is_queryable(v): 191 | req[k] = self.queryable_to_type(v) 192 | 193 | return req 194 | 195 | 196 | def reset_builders(self): 197 | self.insert_builder = InsertBuilder() 198 | self.query_builder = QueryBuilder() -------------------------------------------------------------------------------- /jamboree/handlers/default/blob.py: -------------------------------------------------------------------------------- 1 | """ 2 | Basic storage handler 3 | --- 4 | 5 | """ 6 | 7 | import copy 8 | from typing import Any, Dict, Optional 9 | 10 | import ujson 11 | from addict import Dict as ADict 12 | 13 | from jamboree.base.processors.abstracts import EventProcessor, Processor 14 | from jamboree.handlers.base import BaseFileHandler, BaseHandler 15 | from jamboree.utils.helper import Helpers 16 | 17 | 18 | class BlobStorageHandler(BaseHandler): 19 | """ 20 | A simple event store using a variation of databases. 
21 | --- 22 | 23 | Currently uses zadd to work 24 | """ 25 | def __init__(self): 26 | # print("DBHandler") 27 | self._entity = "" 28 | self._meta_type = "storage" 29 | self._required = {} 30 | self._query = {} 31 | self._data = {} 32 | self._is_event = True 33 | self._processor: Optional[Processor] = None 34 | self.event_proc: Optional[EventProcessor] = None 35 | self.main_helper: Helpers = Helpers() 36 | self.changed_since_command: bool = False 37 | self.is_skip_check: bool = False 38 | self.call_params = {} 39 | 40 | def __setitem__(self, key, value): 41 | if bool(self.required): 42 | 43 | if key in self.required: 44 | self._query[key] = value 45 | return self._query 46 | self._data[key] = value 47 | self.changed_since_command = True 48 | 49 | return self._data 50 | 51 | def __getitem__(self, key): 52 | if key in self._query.keys(): 53 | return self._query.get(key, None) 54 | else: 55 | if key in self._data.keys(): 56 | return self._data.get(key, None) 57 | return None 58 | 59 | def setup_query(self, alt={}): 60 | query = copy.copy(self._query) 61 | query["type"] = self.entity 62 | query["mtype"] = self._meta_type 63 | query.update(alt) 64 | query.update(self._data) 65 | return query 66 | 67 | @property 68 | def is_event(self) -> bool: 69 | """ Determines if we're going to add event ids to what we're doing. We can essentially set certain conditions""" 70 | return self._is_event 71 | 72 | @is_event.setter 73 | def is_event(self, is_true: bool = False): 74 | self._is_event = is_true 75 | 76 | @property 77 | def processor(self) -> "Processor": 78 | if self._processor is None: 79 | raise AttributeError("The Processor is missing") 80 | return self._processor 81 | 82 | @processor.setter 83 | def processor(self, _processor: "Processor"): 84 | self._processor = _processor 85 | 86 | def clear_event(self) -> None: 87 | self._processor = None 88 | 89 | @property 90 | def entity(self): 91 | return self._entity 92 | 93 | @entity.setter 94 | def entity(self, _entity: str): 95 | self._entity = str(_entity) 96 | 97 | @property 98 | def required(self): 99 | return self._required 100 | 101 | @required.setter 102 | def required(self, _required: Dict[str, Any]): 103 | # check to make sure it's not empty 104 | self._required = _required 105 | 106 | @property 107 | def query(self): 108 | return self._query 109 | 110 | @query.setter 111 | def query(self, _query: Dict[str, Any]): 112 | if len(_query.keys()) > 0: 113 | self._query = _query 114 | 115 | def check(self): 116 | if ((not bool(self._entity)) or (not bool(self._required)) 117 | or (not bool(self._query))): 118 | raise AttributeError(f"One of the key variables is missing.") 119 | 120 | for req in self._required.keys(): 121 | _type = self._required[req] 122 | if req not in self._query: 123 | raise AttributeError(f"{req} is not in the requirements") 124 | if not isinstance(self._query[req], _type): 125 | raise AttributeError(f"{req} is not a {_type}") 126 | return True 127 | 128 | def save(self, data: dict, alt={}, is_overwrite=False): 129 | self.check() 130 | query = self.setup_query(alt) 131 | # Put settings here 132 | current_settings = ADict() 133 | current_settings.overwrite = is_overwrite 134 | self.processor.storage.save(query, data, **current_settings.to_dict()) 135 | self.changed_since_command = False 136 | 137 | def save_version( 138 | self, data: dict, version: str, alt={}, is_overwrite=False 139 | ): 140 | self.check() 141 | query = self.setup_query(alt) 142 | # Put settings here 143 | current_settings = ADict() 144 | 
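        # NOTE: `version` is accepted but never attached to the query or to
        # `current_settings`, so this currently behaves exactly like `save`.
        # A minimal fix (an assumption; the storage layer's versioning
        # contract isn't shown here) would be to set it before the call below:
        #     current_settings.version = version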
self.processor.storage.save(query, data, **current_settings.to_dict()) 145 | 146 | self.changed_since_command = False 147 | 148 | def absolute_exists(self, alt={}): 149 | self.check() 150 | query = self.setup_query(alt) 151 | # Put settings here 152 | current_settings = ADict() 153 | current_settings.is_force = self.changed_since_command 154 | avs = self.processor.storage.absolute_exists( 155 | query, **current_settings.to_dict() 156 | ) 157 | self.changed_since_command = False 158 | return avs 159 | 160 | def last(self, alt={}): 161 | self.check() 162 | query = self.setup_query(alt) 163 | current_settings = ADict() 164 | self.changed_since_command = False 165 | obj = self.processor.storage.query(query, **current_settings.to_dict()) 166 | return obj 167 | 168 | def by_version(self, version: str, alt={}): 169 | """ Get the data by version. """ 170 | self.check() 171 | query = self.setup_query(alt) 172 | current_settings = ADict() 173 | self.processor.storage.query(query, **current_settings.to_dict()) 174 | self.changed_since_command = False 175 | 176 | def delete(self, query: dict, alt={}): 177 | self.check() 178 | query = self.setup_query(alt) 179 | current_settings = ADict() 180 | 181 | self.processor.storage.delete(query, **current_settings) 182 | self.changed_since_command = False 183 | 184 | def lock(self, alt={}): 185 | self.check() 186 | query = self.setup_query(alt) 187 | self.changed_since_command = False 188 | return self.processor.event.lock(query) 189 | 190 | def clear(self): 191 | """ Clear in-memory cache. Will likely port to rocksdb for many of these parts. """ 192 | self.changed_since_command = True 193 | self.is_skip_check = True 194 | self.call_params = {} 195 | 196 | def __call__(self, **kwargs): 197 | if "alt" in kwargs: 198 | alt = kwargs.get("alt") 199 | if alt is isinstance(alt, dict): 200 | self.call_params["alt"] 201 | 202 | def __enter__(self): 203 | self.check() 204 | self.is_skip_check = True 205 | 206 | return self 207 | 208 | def __exit__(self, exc_type, exc_val, exc_tb): 209 | self.clear() 210 | -------------------------------------------------------------------------------- /jamboree/base/handler.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, ABCMeta 2 | import copy 3 | 4 | from typing import Dict, Any, List 5 | from loguru import logger 6 | from .processor import EventProcessor 7 | 8 | 9 | class BaseHandler(object, metaclass=ABCMeta): 10 | """ 11 | A way to handle reads and writes consistently without having to write every single variable: 12 | """ 13 | 14 | def __init__(self): 15 | pass 16 | 17 | def check(self): 18 | raise NotImplementedError 19 | 20 | def save(self, data: dict): 21 | raise NotImplementedError 22 | 23 | def _bulk_save(self, query: dict, data: list): 24 | raise NotImplementedError 25 | 26 | def _get_many(self): 27 | raise NotImplementedError 28 | 29 | def last(self): 30 | raise NotImplementedError 31 | 32 | def many(self, limit: int = 100): 33 | raise NotImplementedError 34 | 35 | def save_many(self, query: dict, data: list): 36 | raise NotImplementedError 37 | 38 | def pop_multiple(self, query, _limit: int = 1): 39 | raise NotImplementedError 40 | 41 | def swap(self, query, alt: dict = {}): 42 | """ Swap betwen the first and last item """ 43 | raise NotImplementedError 44 | 45 | def query_mix(self, query: dict, alt: dict = {}): 46 | raise NotImplementedError 47 | 48 | 49 | class DBHandler(BaseHandler): 50 | """ 51 | A way to handle reads and writes consistently without having 
to write every single variable:
 52 |     """
 53 | 
 54 |     def __init__(self):
 55 |         # print("DBHandler")
 56 |         self._entity = ""
 57 |         self._required = {}
 58 |         self._query = {}
 59 |         self.data = {}
 60 |         self.event_proc = None
 61 | 
 62 |     def __setitem__(self, key, value):
 63 |         if bool(self.required):
 64 |             if key in self.required:
 65 |                 self._query[key] = value
 66 |                 return self._query
 67 |         self.data[key] = value
 68 |         return self.data
 69 | 
 70 |     def __getitem__(self, key):
 71 |         if key in self._query.keys():
 72 |             return self._query.get(key, None)
 73 |         else:
 74 |             if key in self.data.keys():
 75 |                 return self.data.get(key, None)
 76 |             return None
 77 | 
 78 |     @property
 79 |     def event(self):
 80 |         return self.event_proc
 81 | 
 82 |     @event.setter
 83 |     def event(self, _event: EventProcessor):
 84 |         # Use to process events
 85 |         self.event_proc = _event
 86 | 
 87 |     @property
 88 |     def entity(self):
 89 |         return self._entity
 90 | 
 91 |     @entity.setter
 92 |     def entity(self, _entity: str):
 93 |         self._entity = str(_entity)
 94 | 
 95 |     @property
 96 |     def required(self):
 97 |         return self._required
 98 | 
 99 |     @required.setter
 100 |     def required(self, _required: Dict[str, Any]):
 101 |         # check to make sure it's not empty
 102 |         self._required = _required
 103 | 
 104 |     @property
 105 |     def query(self):
 106 |         return self._query
 107 | 
 108 |     @query.setter
 109 |     def query(self, _query: Dict[str, Any]):
 110 |         if len(_query.keys()) > 0:
 111 |             self._query = _query
 112 | 
 113 |     def check(self):
 114 |         if self.event_proc is None:
 115 |             raise AttributeError("Event processor isn't available.")
 116 | 
 117 |         if (not bool(self._entity)) or (not bool(self._required)) or (not bool(self._query)):
 118 |             raise AttributeError("One of the key variables is missing.")
 119 | 
 120 |         for req in self._required.keys():
 121 |             _type = self._required[req]
 122 |             if req not in self._query:
 123 |                 raise AttributeError(f"{req} is not in the requirements")
 124 |             if not isinstance(self._query[req], _type):
 125 |                 raise AttributeError(f"{req} is not a {_type}")
 126 |         return True
 127 | 
 128 |     def save(self, data: dict, alt={}):
 129 |         self.check()
 130 | 
 131 |         query = copy.copy(self._query)
 132 |         query['type'] = self.entity
 133 |         query.update(alt)
 134 |         query.update(self.data)
 135 |         self.event_proc.save(query, data)
 136 | 
 137 |     def save_many(self, data: list, alt={}):
 138 |         self.check()
 139 | 
 140 |         query = copy.copy(self._query)
 141 |         query['type'] = self.entity
 142 |         # logger.info(query)
 143 |         query.update(alt)
 144 |         query.update(self.data)
 145 |         self.event_proc._bulk_save(query, data)
 146 | 
 147 |     def _get_many(self, limit: int, alt={}):
 148 |         """ Aims to get many variables """
 149 |         self.check()
 150 |         query = copy.copy(self._query)
 151 |         query['type'] = self.entity
 152 |         query.update(alt)
 153 |         query.update(self.data)
 154 |         latest_many = self.event_proc.get_latest_many(query, limit=limit)
 155 |         return latest_many
 156 | 
 157 |     def _get_latest(self, alt={}):
 158 |         self.check()
 159 |         query = copy.copy(self._query)
 160 |         query['type'] = self.entity
 161 |         query.update(alt)
 162 |         query.update(self.data)
 163 |         latest = self.event_proc.get_latest(query)
 164 |         return latest
 165 | 
 166 |     def last(self, alt={}):
 167 |         alt.update(self.data)
 168 |         return self._get_latest(alt)
 169 | 
 170 |     def many(self, limit=1000, alt={}):
 171 |         alt.update(self.data)
 172 |         return self._get_many(limit, alt=alt)
 173 | 
 174 |     def pop(self, alt={}):
 175 |         query = copy.copy(self._query)
 176 |         query['type'] = self.entity
 177 |         query.update(alt)
 178 |         query.update(self.data)
 179 |         self.event_proc.remove_first(query)
 180 | 
 181 |     def pop_many(self, _limit, alt={}):
 182 |         query = copy.copy(self._query)
 183 |         query['type'] = self.entity
 184 |         query.update(alt)
 185 |         query.update(self.data)
 186 |         return self.event_proc.pop_multiple(query, _limit)
 187 | 
 188 |     def count(self, alt={}):
 189 |         """ Counts the records matching the current query """
 190 |         self.check()
 191 |         query = copy.copy(self._query)
 192 |         query['type'] = self.entity
 193 |         query.update(alt)
 194 |         query.update(self.data)
 195 |         return self.event_proc.count(query)
 196 | 
 197 |     def swap_many(self, limit: int = 10, alt={}):
 198 |         """ Move items from the main list to a swapped list. """
 199 |         self.check()
 200 |         query = copy.copy(self._query)
 201 |         query['type'] = self.entity
 202 |         query.update(alt)
 203 |         query.update(self.data)
 204 |         return self.event_proc.multi_swap(query, limit)
 205 | 
 206 |     def query_mix(self, limit: int = 10, alt: dict = {}):
 207 |         self.check()
 208 |         query = copy.copy(self._query)
 209 |         query['type'] = self.entity
 210 |         query.update(alt)
 211 |         query.update(self.data)
 212 |         return self.event_proc.query_mix(query, limit)
 213 | 
 214 |     def query_many_swap(self, limit: int = 10, alt: dict = {}):
 215 |         self.check()
 216 |         query = copy.copy(self._query)
 217 |         query['type'] = self.entity
 218 |         query.update(alt)
 219 |         query.update(self.data)
 220 |         return self.event_proc.get_latest_many_swap(query, limit)
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
 1 | # Jamboree: A Fast General Data Engineering Library
 2 | 
 3 | 
 4 | ![Logo](docs/imgs/jamboree-long-new.png)
 5 | 
 6 | **`Jamboree` is extremely early, meaning it should be used with caution. There are plans to improve the system and many components are subject to change. If you look at the improvement plans linked at the bottom, you'll be able to see what's coming.**
 7 | 
 8 | The goal of `jamboree` is to manage the complexities of data engineering.
 9 | 
 10 | ## Install
 11 | 
 12 | The library requires `redis` to operate for the time being.
 13 | 
 14 | ```bash
 15 | pip install jamboree
 16 | ```
 17 | 
 18 | ## Install Redis
 19 | 
 20 | All of the Redis installation instructions are [here](https://redis.io/topics/quickstart). However, because the current setup uses RediSearch (and will likely use many other Redis modules in the future), and installing modules manually is a bit more complex than necessary right now, it's best to use Docker:
 21 | 
 22 | ```bash
 23 | $ docker run \
 24 |     -p 6379:6379 \
 25 |     -v /home/{PUTNAMEHERE}/data:/data \
 26 |     redislabs/redismod \
 27 |     --dir /data
 28 | ```
 29 | 
 30 | **The output should look like the following:**
 31 | 
 32 | ```bash
 33 | 1:C 24 Apr 2019 21:46:40.382 # oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo
 34 | ...
 35 | 1:M 24 Apr 2019 21:46:40.474 * Module 'ai' loaded from /usr/lib/redis/modules/redisai.so
 36 | 1:M 24 Apr 2019 21:46:40.474 * RediSearch version 1.4.7 (Git=)
 37 | 1:M 24 Apr 2019 21:46:40.474 * concurrency: ON, gc: ON, prefix min length: 2, prefix max expansions: 200, query timeout (ms): 500, timeout policy: return, cursor read size: 1000, cursor max idle (ms): 300000, max doctable size: 1000000, search pool size: 20, index pool size: 8,
 38 | 1:M 24 Apr 2019 21:46:40.475 * Initialized thread pool!
 39 | 1:M 24 Apr 2019 21:46:40.475 * Module 'ft' loaded from /usr/lib/redis/modules/redisearch.so
 40 | 1:M 24 Apr 2019 21:46:40.476 * Thread pool created, using 8 threads.
 41 | 1:M 24 Apr 2019 21:46:40.476 * Module 'graph' loaded from /usr/lib/redis/modules/redisgraph.so
 42 | loaded default MAX_SAMPLE_PER_CHUNK policy: 360
 43 | 1:M 24 Apr 2019 21:46:40.476 * Module 'timeseries' loaded from /usr/lib/redis/modules/redistimeseries.so
 44 | 1:M 24 Apr 2019 21:46:40.476 # JSON data type for Redis v1.0.4 [encver 0]
 45 | 1:M 24 Apr 2019 21:46:40.476 * Module 'ReJSON' loaded from /usr/lib/redis/modules/rejson.so
 46 | 1:M 24 Apr 2019 21:46:40.476 * Module 'bf' loaded from /usr/lib/redis/modules/rebloom.so
 47 | 1:M 24 Apr 2019 21:46:40.477 * RedisGears version 0.2.1, git_sha=fb97ad757eb7238259de47035bdd582735b5c81b
 48 | 1:M 24 Apr 2019 21:46:40.477 * PythonHomeDir:/usr/lib/redis/modules/deps/cpython/
 49 | 1:M 24 Apr 2019 21:46:40.477 * MaxExecutions:1000
 50 | 1:M 24 Apr 2019 21:46:40.477 * RedisAI api loaded successfully.
 51 | 1:M 24 Apr 2019 21:46:40.477 # RediSearch api loaded successfully.
 52 | 1:M 24 Apr 2019 21:46:40.521 * Module 'rg' loaded from /usr/lib/redis/modules/redisgears.so
 53 | 1:M 24 Apr 2019 21:46:40.521 * Ready to accept connections
 54 | ```
 55 | 
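Once the container is up, you can sanity-check that Redis is reachable and that the modules loaded. Below is a minimal sketch using `redis-py`, assuming the default host and port from the commands above:

```py
import redis

r = redis.Redis(host="localhost", port=6379)
assert r.ping()  # raises ConnectionError if the container isn't running

# MODULE LIST reports the loaded modules (ft/search, graph, timeseries, ...)
for module in r.execute_command("MODULE", "LIST"):
    print(module)
```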
41 | 1:M 24 Apr 2019 21:46:40.476 * Module 'graph' loaded from /usr/lib/redis/modules/redisgraph.so
42 | loaded default MAX_SAMPLE_PER_CHUNK policy: 360
43 | 1:M 24 Apr 2019 21:46:40.476 * Module 'timeseries' loaded from /usr/lib/redis/modules/redistimeseries.so
44 | 1:M 24 Apr 2019 21:46:40.476 # JSON data type for Redis v1.0.4 [encver 0]
45 | 1:M 24 Apr 2019 21:46:40.476 * Module 'ReJSON' loaded from /usr/lib/redis/modules/rejson.so
46 | 1:M 24 Apr 2019 21:46:40.476 * Module 'bf' loaded from /usr/lib/redis/modules/rebloom.so
47 | 1:M 24 Apr 2019 21:46:40.477 * RedisGears version 0.2.1, git_sha=fb97ad757eb7238259de47035bdd582735b5c81b
48 | 1:M 24 Apr 2019 21:46:40.477 * PythonHomeDir:/usr/lib/redis/modules/deps/cpython/
49 | 1:M 24 Apr 2019 21:46:40.477 * MaxExecutions:1000
50 | 1:M 24 Apr 2019 21:46:40.477 * RedisAI api loaded successfully.
51 | 1:M 24 Apr 2019 21:46:40.477 # RediSearch api loaded successfully.
52 | 1:M 24 Apr 2019 21:46:40.521 * Module 'rg' loaded from /usr/lib/redis/modules/redisgears.so
53 | 1:M 24 Apr 2019 21:46:40.521 * Ready to accept connections
54 | ```
55 | 
56 | To run it in the background and have it start when the computer boots:
57 | 
58 | ```bash
59 | $ docker run \
60 |     -p 6379:6379 -d \
61 |     --restart=always \
62 |     -v /home/{PUTNAMEHERE}/data:/data \
63 |     redislabs/redismod \
64 |     --dir /data
65 | ```
66 | 
67 | ## What is Event State Carrying?
68 | 
69 | State carrying is a roundabout way of saying that we track information through its interactions over time, more so than through exact states. It helps us construct a story of all the things that have happened in a system over time. It looks like the image below.
70 | 
71 | ![Event Sourcing](docs/imgs/event-sourcing_long.png)
72 | 
73 | State carrying is dragging the current state along over time.
74 | 
75 | The ultimate result is that you get traceability in your system. This is great when you're trying to see how interactions happen through time.
76 | 
77 | ## How The Library Works
78 | 
79 | The Jamboree library is split in two parts:
80 | 
81 | 1. Jamboree Event Sourcing
82 | 2. Object Handler
83 | 
84 | The `Jamboree` object is rather simple. It only saves, reads, and deletes records in both `redis` and `mongodb`: redis gives fast read times, and mongodb backs up the data. `Handlers` have very explicit storage procedures that interact with the Jamboree object. A good example is the code below.
85 | 
86 | The idea is straightforward:
87 | 
88 | 1. We create a `Jamboree` object. The Jamboree object manages connections to databases with high speed and low latency.
89 | 2. After we create the handler object and set the limit (the max number of records we want to look at), we start adding records until we stop. At the end, we get the amount of time it took to push the records.
90 |    * Periodically, we do a small calculation on older information prior to adding a record.
91 | 
92 | ## Creating a Handler
93 | The snippets below assume the usual imports (`uuid`, `vaex`, `pandas as pd`, `random`/`randint`) along with `DBHandler`, `Jamboree`, and a `timecontext` timing helper from this library.
94 | ```py
95 | class SampleEnvHandler(DBHandler):
96 |     """Abstract handler that we use to keep track of information.
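    Tracks `value` records per episode; `save_update_recent` recomputes a value from recent records before saving.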
97 | """ 98 | 99 | def __init__(self, **kwargs): 100 | super().__init__() 101 | self.entity = "sample" 102 | self.required = { 103 | "episode": str 104 | } 105 | self._balance = 0 106 | self._limit = 100 107 | 108 | @property 109 | def limit(self): 110 | return self._limit 111 | 112 | @limit.setter 113 | def limit(self, limit): 114 | self._limit = limit 115 | 116 | @property 117 | def balance(self): 118 | """ Gets the sum of the last three values at set the value """ 119 | return self._balance 120 | 121 | @property 122 | def transactions(self)->vaex.dataframe: 123 | """ Get the last 100 transactions """ 124 | many_records = self.many(self.limit) 125 | 126 | if isinstance(many_records, dict): 127 | frame = pd.DataFrame(many_records) 128 | transactions_frame = vaex.from_pandas(frame) 129 | return transactions_frame.sort('timestamp', ascending=False) 130 | 131 | if len(many_records) > 0: 132 | frame = pd.DataFrame(many_records) 133 | transactions_frame = vaex.from_pandas(frame) 134 | return transactions_frame.sort('timestamp', ascending=False) 135 | 136 | return vaex.from_pandas(pd.DataFrame()) 137 | 138 | def save_update_recent(self, data:dict): 139 | transactions = self.transactions 140 | count = transactions.count() 141 | new_value = data['value'] + count 142 | data['value'] = int(new_value) 143 | super().save(data) 144 | 145 | def flip(n=0.02): 146 | if n >= random.uniform(0, 1): 147 | return True 148 | return False 149 | 150 | if __name__ == "__main__": 151 | main() 152 | ``` 153 | 154 | 155 | ## Timing The Handler 156 | 157 | ```py 158 | jambo = Jamboree() 159 | sample_env_handler = SampleEnvHandler() 160 | sample_env_handler.limit = 250 161 | sample_env_handler.event = jambo 162 | sample_env_handler['episode'] = uuid.uuid1().hex 163 | with timecontext(): 164 | for i in range(10000): 165 | v1 = randint(0, 12) 166 | sample_env_handler.save({"value": v1}) 167 | if flip(0.05): 168 | sample_env_handler.save_update_recent({"value": v1}) 169 | ``` 170 | 171 | ## Improvement Plans 172 | 173 | Jamboree currently has a list of improvements that 174 | 175 | https://trello.com/b/9vwpc5C6 -------------------------------------------------------------------------------- /jamboree/handlers/abstracted/search/updated.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import maya 3 | 4 | from typing import Optional 5 | from jamboree import Jamboree 6 | from jamboree.base.processors.abstracts import Processor 7 | from jamboree.handlers.default.search import BaseSearchHandler 8 | 9 | from loguru import logger 10 | 11 | class ParameterizedSearch(BaseSearchHandler): 12 | """ 13 | An abstract over the base search handler. 14 | 15 | Use to avoid using the normal formatting. 16 | 17 | Example: 18 | 19 | Normally you'd have to use the following: 20 | 21 | :: 22 | >>> search['item1'] = 'value' 23 | >>> search['item2'] = 'value' 24 | >>> search['item3'] = 'value' 25 | >>> search['item4'] = 'value' 26 | >>> search['item5'] = 'value' 27 | >>> search.insert(allow_duplicates=False) 28 | 29 | Instead you'll use the pattern: 30 | 31 | :: 32 | >>> id_of_insert = search.Create( 33 | >>> allow_duplicates=False, 34 | >>> no_overwrite_reqs=False, 35 | >>> item1='value', item2='value', 36 | >>> item3='value', item4='value', 37 | >>> item5='value') 38 | # The record's id is set here 39 | '249fabf229374715ae7e65b7061c0faf' 40 | 41 | 42 | To define a schema we set the variable `self.allrequirements`. 
43 |     (Illustrative; assuming a field-to-type mapping, as with `required` elsewhere in the library:)
44 | 
45 |     ::
46 |         >>> self.allrequirements = {'category': str, 'name': str, 'abbreviation': str}
47 |     """
48 | 
49 |     def __init__(self):
50 |         """ Initialize the handler. Pulls from the existing SearchHandler.
51 | 
52 |         Set `must_have` in inherited classes. Use it to make certain variable names mandatory.
53 | 
54 |         Example:
55 | 
56 |         ::
57 |             >>> def __init__(self):
58 |             >>>     self.must_have = ["category", "name", "abbreviation"]
59 | 
60 | 
61 | 
62 |             >>> id_of_insert = search.Create(
63 |             >>>     allow_duplicates=True,
64 |             >>>     no_overwrite_must_have=True)
65 | 
66 |         Would immediately break because `category`, `name`, and `abbreviation` were never provided.
67 | 
68 | 
69 |         """
70 |         super().__init__()
71 |         self.must_have = []  # Forced fields
72 | 
73 |     def check_requirements(self, items: dict):
74 |         """ Checks that the fields inside of `must_have` are present in the dictionary we're going to be adding. """
75 |         for _abs in self.must_have:
76 |             if _abs not in items:
77 |                 raise AttributeError(
78 |                     f"{_abs} has to be added. The absolute required variables are the following: {self.must_have}"
79 |                 )
80 |     # """
81 |     # Insert a document. All fields defined inside of **kwargs.
82 | 
83 |     # Parameters:
84 |     #     allow_duplicates (bool): Determines if we want to allow duplicates of the exact same document inside of the search database.
85 |     #     no_overwrite_must_have (bool): Determines if we're only checking for a small range of fields. Identified inside of `self.must_have`
86 |     #     kwargs (Any): Any field we want to add to the database. Its key and value.
87 |     # """
88 | 
89 | 
90 | 
91 | 
92 |     def Create(self,
93 |                allow_duplicates=False,
94 |                no_overwrite_must_have=False,
95 |                **kwargs) -> str:
96 |         """Insert a new record into redisearch.
97 | 
98 |         Args:
99 |             allow_duplicates (bool, optional): Determines if we allow duplicates of the exact same document inside of the database. Defaults to False.
100 |             no_overwrite_must_have (bool, optional): Determines if we allow more than one record that matches `must_have`. Defaults to False.
101 | 
102 |         Returns:
103 |             str: The inserted record's id
104 |         """
105 |         self.reset()
106 |         self.check_requirements(kwargs)
107 | 
108 |         if no_overwrite_must_have and len(self.must_have) > 0:
109 | 
110 |             _all = self.FForced(**kwargs)
111 |             if len(_all) > 0:
112 |                 return _all[0].id
113 | 
114 |         for k, v in kwargs.items():
115 |             self[k] = v
116 | 
117 |         identity = self.insert(allow_duplicates=allow_duplicates)
118 |         return identity
119 | 
120 |     def UpdateID(self, identity: str, **kwargs):
121 |         """ Updates a record by ID. Take care when changing a `must_have` variable. """
122 |         self.reset()
123 |         for k, v in kwargs.items():
124 |             self.replacement[k] = v
125 |         self.update_id(identity)
126 | 
127 |     def UpdateMany(self, search_dict: dict, force_must_have=False, **replacements):
128 |         """Replaces many records for the user.
129 | 
130 |         Args:
131 |             search_dict (dict): The query that selects the records we're replacing.
132 |             force_must_have (bool, optional): Checks that the `search_dict` has all of the `must_have` variables. Defaults to False.
133 | 
134 |         Raises:
135 |             ValueError: If our search parameters or replacement dictionaries are empty.
136 |         """
137 |         self.reset()
138 |         if not bool(search_dict) or not bool(replacements):
139 |             raise ValueError(
140 |                 "You need to have query information AND something to replace it with."
141 |             )
142 |         if force_must_have:
143 |             self.check_requirements(search_dict)
144 | 
145 |         for k, v in search_dict.items():
146 |             self[k] = v
147 | 
148 |         for k, v in replacements.items():
149 |             self.replacement[k] = v
150 |         self.update()
151 | 
152 |     def Find(self, general=None, force_must=False, **fields):
153 |         """Search through the database for our records.
154 | 
155 |         Runs a generalized search through the database.
156 | 
157 |         Keyword Arguments:
158 |             general {str} -- A general term that lets us find records with a fuzzy search (default: {None})
159 |             force_must {bool} -- Checks that the search fields contain everything declared as important in `must_have` (default: {False})
160 | 
161 |         Raises:
162 |             ValueError: If there's nothing to search with (both `fields` and `general` are empty).
163 | 
164 |         Returns:
165 |             [list] -- A list of descriptions.
166 |         """
167 |         self.reset()
168 |         if general is not None:
169 |             self.general = general
170 |         if not bool(fields):
171 |             if general is not None:
172 |                 return self.find()
173 |             raise ValueError("You have to search using something")
174 |         if force_must:
175 |             self.check_requirements(fields)
176 | 
177 |         for k, v in fields.items():
178 |             self[k] = v
179 |         return self.find()
180 | 
181 |     def FindById(self, identity: str):
182 |         self.reset()
183 |         remainder = self.pick(identity)
184 |         return remainder
185 | 
186 |     def FindForced(self, **kwargs):
187 |         self.reset()
188 |         self.check_requirements(kwargs)
189 |         for k in self.must_have:
190 |             self[k] = kwargs.get(k)
191 |         _all = self.find()
192 |         return _all
193 | 
194 |     def FForced(self, **kwargs):
195 |         for k in self.must_have:
196 |             self[k] = kwargs.get(k)
197 |         _all = self.find()
198 |         return _all
199 | 
200 |     def Remove(self, **kwargs):
201 |         self.reset()
202 |         for k, v in kwargs.items():
203 |             self[k] = v
204 |         self.remove()
-------------------------------------------------------------------------------- /jamboree/middleware/procedures/core.py: --------------------------------------------------------------------------------
1 | from abc import ABC
2 | from typing import List, Dict, Any
3 | from addict import Dict as ADict
4 | # from addict import Dict
5 | 
6 | class ProcedureAbstract(ABC):
7 |     """
8 |     Procedures ensure everything is consistent.
9 |     """
10 | 
11 |     def verify(self):
12 |         """ Ensures we have all of the required variables in place."""
13 |         raise NotImplementedError("Verify function not implemented")
14 | 
15 |     def extract(self):
16 |         """ The item that will be serialized. """
17 |         raise NotImplementedError("Extract function not implemented")
18 | 
19 | 
20 | class NamedModelMetric(ABC):
21 |     def __init__(self, name: str):
22 |         self.name = name
23 | 
24 | 
25 |     def get_metric(self, y_pred, y_actual) -> Dict:
26 |         raise NotImplementedError("You need to have a way to get a metric")
27 | 
28 | 
29 | class NamedModelMetricSet:
30 |     """ A single place to hold all of the model metrics (in a set)"""
31 |     def __init__(self):
32 |         self.metric_set: List[NamedModelMetric] = []
33 | 
34 | 
35 |     def metrics(self, y_, y) -> Dict:
36 |         if len(self.metric_set) == 0:
37 |             return {}
38 |         metric_listing = {}
39 |         for metric in self.metric_set:
40 |             name = metric.name
41 |             metric_output = metric.get_metric(y_, y)
42 |             metric_listing[name] = metric_output
43 |         return metric_listing
44 | 
45 | 
46 | 
47 | class ProcedureManagement(ABC):
48 |     """ A way to interact with procedures. Use it to embed things like:
49 | 
50 |     1. Accessing procedures for a given code base.
51 |     2. Checking for certain attributes within the class.
52 |     3. Declaring what's acceptable.
53 |     """
54 |     def __init__(self):
55 |         self.required_attributes: List[str] = []
56 | 
57 | 
58 |     @property
59 |     def allowed(self) -> List[str]:
60 |         raise NotImplementedError("You need to set the allowed keys we'll take.")
61 | 
62 | 
63 |     def check_allowed(self, key: str):
64 |         if key not in self.allowed:
65 |             raise ValueError(f"{key} has to be one of the allowed keys: {self.allowed}")
66 | 
67 | 
68 |     def access(self, key: str) -> 'ProcedureAbstract':
69 |         """ Access the procedure we need. Returns the procedure registered under the given key."""
70 |         raise NotImplementedError("You need to create an access procedure")
71 | 
72 | 
73 |     def isattr(self, parentinstance: Any):
74 |         """ Checks to see if all of the required attributes are on the parent class instance. """
75 |         if len(self.required_attributes) > 0:
76 |             for attr in self.required_attributes:
77 |                 if not hasattr(parentinstance, attr):
78 |                     cls_name = parentinstance.__class__.__name__
79 |                     msg = f"{cls_name} does not have the attribute {attr}"
80 |                     raise AttributeError(msg)
81 | 
82 | 
83 | 
84 | class ModelProcedureAbstract(ProcedureAbstract):
85 |     # _dict = None
86 |     def __init__(self):
87 |         self._mod = None
88 |         self._opt = None
89 |         self._crit = None
90 | 
91 |         self._model_dict = ADict()
92 |         self._model_dict.model = None
93 |         self._model_dict.optimizer = None
94 |         self._model_dict.criteria = None
95 | 
96 |         self._model_typing = ADict()
97 |         self._model_typing.model = None
98 |         self._model_typing.optimizer = None
99 |         self._model_typing.criteria = None
100 | 
101 |         self._model_requirements = ADict()
102 |         self._model_requirements.model = True
103 |         self._model_requirements.optimizer = False
104 |         self._model_requirements.criteria = False
105 | 
106 |         self.changed = False
107 |         self.named_metric_set = NamedModelMetricSet()
108 | 
109 | 
110 |     @property
111 |     def dictionary(self):
112 |         """ A dictionary with all of the model information contained inside. """
113 |         return self._model_dict
114 | 
115 |     @dictionary.setter
116 |     def dictionary(self, _md: ADict):
117 |         """ Load in raw model dict information """
118 |         self._model_dict.update(_md)
119 |         # self.verify()
120 | 
121 |     @property
122 |     def requirements(self) -> ADict:
123 |         """ The requirements dictionary used when verifying the model dictionary. """
124 |         return self._model_requirements
125 | 
126 |     @requirements.setter
127 |     def requirements(self, _md: ADict):
128 |         """ Load in raw model requirement information """
129 |         self._model_requirements.update(_md)
130 | 
131 |     @property
132 |     def types(self) -> ADict:
133 |         return self._model_typing
134 | 
135 |     @types.setter
136 |     def types(self, _mt: ADict):
137 |         self._model_typing.update(_mt)
138 | 
139 |     """
140 |         Verification
141 |     """
142 | 
143 |     def verify_model_typing(self):
144 |         """Check that none of the required model types are None """
145 |         for k, v in self.requirements.items():
146 |             if not isinstance(v, bool):
147 |                 raise ValueError(f"Model Requirement '{k}' must be a boolean value")
148 |             if v:
149 |                 if self.types[k] is None:
150 |                     raise ValueError(f"'{k}' cannot be None in typing declarations")
151 |                 if self.dictionary[k] is None:
152 |                     raise ValueError(f"'{k}' cannot be None inside of the main model dictionary")
153 | 
154 |     def verify_model_dict(self):
155 |         """ Verify that each declared item matches its declared type. """
156 |         for name, _type in self.types.items():
157 |             if name is None or _type is None:
158 |                 continue
159 |             current_item = self.dictionary[name]
160 |             if not isinstance(current_item, _type) and not (isinstance(current_item, type) and issubclass(current_item, _type)):
161 |                 raise TypeError(f"{name} is not an instance of {_type}")
162 | 
163 |     def verify(self):
164 |         self.verify_model_typing()
165 |         self.verify_model_dict()
166 | 
167 |     def is_valid_data(self, _data) -> bool:
168 |         """ Determines if the data we're about to use is valid"""
169 |         raise NotImplementedError("Data validation not implemented yet")
170 | 
171 |     def split(self, X, y, **params):
172 |         raise NotImplementedError
173 | 
174 |     def fit(self, X, y, **params):
175 |         raise NotImplementedError
176 | 
177 |     def partial_fit(self, X, y, **params):
178 |         raise NotImplementedError
179 | 
180 |     def predict(self, X, **params):
181 |         raise NotImplementedError
182 | 
183 |     def predict_proba(self, X, **params):
184 |         raise NotImplementedError
185 | 
186 |     def score(self, X, y, **params):
187 |         raise NotImplementedError
188 | 
189 |     def get_params(self, **params):
190 |         raise NotImplementedError
191 | 
192 |     def set_params(self, **params):
193 |         raise NotImplementedError
194 | 
195 |     def extract(self):
196 |         """ Get a dictionary to save the model. Should be called in close """
197 |         return self.dictionary
198 | 
199 |     @property
200 |     def metrics(self):
201 |         """ Given the information we have, return a set of metrics"""
202 |         metric_set = self.named_metric_set.metrics(0, 0)
203 |         return metric_set
204 | 
205 | 
206 | 
207 | 
208 | 
209 | if __name__ == "__main__":
210 |     model_types = ADict()
211 |     model_vals = ADict()
212 |     model_types.model = bool
213 |     model_types.optimizer = str
214 |     model_types.criteria = str
215 | 
216 |     model_vals.model = False
217 |     model_vals.optimizer = "str"
218 |     model_vals.criteria = "str"
219 | 
220 |     base_model_procedure = ModelProcedureAbstract()
221 |     base_model_procedure.types = model_types
222 |     base_model_procedure.dictionary = model_vals
223 |     base_model_procedure.verify()
224 | 
225 |     print(base_model_procedure)
-------------------------------------------------------------------------------- /docs/readme/Insert No Duplicates.md: --------------------------------------------------------------------------------
1 | # Inserting Data Without Duplicates
2 | 
3 | Here we test inserting data without duplicates. Afterwards, we'll test for missing data inside of the databases.
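In short, the strategy below serializes every record with `orjson` and uses a redis set to filter out anything that was already inserted. Here is that idea in miniature (a minimal sketch; the key name and records are illustrative, and a local redis is assumed):

```python
import orjson
import redis

r = redis.Redis()  # assumes a local redis instance

records = [{"close": 1.0}, {"close": 1.0}, {"close": 2.0}]  # contains one exact duplicate
serialized = {orjson.dumps(rec) for rec in records}         # serializing into a set drops exact copies

set_key = "demo:inserted"                # illustrative key name
existing = set(r.smembers(set_key))      # records we've already inserted
addable = serialized - existing          # only genuinely new records remain
if addable:
    r.sadd(set_key, *addable)
```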
4 | 5 | 6 | ```python 7 | from jamboree import Jamboree 8 | import pandas as pd 9 | import datetime 10 | import pandas_datareader.data as web 11 | from pandas import Series, DataFrame 12 | ``` 13 | 14 | 15 | ```python 16 | from maya import MayaDT 17 | import maya 18 | import copy 19 | ``` 20 | 21 | 22 | ```python 23 | import random 24 | import orjson 25 | ``` 26 | 27 | 28 | ```python 29 | from typing import List, Dict, Any 30 | ``` 31 | 32 | 33 | ```python 34 | jam_session = Jamboree() 35 | ``` 36 | 37 | Unable to create library with name: events 38 | 39 | 40 | 41 | --------------------------------------------------------------------------- 42 | 43 | LibraryNotFoundException Traceback (most recent call last) 44 | 45 | in 46 | ----> 1 jam_session = Jamboree() 47 | 48 | 49 | ~/PycharmProjects/jamboree/jamboree/base/main.py in __init__(self, mongodb_host, redis_host, redis_port) 50 | 55 def __init__(self, mongodb_host="localhost", redis_host="localhost", redis_port=6379): 51 | 56 self.redis = Redis(redis_host, port=redis_port) 52 | ---> 57 self.store = Store(mongodb_host).create_lib('events').get_store()['events'] 53 | 58 self.pool = ThreadPool(max_workers=cpu_count() * 4) 54 | 59 55 | 56 | 57 | ~/.local/lib/python3.6/site-packages/arctic/arctic.py in __getitem__(self, key) 58 | 373 def __getitem__(self, key): 59 | 374 if isinstance(key, string_types): 60 | --> 375 return self.get_library(key) 61 | 376 else: 62 | 377 raise ArcticException("Unrecognised library specification - use [libraryName]") 63 | 64 | 65 | ~/.local/lib/python3.6/site-packages/arctic/arctic.py in get_library(self, library) 66 | 358 if error: 67 | 359 raise LibraryNotFoundException("Library %s was not correctly initialized in %s.\nReason: %r)" % 68 | --> 360 (library, self, error)) 69 | 361 elif not lib_type: 70 | 362 raise LibraryNotFoundException("Library %s was not correctly initialized in %s." % 71 | 72 | 73 | LibraryNotFoundException: Library events was not correctly initialized in . 
74 | Reason: ServerSelectionTimeoutError('localhost:27017: [Errno 111] Connection refused',)) 75 | 76 | 77 | 78 | ```python 79 | start = datetime.datetime(1986, 3, 14) 80 | end = datetime.datetime(2020, 1, 6) 81 | ``` 82 | 83 | 84 | ```python 85 | apple_df = web.DataReader("AAPL", 'yahoo', start, end) 86 | msft_df = web.DataReader("MSFT", 'yahoo', start, end) 87 | ``` 88 | 89 | 90 | ```python 91 | apple_df 92 | ``` 93 | 94 | 95 | ```python 96 | def get_year_month_day(time:MayaDT): 97 | print(f"{time.day}-{time.month}-{time.year}") 98 | ``` 99 | 100 | 101 | ```python 102 | def get_time_dt(df): 103 | indexes = df.index 104 | indexes = [maya.MayaDT.from_datetime(index.to_pydatetime()) for index in indexes] 105 | return indexes 106 | ``` 107 | 108 | 109 | ```python 110 | def df_records(df): 111 | return df.to_dict("records") 112 | ``` 113 | 114 | 115 | ```python 116 | def standardize_record(record): 117 | closing_record = {} 118 | if "Close" in record: 119 | closing_record['close'] = record["Close"] 120 | if "Open" in record: 121 | closing_record['open'] = record["Open"] 122 | if "Low" in record: 123 | closing_record['low'] = record["Low"] 124 | if "High" in record: 125 | closing_record['high'] = record["High"] 126 | if "Volume" in record: 127 | closing_record['volume'] = record["Volume"] 128 | 129 | return closing_record 130 | ``` 131 | 132 | 133 | ```python 134 | def standardize_outputs(records:List[Dict[str, Any]]): 135 | if len(records) == 0: 136 | return [] 137 | _records = [standardize_record(rec) for rec in records] 138 | return _records 139 | ``` 140 | 141 | 142 | ```python 143 | def add_time(records, times): 144 | if len(records) == 0 or (len(records) != len(times)): 145 | return [] 146 | 147 | _records = [] 148 | for index, rec in enumerate(records): 149 | rec['time'] = times[index]._epoch 150 | _records.append(rec) 151 | return _records 152 | ``` 153 | 154 | 155 | ```python 156 | def teardown(df): 157 | """Breaks the dataframe into a bunch of dictionaries""" 158 | indexes = get_time_dt(df) 159 | records = df_records(df) 160 | standardized = standardize_outputs(records) 161 | # print(standardized) 162 | with_time = add_time(standardized, indexes) 163 | return with_time 164 | ``` 165 | 166 | 167 | ```python 168 | dt_time = teardown(apple_df) 169 | ``` 170 | 171 | 172 | ```python 173 | def flip(n=0.05): 174 | if random.uniform(0, 1) < n: 175 | return True 176 | return False 177 | ``` 178 | 179 | 180 | ```python 181 | def create_duplicates(frame_dict_list:List[Dict]): 182 | if len(frame_dict_list) == 0: 183 | return [] 184 | 185 | final_list = [] 186 | for item in frame_dict_list: 187 | final_list.append(item) 188 | if flip(0.1): 189 | final_list.append(item) 190 | return final_list 191 | ``` 192 | 193 | 194 | ```python 195 | 196 | ``` 197 | 198 | 199 | ```python 200 | # len(dups) 201 | ``` 202 | 203 | 204 | ```python 205 | last_200 = dt_time[-200:] 206 | last_300 = dt_time[-300:] 207 | last_200_dups = create_duplicates(last_200) 208 | last_300_dups = create_duplicates(last_300) 209 | ``` 210 | 211 | 212 | ```python 213 | upsert_data_one = jam_session.bulk_upsert_redis({"type": "sample_save", "asset": "AAPL", "label": "duplication"}, last_200) 214 | upsert_data_two = jam_session.bulk_upsert_redis({"type": "sample_save", "asset": "AAPL", "label": "duplication"}, last_300) 215 | upsert_data_one_dups = jam_session.bulk_upsert_redis({"type": "sample_save", "asset": "AAPL", "label": "duplication"}, last_200_dups) 216 | upsert_data_two_dups = jam_session.bulk_upsert_redis({"type": 
"sample_save", "asset": "AAPL", "label": "duplication"}, last_300_dups) 217 | ``` 218 | 219 | 220 | ```python 221 | main_hash = upsert_data_one.get("hash") 222 | ``` 223 | 224 | 225 | ```python 226 | up1 = upsert_data_one.get('updated', []) 227 | up2 = upsert_data_two.get('updated', []) 228 | up3 = upsert_data_one_dups.get('updated', []) 229 | up4 = upsert_data_two_dups.get('updated', []) 230 | ``` 231 | 232 | 233 | ```python 234 | cr1 = [orjson.dumps(x) for x in up1] 235 | cr2 = [orjson.dumps(x) for x in up2] 236 | cr3 = [orjson.dumps(x) for x in up3] 237 | cr4 = [orjson.dumps(x) for x in up4] 238 | ``` 239 | 240 | 241 | ```python 242 | set1 = set(cr1) 243 | set2 = set(cr2) 244 | set3 = set(cr3) 245 | set4 = set(cr4) 246 | ``` 247 | 248 | 249 | ```python 250 | print(len(set1)) 251 | print(len(set2)) 252 | print(len(set3)) 253 | print(len(set4)) 254 | ``` 255 | 256 | 257 | ```python 258 | jam_session.redis.sadd(set_key, *set(cr3)) 259 | ``` 260 | 261 | 262 | ```python 263 | 264 | ``` 265 | 266 | 267 | ```python 268 | def deserialize_list(serialized_list:list): 269 | if len(serialized_list) == 0: 270 | return [] 271 | 272 | return [orjson.loads(x) for x in serialized_list] 273 | ``` 274 | 275 | 276 | ```python 277 | def add_timestamp(item): 278 | item['timestamp'] = maya.now()._epoch 279 | return item 280 | ``` 281 | 282 | 283 | ```python 284 | def get_addable_items(set_key, added_set): 285 | existing = set(jam_session.redis.smembers(set_key)) 286 | addable_items = set(set2 - existing) 287 | if len(addable_items) == 0: 288 | return [] 289 | listified = list(addable_items) 290 | deku = deserialize_list(listified) 291 | timestamped = [add_timestamp(x) for x in deku] 292 | return timestamped 293 | ``` 294 | 295 | 296 | ```python 297 | # updated_set = set(serialized_updated) 298 | ``` 299 | 300 | 301 | ```python 302 | get_addable_items(set_key, set2) 303 | ``` 304 | 305 | 306 | ```python 307 | # jam_session.redis.smembers(set_key, 0, -1) 308 | ``` 309 | 310 | 311 | ```python 312 | set(retrieved - updated_set) 313 | ``` 314 | 315 | 316 | ```python 317 | len(retrieved) 318 | ``` 319 | 320 | 321 | ```python 322 | len(updated_set) 323 | ``` 324 | 325 | 326 | ```python 327 | 328 | ``` 329 | -------------------------------------------------------------------------------- /jamboree/storage/files/redisify/core.py: -------------------------------------------------------------------------------- 1 | import maya 2 | from threading import local 3 | from jamboree.storage.files import FileStorageConnection 4 | from jamboree.utils.core import consistent_hash 5 | from jamboree.utils.support.storage import serialize, deserialize 6 | from jamboree.utils.context import watch_loop 7 | from addict import Dict 8 | import redis 9 | from redis import Redis 10 | from redis.client import Pipeline 11 | import version_query 12 | from loguru import logger 13 | logger.disable(__name__) 14 | 15 | class RedisFileProcessor(object): 16 | def __init__(self, *args, **kwargs): 17 | self._pipe = None 18 | self._conn = None 19 | 20 | @property 21 | def conn(self) -> Redis: 22 | if self._conn is None: 23 | raise AttributeError("Pipe hasn't been set") 24 | return self._conn 25 | @conn.setter 26 | def conn(self, _pipe:Redis): 27 | self._conn = _pipe 28 | 29 | @property 30 | def pipe(self) -> Pipeline: 31 | if self._pipe is None: 32 | raise AttributeError("Pipe hasn't been set") 33 | return self._pipe 34 | @pipe.setter 35 | def pipe(self, _pipe:Pipeline): 36 | self._pipe = _pipe 37 | 38 | 39 | def reset(self): 40 | self.pipe = None 41 | 
42 | 
43 | 
44 | class RedisFileConnection(FileStorageConnection):
45 |     def __init__(self, **kwargs):
46 |         super().__init__(**kwargs)
47 |         # NOTE: Experiment with sorted sets
48 |         self.current_query = {}
49 |         self.current_hash = None
50 |         self.current_query_exist = None
51 |         self.current_pipe = None
52 |         self.current_hash_keys = None
53 |         self.current_version = None
54 |         self.current_version_exist = None
55 |         self.setup_run = None
56 | 
57 |     def gwatch(self):
58 |         sorted_version = self.keys.version.sorted
59 |         set_version = self.keys.version.set
60 |         self.pipe.watch(sorted_version)
61 |         self.pipe.watch(set_version)
62 | 
63 |     @property
64 |     def version(self):
65 |         """ Get the latest version or the default """
66 |         sorted_version = self.keys.version.sorted
67 |         set_version = self.keys.version.set
68 |         # self.pipe.watch(sorted_version)
69 |         # self.pipe.watch(set_version)
70 |         if self.query_exists and self.current_version is None:
71 |             # latest_version = self.connection.zrange(sorted_version, -1, -1)
72 |             # _all_versions = self.connection.zrange(sorted_version, 0, -1)
73 |             latest_version = self.pipe.zrange(sorted_version, -1, -1)
74 | 
75 |             if latest_version is not None and len(latest_version) > 0:
76 |                 latest_version = latest_version[0]
77 |                 self.current_version = latest_version.decode()
78 |         elif self.current_version is not None:
79 |             return self.current_version
80 |         else:
81 |             latest_version = self.settings.default.version
82 |             self.pipe.zadd(sorted_version, {latest_version: maya.now()._epoch})
83 |             self.pipe.sadd(set_version, latest_version)
84 |             self.current_version = latest_version
85 |         return self.current_version
86 | 
87 |     @version.setter
88 |     def version(self, _version: str):
89 |         sorted_version = self.keys.version.sorted
90 |         set_version = self.keys.version.set
91 |         self.pipe.zadd(sorted_version, {_version: maya.now()._epoch})
92 |         self.pipe.sadd(set_version, _version)  # add the whole version string; unpacking it would add individual characters
93 |         self.current_version = _version
94 | 
95 | 
96 | 
97 |     @property
98 |     def hash_query(self):
99 |         if self.current_hash is None:
100 |             self.current_hash = consistent_hash(self.current_query)
101 |         return self.current_hash
102 | 
103 |     @property
104 |     def keys(self):
105 |         """ Build the redis key names derived from the query hash. """
""" 106 | if self.current_hash_keys is None: 107 | self.current_hash_keys = Dict() 108 | self.current_hash_keys.version.set = f"{self.hash_query}:versions" 109 | self.current_hash_keys.version.sorted = f"{self.hash_query}:zversions" 110 | self.current_hash_keys.file.sum = f"{self.hash_query}:sums" 111 | self.current_hash_keys.sum = f"{self.hash_query}:sum" 112 | self.current_hash_keys.version.index = f"{self.hash_query}:incr" 113 | return self.current_hash_keys 114 | 115 | @property 116 | def query_exists(self) -> bool: 117 | if self.current_query_exist is None: 118 | version_set_exist = self.pipe.exists(self.keys.version.set) 119 | sorted_version_exist = self.pipe.exists(self.keys.version.sorted) 120 | 121 | self.current_query_exist = (version_set_exist == 1 and sorted_version_exist == 1) 122 | return self.current_query_exist 123 | 124 | @property 125 | def file_exist(self) -> bool: 126 | """ Does the current file version exist""" 127 | 128 | if self.current_version_exist is None: 129 | vk = self.version_key 130 | self.pipe.watch(vk) 131 | version_set_exist = self.pipe.exists(vk) 132 | self.current_version_exist = (version_set_exist == 1) 133 | return self.current_version_exist 134 | 135 | @property 136 | def pipe(self): 137 | if self.current_pipe is None: 138 | raise AttributeError("Pipe cannot be non-existent") 139 | return self.current_pipe 140 | 141 | @property 142 | def version_key(self) -> str: 143 | return f"{self.hash_query}:{self.version}" 144 | 145 | 146 | def update_version(self): 147 | """ Save version in multiple places to be found later""" 148 | version = self.version 149 | if self.query_exists and not self.is_overwrite: 150 | vs = version_query.Version.from_str(version) 151 | new_vs = vs.increment(self.settings.default.increment) 152 | new_vs_str = new_vs.to_str() 153 | # print(new_vs_str) 154 | self.version = new_vs_str 155 | 156 | 157 | def update_file(self, _file): 158 | """Update the file""" 159 | vkey = self.version_key 160 | self.pipe.set(vkey, _file) 161 | 162 | def update(self, file): 163 | """ Update the file version and update the file. """ 164 | self.update_version() 165 | self.update_file(file) 166 | logger.error(file) 167 | 168 | @property 169 | def garbage_patch(self): 170 | """ 171 | Basically a way to see if both the file and query key exist. 172 | It's pretty janky. It'll be fine though. 
173 | """ 174 | return (self.query_exists and self.file_exist) 175 | 176 | def absolute_exists(self, query, **kwargs): 177 | # self.setup_run = None 178 | self.setup(query, **kwargs) 179 | return (self.query_exists and self.file_exist) 180 | 181 | 182 | @logger.catch 183 | def save(self, query:dict, obj, **kwargs): 184 | self.setup(query, **kwargs) 185 | serial_item = serialize(obj) 186 | self.update(serial_item) 187 | 188 | 189 | 190 | 191 | @logger.catch 192 | def query(self, query:dict, **kwargs): 193 | self.setup(query, **kwargs) 194 | 195 | if self.garbage_patch: 196 | # If the query and file exist 197 | # logger.debug("File exist, we're gonna try pulling it") 198 | # logger.debug(self.version) 199 | item = self.pipe.get(self.version_key) 200 | unpacked = deserialize(item) 201 | if unpacked is None: 202 | raise AttributeError("Pickled Item Not Found") 203 | return unpacked 204 | 205 | 206 | def delete(self, query:dict, **kwargs): 207 | self.setup(query, **kwargs) 208 | if self.query_exists and self.file_exist: 209 | # If the query and file exist 210 | sorted_version = self.keys.version.sorted 211 | set_version = self.keys.version.set 212 | self.pipe.delete(self.version_key) 213 | self.pipe.zrem(sorted_version, self.version) 214 | self.pipe.srem(set_version, self.version) 215 | 216 | def setup(self, query:dict, **kwargs): 217 | is_force = kwargs.pop("is_force", False) 218 | if self.setup_run is None or is_force: 219 | self.reset() 220 | self.settings = Dict(**kwargs) 221 | self.current_query = query 222 | self.gwatch() 223 | self.version 224 | self.setup_run = True 225 | self.file_exist 226 | 227 | def reset(self): 228 | """ Reset all placeholder variables""" 229 | self.current_query = {} 230 | self.current_hash = None 231 | self.current_query_exist = None 232 | self.current_pipe = self.conn.pipeline() 233 | self.current_version = None 234 | self.current_version_exist = None 235 | self.setup_run = None 236 | 237 | 238 | 239 | class SampleObj(object): 240 | def __init__(self) -> None: 241 | self.one = "one" 242 | self.two = "two" 243 | 244 | def main(): 245 | current_settings = Dict() 246 | query_dict = Dict() 247 | current_settings.overwrite = True 248 | current_settings.preferences = query_dict 249 | samp_opt = SampleObj() 250 | redpill = redis.Redis() 251 | redconn = RedisFileConnection() 252 | redconn.conn = redpill 253 | redconn.save({"one": "twoss"}, samp_opt, **current_settings) 254 | item = redconn.query({"one": "twoss"}) 255 | logger.info(item) 256 | 257 | # redconn.pipe.execute() 258 | 259 | if __name__ == "__main__": 260 | main() --------------------------------------------------------------------------------