├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.md │ └── improvement.md └── PULL_REQUEST_TEMPLATE │ └── pull_request_template.md ├── .gitignore ├── AUTHORS.txt ├── LICENSE ├── README.md ├── cats ├── __init__.py ├── executor │ └── __init__.py ├── factory │ └── __init__.py ├── io │ ├── __init__.py │ ├── input │ │ ├── __init__.py │ │ ├── function.py │ │ └── structure.py │ └── output │ │ └── __init__.py ├── network │ ├── __init__.py │ ├── aws.py │ └── cod.py ├── node.py ├── service │ ├── __init__.py │ └── utils.py └── utils │ └── __init__.py ├── cats_demo.ipynb ├── data └── iris.csv ├── dist ├── cats-0.0.0-py3-none-any.whl └── cats-0.0.0.tar.gz ├── docs ├── CONTRIBUTING.md ├── DEMO.md ├── DEPS.md ├── DESIGN.md ├── ENV.md ├── EXAMPLES.md ├── ORG.md ├── TEST.md └── ubuntu2004.md ├── images ├── CATkernel.jpeg ├── CATs_bom_activity.jpeg ├── CATs_bom_ag.jpeg ├── CATs_bom_connect.jpeg ├── CATs_chaordic_kernel.jpeg ├── cid_example.jpeg ├── data_product_domain.jpeg ├── simple_CAT2a.jpeg └── simple_CAT2b.jpeg ├── logs.ipynb ├── main.tf ├── process.py ├── pyproject.toml └── tests ├── __init__.py └── verification_test.py /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Create a bug report to help us improve CATs' package 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Description:** 11 | 12 | 13 | **Related Issue(s):** 14 | 15 | - ... 16 | - ... 17 | 18 | **Steps to Reproduce:** 19 | 20 | 1. ... 21 | 2. ... 22 | 23 | **Expected Behavior:** 24 | 25 | 26 | **Examples** 27 | 28 | 29 | **Desktop (please complete the following information):** 30 | - OS: [e.g. Ubuntu 20.04.4 LTS, iOS] 31 | - Python Version: [3.10.13] 32 | 33 | **Additional Context:** 34 | 35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/improvement.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Improvement 3 | about: Improvement to existing functionality 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your issue related to a problem? Please describe:** 11 | 12 | 13 | **Describe the solution you'd like:** 14 | 15 | 16 | **Describe alternatives you've considered:** 17 | 18 | 19 | **Additional Context:** 20 | 21 | 22 | - [ ] I have performed a self-review of my own code 23 | - [ ] I have commented my code, particularly in hard-to-understand areas 24 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- 1 | **Creation Checklist:** 2 | - [ ] Review [CONTRIBUTING.md](../docs/CONTRIBUTING.md) file for general contributing guideline updates. 3 | - [ ] This PR is not a duplicate 4 | - [ ] Your PR targets the `release` branch of this repository. 5 | 6 | 7 | # Description: 8 | 9 | 10 | **Description Checklist:** 11 | - [ ] Please include a description of the change. 12 | - [ ] List any dependencies that are required for this change. 13 | 14 | # Issues: 15 | 16 | 17 | **Issues Checklist:** 18 | - [ ] List relevant issues in this PR. 19 | - [ ] Include this PR link in relevant issues. 
20 | 21 | 22 | ## Type of change: 23 | - [ ] Bug fix (non-breaking change which fixes an issue) 24 | - [ ] New feature (change which adds functionality) 25 | - [ ] Breaking change (change that would cause existing functionality to not work as expected) 26 | - [ ] This change requires a documentation update 27 | 28 | # How Has This Been Tested? 29 | 30 | **Tests:** 31 | - [ ] [Test A](./docs/TEST.md) 32 | - [ ] [Test B](./docs/TEST.md) 33 | - [ ] ... 34 | 35 | # Merge Checklist: 36 | - [ ] All other **Checklists** are fulfilled. 37 | - [ ] At least 1 **Type of Change** is indicated. 38 | - [ ] Relevant tests are indicated and pass locally. 39 | - [ ] The **Creation Checklist** is fulfilled. 40 | - [ ] Corresponding changes have been made to the documentation. 41 | - [ ] Dependent changes have been merged into this PR's "from" branch. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | .idea/ 3 | .terraform/* 4 | */.terraform 5 | .terraform.* 6 | terraform.* 7 | cats_vault/ 8 | docs/ws.md 9 | old 10 | *.pyc 11 | .ipynb_checkpoints 12 | .pytest_cache 13 | /examples/ 14 | venv 15 | .obsidian 16 | offline_experiments 17 | testing/features/cat.py 18 | testing/tests/cat.py 19 | bom.car 20 | bom.json 21 | invoice.json 22 | order.json 23 | cat-action-plane-config 24 | old/experiments/catMesh 25 | online 26 | requirements.txt -------------------------------------------------------------------------------- /AUTHORS.txt: -------------------------------------------------------------------------------- 1 | Authors 2 | ======= 3 | 4 | 5 | Implementation: 6 | - Joshua E. Jodesty from 2023 to present 7 | 8 | Research & Design: 9 | - Joshua E. Jodesty from 2020 to present 10 | - David Sisson from 2016 to present 11 | - Michael Zargham from 2016 to present 12 | 13 | Project Maintainers: 14 | - Joshua E. Jodesty 15 | 16 | Original Contributors: 17 | - Joshua E. Jodesty 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021-2024 BlockScience 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CATs: Content-Addressable Transformers 2 | ![alt_text](images/CATs_chaordic_kernel.jpeg) 3 | 4 | ## Description: 5 | **Content-Addressable Transformers** (**CATs**) is a unified Data Service Collaboration framework for organizations. 6 | CATs connect collaborators between organizations on a Data Mesh with interoperable, parallelized, and distributed 7 | computing at horizontal & vertical scale. CATs establish a scalable, self-serviced Data Platform as a Data Mesh 8 | network of scalable and interoperable distributed computing workloads with Data Provenance, deployable on Kubernetes. 9 | These workloads [CAT(s)] enable Big Data processing with Scientific Computing capabilities. CATs are integration 10 | points that enable scaled data processing portability between client-server cloud platforms and mesh (p2p) networks 11 | with minimal rework or modification. 12 | 13 | CATs enable the 14 | [continuous reification of **Data Initiatives**](https://github.com/BlockScience/cats?tab=readme-ov-file#continuous-data-initiative-reification) 15 | by cataloging discoverable, accessible, and re-executable workloads as 16 | [**Data Service Collaboration**](https://github.com/BlockScience/cats?tab=readme-ov-file#continuous-data-initiative-reification) 17 | composable records between organizations. These records provide a reliable and efficient way to manage, share, and 18 | reference data processes via [**Content-Addressing**](https://en.wikipedia.org/wiki/Content-addressable_storage) Data 19 | Provenance records. 20 | 21 | **Content-Addressing** is a method of uniquely identifying and retrieving data based on its content rather than its 22 | location or address. CATs provide verifiable data processing and transport on a Mesh network of CATs interconnected by 23 | Content-Addressed Data Provenance records, with [IPFS](https://ipfs.io/) 24 | [**CIDs**](https://docs.ipfs.io/concepts/content-addressing/) (Content-Identifiers) issued by the IPFS 25 | **[client](https://docs.ipfs.io/install/command-line/#official-distributions)** serving as content addresses that identify and retrieve inputs, 26 | transformations, outputs, and infrastructure (as code [IaC]), so that transformation accuracy can be verified given CIDs. 27 | ![alt_text](images/cid_example.jpeg)
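As a minimal sketch of the idea — assuming a locally running `ipfs daemon` and the same (deprecated) `ipfsapi` client this package already depends on — the CID is a fingerprint of the bytes added, not of their location:

```python
import ipfsapi as ipfsApi

client = ipfsApi.Client('127.0.0.1', 5001)  # local IPFS daemon

# The CID ('Hash') is computed from the file's content, not from where it lives
cid = client.add('data/iris.csv')['Hash']
print(cid)

# Any peer on the network can retrieve the same bytes by CID alone;
# re-fetching by CID doubles as an integrity check
print(client.cat(cid)[:72])
```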
28 | 29 | ### Specification: 30 | CATs utilize [Ray](https://www.ray.io/) as an interoperable & parallelized distributed computing framework deployable 31 | on **[Kubernetes](https://kubernetes.io/)** for Big Data processing with Scientific Computing. Ray is a unified compute 32 | framework that enables the development of parallel and distributed applications for scalable data transformation, 33 | Machine Learning, and AI. Ray provides CATs with interoperable computing frameworks through its 34 | [ecosystem integrations](https://docs.ray.io/en/latest/ray-overview/ray-libraries.html) such as 35 | [Apache Spark](https://spark.apache.org/) and [PyTorch](https://pytorch.org/). 36 | 37 | Ray is deployed as an execution middleware on top of [Bacalhau's](https://www.bacalhau.org/) [Compute Over Data (CoD)](https://github.com/bacalhau-project/bacalhau). 38 | CoD enables IPFS to serve as the network layer of CATs' Data Mesh, providing parallelized data ingress and egress for IPFS 39 | data. This portability closes the gap between data analysis and business operations by connecting the network planes of 40 | the cloud service model (SaaS, PaaS, IaaS) with IPFS. CATs connect these network planes by enabling the instantiation of 41 | FaaS with cloud services in AWS, GCP, Azure, etc. on a **Data Mesh** network of CATs. CoD enables this connection as p2p 42 | distributed-computing job submission in addition to the client-server job submission provided by Ray. 43 | ![alt_text](images/simple_CAT2b.jpeg) 44 | 45 | ### Get Started!: 46 | 0. **Install [Dependencies](./docs/DEPS.md)** 47 | 1. **Install CATs:** 48 | ```bash 49 | git clone git@github.com:BlockScience/cats.git 50 | cd cats 51 | # Optional: Create Virtual Environment 52 | # python -m venv ./venv 53 | # source ./venv/bin/activate 54 | python -m pip install --upgrade pip 55 | pip install dist/*.whl 56 | ``` 57 | 2. **Demo:** [**Establish a CAT Mesh**](./docs/DEMO.md) (a condensed sketch of the demo flow follows below) 58 | 3. **Test:** [**CAT Mesh Verification**](./docs/TEST.md) 59 | 60 | ### [Contribute!](docs/CONTRIBUTING.md)
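Condensed from the demo (step 2 above), a minimal sketch of driving a CAT Node from Python — assuming the node from `cats/node.py` is running locally and `process_0` is the user-defined transformation defined in the repo's `process.py`:

```python
import ipfsapi as ipfsApi
from cats.network import MeshClient
from cats.service import Service
from process import process_0  # user-defined transformation (receives input/output URIs)

# A Service wraps an IPFS client and talks to the CAT Node's HTTP endpoints
service = Service(meshClient=MeshClient(ipfsClient=ipfsApi.Client('127.0.0.1', 5001)))

# Compose an order: the process, input data, and Terraform structure are all
# content-addressed (CIDed) so the run is verifiable and re-executable
order = service.create_order_request(
    process_obj=process_0,
    data_dirpath='data',           # directory added to IPFS as the initial input
    structure_filepath='main.tf',  # infrastructure-as-code for the workload
    endpoint='http://127.0.0.1:5000/cat/node/init'
)

# Submit the order; the node returns a BOM referencing the invoice and run log
bom_response = service.catSubmit(order)
print(service.flatten_bom(bom_response)['flat_bom'])
```

The returned BOM references the order, invoice, and run log by CID; `service.linkProcess(bom_response, process_1)` (also shown in `cats_demo.ipynb`) reuses those records to chain a second transformation onto the first CAT's output.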
61 | 62 | ### CATs' Architectural Quantum: 63 | Participating organizations and collaborators employ CATs for rapid ratification of service agreements within 64 | collaborative feedback loops of [**Data Initiatives**](https://github.com/BlockScience/cats?tab=readme-ov-file#continuous-data-initiative). 65 | CATs apply the **Architectural Quantum** Domain-Driven Design principle described in 66 | [**Data Mesh of Data Products**](https://martinfowler.com/articles/data-mesh-principles.html) to reify Data Initiatives. 67 | (* [**Design Description**](docs/DESIGN.md)) 68 | ![alt_text](images/CATkernel.jpeg) 69 | 70 | ### Continuous Data Initiative Reification: 71 | **Data Initiatives** will be naturally reified as a result of **Data Service Collaboration** on CATs. CATs will be 72 | compiled and executed as interconnecting services on a Data Mesh that grows naturally when organizations communicate 73 | CATs' provenance records within feedback loops of Data Initiatives. 74 | ![alt_text](images/CATs_bom_ag.jpeg) 75 | 76 | ### CATs' Data Provenance Record: 77 | **BOMs (Bills of Materials)** are CATs' Content-Addressed Data Provenance records for verifiable data processing and 78 | transport on a Mesh network of CATs. BOMs are used as CAT input & output and contain CATs' means of data processing. 79 | * BOMs employ CIDs for location-agnostic retrieval of content, processes, and 80 | [Data Verification](https://en.wikipedia.org/wiki/Data_verification) records. BOM CIDs can be used to verify the means of processing 81 | data (input, transformation / process, output, infrastructure-as-code [IaC]); they also make CATs resilient by 82 | enabling re-execution via retrieval. CATs certify the accuracy of data processing on data products and pipelines by 83 | enabling maintenance and reporting of 84 | [data and process lineage & provenance](https://bi-insider.com/posts/data-lineage-and-data-provenance/) as chains of 85 | evidence using CIDs. 86 | ![alt_text](images/CATs_bom_activity.jpeg) 87 | * A CAT Mesh is composed of CATs executing BOMs. 88 | ![alt_text](images/CATs_bom_connect.jpeg) 89 | 90 | ### CAT Mesh: CATs' Data Mesh platform with Data Provenance 91 | **CAT Mesh** is a self-serviced Data Mesh platform with Data Provenance. **CAT Nodes** are CAT Mesh peers that enable 92 | workloads to be portable between client-server cloud platforms and p2p mesh networks with minimal rework or modification. 93 | 94 | Multi-disciplinary and cross-functional teams can use CAT Nodes to verify and scale distributed computing workloads. 95 | Workloads (CATs) executed by CAT Nodes interface with the cloud service model (SaaS, PaaS, IaaS) offered by providers such as 96 | AWS, GCP, and Azure on a Mesh Network interconnected by IPFS. 97 | 98 | CAT Nodes are **Data Products** - peer nodes on a mesh network that encapsulate their components (code, data & metadata, 99 | and infrastructure) to function as a service providing access to a domain's analytical data as a product. 100 | 101 | **In the following image:** 102 | * Large ovals represent **Data Products** servicing each other with Data 103 | * "O" ovals are Operational Data web service endpoints 104 | * "D" ovals are Analytical Data web service endpoints 105 | * Source: [Data Mesh Principles and Logical Architecture](https://martinfowler.com/articles/data-mesh-principles.html) - Zhamak 106 | Dehghani, et al. 107 | ![alt_text](images/data_product_domain.jpeg) 108 | 109 | ## Key Concepts: 110 | * **[Data Verification](https://en.wikipedia.org/wiki/Data_verification)** - a process by which data is checked for 111 | accuracy and inconsistencies before it is processed 112 | * **[Data Provenance](https://bi-insider.com/posts/data-lineage-and-data-provenance/)** - a means of proving data 113 | lineage using historical records that provide the means 114 | of pipeline re-execution and **[data validation](https://en.wikipedia.org/wiki/Data_validation)** 115 | * **[Data Lineage](https://bi-insider.com/posts/data-lineage-and-data-provenance/)** - reporting of the data lifecycle from 116 | source to destination 117 | * **[Distributed Computing](https://en.wikipedia.org/wiki/Distributed_computing)** - typically the concurrent and/or 118 | parallel execution of job tasks distributed to networked computers processing data 119 | * **[Bill of Materials (BOM)](https://en.wikipedia.org/wiki/Bill_of_materials)** - an extensive list of raw materials, 120 | components, and instructions required to construct, manufacture, or repair a product or service 121 | 122 | ### Image Citations: 123 | * **["Illustrated CAT"](https://github.com/BlockScience/cats#illustrated-cat)** 124 | * [Python logo](https://tse4.mm.bing.net/th?id=OIP.ubux1yLT726_fVc3A7WSXgHaHa&pid=Api) 125 | * [SQL logo](https://cdn3.iconfinder.com/data/icons/dompicon-glyph-file-format-2/256/file-sql-format-type-128.png) 126 | * [Terraform logo](https://tse2.mm.bing.net/th?id=OIP.1gAEVon2RF5oko4iWCfftgHaHO&pid=Api) 127 | * [IPFS logo](https://tse1.mm.bing.net/th?id=OIP.BRyW5Tdm5_6VQxCsGr_sQAHaHa&pid=Api) 128 | * [cat image](https://tse1.mm.bing.net/th?id=OIP.xS_itpeyTImMcrcQ_YNsfQHaIu&pid=Api) 129 | * [ray.io logo](https://open-datastudio.io/_images/ray-logo.png) 130 | 131 | -------------------------------------------------------------------------------- /cats/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import dirname, abspath 3 | 4 | CATS_HOME = dirname(dirname(abspath(__file__))) 5 | DATA_HOME = CATS_HOME + '/data' 6 | CWD = os.getcwd() 7 | AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID') 8 | AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY') 9 | -------------------------------------------------------------------------------- /cats/executor/__init__.py: -------------------------------------------------------------------------------- 1 | import json, os 2 | from cats.io.input.structure import Structure 3 | from
cats.io.input.function import Function 4 | from cats.service import Service 5 | 6 | 7 | class Executor(Structure): 8 | def __init__(self, 9 | service: Service 10 | ): 11 | self.service: Service = service 12 | self.structure: Structure = Structure(self.service) 13 | self.function: Function = Function(self.service) 14 | self.bom_json_cid: str = self.service.bom_json_cid 15 | self.enhanced_bom, self.bom = self.service.meshClient.getEnhancedBom(self.bom_json_cid) 16 | self.orderCID = None 17 | self.invoiceCID = None 18 | 19 | self.ingress_job_id = None 20 | self.integration_s3_output = None 21 | self.egress_job_id = None 22 | 23 | # self.order = None 24 | # self.structure: Structure = self.order.structure 25 | # self.function: Function = self.order.function 26 | 27 | def execute(self, enhanced_bom=None): 28 | if enhanced_bom is not None: 29 | self.enhanced_bom = enhanced_bom 30 | 31 | self.invoiceCID = self.enhanced_bom['invoice_cid'] 32 | self.orderCID = self.enhanced_bom['invoice']['order_cid'] 33 | 34 | self.structure.redeploy() 35 | self.ingress_job_id, self.integration_s3_output, self.egress_job_id = self.function.execute() 36 | 37 | self.enhanced_bom['function'] = json.loads(self.service.meshClient.cat(self.enhanced_bom['order']['function_cid'])) 38 | self.enhanced_bom['log'] = { 39 | 'ingress_job_id': self.ingress_job_id, 40 | 'integration_output': self.integration_s3_output, 41 | 'egress_job_id': self.egress_job_id 42 | } 43 | self.enhanced_bom['invoice']['data_cid'] = self.service.meshClient.getEgressOutput(job_id=self.egress_job_id) 44 | self.enhanced_bom['log_cid'] = self.service.ipfsClient.add_json(self.enhanced_bom['log']) 45 | 46 | del self.enhanced_bom['bom_json_cid'] 47 | del self.enhanced_bom['init_data_cid'] 48 | 49 | os.remove("bom.json") 50 | os.remove("invoice.json") 51 | os.remove("order.json") 52 | os.remove("bom.car") 53 | os.remove("cat-action-plane-config") 54 | return self.enhanced_bom, None 55 | # return self.invoiceCID 56 | -------------------------------------------------------------------------------- /cats/factory/__init__.py: -------------------------------------------------------------------------------- 1 | from cats.executor import Executor 2 | from cats.service import Service 3 | 4 | 5 | class Factory: 6 | def __init__(self, 7 | service: Service, 8 | order=None 9 | ): 10 | self.Executor = Executor(service=service) 11 | self.order = order 12 | 13 | def initCAT(self, 14 | function_cid, ipfs_uri, 15 | structure_cid=None, structure_filepath=None 16 | ): 17 | return self.service.initBOMcar( 18 | structure_cid=structure_cid, 19 | structure_filepath=structure_filepath, 20 | function_cid=function_cid, 21 | init_data_cid=ipfs_uri 22 | ) 23 | 24 | def produce(self): 25 | return self.Executor 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /cats/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlockScience/cats/72263573aa3a2f02da2dccc8b5f09015c3df5fb5/cats/io/__init__.py -------------------------------------------------------------------------------- /cats/io/input/__init__.py: -------------------------------------------------------------------------------- 1 | from cats.io.input.function import InfraFunction, Processor 2 | from cats.service import Service 3 | 4 | 5 | class Function(InfraFunction): 6 | def __init__(self, 7 | service: Service 8 | ): 9 | self.service: Service = service 10 | self.infraFunc = 
InfraFunction.__init__(self, self.service) 11 | self.orderCID: str = self.service.orderCID 12 | self.processCID = self.service.processCID 13 | self.process: Processor = None 14 | self.dataCID: str = None 15 | self.invoiceCID: str = None 16 | 17 | def execute(self): 18 | self.process = self.infraFunc.configureProcess() 19 | self.dataCID = self.process.execute(self.processCID) 20 | self.invoiceCID = self.service.createInvoice(self.orderCID, self.dataCID, self.service.seedCID) 21 | return self.invoiceCID 22 | 23 | -------------------------------------------------------------------------------- /cats/io/input/function.py: -------------------------------------------------------------------------------- 1 | from ray.data import Dataset 2 | from cats.service import Service 3 | 4 | 5 | class IO: 6 | def __init__(self, reader, writer): 7 | self.processor: Processor = None 8 | self.input, self.output = None, None 9 | self.function = None 10 | self.Reader = reader 11 | self.Writer = writer 12 | self.ds_in: Dataset = None 13 | self.ds_out: Dataset = None 14 | 15 | def read(self): 16 | self.input = self.processor.ingress_input 17 | self.ds_in = self.Reader(self.input) 18 | 19 | def write(self): 20 | self.output = self.processor.integration_output 21 | self.Writer(self.ds_out, self.output) 22 | 23 | def transform(self, processor): 24 | self.processor = processor 25 | self.read() 26 | self.ds_out = self.processor.process(self.ds_in) 27 | print(self.ds_out.show(limit=1)) 28 | self.write() 29 | return self.ds_out 30 | 31 | def view(self, processor): 32 | self.processor = processor 33 | self.read() 34 | self.ds_out = self.processor.process(self.ds_in) 35 | self.write() 36 | return self.ds_out 37 | 38 | 39 | class Processor: 40 | def __init__(self, service: Service): 41 | self.service = service 42 | self.processCID = self.service.processCID 43 | self.process = self.service.process 44 | self.inDataCID = self.service.enhanced_bom['init_data_cid'] 45 | # self.inDataCID = self.service.enhanced_bom['invoice']['data_cid'] 46 | self.outDataCID = None 47 | self.seedCID = None 48 | 49 | self.ds_in = None 50 | self.ds_out = None 51 | 52 | self.ingress_job_id = None 53 | self.ingress_input = None 54 | self.integration_output = None 55 | # self.integration_job_id = None 56 | self.egress_job_id = None 57 | 58 | def Ingress(self): 59 | self.ingress_job_id = self.service.meshClient.ingress(input=self.inDataCID) 60 | self.service.meshClient.checkStatusOfJob(job_id=self.ingress_job_id) 61 | return self.ingress_job_id 62 | 63 | def Integration(self): 64 | self.ingress_input = self.service.meshClient.integrate(job_id=self.ingress_job_id) 65 | self.integration_output = "s3://" + self.ingress_input.split('//')[-1].rsplit('/outputs/')[0] + "-integrated" 66 | self.process(self.ingress_input, self.integration_output) 67 | return self.integration_output 68 | 69 | def Egress(self): 70 | self.egress_job_id = self.service.meshClient.egress(integration_s3_output=self.integration_output) 71 | self.service.meshClient.checkStatusOfJob(job_id=self.egress_job_id) 72 | return self.egress_job_id 73 | 74 | def execute(self): 75 | self.ingress_job_id = self.Ingress() 76 | self.integration_output = self.Integration() 77 | self.egress_job_id = self.Egress() 78 | return self.ingress_job_id, self.integration_output, self.egress_job_id 79 | 80 | 81 | class InfraFunction(Processor): 82 | def __init__(self, service: Service): 83 | self.service = service 84 | # self.infrafunctionCID = self.service.infrafunctionCID 85 | self.process: Processor = 
Processor(self.service) 86 | 87 | 88 | class Function(InfraFunction): 89 | def __init__(self, service: Service): 90 | self.service: Service = service 91 | self.infraFunction: InfraFunction = InfraFunction(self.service) 92 | self.processor: Processor = self.infraFunction.process 93 | self.process = self.service.process 94 | self.ingress_job_id = None 95 | self.integration_s3_output = None 96 | self.egress_job_id = None 97 | 98 | def execute(self): 99 | self.ingress_job_id, self.integration_s3_output, self.egress_job_id = self.processor.execute() 100 | return self.ingress_job_id, self.integration_s3_output, self.egress_job_id -------------------------------------------------------------------------------- /cats/io/input/structure.py: -------------------------------------------------------------------------------- 1 | from cats.service import Service 2 | 3 | 4 | class Structure: 5 | def __init__(self, 6 | service: Service = None 7 | ): 8 | self.service: Service = service 9 | self.bom_json_cid = self.service.bom_json_cid 10 | # self.plant: Plant = plant 11 | # self.infraStructure: Terraform = Terraform(working_dir=cats.CWD) 12 | 13 | def destroy(self): 14 | print('Destroy Structure!') 15 | self.service.executeCMD(['terraform', 'destroy', '--auto-approve']) 16 | print() 17 | print() 18 | 19 | def initialize(self): 20 | print('Initialize Structure!') 21 | self.service.executeCMD(['terraform', 'init', '--upgrade']) # self.service.executeCMD(['terraform', 'plan']) 22 | print() 23 | print() 24 | 25 | def apply(self): 26 | print('Apply Structure!') 27 | self.service.executeCMD(['terraform', 'apply', '--auto-approve']) 28 | print() 29 | print() 30 | 31 | def redeploy(self): 32 | print() 33 | print() 34 | print('Deploy Structure!') 35 | self.destroy() 36 | self.initialize() 37 | self.apply() 38 | -------------------------------------------------------------------------------- /cats/io/output/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlockScience/cats/72263573aa3a2f02da2dccc8b5f09015c3df5fb5/cats/io/output/__init__.py -------------------------------------------------------------------------------- /cats/network/__init__.py: -------------------------------------------------------------------------------- 1 | import json, subprocess 2 | from copy import copy, deepcopy 3 | 4 | from cats import CATS_HOME 5 | from cats.network.aws import s3_client 6 | from cats.network.cod import CoD 7 | 8 | 9 | class MeshClient(CoD): 10 | def __init__(self, ipfsClient, filecoinClient=None, awsClient=None): 11 | self.ipfsClient = ipfsClient 12 | self.filecoinClient = filecoinClient 13 | self.awsClient = awsClient 14 | self.context = ... 
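# Mix in Compute-over-Data (Bacalhau): provides the ingress/integrate/egress job-submission methods used below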
15 | CoD.__init__(self) 16 | 17 | def initBOMjson(self, 18 | structure_cid: str, structure_filepath: str, function_cid: str, init_data_cid: str, 19 | seed_cid=None 20 | ): 21 | init_invoice = { 22 | 'order_cid': None, 23 | # 'data_cid': None, 24 | 'seed_cid': seed_cid, 25 | } 26 | init_order = { 27 | 'invoice_cid': None, 28 | 'function_cid': function_cid, 29 | 'structure_cid': structure_cid, 30 | 'structure_filepath': structure_filepath 31 | } 32 | 33 | init_invoice_cid = self.ipfsClient.add_json(init_invoice) 34 | init_order['invoice_cid'] = init_invoice_cid 35 | init_order_cid = self.ipfsClient.add_json(init_order) 36 | 37 | invoice = copy(init_invoice) 38 | invoice['order_cid'] = init_order_cid 39 | invoice_cid = self.ipfsClient.add_json(invoice) 40 | 41 | 42 | init_bom = { 43 | 'invoice_cid': invoice_cid, 44 | 'log_cid': None, 45 | 'init_data_cid': init_data_cid 46 | } 47 | init_bom_json_cid = self.ipfsClient.add_json(init_bom) 48 | return init_bom_json_cid 49 | 50 | def initBOMcar(self, structure_cid: str, structure_filepath: str, function_cid: str, init_data_cid: str, init_bom_filename: str, seed_cid=None): 51 | init_bom_json_cid = self.initBOMjson(structure_cid, structure_filepath, function_cid, init_data_cid) 52 | car_bom_cid, init_bom_json_cid = self.convertBOMtoCAR(init_bom_json_cid, init_bom_filename) 53 | return car_bom_cid, init_bom_json_cid 54 | 55 | def linkData(self, cid, subdir=' - outputs/'): 56 | cmd = f"ipfs ls {cid}" 57 | response = subprocess.check_output(cmd.split(' ')).decode() 58 | dirs = response.split('\n') 59 | res = [i for i in dirs if subdir in i] 60 | return res[0].split(' - ')[0] 61 | 62 | def get(self, cid: str, filepath: str, output: str = CATS_HOME): 63 | subprocess.check_output( 64 | f"ipfs get {cid} --output {output}/{filepath}", 65 | stderr=subprocess.STDOUT, 66 | shell=True 67 | ) 68 | return filepath 69 | 70 | def cat(self, cid: str): 71 | return subprocess.check_output(['ipfs', 'cat', cid]).decode() 72 | 73 | def catObj(self, cid: str): 74 | return subprocess.check_output(['ipfs', 'cat', cid]) 75 | 76 | def getCar(self, cid: str, filepath: str): 77 | subprocess.check_output( 78 | f"ipfs dag export {cid} > {filepath}", 79 | stderr=subprocess.STDOUT, 80 | shell=True 81 | ) 82 | 83 | def getBom(self, cid: str, filepath: str): 84 | self.get(cid, filepath) 85 | bom = dict(json.loads(open(filepath, 'r').read()))  # parse the downloaded file's contents, not the path string 86 | subprocess.check_output( 87 | f"rm {filepath}", 88 | stderr=subprocess.STDOUT, 89 | shell=True 90 | ) 91 | return bom 92 | 93 | def BOMcarToIPFS(self, bom_cid: str, filepath: str): 94 | self.getCar(bom_cid, filepath) 95 | storage_bom_cid = self.ipfsClient.post_upload(filepath) 96 | return storage_bom_cid, bom_cid 97 | 98 | def convertBOMtoCAR(self, bom_cid: str, filepath: str): 99 | self.getCar(bom_cid, filepath) 100 | car_bom_cid = None 101 | try: 102 | car_bom_cid = self.ipfsClient.add(filepath)['Hash'] 103 | except:  # ipfsClient.add may return a list when multiple entries are added 104 | for attrs in self.ipfsClient.add(filepath): 105 | if attrs['Name'] == filepath: 106 | print(attrs) 107 | car_bom_cid = attrs['Hash'] 108 | return car_bom_cid, bom_cid 109 | 110 | def getEnhancedBom(self, bom_json_cid: str): 111 | self.get(bom_json_cid, 'bom.json') 112 | bom = json.loads(open('bom.json', 'r').read()) 113 | enhanced_bom = deepcopy(bom) 114 | enhanced_bom['bom_json_cid'] = bom_json_cid 115 | 116 | self.get(bom['invoice_cid'], 'invoice.json') 117 | enhanced_bom['invoice'] = json.loads(open('invoice.json', 'r').read()) 118 | 119 | self.get(enhanced_bom['invoice']['order_cid'], 'order.json') 120 | enhanced_bom['order'] =
json.loads(open('order.json', 'r').read()) 121 | 122 | self.get( 123 | enhanced_bom['order']['structure_cid'], 124 | enhanced_bom['order']['structure_filepath'] 125 | ) 126 | return deepcopy(enhanced_bom), bom 127 | 128 | def createInvoice(self, orderCID: str, dataCID: str, seedCID: str): 129 | invoice = {'orderCID': orderCID, 'dataCID': dataCID, 'seedCID': seedCID} 130 | invoice_cid = self.ipfsClient.add_json(invoice) 131 | return invoice_cid 132 | 133 | def cidFile(self, filepath): 134 | file_json = self.ipfsClient.add(filepath) 135 | file_cid = file_json['Hash'] 136 | file_name = file_json['Name'] 137 | return file_cid, file_name 138 | 139 | def cidDir(self, filepath: str): 140 | data = self.ipfsClient.add(filepath) 141 | data_dir = filepath.split('/')[-1] 142 | if type(data) is list: 143 | data_json = list(filter(lambda x: x['Name'] == data_dir, data))[-1] 144 | data_cid = data_json['Hash'] 145 | return data_cid 146 | else: 147 | data_json = data 148 | data_cid = data_json['Hash'] 149 | return data_cid -------------------------------------------------------------------------------- /cats/network/aws.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | from cats import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY 3 | 4 | s3_client = boto3.client( 5 | 's3', 6 | region_name='us-east-2', 7 | aws_access_key_id=AWS_ACCESS_KEY_ID, 8 | aws_secret_access_key=AWS_SECRET_ACCESS_KEY 9 | ) -------------------------------------------------------------------------------- /cats/network/cod.py: -------------------------------------------------------------------------------- 1 | import json, glob, os, multiprocessing, shutil, subprocess, tempfile, time 2 | from pprint import pprint 3 | 4 | 5 | class CoD: 6 | def __init__(self): ... 7 | # checkStatusOfJob checks the status of a Bacalhau job 8 | def checkStatusOfJob(self, job_id: str) -> str: 9 | assert len(job_id) > 0 10 | p = subprocess.run( 11 | ["bacalhau", "list", "--output", "json", "--id-filter", job_id], 12 | stdout=subprocess.PIPE, 13 | stderr=subprocess.PIPE, 14 | text=True, 15 | ) 16 | r = self.parseJobStatus(p.stdout) 17 | if r == "": 18 | print("job status is empty! 
%s" % job_id) 19 | elif r == "Completed": 20 | print("job completed: %s" % job_id) 21 | else: 22 | print("job not completed: %s - %s" % (job_id, r)) 23 | 24 | return r 25 | 26 | def ingress(self, input: str): 27 | publisher = "s3://catstore3/boms/result-{date}-{jobID}/,opt=region=us-east-2" 28 | cmd = f"bacalhau docker run -i {input} -p {publisher} --id-only --wait alpine -- sh -c" 29 | # print(input) 30 | # print(publisher) 31 | # exit() 32 | cmd_list = cmd.split(' ') + ['cp -r /inputs/* /outputs/'] 33 | submit = subprocess.run( 34 | cmd_list, 35 | stdout=subprocess.PIPE, 36 | stderr=subprocess.PIPE, 37 | text=True, 38 | ) 39 | if submit.returncode != 0: 40 | print("failed (%d) job: %s" % (submit.returncode, submit.stdout)) 41 | job_id = submit.stdout.strip() 42 | print("job submitted: %s" % job_id) 43 | return job_id 44 | 45 | def integrate(self, job_id: str): 46 | cmd = f"bacalhau describe {job_id} --json".split(' ') 47 | result = subprocess.run(cmd, stdout=subprocess.PIPE) 48 | job_json = json.loads(result.stdout) 49 | executions = job_json["State"]["Executions"] 50 | execution = list(filter(lambda d: d['State'] in ['Completed'], executions)).pop() 51 | ingress_bucket = execution['PublishedResults']['S3']['Bucket'] 52 | ingress_key = execution['PublishedResults']['S3']['Key'].rstrip('/') 53 | ingress_s3_input = f"s3://{ingress_bucket}/{ingress_key}/outputs/" 54 | return ingress_s3_input 55 | 56 | def getEgressOutput(self, job_id: str): 57 | cmd = f"bacalhau describe {job_id} --json".split(' ') 58 | result = subprocess.run(cmd, stdout=subprocess.PIPE) 59 | job_json = json.loads(result.stdout) 60 | executions = job_json["State"]["Executions"] 61 | execution = list(filter(lambda d: d['State'] in ['Completed'], executions)).pop() 62 | data_cid = execution['PublishedResults']['CID'] 63 | return data_cid 64 | 65 | def egress(self, integration_s3_output: str): 66 | input = f"{integration_s3_output}/,opt=region=us-east-2" 67 | cmd = f"bacalhau docker run -i {input} --id-only --wait alpine -- sh -c" 68 | cmd_list = cmd.split(' ') + ['cp -r /inputs/* /outputs/'] 69 | submit = subprocess.run( 70 | cmd_list, 71 | stdout=subprocess.PIPE, 72 | stderr=subprocess.PIPE, 73 | text=True, 74 | ) 75 | if submit.returncode != 0: 76 | print("failed (%d) job: %s" % (submit.returncode, submit.stdout)) 77 | job_id = submit.stdout.strip() 78 | print("job submitted: %s" % job_id) 79 | 80 | return job_id 81 | 82 | # submitJob submits a job to the Bacalhau network 83 | def submitJob(self, cid: str) -> str: 84 | assert len(cid) > 0 85 | p = subprocess.run( 86 | [ 87 | "bacalhau", 88 | "docker", 89 | "run", 90 | "--id-only", 91 | "--wait=false", 92 | "--input", 93 | "ipfs://" + cid + ":/inputs/data.tar.gz", 94 | "ghcr.io/bacalhau-project/examples/blockchain-etl:0.0.6", 95 | ], 96 | stdout=subprocess.PIPE, 97 | stderr=subprocess.PIPE, 98 | text=True, 99 | ) 100 | if p.returncode != 0: 101 | print("failed (%d) job: %s" % (p.returncode, p.stdout)) 102 | job_id = p.stdout.strip() 103 | print("job submitted: %s" % job_id) 104 | 105 | return job_id 106 | 107 | # getResultsFromJob gets the results from a Bacalhau job 108 | def getResultsFromJob(self, job_id: str) -> str: 109 | assert len(job_id) > 0 110 | temp_dir = tempfile.mkdtemp() 111 | print("getting results for job: %s" % job_id) 112 | for i in range(0, 5): # try 5 times 113 | p = subprocess.run( 114 | [ 115 | "bacalhau", 116 | "get", 117 | "--output-dir", 118 | temp_dir, 119 | job_id, 120 | ], 121 | stdout=subprocess.PIPE, 122 | stderr=subprocess.PIPE, 123 | 
text=True, 124 | ) 125 | if p.returncode == 0: 126 | break 127 | else: 128 | print("failed (exit %d) to get job: %s" % (p.returncode, p.stdout)) 129 | 130 | return temp_dir 131 | 132 | # parseJobStatus parses the status of a Bacalhau job 133 | def parseJobStatus(self, result: str) -> str: 134 | if len(result) == 0: 135 | return "" 136 | r = json.loads(result) 137 | if len(r) > 0: 138 | return r[0]["State"]["State"] 139 | return "" 140 | 141 | # parseHashes splits lines from a text file into a list 142 | def parseHashes(self, filename: str) -> list: 143 | assert os.path.exists(filename) 144 | with open(filename, "r") as f: 145 | hashes = f.read().splitlines() 146 | return hashes 147 | 148 | def parseHashesFromFile(self, file: str, num_files: int = -1): 149 | # Use multiprocessing to work in parallel 150 | count = multiprocessing.cpu_count() 151 | with multiprocessing.Pool(processes=count) as pool: 152 | hashes = self.parseHashes(file)[:num_files] 153 | print("submitting %d jobs" % len(hashes)) 154 | job_ids = pool.map(self.submitJob, hashes) 155 | assert len(job_ids) == len(hashes) 156 | 157 | print("waiting for jobs to complete...") 158 | while True: 159 | job_statuses = pool.map(self.checkStatusOfJob, job_ids) 160 | total_finished = sum(map(lambda x: x == "Completed", job_statuses)) 161 | if total_finished >= len(job_ids): 162 | break 163 | print("%d/%d jobs completed" % (total_finished, len(job_ids))) 164 | time.sleep(2) 165 | 166 | print("all jobs completed, saving results...") 167 | results = pool.map(self.getResultsFromJob, job_ids) 168 | print("finished saving results") 169 | 170 | # Do something with the results 171 | shutil.rmtree("../../results", ignore_errors=True) 172 | os.makedirs("../../results", exist_ok=True) 173 | for r in results: 174 | path = os.path.join(r, "outputs", "*.csv") 175 | csv_file = glob.glob(path) 176 | for f in csv_file: 177 | print("moving %s to results" % f) 178 | shutil.move(f, "../../results") 179 | -------------------------------------------------------------------------------- /cats/node.py: -------------------------------------------------------------------------------- 1 | import json 2 | import ipfsapi as ipfsApi 3 | from cats.network import MeshClient 4 | from cats.service import Service 5 | from cats.factory import Factory 6 | from flask import Flask, request, jsonify 7 | 8 | cat = Flask(__name__) 9 | 10 | node_service = Service( 11 | meshClient=MeshClient( 12 | ipfsClient=ipfsApi.Client('127.0.0.1', 5001) 13 | ) 14 | ) 15 | 16 | 17 | def initFactory(order_request, ipfs_uri): 18 | # if cod_out is False: 19 | # ipfs_uri = f'ipfs://{order_request["invoice"]["data_cid"]}/*csv' 20 | # elif cod_out is True: 21 | # ipfs_uri = f'ipfs://{order_request["invoice"]["data_cid"]}/output/*csv' 22 | node_service.initBOMcar( 23 | structure_cid=order_request['order']['structure_cid'], 24 | structure_filepath=order_request['order']['structure_filepath'], 25 | function_cid=order_request['order']['function_cid'], 26 | init_data_cid=ipfs_uri 27 | ) 28 | catFactory = Factory(node_service) 29 | return catFactory, order_request 30 | 31 | 32 | def execute(catFactory, order_request): 33 | executor = catFactory.produce() 34 | enhanced_bom, _ = executor.execute() 35 | 36 | invoice = {} 37 | enhanced_bom['invoice']['order_cid'] = node_service.ipfsClient.add_str( 38 | json.dumps(order_request['order']) 39 | ) 40 | invoice['invoice_cid'] = node_service.ipfsClient.add_str( 41 | json.dumps(enhanced_bom['invoice']) 42 | ) 43 | invoice['invoice'] = enhanced_bom['invoice'] 44 | 45 | 
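# Assemble the response BOM: just the run-log CID and the newly added invoice CID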
bom = { 46 | 'log_cid': enhanced_bom['log_cid'], 47 | 'invoice_cid': invoice['invoice_cid'] 48 | } 49 | bom_response = { 50 | 'bom': bom, 51 | 'bom_cid': node_service.ipfsClient.add_str(json.dumps(bom)) 52 | } 53 | return bom_response 54 | 55 | 56 | @cat.route('/cat/node/init', methods=['POST']) 57 | def execute_init_cat(): 58 | try: 59 | order_request = request.get_json() 60 | order_request["order"] = json.loads(node_service.meshClient.cat(order_request["order_cid"])) 61 | order_request['invoice'] = json.loads(node_service.meshClient.cat(order_request['order']['invoice_cid'])) 62 | 63 | # IPFS checks 64 | # if 'bom_cid' not in bom: 65 | # return jsonify({'error': 'CID not provided'}), 400 66 | 67 | 68 | ipfs_uri = f'ipfs://{order_request["invoice"]["data_cid"]}/*csv' 69 | catFactory, updated_order_request = initFactory(order_request, ipfs_uri) 70 | bom_response = execute(catFactory, updated_order_request) 71 | 72 | # Return BOM 73 | return jsonify(bom_response) 74 | 75 | except Exception as e: 76 | return jsonify({'error': str(e)}) 77 | 78 | 79 | @cat.route('/cat/node/link', methods=['POST']) 80 | def execute_link_cat(): 81 | try: 82 | order_request = request.get_json() 83 | order_request["order"] = json.loads(node_service.meshClient.cat(order_request["order_cid"])) 84 | order_request['invoice'] = json.loads(node_service.meshClient.cat(order_request['order']['invoice_cid'])) 85 | 86 | prev_data_cid = order_request['invoice']['data_cid'] 87 | data_cid = node_service.meshClient.linkData(prev_data_cid) 88 | ipfs_uri = f'ipfs://{data_cid}/*csv' 89 | catFactory, updated_order_request = initFactory(order_request, ipfs_uri) 90 | bom_response = execute(catFactory, updated_order_request) 91 | 92 | # Return BOM 93 | return jsonify(bom_response) 94 | 95 | except Exception as e: 96 | return jsonify({'error': str(e)}) 97 | 98 | 99 | if __name__ == '__main__': 100 | # Run the Flask application on http://127.0.0.1:5000/ 101 | cat.run(debug=True) -------------------------------------------------------------------------------- /cats/service/__init__.py: -------------------------------------------------------------------------------- 1 | import glob, json, os, pickle, subprocess 2 | from copy import deepcopy 3 | from pprint import pprint 4 | import pandas as pd 5 | 6 | from cats import CATS_HOME 7 | from cats.service.utils import executeCMD 8 | import ipfsapi as ipfsApi 9 | from cats.network import MeshClient 10 | 11 | 12 | class Service: 13 | def __init__(self, 14 | meshClient: MeshClient 15 | ): 16 | self.meshClient: MeshClient = meshClient 17 | self.ipfsClient: ipfsApi = self.meshClient.ipfsClient 18 | self.executeCMD = executeCMD 19 | 20 | self.init_bom_json_cid = None 21 | self.bom_json_cid = None 22 | self.init_bom_car_cid = None 23 | self.enhanced_init_bom = None 24 | self.enhanced_bom = None 25 | # self.enhanced_init_bom = None 26 | 27 | self.ingress_subproc_cid = None 28 | self.integration_subproc_cid = None 29 | self.egress_subproc_cid = None 30 | 31 | self.ingress_subproc = None 32 | self.integration_subproc = None 33 | self.egress_subproc = None 34 | 35 | self.orderCID = None 36 | self.dataCID = None 37 | self.functionCID = None 38 | self.processCID = None 39 | self.order = None 40 | self.process = None 41 | 42 | def cid_to_pandasDF(self, cid, download_dir, format='*.csv', read_dir='/outputs', parrent_dir=CATS_HOME): 43 | path = f'{parrent_dir}/{download_dir}' 44 | os.system(f"rm -rf {path}") 45 | self.meshClient.get(cid, download_dir, parrent_dir) 46 | 47 | # Get the files from the path 
provided 48 | files = glob.glob(os.path.join(f"{path}{read_dir}", format)) 49 | dfs = list(pd.read_csv(f).assign(filename=f) for f in files) 50 | df = None 51 | for dfx in dfs: 52 | if df is None: 53 | df = dfx 54 | else: 55 | df = pd.concat([df, dfx], ignore_index=True) 56 | return df 57 | 58 | def initBOMcar(self, 59 | function_cid, init_data_cid, init_bom_filename='bom.car', 60 | structure_cid=None, structure_filepath=None 61 | ): 62 | self.init_bom_car_cid, self.init_bom_json_cid = self.meshClient.initBOMcar( 63 | # structure_path=self.MeshClient.g, 64 | structure_cid=structure_cid, 65 | structure_filepath=structure_filepath, 66 | function_cid=function_cid, 67 | init_data_cid=init_data_cid, 68 | init_bom_filename=init_bom_filename 69 | ) 70 | self.enhanced_bom, init_bom = self.meshClient.getEnhancedBom(bom_json_cid=self.init_bom_json_cid) 71 | 72 | self.functionCID = self.enhanced_bom['order']['function_cid'] 73 | function_dict = json.loads(self.meshClient.cat(self.functionCID)) 74 | self.processCID = function_dict['process_cid'] 75 | self.process = pickle.loads(self.meshClient.catObj(self.processCID)) 76 | 77 | self.order_cid = self.enhanced_bom['invoice']['order_cid'] 78 | self.init_bom_json_cid = self.enhanced_bom['bom_json_cid'] 79 | self.bom_json_cid = self.init_bom_json_cid 80 | return self.init_bom_car_cid, self.init_bom_json_cid 81 | 82 | def catSubmit(self, bom): 83 | order = json.loads(self.meshClient.cat(bom["order_cid"])) 84 | print("Order:") 85 | print() 86 | pprint(order) 87 | print() 88 | print() 89 | 90 | ppost = lambda args, endpoint: \ 91 | f'curl -X POST -H "Content-Type: application/json" -d \\\n\'{json.dumps(**args)}\' {endpoint}' 92 | post = lambda args, endpoint: \ 93 | 'curl -X POST -H "Content-Type: application/json" -d \'' + json.dumps(**args) + f'\' {endpoint}' 94 | 95 | post_cmd = post({'obj': bom}, order["endpoint"]) 96 | print(ppost({'obj': bom, 'indent': 4}, order["endpoint"])) 97 | print() 98 | print() 99 | response_str = subprocess.check_output(post_cmd, shell=True) 100 | output_bom = json.loads(response_str) 101 | 102 | output_bom['POST'] = post_cmd 103 | return output_bom 104 | 105 | def flatten_bom(self, bom_response): 106 | invoice = json.loads( 107 | self.meshClient.cat(bom_response["bom"]["invoice_cid"]) 108 | ) 109 | invoice['order'] = json.loads( 110 | self.meshClient.cat(invoice['order_cid']), 111 | ) 112 | invoice['order']['flat'] = { 113 | 'function': json.loads(self.meshClient.cat(invoice['order']["function_cid"])), 114 | 'invoice': json.loads(self.meshClient.cat(invoice['order']["invoice_cid"])) 115 | } 116 | bom_response["flat_bom"] = { 117 | 'invoice': invoice, 118 | 'log': json.loads( 119 | self.meshClient.cat(bom_response["bom"]["log_cid"]) 120 | ) 121 | } 122 | return bom_response 123 | 124 | def create_order_request(self, 125 | process_obj, data_dirpath, structure_filepath, 126 | endpoint='http://127.0.0.1:5000/cat/node/execute' 127 | ): 128 | structure_cid, structure_name = self.meshClient.cidFile(structure_filepath) 129 | function = { 130 | 'process_cid': self.ipfsClient.add_pyobj(process_obj), 131 | 'infrafunction_cid': None 132 | } 133 | invoice = { 134 | "data_cid": self.meshClient.cidDir(data_dirpath) 135 | } 136 | order = { 137 | "function_cid": self.ipfsClient.add_str(json.dumps(function)), 138 | "structure_cid": structure_cid, 139 | "invoice_cid": self.ipfsClient.add_str(json.dumps(invoice)), 140 | "structure_filepath": structure_name, 141 | "endpoint": endpoint 142 | } 143 | self.order = { 144 | 'order_cid': 
self.ipfsClient.add_str(json.dumps(order)) 145 | } 146 | return self.order 147 | 148 | def linkProcess(self, cat_response, process_obj): 149 | flattened_bom = self.flatten_bom(cat_response) 150 | flat_bom = deepcopy(flattened_bom['flat_bom']) 151 | 152 | function = { 153 | 'process_cid': self.ipfsClient.add_pyobj(process_obj), 154 | 'infrafunction': None 155 | } 156 | 157 | invoice = flat_bom['invoice'] 158 | input_invoice = {'data_cid': invoice['data_cid']} 159 | new_function_cid = self.ipfsClient.add_str(json.dumps(function)) 160 | new_invoice_cid = self.ipfsClient.add_str(json.dumps(input_invoice)) 161 | 162 | order = invoice['order'] 163 | order['function_cid'] = new_function_cid 164 | order['invoice_cid'] = new_invoice_cid 165 | del order['flat'] 166 | order['endpoint'] = 'http://127.0.0.1:5000/cat/node/link' 167 | 168 | order_request = {'order_cid': self.ipfsClient.add_str(json.dumps(order))} 169 | return order_request 170 | -------------------------------------------------------------------------------- /cats/service/utils.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | 4 | def executeCMD(cmd): 5 | def execute(x): 6 | popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True) 7 | for stdout_line in iter(popen.stdout.readline, ""): 8 | yield stdout_line 9 | popen.stdout.close() 10 | return_code = popen.wait() 11 | if return_code: 12 | raise subprocess.CalledProcessError(return_code, x) 13 | 14 | for path in execute(cmd): 15 | print(path, end="") 16 | -------------------------------------------------------------------------------- /cats/utils/__init__.py: -------------------------------------------------------------------------------- 1 | class Dict2Class(object): 2 | def __init__(self, my_dict): 3 | for key in my_dict: 4 | setattr(self, key, my_dict[key]) 5 | -------------------------------------------------------------------------------- /cats_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "38958585-cae4-422b-98ae-79fd203f9f19", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stderr", 11 | "output_type": "stream", 12 | "text": [ 13 | "/tmp/ipykernel_124287/1461130463.py:2: FutureWarning: The `ipfsapi` library is deprecated and will stop receiving updates on the 31.12.2019! If you are on Python 3.5+ please enable and fix all Python deprecation warnings (CPython flag `-Wd`) and switch to the new `ipfshttpclient` library name. 
Python 2.7 and 3.4 will not be supported by the new library, so please upgrade.\n", 14 | " import ipfsapi as ipfsApi\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "from pprint import pprint\n", 20 | "import ipfsapi as ipfsApi\n", 21 | "from cats.network import MeshClient\n", 22 | "from cats.service import Service\n", 23 | "from process import *" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "id": "5a10b583-b05a-41f0-af3c-bf36bc6dc51b", 29 | "metadata": {}, 30 | "source": [ 31 | "### Execute Initial CAT0:" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "2519f361-95d9-41bd-abfb-c677e4ac9047", 37 | "metadata": {}, 38 | "source": [ 39 | "##### Instantiate CAT Mesh Service:" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "id": "579b6eef-b426-4279-aa62-19a31ec65219", 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "service = Service(\n", 50 | " meshClient=MeshClient(\n", 51 | " ipfsClient=ipfsApi.Client('127.0.0.1', 5001)\n", 52 | " )\n", 53 | ")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "id": "8724e0fa-f0d4-44ac-b504-af812e800e98", 59 | "metadata": {}, 60 | "source": [ 61 | "##### Compose Initial CAT Order request for CAT Node" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 3, 67 | "id": "e877dffa-d3c6-4aab-a9a4-0c672ce50d3e", 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "{'order_cid': 'QmSmGTZTUaFyhxdfp8uLrAWPvkREmRq1vxbnPYEVTvw2Xu'}\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "cat_order_request_0 = service.create_order_request(\n", 80 | " process_obj=process_0,\n", 81 | " data_dirpath='data',\n", 82 | " structure_filepath='main.tf',\n", 83 | " endpoint='http://127.0.0.1:5000/cat/node/init'\n", 84 | ")\n", 85 | "pprint(cat_order_request_0)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "id": "c24bbef4-b4df-44d0-b6d5-52553552dcb8", 91 | "metadata": {}, 92 | "source": [ 93 | "##### Submit Initial CAT Order request to CAT Node" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 4, 99 | "id": "09d5b3d3-0f3f-4a45-b5d3-22981d807db6", 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "Order:\n", 107 | "\n", 108 | "{'endpoint': 'http://127.0.0.1:5000/cat/node/init',\n", 109 | " 'function_cid': 'QmPMniqGmZ28QnDwa2UJqkcWjkkaehkEKcXD47kLUVWaEd',\n", 110 | " 'invoice_cid': 'QmQnZ1DR9CMjcyzH5RpD9mMcg39AMyjf2mRNEwj8tPHBMJ',\n", 111 | " 'structure_cid': 'QmYyFroE2Nw1BVg3D1MQdeZFrMAn9XWYHgWueMUKaRGops',\n", 112 | " 'structure_filepath': 'main.tf'}\n", 113 | "\n", 114 | "\n", 115 | "curl -X POST -H \"Content-Type: application/json\" -d \\\n", 116 | "'{\n", 117 | " \"order_cid\": \"QmSmGTZTUaFyhxdfp8uLrAWPvkREmRq1vxbnPYEVTvw2Xu\"\n", 118 | "}' http://127.0.0.1:5000/cat/node/init\n", 119 | "\n", 120 | "\n" 121 | ] 122 | }, 123 | { 124 | "name": "stderr", 125 | "output_type": "stream", 126 | "text": [ 127 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 128 | " Dload Upload Total Spent Left Speed\n", 129 | "100 278 100 215 0 63 2 0 0:01:47 0:01:33 0:00:14 62\n" 130 | ] 131 | }, 132 | { 133 | "name": "stdout", 134 | "output_type": "stream", 135 | "text": [ 136 | "{'POST': 'curl -X POST -H \"Content-Type: application/json\" -d \\'{\"order_cid\": '\n", 137 | " '\"QmSmGTZTUaFyhxdfp8uLrAWPvkREmRq1vxbnPYEVTvw2Xu\"}\\' '\n", 138 | " 'http://127.0.0.1:5000/cat/node/init',\n", 139 | " 'bom': 
{'invoice_cid': 'QmXUaNBEZR3BgRvosxuMYzb3G71piW7MXt1jTpzQxGgKzt',\n", 140 | " 'log_cid': 'QmRvJaPH2KSTTg4WcQpCopqmUBMHs4NoFHLDafNR24iksg'},\n", 141 | " 'bom_cid': 'QmdFDNHNdHxG798sgtX9a6ALdYDffa7dp5TLiD3WLhcfh4',\n", 142 | " 'flat_bom': {'invoice': {'data_cid': 'QmdCLefpJfvbChyu2mjZmG37wFucgHd2sMPDaZzcsQvrZw',\n", 143 | " 'order': {'endpoint': 'http://127.0.0.1:5000/cat/node/init',\n", 144 | " 'flat': {'function': {'infrafunction_cid': None,\n", 145 | " 'process_cid': 'QmXqKpSVBuoZD2QG8hT3tTF8ai6cHV8iTHt2tF5sM3scmc'},\n", 146 | " 'invoice': {'data_cid': 'QmQpyDtFsz2JLNTSrPRzLs1tzPrfBxYbCw6kehVWqUXLVN'}},\n", 147 | " 'function_cid': 'QmPMniqGmZ28QnDwa2UJqkcWjkkaehkEKcXD47kLUVWaEd',\n", 148 | " 'invoice_cid': 'QmQnZ1DR9CMjcyzH5RpD9mMcg39AMyjf2mRNEwj8tPHBMJ',\n", 149 | " 'structure_cid': 'QmYyFroE2Nw1BVg3D1MQdeZFrMAn9XWYHgWueMUKaRGops',\n", 150 | " 'structure_filepath': 'main.tf'},\n", 151 | " 'order_cid': 'QmSmGTZTUaFyhxdfp8uLrAWPvkREmRq1vxbnPYEVTvw2Xu',\n", 152 | " 'seed_cid': None},\n", 153 | " 'log': {'egress_job_id': '13f8318f-a69e-41cb-8117-176539a82744',\n", 154 | " 'ingress_job_id': '69f93ce2-df7e-4a27-a05e-1ec13c5813a5',\n", 155 | " 'integration_output': 's3://catstore3/boms/result-20240123-69f93ce2-df7e-4a27-a05e-1ec13c5813a5-integrated'}}}\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "cat_invoiced_response_0 = service.catSubmit(cat_order_request_0)\n", 161 | "# pprint(cat_invoiced_response_0)\n", 162 | "flat_cat_invoiced_response_0 = service.flatten_bom(cat_invoiced_response_0)\n", 163 | "pprint(flat_cat_invoiced_response_0)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "id": "d8fc148d-daf4-4657-91fa-1d0a32ce218f", 169 | "metadata": {}, 170 | "source": [ 171 | "### Execute CAT1:\n", 172 | "Compose a modified CAT Order request that executes CAT1 with CAT0's Structure a new Process" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 5, 178 | "id": "b4ef2988-34fa-4b25-8975-360be65f0b35", 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "name": "stdout", 183 | "output_type": "stream", 184 | "text": [ 185 | "{'order_cid': 'QmNU5EAmWNDc7U3bjZ8X2rzjD3iN83KXcarsvnyk8AXA9o'}\n" 186 | ] 187 | } 188 | ], 189 | "source": [ 190 | "cat_order_request_1 = service.linkProcess(cat_invoiced_response_0, process_1)\n", 191 | "pprint(cat_order_request_1)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "id": "50b9f1e1-3704-471f-8ea9-0a58341a17ea", 197 | "metadata": {}, 198 | "source": [ 199 | "##### Submit modified CAT Order request to CAT Node" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 6, 205 | "id": "695a1368-5120-409d-8216-602cb82bd052", 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "Order:\n", 213 | "\n", 214 | "{'endpoint': 'http://127.0.0.1:5000/cat/node/link',\n", 215 | " 'function_cid': 'QmXoU3V8JWvHm12rbsm4czhQAS1k47ESY5Uto1Pqekb87q',\n", 216 | " 'invoice_cid': 'QmU62uGkaqZyrvyAqAggiDDzmo7aTX745gb6kbzkk5R66y',\n", 217 | " 'structure_cid': 'QmYyFroE2Nw1BVg3D1MQdeZFrMAn9XWYHgWueMUKaRGops',\n", 218 | " 'structure_filepath': 'main.tf'}\n", 219 | "\n", 220 | "\n", 221 | "curl -X POST -H \"Content-Type: application/json\" -d \\\n", 222 | "'{\n", 223 | " \"order_cid\": \"QmNU5EAmWNDc7U3bjZ8X2rzjD3iN83KXcarsvnyk8AXA9o\"\n", 224 | "}' http://127.0.0.1:5000/cat/node/link\n", 225 | "\n", 226 | "\n" 227 | ] 228 | }, 229 | { 230 | "name": "stderr", 231 | "output_type": "stream", 232 | "text": [ 233 | " % Total % Received % 
Xferd Average Speed Time Time Time Current\n", 234 | " Dload Upload Total Spent Left Speed\n", 235 | "100 63 0 0 0 63 0 0 --:--:-- 0:02:35 --:--:-- 0" 236 | ] 237 | }, 238 | { 239 | "name": "stdout", 240 | "output_type": "stream", 241 | "text": [ 242 | "{'POST': 'curl -X POST -H \"Content-Type: application/json\" -d \\'{\"order_cid\": '\n", 243 | " '\"QmNU5EAmWNDc7U3bjZ8X2rzjD3iN83KXcarsvnyk8AXA9o\"}\\' '\n", 244 | " 'http://127.0.0.1:5000/cat/node/link',\n", 245 | " 'bom': {'invoice_cid': 'Qma8s7kaqqdSw6tLkV4e1KZXAT8UPnqzU5MjD49pZUigd9',\n", 246 | " 'log_cid': 'QmeyyQVpGhPwbBk588MZjxMGEbfTdSBjXbn6wHyDrYXnwp'},\n", 247 | " 'bom_cid': 'Qmc58h4o1V8Dv7Z8goXUNKMTU3MF2YTG4V7Ug2VCu2hX3H',\n", 248 | " 'flat_bom': {'invoice': {'data_cid': 'QmeVuroH6jX8j8jC2wE2suMwVVN5YicK8q9FGYEigrHZ9J',\n", 249 | " 'order': {'endpoint': 'http://127.0.0.1:5000/cat/node/link',\n", 250 | " 'flat': {'function': {'infrafunction': None,\n", 251 | " 'process_cid': 'QmXC2XLDePUxCoPtETPejd5uwF3BcaoQNAn2vRU4a4BK7z'},\n", 252 | " 'invoice': {'data_cid': 'QmdCLefpJfvbChyu2mjZmG37wFucgHd2sMPDaZzcsQvrZw'}},\n", 253 | " 'function_cid': 'QmXoU3V8JWvHm12rbsm4czhQAS1k47ESY5Uto1Pqekb87q',\n", 254 | " 'invoice_cid': 'QmU62uGkaqZyrvyAqAggiDDzmo7aTX745gb6kbzkk5R66y',\n", 255 | " 'structure_cid': 'QmYyFroE2Nw1BVg3D1MQdeZFrMAn9XWYHgWueMUKaRGops',\n", 256 | " 'structure_filepath': 'main.tf'},\n", 257 | " 'order_cid': 'QmNU5EAmWNDc7U3bjZ8X2rzjD3iN83KXcarsvnyk8AXA9o',\n", 258 | " 'seed_cid': None},\n", 259 | " 'log': {'egress_job_id': '2c01bad7-00df-4220-9755-0a022112d3cd',\n", 260 | " 'ingress_job_id': 'a1983925-6bdc-4dfc-9d42-d4fb1d23e8d4',\n", 261 | " 'integration_output': 's3://catstore3/boms/result-20240123-a1983925-6bdc-4dfc-9d42-d4fb1d23e8d4-integrated'}}}\n" 262 | ] 263 | }, 264 | { 265 | "name": "stderr", 266 | "output_type": "stream", 267 | "text": [ 268 | "100 278 100 215 0 63 1 0 0:03:35 0:02:35 0:01:00 66\n" 269 | ] 270 | } 271 | ], 272 | "source": [ 273 | "cat_invoiced_response_1 = service.catSubmit(cat_order_request_1)\n", 274 | "# pprint(cat_invoiced_response_1)\n", 275 | "flat_cat_invoiced_response_1 = service.flatten_bom(cat_invoiced_response_1)\n", 276 | "pprint(flat_cat_invoiced_response_1)" 277 | ] 278 | } 279 | ], 280 | "metadata": { 281 | "kernelspec": { 282 | "display_name": "Python 3 (ipykernel)", 283 | "language": "python", 284 | "name": "python3" 285 | }, 286 | "language_info": { 287 | "codemirror_mode": { 288 | "name": "ipython", 289 | "version": 3 290 | }, 291 | "file_extension": ".py", 292 | "mimetype": "text/x-python", 293 | "name": "python", 294 | "nbconvert_exporter": "python", 295 | "pygments_lexer": "ipython3", 296 | "version": "3.10.13" 297 | } 298 | }, 299 | "nbformat": 4, 300 | "nbformat_minor": 5 301 | } 302 | -------------------------------------------------------------------------------- /data/iris.csv: -------------------------------------------------------------------------------- 1 | sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target 2 | 5.1,3.5,1.4,0.2,0 3 | 4.9,3.0,1.4,0.2,0 4 | 4.7,3.2,1.3,0.2,0 5 | 4.6,3.1,1.5,0.2,0 6 | 5.0,3.6,1.4,0.2,0 7 | 5.4,3.9,1.7,0.4,0 8 | 4.6,3.4,1.4,0.3,0 9 | 5.0,3.4,1.5,0.2,0 10 | 4.4,2.9,1.4,0.2,0 11 | 4.9,3.1,1.5,0.1,0 12 | 5.4,3.7,1.5,0.2,0 13 | 4.8,3.4,1.6,0.2,0 14 | 4.8,3.0,1.4,0.1,0 15 | 4.3,3.0,1.1,0.1,0 16 | 5.8,4.0,1.2,0.2,0 17 | 5.7,4.4,1.5,0.4,0 18 | 5.4,3.9,1.3,0.4,0 19 | 5.1,3.5,1.4,0.3,0 20 | 5.7,3.8,1.7,0.3,0 21 | 5.1,3.8,1.5,0.3,0 22 | 5.4,3.4,1.7,0.2,0 23 | 5.1,3.7,1.5,0.4,0 24 | 4.6,3.6,1.0,0.2,0 25 | 5.1,3.3,1.7,0.5,0 26 
| 4.8,3.4,1.9,0.2,0 27 | 5.0,3.0,1.6,0.2,0 28 | 5.0,3.4,1.6,0.4,0 29 | 5.2,3.5,1.5,0.2,0 30 | 5.2,3.4,1.4,0.2,0 31 | 4.7,3.2,1.6,0.2,0 32 | 4.8,3.1,1.6,0.2,0 33 | 5.4,3.4,1.5,0.4,0 34 | 5.2,4.1,1.5,0.1,0 35 | 5.5,4.2,1.4,0.2,0 36 | 4.9,3.1,1.5,0.2,0 37 | 5.0,3.2,1.2,0.2,0 38 | 5.5,3.5,1.3,0.2,0 39 | 4.9,3.6,1.4,0.1,0 40 | 4.4,3.0,1.3,0.2,0 41 | 5.1,3.4,1.5,0.2,0 42 | 5.0,3.5,1.3,0.3,0 43 | 4.5,2.3,1.3,0.3,0 44 | 4.4,3.2,1.3,0.2,0 45 | 5.0,3.5,1.6,0.6,0 46 | 5.1,3.8,1.9,0.4,0 47 | 4.8,3.0,1.4,0.3,0 48 | 5.1,3.8,1.6,0.2,0 49 | 4.6,3.2,1.4,0.2,0 50 | 5.3,3.7,1.5,0.2,0 51 | 5.0,3.3,1.4,0.2,0 52 | 7.0,3.2,4.7,1.4,1 53 | 6.4,3.2,4.5,1.5,1 54 | 6.9,3.1,4.9,1.5,1 55 | 5.5,2.3,4.0,1.3,1 56 | 6.5,2.8,4.6,1.5,1 57 | 5.7,2.8,4.5,1.3,1 58 | 6.3,3.3,4.7,1.6,1 59 | 4.9,2.4,3.3,1.0,1 60 | 6.6,2.9,4.6,1.3,1 61 | 5.2,2.7,3.9,1.4,1 62 | 5.0,2.0,3.5,1.0,1 63 | 5.9,3.0,4.2,1.5,1 64 | 6.0,2.2,4.0,1.0,1 65 | 6.1,2.9,4.7,1.4,1 66 | 5.6,2.9,3.6,1.3,1 67 | 6.7,3.1,4.4,1.4,1 68 | 5.6,3.0,4.5,1.5,1 69 | 5.8,2.7,4.1,1.0,1 70 | 6.2,2.2,4.5,1.5,1 71 | 5.6,2.5,3.9,1.1,1 72 | 5.9,3.2,4.8,1.8,1 73 | 6.1,2.8,4.0,1.3,1 74 | 6.3,2.5,4.9,1.5,1 75 | 6.1,2.8,4.7,1.2,1 76 | 6.4,2.9,4.3,1.3,1 77 | 6.6,3.0,4.4,1.4,1 78 | 6.8,2.8,4.8,1.4,1 79 | 6.7,3.0,5.0,1.7,1 80 | 6.0,2.9,4.5,1.5,1 81 | 5.7,2.6,3.5,1.0,1 82 | 5.5,2.4,3.8,1.1,1 83 | 5.5,2.4,3.7,1.0,1 84 | 5.8,2.7,3.9,1.2,1 85 | 6.0,2.7,5.1,1.6,1 86 | 5.4,3.0,4.5,1.5,1 87 | 6.0,3.4,4.5,1.6,1 88 | 6.7,3.1,4.7,1.5,1 89 | 6.3,2.3,4.4,1.3,1 90 | 5.6,3.0,4.1,1.3,1 91 | 5.5,2.5,4.0,1.3,1 92 | 5.5,2.6,4.4,1.2,1 93 | 6.1,3.0,4.6,1.4,1 94 | 5.8,2.6,4.0,1.2,1 95 | 5.0,2.3,3.3,1.0,1 96 | 5.6,2.7,4.2,1.3,1 97 | 5.7,3.0,4.2,1.2,1 98 | 5.7,2.9,4.2,1.3,1 99 | 6.2,2.9,4.3,1.3,1 100 | 5.1,2.5,3.0,1.1,1 101 | 5.7,2.8,4.1,1.3,1 102 | 6.3,3.3,6.0,2.5,2 103 | 5.8,2.7,5.1,1.9,2 104 | 7.1,3.0,5.9,2.1,2 105 | 6.3,2.9,5.6,1.8,2 106 | 6.5,3.0,5.8,2.2,2 107 | 7.6,3.0,6.6,2.1,2 108 | 4.9,2.5,4.5,1.7,2 109 | 7.3,2.9,6.3,1.8,2 110 | 6.7,2.5,5.8,1.8,2 111 | 7.2,3.6,6.1,2.5,2 112 | 6.5,3.2,5.1,2.0,2 113 | 6.4,2.7,5.3,1.9,2 114 | 6.8,3.0,5.5,2.1,2 115 | 5.7,2.5,5.0,2.0,2 116 | 5.8,2.8,5.1,2.4,2 117 | 6.4,3.2,5.3,2.3,2 118 | 6.5,3.0,5.5,1.8,2 119 | 7.7,3.8,6.7,2.2,2 120 | 7.7,2.6,6.9,2.3,2 121 | 6.0,2.2,5.0,1.5,2 122 | 6.9,3.2,5.7,2.3,2 123 | 5.6,2.8,4.9,2.0,2 124 | 7.7,2.8,6.7,2.0,2 125 | 6.3,2.7,4.9,1.8,2 126 | 6.7,3.3,5.7,2.1,2 127 | 7.2,3.2,6.0,1.8,2 128 | 6.2,2.8,4.8,1.8,2 129 | 6.1,3.0,4.9,1.8,2 130 | 6.4,2.8,5.6,2.1,2 131 | 7.2,3.0,5.8,1.6,2 132 | 7.4,2.8,6.1,1.9,2 133 | 7.9,3.8,6.4,2.0,2 134 | 6.4,2.8,5.6,2.2,2 135 | 6.3,2.8,5.1,1.5,2 136 | 6.1,2.6,5.6,1.4,2 137 | 7.7,3.0,6.1,2.3,2 138 | 6.3,3.4,5.6,2.4,2 139 | 6.4,3.1,5.5,1.8,2 140 | 6.0,3.0,4.8,1.8,2 141 | 6.9,3.1,5.4,2.1,2 142 | 6.7,3.1,5.6,2.4,2 143 | 6.9,3.1,5.1,2.3,2 144 | 5.8,2.7,5.1,1.9,2 145 | 6.8,3.2,5.9,2.3,2 146 | 6.7,3.3,5.7,2.5,2 147 | 6.7,3.0,5.2,2.3,2 148 | 6.3,2.5,5.0,1.9,2 149 | 6.5,3.0,5.2,2.0,2 150 | 6.2,3.4,5.4,2.3,2 151 | 5.9,3.0,5.1,1.8,2 152 | -------------------------------------------------------------------------------- /dist/cats-0.0.0-py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlockScience/cats/72263573aa3a2f02da2dccc8b5f09015c3df5fb5/dist/cats-0.0.0-py3-none-any.whl -------------------------------------------------------------------------------- /dist/cats-0.0.0.tar.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BlockScience/cats/72263573aa3a2f02da2dccc8b5f09015c3df5fb5/dist/cats-0.0.0.tar.gz
--------------------------------------------------------------------------------
/docs/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## Getting Started
2 | Contributions are made to this repo via Issues and Pull Requests (PRs). A few general guidelines that cover both:
3 | - Search for existing Issues and PRs before creating your own.
4 | 
5 | ### Issues
6 | Issues should be used to report problems with the package or discuss potential changes before a PR is created. Create a
7 | new Issue using [templates](https://github.com/BlockScience/cats/issues/new/choose) to guide you through collecting and
8 | providing the information we need to investigate.
9 | 
10 | If you find an Issue that addresses the problem you're having, please add your own reproduction information to the
11 | existing issue rather than creating a new one.
12 | 
13 | ### Pull Requests
14 | Pull-requests (PRs) will be made using a PR [template](../.github/PULL_REQUEST_TEMPLATE/pull_request_template.md)
15 | (loaded automatically when a PR is created). PRs should reference the issue(s) covering the fix or improvement slated
16 | for the next release:
17 | - Only fix/add the functionality in question.
18 | - Include documentation in the repo if applicable.
19 | 
20 | In general, we follow the ["fork-and-pull"](https://github.com/susam/gitpr) Git workflow.
21 | 
22 | 1. Fork the repository to your own Github account
23 | 2. Clone the project to your machine
24 | 3. Create a new branch from the `release` branch locally with a succinct but descriptive name
25 | 4. Commit changes to the new branch
26 | 5. Follow any formatting and testing guidelines specific to this repo
27 | 6. Push changes to your fork
28 | 7. Open a PR in our repository and follow the PR template so that we can efficiently review the changes.
--------------------------------------------------------------------------------
/docs/DEMO.md:
--------------------------------------------------------------------------------
1 | ## [Establish a CAT Mesh:](../cats_demo.ipynb)
2 | #### Steps:
3 | ##### 0. Start IPFS daemon:
4 | ```bash
5 | ipfs daemon
6 | ```
7 | * **Optional:**
8 | * Shut down IPFS daemon: `ipfs shutdown`
9 | ##### 1. [Create Virtual Environment](./ENV.md)
10 | ```bash
11 | # CATs working directory
12 | cd cats
13 | python -m venv ./venv
14 | ```
15 | ##### 2. Activate Virtual Environment
16 | ```bash
17 | source ./venv/bin/activate
18 | # (venv) $
19 | ```
20 | ##### 3. Deploy CAT Node:
21 | ```bash
22 | # (venv) $
23 | PYTHONPATH=./ python cats/node.py
24 | ```
25 | ##### 4. Establish Data (CAT) Mesh: [Demo](../cats_demo.ipynb)
26 | Execute CATs on a single-node Mesh.
27 | ```bash
28 | # (venv) $
29 | jupyter notebook cats_demo.ipynb
30 | # Run > Run All Cells
31 | ```
--------------------------------------------------------------------------------
/docs/DEPS.md:
--------------------------------------------------------------------------------
1 | ##### Platform Dependencies:
2 | 0. [**Docker:**](https://www.digitalocean.com/community/tutorials/how-to-install-and-use-docker-on-ubuntu-20-04)
3 | 1. [**Ubuntu_20.04 Installation**](./ubuntu2004.md#docker)
4 | 1. [**Python**](https://www.python.org/downloads/release/python-31013/) (>= 3.10.13)
5 | 2. [**kind**](https://kind.sigs.k8s.io/docs/user/quick-start/#installing-from-release-binaries) (>= 0.12.0)
6 | 3. 
[**kubectl**](https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/) (>= 1.22.2)
7 | 1. [**Ubuntu_20.04 Installation**](./ubuntu2004.md#kubectl)
8 | 4. [**helm**](https://helm.sh/docs/intro/install/) (>= v3.13.1)
9 | 5. [**CoD**](https://docs.bacalhau.org/getting-started/installation/) (>= v1.2.0)
10 | ```bash
11 | curl -sL https://get.bacalhau.org/install.sh | bash
12 | ```
13 | 6. [**Terraform**](https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli) (>= 1.5.2)
14 | 1. [**Ubuntu_20.04 Installation**](./ubuntu2004.md#infrastructure-as-code-iac)
15 | 7. [**Go**](https://go.dev/dl/) (>= 1.13.6)
16 | 1. [**Ubuntu_20.04 Installation**](./ubuntu2004.md#Go)
17 | 8. [**IPFS Kubo**](https://docs.ipfs.tech/install/command-line/#system-requirements) (0.24.0)
18 | 9. [**AWS S3**](https://aws.amazon.com/s3/)
19 | 1. Requires: [**AWS Account**](https://aws.amazon.com/)
20 | 1. [**Instructions**](https://aws.amazon.com/premiumsupport/knowledge-center/create-and-activate-aws-account/)
21 | 2. Generate [**Credentials**](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html)
22 | 1. [**Create AWS IAM Users & Access Keys**](https://aws.amazon.com/premiumsupport/knowledge-center/create-access-key/)
23 | 1. [**Ubuntu 20.04 Installation**](https://github.com/BlockScience/cats/blob/cats2/docs/ubuntu2004.md#aws-s3)
--------------------------------------------------------------------------------
/docs/DESIGN.md:
--------------------------------------------------------------------------------
1 | ### CAT Node's Architectural Quantum (Domain-Driven Design principle):
2 | 
3 | CAT Node uses the Architectural Quantum Domain-Driven Design principle described in
4 | [**Data Mesh of Data Products**](https://martinfowler.com/articles/data-mesh-principles.html)
5 | 
6 | This design principle enables effective cross-domain collaboration on Data Products across business and
7 | knowledge domains between cross-functional & multi-disciplinary teams and organizations.
8 | 
9 | ![alt_text](../images/CATkernel.jpeg)
10 | 
11 | CATs’ architectural design and implementation are the result of applied Engineering, Computer Science, Network Science,
12 | and Social Science. CATs is software executing on a network client that is ontologically analogous to a microkernel operating system. CATs
13 | is designed to enable Data Products implemented as compute node peers on a Data Mesh network that encapsulate code,
14 | data, metadata, and infrastructure to function as a service providing access to the business domain's analytical data as
15 | a product. Data Products use the Architectural Quantum domain-driven design principle for peer nodes that represent the
16 | “smallest unit of architecture that can be independently deployed with high functional cohesion, and includes all the
17 | structural elements required for its function”
18 | ([“Data Mesh Principles and Logical Architecture”](https://martinfowler.com/articles/data-mesh-principles.html#:~:text=smallest%20unit%20of%20architecture%20that%20can%20be%20independently%20deployed%20with%20high%20functional%20cohesion%2C%20and%20includes%20all%20the%20structural%20elements%20required%20for%20its%20function.) - Zhamak Dehghani, et al.). 
19 | 
20 | ### Collaborative value of CATs Architectural Quantum:
21 | The operation and maintenance of CATs’ Data Products on a Data Mesh can occur between independent teams that
22 | operate, contribute to, and maintain different portions of the entire cloud-service model in adherence to CATs'
23 | Architectural Quantum, in a way suited to their roles, using the CATs API to serve individual Data Model entities on a
24 | Data Mesh for a variety of use-cases. CATs' Data Product teams can be multidisciplinary because they can operate
25 | and maintain different portions of the entire Web2 cloud-service model based on role.
26 | 
27 | For example:
28 | * An **Analyst** or **Data Scientist** will use CATs Process interface deployed as FaaS for Analytical Data transformation
29 | * A **Backend** or **ML-Engineer** will implement a CAT application as SaaS for a Data Scientist to execute machine
30 | learning models / pipelines on a cloud-managed Kubernetes PaaS, as opposed to a machine-learning handoff deployment.
31 | * An **Infrastructure Engineer** will use Terraform to deploy multi-cloud IaaS for a CAT to be deployed by the
32 | Backend or ML-Engineer
33 | 
34 | ### [**Organizational Value**](./ORG.md)
--------------------------------------------------------------------------------
/docs/ENV.md:
--------------------------------------------------------------------------------
1 | ### Manage CATs' Virtual Environment:
2 | ##### 0. Create `venv`:
3 | ```bash
4 | # CATs working directory
5 | cd cats
6 | python -m venv ./venv
7 | ```
8 | #### 1. Activate/Deactivate Virtual Environment
9 | **Activate `venv`:**
10 | ```bash
11 | source ./venv/bin/activate
12 | # (venv) $
13 | ```
14 | **Deactivate `venv` (Optional):**
15 | ```bash
16 | deactivate
17 | # $
18 | ```
--------------------------------------------------------------------------------
/docs/EXAMPLES.md:
--------------------------------------------------------------------------------
1 | ### [Demos:](../cats_demo.ipynb) Execute CATs on a single-node Data (CAT) Mesh
2 | ### [Test:](../tests/verification_test.py) `python -m pytest -s tests/verification_test.py`
--------------------------------------------------------------------------------
/docs/ORG.md:
--------------------------------------------------------------------------------
1 | ### Organizational Value:
2 | CATs empower effective cross-domain collaboration on a
3 | [**Data Mesh of Data Products**](https://martinfowler.com/articles/data-mesh-principles.html) across business and
4 | knowledge domains between cross-functional & multi-disciplinary teams and organizations. A Data Mesh solution involves
5 | Data Products implemented, operated, and maintained by multidisciplinary teams on a self-service Data Mesh platform.
6 | Data Products service each other as well as end users on this platform. The Data Mesh solution is accomplished by
7 | federated governance of Data Products and Domains served on a Data Mesh. Individual Data Products and their
8 | infrastructure are owned and operated by multidisciplinary teams. This is a customer-centric approach to
9 | overall project implementation life-cycles, with nested Data Product life-cycles that have tighter loops (a.k.a.
10 | Agility). 
11 | The advantages of these principles and architecture include the following:
12 | 
13 | * **Operational Value:**
14 |   * Reduced operational bottlenecks caused by the communication overheads of cross-team/departmental synchronization
15 |     between siloed groups, such as cross-disciplinary miscommunication. These overheads are associated with the
16 |     coordination of siloed groups constituted of different roles within organizations.
17 |   * Increased service and product agility with the employment of multi-disciplinary teams that operate, maintain, and
18 |     potentially own their own infrastructure, because there is less infrastructure to own
19 |   * Reduced operational overhead of data discovery by Content-Addressing the means of processing (input and output
20 |     data, transformation / process, and infrastructure [as Code (IaC)]) and using Content-Addresses as the means of data
21 |     transport between services.
22 |   * “Improving data quality and resilience of analysis (BI, AI) by bringing producers closer to consumers (removing
23 |     the complexity of intermediate data pipelines)” ([1.](https://en.blog.businessdecision.com/data-domains-data-mesh-gives-business-domains-superpowers/))
24 | 
25 | * **Business Value:**
26 |   * Enables more control over their data, agility for market reactivity and business scope changes, and data quality increases due to a self-service Data Mesh platform that enables federated governance and increased project visibility ([1.](https://en.blog.businessdecision.com/data-mesh-ultimate-model-for-data-driven-companies/))
27 |   * Enables rational expense estimates of operation and maintenance per data domain ([2.](https://en.blog.businessdecision.com/data-infrastructure-self-service-data-mesh/))
28 |   * Enables Data Services to grow with and adapt to changes to the organization and/or business ([1.](https://en.blog.businessdecision.com/data-domains-data-mesh-gives-business-domains-superpowers/))
--------------------------------------------------------------------------------
/docs/TEST.md:
--------------------------------------------------------------------------------
1 | ## [Test(s)](../tests/verification_test.py):
2 | * **[Install CATs](https://github.com/BlockScience/cats/tree/cats2?tab=readme-ov-file#get-started)**
3 | * **[Create Virtual Environment](./ENV.md)**
4 | * **Activate Virtual Environment**
5 | ```bash
6 | cd cats
7 | source ./venv/bin/activate
8 | # (venv) $
9 | ```
10 | * **Session 1**
11 | ```bash
12 | # (venv) $
13 | PYTHONPATH=./ python cats/node.py
14 | ```
15 | * **Session 2**
16 | ```bash
17 | # (venv) $
18 | pytest -s tests/verification_test.py
19 | ```
--------------------------------------------------------------------------------
/docs/ubuntu2004.md:
--------------------------------------------------------------------------------
1 | # Installation:
2 | This is a walkthrough of how to install dependencies for CATs on Ubuntu 20.04 LTS. 
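A quick way to sanity-check the installation afterwards (an added convenience, assuming each tool below has been installed onto your `PATH`; not part of the original walkthrough):
```bash
# Print versions of the platform dependencies listed in DEPS.md.
docker --version
kubectl version --client
kind version
helm version --short
terraform --version
go version
ipfs version
bacalhau version
python3 --version
```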
3 | 
4 | ### Ubuntu 20.04 Dependencies
5 | ```bash
6 | mkdir ~/install
7 | cd ~/install
8 | sudo apt update
9 | sudo apt upgrade
10 | sudo apt install wget build-essential ca-certificates
11 | sudo apt install libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev
12 | sudo apt-get update
13 | sudo apt-get upgrade
14 | sudo apt-get install curl dpkg apt-transport-https gnupg software-properties-common git zlib1g-dev
15 | ```
16 | 
17 | ### [Docker:](https://www.digitalocean.com/community/tutorials/how-to-install-and-use-docker-on-ubuntu-20-04)
18 | * **Installation:**
19 | ```bash
20 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
21 | sudo add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu focal stable"
22 | apt-cache policy docker-ce
23 | sudo apt install docker-ce
24 | docker --version
25 | ```
26 | * [**Manage Docker as a non-root user:**](https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user)
27 | ```bash
28 | sudo usermod -aG docker ${USER}
29 | su - ${USER}
30 | groups
31 | sudo usermod -aG docker <username> # optionally add another user; <username> is a placeholder
32 | ```
33 | 
34 | ### [kubectl:](https://kubernetes.io/docs/tasks/tools/)
35 | ```bash
36 | curl -LO https://storage.googleapis.com/kubernetes-release/release/`curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt`/bin/linux/amd64/kubectl
37 | chmod +x ./kubectl
38 | sudo cp ./kubectl /usr/local/bin/kubectl
39 | kubectl version -o json
40 | ```
41 | * **Set `KUBE_CONFIG_PATH`:**
42 | ```bash
43 | echo 'export KUBE_CONFIG_PATH=~/.kube/config' >> ~/.profile
44 | source ~/.profile
45 | ```
46 | 
47 | ### [AWS Account:](https://aws.amazon.com/)
48 | * [**Instructions**](https://aws.amazon.com/premiumsupport/knowledge-center/create-and-activate-aws-account/)
49 | 
50 | ### [AWS S3](https://aws.amazon.com/s3/)
51 | * Generate [**Credentials**](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html)
52 | * [**Create AWS IAM Users & Access Keys**](https://aws.amazon.com/premiumsupport/knowledge-center/create-access-key/)
53 | * **Export AWS Access Keys to Bash Profile:**
54 | ```bash
55 | echo 'export AWS_ACCESS_KEY_ID=' >> ~/.profile
56 | echo 'export AWS_SECRET_ACCESS_KEY=' >> ~/.profile
57 | source ~/.profile
58 | ```
59 | 
60 | ### [**Go**](https://go.dev/dl/) (>= 1.13.6)
61 | ```bash
62 | wget https://dl.google.com/go/go1.13.6.linux-amd64.tar.gz
63 | sudo tar -zxvf go1.13.6.linux-amd64.tar.gz -C /usr/local
64 | echo 'export PATH=$PATH:/usr/local/go/bin' >> ~/.profile
65 | echo 'export GOPATH=$HOME/go' >> ~/.profile
66 | source ~/.profile
67 | go version
68 | ```
69 | 
70 | ### [**Java 11:**](https://www.digitalocean.com/community/tutorials/how-to-install-java-with-apt-on-ubuntu-20-04)
71 | ```bash
72 | sudo apt install openjdk-11-jre
73 | sudo apt install openjdk-11-jdk
74 | echo 'export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64' >> ~/.profile
75 | source ~/.profile
76 | echo $JAVA_HOME
77 | javac -version
78 | java -version
79 | ```
80 | 
81 | ### Scala: 2.11.12
82 | ```bash
83 | wget https://downloads.lightbend.com/scala/2.11.12/scala-2.11.12.deb
84 | sudo dpkg -i scala-2.11.12.deb
85 | sudo apt-get update
86 | scala -version
87 | ```
88 | 
89 | ### [sbt: 1.5.5](https://www.scala-sbt.org/download.html?_ga=2.195232236.1901884640.1633358692-54053138.1633358495)
90 | ```bash
91 | echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
92 | 
echo "deb https://repo.scala-sbt.org/scalasbt/debian /" | sudo tee /etc/apt/sources.list.d/sbt_old.list 93 | curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo apt-key add 94 | sudo apt-get update 95 | sudo apt-get install sbt=1.5.5 96 | sbt sbtVersion 97 | ``` 98 | 99 | ### Ifrastructure as Code (IaC): 100 | * Install [**Terraform**](https://learn.hashicorp.com/tutorials/terraform/install-cli) 101 | ```bash 102 | sudo apt-get update 103 | sudo apt-get install gnupg2 104 | curl https://apt.releases.hashicorp.com/gpg | gpg --dearmor > hashicorp.gpg 105 | curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo apt-key add - 106 | sudo apt-add-repository "deb [arch=$(dpkg --print-architecture)] https://apt.releases.hashicorp.com $(lsb_release -cs) main" 107 | sudo apt-get update 108 | sudo apt-get install terraform=1.1.9 109 | terraform --version 110 | ``` -------------------------------------------------------------------------------- /images/CATkernel.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlockScience/cats/72263573aa3a2f02da2dccc8b5f09015c3df5fb5/images/CATkernel.jpeg -------------------------------------------------------------------------------- /images/CATs_bom_activity.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlockScience/cats/72263573aa3a2f02da2dccc8b5f09015c3df5fb5/images/CATs_bom_activity.jpeg -------------------------------------------------------------------------------- /images/CATs_bom_ag.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlockScience/cats/72263573aa3a2f02da2dccc8b5f09015c3df5fb5/images/CATs_bom_ag.jpeg -------------------------------------------------------------------------------- /images/CATs_bom_connect.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlockScience/cats/72263573aa3a2f02da2dccc8b5f09015c3df5fb5/images/CATs_bom_connect.jpeg -------------------------------------------------------------------------------- /images/CATs_chaordic_kernel.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlockScience/cats/72263573aa3a2f02da2dccc8b5f09015c3df5fb5/images/CATs_chaordic_kernel.jpeg -------------------------------------------------------------------------------- /images/cid_example.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlockScience/cats/72263573aa3a2f02da2dccc8b5f09015c3df5fb5/images/cid_example.jpeg -------------------------------------------------------------------------------- /images/data_product_domain.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlockScience/cats/72263573aa3a2f02da2dccc8b5f09015c3df5fb5/images/data_product_domain.jpeg -------------------------------------------------------------------------------- /images/simple_CAT2a.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlockScience/cats/72263573aa3a2f02da2dccc8b5f09015c3df5fb5/images/simple_CAT2a.jpeg -------------------------------------------------------------------------------- /images/simple_CAT2b.jpeg: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BlockScience/cats/72263573aa3a2f02da2dccc8b5f09015c3df5fb5/images/simple_CAT2b.jpeg
--------------------------------------------------------------------------------
/logs.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "c50dde91-e227-48e4-8bdd-e1263b7ee940",
6 | "metadata": {},
7 | "source": [
8 | "### Week 3 (1/8 - 1/12): \n",
9 | "* Review Designs within the context of Data Sovereignty:\n",
10 | "* https://en.wikipedia.org/wiki/Data_sovereignty\n",
11 | "* https://www.ibm.com/blog/living-in-a-data-sovereign-world/\n",
12 | "* https://atlan.com/data-sovereignty-requirements/\n",
13 | "* Research CLI wrapper alternative to CDKTF \n",
14 | "* Review Database Sharding within the context of Data Products’ data: https://aws.amazon.com/what-is/database-sharding/\n",
15 | "* Review Value of data\n",
16 | "* Verify CATs’ Project Update: Factory & Executor components; Invoice, Order, Function, Executor, & BOM Block Designs, Structure’s Ray Cluster Deployment on Kubernetes, BOM Initialization, CAT Node & Node Design"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "id": "d2cd5d37-99b5-4957-8ac3-0422a4e6199e",
22 | "metadata": {},
23 | "source": [
24 | "### Week 4 (1/15 - 1/19):\n",
25 | "* Research Dynamic Terraform Providers for Plant Deployments\n",
26 | "* Verify CATs’ Project Update: Structure Block Design, Data Service Collaboration Diagram, Ray Integration\n",
27 | "* Watched Computational Governance Panel\n",
28 | "* Review Ray documentation for InfraFunction Hooks\n",
29 | "* Research Open Contracting Data Standard with respect to Data Product Teams: https://standard.open-contracting.org/latest/en/"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "id": "78c9dc52-030b-4754-8669-0c10dbdc9faf",
35 | "metadata": {},
36 | "source": [
37 | "### Week 5 (1/22 - 1/26)\n",
38 | "* 1/22: \n",
39 | " * Updated CATs integration tests and demo\n",
40 | " * Resolved dependency bug\n",
41 | " * Verify CATs’ Project Update: Process Component, Sub-Process Logging, Executor & Function Components\n",
42 | "* 1/23: \n",
43 | " * Updated Documentation and Demo\n",
44 | " * Added License and Packaging for CATs\n",
45 | " * Verify CATs’ Project Update: s3 & CoD Integration\n",
46 | "* 1/24: \n",
47 | " * Updated Documentation & Refactor\n",
48 | " * CATs Data Verification\n",
49 | " * Verify CATs’ Project Update: Updating Order Structure, Node, Service & Structure Components\n",
50 | "* 1/25 - 1/26: \n",
51 | " * Updated Documentation & Refactor\n",
52 | " * Update Factory\n",
53 | " * Reviewed Novo Nordisk Data Mesh Platform discussion\n",
54 | " * Verify CATs’ Project Update: CATs s3 cache, BOM ERD"
55 | ]
56 | },
57 | {
58 | "cell_type": "markdown",
59 | "id": "d5e1d554-ac2a-4f3e-af07-4be7e1493485",
60 | "metadata": {},
61 | "source": [
62 | "### Week 6 (1/29 - 2/2): \n",
63 | "* Included Ubuntu 20.04 Installation Update\n",
64 | "* Refactored CATs\n",
65 | "* Researched CAT cache access management\n",
66 | "* Research Economic Adapters for CATs\n",
67 | "* Research multilevel linked-list for CATs’ subgraph"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "id": "baee354c-dee3-4a6c-8d94-664e4e22ae30",
73 | "metadata": {},
74 | "source": [
75 | "### Week 7 (2/5 – 2/9): \n",
76 | "* Research bidirectional mapping to support the multilevel linked-list for CATs’ subgraph\n",
77 
| "* Consider Transducers for CAT MIMO\n", 78 | "* Updated PR Template\n", 79 | "* Review Model-Driven Engineering: https://en.wikipedia.org/wiki/Model-driven_engineering" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "id": "abcb43ac-28fc-4a45-b2cc-4b20028c5981", 85 | "metadata": {}, 86 | "source": [ 87 | "### Week 8 (2/12 - 2/16):\n", 88 | "* 2/12: Drafted CATs capabilities in GitHub Project and reviewed Activity Artifact Policy\n", 89 | "* 2/13: Reviewed implementation examples of Data Contracts\n", 90 | "* 2/14 - 2/15: \n", 91 | " * Reviewed Data Mesh Roundtable Discussions about Data Contracts and “Agile” Data Products\n", 92 | " * Attended Protocol Labs project updates\n", 93 | "* 2/16: Research System Architecture layers and wrote notes as Data Contract Article for CATs\n", 94 | "\n", 95 | "Data Mesh Resources:\n", 96 | "* “Inside a Data Contract”: https://www.youtube.com/watch?v=ye4geXMuJKs\n", 97 | "* “Agile in Data”: https://www.youtube.com/watch?v=XnstATam0jM\n", 98 | "* Data Contract Articles: https://www.datamesh-architecture.com/#data-contract\n", 99 | "\n", 100 | "Data Contract Implementation Examples:\n", 101 | "* https://blog.det.life/data-contracts-a-guide-to-implementation-86cf9b032065\n", 102 | "* https://levelup.gitconnected.com/create-a-web-scraping-pipeline-with-python-using-data-contracts-281a30440442\n", 103 | "* https://docs.soda.io/soda/data-contracts.html\n", 104 | "\n", 105 | "System Architecture:\n", 106 | "* https://blog.jgriffiths.org/systems-architecture-conceptual-logical-and-physical/\n", 107 | "\n", 108 | "#### What does a CATs data contract do?\n", 109 | "\n", 110 | "Data Contract is a Service agreement between producer and consumer with attribute dependencies for downstream Data Product evolution with dedicated lineage. A data contracts can provide tools for collaboration on data requirements as product promises within a shared context that inform policies for contract mutation along side Data Product versioning.\n", 111 | "\n", 112 | "A Data Contract’s Product Promises are what the data product owners expect from its data consumer up to the latest block of information. These promises may include data quality, data usage terms and conditions, schema, service-objectives, billing, etc. Data Contract policy mutation cascaded downstream as bilateral lateral agreements that “forks” lineage as a new Data Product version. For Example, the consumer takes the risk of violating privacy. Data Producers create Data Contracts on Organization and Business Terms. The consumer of the Data Contract enforces Governance policies. The producer of the Data Contract owns the Data Product if the organization doesn't have a Governance body. \n", 113 | "\n", 114 | "Governance policies are discussed between data producers and consumers to agree upon data producer requirements. These discussions should culminate into an amenable data structure / dataset. Structured data is conducive for pre-exsisting policies and less discussion. Less structured data will need more discussion and policy feedback loops. We need a Minimal Viable Data Contract that includes what is necessary for an organization to govern with the means of supporting policy feedback loops in a way that guides discussion in a way that balances the prioritization of outcomes and methodologies.\n", 115 | "\n", 116 | "Interdependent data domains have sub-domains with identifiers for generating Data Products. 
CAT Nodes will generate and execute Virtual Data Products composed as Data Contracts that enforce Data Provenance using Bills of Materials (BOMs). BOMs are CATs' Content-Addressed Data Provenance record for verifiable data processing and transport on a Mesh network of CAT Nodes. Data Contracts will contain BOM lineages and act as block headers for Content-Addressed Transformers (CATs) instances. Data Products are mutated during policy feedback loops informed by collaborators communicating their understanding of knowledge domains. Collaborators will identify knowledge sub-domains with references and will access sub-domains using Content-Addresses. Access is federated via knowledge domain hierarchies in abstractions that enable collaborators to participate in governance cycles by leveraging their understanding of knowledge."
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "id": "e23e7e39-ed27-47bc-9107-f6a5fcd71457",
122 | "metadata": {},
123 | "source": [
124 | "### Week 9 (2/19 - 2/23):\n",
125 | "* 2/19 - 2/21: Contextualize value of BOM within the context of Data as a Product that contains Data Contracts\n",
126 | "* 2/22 - 2/23: Updated Readme informed by examples of Data Assets within the context of Machine-Readable Cataloging\n",
127 | "\n",
128 | "Resources:\n",
129 | "* https://www.loc.gov/marc/umb/um01to06.html\n",
130 | "* https://docs.informatica.com/data-engineering/data-engineering-quality/10-2-1/business-glossary-guide/glossary-content-management/business-term-links/data-asset.html\n",
131 | "\n",
132 | "#### What is a Content-Addressed Data Asset (CADA)?\n",
133 | "\n",
134 | "CATs Data Products will consist of Data Contracts with provenance as executable BOM lineages that act as block headers for Content-Addressed Transformers (CATs) instances that contain Data Assets. BOMs are CATs' Content-Addressed Data Provenance record for verifiable data processing and transport on a Mesh network of CAT Nodes that can contain Data Assets. A data asset may be “a system or application output” (dataset) that holds value for an organization or individual and that is accessible. Data Assets’ value can derive from the data's potential for generating insights, informing decision-making, contributing to product development, enhancing operational efficiency, or creating economic benefits through its sale or exchange. \n",
135 | "\n",
136 | "CATs' Content-Addressed Data Assets are processed, sold / exchanged / published on CATs’ Data Mesh via CAT Nodes and subsumed by downstream CATs’ Data Products. Data Assets consist of the following:\n",
137 | "* **Data Domains** - \"A predefined or user-defined Model repository object that represents the functional meaning of an\" attribute \"based on column data or column name such as\" account identification.\n",
138 | "* **Data Objects** - Content-Addresses of data sources used to extract metadata for analysis.",
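"\n",
"*Added, hedged illustration (not part of the original notes):* a minimal Python sketch of a CADA record; the field names mirror the `bom`/`flat_bom` keys printed by the CATs demo (`bom_cid`, `invoice_cid`, `data_cid`, `structure_cid`), and the placeholder values stand in for real CIDs:\n",
"```python\n",
"# Hypothetical CADA record; keys are borrowed from the demo's BOM output,\n",
"# values are placeholders rather than real content-addresses.\n",
"cada = {\n",
"    'bom_cid': '<CID of the BOM provenance block>',\n",
"    'invoice_cid': '<CID of the invoice>',\n",
"    'data_cid': '<CID of the dataset / Data Object>',\n",
"    'structure_cid': '<CID of the IaC structure, e.g. main.tf>',\n",
"}\n",
"```"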
139 | ]
140 | },
141 | {
142 | "cell_type": "markdown",
143 | "id": "d4209114-d726-4326-a9ad-b7d69249dc4e",
144 | "metadata": {},
145 | "source": [
146 | "### Week 10 (2/26 - 3/1):\n",
147 | "\n",
148 | "* 2/26: Researched Digital Asset Management related Data Contracts and Data Mesh Registry & considered a Rule Asset being used for Network Policies in addition to Attribute Quality \n",
149 | "* 2/27: Considered Rule Assets in addition to Data Assets for Data Mesh Registry Artifact Schema\n",
150 | " * https://towardsdatascience.com/the-data-mesh-registry-a-window-into-your-data-mesh-20dece35e05a \n",
151 | " * https://docs.informatica.com/data-engineering/data-engineering-quality/10-2-1/business-glossary-guide/glossary-content-management/business-term-links/data-asset.html\n",
152 | " * https://docs.informatica.com/data-engineering/data-engineering-quality/10-2-1/business-glossary-guide/glossary-content-management/business-term-links/rule-asset.html\n",
153 | "* 2/28: Verify CATs Executing FaaS on PaaS\n",
154 | " * https://www.ibm.com/topics/faas\n",
155 | " * https://www.ibm.com/topics/iaas-paas-saas\n",
156 | "* 2/29: Review Domain-Oriented Ownership with respect to Conway's law\n",
157 | " * https://www.starburst.io/blog/data-mesh-book-bulletin-principle-of-domain-ownership/\n",
158 | " * https://developer.confluent.io/courses/data-mesh/data-ownership/\n",
159 | " * https://en.wikipedia.org/wiki/Conway%27s_law\n",
160 | "* 3/1: Review Data Column Lineage's value in establishing Domain-Oriented Ownership in CATs' Invoice in a way that makes BOMs searchable and discoverable"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "id": "b25e3f7e-9bf1-4c1a-95ae-2371e9532779",
166 | "metadata": {},
167 | "source": [
168 | "### Week 11 (3/4 - 3/8):\n",
169 | "* 3/4: Contextualize “Data as an asset” with CATs Architecture\n",
170 | " * https://atlan.com/data-as-an-asset/\n",
171 | "* 3/5: Contextualize Data sovereignty with “Data as an asset” for CATs Data Mesh\n",
172 | " * https://www.nnlm.gov/guides/data-glossary/data-sovereignty#:~:text=Definition,storage%2C%20and%20interpretation%20of%20data.\n",
173 | "* 3/6: Contextually map Data Contract initialization roles to cross-functional Operational Model for Data Products\n",
174 | " * https://standard.open-contracting.org/latest/en/guidance/design/#build-your-team\n",
175 | "* 3/7: Contextually map \"Fractional Ownership\" of \"Decentralized Data Objects\" (\"DDOs\" / \"Data Assets\") to \"Data as an asset\" and Data Partitioning / Sharding\n",
176 | " * https://docs.oceanprotocol.com/developers/fractional-ownership \n",
177 | " * https://docs.oceanprotocol.com/developers/ddo-specification\n",
178 | " * https://en.wikipedia.org/wiki/Partition_(database)\n",
179 | " * https://en.wikipedia.org/wiki/Shard_(database_architecture)\n",
180 | "* 3/8: Contextualize Ocean Protocol & CATs Architecture with prosumption\n",
181 | " * https://docs.oceanprotocol.com/developers/architecture \n",
182 | " * https://en.wikipedia.org/wiki/Prosumer\n",
183 | "\n",
184 | "“Data as an asset” enables the consumption, production, and [prosumption](https://en.wikipedia.org/wiki/Prosumer) of Data Assets on CATs Data Mesh\n",
185 | "\n",
186 | "“Data as an asset” [0.](https://atlan.com/data-as-an-asset/) conceptually emphasizes recognizing and treating data as a strategic investment organizations can leverage to deliver future economic benefits by enabling the consumption, production, and prosumption of one's own data as an asset. 
Prosumption is the consumption and production of value, \"either for self-consumption or consumption by others, and can receive implicit or explicit incentives from organizations involved in the exchange.\" [1.](https://doi.org/10.1108/JOSM-05-2020-0155)\n",
187 | "\n",
188 | "The availability of high-quality and domain-specified Data Assets enables Data Products on inter-connected CAT Nodes on CATs Data Mesh to facilitate cross-functional asset utilization within Data Initiatives in a way that supports Data Sovereignty. \"Data sovereignty refers to a group or individual’s right to control and maintain their own data, which includes the collection, storage, and interpretation of data.\" [2.](https://www.nnlm.gov/guides/data-glossary/data-sovereignty#:~:text=Definition,storage%2C%20and%20interpretation%20of%20data.)\n",
189 | "\n",
190 | "CATs Registry Catalog (\"CATlog Registry\") can accelerate innovative Data Product creation and facilitate Data Sovereignty in Data Initiatives that discover and utilize “Data as an asset”. Data Products use and operate CAT Nodes to produce, register, and catalog “Data as an asset” as searchable and discoverable Data Assets by Data Products on CATs Data Mesh. CATs' Data Assets enhance strategic, operational, and analysis-informed decision-making by using BOMs as feedback-loop mechanisms across domains in a way that suits specific collaborative contexts across organizations.\n",
191 | "\n",
192 | "Resources:\n",
193 | "* https://www.youtube.com/watch?v=uv52swYfStU&t=6s\n",
194 | "* https://www.youtube.com/watch?v=pbBGciy8ZbM"
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "execution_count": null,
200 | "id": "d9e2711b-f704-4e69-961d-21ad3b6e233c",
201 | "metadata": {},
202 | "outputs": [],
203 | "source": []
204 | }
205 | ],
206 | "metadata": {
207 | "kernelspec": {
208 | "display_name": "Python 3 (ipykernel)",
209 | "language": "python",
210 | "name": "python3"
211 | },
212 | "language_info": {
213 | "codemirror_mode": {
214 | "name": "ipython",
215 | "version": 3
216 | },
217 | "file_extension": ".py",
218 | "mimetype": "text/x-python",
219 | "name": "python",
220 | "nbconvert_exporter": "python",
221 | "pygments_lexer": "ipython3",
222 | "version": "3.10.13"
223 | }
224 | },
225 | "nbformat": 4,
226 | "nbformat_minor": 5
227 | }
228 |
--------------------------------------------------------------------------------
/main.tf:
--------------------------------------------------------------------------------
1 | # install
2 | # kubectl, helm
3 | # SDKs: ipfs, cod, terraform
4 | 
5 | terraform {
6 | required_providers {
7 | shell = {
8 | source = "scottwinkler/shell"
9 | version = "1.7.10"
10 | }
11 | kind = {
12 | source = "tehcyx/kind"
13 | version = "0.2.0"
14 | }
15 | }
16 | }
17 | 
18 | #variable "KUBE_CONFIG_PATH" {
19 | # type = string
20 | #}
21 | 
22 | 
23 | provider "shell" {
24 | sensitive_environment = {
25 | # KUBE_CONFIG_PATH = var.KUBE_CONFIG_PATH
26 | KUBE_CONFIG_PATH = "~/.kube/config"
27 | }
28 | interpreter = ["/bin/sh", "-c"]
29 | enable_parallelism = false
30 | }
31 | 
32 | # Infrastructure cleanup
33 | resource "shell_script" "delete_cats_k8s" {
34 | lifecycle_commands {
35 | create = <<-EOF
36 | cd ~/Projects/cats-research
37 | kind delete cluster --name cat-action-plane
38 | EOF
39 | delete = ""
40 | }
41 | }
42 | 
43 | #resource "shell_script" "setup_cod" {
44 | # lifecycle_commands {
45 | # create = <<-EOF
46 | # cd ~/Projects/Research/cats-research/
47 | # if test -f /usr/local/bin/bacalhau;
48 | # then
49 | # curl -sL 
https://get.bacalhau.org/install.sh | bash 50 | ## wget https://github.com/bacalhau-project/bacalhau/releases/download/v1.1.5/bacalhau_v1.1.5_linux_amd64.tar.gz 51 | ## tar -xvzf bacalhau_v1.1.5_linux_amd64.tar.gz 52 | ## sudo mv ./bacalhau /usr/local/bin/ 53 | # fi 54 | # EOF 55 | ## delete = "rm bacalhau_v1.1.5_linux_amd64.tar.gz" 56 | # delete = "" 57 | # } 58 | # depends_on = [ 59 | # shell_script.delete_cats_k8s 60 | # ] 61 | #} 62 | 63 | provider "kind" { 64 | # Configuration options 65 | } 66 | 67 | resource "kind_cluster" "default" { 68 | name = "cat-action-plane" 69 | node_image = "kindest/node:v1.23.0" 70 | wait_for_ready = "true" 71 | depends_on = [ 72 | shell_script.delete_cats_k8s, 73 | # shell_script.setup_cod 74 | ] 75 | } 76 | 77 | #resource "shell_script" "setup_helm" { 78 | # lifecycle_commands { 79 | # create = <<-EOF 80 | # cd ~/Projects/cats-research 81 | # if ! command -v helm &> /dev/null; 82 | # then 83 | # sudo curl -sL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash 84 | # fi 85 | # EOF 86 | # delete = "" 87 | # } 88 | # depends_on = [ 89 | # kind_cluster.default, 90 | # shell_script.delete_cats_k8s 91 | # ] 92 | #} 93 | 94 | provider "helm" { 95 | kubernetes { 96 | config_context_cluster = "kind-cat-action-plane" 97 | config_path = "~/.kube/config" 98 | } 99 | } 100 | 101 | resource "helm_release" "kuberay-operator" { 102 | name = "kuberay-operator" 103 | repository = "https://ray-project.github.io/kuberay-helm/" 104 | chart = "kuberay-operator" 105 | version = "1.0.0" 106 | wait_for_jobs = "true" 107 | depends_on = [ 108 | kind_cluster.default 109 | ] 110 | } 111 | 112 | resource "helm_release" "ray-cluster" { 113 | name = "raycluster" 114 | repository = "https://ray-project.github.io/kuberay-helm/" 115 | chart = "ray-cluster" 116 | version = "0.6.0" 117 | wait_for_jobs = "true" 118 | # set { 119 | # name = "image.tag" 120 | # value = "nightly-aarch64" 121 | # type = "string" 122 | # } 123 | depends_on = [ 124 | kind_cluster.default, 125 | helm_release.kuberay-operator 126 | ] 127 | } 128 | 129 | #resource "helm_release" "hdfs" { 130 | # name = "hdfs" 131 | # repository = "https://gradiant.github.io/charts" 132 | # chart = "gradiant" 133 | # version = "0.1.10" 134 | # depends_on = [ 135 | # kind_cluster.default 136 | # ] 137 | #} 138 | 139 | 140 | #resource "helm_release" "hdfs" { 141 | # name = "hdfs" 142 | # repository = "https://gchq.github.io/gaffer-docker" 143 | # chart = "hdfs" 144 | # version = "2.0.0" 145 | # set { 146 | # name = "hdfs.namenode.tag" 147 | # value = "3.3.3" 148 | # } 149 | # set { 150 | # name = "hdfs.datanode.tag" 151 | # value = "3.3.3" 152 | # } 153 | # set { 154 | # name = "hdfs.shell.tag" 155 | # value = "3.3.3" 156 | # } 157 | # depends_on = [ 158 | # kind_cluster.default 159 | # ] 160 | #} -------------------------------------------------------------------------------- /process.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from typing import Dict 3 | import numpy as np 4 | import ray 5 | 6 | 7 | def function_0(batch: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: 8 | vec_a = batch["petal length (cm)"] 9 | vec_b = batch["petal width (cm)"] 10 | batch["petal area (cm^2)"] = vec_a * vec_b 11 | return batch 12 | 13 | def function_1(batch: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: 14 | vec_a = batch["petal length (cm)"] 15 | vec_b = batch["petal width (cm)"] 16 | batch["DUPLICATE petal area (cm^2)"] = vec_a * vec_b 17 | return batch 18 | 19 
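# (Added commentary; not part of the original module.) function_0/function_1
# above are Ray Dataset batch transforms: each receives a batch as a dict of
# NumPy column vectors and returns it with a derived petal-area column
# appended. process_0/process_1 below wire them into ray.data pipelines
# (read_csv -> map_batches -> write_csv); process_0 additionally zips in a
# range dataset so rows gain an 'id' index column used by the tests.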
| 20 | def process_0(input, output): 21 | # ray.init() 22 | ds_in = ray.data.read_csv(input) 23 | ds_out = ds_in.map_batches(function_0) 24 | idx_ds = ray.data.range(ds_out.count()) 25 | ds_out = idx_ds.zip(ds_out) 26 | print(ds_out.show(limit=1)) 27 | ds_out.write_csv(output) 28 | ray.shutdown() 29 | return ds_out 30 | 31 | def process_1(input, output): 32 | # ray.init() 33 | ds_in = ray.data.read_csv(input) 34 | ds_out = ds_in.map_batches(function_1) 35 | print(ds_out.show(limit=1)) 36 | ds_out.write_csv(output) 37 | ray.shutdown() 38 | return ds_out 39 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=61.0", 4 | "pytest==7.4.4", 5 | "ipfsApi==0.4.4", 6 | "ray[default, data]==2.7.1", 7 | "flask==3.0.0", 8 | "notebook", 9 | "tqdm", 10 | "ipywidgets", 11 | "boto3==1.34.21" 12 | ] 13 | build-backend = "setuptools.build_meta" 14 | 15 | [tool.setuptools.packages.find] 16 | where = ["cats/"] 17 | include = ["cats"] 18 | 19 | [project] 20 | name = "cats" 21 | version = "0.0.0" 22 | authors = [ 23 | { name="Joshua E. Jodesty", email="joshua@block.science"}, 24 | ] 25 | description = "CATs" 26 | readme = "README.md" 27 | requires-python = ">=3.10.13" 28 | classifiers = [ 29 | "Programming Language :: Python :: 3", 30 | "License :: OSI Approved :: MIT License", 31 | "Operating System :: OS Independent", 32 | ] 33 | 34 | [project.urls] 35 | Homepage = "https://github.com/BlockScience/cats" 36 | Issues = "https://github.com/BlockScience/cats/issues" -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BlockScience/cats/72263573aa3a2f02da2dccc8b5f09015c3df5fb5/tests/__init__.py -------------------------------------------------------------------------------- /tests/verification_test.py: -------------------------------------------------------------------------------- 1 | import pytest, os 2 | from cats import CATS_HOME, DATA_HOME 3 | import ipfsapi as ipfsApi 4 | from cats.network import MeshClient 5 | from cats.service import Service 6 | from process import * 7 | 8 | service = Service( 9 | meshClient=MeshClient( 10 | ipfsClient=ipfsApi.Client('127.0.0.1', 5001) 11 | ) 12 | ) 13 | structure_filepath = os.path.join(CATS_HOME, 'main.tf') 14 | cat_order_request_0 = service.create_order_request( 15 | process_obj=process_0, 16 | data_dirpath=DATA_HOME, 17 | structure_filepath=structure_filepath, 18 | endpoint='http://127.0.0.1:5000/cat/node/init' 19 | ) 20 | cat_invoiced_response_0 = service.catSubmit(cat_order_request_0) 21 | 22 | 23 | class TestDataVerificationCAT0: 24 | flat_cat_invoiced_response_0 = service.flatten_bom(cat_invoiced_response_0) 25 | cat0_input_df = service.cid_to_pandasDF( 26 | cid=flat_cat_invoiced_response_0['flat_bom']['invoice']['order']['flat']['invoice']['data_cid'], 27 | read_dir='', 28 | download_dir='online/cat0_input' 29 | ) 30 | source_df = cat0_input_df.drop(columns=['filename']).values 31 | cat0_output_df = service.cid_to_pandasDF( 32 | cid=flat_cat_invoiced_response_0['flat_bom']['invoice']['data_cid'], 33 | download_dir='online/cat0_output' 34 | ) 35 | 36 | def test_cat0_data_verification(self): 37 | assert np.array_equal( 38 | self.source_df, 39 | self.cat0_output_df.sort_values('id').drop(columns=['id', 'filename', 'petal area 
(cm^2)']).values 40 | ) 41 | 42 | 43 | cat_order_request_1 = service.linkProcess(cat_invoiced_response_0, process_1) 44 | class TestDataVerificationCAT1: 45 | cat_invoiced_response_1 = service.catSubmit(cat_order_request_1) 46 | flat_cat_invoiced_response_1 = service.flatten_bom(cat_invoiced_response_1) 47 | cat1_input_df = service.cid_to_pandasDF( 48 | cid=flat_cat_invoiced_response_1['flat_bom']['invoice']['order']['flat']['invoice']['data_cid'], 49 | # read_dir='', 50 | download_dir='online/cat1_input' 51 | ) 52 | cat1_output_df = service.cid_to_pandasDF( 53 | cid=flat_cat_invoiced_response_1['flat_bom']['invoice']['data_cid'], 54 | download_dir='online/cat1_output' 55 | ) 56 | 57 | def test_cat1_data_verification(self): 58 | assert np.array_equal( 59 | self.cat1_input_df.sort_values('id').drop(columns=['id', 'filename']).values, 60 | self.cat1_output_df.sort_values('id').drop(columns=['id', 'filename', 'DUPLICATE petal area (cm^2)']).values 61 | ) 62 | 63 | 64 | class TestDataVerification(TestDataVerificationCAT0, TestDataVerificationCAT1): 65 | pass 66 | 67 | 68 | class TestDataTransferVerification(TestDataVerification): 69 | def test_catMesh_data_transfer_verification(self): 70 | assert np.array_equal( 71 | self.cat0_output_df.sort_values('id').drop(columns=['id', 'filename']).values, 72 | self.cat1_input_df.sort_values('id').drop(columns=['id', 'filename']).values 73 | ) 74 | 75 | 76 | class TestLineageVerification(TestDataTransferVerification): 77 | def test_cat1_input_lineage_verification(self): 78 | assert np.array_equal( 79 | self.source_df, 80 | self.cat1_input_df.sort_values('id').drop( 81 | columns=['id', 'filename', 'petal area (cm^2)'] 82 | ).values 83 | ) 84 | 85 | def test_cat1_output_lineage_verification(self): 86 | assert np.array_equal( 87 | self.source_df, 88 | self.cat1_output_df.sort_values('id').drop( 89 | columns=['id', 'filename', 'petal area (cm^2)', 'DUPLICATE petal area (cm^2)'] 90 | ).values 91 | ) --------------------------------------------------------------------------------