├── .gitignore ├── .idea ├── .gitignore ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── sunbird-ml-workbench.iml └── vcs.xml ├── Dockerfile ├── LICENSE ├── README.md ├── bin ├── daggit-0.5.0-py3-none-any.whl └── daggit-0.5.0.tar.gz ├── build.py ├── build.sh ├── daggit_api.py ├── docker-compose.yml ├── docs ├── Makefile ├── _build │ ├── .buildinfo │ ├── .doctrees │ │ ├── daggit.contrib.doctree │ │ ├── daggit.contrib.sunbird.doctree │ │ ├── daggit.contrib.sunbird.operators.doctree │ │ ├── daggit.core.base.doctree │ │ ├── daggit.core.doctree │ │ ├── daggit.core.io.doctree │ │ ├── daggit.core.operators.doctree │ │ ├── daggit.doctree │ │ ├── daggit.runtime.doctree │ │ ├── environment.pickle │ │ ├── getting_started.doctree │ │ ├── index.doctree │ │ └── intro.doctree │ ├── .nojekyll │ ├── _modules │ │ ├── daggit │ │ │ ├── contrib │ │ │ │ └── sunbird │ │ │ │ │ └── operators │ │ │ │ │ ├── contentTagging.html │ │ │ │ │ ├── contentTaggingUtils.html │ │ │ │ │ └── interruptedcontentTagging.html │ │ │ ├── core │ │ │ │ ├── base │ │ │ │ │ ├── factory.html │ │ │ │ │ ├── parser.html │ │ │ │ │ └── utils.html │ │ │ │ ├── io │ │ │ │ │ └── io.html │ │ │ │ └── operators │ │ │ │ │ ├── dfsplitters.html │ │ │ │ │ ├── etl.html │ │ │ │ │ ├── keras.html │ │ │ │ │ ├── operators_registry.html │ │ │ │ │ └── sklearn.html │ │ │ └── runtime │ │ │ │ └── airflow_runtime.html │ │ └── index.html │ ├── _sources │ │ ├── daggit.contrib.rst.txt │ │ ├── daggit.contrib.sunbird.operators.rst.txt │ │ ├── daggit.contrib.sunbird.rst.txt │ │ ├── daggit.core.base.rst.txt │ │ ├── daggit.core.io.rst.txt │ │ ├── daggit.core.operators.rst.txt │ │ ├── daggit.core.rst.txt │ │ ├── daggit.rst.txt │ │ ├── daggit.runtime.rst.txt │ │ ├── getting_started.rst.txt │ │ ├── index.rst.txt │ │ └── intro.rst.txt │ ├── _static │ │ ├── ajax-loader.gif │ │ ├── basic.css │ │ ├── comment-bright.png │ │ ├── comment-close.png │ │ ├── comment.png │ │ ├── css │ │ │ ├── badge_only.css │ │ │ └── theme.css │ │ ├── doctools.js │ │ ├── documentation_options.js │ │ ├── down-pressed.png │ │ ├── down.png │ │ ├── file.png │ │ ├── fonts │ │ │ ├── Inconsolata-Bold.ttf │ │ │ ├── Inconsolata-Regular.ttf │ │ │ ├── Inconsolata.ttf │ │ │ ├── Lato-Bold.ttf │ │ │ ├── Lato-Regular.ttf │ │ │ ├── Lato │ │ │ │ ├── lato-bold.eot │ │ │ │ ├── lato-bold.ttf │ │ │ │ ├── lato-bold.woff │ │ │ │ ├── lato-bold.woff2 │ │ │ │ ├── lato-bolditalic.eot │ │ │ │ ├── lato-bolditalic.ttf │ │ │ │ ├── lato-bolditalic.woff │ │ │ │ ├── lato-bolditalic.woff2 │ │ │ │ ├── lato-italic.eot │ │ │ │ ├── lato-italic.ttf │ │ │ │ ├── lato-italic.woff │ │ │ │ ├── lato-italic.woff2 │ │ │ │ ├── lato-regular.eot │ │ │ │ ├── lato-regular.ttf │ │ │ │ ├── lato-regular.woff │ │ │ │ └── lato-regular.woff2 │ │ │ ├── RobotoSlab-Bold.ttf │ │ │ ├── RobotoSlab-Regular.ttf │ │ │ ├── RobotoSlab │ │ │ │ ├── roboto-slab-v7-bold.eot │ │ │ │ ├── roboto-slab-v7-bold.ttf │ │ │ │ ├── roboto-slab-v7-bold.woff │ │ │ │ ├── roboto-slab-v7-bold.woff2 │ │ │ │ ├── roboto-slab-v7-regular.eot │ │ │ │ ├── roboto-slab-v7-regular.ttf │ │ │ │ ├── roboto-slab-v7-regular.woff │ │ │ │ └── roboto-slab-v7-regular.woff2 │ │ │ ├── fontawesome-webfont.eot │ │ │ ├── fontawesome-webfont.svg │ │ │ ├── fontawesome-webfont.ttf │ │ │ ├── fontawesome-webfont.woff │ │ │ └── fontawesome-webfont.woff2 │ │ ├── jquery-3.2.1.js │ │ ├── jquery-3.4.1.js │ │ ├── jquery.js │ │ ├── js │ │ │ ├── modernizr.min.js │ │ │ └── theme.js │ │ ├── language_data.js │ │ ├── minus.png │ │ ├── plus.png │ │ ├── pygments.css │ │ ├── searchtools.js │ │ ├── underscore-1.3.1.js │ │ ├── underscore.js │ │ ├── up-pressed.png │ │ ├── up.png │ │ └── websupport.js │ ├── daggit.contrib.html │ ├── daggit.contrib.sunbird.html │ ├── daggit.contrib.sunbird.operators.html │ ├── daggit.core.base.html │ ├── daggit.core.html │ ├── daggit.core.io.html │ ├── daggit.core.operators.html │ ├── daggit.html │ ├── daggit.runtime.html │ ├── genindex.html │ ├── getting_started.html │ ├── index.html │ ├── intro.html │ ├── objects.inv │ ├── py-modindex.html │ ├── search.html │ └── searchindex.js ├── build │ ├── doctrees │ │ ├── daggit.contrib.doctree │ │ ├── daggit.contrib.sunbird.doctree │ │ ├── daggit.contrib.sunbird.operators.doctree │ │ ├── daggit.core.base.doctree │ │ ├── daggit.core.doctree │ │ ├── daggit.core.io.doctree │ │ ├── daggit.core.operators.doctree │ │ ├── daggit.doctree │ │ ├── daggit.runtime.doctree │ │ ├── environment.pickle │ │ ├── getting_started.doctree │ │ ├── index.doctree │ │ └── intro.doctree │ └── html │ │ ├── .buildinfo │ │ ├── .nojekyll │ │ ├── _modules │ │ ├── daggit │ │ │ ├── contrib │ │ │ │ └── sunbird │ │ │ │ │ └── operators │ │ │ │ │ ├── contentTagging.html │ │ │ │ │ ├── contentTaggingUtils.html │ │ │ │ │ └── interruptedcontentTagging.html │ │ │ ├── core │ │ │ │ ├── base │ │ │ │ │ ├── factory.html │ │ │ │ │ ├── parser.html │ │ │ │ │ └── utils.html │ │ │ │ ├── io │ │ │ │ │ └── io.html │ │ │ │ └── operators │ │ │ │ │ ├── etl.html │ │ │ │ │ ├── keras.html │ │ │ │ │ ├── operators_registry.html │ │ │ │ │ └── sklearn.html │ │ │ └── runtime │ │ │ │ └── airflow_runtime.html │ │ └── index.html │ │ ├── _sources │ │ ├── daggit.contrib.rst.txt │ │ ├── daggit.contrib.sunbird.operators.rst.txt │ │ ├── daggit.contrib.sunbird.rst.txt │ │ ├── daggit.core.base.rst.txt │ │ ├── daggit.core.io.rst.txt │ │ ├── daggit.core.operators.rst.txt │ │ ├── daggit.core.rst.txt │ │ ├── daggit.rst.txt │ │ ├── daggit.runtime.rst.txt │ │ ├── getting_started.rst.txt │ │ ├── index.rst.txt │ │ └── intro.rst.txt │ │ ├── _static │ │ ├── ajax-loader.gif │ │ ├── basic.css │ │ ├── comment-bright.png │ │ ├── comment-close.png │ │ ├── comment.png │ │ ├── css │ │ │ ├── badge_only.css │ │ │ └── theme.css │ │ ├── doctools.js │ │ ├── documentation_options.js │ │ ├── down-pressed.png │ │ ├── down.png │ │ ├── file.png │ │ ├── fonts │ │ │ ├── Inconsolata-Bold.ttf │ │ │ ├── Inconsolata-Regular.ttf │ │ │ ├── Inconsolata.ttf │ │ │ ├── Lato-Bold.ttf │ │ │ ├── Lato-Regular.ttf │ │ │ ├── Lato │ │ │ │ ├── lato-bold.eot │ │ │ │ ├── lato-bold.ttf │ │ │ │ ├── lato-bold.woff │ │ │ │ ├── lato-bold.woff2 │ │ │ │ ├── lato-bolditalic.eot │ │ │ │ ├── lato-bolditalic.ttf │ │ │ │ ├── lato-bolditalic.woff │ │ │ │ ├── lato-bolditalic.woff2 │ │ │ │ ├── lato-italic.eot │ │ │ │ ├── lato-italic.ttf │ │ │ │ ├── lato-italic.woff │ │ │ │ ├── lato-italic.woff2 │ │ │ │ ├── lato-regular.eot │ │ │ │ ├── lato-regular.ttf │ │ │ │ ├── lato-regular.woff │ │ │ │ └── lato-regular.woff2 │ │ │ ├── RobotoSlab-Bold.ttf │ │ │ ├── RobotoSlab-Regular.ttf │ │ │ ├── RobotoSlab │ │ │ │ ├── roboto-slab-v7-bold.eot │ │ │ │ ├── roboto-slab-v7-bold.ttf │ │ │ │ ├── roboto-slab-v7-bold.woff │ │ │ │ ├── roboto-slab-v7-bold.woff2 │ │ │ │ ├── roboto-slab-v7-regular.eot │ │ │ │ ├── roboto-slab-v7-regular.ttf │ │ │ │ ├── roboto-slab-v7-regular.woff │ │ │ │ └── roboto-slab-v7-regular.woff2 │ │ │ ├── fontawesome-webfont.eot │ │ │ ├── fontawesome-webfont.svg │ │ │ ├── fontawesome-webfont.ttf │ │ │ ├── fontawesome-webfont.woff │ │ │ └── fontawesome-webfont.woff2 │ │ ├── jquery-3.2.1.js │ │ ├── jquery.js │ │ ├── js │ │ │ ├── modernizr.min.js │ │ │ └── theme.js │ │ ├── minus.png │ │ ├── plus.png │ │ ├── pygments.css │ │ ├── searchtools.js │ │ ├── underscore-1.3.1.js │ │ ├── underscore.js │ │ ├── up-pressed.png │ │ ├── up.png │ │ └── websupport.js │ │ ├── daggit.contrib.html │ │ ├── daggit.contrib.sunbird.html │ │ ├── daggit.contrib.sunbird.operators.html │ │ ├── daggit.core.base.html │ │ ├── daggit.core.html │ │ ├── daggit.core.io.html │ │ ├── daggit.core.operators.html │ │ ├── daggit.html │ │ ├── daggit.runtime.html │ │ ├── genindex.html │ │ ├── getting_started.html │ │ ├── index.html │ │ ├── intro.html │ │ ├── objects.inv │ │ ├── py-modindex.html │ │ ├── search.html │ │ └── searchindex.js ├── make.bat └── source │ ├── conf.py │ ├── daggit.contrib.rst │ ├── daggit.contrib.sunbird.operators.rst │ ├── daggit.contrib.sunbird.rst │ ├── daggit.core.base.rst │ ├── daggit.core.io.rst │ ├── daggit.core.operators.rst │ ├── daggit.core.rst │ ├── daggit.rst │ ├── daggit.runtime.rst │ ├── getting_started.rst │ ├── index.rst │ └── intro.rst ├── examples ├── content_reuse │ ├── content_reuse.yaml │ ├── inputs │ │ ├── base_ref_data.csv │ │ ├── base_stb_data.csv │ │ ├── credentials.ini │ │ ├── font_corpus │ │ │ ├── Gujarati_lohit.ttf │ │ │ ├── Hindi_lohit.ttf │ │ │ ├── Kannada_lohit.ttf │ │ │ ├── Malayalam_lohit.ttf │ │ │ ├── Oriya_lohit.ttf │ │ │ ├── Panjabi_lohit.ttf │ │ │ ├── Tamil_lohit.ttf │ │ │ └── Telugu_lohit.ttf │ │ ├── language_mapping.json │ │ └── siamese_configuration.json │ ├── server_start.sh │ ├── textbook_to_concept_mapping.yaml │ └── textbook_to_textbook_mapping.yaml ├── content_tagging │ ├── content_tagging.yaml │ ├── inputs │ │ ├── category_lookup.yaml │ │ ├── corpus │ │ │ ├── Biology.csv │ │ │ ├── English.csv │ │ │ ├── Environmental Studies.csv │ │ │ ├── Geography.csv │ │ │ ├── History.csv │ │ │ ├── Mathematics.csv │ │ │ ├── Physical Science.csv │ │ │ ├── Science.csv │ │ │ ├── Social Science.csv │ │ │ └── Social_Science.csv │ │ ├── credentials.ini │ │ └── taxonomy.csv │ └── load_corpusToRedis.py ├── dtb_creation │ ├── dtb_creation.yaml │ ├── inputs │ │ ├── ToC.csv │ │ └── fullannotation_text.txt │ └── outputs │ │ └── dtb.json ├── housing_prices │ ├── data │ │ ├── test.csv │ │ └── train.csv │ ├── housing_prices.yaml │ └── outputs │ │ ├── model_dt │ │ ├── model.pkl │ │ └── report.txt │ │ ├── model_rf │ │ ├── model.pkl │ │ └── report.txt │ │ └── model_xgb │ │ ├── model.pkl │ │ └── report.txt ├── iris_classification │ ├── README.md │ ├── inputs │ │ └── iris.csv │ ├── iris_classification_graph.yaml │ └── outputs │ │ ├── model │ │ └── model_report └── iris_classification_with_keras │ ├── README.md │ ├── inputs │ └── iris.csv │ └── iris_classification_using_keras_graph.yaml ├── expt_name_map.json ├── requirements.txt ├── src ├── main │ ├── __init__.py │ ├── python │ │ ├── __init__.py │ │ └── daggit │ │ │ ├── __init__.py │ │ │ ├── contrib │ │ │ ├── __init__.py │ │ │ └── sunbird │ │ │ │ ├── __init__.py │ │ │ │ ├── nodes │ │ │ │ ├── __init__.py │ │ │ │ ├── content_reuse.py │ │ │ │ ├── contenttagging.py │ │ │ │ └── dtb.py │ │ │ │ └── oplib │ │ │ │ ├── __init__.py │ │ │ │ ├── content_reuse_utils.py │ │ │ │ ├── contentreuseEvaluationUtils.py │ │ │ │ ├── dtb.py │ │ │ │ └── taggingUtils.py │ │ │ ├── core │ │ │ ├── __init__.py │ │ │ ├── base │ │ │ │ ├── __init__.py │ │ │ │ ├── config.py │ │ │ │ ├── factory.py │ │ │ │ ├── parser.py │ │ │ │ └── utils.py │ │ │ ├── io │ │ │ │ ├── __init__.py │ │ │ │ ├── files.py │ │ │ │ ├── io.py │ │ │ │ └── redis.py │ │ │ ├── nodes │ │ │ │ ├── __init__.py │ │ │ │ ├── dfsplitters.py │ │ │ │ ├── keras.py │ │ │ │ ├── registry.py │ │ │ │ └── sklearn.py │ │ │ └── oplib │ │ │ │ ├── __init__.py │ │ │ │ ├── distanceUtils.py │ │ │ │ ├── etl.py │ │ │ │ ├── misc.py │ │ │ │ └── nlp.py │ │ │ └── runtime │ │ │ ├── __init__.py │ │ │ ├── airflow_dag.py │ │ │ └── airflow_runtime.py │ └── scripts │ │ └── daggit └── unittest │ └── python │ └── contrib │ └── sunbird │ ├── TestingUtils.py │ ├── __init__.py │ ├── content_reuse_tests.py │ ├── content_tagging_tests.py │ └── test_cases_data │ ├── PdfText │ ├── id_1 │ │ ├── ExpText.txt │ │ └── actualText.pdf │ ├── id_2 │ │ ├── ExpText.txt │ │ └── actualText.pdf │ └── id_3 │ │ ├── ExpText.txt │ │ └── actualText.pdf │ ├── SpeechText │ ├── id_1 │ │ └── assets │ │ │ ├── audio_split │ │ │ └── id_1 │ │ │ │ ├── id_1_0.mp3 │ │ │ │ └── id_1_0.wav │ │ │ └── id_1.mp3 │ └── speech_to_text_exp_output.txt │ ├── bert_scoring │ ├── bert_scoring_mandatory_fields.yaml │ ├── siamese_configuration.json │ ├── tokenizer.pkl │ └── topic_aggregation_mandatory_fields.yaml │ ├── df_feature_check │ ├── ContentTagging_mandatory_fields.yaml │ ├── Content_Meta_feature_checking_df_1.csv │ ├── Content_Meta_feature_checking_df_2.csv │ ├── Content_Meta_feature_checking_df_3.csv │ ├── content_reuse_evaluation_feature_check.csv │ └── content_reuse_preparation_feature_check.csv │ ├── keyword_extraction │ ├── empty.txt │ ├── eng_text_actual_keywords.csv │ └── english.txt │ └── keywords.csv └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .coverage 3 | venv 4 | **/target 5 | framework/build_env 6 | .DS_Store 7 | */nohup.out 8 | .pybuilder 9 | .tox 10 | DAGGIT_HOME -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml 3 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/sunbird-ml-workbench.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # ML-Workbench Dockerfile 3 | # 4 | 5 | FROM aleenaraj/raj_ubuntu 6 | MAINTAINER Aleena Raj "aleenar@ilimi.in" 7 | 8 | # Setting up DS_DATA_HOME 9 | RUN mkdir /home/DS_DATA_HOME 10 | RUN mkdir /home/ML-Workbench 11 | 12 | # Setting the working directory 13 | WORKDIR /home 14 | 15 | ADD . /home/ML-Workbench 16 | 17 | ADD google_cred.json /home 18 | ADD credentials.ini /home 19 | 20 | # Setting the environment variable 21 | ENV GOOGLE_APPLICATION_CREDENTIALS /home/google_cred.json 22 | 23 | RUN pwd 24 | 25 | #Running MLWB 26 | RUN pip3 install -r /home/ML-Workbench/requirements.txt 27 | 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 EkStep 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ML-Workbench 2 | 3 | ## What is it? 4 | ML-Workbench is a way to create, collaborate and consume Machine Learning (ML) tools and processes. It creates a level of abstraction that enables its users to express a ML application as a Directed Acyclic Graph. Each vertex of the graph represents an operation on the incoming data, while the edges represent the data flow. 5 | 6 | It is natural for ML solutions to go through revisions during the design phase or even through their lifetime, as they are unfinished by design. Also, the desired implementation of components that make up a ML application may not always be available in a single library or a language. This has created a high entry and customization barrier, making it difficult to create and maintain ML solutions. 7 | 8 | We have designed ML-Workbench as a solution to the above issues at [Ekstep](https://ekstep.org/). ML-Workbench will host common ML operations and processes that are widely recognised in the ML community, to help you quickly get to a baseline solution. These operations and processes may have multiple implementations to suit the needs of different types or scales of data. It will also provide different levels of engagement for people working on the solution design, operational implementation and scalability of the solution, to enable better collaboration and experimentation. 9 | 10 | ## Who should use it? 11 | If your solution has a long standing application, it is inevitable that the solution will require revisions and collaboration amongst multiple people. We recommend using ML-Workbench for individuals or organisation that are designing such long standing applications. 12 | 13 | ## Guiding principles 14 | * **Easy to initiate**: ML workbench will provide a ready-made library and documentation, that can enable even novice users to readily write new applications from scratch. 15 | * **Highly customizable**: The library will ensure that solutions are highly customizable, as the user can play and experiment with input parameters of APIs. It should enable addition, deletion or modification of intermediate steps. 16 | * **Extensible**: ML Workbench library will allow users to add their own custom libraries that comply with the specified guidelines and conventions. 17 | * **Automatically deployable**: The ML workbench will support creation of models and configuration files that can be directly used for deployment in production environment without further human intervention. 18 | * **Scalable**: The ML workbench will enable creation of an end-to-end ML application that can work on large scale data, with high performance. 19 | * **Repeatable**: The ML workbench will enable creation of applications which are robust and consistent, i.e. given identical datasets as input for different runs of an application, they would produce identical results without failure. 20 | 21 | ## Getting started 22 | 23 | ### Requirements 24 | - python and pip (supports python 3.6) 25 | - virtualenv (run `pip install virtualenv` or visit https://virtualenv.pypa.io/en/stable/installation/) 26 | 27 | ### Installation 28 | #### Installation from binary 29 | 1. The binary file is present at the following location `bin/` 30 | 2. Install - `pip install ML-Workbench/bin/daggit-0.5.0.tar.gz` 31 | #### Installation after build 32 | 1. Clone the repository or download the zipped file from `https://github.com/ekstep/ML-Workbench.git` 33 | 2. Change directory into ML-Workbench 34 | 3. Run `bash build.sh` 35 | 4. Install - `pip install bin/daggit-0.5.0.tar.gz` 36 | 37 | ### DAG Execution 38 | #### Inititalize a DAG 39 | 1. Use Command `daggit init ` 40 | #### Run a DAG 41 | 1. Use Command - `daggit run ` 42 | #### Seek help 43 | 1. Use - `daggit --help` to know more about the command 44 | 2. Help on daggit commands can be found using `daggit --help` 45 | 46 | ## License 47 | 48 | [MIT License](LICENSE) 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /bin/daggit-0.5.0-py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/bin/daggit-0.5.0-py3-none-any.whl -------------------------------------------------------------------------------- /bin/daggit-0.5.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/bin/daggit-0.5.0.tar.gz -------------------------------------------------------------------------------- /build.py: -------------------------------------------------------------------------------- 1 | from pybuilder.core import init, use_plugin 2 | 3 | use_plugin("python.core") 4 | use_plugin("python.install_dependencies") 5 | use_plugin("python.distutils") 6 | use_plugin('copy_resources') 7 | use_plugin("python.unittest") 8 | # use_plugin("python.coverage") 9 | use_plugin('python.flake8') 10 | use_plugin("exec") 11 | use_plugin('python.pycharm') 12 | use_plugin('python.sphinx') 13 | use_plugin('python.integrationtest') 14 | 15 | default_task = "publish" 16 | 17 | name = "daggit" 18 | version = "0.5.0" 19 | license = "MIT License" 20 | 21 | 22 | @init 23 | def initialize(project): 24 | project.plugin_depends_on("flake8", "~=3.7") 25 | project.depends_on_requirements("requirements.txt") 26 | project.build_depends_on('mockito') 27 | project.set_property_if_unset("filter_resources_target", "$dir_target") 28 | project.get_property('copy_resources_glob').append('LICENSE') 29 | project.set_property_if_unset("flake8_break_build", False) 30 | project.set_property_if_unset("flake8_max_line_length", 120) 31 | project.set_property_if_unset("flake8_include_patterns", None) 32 | project.set_property_if_unset("flake8_exclude_patterns", None) 33 | project.set_property_if_unset("flake8_include_test_sources", False) 34 | project.set_property_if_unset("flake8_include_scripts", True) 35 | project.set_property_if_unset("flake8_max_complexity", None) 36 | project.set_property_if_unset("flake8_verbose_output", False) 37 | project.set_property_if_unset("coverage_break_build", False) 38 | project.set_property("sphinx_config_path", "docs/source/") 39 | project.set_property("sphinx_source_dir", "docs/source/") 40 | project.set_property("sphinx_output_dir", "docs/_build") 41 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | cd "$(dirname "$0")" 5 | 6 | pybuilderInstalled=`pip freeze | grep 'pybuilder' | wc -l` 7 | 8 | if [ $pybuilderInstalled != 1 ] 9 | then 10 | echo "Installing pybuilder" 11 | pip install pybuilder 12 | fi 13 | 14 | pyb install_dependencies clean publish 15 | pyb sphinx_generate_documentation 16 | tox 17 | 18 | if [ ! -d "bin" ]; then 19 | mkdir 'bin' 20 | fi 21 | 22 | cp target/dist/daggit-0.5.0/dist/* bin/ 23 | 24 | rm -rf target/ 25 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | 3 | services: 4 | es: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:6.2.3 6 | container_name: es 7 | environment: 8 | - discovery.type=single-node 9 | ports: 10 | - 9200:9200 11 | 12 | auto_tagging: 13 | build: . 14 | depends_on: 15 | - es 16 | container_name: auto_tagging 17 | stdin_open: true 18 | tty: true 19 | 20 | 21 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/_build/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: 9bbe0906636c8f99cb7ede7efed4d99f 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/_build/.doctrees/daggit.contrib.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/.doctrees/daggit.contrib.doctree -------------------------------------------------------------------------------- /docs/_build/.doctrees/daggit.contrib.sunbird.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/.doctrees/daggit.contrib.sunbird.doctree -------------------------------------------------------------------------------- /docs/_build/.doctrees/daggit.contrib.sunbird.operators.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/.doctrees/daggit.contrib.sunbird.operators.doctree -------------------------------------------------------------------------------- /docs/_build/.doctrees/daggit.core.base.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/.doctrees/daggit.core.base.doctree -------------------------------------------------------------------------------- /docs/_build/.doctrees/daggit.core.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/.doctrees/daggit.core.doctree -------------------------------------------------------------------------------- /docs/_build/.doctrees/daggit.core.io.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/.doctrees/daggit.core.io.doctree -------------------------------------------------------------------------------- /docs/_build/.doctrees/daggit.core.operators.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/.doctrees/daggit.core.operators.doctree -------------------------------------------------------------------------------- /docs/_build/.doctrees/daggit.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/.doctrees/daggit.doctree -------------------------------------------------------------------------------- /docs/_build/.doctrees/daggit.runtime.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/.doctrees/daggit.runtime.doctree -------------------------------------------------------------------------------- /docs/_build/.doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/.doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/_build/.doctrees/getting_started.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/.doctrees/getting_started.doctree -------------------------------------------------------------------------------- /docs/_build/.doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/.doctrees/index.doctree -------------------------------------------------------------------------------- /docs/_build/.doctrees/intro.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/.doctrees/intro.doctree -------------------------------------------------------------------------------- /docs/_build/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/.nojekyll -------------------------------------------------------------------------------- /docs/_build/_modules/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Overview: module code — daggit 0.5.0 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 |
44 | 45 | 93 | 94 |
95 | 96 | 97 | 103 | 104 | 105 |
106 | 107 |
108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 |
126 | 127 |
    128 | 129 |
  • Docs »
  • 130 | 131 |
  • Overview: module code
  • 132 | 133 | 134 |
  • 135 | 136 |
  • 137 | 138 |
139 | 140 | 141 |
142 |
143 |
144 |
145 | 146 |

All modules for which code is available

147 | 153 | 154 |
155 | 156 |
157 |
158 | 159 | 160 |
161 | 162 |
163 |

164 | © Copyright 2019, Sunbird 165 | 166 |

167 |
168 | Built with Sphinx using a theme provided by Read the Docs. 169 | 170 |
171 | 172 |
173 |
174 | 175 |
176 | 177 |
178 | 179 | 180 | 181 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | -------------------------------------------------------------------------------- /docs/_build/_sources/daggit.contrib.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.contrib package 2 | ====================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | daggit.contrib.sunbird 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. automodule:: daggit.contrib 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /docs/_build/_sources/daggit.contrib.sunbird.operators.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.contrib.sunbird.operators package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.contrib.sunbird.operators.contentTagging module 8 | ------------------------------------------------------ 9 | 10 | .. automodule:: daggit.contrib.sunbird.operators.contentTagging 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | daggit.contrib.sunbird.operators.contentTaggingUtils module 16 | ----------------------------------------------------------- 17 | 18 | .. automodule:: daggit.contrib.sunbird.operators.contentTaggingUtils 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | daggit.contrib.sunbird.operators.interruptedcontentTagging module 24 | ----------------------------------------------------------------- 25 | 26 | .. automodule:: daggit.contrib.sunbird.operators.interruptedcontentTagging 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: daggit.contrib.sunbird.operators 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/_build/_sources/daggit.contrib.sunbird.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.contrib.sunbird package 2 | ============================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | daggit.contrib.sunbird.operators 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. automodule:: daggit.contrib.sunbird 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /docs/_build/_sources/daggit.core.base.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.core.base package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.core.base.config module 8 | ------------------------------ 9 | 10 | .. automodule:: daggit.core.base.config 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | daggit.core.base.factory module 16 | ------------------------------- 17 | 18 | .. automodule:: daggit.core.base.factory 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | daggit.core.base.parser module 24 | ------------------------------ 25 | 26 | .. automodule:: daggit.core.base.parser 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | daggit.core.base.utils module 32 | ----------------------------- 33 | 34 | .. automodule:: daggit.core.base.utils 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. automodule:: daggit.core.base 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/_build/_sources/daggit.core.io.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.core.io package 2 | ====================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.core.io.io module 8 | ------------------------ 9 | 10 | .. automodule:: daggit.core.io.io 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: daggit.core.io 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/_build/_sources/daggit.core.operators.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.core.operators package 2 | ============================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.core.operators.dfsplitters module 8 | ---------------------------------------- 9 | 10 | .. automodule:: daggit.core.operators.dfsplitters 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | daggit.core.operators.etl module 16 | -------------------------------- 17 | 18 | .. automodule:: daggit.core.operators.etl 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | daggit.core.operators.keras module 24 | ---------------------------------- 25 | 26 | .. automodule:: daggit.core.operators.keras 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | daggit.core.operators.operators\_registry module 32 | ------------------------------------------------ 33 | 34 | .. automodule:: daggit.core.operators.operators_registry 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | daggit.core.operators.sklearn module 40 | ------------------------------------ 41 | 42 | .. automodule:: daggit.core.operators.sklearn 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | Module contents 49 | --------------- 50 | 51 | .. automodule:: daggit.core.operators 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | -------------------------------------------------------------------------------- /docs/_build/_sources/daggit.core.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.core package 2 | =================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | daggit.core.base 10 | daggit.core.io 11 | daggit.core.operators 12 | 13 | Module contents 14 | --------------- 15 | 16 | .. automodule:: daggit.core 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | -------------------------------------------------------------------------------- /docs/_build/_sources/daggit.rst.txt: -------------------------------------------------------------------------------- 1 | daggit package 2 | ============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | daggit.contrib 10 | daggit.core 11 | daggit.runtime 12 | 13 | Module contents 14 | --------------- 15 | 16 | .. automodule:: daggit 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | -------------------------------------------------------------------------------- /docs/_build/_sources/daggit.runtime.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.runtime package 2 | ====================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.runtime.airflow\_dag module 8 | ---------------------------------- 9 | 10 | .. automodule:: daggit.runtime.airflow_dag 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | daggit.runtime.airflow\_runtime module 16 | -------------------------------------- 17 | 18 | .. automodule:: daggit.runtime.airflow_runtime 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: daggit.runtime 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/_build/_sources/getting_started.rst.txt: -------------------------------------------------------------------------------- 1 | .. _getting_started: 2 | 3 | Getting Started 4 | =============== 5 | 6 | 7 | Requirement 8 | ----------- 9 | 10 | - python and pip (supports python 3.6) 11 | - virtualenv (run ``pip install virtualenv`` or visit ``_) 12 | 13 | Installation 14 | ------------ 15 | 16 | Installation from binary 17 | ~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | - The binary file is present at the following location ``bin/`` 20 | - Install daggit- 21 | .. parsed-literal:: 22 | pip install bin/daggit-0.5.0.tar.gz 23 | 24 | Installation after build 25 | ~~~~~~~~~~~~~~~~~~~~~~~~ 26 | 27 | - Clone the repository or download the zipped file from ``_ 28 | .. parsed-literal:: 29 | git clone ``_ 30 | - Change directory into ML-Workbench 31 | - Run ``bash build.sh`` 32 | - Install - ``pip install bin/daggit-0.5.0.tar.gz`` 33 | 34 | DAG execution 35 | ------------- 36 | 37 | Initialize a DAG 38 | ~~~~~~~~~~~~~~~~ 39 | 40 | .. parsed-literal:: 41 | daggit init 42 | 43 | Run a DAG 44 | ~~~~~~~~~ 45 | 46 | .. parsed-literal:: 47 | daggit run 48 | 49 | Seek help 50 | ~~~~~~~~~ 51 | 52 | - Use ``daggit --help`` to know more about the command 53 | - Help on dagit commands can be found using ``daggit --help`` 54 | 55 | 56 | -------------------------------------------------------------------------------- /docs/_build/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | .. daggit documentation master file, created by 2 | sphinx-quickstart on Thu Jan 17 15:41:29 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to daggit's documentation! 7 | ================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 4 11 | :caption: Contents: 12 | 13 | intro 14 | getting_started 15 | daggit 16 | 17 | 18 | Indices and tables 19 | ================== 20 | 21 | * :ref:`genindex` 22 | * :ref:`modindex` 23 | * :ref:`search` 24 | -------------------------------------------------------------------------------- /docs/_build/_sources/intro.rst.txt: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | 5 | ML-Workbench 6 | ------------ 7 | 8 | What is it? 9 | ~~~~~~~~~~~ 10 | 11 | ML-Workbench is a way to create, collaborate and consume Machine Learning (ML) tools and processes. It creates a level of abstraction that enables its users to express a ML application as a Directed Acyclic Graph. Each vertex of the graph represents an operation on the incoming data, while the edges represent the data flow. 12 | 13 | It is natural for ML solutions to go through revisions during the design phase or even through their lifetime, as they are unfinished by design. Also, the desired implementation of components that make up a ML application may not always be available in a single library or a language. This has created a high entry and customization barrier, making it difficult to create and maintain ML solutions. 14 | 15 | We have designed ML-Workbench as a solution to the above issues at Ekstep. ML-Workbench will host common ML operations and processes that are widely recognised in the ML community, to help you quickly get to a baseline solution. These operations and processes may have multiple implementations to suit the needs of different types or scales of data. It will also provide different levels of engagement for people working on the solution design, operational implementation and scalability of the solution, to enable better collaboration and experimentation. 16 | 17 | Who should use it? 18 | ~~~~~~~~~~~~~~~~~~ 19 | 20 | If your solution has a long standing application, it is inevitable that the solution will require revisions and collaboration amongst multiple people. We recommend using ML-Workbench for individuals or organisation that are designing such long standing applications. 21 | 22 | Guiding principles 23 | ~~~~~~~~~~~~~~~~~~ 24 | 25 | * **Easy to initiate**: ML workbench will provide a ready-made library and documentation, that can enable even novice users to readily write new applications from scratch. 26 | * **Highly customizable**: The library will ensure that solutions are highly customizable, as the user can play and experiment with input parameters of APIs. It should enable addition, deletion or modification of intermediate steps. 27 | * **Extensible**: ML Workbench library will allow users to add their own custom libraries that comply with the specified guidelines and conventions. 28 | * **Automatically deployable**: The ML workbench will support creation of models and configuration files that can be directly used for deployment in production environment without further human intervention. 29 | * **Scalable**: The ML workbench will enable creation of an end-to-end ML application that can work on large scale data, with high performance. 30 | * **Repeatable**: The ML workbench will enable creation of applications which are robust and consistent, i.e. given identical datasets as input for different runs of an application, they would produce identical results without failure. 31 | 32 | -------------------------------------------------------------------------------- /docs/_build/_static/ajax-loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/ajax-loader.gif -------------------------------------------------------------------------------- /docs/_build/_static/comment-bright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/comment-bright.png -------------------------------------------------------------------------------- /docs/_build/_static/comment-close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/comment-close.png -------------------------------------------------------------------------------- /docs/_build/_static/comment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/comment.png -------------------------------------------------------------------------------- /docs/_build/_static/css/badge_only.css: -------------------------------------------------------------------------------- 1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-weight:normal;font-style:normal;src:url("../fonts/fontawesome-webfont.eot");src:url("../fonts/fontawesome-webfont.eot?#iefix") format("embedded-opentype"),url("../fonts/fontawesome-webfont.woff") format("woff"),url("../fonts/fontawesome-webfont.ttf") format("truetype"),url("../fonts/fontawesome-webfont.svg#FontAwesome") format("svg")}.fa:before{display:inline-block;font-family:FontAwesome;font-style:normal;font-weight:normal;line-height:1;text-decoration:inherit}a .fa{display:inline-block;text-decoration:inherit}li .fa{display:inline-block}li .fa-large:before,li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-0.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before,ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before{content:""}.icon-book:before{content:""}.fa-caret-down:before{content:""}.icon-caret-down:before{content:""}.fa-caret-up:before{content:""}.icon-caret-up:before{content:""}.fa-caret-left:before{content:""}.icon-caret-left:before{content:""}.fa-caret-right:before{content:""}.icon-caret-right:before{content:""}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book{float:left}.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book{float:left}.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} 2 | -------------------------------------------------------------------------------- /docs/_build/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '0.5.0', 4 | LANGUAGE: 'None', 5 | COLLAPSE_INDEX: false, 6 | FILE_SUFFIX: '.html', 7 | HAS_SOURCE: true, 8 | SOURCELINK_SUFFIX: '.txt', 9 | NAVIGATION_WITH_KEYS: false 10 | }; -------------------------------------------------------------------------------- /docs/_build/_static/down-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/down-pressed.png -------------------------------------------------------------------------------- /docs/_build/_static/down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/down.png -------------------------------------------------------------------------------- /docs/_build/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/file.png -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Inconsolata-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Inconsolata-Bold.ttf -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Inconsolata-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Inconsolata-Regular.ttf -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Inconsolata.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Inconsolata.ttf -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato-Bold.ttf -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato-Regular.ttf -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-bold.eot -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-bold.ttf -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-bold.woff -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-bolditalic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-bolditalic.eot -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-bolditalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-bolditalic.ttf -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-bolditalic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-bolditalic.woff -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-bolditalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-bolditalic.woff2 -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-italic.eot -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-italic.ttf -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-italic.woff -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-italic.woff2 -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-regular.eot -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-regular.ttf -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-regular.woff -------------------------------------------------------------------------------- /docs/_build/_static/fonts/Lato/lato-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/Lato/lato-regular.woff2 -------------------------------------------------------------------------------- /docs/_build/_static/fonts/RobotoSlab-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/RobotoSlab-Bold.ttf -------------------------------------------------------------------------------- /docs/_build/_static/fonts/RobotoSlab-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/RobotoSlab-Regular.ttf -------------------------------------------------------------------------------- /docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot -------------------------------------------------------------------------------- /docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf -------------------------------------------------------------------------------- /docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff -------------------------------------------------------------------------------- /docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 -------------------------------------------------------------------------------- /docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot -------------------------------------------------------------------------------- /docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf -------------------------------------------------------------------------------- /docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff -------------------------------------------------------------------------------- /docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 -------------------------------------------------------------------------------- /docs/_build/_static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/_build/_static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/_build/_static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/_build/_static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/_build/_static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/_build/_static/js/theme.js: -------------------------------------------------------------------------------- 1 | /* sphinx_rtd_theme version 0.4.3 | MIT license */ 2 | /* Built 20190212 16:02 */ 3 | require=function r(s,a,l){function c(e,n){if(!a[e]){if(!s[e]){var i="function"==typeof require&&require;if(!n&&i)return i(e,!0);if(u)return u(e,!0);var t=new Error("Cannot find module '"+e+"'");throw t.code="MODULE_NOT_FOUND",t}var o=a[e]={exports:{}};s[e][0].call(o.exports,function(n){return c(s[e][1][n]||n)},o,o.exports,r,s,a,l)}return a[e].exports}for(var u="function"==typeof require&&require,n=0;n"),i("table.docutils.footnote").wrap("
"),i("table.docutils.citation").wrap("
"),i(".wy-menu-vertical ul").not(".simple").siblings("a").each(function(){var e=i(this);expand=i(''),expand.on("click",function(n){return t.toggleCurrent(e),n.stopPropagation(),!1}),e.prepend(expand)})},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),i=e.find('[href="'+n+'"]');if(0===i.length){var t=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(i=e.find('[href="#'+t.attr("id")+'"]')).length&&(i=e.find('[href="#"]'))}0this.docHeight||(this.navBar.scrollTop(i),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",function(){this.linkScroll=!1})},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:e.exports.ThemeNav,StickyNav:e.exports.ThemeNav}),function(){for(var r=0,n=["ms","moz","webkit","o"],e=0;e 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Search — daggit 0.5.0 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 |
45 | 46 | 94 | 95 |
96 | 97 | 98 | 104 | 105 | 106 |
107 | 108 |
109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 |
127 | 128 |
    129 | 130 |
  • Docs »
  • 131 | 132 |
  • Search
  • 133 | 134 | 135 |
  • 136 | 137 | 138 | 139 |
  • 140 | 141 |
142 | 143 | 144 |
145 |
146 |
147 |
148 | 149 | 157 | 158 | 159 |
160 | 161 |
162 | 163 |
164 | 165 |
166 |
167 | 168 | 169 |
170 | 171 |
172 |

173 | © Copyright 2019, Sunbird 174 | 175 |

176 |
177 | Built with Sphinx using a theme provided by Read the Docs. 178 | 179 |
180 | 181 |
182 |
183 | 184 |
185 | 186 |
187 | 188 | 189 | 190 | 195 | 196 | 197 | 198 | 199 | 200 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | -------------------------------------------------------------------------------- /docs/build/doctrees/daggit.contrib.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/doctrees/daggit.contrib.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/daggit.contrib.sunbird.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/doctrees/daggit.contrib.sunbird.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/daggit.contrib.sunbird.operators.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/doctrees/daggit.contrib.sunbird.operators.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/daggit.core.base.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/doctrees/daggit.core.base.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/daggit.core.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/doctrees/daggit.core.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/daggit.core.io.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/doctrees/daggit.core.io.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/daggit.core.operators.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/doctrees/daggit.core.operators.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/daggit.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/doctrees/daggit.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/daggit.runtime.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/doctrees/daggit.runtime.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/build/doctrees/getting_started.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/doctrees/getting_started.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/doctrees/index.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/intro.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/doctrees/intro.doctree -------------------------------------------------------------------------------- /docs/build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: 2247f5a7068599d8612a323d6bdc9c0f 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/build/html/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/.nojekyll -------------------------------------------------------------------------------- /docs/build/html/_sources/daggit.contrib.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.contrib package 2 | ====================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | daggit.contrib.sunbird 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. automodule:: daggit.contrib 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /docs/build/html/_sources/daggit.contrib.sunbird.operators.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.contrib.sunbird.operators package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.contrib.sunbird.operators.contentTagging module 8 | ------------------------------------------------------ 9 | 10 | .. automodule:: daggit.contrib.sunbird.operators.contentTagging 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | daggit.contrib.sunbird.operators.contentTaggingUtils module 16 | ----------------------------------------------------------- 17 | 18 | .. automodule:: daggit.contrib.sunbird.operators.contentTaggingUtils 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | daggit.contrib.sunbird.operators.interruptedcontentTagging module 24 | ----------------------------------------------------------------- 25 | 26 | .. automodule:: daggit.contrib.sunbird.operators.interruptedcontentTagging 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: daggit.contrib.sunbird.operators 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/build/html/_sources/daggit.contrib.sunbird.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.contrib.sunbird package 2 | ============================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | daggit.contrib.sunbird.operators 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. automodule:: daggit.contrib.sunbird 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /docs/build/html/_sources/daggit.core.base.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.core.base package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.core.base.config module 8 | ------------------------------ 9 | 10 | .. automodule:: daggit.core.base.config 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | daggit.core.base.factory module 16 | ------------------------------- 17 | 18 | .. automodule:: daggit.core.base.factory 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | daggit.core.base.parser module 24 | ------------------------------ 25 | 26 | .. automodule:: daggit.core.base.parser 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | daggit.core.base.utils module 32 | ----------------------------- 33 | 34 | .. automodule:: daggit.core.base.utils 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. automodule:: daggit.core.base 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/build/html/_sources/daggit.core.io.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.core.io package 2 | ====================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.core.io.io module 8 | ------------------------ 9 | 10 | .. automodule:: daggit.core.io.io 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: daggit.core.io 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/build/html/_sources/daggit.core.operators.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.core.operators package 2 | ============================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.core.operators.dfsplitters module 8 | ---------------------------------------- 9 | 10 | .. automodule:: daggit.core.operators.dfsplitters 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | daggit.core.operators.etl module 16 | -------------------------------- 17 | 18 | .. automodule:: daggit.core.operators.etl 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | daggit.core.operators.keras module 24 | ---------------------------------- 25 | 26 | .. automodule:: daggit.core.operators.keras 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | daggit.core.operators.operators\_registry module 32 | ------------------------------------------------ 33 | 34 | .. automodule:: daggit.core.operators.operators_registry 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | daggit.core.operators.sklearn module 40 | ------------------------------------ 41 | 42 | .. automodule:: daggit.core.operators.sklearn 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | Module contents 49 | --------------- 50 | 51 | .. automodule:: daggit.core.operators 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | -------------------------------------------------------------------------------- /docs/build/html/_sources/daggit.core.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.core package 2 | =================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | daggit.core.base 10 | daggit.core.io 11 | daggit.core.operators 12 | 13 | Module contents 14 | --------------- 15 | 16 | .. automodule:: daggit.core 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | -------------------------------------------------------------------------------- /docs/build/html/_sources/daggit.rst.txt: -------------------------------------------------------------------------------- 1 | daggit package 2 | ============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | daggit.contrib 10 | daggit.core 11 | daggit.runtime 12 | 13 | Module contents 14 | --------------- 15 | 16 | .. automodule:: daggit 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | -------------------------------------------------------------------------------- /docs/build/html/_sources/daggit.runtime.rst.txt: -------------------------------------------------------------------------------- 1 | daggit.runtime package 2 | ====================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.runtime.airflow\_dag module 8 | ---------------------------------- 9 | 10 | .. automodule:: daggit.runtime.airflow_dag 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | daggit.runtime.airflow\_runtime module 16 | -------------------------------------- 17 | 18 | .. automodule:: daggit.runtime.airflow_runtime 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: daggit.runtime 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/build/html/_sources/getting_started.rst.txt: -------------------------------------------------------------------------------- 1 | .. _getting_started: 2 | 3 | Getting Started 4 | =============== 5 | 6 | 7 | Requirement 8 | ----------- 9 | 10 | - python and pip (supports python 3.6) 11 | - virtualenv (run ``pip install virtualenv`` or visit ``_) 12 | 13 | Installation 14 | ------------ 15 | 16 | Installation from binary 17 | ~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | - The binary file is present at the following location ``bin/`` 20 | - Install daggit- 21 | .. parsed-literal:: 22 | pip install bin/daggit-0.5.0.tar.gz 23 | 24 | Installation after build 25 | ~~~~~~~~~~~~~~~~~~~~~~~~ 26 | 27 | - Clone the repository or download the zipped file from ``_ 28 | .. parsed-literal:: 29 | git clone ``_ 30 | - Change directory into ML-Workbench 31 | - Run ``bash build.sh`` 32 | - Install - ``pip install bin/daggit-0.5.0.tar.gz`` 33 | 34 | DAG execution 35 | ------------- 36 | 37 | Initialize a DAG 38 | ~~~~~~~~~~~~~~~~ 39 | 40 | .. parsed-literal:: 41 | daggit init 42 | 43 | Run a DAG 44 | ~~~~~~~~~ 45 | 46 | .. parsed-literal:: 47 | daggit run 48 | 49 | Seek help 50 | ~~~~~~~~~ 51 | 52 | - Use ``daggit --help`` to know more about the command 53 | - Help on dagit commands can be found using ``daggit --help`` 54 | 55 | 56 | -------------------------------------------------------------------------------- /docs/build/html/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | .. daggit documentation master file, created by 2 | sphinx-quickstart on Thu Jan 17 15:41:29 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to daggit's documentation! 7 | ================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 4 11 | :caption: Contents: 12 | 13 | intro 14 | getting_started 15 | daggit 16 | 17 | 18 | Indices and tables 19 | ================== 20 | 21 | * :ref:`genindex` 22 | * :ref:`modindex` 23 | * :ref:`search` 24 | -------------------------------------------------------------------------------- /docs/build/html/_sources/intro.rst.txt: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | 5 | ML-Workbench 6 | ------------ 7 | 8 | What is it? 9 | ~~~~~~~~~~~ 10 | 11 | ML-Workbench is a way to create, collaborate and consume Machine Learning (ML) tools and processes. It creates a level of abstraction that enables its users to express a ML application as a Directed Acyclic Graph. Each vertex of the graph represents an operation on the incoming data, while the edges represent the data flow. 12 | 13 | It is natural for ML solutions to go through revisions during the design phase or even through their lifetime, as they are unfinished by design. Also, the desired implementation of components that make up a ML application may not always be available in a single library or a language. This has created a high entry and customization barrier, making it difficult to create and maintain ML solutions. 14 | 15 | We have designed ML-Workbench as a solution to the above issues at Ekstep. ML-Workbench will host common ML operations and processes that are widely recognised in the ML community, to help you quickly get to a baseline solution. These operations and processes may have multiple implementations to suit the needs of different types or scales of data. It will also provide different levels of engagement for people working on the solution design, operational implementation and scalability of the solution, to enable better collaboration and experimentation. 16 | 17 | Who should use it? 18 | ~~~~~~~~~~~~~~~~~~ 19 | 20 | If your solution has a long standing application, it is inevitable that the solution will require revisions and collaboration amongst multiple people. We recommend using ML-Workbench for individuals or organisation that are designing such long standing applications. 21 | 22 | Guiding principles 23 | ~~~~~~~~~~~~~~~~~~ 24 | 25 | * **Easy to initiate**: ML workbench will provide a ready-made library and documentation, that can enable even novice users to readily write new applications from scratch. 26 | * **Highly customizable**: The library will ensure that solutions are highly customizable, as the user can play and experiment with input parameters of APIs. It should enable addition, deletion or modification of intermediate steps. 27 | * **Extensible**: ML Workbench library will allow users to add their own custom libraries that comply with the specified guidelines and conventions. 28 | * **Automatically deployable**: The ML workbench will support creation of models and configuration files that can be directly used for deployment in production environment without further human intervention. 29 | * **Scalable**: The ML workbench will enable creation of an end-to-end ML application that can work on large scale data, with high performance. 30 | * **Repeatable**: The ML workbench will enable creation of applications which are robust and consistent, i.e. given identical datasets as input for different runs of an application, they would produce identical results without failure. 31 | 32 | -------------------------------------------------------------------------------- /docs/build/html/_static/ajax-loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/ajax-loader.gif -------------------------------------------------------------------------------- /docs/build/html/_static/comment-bright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/comment-bright.png -------------------------------------------------------------------------------- /docs/build/html/_static/comment-close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/comment-close.png -------------------------------------------------------------------------------- /docs/build/html/_static/comment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/comment.png -------------------------------------------------------------------------------- /docs/build/html/_static/css/badge_only.css: -------------------------------------------------------------------------------- 1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-weight:normal;font-style:normal;src:url("../fonts/fontawesome-webfont.eot");src:url("../fonts/fontawesome-webfont.eot?#iefix") format("embedded-opentype"),url("../fonts/fontawesome-webfont.woff") format("woff"),url("../fonts/fontawesome-webfont.ttf") format("truetype"),url("../fonts/fontawesome-webfont.svg#FontAwesome") format("svg")}.fa:before{display:inline-block;font-family:FontAwesome;font-style:normal;font-weight:normal;line-height:1;text-decoration:inherit}a .fa{display:inline-block;text-decoration:inherit}li .fa{display:inline-block}li .fa-large:before,li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-0.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before,ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before{content:""}.icon-book:before{content:""}.fa-caret-down:before{content:""}.icon-caret-down:before{content:""}.fa-caret-up:before{content:""}.icon-caret-up:before{content:""}.fa-caret-left:before{content:""}.icon-caret-left:before{content:""}.fa-caret-right:before{content:""}.icon-caret-right:before{content:""}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book{float:left}.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up{height:auto;max-height:100%}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book{float:left}.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} 2 | -------------------------------------------------------------------------------- /docs/build/html/_static/down-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/down-pressed.png -------------------------------------------------------------------------------- /docs/build/html/_static/down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/down.png -------------------------------------------------------------------------------- /docs/build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/file.png -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Inconsolata-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Inconsolata-Bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Inconsolata-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Inconsolata-Regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Inconsolata.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Inconsolata.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato-Bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato-Regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-bold.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-bolditalic.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-italic.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-italic.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-italic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-italic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-regular.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/Lato/lato-regular.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/RobotoSlab-Bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/RobotoSlab-Regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/docs/build/html/_static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/js/theme.js: -------------------------------------------------------------------------------- 1 | /* sphinx_rtd_theme version 0.4.2 | MIT license */ 2 | /* Built 20181005 13:10 */ 3 | require=function r(s,a,l){function c(e,n){if(!a[e]){if(!s[e]){var i="function"==typeof require&&require;if(!n&&i)return i(e,!0);if(u)return u(e,!0);var t=new Error("Cannot find module '"+e+"'");throw t.code="MODULE_NOT_FOUND",t}var o=a[e]={exports:{}};s[e][0].call(o.exports,function(n){return c(s[e][1][n]||n)},o,o.exports,r,s,a,l)}return a[e].exports}for(var u="function"==typeof require&&require,n=0;n"),i("table.docutils.footnote").wrap("
"),i("table.docutils.citation").wrap("
"),i(".wy-menu-vertical ul").not(".simple").siblings("a").each(function(){var e=i(this);expand=i(''),expand.on("click",function(n){return t.toggleCurrent(e),n.stopPropagation(),!1}),e.prepend(expand)})},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),i=e.find('[href="'+n+'"]');if(0===i.length){var t=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(i=e.find('[href="#'+t.attr("id")+'"]')).length&&(i=e.find('[href="#"]'))}0this.docHeight||(this.navBar.scrollTop(i),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",function(){this.linkScroll=!1})},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:e.exports.ThemeNav,StickyNav:e.exports.ThemeNav}),function(){for(var r=0,n=["ms","moz","webkit","o"],e=0;e 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Search — daggit 0.5.0 documentation 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 |
41 | 42 | 43 | 91 | 92 |
93 | 94 | 95 | 101 | 102 | 103 |
104 | 105 |
106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 |
124 | 125 |
    126 | 127 |
  • Docs »
  • 128 | 129 |
  • 130 | 131 | 132 |
  • 133 | 134 | 135 | 136 |
  • 137 | 138 |
139 | 140 | 141 |
142 |
143 |
144 |
145 | 146 | 154 | 155 | 156 |
157 | 158 |
159 | 160 |
161 | 162 |
163 |
164 | 165 | 166 |
167 | 168 |
169 |

170 | © Copyright 2019, Sunbird 171 | 172 |

173 |
174 | Built with Sphinx using a theme provided by Read the Docs. 175 | 176 |
177 | 178 |
179 |
180 | 181 |
182 | 183 |
184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 207 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/daggit.contrib.rst: -------------------------------------------------------------------------------- 1 | daggit.contrib package 2 | ====================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | daggit.contrib.sunbird 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. automodule:: daggit.contrib 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /docs/source/daggit.contrib.sunbird.operators.rst: -------------------------------------------------------------------------------- 1 | daggit.contrib.sunbird.operators package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.contrib.sunbird.operators.contentTagging module 8 | ------------------------------------------------------ 9 | 10 | .. automodule:: daggit.contrib.sunbird.operators.contentTagging 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | daggit.contrib.sunbird.operators.contentTaggingUtils module 16 | ----------------------------------------------------------- 17 | 18 | .. automodule:: daggit.contrib.sunbird.operators.contentTaggingUtils 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | daggit.contrib.sunbird.operators.interruptedcontentTagging module 24 | ----------------------------------------------------------------- 25 | 26 | .. automodule:: daggit.contrib.sunbird.operators.interruptedcontentTagging 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: daggit.contrib.sunbird.operators 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/source/daggit.contrib.sunbird.rst: -------------------------------------------------------------------------------- 1 | daggit.contrib.sunbird package 2 | ============================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | daggit.contrib.sunbird.operators 10 | 11 | Module contents 12 | --------------- 13 | 14 | .. automodule:: daggit.contrib.sunbird 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /docs/source/daggit.core.base.rst: -------------------------------------------------------------------------------- 1 | daggit.core.base package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.core.base.config module 8 | ------------------------------ 9 | 10 | .. automodule:: daggit.core.base.config 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | daggit.core.base.factory module 16 | ------------------------------- 17 | 18 | .. automodule:: daggit.core.base.factory 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | daggit.core.base.parser module 24 | ------------------------------ 25 | 26 | .. automodule:: daggit.core.base.parser 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | daggit.core.base.utils module 32 | ----------------------------- 33 | 34 | .. automodule:: daggit.core.base.utils 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. automodule:: daggit.core.base 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/source/daggit.core.io.rst: -------------------------------------------------------------------------------- 1 | daggit.core.io package 2 | ====================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.core.io.io module 8 | ------------------------ 9 | 10 | .. automodule:: daggit.core.io.io 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: daggit.core.io 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/source/daggit.core.operators.rst: -------------------------------------------------------------------------------- 1 | daggit.core.operators package 2 | ============================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.core.operators.dfsplitters module 8 | ---------------------------------------- 9 | 10 | .. automodule:: daggit.core.operators.dfsplitters 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | daggit.core.operators.etl module 16 | -------------------------------- 17 | 18 | .. automodule:: daggit.core.operators.etl 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | daggit.core.operators.keras module 24 | ---------------------------------- 25 | 26 | .. automodule:: daggit.core.operators.keras 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | daggit.core.operators.operators\_registry module 32 | ------------------------------------------------ 33 | 34 | .. automodule:: daggit.core.operators.operators_registry 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | daggit.core.operators.sklearn module 40 | ------------------------------------ 41 | 42 | .. automodule:: daggit.core.operators.sklearn 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | Module contents 49 | --------------- 50 | 51 | .. automodule:: daggit.core.operators 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | -------------------------------------------------------------------------------- /docs/source/daggit.core.rst: -------------------------------------------------------------------------------- 1 | daggit.core package 2 | =================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | daggit.core.base 10 | daggit.core.io 11 | daggit.core.operators 12 | 13 | Module contents 14 | --------------- 15 | 16 | .. automodule:: daggit.core 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | -------------------------------------------------------------------------------- /docs/source/daggit.rst: -------------------------------------------------------------------------------- 1 | daggit package 2 | ============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | daggit.contrib 10 | daggit.core 11 | daggit.runtime 12 | 13 | Module contents 14 | --------------- 15 | 16 | .. automodule:: daggit 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | -------------------------------------------------------------------------------- /docs/source/daggit.runtime.rst: -------------------------------------------------------------------------------- 1 | daggit.runtime package 2 | ====================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | daggit.runtime.airflow\_dag module 8 | ---------------------------------- 9 | 10 | .. automodule:: daggit.runtime.airflow_dag 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | daggit.runtime.airflow\_runtime module 16 | -------------------------------------- 17 | 18 | .. automodule:: daggit.runtime.airflow_runtime 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: daggit.runtime 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/source/getting_started.rst: -------------------------------------------------------------------------------- 1 | .. _getting_started: 2 | 3 | Getting Started 4 | =============== 5 | 6 | 7 | Requirement 8 | ----------- 9 | 10 | - python and pip (supports python 3.6) 11 | - virtualenv (run ``pip install virtualenv`` or visit ``_) 12 | 13 | Installation 14 | ------------ 15 | 16 | Installation from binary 17 | ~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | - The binary file is present at the following location ``bin/`` 20 | - Install daggit- 21 | .. parsed-literal:: 22 | pip install bin/daggit-0.5.0.tar.gz 23 | 24 | Installation after build 25 | ~~~~~~~~~~~~~~~~~~~~~~~~ 26 | 27 | - Clone the repository or download the zipped file from ``_ 28 | .. parsed-literal:: 29 | git clone ``_ 30 | - Change directory into ML-Workbench 31 | - Run ``bash build.sh`` 32 | - Install - ``pip install bin/daggit-0.5.0.tar.gz`` 33 | 34 | DAG execution 35 | ------------- 36 | 37 | Initialize a DAG 38 | ~~~~~~~~~~~~~~~~ 39 | 40 | .. parsed-literal:: 41 | daggit init 42 | 43 | Run a DAG 44 | ~~~~~~~~~ 45 | 46 | .. parsed-literal:: 47 | daggit run 48 | 49 | Seek help 50 | ~~~~~~~~~ 51 | 52 | - Use ``daggit --help`` to know more about the command 53 | - Help on dagit commands can be found using ``daggit --help`` 54 | 55 | 56 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. daggit documentation master file, created by 2 | sphinx-quickstart on Thu Jan 17 15:41:29 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to daggit's documentation! 7 | ================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 4 11 | :caption: Contents: 12 | 13 | intro 14 | getting_started 15 | daggit 16 | 17 | 18 | Indices and tables 19 | ================== 20 | 21 | * :ref:`genindex` 22 | * :ref:`modindex` 23 | * :ref:`search` 24 | -------------------------------------------------------------------------------- /docs/source/intro.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | 5 | ML-Workbench 6 | ------------ 7 | 8 | What is it? 9 | ~~~~~~~~~~~ 10 | 11 | ML-Workbench is a way to create, collaborate and consume Machine Learning (ML) tools and processes. It creates a level of abstraction that enables its users to express a ML application as a Directed Acyclic Graph. Each vertex of the graph represents an operation on the incoming data, while the edges represent the data flow. 12 | 13 | It is natural for ML solutions to go through revisions during the design phase or even through their lifetime, as they are unfinished by design. Also, the desired implementation of components that make up a ML application may not always be available in a single library or a language. This has created a high entry and customization barrier, making it difficult to create and maintain ML solutions. 14 | 15 | We have designed ML-Workbench as a solution to the above issues at Ekstep. ML-Workbench will host common ML operations and processes that are widely recognised in the ML community, to help you quickly get to a baseline solution. These operations and processes may have multiple implementations to suit the needs of different types or scales of data. It will also provide different levels of engagement for people working on the solution design, operational implementation and scalability of the solution, to enable better collaboration and experimentation. 16 | 17 | Who should use it? 18 | ~~~~~~~~~~~~~~~~~~ 19 | 20 | If your solution has a long standing application, it is inevitable that the solution will require revisions and collaboration amongst multiple people. We recommend using ML-Workbench for individuals or organisation that are designing such long standing applications. 21 | 22 | Guiding principles 23 | ~~~~~~~~~~~~~~~~~~ 24 | 25 | * **Easy to initiate**: ML workbench will provide a ready-made library and documentation, that can enable even novice users to readily write new applications from scratch. 26 | * **Highly customizable**: The library will ensure that solutions are highly customizable, as the user can play and experiment with input parameters of APIs. It should enable addition, deletion or modification of intermediate steps. 27 | * **Extensible**: ML Workbench library will allow users to add their own custom libraries that comply with the specified guidelines and conventions. 28 | * **Automatically deployable**: The ML workbench will support creation of models and configuration files that can be directly used for deployment in production environment without further human intervention. 29 | * **Scalable**: The ML workbench will enable creation of an end-to-end ML application that can work on large scale data, with high performance. 30 | * **Repeatable**: The ML workbench will enable creation of applications which are robust and consistent, i.e. given identical datasets as input for different runs of an application, they would produce identical results without failure. 31 | 32 | -------------------------------------------------------------------------------- /examples/content_reuse/content_reuse.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: Content_reuse 2 | owner: sunbird 3 | 4 | inputs: 5 | DS_DATA_HOME: 6 | pathTocredentials: inputs/credentials.ini 7 | pathToLanguageMapping: inputs/language_mapping.json 8 | pathToPDF: 9 | pathToToc: 10 | pathToReference_DTB: 11 | pathToActualDTB: 12 | 13 | 14 | outputs: 15 | dummy: /home/DS_HOME 16 | 17 | 18 | graph: 19 | - node_name: ocr_text_extraction 20 | inputs: [DS_DATA_HOME, pathTocredentials, pathToPDF] 21 | outputs: [path_to_result_folder] 22 | operation: contentreuse.OcrTextExtraction 23 | arguments: 24 | gcp_bucket_name: contentreuse_experiment 25 | content_id: 26 | ocr_method: GOCR 27 | 28 | - node_name: text_extraction_evaluation 29 | inputs: [path_to_result_folder, pathToLanguageMapping, pathToToc] 30 | outputs: [path_to_evaluation_result] 31 | operation: contentreuse.TextExtractionEvaluation 32 | 33 | - node_name: create_dtb 34 | inputs: [ocr_text_extraction.path_to_result_folder, pathToToc] 35 | outputs: [path_to_dtb_json_file] 36 | operation: contentreuse.CreateDTB 37 | arguments: 38 | col_name: [Chapter Name] 39 | 40 | - node_name: dtb_creation_evaluation 41 | inputs: [create_dtb.path_to_dtb_json_file, pathToToc, pathToActualDTB] 42 | outputs: [path_to_dtb_evaluation_result] 43 | operation: contentreuse.DTBCreationEvaluation 44 | arguments: 45 | level: Chapter Name 46 | 47 | - node_name: dtb_mapping 48 | inputs: [ocr_text_extraction.path_to_result_folder, create_dtb.path_to_dtb_json_file, pathToReference_DTB] 49 | outputs: [path_to_mapping_json] 50 | operation: contentreuse.DTBMapping 51 | arguments: 52 | no_of_recommendations: 5 53 | distance_method: BERT 54 | -------------------------------------------------------------------------------- /examples/content_reuse/inputs/credentials.ini: -------------------------------------------------------------------------------- 1 | [graph] 2 | scheme = 'bolt' 3 | host = 'localhost' 4 | port = 7687 5 | user = 'neo4j' 6 | password = 'hello' 7 | max_connections = 40 8 | secure = False 9 | [relationship] 10 | start_node_label = 'DTBUnit' 11 | end_node_label = 'Concept' 12 | relationship_label = 'memberOf' 13 | relationship_properties = {'fromNodeType': 'DTBUnit', 'toNodeType': 'Concept', 'method': 'BERT', 'version': ''} -------------------------------------------------------------------------------- /examples/content_reuse/inputs/font_corpus/Gujarati_lohit.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/examples/content_reuse/inputs/font_corpus/Gujarati_lohit.ttf -------------------------------------------------------------------------------- /examples/content_reuse/inputs/font_corpus/Hindi_lohit.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/examples/content_reuse/inputs/font_corpus/Hindi_lohit.ttf -------------------------------------------------------------------------------- /examples/content_reuse/inputs/font_corpus/Kannada_lohit.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/examples/content_reuse/inputs/font_corpus/Kannada_lohit.ttf -------------------------------------------------------------------------------- /examples/content_reuse/inputs/font_corpus/Malayalam_lohit.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/examples/content_reuse/inputs/font_corpus/Malayalam_lohit.ttf -------------------------------------------------------------------------------- /examples/content_reuse/inputs/font_corpus/Oriya_lohit.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/examples/content_reuse/inputs/font_corpus/Oriya_lohit.ttf -------------------------------------------------------------------------------- /examples/content_reuse/inputs/font_corpus/Panjabi_lohit.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/examples/content_reuse/inputs/font_corpus/Panjabi_lohit.ttf -------------------------------------------------------------------------------- /examples/content_reuse/inputs/font_corpus/Tamil_lohit.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/examples/content_reuse/inputs/font_corpus/Tamil_lohit.ttf -------------------------------------------------------------------------------- /examples/content_reuse/inputs/font_corpus/Telugu_lohit.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/examples/content_reuse/inputs/font_corpus/Telugu_lohit.ttf -------------------------------------------------------------------------------- /examples/content_reuse/inputs/language_mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "English": "/Library/Fonts/Arial Unicode.ttf", 3 | "Telugu": "/font_corpus/Telugu_lohit.ttf", 4 | "Kannada": "/font_corpus/Kannada_lohit.ttf", 5 | "Tamil": "/font_corpus/Tamil_lohit.ttf", 6 | "Malayalam": "/font_corpus/Malayalam_lohit.ttf", 7 | "Hindi": "/font_corpus/Hindi_lohit.ttf" 8 | } -------------------------------------------------------------------------------- /examples/content_reuse/inputs/siamese_configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "EMBEDDING_DIM": 768, 3 | "MAX_SEQUENCE_LENGTH" : 10, 4 | "VALIDATION_SPLIT": 0.1, 5 | "RATE_DROP_LSTM": 0.17, 6 | "RATE_DROP_DENSE": 0.25, 7 | "NUMBER_LSTM": 50, 8 | "NUMBER_DENSE_UNITS": 50, 9 | "ACTIVATION_FUNCTION": "relu" 10 | } -------------------------------------------------------------------------------- /examples/content_reuse/server_start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #opening a new terminal 4 | #gnome-terminal --tab #ubuntu 5 | 6 | _cwd="$PWD" 7 | 8 | bert_model= "uncased_L-12_H-768_A-12" 9 | file_name=$_cwd"/inputs/uncased_L-12_H-768_A-12.zip" 10 | 11 | echo "Starting server for Bert model $bert_model" 12 | 13 | if [[ -f "$file_name" ]]; then 14 | echo "$file_name exists. Skipping download." 15 | else 16 | curl -o $file_name https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip 17 | fi 18 | 19 | echo "looking for $_cwd/inputs/$bert_model" 20 | 21 | #curl -o $_cwd"/inputs/"$bert_model".zip" https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip 22 | 23 | cd $_cwd"/inputs" 24 | unzip "uncased_L-12_H-768_A-12.zip" 25 | cd .. 26 | 27 | 28 | echo "The current working directory: $PWD" 29 | 30 | bert-serving-start -model_dir "./inputs/uncased_L-12_H-768_A-12/" -num_worker=1& 31 | 32 | 33 | -------------------------------------------------------------------------------- /examples/content_reuse/textbook_to_concept_mapping.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: textbook_to_concept_mapping 2 | owner: sunbird 3 | 4 | inputs: 5 | path_to_stb_data: inputs/base_stb_data.csv 6 | path_to_ref_data: inputs/base_ref_data.csv 7 | path_to_result_folder: !ENV ${APP_HOME} 8 | path_to_neo4j_configuration: inputs/credentials.ini 9 | path_to_pickled_tokenizer: !ENV ${APP_HOME}/model/tokenizer.pkl 10 | path_to_siamese_configuration: inputs/siamese_configuration.json 11 | path_to_trained_model: !ENV ${APP_HOME}/model/lstm_50_50_0_17_0_25.h5 12 | 13 | outputs: 14 | dummy: /home/DS_HOME 15 | 16 | graph: 17 | - 18 | - node_name: data_preparation 19 | inputs: [path_to_stb_data, path_to_ref_data, path_to_result_folder] 20 | outputs: [path_to_cosine_similarity_matrix, path_to_complete_data_set] 21 | operation: content_reuse.DataPreparationV2 22 | arguments: 23 | sentence_length: 4 24 | cosine_score_threshold: 0.51 25 | 26 | - node_name: bert_scoring 27 | inputs: [data_preparation.path_to_complete_data_set, path_to_trained_model, path_to_pickled_tokenizer, path_to_siamese_configuration] 28 | outputs: [path_to_predicted_output] 29 | operation: content_reuse.BertScoring 30 | arguments: 31 | filter_by_type_of_match: nan 32 | filter_by_grade_range_: nan 33 | threshold: 0.482528924942016 34 | embedding_method: BERT 35 | 36 | - node_name: topic_aggregation 37 | inputs: [bert_scoring.path_to_predicted_output] 38 | outputs: [path_to_output_topic_agg] 39 | operation: content_reuse.TopicLevelAggregation 40 | arguments: 41 | aggregation_criteria: average 42 | compute_topic_similarity: True 43 | mandatory_column_names: 44 | stb_topic_col_name: stb_id 45 | ref_topic_col_name: ref_id 46 | pred_agg_col_name: pred_score_mean 47 | data_labeled: False 48 | 49 | - node_name: recommend_k_concepts_per_topic 50 | inputs: [topic_aggregation.path_to_output_topic_agg] 51 | outputs: [path_to_dtb_mapping_file] 52 | operation: content_reuse.RecommendKConceptsPerTopic 53 | arguments: 54 | window: 5 55 | 56 | - node_name: write_relationships_to_neo4j 57 | inputs: [path_to_neo4j_configuration, recommend_k_concepts_per_topic.path_to_dtb_mapping_file] 58 | outputs: [] 59 | operation: content_reuse.WriteRelationshipsToNeo4j 60 | -------------------------------------------------------------------------------- /examples/content_reuse/textbook_to_textbook_mapping.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: textbook_to_textbook_mapping 2 | owner: sunbird 3 | 4 | inputs: 5 | path_to_base_data: !ENV ${APP_HOME}/input/base_ref_data.csv 6 | path_to_result_folder: !ENV ${APP_HOME} 7 | path_to_trained_model: !ENV ${APP_HOME}/model/lstm_50_50_0_17_0_25.h5 8 | path_to_pickled_tokenizer: !ENV ${APP_HOME}/model/tokenizer.pkl 9 | path_to_siamese_configuration: !ENV ${APP_HOME}/model/siamese_configuration.json 10 | 11 | outputs: 12 | dummy: /home/DS_HOME 13 | 14 | graph: 15 | 16 | - node_name: data_preparation 17 | inputs: [path_to_base_data, path_to_result_folder] 18 | outputs: [path_to_cosine_similarity_matrix, path_to_complete_data_set] 19 | operation: content_reuse.DataPreparation 20 | arguments: 21 | sentence_length: 4 22 | cosine_score_threshold: 0.51 23 | 24 | - node_name: bert_scoring 25 | inputs: [data_preparation.path_to_complete_data_set, path_to_trained_model, path_to_pickled_tokenizer, path_to_siamese_configuration] 26 | outputs: [path_to_predicted_output] 27 | operation: content_reuse.BertScoring 28 | arguments: 29 | filter_by_type_of_match: nan 30 | filter_by_grade_range_: nan 31 | threshold: 0.482528924942016 32 | embedding_method: BERT 33 | 34 | - node_name: topic_aggregation 35 | inputs: [bert_scoring.path_to_predicted_output] 36 | outputs: [path_to_output_topic_agg] 37 | operation: content_reuse.TopicLevelAggregation 38 | arguments: 39 | aggregation_criteria: average 40 | compute_topic_similarity: True 41 | mandatory_column_names: 42 | stb_topic_col_name: stb_id 43 | ref_topic_col_name: ref_id 44 | pred_agg_col_name: pred_score_mean 45 | label_col_name: actual_label 46 | tp_col_name: TP_count 47 | fp_col_name: FP_count 48 | tn_col_name: TN_count 49 | fn_col_name: FN_count 50 | data_labeled: True 51 | 52 | - node_name: evaluation 53 | inputs: [topic_aggregation.path_to_output_topic_agg] 54 | outputs: [path_to_k_eval_metrics] 55 | operation: content_reuse.ContentReuseEvaluation 56 | arguments: 57 | window: 5 58 | -------------------------------------------------------------------------------- /examples/content_tagging/content_tagging.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: Content_tagging 2 | owner: sunbird 3 | 4 | inputs: 5 | DS_DATA_HOME: /home/DS_DATA_HOME 6 | localpathTocontentMeta: 7 | pathTocredentials: inputs/credentials.ini 8 | categoryLookup: inputs/category_lookup.yaml 9 | pathTotaxonomy: inputs/taxonomy.csv 10 | 11 | outputs: 12 | dummy: /home/DS_HOME 13 | 14 | graph: 15 | 16 | - node_name: content_to_text 17 | inputs: [DS_DATA_HOME, localpathTocontentMeta, pathTocredentials] 18 | outputs: [timestamp_folder] 19 | operation: contenttagging.ContentToTextRead 20 | arguments: 21 | range_start: START 22 | range_end: END 23 | num_of_processes: 1 24 | content_type: 25 | youtube: 26 | contentDownloadField: artifactUrl 27 | video_to_speech: none 28 | speech_to_text: googleAT 29 | image_to_text: none 30 | pdf_to_text: none 31 | ecml_index_to_text: none 32 | ecml: 33 | contentDownloadField: downloadUrl 34 | video_to_speech: ffmpeg 35 | speech_to_text: googleAT 36 | image_to_text: googleVision 37 | pdf_to_text: pdfminer 38 | ecml_index_to_text: parse 39 | pdf: 40 | contentDownloadField: downloadUrl 41 | video_to_speech: none 42 | speech_to_text: none 43 | image_to_text: none 44 | pdf_to_text: pdfminer 45 | ecml_index_to_text: none 46 | 47 | - node_name: keyword_extraction 48 | inputs: [content_to_text.timestamp_folder, pathTocredentials, categoryLookup] 49 | outputs: [pathTocontentKeywords] 50 | operation: contenttagging.KeywordExtraction 51 | arguments: 52 | extract_keywords: tagme 53 | filter_criteria: dbpedia 54 | update_corpus: 0 55 | filter_score_val: 0.4 56 | num_keywords: 5 57 | 58 | - node_name: writeToKafka 59 | inputs: [keyword_extraction.pathTocontentKeywords, pathTocredentials] 60 | outputs: [] 61 | operation: contenttagging.WriteToKafkaTopic 62 | arguments: 63 | write_to_kafkaTopic: new_topic 64 | 65 | -------------------------------------------------------------------------------- /examples/content_tagging/inputs/category_lookup.yaml: -------------------------------------------------------------------------------- 1 | Environmental Studies: 2 | - Environmental_studies 3 | - Natural_sciences 4 | Geography: 5 | - Geography 6 | - Social_sciences 7 | - Earth_sciences 8 | - Solar_System 9 | Health and Physical Education: 10 | - Physical_education 11 | - Health 12 | - Public_health 13 | - Physical_education 14 | History: 15 | - History 16 | - Social_sciences 17 | - History_of_India 18 | - Archaeology 19 | Mathematics: 20 | - Mathematical_concepts 21 | - Mathematics 22 | Science: 23 | - Science 24 | - Physics 25 | - Biology 26 | - Chemistry 27 | - Technology 28 | - Environmental_science 29 | Social_Science: 30 | - Geography 31 | - Social_sciences 32 | - History_of_India 33 | - History 34 | - Environmental_studies 35 | - Civics 36 | - Politics_of_India 37 | - Political_science 38 | -------------------------------------------------------------------------------- /examples/content_tagging/inputs/corpus/Biology.csv: -------------------------------------------------------------------------------- 1 | keyword,dbpedia_score 2 | -------------------------------------------------------------------------------- /examples/content_tagging/inputs/corpus/English.csv: -------------------------------------------------------------------------------- 1 | keyword,dbpedia_score 2 | -------------------------------------------------------------------------------- /examples/content_tagging/inputs/corpus/Environmental Studies.csv: -------------------------------------------------------------------------------- 1 | keyword,dbpedia_score 2 | -------------------------------------------------------------------------------- /examples/content_tagging/inputs/corpus/Physical Science.csv: -------------------------------------------------------------------------------- 1 | keyword,dbpedia_score 2 | -------------------------------------------------------------------------------- /examples/content_tagging/inputs/corpus/Science.csv: -------------------------------------------------------------------------------- 1 | keyword,dbpedia_score 2 | -------------------------------------------------------------------------------- /examples/content_tagging/inputs/corpus/Social Science.csv: -------------------------------------------------------------------------------- 1 | keyword,dbpedia_score 2 | -------------------------------------------------------------------------------- /examples/content_tagging/inputs/credentials.ini: -------------------------------------------------------------------------------- 1 | [google application credentials] 2 | GOOGLE_APPLICATION_CREDENTIALS = /***/google_cred.json 3 | 4 | [postman credentials] 5 | api_key = ***Bearer key*** 6 | 7 | [redis] 8 | host=localhost 9 | port=6379 10 | password= 11 | 12 | 13 | [tagme credentials] 14 | gcube_token = *** 15 | postman_token = *** 16 | 17 | 18 | [kafka] 19 | host=localhost 20 | port=9092 -------------------------------------------------------------------------------- /examples/content_tagging/load_corpusToRedis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import redis 3 | import configparser 4 | import pandas as pd 5 | 6 | credentials_loc = os.path.join(os.getcwd(), "examples/content_tagging/inputs/credentials.ini") 7 | config = configparser.ConfigParser(allow_no_value=True) 8 | config.read(credentials_loc) 9 | redis_host = config["redis"]["host"] 10 | redis_port = config["redis"]["port"] 11 | redis_password = config["redis"]["password"] 12 | 13 | loc=os.path.join(os.getcwd(), "examples/content_tagging/inputs/corpus") 14 | files=os.listdir(loc) 15 | 16 | r = redis.StrictRedis(host=redis_host, port=redis_port, password=redis_password, decode_responses=True) 17 | for f in files: 18 | df=pd.read_csv(os.path.join(loc,f)) 19 | pipe = r.pipeline() 20 | for ind,val in df.iterrows(): 21 | pipe.set(f[:-4]+'.'+val['keyword'], val['dbpedia_score']) 22 | pipe.execute() -------------------------------------------------------------------------------- /examples/dtb_creation/dtb_creation.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: DTB_Creation 2 | owner: sunbird 3 | 4 | inputs: 5 | toc_csv_file: inputs/ToC.csv 6 | extract_text_file: inputs/fullannotation_text.txt 7 | 8 | outputs: 9 | dtb_json_file: outputs/dtb.json 10 | 11 | graph: 12 | 13 | - node_name: create_dtb 14 | inputs: [toc_csv_file, extract_text_file] 15 | outputs: [dtb_json_file] 16 | operation: dtb.CreateDTB -------------------------------------------------------------------------------- /examples/housing_prices/housing_prices.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: House_Prices_Regression_Baseline 2 | owner: sunbird 3 | 4 | inputs: 5 | train: "data/train.csv" 6 | test: "data/test.csv" 7 | 8 | outputs: 9 | model_dt.model: "outputs/model_dt/model.pkl" 10 | model_dt.report: "outputs/model_dt/report.txt" 11 | model_rf.model: "outputs/model_rf/model.pkl" 12 | model_rf.report: "outputs/model_rf/report.txt" 13 | model_xgb.model: "outputs/model_xgb/model.pkl" 14 | model_xgb.report: "outputs/model_xgb/report.txt" 15 | 16 | graph: 17 | 18 | - node_name: preprocess 19 | inputs: [train, test] 20 | outputs: [preprocessed_train, preprocessed_test] 21 | operation: sklearn.CustomPreprocess 22 | arguments: 23 | drop_missing_perc: 0.9 24 | target_variable: SalePrice 25 | ignore_variables: Id 26 | numeric_impute: median 27 | categorical_impute: most_frequent 28 | 29 | - node_name: model_dt 30 | inputs: preprocess.preprocessed_train 31 | outputs: [report, model] 32 | operation: sklearn.CrossValidate 33 | arguments: 34 | model_args: 35 | name: sklearn.CrossValidate 36 | arguments: 37 | max_depth: 2 38 | random_state: 0 39 | target_variable: SalePrice 40 | cv_args: 41 | scoring: neg_mean_squared_error 42 | cv: 5 43 | imports: 44 | - [sklearn.tree, DecisionTreeRegressor] 45 | 46 | - node_name: model_rf 47 | inputs: preprocess.preprocessed_train 48 | outputs: [report, model] 49 | operation: sklearn.CrossValidate 50 | arguments: 51 | model_args: 52 | name: RandomForestRegressor 53 | arguments: 54 | max_depth: 2 55 | random_state: 0 56 | target_variable: SalePrice 57 | cv_args: 58 | scoring: neg_mean_squared_error 59 | cv: 5 60 | install: [package1==0.0.1, package2] #TODO 61 | imports: 62 | - [sklearn.ensemble, RandomForestRegressor] 63 | 64 | - node_name: model_xgb 65 | inputs: preprocess.preprocessed_train 66 | outputs: [report, model] 67 | operation: sklearn.CrossValidate 68 | arguments: 69 | model_args: 70 | name: XGBRegressor 71 | arguments: 72 | random_state: 0 73 | target_variable: SalePrice 74 | cv_args: 75 | scoring: neg_mean_squared_error 76 | cv: 5 77 | imports: 78 | - [xgboost, XGBRegressor] -------------------------------------------------------------------------------- /examples/housing_prices/outputs/model_dt/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/examples/housing_prices/outputs/model_dt/model.pkl -------------------------------------------------------------------------------- /examples/housing_prices/outputs/model_dt/report.txt: -------------------------------------------------------------------------------- 1 | MSE: -2388908099.48 (+/- 193323889.66) -------------------------------------------------------------------------------- /examples/housing_prices/outputs/model_rf/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/examples/housing_prices/outputs/model_rf/model.pkl -------------------------------------------------------------------------------- /examples/housing_prices/outputs/model_rf/report.txt: -------------------------------------------------------------------------------- 1 | MSE: -2052100128.84 (+/- 250437797.00) -------------------------------------------------------------------------------- /examples/housing_prices/outputs/model_xgb/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/examples/housing_prices/outputs/model_xgb/model.pkl -------------------------------------------------------------------------------- /examples/housing_prices/outputs/model_xgb/report.txt: -------------------------------------------------------------------------------- 1 | MSE: -290670437.29 (+/- 29324707.80) -------------------------------------------------------------------------------- /examples/iris_classification/iris_classification_graph.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: Example1_Iris_Classification 2 | owner: sunbird 3 | 4 | inputs: 5 | raw_data: "inputs/iris.csv" 6 | outputs: 7 | report: 'outputs/model_report' 8 | model: 'outputs/model' 9 | 10 | graph: 11 | 12 | - node_name: split 13 | inputs: raw_data 14 | outputs: [train, test] 15 | operation: sklearn.Splitters 16 | arguments: 17 | model_args: 18 | name: train_test_split 19 | arguments: 20 | test_size: 0.2 21 | imports: 22 | - [sklearn.model_selection, train_test_split] 23 | 24 | - node_name: model_logistic_regression 25 | inputs: split.train 26 | outputs: [report, model] 27 | operation: sklearn.CrossValidate 28 | arguments: 29 | model_args: 30 | name: LogisticRegression 31 | arguments: 32 | max_iter: 200 33 | target_variable: Species 34 | cv_args: 35 | cv: 5 36 | imports: 37 | - [sklearn.linear_model, LogisticRegression] 38 | -------------------------------------------------------------------------------- /examples/iris_classification/outputs/model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/examples/iris_classification/outputs/model -------------------------------------------------------------------------------- /examples/iris_classification/outputs/model_report: -------------------------------------------------------------------------------- 1 | MSE: 0.92 (+/- 0.10) -------------------------------------------------------------------------------- /examples/iris_classification_with_keras/iris_classification_using_keras_graph.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: Example1a_Iris_Classification_with_keras 2 | owner: sunbird 3 | experiment_dir: 'MLWBdemo1a/experiment/' 4 | 5 | inputs: 6 | raw_data: "inputs/iris.csv" 7 | outputs: 8 | fitted_model: "outputs/model" 9 | predictions: "outputs/predictions.csv" 10 | report: "outputs/model_report.txt" 11 | 12 | graph: 13 | 14 | - node_name: split 15 | inputs: raw_data 16 | outputs: [train, test] 17 | operation: sklearn.Splitters 18 | arguments: 19 | model_args: 20 | name: train_test_split 21 | arguments: 22 | test_size: 0.2 23 | imports: 24 | - [sklearn.model_selection, train_test_split] 25 | 26 | - node_name: keras_model 27 | inputs: [split.train, split.test] 28 | outputs: [fitted_model, fitted_model_weights, predictions, report] 29 | operation: keras.train_svm_model 30 | arguments: 31 | model_args: 32 | name: KerasClassifier 33 | module_path: keras.wrappers.scikit_learn 34 | arguments: 35 | epochs: 200 36 | batch_size: 5 37 | verbose: 0 38 | target_variable: Species 39 | ignore_variables: Id 40 | imports: 41 | - [keras.models, Sequential, model_from_json] 42 | 43 | -------------------------------------------------------------------------------- /expt_name_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "diksha_content_keyword_tagging": "/examples/content_tagging/content_tagging.yaml", 3 | "housing_prices": "/examples/housing_prices/housing_prices.yaml", 4 | "iris_classification": "/examples/iris_classification/iris_classification_graph.yaml", 5 | "iris_classification_with_keras": "/examples/iris_classification_with_keras/iris_classification_using_keras_graph.yaml", 6 | "textbook_to_textbook_mapping": "/examples/content_reuse/textbook_to_textbook_mapping.yaml", 7 | "textbook_to_concept_mapping": "/examples/content_reuse/textbook_to_concept_mapping.yaml" 8 | } -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | apache-airflow==1.10.4 2 | pytest-runner==5.1 3 | dateutils==0.6.6 4 | h5py==2.9.0 5 | PyYAML==5.1.2 6 | boto==2.49.0 7 | boto3==1.9.130 8 | botocore==1.12.130 9 | scikit-learn==0.20.2 10 | pandas==0.25.1 11 | numpy==1.17.1 12 | Pint==0.9 13 | scipy==1.3.1 14 | six==1.12.0 15 | xgboost==0.90 16 | Sphinx==2.2.0 17 | tox==3.13.2 18 | networkx==2.3 19 | findspark==1.3.0 20 | gensim==3.8.0 21 | nltk==3.4.5 22 | grpcio==1.23.0 23 | grpcio-tools==1.23.0 24 | ffmpy==0.2.2 25 | ffmpeg-python==0.2.0 26 | youtube-dl==2019.8.13 27 | google-cloud==0.34.0 28 | google-cloud-core==1.0.3 29 | google-cloud-storage==1.19.0 30 | google-cloud-translate==1.6.0 31 | google-cloud-vision==0.39.0 32 | google-api-python-client==1.7.11 33 | oauth2client==4.1.3 34 | google==2.0.2 35 | pydub==0.23.1 36 | mutagen==1.42.0 37 | PyPDF2==1.26.0 38 | SpeechRecognition==3.8.1 39 | plotly==4.1.0 40 | natsort==6.0.0 41 | SPARQLWrapper==1.8.4 42 | elasticsearch==7.0.4 43 | pdfminer.six==20181108 44 | configparser==3.5.3 45 | sphinx-rtd-theme==0.4.3 46 | kafka==1.3.5 47 | pyspark==2.4.4 48 | redis==3.3.8 49 | mock==3.0.5 50 | ruptures==1.0.1 51 | biopython==1.74 52 | python-Levenshtein==0.12.0 53 | bert-serving-server==1.9.6 54 | bert-serving-client==1.9.6 55 | pyemd==0.5.1 56 | tensorflow==1.14.0 57 | gspread==3.1.0 58 | opencv-python==4.1.1.26 59 | spacy==2.1.8 60 | pdf2image==1.8.0 61 | Pillow==7.1.1 62 | img2pdf==0.3.3 63 | transformers==2.1.1 64 | Keras==2.1.1 65 | sklearn==0.0 66 | torch==1.4.0 67 | Werkzeug<1.0.0 68 | py2neo==4.3.0 69 | Pygments==2.3.1 70 | SQLAlchemy==1.3.16 71 | Flask==1.1.2 72 | -------------------------------------------------------------------------------- /src/main/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/main/__init__.py -------------------------------------------------------------------------------- /src/main/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/main/python/__init__.py -------------------------------------------------------------------------------- /src/main/python/daggit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/main/python/daggit/__init__.py -------------------------------------------------------------------------------- /src/main/python/daggit/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/main/python/daggit/contrib/__init__.py -------------------------------------------------------------------------------- /src/main/python/daggit/contrib/sunbird/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/main/python/daggit/contrib/sunbird/__init__.py -------------------------------------------------------------------------------- /src/main/python/daggit/contrib/sunbird/nodes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/main/python/daggit/contrib/sunbird/nodes/__init__.py -------------------------------------------------------------------------------- /src/main/python/daggit/contrib/sunbird/nodes/dtb.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import daggit 4 | import requests 5 | import io 6 | import time 7 | import re 8 | import logging 9 | import configparser 10 | import Levenshtein 11 | import pandas as pd 12 | 13 | 14 | from daggit.core.io.io import File_Txt 15 | from daggit.core.io.io import File_JSON 16 | from daggit.core.io.io import ReadDaggitTask_Folderpath 17 | from daggit.core.base.factory import BaseOperator 18 | 19 | from daggit.contrib.sunbird.oplib.dtb import create_dtb 20 | 21 | 22 | 23 | class CreateDTB(BaseOperator): 24 | 25 | @property 26 | def inputs(self): 27 | """ 28 | Inputs needed to create DTB 29 | 30 | :returns: toc and text files 31 | 32 | """ 33 | return { 34 | "toc_csv_file": ReadDaggitTask_Folderpath(self.node.inputs[0]), 35 | "extract_text_file": ReadDaggitTask_Folderpath(self.node.inputs[1]), 36 | } 37 | 38 | @property 39 | def outputs(self): 40 | """ 41 | Outputs created while creating DTB 42 | 43 | :returns: Returns the path to the DTB file 44 | 45 | """ 46 | return {"dtb_json_file": File_JSON( 47 | self.node.outputs[0])} 48 | 49 | def run(self): 50 | 51 | """ 52 | Creates the DTB by aligning ToC with text extractd from text extracted from any textbook 53 | 54 | :returns: Returns the path to the DTB file 55 | 56 | """ 57 | 58 | f_toc = self.inputs["toc_csv_file"].read_loc() 59 | f_text = self.inputs["extract_text_file"].read_loc() 60 | dtb = create_dtb(f_toc,f_text) 61 | self.outputs["dtb_json_file"].write(dtb) -------------------------------------------------------------------------------- /src/main/python/daggit/contrib/sunbird/oplib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/main/python/daggit/contrib/sunbird/oplib/__init__.py -------------------------------------------------------------------------------- /src/main/python/daggit/contrib/sunbird/oplib/contentreuseEvaluationUtils.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import pandas as pd 3 | import numpy as np 4 | import os 5 | import json 6 | import sys 7 | import pyvips 8 | import cv2 9 | import img2pdf 10 | from PIL import Image, ImageDraw, ImageFont 11 | from pdf2image import convert_from_path 12 | import natsort 13 | from natsort import natsorted, ns 14 | 15 | 16 | def concat_resize_min(image_ls): 17 | im1 = Image.open(image_ls[0]) 18 | im2 = Image.open(image_ls[1]) 19 | dst = Image.new('RGB', (im1.width + im2.width, im1.height+int(im2.height/2)) ) 20 | dst.paste(im1, (0, 0)) 21 | dst.paste(im2, (im1.width, 0)) 22 | return dst 23 | 24 | 25 | def text_image(pdf_path, gocr_path, medium, Language_mapping_file, font_size, output_loc): 26 | MAX_W, MAX_H = 2000, 2000 27 | lang_mapping = json.loads(open(Language_mapping_file).read()) 28 | font_path = lang_mapping[medium] 29 | pages = convert_from_path(pdf_path, dpi= 500) 30 | for i in range(len(pages)): 31 | try: 32 | gocr_img = os.path.join(output_loc , "Gocr_"+str(i)+'.png') 33 | pdf_img = os.path.join( output_loc , "pdf_"+str(i)+ '.png') 34 | pdf_page = pages[i] 35 | pdf_page.save(pdf_img, 'JPEG') 36 | 37 | with open(os.path.join(gocr_path ,"output-"+str(i+1)+"-to-"+str(i+1)+".json"), "r") as read_file: 38 | page_by_page_response = json.load(read_file) 39 | text = page_by_page_response['responses'][0]['fullTextAnnotation']['text'] 40 | ls = [str(i) + ") " + text.splitlines()[i] for i in range(len(text.splitlines()))] 41 | joined_text = "\n".join(ls) 42 | chars_to_remove = ['&'] 43 | joined_text = ''.join([c for c in joined_text if c not in set(chars_to_remove)]) 44 | font = ImageFont.truetype(font_path, font_size) 45 | image = pyvips.Image.text(joined_text, width=MAX_W, height=MAX_H, fontfile = font_path,dpi=400) 46 | image.write_to_file(gocr_img) 47 | concat_resize_min([ pdf_img, gocr_img ]).save(os.path.join(output_loc,"output_"+str(i)+".png")) 48 | os.remove(gocr_img) 49 | os.remove(pdf_img) 50 | except: 51 | print("missed_pages"+str(i)) -------------------------------------------------------------------------------- /src/main/python/daggit/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/main/python/daggit/core/__init__.py -------------------------------------------------------------------------------- /src/main/python/daggit/core/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Conversion register 2 | from pint import UnitRegistry 3 | ureg = UnitRegistry() 4 | Q_ = ureg.Quantity 5 | 6 | __all__ = ['base'] 7 | -------------------------------------------------------------------------------- /src/main/python/daggit/core/base/config.py: -------------------------------------------------------------------------------- 1 | DAGGIT_HOME = "DAGGIT_HOME" 2 | STORE = 'Local' 3 | ORCHESTRATOR = 'airflow' 4 | STORAGE_FORMAT = '.h5' 5 | ORCHESTRATOR_ATTR = dict({'AIRFLOW': dict({'dag_config':dict({'depends_on_past': False, 'schedule_interval':'@once','start_date':'20-11-2018'})})}) -------------------------------------------------------------------------------- /src/main/python/daggit/core/base/factory.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from daggit.core.base.utils import get_as_list 3 | 4 | 5 | class BaseOperator(object): 6 | __metaclass__ = ABCMeta 7 | 8 | def __init__(self, node): 9 | self.node = node 10 | 11 | @property 12 | @abstractmethod 13 | def inputs(self): 14 | raise NotImplementedError('Needs to be implemented!') 15 | 16 | @property 17 | @abstractmethod 18 | def outputs(self): 19 | raise NotImplementedError('Needs to be implemented!') 20 | 21 | @abstractmethod 22 | def run(self, **kwargs): 23 | raise NotImplementedError('Needs to be implemented!') 24 | 25 | def _run(self, **kwargs): 26 | self._pre_execute() 27 | self.run(**kwargs) 28 | 29 | def _pre_execute(self): 30 | pass 31 | 32 | 33 | class BaseDAGManager(object): 34 | __metaclass__ = ABCMeta 35 | 36 | def __init__(self, dag): 37 | self.dag = dag 38 | 39 | @abstractmethod 40 | def init(self): 41 | raise NotImplementedError('daggit init Needs to be implemented!') 42 | 43 | @abstractmethod 44 | def run(self): 45 | raise NotImplementedError('daggit run needs to be implemented!') 46 | 47 | 48 | class NodeData(object): 49 | 50 | def __init__( 51 | self, 52 | dag_id, 53 | task_id, 54 | data_alias, 55 | parent_task=None, 56 | external_ref=None): 57 | self.dag_id = dag_id 58 | self.task_id = task_id 59 | self.data_alias = data_alias 60 | self.parent_task = parent_task 61 | self.external_ref = external_ref 62 | 63 | 64 | class Node(object): 65 | 66 | def __init__( 67 | self, 68 | node_config, 69 | experiment_name, 70 | owner, 71 | graph_inputs, 72 | graph_outputs, 73 | inputs_parents): 74 | 75 | self.dag_id = experiment_name 76 | self.owner = owner 77 | self.graph_inputs = graph_inputs 78 | self.graph_outputs = graph_outputs 79 | self.task_id = node_config["node_name"] 80 | self.inputs = get_as_list(node_config["inputs"]) 81 | self.outputs = get_as_list(node_config["outputs"]) 82 | self.operation = node_config["operation"] 83 | self.inputs_parents = inputs_parents 84 | 85 | if "imports" in node_config: 86 | self.imports = node_config["imports"] 87 | else: 88 | self.imports = None 89 | 90 | if "arguments" in node_config: 91 | self.arguments = node_config["arguments"] 92 | else: 93 | self.arguments = {} 94 | 95 | self.infer_data_objects() 96 | 97 | def infer_data_objects(self): 98 | 99 | inputs_list = [] 100 | for data_label in self.inputs: 101 | 102 | if data_label in self.graph_inputs: 103 | data_object = NodeData( 104 | dag_id=self.dag_id, 105 | task_id=self.task_id, 106 | data_alias=data_label, 107 | external_ref=self.graph_inputs[data_label]) 108 | else: 109 | try: 110 | data_name = data_label.split(".")[1] 111 | except IndexError: 112 | data_name = data_label.split(".")[0] 113 | data_object = NodeData( 114 | dag_id=self.dag_id, 115 | task_id=self.task_id, 116 | data_alias=data_name, 117 | parent_task=self.inputs_parents[data_label]) 118 | 119 | inputs_list.append(data_object) 120 | self.inputs = inputs_list 121 | 122 | outputs_list = [] 123 | for data_label in self.outputs: 124 | namespaced_data_label = self.task_id + '.' + data_label 125 | if namespaced_data_label in self.graph_outputs: 126 | data_object = NodeData( 127 | dag_id=self.dag_id, 128 | task_id=self.task_id, 129 | parent_task=self.task_id, 130 | data_alias=data_label, 131 | external_ref=self.graph_outputs[namespaced_data_label]) 132 | elif data_label in self.graph_outputs: 133 | data_object = NodeData( 134 | dag_id=self.dag_id, 135 | task_id=self.task_id, 136 | parent_task=self.task_id, 137 | data_alias=data_label, 138 | external_ref=self.graph_outputs[data_label]) 139 | else: 140 | data_object = NodeData( 141 | dag_id=self.dag_id, 142 | task_id=self.task_id, 143 | data_alias=data_label, 144 | parent_task=self.task_id) 145 | 146 | outputs_list.append(data_object) 147 | self.outputs = outputs_list 148 | -------------------------------------------------------------------------------- /src/main/python/daggit/core/base/parser.py: -------------------------------------------------------------------------------- 1 | import six 2 | import yaml 3 | import os 4 | from airflow.models import DAG 5 | 6 | 7 | from datetime import datetime 8 | from daggit.core.base.config import STORE 9 | from daggit.core.base.config import ORCHESTRATOR_ATTR 10 | from daggit.core.base.factory import Node 11 | from daggit.core.base.utils import get_as_list, normalize_path 12 | from daggit.runtime.airflow_runtime import DaggitPyOp 13 | from daggit.core.base.utils import parse_config 14 | 15 | 16 | def create_dag(dag_config_file): 17 | """Creating dags from graph location""" 18 | print("dag_config_file:", dag_config_file) 19 | dag_config = parse_config(path=dag_config_file) 20 | # dag_config = {} 21 | # with open(dag_config_file, 'r', encoding="latin1") as stream: 22 | # try: 23 | # dag_config = yaml.load(stream) 24 | # except yaml.YAMLError as exc: 25 | # print(exc) 26 | 27 | experiment_name = dag_config["experiment_name"] 28 | owner = dag_config["owner"] 29 | 30 | default_args = {} 31 | 32 | # depends_on_past 33 | try: 34 | default_args['depends_on_past'] = dag_config['dag_config']['depends_on_past'] 35 | except BaseException: 36 | default_args['depends_on_past'] = ORCHESTRATOR_ATTR['AIRFLOW']['dag_config']['depends_on_past'] 37 | 38 | # start_date 39 | datepattern = '%d-%m-%Y' 40 | try: 41 | default_args['start_date'] = datetime.strptime( 42 | dag_config['dag_config']['start_date'], datepattern) 43 | except BaseException: 44 | default_args['start_date'] = datetime.strptime( 45 | ORCHESTRATOR_ATTR['AIRFLOW']['dag_config']['start_date'], datepattern) 46 | 47 | # schedule_interval 48 | try: 49 | schedule_interval = dag_config['dag_config']['schedule_interval'] 50 | except BaseException: 51 | schedule_interval = ORCHESTRATOR_ATTR['AIRFLOW']['dag_config']['schedule_interval'] 52 | 53 | default_args['owner'] = owner 54 | print("Experiment name: ", experiment_name) 55 | dag = DAG( 56 | experiment_name, 57 | default_args=default_args, 58 | schedule_interval=schedule_interval) 59 | 60 | graph_inputs = {} 61 | if STORE.lower() == 'local': 62 | for key in dag_config["inputs"].keys(): 63 | graph_inputs[key] = normalize_path( 64 | cwd=os.path.dirname(dag_config_file), 65 | path=dag_config["inputs"][key]) 66 | else: 67 | graph_inputs = dag_config["inputs"] 68 | 69 | graph_outputs = {} 70 | if STORE.lower() == 'local': 71 | for key in dag_config["outputs"].keys(): 72 | 73 | graph_outputs[key] = normalize_path( 74 | cwd=os.path.dirname(dag_config_file), 75 | path=dag_config["outputs"][key]) 76 | else: 77 | graph_outputs = dag_config["outputs"] 78 | 79 | # graph_inputs = dag_config["inputs"] 80 | # graph_outputs = dag_config["outputs"] 81 | graph_config = dag_config["graph"] 82 | 83 | inputs_parents = {} 84 | for node_config in graph_config: 85 | task = node_config['node_name'] 86 | outputs = get_as_list(node_config['outputs']) 87 | for label in outputs: 88 | inputs_parents[".".join([task, label])] = task 89 | 90 | nodes_bag = {} 91 | for node_config in graph_config: 92 | node = Node( 93 | node_config, 94 | experiment_name, 95 | owner, 96 | graph_inputs, 97 | graph_outputs, 98 | inputs_parents) 99 | nodes_bag[node.task_id] = node 100 | 101 | # Dag creation 102 | nodes_upstream = {} 103 | for node in list(nodes_bag.values()): 104 | globals()[node.task_id] = DaggitPyOp(node=node, dag=dag) 105 | upstream_tasks = [] 106 | for i in node.inputs: 107 | upstream_tasks.append(i.parent_task) 108 | nodes_upstream[node.task_id] = list(set(upstream_tasks)) 109 | 110 | for node, upstream_list in six.iteritems(nodes_upstream): 111 | upstream_list = [e for e in upstream_list if e is not None] 112 | if len(upstream_list) > 0: 113 | for upstream_task in upstream_list: 114 | globals()[node].set_upstream(globals()[upstream_task]) 115 | 116 | return dag 117 | -------------------------------------------------------------------------------- /src/main/python/daggit/core/base/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import errno 3 | import contextlib 4 | import sys 5 | import mock 6 | import yaml 7 | import re 8 | 9 | 10 | def create_dir(directory): 11 | try: 12 | os.makedirs(directory) 13 | except OSError as e: 14 | if e.errno != errno.EEXIST: 15 | raise 16 | 17 | 18 | class DummyFile(object): 19 | def write(self, x): pass 20 | 21 | # def flush(self): pass 22 | 23 | 24 | @contextlib.contextmanager 25 | def nostdout(): 26 | save_stdout = sys.stdout 27 | #sys.stdout = DummyFile() 28 | sys.stdout = mock.MagicMock() 29 | yield 30 | sys.stdout = save_stdout 31 | 32 | 33 | def normalize_path(cwd, path): 34 | if os.path.isabs(path): 35 | return path 36 | else: 37 | return os.path.normpath(os.path.join(cwd, path)) 38 | 39 | 40 | def get_as_list(string_or_list): 41 | if isinstance(string_or_list, list): 42 | return string_or_list 43 | else: 44 | return [string_or_list] 45 | 46 | 47 | def parse_config(path=None, data=None, tag='!ENV'): 48 | pattern = re.compile('.*?\${(\w+)}.*?') 49 | loader = yaml.SafeLoader 50 | loader.add_implicit_resolver(tag, pattern, None) 51 | 52 | def constructor_env_variables(loader, node): 53 | value = loader.construct_scalar(node) 54 | match = pattern.findall(value) # to find all env variables in line 55 | if match: 56 | full_value = value 57 | for g in match: 58 | full_value = full_value.replace( 59 | f'${{{g}}}', os.environ.get(g, g) 60 | ) 61 | return full_value 62 | return value 63 | 64 | loader.add_constructor(tag, constructor_env_variables) 65 | 66 | if path: 67 | with open(path) as conf_data: 68 | return yaml.load(conf_data, Loader=loader) 69 | elif data: 70 | return yaml.load(data, Loader=loader) 71 | else: 72 | raise ValueError('Either a path or data should be defined as input') -------------------------------------------------------------------------------- /src/main/python/daggit/core/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/main/python/daggit/core/io/__init__.py -------------------------------------------------------------------------------- /src/main/python/daggit/core/io/files.py: -------------------------------------------------------------------------------- 1 | import zipfile 2 | import requests 3 | import os 4 | import io 5 | import pickle 6 | 7 | 8 | def downloadZipFile(url, directory): 9 | 10 | """ 11 | Multimedia Content are stored in cloud in ecar or zip format. 12 | This function downloads a zip file pointed by url location. 13 | The user is expected to have access to the file pointed by url. 14 | The extracted file is available in location specified by directory. 15 | :param url(str): A valid url pointing to zipped Content location on cloud 16 | :returns: Status of download.``True``for uccessful download and ``False`` for unsuccesful download 17 | """ 18 | r = requests.get(url) 19 | try: 20 | z = zipfile.ZipFile(io.BytesIO(r.content)) 21 | z.extractall(directory) 22 | return r.ok 23 | except BaseException: 24 | return False 25 | 26 | 27 | def findFiles(directory, substrings): 28 | """ 29 | Accio!! 30 | For a given directory, the function looks for any occurance of a particular 31 | file type mentioned by the substrings parameter. 32 | :param directory(str): The path to a folder 33 | :param substrings(list of strings): An array of extensions to be searched within the directory. 34 | ``eg: jpg, png, webm, mp4`` 35 | :returns: List of paths to the detected files 36 | """ 37 | ls = [] 38 | if isinstance(directory, str) and isinstance(substrings, list): 39 | if os.path.isdir(directory): 40 | for dirname, dirnames, filenames in os.walk(directory): 41 | for filename in filenames: 42 | string = os.path.join(dirname, filename) 43 | for substring in substrings: 44 | if(string.find(substring) >= 0): 45 | ls.append(string) 46 | return ls 47 | 48 | 49 | def unzip_files(directory): 50 | """ 51 | This function iterates through all the files in a directory and unzip those that are zipped (.zip) to that same folder 52 | :param directory(str): A directory or path to a folder 53 | """ 54 | assert isinstance(directory, str) 55 | zip_list = findFiles(directory, ['.zip']) 56 | bugs = {} 57 | for zip_file in zip_list: 58 | try: 59 | with zipfile.ZipFile(zip_file, 'r') as z: 60 | z.extractall(directory) 61 | os.remove(zip_file) 62 | except BaseException: 63 | bugs.append(zip_file) 64 | 65 | 66 | def save_obj(obj, name): 67 | with open(name + '.pkl', 'wb') as f: 68 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 69 | 70 | 71 | def load_obj(name): 72 | with open(name + '.pkl', 'rb') as f: 73 | return pickle.load(f, encoding='latin1') 74 | -------------------------------------------------------------------------------- /src/main/python/daggit/core/io/redis.py: -------------------------------------------------------------------------------- 1 | import redis 2 | 3 | def setRediskey(key, val, host, port, password): 4 | 5 | """ 6 | This function writes a key value pair into Redis cache. It is a wrapper on set operation of redis-py. 7 | :param key(str): The key. 8 | :param val(str): The value assigned to key. 9 | :param host(str): redis server host. default:'localhost'. 10 | :param port(str): redis server port. default: 6379'. 11 | :param password(str): redis server password. default: None. 12 | :returns: The detected language for the given text. 13 | """ 14 | try: 15 | r = redis.StrictRedis(host=redis_host, port=redis_port, password=redis_password, decode_responses=True) 16 | msg = r.set(key, val) 17 | except Exception as e: 18 | print(e) 19 | 20 | def getRediskey(key, host, port, password): 21 | """ 22 | This function reads the value from Redis cache based on the key. It is a wrapper on get operation of redis-py . 23 | :param key(str): The key. 24 | :param host(str): redis server host. default:'localhost'. 25 | :param port(str): redis server port. default: 6379'. 26 | :param password(str): redis server password. default: None. 27 | :returns: The detected language for the given text. 28 | """ 29 | try: 30 | r = redis.StrictRedis(host=redis_host, port=redis_port, password=redis_password, decode_responses=True) 31 | msg = r.get(key) 32 | return msg 33 | except Exception as e: 34 | print(e) 35 | 36 | -------------------------------------------------------------------------------- /src/main/python/daggit/core/nodes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/main/python/daggit/core/nodes/__init__.py -------------------------------------------------------------------------------- /src/main/python/daggit/core/nodes/dfsplitters.py: -------------------------------------------------------------------------------- 1 | from sklearn.model_selection import KFold 2 | from src.main.python.daggit.core.io.io import Pandas_Dataframe 3 | from src.main.python.daggit.core.base.factory import BaseOperator 4 | 5 | 6 | class k_fold_splitter(BaseOperator): 7 | 8 | @property 9 | def inputs(self): 10 | return {"input_df": Pandas_Dataframe(self.node.inputs[0]) 11 | } 12 | 13 | @property 14 | def outputs(self): 15 | return {"train": Pandas_Dataframe(self.node.outputs[0]), 16 | "test": Pandas_Dataframe(self.node.outputs[1])} 17 | 18 | def run( 19 | self, 20 | drop_missing_perc, 21 | target_variable, 22 | ignore_variables, 23 | categorical_impute, 24 | numeric_impute): 25 | input_df = self.inputs["input_df"].read() 26 | 27 | kf = KFold(**self.node.arguments) 28 | for train_index, test_index in kf.split(input_df): 29 | data_train = input_df.iloc[train_index] 30 | data_test = input_df.iloc[test_index] 31 | self.outputs["train"].write(data_train) 32 | self.outputs["test"].write(data_test) 33 | -------------------------------------------------------------------------------- /src/main/python/daggit/core/nodes/keras.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import pandas as pd 3 | from daggit.core.base.factory import BaseOperator 4 | from daggit.core.io.io import Pandas_Dataframe, Pickle_Obj, File_Txt 5 | from daggit.core.nodes.registry import get_node_callable 6 | 7 | 8 | def baseline_model_svm(): 9 | from keras.models import Sequential 10 | from keras.layers import Dense, Dropout 11 | # create model 12 | model = Sequential() 13 | model.add(Dense(8, input_dim=4, activation='relu')) 14 | model.add(Dropout(0.2)) 15 | model.add(Dense(16, input_dim=8, activation='relu')) 16 | model.add(Dropout(0.2)) 17 | model.add(Dense(3, activation='softmax')) 18 | # Compile model 19 | model.compile( 20 | loss='categorical_hinge', 21 | optimizer='adam', 22 | metrics=['accuracy']) 23 | return model 24 | 25 | 26 | class train_svm_model(BaseOperator): 27 | 28 | @property 29 | def inputs(self): 30 | return {"train": Pandas_Dataframe(self.node.inputs[0]), 31 | "test": Pandas_Dataframe(self.node.inputs[1])} 32 | 33 | @property 34 | def outputs(self): 35 | return {"model": File_Txt(self.node.outputs[0]), 36 | "model_weights": Pickle_Obj(self.node.outputs[1]), 37 | "predictions": Pandas_Dataframe(self.node.outputs[2]), 38 | "report": Pickle_Obj(self.node.outputs[3]), 39 | } 40 | 41 | def run( 42 | self, 43 | target_variable, 44 | ignore_variables=None, 45 | model_args=None, 46 | cv_args=None): 47 | from keras.utils import np_utils 48 | from sklearn.preprocessing import LabelEncoder 49 | 50 | train = self.inputs["train"].read().drop(ignore_variables, axis=1) 51 | test = self.inputs["test"].read().drop(ignore_variables, axis=1) 52 | 53 | if ignore_variables is not list: 54 | ignore_variables = [ignore_variables] 55 | 56 | train_y = train[target_variable] 57 | train_X = train.drop(target_variable, axis=1).values 58 | 59 | test_y = test[target_variable] 60 | test_X = test.drop(target_variable, axis=1).values 61 | 62 | imports = list(self.node.imports) 63 | for importlist in imports: 64 | module_name = importlist.pop(0) 65 | for imp in importlist: 66 | importlib.import_module(name=module_name, package=imp) 67 | print(self.node.imports) 68 | model = get_node_callable( 69 | module_path=model_args['module_path'], 70 | operator=model_args['name']) 71 | model_params = model_args['arguments'] 72 | 73 | encoder = LabelEncoder() 74 | encoder.fit(train_y) 75 | encoded_Y = encoder.transform(train_y) 76 | # convert integers to dummy variables (i.e. one hot encoded) 77 | dummy_y = np_utils.to_categorical(encoded_Y) 78 | 79 | encoded_Y_test = encoder.transform(test_y) 80 | # convert integers to dummy variables (i.e. one hot encoded) 81 | dummy_y_test = np_utils.to_categorical(encoded_Y_test) 82 | estimator = model(build_fn=baseline_model_svm, **model_params) 83 | 84 | estimator.fit(x=train_X, y=dummy_y) 85 | 86 | model_score = estimator.score(x=test_X, y=dummy_y_test) 87 | pred = estimator.predict(x=test_X) 88 | predictions = pd.DataFrame( 89 | encoder.inverse_transform(pred), 90 | columns=['predictions']) 91 | report = 'The accuracy of the model is: ' + str(model_score) 92 | 93 | model_json = estimator.model.to_json() 94 | model_weights = estimator.model.get_weights() 95 | 96 | self.outputs["model"].write(model_json) 97 | self.outputs["model_weights"].write(model_weights) 98 | self.outputs["predictions"].write(predictions) 99 | self.outputs["report"].write(report) 100 | -------------------------------------------------------------------------------- /src/main/python/daggit/core/nodes/registry.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | 4 | this_dir = os.path.dirname(os.path.realpath(__file__)) 5 | abs_path = os.path.abspath(os.path.join(this_dir, '../..')) 6 | 7 | 8 | def get_node_callable(operator, module_path=None): #get_op_callable 9 | if module_path: 10 | module = importlib.import_module(module_path) 11 | return getattr(module, operator) 12 | else: 13 | operator_full_path = operator.split('.') 14 | operator_name = operator_full_path.pop(-1) 15 | operator_module = '.'.join(operator_full_path) 16 | module_path_list = [] 17 | for root, dirs, _ in os.walk(abs_path): 18 | if len(dirs) > 0: 19 | if "nodes" in dirs: 20 | operator_dir = os.path.join(root, "nodes") 21 | module_path_list.append( 22 | operator_dir[operator_dir.rfind("daggit"):].replace("/", ".")) 23 | for path in module_path_list: 24 | try: 25 | module = importlib.import_module(path + '.' + operator_module) 26 | except BaseException: 27 | continue 28 | return getattr(module, operator_name) 29 | -------------------------------------------------------------------------------- /src/main/python/daggit/core/nodes/sklearn.py: -------------------------------------------------------------------------------- 1 | from sklearn.pipeline import Pipeline 2 | 3 | from daggit.core.io.io import Pandas_Dataframe, File_Txt, Pickle_Obj 4 | from daggit.core.base.factory import BaseOperator 5 | from daggit.core.nodes.registry import get_node_callable 6 | from daggit.core.oplib.etl import DFFeatureUnion, ColumnExtractor 7 | from daggit.core.oplib.etl import DFMissingStr, DFOneHot 8 | from daggit.core.oplib.etl import DFMissingNum 9 | 10 | 11 | class CustomPreprocess(BaseOperator): 12 | 13 | @property 14 | def inputs(self): 15 | return {"train": Pandas_Dataframe(self.node.inputs[0]), 16 | "test": Pandas_Dataframe(self.node.inputs[1])} 17 | 18 | @property 19 | def outputs(self): 20 | return {"preprocessed_train": Pandas_Dataframe(self.node.outputs[0]), 21 | "preprocessed_test": Pandas_Dataframe(self.node.outputs[1])} 22 | 23 | def run( 24 | self, 25 | drop_missing_perc, 26 | target_variable, 27 | ignore_variables, 28 | categorical_impute, 29 | numeric_impute): 30 | train = self.inputs["train"].read() 31 | test = self.inputs["test"].read() 32 | 33 | if ignore_variables is not list: 34 | ignore_variables = [ignore_variables] 35 | 36 | data_availability = train.describe( 37 | include='all').loc['count'] / train.shape[0] 38 | selected_cols = data_availability[data_availability > 39 | drop_missing_perc].index 40 | selected_cols = set(selected_cols) - \ 41 | (set([target_variable]).union(set(ignore_variables))) 42 | 43 | numeric_cols = list( 44 | set(list(train._get_numeric_data())).intersection(selected_cols)) 45 | categorical_cols = list(selected_cols - set(numeric_cols)) 46 | 47 | preprocess = Pipeline([("features", 48 | DFFeatureUnion([("numeric", 49 | Pipeline([("num_sel", 50 | ColumnExtractor(numeric_cols)), 51 | ("num_impute", 52 | DFMissingNum(replace='median'))])), 53 | ("categorical", 54 | Pipeline([("cat_sel", 55 | ColumnExtractor(categorical_cols)), 56 | ("str_impute", 57 | DFMissingStr(replace='most_frequent')), 58 | ("one_hot", 59 | DFOneHot())]))]))]) 60 | 61 | processed_train = preprocess.fit_transform(train) 62 | processed_train[target_variable] = train[target_variable] 63 | processed_test = preprocess.transform(test) 64 | 65 | self.outputs["preprocessed_train"].write(processed_train) 66 | self.outputs["preprocessed_test"].write(processed_test) 67 | 68 | 69 | class CrossValidate(BaseOperator): 70 | 71 | @property 72 | def inputs(self): 73 | return {"preprocessed_train": Pandas_Dataframe(self.node.inputs[0])} 74 | 75 | @property 76 | def outputs(self): 77 | return { 78 | "report": File_Txt( 79 | self.node.outputs[0]), "model": Pickle_Obj( 80 | self.node.outputs[1])} 81 | 82 | def run(self, target_variable, model_args, cv_args): 83 | from sklearn.model_selection import cross_val_score 84 | preprocessed_train = self.inputs['preprocessed_train'].read() 85 | 86 | y = preprocessed_train[target_variable] 87 | X = preprocessed_train.drop(target_variable, axis=1).values 88 | 89 | imports = self.node.imports 90 | model = get_node_callable(imports[0][1], module_path=imports[0][0]) 91 | 92 | model_params = model_args['arguments'] 93 | reg = model(**model_params) 94 | scores = cross_val_score(reg, X, y, **cv_args) 95 | 96 | reg.fit(X, y) 97 | report = "MSE: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2) 98 | 99 | self.outputs["model"].write(model) 100 | self.outputs["report"].write(report) 101 | 102 | 103 | class Splitters(BaseOperator): 104 | @property 105 | def inputs(self): 106 | return {"input_df": Pandas_Dataframe(self.node.inputs[0]) 107 | } 108 | 109 | @property 110 | def outputs(self): 111 | return {"train": Pandas_Dataframe(self.node.outputs[0]), 112 | "test": Pandas_Dataframe(self.node.outputs[1])} 113 | 114 | def run(self, model_args): 115 | input_df = self.inputs['input_df'].read() 116 | 117 | imports = self.node.imports 118 | model = get_node_callable(imports[0][1], module_path=imports[0][0]) 119 | 120 | model_params = model_args['arguments'] 121 | 122 | train, test = model(input_df, **model_params) 123 | self.outputs["train"].write(train) 124 | self.outputs["test"].write(test) 125 | -------------------------------------------------------------------------------- /src/main/python/daggit/core/oplib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/main/python/daggit/core/oplib/__init__.py -------------------------------------------------------------------------------- /src/main/python/daggit/runtime/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/main/python/daggit/runtime/__init__.py -------------------------------------------------------------------------------- /src/main/python/daggit/runtime/airflow_dag.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from airflow.models import DAG 4 | from daggit.core.base.parser import create_dag 5 | 6 | # DO NOT REMOVE THE COMMENT 7 | # add b'DAG', b'airflow' to script 8 | 9 | dirname = os.path.dirname(__file__) 10 | filename = os.path.join(dirname, 'graph_location') 11 | 12 | f = open(filename, "r") 13 | lines = f.read().splitlines() 14 | last_yaml = lines[-1] 15 | dag_name = os.path.basename(last_yaml)[:-5] + str(0) 16 | globals()[dag_name] = create_dag(last_yaml) 17 | 18 | # for index, dag in enumerate(f): 19 | # file = dag.replace('\n', '') 20 | # dag_name = os.path.basename(file)[:-5] + str(index) 21 | # # dag_name = file[:-5].split("/")[-1] + str(index) 22 | # globals()[dag_name] = create_dag(file) 23 | -------------------------------------------------------------------------------- /src/main/python/daggit/runtime/airflow_runtime.py: -------------------------------------------------------------------------------- 1 | from future.builtins import super 2 | from airflow.operators.python_operator import PythonOperator 3 | from airflow.utils.trigger_rule import TriggerRule 4 | from daggit.core.nodes.registry import get_node_callable 5 | 6 | 7 | class DaggitPyOp(PythonOperator): 8 | 9 | def __init__(self, node, dag, *args, **kwargs): 10 | 11 | task_id = node.task_id 12 | # do import, search and load 13 | concrete_op_object = get_node_callable(node.operation)(node) 14 | python_callable = getattr(concrete_op_object, '_run') 15 | op_kwargs = node.arguments 16 | super().__init__( 17 | task_id=task_id, 18 | python_callable=python_callable, 19 | op_kwargs=op_kwargs, 20 | dag=dag, 21 | *args, 22 | **kwargs) 23 | -------------------------------------------------------------------------------- /src/main/scripts/daggit: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import sys 5 | import yaml 6 | import shutil 7 | import argparse 8 | from subprocess import call 9 | 10 | from daggit.core.base.utils import nostdout 11 | from daggit.core.base.config import ORCHESTRATOR 12 | 13 | with nostdout(): 14 | from airflow.models import DagModel 15 | from airflow import settings 16 | from airflow.utils import db 17 | from airflow import models 18 | 19 | 20 | # improve orchestrator factory and replace if based switching 21 | def _unpause(dag): 22 | 23 | if ORCHESTRATOR == 'airflow': 24 | 25 | dag = dag 26 | 27 | session = settings.Session() 28 | dm = session.query(DagModel).filter( 29 | DagModel.dag_id == dag.dag_id).first() 30 | dm.is_paused = False 31 | session.commit() 32 | 33 | msg = "Dag: {}, paused: {}".format(dag, str(dag.is_paused)) 34 | print(msg) 35 | 36 | 37 | def init(graph): 38 | if ORCHESTRATOR == 'airflow': 39 | os.path.expanduser('~') 40 | airflow_home = os.getenv('AIRFLOW_HOME', os.path.expanduser('~/airflow')) 41 | dag_location = os.path.join(airflow_home, 'dags') 42 | pkgdir = sys.modules['daggit'].__path__[0] 43 | airflow_dag_file = pkgdir + '/runtime/airflow_dag.py' 44 | try: 45 | os.remove(os.path.join(dag_location,'airflow_dag.py')) 46 | except OSError: 47 | pass 48 | try: 49 | os.remove(os.path.join(airflow_home, 'airflow.db')) 50 | except OSError: 51 | pass 52 | if not os.path.exists(dag_location): 53 | os.makedirs(dag_location) 54 | shutil.copy2(airflow_dag_file, dag_location) 55 | 56 | graph_location = os.path.join(os.getcwd(), graph) 57 | 58 | if os.path.isfile(os.path.join(os.getcwd(), graph)): 59 | pass 60 | else: 61 | raise FileNotFoundError("The path to the dag file could not be resolved") 62 | # edit today 63 | graph_name = os.path.join(dag_location, 'graph_location') 64 | # f = open(os.path.join(dag_location, 'graph_location'), "w+") 65 | # f.write(graph_location) 66 | # f.close() 67 | # db.initdb() 68 | if os.path.exists(graph_name) and os.path.getsize(graph_name)>0: 69 | file = open(graph_name, "r") 70 | read_file = file.readlines() 71 | new_lines = [] 72 | for line in read_file: 73 | # Strip white spaces 74 | line = line.strip() 75 | if line == graph_location: 76 | pass 77 | else: 78 | new_lines.append(line) 79 | with open(graph_name, "w") as fp: 80 | for n in new_lines: 81 | fp.write(n+"\n") 82 | 83 | f = open(graph_name, "a+") 84 | f.write(graph_location+"\n") 85 | f.close() 86 | db.initdb() 87 | 88 | 89 | def run(dag_id, clear_all, clear_failed): 90 | 91 | if ORCHESTRATOR == 'airflow': 92 | if clear_all: 93 | call(["airflow", "clear", "-c", dag_id]) 94 | if clear_failed: 95 | call(["airflow", "clear", "-c", "-f", dag_id]) 96 | print("Running----") 97 | dbag = models.DagBag() 98 | mlwb = dbag.get_dag(dag_id) 99 | #_unpause(mlwb) 100 | print("mlwb:", mlwb) 101 | print("---start running") 102 | mlwb.run() 103 | print("---stop running") 104 | 105 | 106 | if __name__ == '__main__': 107 | 108 | from sys import argv 109 | 110 | parser = argparse.ArgumentParser(prog='daggit') 111 | parser.add_argument("-v", "--version", help="MLWB version" 112 | , action='store_true', dest='version', default=False) 113 | subparsers = parser.add_subparsers(dest='cmd', help='sub-commands') 114 | 115 | parser_init = subparsers.add_parser('init', help='Initiates a DAG.') 116 | parser_init.add_argument('graph', help='YAML file location.') 117 | 118 | parser_run = subparsers.add_parser('run', help='Runs a DAG.') 119 | parser_run.add_argument('dag_id', help='Experiment name (DAG ID)') 120 | parser_run.add_argument('--clear_all', action='store_true', dest='clear_all', default=False 121 | , help='Clears state of all the tasks before running the DAG.') 122 | parser_run.add_argument('--clear_failed', action='store_true', dest='clear_failed', default=False 123 | , help='Clears state of failed tasks before running the DAG.') 124 | 125 | args = parser.parse_args() 126 | 127 | 128 | if args.cmd == "init": 129 | init(args.graph) 130 | 131 | if args.cmd == "run": 132 | run(args.dag_id, args.clear_all, args.clear_failed) 133 | 134 | if args.version: 135 | print("version - 0.0.2") 136 | -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/TestingUtils.py: -------------------------------------------------------------------------------- 1 | import difflib 2 | import pandas as pd 3 | from daggit.contrib.sunbird.oplib.taggingUtils import get_tagme_spots 4 | 5 | 6 | def sentence_similarity(sentence1, sentence2, threshold): 7 | sentence = difflib.SequenceMatcher(lambda x: x == " ", sentence1, sentence2) 8 | similarity_score = sentence.ratio()*100 9 | if similarity_score >= threshold: 10 | return 1 11 | else: 12 | return 0 13 | 14 | 15 | def intersection_lists(list_1, list_2, threshold): 16 | if 1.0*(len(set(list_1) & set(list_2))/min(len(set(list_1)), len(set(list_2)))) > threshold: 17 | return 1 18 | else: 19 | return 0 20 | 21 | 22 | def keyword_extraction(path_to_text, expected_output): 23 | file_ = open(path_to_text, "r") 24 | text = file_.readline() 25 | if text == '': 26 | return "Text is not available" 27 | else: 28 | tagme_df = get_tagme_spots(path_to_text) 29 | actual_output = list(tagme_df["keyword"]) 30 | intersection_lists_output = intersection_lists(actual_output, expected_output, 0.8) 31 | return intersection_lists_output 32 | 33 | 34 | def text_reading(text_location): 35 | file = open(text_location, "r") 36 | text = file.readline() 37 | return text 38 | -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/unittest/python/contrib/sunbird/__init__.py -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/content_reuse_tests.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | 4 | import pandas as pd 5 | import os 6 | from daggit.core.oplib.misc import df_feature_check 7 | 8 | testdir = os.path.dirname(os.path.realpath(__file__)) 9 | srcdir = '../../../../../src/unittest/python/contrib' 10 | os.chdir(os.path.join(testdir, '../../../')) 11 | abs_path = os.path.abspath(os.path.join(testdir, srcdir)) 12 | test_case_data_location = abs_path + "/sunbird/test_cases_data/" 13 | sys.path.insert(0, abs_path) 14 | 15 | 16 | class UnitTests(unittest.TestCase): 17 | 18 | @staticmethod 19 | def test_content_resuse_scoring_data(): 20 | cols = ['STB_Id', 'STB_Grade', 'STB_Section', 'STB_Text', 'Ref_id', 'Ref_Grade', 'Ref_Section', 'Ref_Text'] 21 | case1 = pd.read_csv( 22 | test_case_data_location + "df_feature_check/" + "content_reuse_preparation_feature_check.csv") 23 | assert df_feature_check(case1, cols) 24 | 25 | @staticmethod 26 | def test_content_reuse_evaluation_data(): 27 | cols = ['state_topic_id', 'reference_topic_id', 'pred_label_percentage', 'TP_count', 'FP_count', 'TN_count', 28 | 'FN_count', 'actual_label'] 29 | case1 = pd.read_csv( 30 | test_case_data_location + "df_feature_check/" + "content_reuse_evaluation_feature_check.csv") 31 | assert df_feature_check(case1, cols) 32 | -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/PdfText/id_1/ExpText.txt: -------------------------------------------------------------------------------- 1 | 'A Simple PDF File This is a small demonstration .pdf file just for use in the Virtual Mechanics tutorials. More text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. Boring, zzzzz. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. Even more. Continued on page ... Simple PDF File ...continued from page . Yet more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. Oh, how boring typing this stuff. But not as boring as watching paint dry. And more text. And more text. And more text. And more text. Boring. More, a little more text. The end, and just as well. A Simple PDF File This is a small demonstration .pdf file just for use in the Virtual Mechanics tutorials. More text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. Boring, zzzzz. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. Even more. Continued on page ... Simple PDF File ...continued from page . Yet more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. Oh, how boring typing this stuff. But not as boring as watching paint dry. And more text. And more text. And more text. And more text. Boring. More, a little more text. The end, and just as well.' -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/PdfText/id_1/actualText.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/unittest/python/contrib/sunbird/test_cases_data/PdfText/id_1/actualText.pdf -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/PdfText/id_2/ExpText.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/unittest/python/contrib/sunbird/test_cases_data/PdfText/id_2/ExpText.txt -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/PdfText/id_2/actualText.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/unittest/python/contrib/sunbird/test_cases_data/PdfText/id_2/actualText.pdf -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/PdfText/id_3/ExpText.txt: -------------------------------------------------------------------------------- 1 | LESSON PLAN 2 | Subject: Mathematics 3 | Unit: Polygon 4 | Topic: How to find the sum of the interior angle of nth polygon ? Class: 9th 5 | 1. Learning points. 6 | How to find the sum of the interior angle of nth polygon ? 7 | 2. Learning outcomes, 8 | To facilitate the students, 9 | 1. To recall the definition of polygon 10 | 2. To recognize the different types of polygon. 11 | 3. To recognize how the polygon is divided into geometrical figures 12 | when they draw the diagonal. 13 | 4. To recognize to find the sum of the interior angle of nth polygon. 14 | 3. Management of learners. Group learning 15 | Group type: learning together 16 | 4. Instructional strategies: Indo detective method 17 | 5. Resources required 18 | Powerpoint presentation, geogebra, image of different types of polygon . 19 | 6. Evidence for learning Test. 20 | What teacher doesWhat students will do 21 | Engage 22 | 360. 23 | No response.Statement of aim: in todays class we will study “ how to find the sum of the interior angle of nth polygon?” 24 | What teacher does What students will do We shall learn this answer through an activity. 25 | Now make a group of 4 to 5 members. 26 | Now, listen students, I will show pictures on screen observe it, note Ok 27 | Students will make group. 28 | Ok mamdown in books then I will give time discuss in group and answer to my question. 29 | Explore: 30 | in geogebra Observe each stage 31 | Express: 32 | Each one of you have observed each stage start discuss in group what you have observed. 33 | After few minutes teacher ask each group to tell what they have observed? 34 | Answer will be elicited from each group. 35 | inside the polygon after drawing diagonal? -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/PdfText/id_3/actualText.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/unittest/python/contrib/sunbird/test_cases_data/PdfText/id_3/actualText.pdf -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/SpeechText/id_1/assets/audio_split/id_1/id_1_0.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/unittest/python/contrib/sunbird/test_cases_data/SpeechText/id_1/assets/audio_split/id_1/id_1_0.mp3 -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/SpeechText/id_1/assets/audio_split/id_1/id_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/unittest/python/contrib/sunbird/test_cases_data/SpeechText/id_1/assets/audio_split/id_1/id_1_0.wav -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/SpeechText/id_1/assets/id_1.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/unittest/python/contrib/sunbird/test_cases_data/SpeechText/id_1/assets/id_1.mp3 -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/SpeechText/speech_to_text_exp_output.txt: -------------------------------------------------------------------------------- 1 | 'hello and welcome British Council was the teacher development service for secondary school teachers in Bihar result a part of bless all the Bihar language initiative Secondary School this is a teacher development project in partnership with British Council Bihar Madhyamik Shiksha Parishad and ESIC the DVD and workbook access access learning resource for English language teachers hello and welcome British Council was the teacher development service for secondary school teachers in Bihar results are part of this order Bihar language initiative for secondary school this is a teacher development project in partnership with British Council Bihar Madhyamik Shiksha Parishad and BF I see the DVD and workbook access access learning resource for English language teachers hello and welcome British Council was the teacher development films for secondary school teacher in Bihar results are part of blood are the Bihar language initiative for secondary school this is a teacher development project in partnership with British Council Bihar Madhyamik Shiksha Parishad and ESIC the DVD and what is a self access learning resource for English language teachers ' -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/bert_scoring/bert_scoring_mandatory_fields.yaml: -------------------------------------------------------------------------------- 1 | mandatory_fields: !!set 2 | sentence1_score: null 3 | sentence2_score: null 4 | pred_score: null 5 | sent1_general: null 6 | sent2_general: null 7 | ref_topic_id: null 8 | ref_grade: null 9 | ref_section: null 10 | actual_label: null 11 | stb_topic_id: null 12 | STB_Grade: null 13 | STB_Section: null 14 | TypeofMatch: null 15 | -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/bert_scoring/siamese_configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "EMBEDDING_DIM": 768, 3 | "MAX_SEQUENCE_LENGTH" : 10, 4 | "VALIDATION_SPLIT": 0.1, 5 | "RATE_DROP_LSTM": 0.17, 6 | "RATE_DROP_DENSE": 0.25, 7 | "NUMBER_LSTM": 50, 8 | "NUMBER_DENSE_UNITS": 50, 9 | "ACTIVATION_FUNCTION": "relu" 10 | } -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/bert_scoring/tokenizer.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/unittest/python/contrib/sunbird/test_cases_data/bert_scoring/tokenizer.pkl -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/bert_scoring/topic_aggregation_mandatory_fields.yaml: -------------------------------------------------------------------------------- 1 | mandatory_fields: 2 | state_topic_id: null 3 | reference_topic_id: null 4 | pred_label_percentage: null 5 | actual_label: null 6 | tp_col_name: null 7 | fp_col_name: null 8 | tn_col_name: null 9 | fn_col_name: null 10 | 11 | mandatory_column_name: 12 | stb_topic_col_name: state_topic_id 13 | ref_topic_col_name: reference_topic_id 14 | pred_agg_col_name: pred_label_percentage 15 | label_col_name: actual_label 16 | tp_col_name: TP_count 17 | fp_col_name: FP_count 18 | tn_col_name: TN_count 19 | fn_col_name: FN_count 20 | -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/df_feature_check/ContentTagging_mandatory_fields.yaml: -------------------------------------------------------------------------------- 1 | mandatory_fields: !!set 2 | artifactUrl: null 3 | concepts: null 4 | contentType: null 5 | createdBy: null 6 | description: null 7 | downloadUrl: null 8 | gradeLevel: null 9 | identifier: null 10 | keywords: null 11 | language: null 12 | name: null 13 | objectType: null 14 | subject: null 15 | -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/df_feature_check/Content_Meta_feature_checking_df_2.csv: -------------------------------------------------------------------------------- 1 | ,Col1,Col2 2 | 0,Alex,10 3 | 1,Bob,12 4 | 2,Clarke,13 5 | -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/df_feature_check/Content_Meta_feature_checking_df_3.csv: -------------------------------------------------------------------------------- 1 | ,index,col1,col2,col3,author,concepts,contentType,createdBy,description,downloadUrl,gradeLevel,identifier,keywords,language,name,objectType,status,subject,variants,content_type 2 | 0,2,,https://www.youtube.com/embed/ypxU42bbqWw?autoplay=1&enablejsapi=1,[TISS],,[BED20000],Resource,2defd89a-5ed9-4ced-8efd-9fabeebe0f03,Activity 1: BMI Activity,https://ekstep-public-prod.s3-ap-south-1.amazonaws.com/ecar_files/do_3123527002562723842121/activity-1-bmi-activity_1530899952197_do_3123527002562723842121_2.0.ecar,[Other],do_3123527002562723842121,[Course],[English],Activity 1: BMI Activity,Content,Live,Other,"{u'spine': {u'size': 685416.0, u'ecarUrl': u'https://ekstep-public-prod.s3-ap-south-1.amazonaws.com/ecar_files/do_3123527002562723842121/activity-1-bmi-activity_1530899952416_do_3123527002562723842121_2.0_spine.ecar'}}",youtube 3 | 1,3,,https://www.youtube.com/embed/_FqFkn9Ns50?autoplay=1&enablejsapi=1,[TISS],,[BED20000],Resource,2defd89a-5ed9-4ced-8efd-9fabeebe0f03,Activity 1: Designing lessons using TPACK framework,https://ekstep-public-prod.s3-ap-south-1.amazonaws.com/ecar_files/do_3123527054692761601141/activity-1-designing-lessons-using-tpack-framework_1530899953604_do_3123527054692761601141_2.0.ecar,[Other],do_3123527054692761601141,[Course],[English],Activity 1: Designing lessons using TPACK framework,Content,Live,Other,"{u'spine': {u'size': 685423.0, u'ecarUrl': u'https://ekstep-public-prod.s3-ap-south-1.amazonaws.com/ecar_files/do_3123527054692761601141/activity-1-designing-lessons-using-tpack-framework_1530899953801_do_3123527054692761601141_2.0_spine.ecar'}}",youtube 4 | 2,0,,https://www.youtube.com/embed/fmZrPQ4eynk?autoplay=1&enablejsapi=1,,,[LO39],Resource,f79637f0-2c24-42ca-84b8-fc0d8190a392,ABCD,https://ekstep-public-prod.s3-ap-south-1.amazonaws.com/ecar_files/do_31248639264259276813829/abcd_1525418702948_do_31248639264259276813829_1.0.ecar,[Class 2],do_31248639264259276813829,[Story],[English],ABCD,Content,Live,Hindi,"{u'spine': {u'size': 92516.0, u'ecarUrl': u'https://ekstep-public-prod.s3-ap-south-1.amazonaws.com/ecar_files/do_31248639264259276813829/abcd_1525418703259_do_31248639264259276813829_1.0_spine.ecar'}}",youtube 5 | 3,11,,https://youtu.be/iEtjl3TqwB4,[TISS],,[BED20000],Resource,2defd89a-5ed9-4ced-8efd-9fabeebe0f03,Activity 2 (Optional): Paulo Freire's Video,https://ekstep-public-prod.s3-ap-south-1.amazonaws.com/ecar_files/do_312352520252432384264/activity-2-optional-paulo-freires-video_1530899930533_do_312352520252432384264_2.0.ecar,[Other],do_312352520252432384264,[Course],[English],Activity 2 (Optional): Paulo Freire's Video,Content,Live,Other,"{u'spine': {u'size': 685383.0, u'ecarUrl': u'https://ekstep-public-prod.s3-ap-south-1.amazonaws.com/ecar_files/do_312352520252432384264/activity-2-optional-paulo-freires-video_1530899930717_do_312352520252432384264_2.0_spine.ecar'}}",youtube 6 | -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/keyword_extraction/empty.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-sunbird/sunbird-ml-workbench/e971357c8030200cfa645e0cc31ffcfc3916d9d9/src/unittest/python/contrib/sunbird/test_cases_data/keyword_extraction/empty.txt -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/keyword_extraction/eng_text_actual_keywords.csv: -------------------------------------------------------------------------------- 1 | KEYWORDS 2 | sss 3 | shiksha 4 | service 5 | english 6 | development project 7 | press 8 | resource 9 | project 10 | madhyamik 11 | british council 12 | english language 13 | hello and welcome 14 | bless 15 | teacher 16 | secondary school 17 | development 18 | initiative 19 | partnership 20 | gf 21 | school teachers 22 | bihar 23 | access 24 | workbook 25 | learning 26 | teachers 27 | dvd 28 | -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/keyword_extraction/english.txt: -------------------------------------------------------------------------------- 1 | hello and welcome British Council press the teacher development service for secondary school teachers in Bihar this is a part of bless all the Bihar language initiative Secondary School this is a teacher development project in partnership with British Council Bihar Madhyamik Shiksha Parishad and GF I see the DVD and workbook SSS access learning resource for English language teachers -------------------------------------------------------------------------------- /src/unittest/python/contrib/sunbird/test_cases_data/keywords.csv: -------------------------------------------------------------------------------- 1 | KEYWORDS 2 | teachers 3 | shiksha 4 | british council 5 | access 6 | development 7 | teacher 8 | bihar 9 | madhyamik 10 | service 11 | sss 12 | school teachers 13 | english 14 | learning 15 | resource 16 | english language 17 | workbook 18 | press 19 | partnership 20 | project 21 | secondary school 22 | gf 23 | bless 24 | development project 25 | dvd 26 | hello and welcome 27 | initiative 28 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | skipsdist=True 3 | envlist = py36 4 | [testenv] 5 | deps = pybuilder 6 | commands = pyb install_dependencies 7 | pyb analyze 8 | pyb publish 9 | 10 | [testenv:docs] 11 | basepython = python 12 | changedir = doc 13 | deps = sphinx 14 | commands = sphinx-build -W -b html -d {envtmpdir}/doctrees . {envtmpdir}/html 15 | --------------------------------------------------------------------------------