├── .github └── FUNDING.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE.md ├── ML-on-code-programming-source-code.md ├── Programming-in-Python.md ├── Python-Performance.md ├── README-details.md ├── README.md ├── blogs └── keras-bag-of-words-expanded-version │ ├── .DS_Store │ ├── README.md │ ├── Running-keras-so-posts-python-program.gif │ ├── building-the-model.md │ ├── confusion_matrix_keras_posts.png │ ├── data-scripts-notebooks │ ├── keras-bow-model.ipynb │ ├── keras-so-posts.py │ ├── stack-overflow-posts.csv │ └── wandb-keras-so-posts.py │ ├── generating-predictions.md │ ├── google-bigquery │ ├── 01-google-account-sign-in.png │ ├── 02-google-create-project-01.png │ ├── 03-google-create-project-02.png │ ├── 04-add-member-to-project.png │ ├── 05-enable-billing.png │ ├── 06-enable-BigQuery-API.png │ ├── 07-BigQuery-Web-Console.png │ ├── 08-add-credentials.png │ ├── 09-add-credentials-for-service-accounts.png │ ├── 10-BigQuery-console.png │ ├── 11-credentials-confirmation.png │ ├── 12-save-sql-query-results.png │ ├── 13-download-JSON-or-CSV.png │ ├── 14-create-a-dataset-01.png │ ├── 14-create-a-dataset-02.png │ ├── 15-copy-table.png │ ├── 16-BigQuery-table-details.png │ ├── 17-google-cold-storage.png │ ├── 18-cold-storage-create-bucket-01.png │ ├── 18-cold-storage-create-bucket-02.png │ ├── 18-cold-storage-create-bucket-03.png │ ├── 19-export-to-cold-storage-01.png │ ├── 19-export-to-cold-storage-02.png │ ├── 19-export-to-cold-storage-03.png │ └── README.md │ ├── metrics.md │ ├── preprocessing-data.md │ ├── preprocessing-data.png │ ├── processing-input-features.md │ ├── processing-output-labels.md │ └── training-results-animated.gif ├── cloud-devops-infra ├── README.md ├── about-neural-magic.md ├── about-vast.ai.md └── gpus │ ├── Applications-of-GPU-Server.jpg │ ├── Avermedia-Box-PC-and-Carrier-1-of-2-works-with-NVidia-Jetson.jpg │ ├── Avermedia-Box-PC-and-Carrier-2-of-2-works-with-NVidia-Jetson.jpg │ ├── GPU-Server-side-1-of-2.jpg │ ├── 
GPU-Server-side-2-of-2.jpg │ └── rapids.md ├── competitions.md ├── courses.md ├── data ├── README.md ├── about-Dataiku.md ├── about-Google-Data-Studio.md ├── about-H2O-Driverless-AI.md ├── about-Microstrategy.md ├── about-ModeAnalytics.md ├── about-Pipeline.ai.md ├── about-Tableau-Prep.md ├── about-Valohai.md ├── about-Weights-and-Biases.md ├── about-fast.ai.md ├── bamboolib.md ├── courses-books.md ├── data-exploratory-analysis.md ├── data-generation.md ├── data-preparation.md ├── databases.md ├── datasets.md ├── feature-engineering.md ├── feature-selection.md ├── frameworks-checklists.md ├── how-to-choose-your-data-visualisations.jpg ├── model-analysis-interpretation-explainability.md ├── model-creation.md ├── pandas-profiling.md ├── programs-and-tools.md ├── statistics.md ├── wandb │ ├── Activation-Function.png │ ├── More-resources.md │ ├── Quick-and-Dirty-CNN.py │ └── code-snippets.py └── what-is-a-tensor.jpg ├── details ├── articles-papers-code-data-courses.md ├── artificial-intelligence.md ├── cheatsheets.md ├── dmm.md ├── java-jvm.md ├── javascript.md ├── julia-python-and-r.md ├── julia-python-and-r │ ├── deep-learning.md │ ├── machine-learning.md │ └── reinforcement-learning.md ├── machine-learning-algorithms.md ├── mathematica-wolfram-Language.md ├── maths-stats-probability.md ├── misc.md ├── pytorch.md └── visualisation.md ├── examples ├── JuPyteR │ ├── .gitignore │ ├── Docker-container-instructions.md │ ├── LICENSE.txt │ ├── Other-kernel-installation-methods.md │ ├── README.md │ ├── beakerx-homepage-screenshot.png │ ├── build-docker-image │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── buildDockerImage.sh │ │ ├── graalvm_jdk_version.txt │ │ ├── graalvm_version.txt │ │ ├── install-java-kernel.sh │ │ ├── install-jupyter-notebooks.sh │ │ ├── push-jupyter-java-docker-image-to-hub.sh │ │ ├── removeUnusedContainersAndImages.sh │ │ └── runLocal.sh │ ├── installDocker.sh │ ├── notebooks │ │ └── MyFirstJavaNotebook.ipynb │ ├── runDockerContainer.sh │ └── 
version.txt ├── apache-zeppelin │ ├── .gitignore │ ├── LICENSE.txt │ ├── README.md │ ├── Zeppelin-Dockerfile │ ├── buildZeppelinDockerImage.sh │ ├── graalvm_jdk_version.txt │ ├── graalvm_version.txt │ ├── installDocker.sh │ ├── push-apache-zeppelin-docker-image-to-hub.sh │ ├── removeUnusedContainersAndImages.sh │ ├── runZeppelinDockerContainer.sh │ └── version.txt ├── better-nlp │ ├── .gitignore │ ├── Examples.md │ ├── LICENSE.md │ ├── README.md │ ├── build │ │ ├── Dockerfile │ │ ├── buildDockerImage.sh │ │ ├── install-dependencies.sh │ │ ├── install-linux.sh │ │ ├── install-macos.sh │ │ └── push-better-nlp-docker-image-to-hub.sh │ ├── docs │ │ ├── Better-NLP-in-Jupyter-Notebook.png │ │ ├── Docker-container-console-Jupyter-lab-loading.png │ │ ├── Docker_environment.md │ │ ├── Examples.md │ │ └── Jupyter_notebook.md │ ├── library │ │ ├── examples │ │ │ ├── extract-entities-from-text.py │ │ │ ├── extract-noun-chunks-from-text.py │ │ │ ├── gather-facts-from-text.py │ │ │ └── obfuscate-privacy-details-in-the-text.py │ │ └── org │ │ │ ├── __init__.py │ │ │ └── neomatrix369 │ │ │ ├── __init__.py │ │ │ ├── better_nlp.py │ │ │ ├── nlp_profiler.py │ │ │ ├── summariser_cosine.py │ │ │ ├── summariser_pytextrank.py │ │ │ ├── summariser_tfidf.py │ │ │ └── summariser_tfidf_variation.py │ ├── notebooks │ │ ├── google-colab │ │ │ ├── better_nlp_spacy_texacy_examples.ipynb │ │ │ └── better_nlp_summarisers.ipynb │ │ └── jupyter │ │ │ ├── better_nlp_spacy_texacy_examples.ipynb │ │ │ ├── better_nlp_summarisers.ipynb │ │ │ ├── nlp_profiler-granular.ipynb │ │ │ └── nlp_profiler.ipynb │ ├── presentations │ │ ├── 09-Mar-2019 │ │ │ └── Better-NLP-Presentation-Slides.pdf │ │ └── 29-Jun-2019 │ │ │ └── Better-NLP-2.0-one-library-rules-them-all-Presentation-Slides.pdf │ ├── runDockerImage.sh │ └── version.txt ├── cloud-devops-infra │ ├── valohai │ │ ├── MLPMnist │ │ │ ├── .gitignore │ │ │ └── README.md │ │ └── nlp-cuda │ │ │ └── README.md │ └── wandb │ │ ├── feature-importance │ │ ├── 
catboost_feature_importance_tutorial.ipynb │ │ └── lightgbm_feature_importance_tutorial.ipynb │ │ └── sb-fx-competition │ │ ├── Method-02-Linear-Model.html │ │ ├── Method-02-Linear-Model.ipynb │ │ └── README.md ├── data │ ├── databases │ │ └── graph │ │ │ └── grakn │ │ │ ├── .bashrc │ │ │ ├── .dockerignore │ │ │ ├── 0001-Build-a-distribution-with-uber-jars.patch │ │ │ ├── Dockerfile │ │ │ ├── LICENSE.txt │ │ │ ├── README.md │ │ │ ├── build-all-versions-grakn-graalvm-docker-images.sh │ │ │ ├── build-on-the-cloud │ │ │ ├── create-project.sh │ │ │ ├── exec-step.sh │ │ │ ├── show-final-result.sh │ │ │ └── watch-execution.sh │ │ │ ├── builder.sh │ │ │ ├── common.sh │ │ │ ├── docs │ │ │ ├── build-the-grakn-docker-container.md │ │ │ ├── building-grakn.md │ │ │ ├── grakn-docker-container.md │ │ │ ├── run-the-performance-script.md │ │ │ ├── scripts-provided.md │ │ │ └── successful-run-console.md │ │ │ ├── graalvm_jdk_version.txt │ │ │ ├── graalvm_version.txt │ │ │ ├── grakn-graalvm-version-matrix.txt │ │ │ ├── grakn-jar-runner.sh │ │ │ ├── grakn-runner.sh │ │ │ ├── grakn_version.txt │ │ │ ├── graql │ │ │ ├── English-to-Graql.md │ │ │ ├── Graql-to-English.ipynb │ │ │ ├── Graql-to-English.md │ │ │ ├── README.md │ │ │ ├── english-graql-queries.py │ │ │ ├── extract_keywords.py │ │ │ ├── grakn_console_client.py │ │ │ ├── pattern-matching-analysis.py │ │ │ ├── pattern-matching.py │ │ │ ├── requirements.txt │ │ │ ├── run-python-in-docker.sh │ │ │ ├── schema-relationship-graph.png │ │ │ └── workbase-front-screen.png │ │ │ ├── mergeJson.sh │ │ │ ├── performance-benchmark-scripts │ │ │ ├── iterate-thru-grakn-graalvm-versions.sh │ │ │ ├── linux │ │ │ │ ├── grakn-1.4.3-graalvm-ce-19.0.0-startup-times.logs │ │ │ │ ├── grakn-1.4.3-graalvm-ce-19.1.0-startup-times.logs │ │ │ │ ├── grakn-1.4.3-graalvm-ce-19.2.0-startup-times.logs │ │ │ │ ├── grakn-1.4.3-graalvm-ce-java8-19.3.0-startup-times.logs │ │ │ │ ├── grakn-1.5.2-graalvm-ce-19.0.0-startup-times.logs │ │ │ │ ├── 
grakn-1.5.2-graalvm-ce-19.1.0-startup-times.logs │ │ │ │ ├── grakn-1.5.2-graalvm-ce-19.2.0-startup-times.logs │ │ │ │ ├── grakn-1.5.2-graalvm-ce-java8-19.3.0-startup-times.logs │ │ │ │ ├── grakn-1.5.7-graalvm-ce-19.0.0-startup-times.logs │ │ │ │ ├── grakn-1.5.7-graalvm-ce-19.1.0-startup-times.logs │ │ │ │ ├── grakn-1.5.7-graalvm-ce-19.2.0-startup-times.logs │ │ │ │ ├── grakn-1.5.7-graalvm-ce-java8-19.3.0-startup-times.logs │ │ │ │ ├── grakn-1.6.0-graalvm-ce-19.0.0-startup-times.logs │ │ │ │ ├── grakn-1.6.0-graalvm-ce-19.1.0-startup-times.logs │ │ │ │ ├── grakn-1.6.0-graalvm-ce-19.2.0-startup-times.logs │ │ │ │ ├── grakn-1.6.0-graalvm-ce-java8-19.3.0-startup-times.logs │ │ │ │ ├── grakn-1.6.2-graalvm-ce-19.0.0-startup-times.logs │ │ │ │ ├── grakn-1.6.2-graalvm-ce-19.1.0-startup-times.logs │ │ │ │ ├── grakn-1.6.2-graalvm-ce-19.2.0-startup-times.logs │ │ │ │ └── grakn-1.6.2-graalvm-ce-java8-19.3.0-startup-times.logs │ │ │ ├── measureTradVersusGraalVMStartupTime.sh │ │ │ ├── output-from-running-performance-script.md │ │ │ └── runPerformanceBenchmark.sh │ │ │ ├── presentations │ │ │ ├── GraknCosmos2020 │ │ │ │ └── Naturally,-getting-productive,-my-journey-with-Grakn-and-Graql.pdf │ │ │ └── README.md │ │ │ ├── pull-docker-images-from-hub.sh │ │ │ ├── startGraknAndGraql.sh │ │ │ └── valohai.yaml │ ├── dataiku │ │ ├── Dockerfile │ │ ├── LICENSE.txt │ │ ├── README.md │ │ ├── buildDockerImage.sh │ │ ├── dataiku-dss-page-01.png │ │ ├── dataiku-dss-page-02.png │ │ ├── dataiku-dss-page-03.png │ │ ├── graalvm_jdk_version.txt │ │ ├── graalvm_version.txt │ │ ├── measureTradVersusGraalVMLoadTime.sh │ │ ├── push-dss-docker-image-to-hub.sh │ │ ├── removeUnusedContainersAndImages.sh │ │ ├── runDSS.sh │ │ ├── runDssInDocker.sh │ │ └── successful-run-console.md │ └── feature-importance-filtering │ │ └── feature_importance.py ├── deepnetts │ ├── .gitignore │ ├── CONTRIBUTING.md │ ├── LICENSE.txt │ ├── README.md │ ├── build.gradle │ ├── builder.sh │ ├── common.sh │ ├── datasets │ │ ├── 
iris.csv │ │ ├── iris_data_normalised.txt │ │ └── linear.csv │ ├── deployments │ │ └── oci │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── credentials.rc_template │ │ │ ├── get-instance-public-ip.sh │ │ │ ├── get-notebook-url.sh │ │ │ ├── infrastructure.tf │ │ │ ├── init.sh │ │ │ ├── open-notebook-in-browser.sh │ │ │ ├── provision.sh │ │ │ └── run-docker-container-in-the-cloud.sh │ ├── docker-image │ │ ├── .bashrc │ │ ├── Dockerfile │ │ ├── common.sh │ │ ├── deepnetts_version.txt │ │ ├── graalvm_jdk_version.txt │ │ ├── graalvm_version.txt │ │ ├── install-deepnetts.sh │ │ ├── install-java-kernel.sh │ │ └── version.txt │ ├── docker-runner.sh │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ ├── pom.xml │ ├── settings.gradle │ └── src │ │ └── main │ │ └── java │ │ └── org │ │ └── neomatrix369 │ │ └── deepnetts │ │ ├── ClassificationExample.java │ │ ├── DeepNettsMachine.java │ │ ├── RegressionExample.java │ │ └── utils │ │ ├── CsvFile.java │ │ ├── ExampleDataSets.java │ │ └── Plot.java ├── ensembler │ ├── .gitignore │ ├── LICENSE.txt │ ├── README.md │ ├── analysis │ │ ├── 01_ComparingResults.ipynb │ │ ├── 02_EnsembleTribuoResults.ipynb │ │ └── 03_EnsembleTribuoDeepNettsResults.ipynb │ ├── build.gradle │ ├── builder.sh │ ├── common.sh │ ├── datasets │ │ ├── deepnetts-linear-regression-validation.csv │ │ ├── deepnetts-tribuo-4-lr-ensembles-average.csv │ │ ├── deepnetts-tribuo-4-lr-ensembles-geomean.csv │ │ ├── deepnetts-tribuo-4-lr-ensembles-harmonic-mean.csv │ │ ├── deepnetts-tribuo-4-lr-ensembles-median.csv │ │ ├── linear-for-deepnetts.csv │ │ ├── linear-for-tribuo.csv │ │ ├── tribuo-3-lr-ensembles-average.csv │ │ ├── tribuo-3-lr-ensembles-geomean.csv │ │ ├── tribuo-3-lr-ensembles-harmonic-mean.csv │ │ ├── tribuo-3-lr-ensembles-median.csv │ │ ├── tribuo-linear-regression-ada-validation.csv │ │ ├── tribuo-linear-regression-cart-validation.csv │ │ ├── 
tribuo-linear-regression-sgd-validation.csv │ │ └── tribuo-linear-regression-xgb-validation.csv │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ ├── pom.xml │ ├── settings.gradle │ └── src │ │ └── main │ │ └── java │ │ └── org │ │ └── neomatrix369 │ │ └── ensembler │ │ ├── EnsemblerMachine.java │ │ ├── RegressionDeepNettsExample.java │ │ ├── RegressionTribuoExample.java │ │ └── utils │ │ ├── CsvFile.java │ │ ├── ExampleDataSets.java │ │ └── Plot.java ├── nlp-java-jvm │ └── README.md ├── python │ └── frameworks │ │ └── streamlit │ │ ├── README.md │ │ ├── app.py │ │ ├── example.jpg │ │ └── example.mp3 └── tribuo │ ├── .gitignore │ ├── CONTRIBUTING.md │ ├── LICENSE.txt │ ├── README.md │ ├── build.gradle │ ├── builder.sh │ ├── common.sh │ ├── datasets │ ├── bezdekIris.data │ └── winequality-red.csv │ ├── deployments │ └── oci │ │ ├── .gitignore │ │ ├── README.md │ │ ├── credentials.rc_template │ │ ├── get-instance-public-ip.sh │ │ ├── get-notebook-url.sh │ │ ├── infrastructure.tf │ │ ├── init.sh │ │ ├── open-notebook-in-browser.sh │ │ ├── provision.sh │ │ └── run-docker-container-in-the-cloud.sh │ ├── docker-image │ ├── .bashrc │ ├── Dockerfile │ ├── common.sh │ ├── graalvm_jdk_version.txt │ ├── graalvm_version.txt │ ├── install-java-kernel.sh │ ├── install-tribuo.sh │ ├── tribuo_version.txt │ └── version.txt │ ├── docker-runner.sh │ ├── gradle │ └── wrapper │ │ ├── gradle-wrapper.jar │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ ├── outputs │ ├── native-image-build-output.log │ ├── tribuo-classification-example-output.txt │ └── tribuo-regression-example-output.txt │ ├── pom.xml │ ├── settings.gradle │ └── src │ └── main │ └── java │ └── org │ └── neomatrix369 │ └── tribuo │ ├── ClassificationExample.java │ ├── RegressionExample.java │ └── TribuoMachine.java ├── guides.md ├── natural-language-processing ├── README.md ├── code-mixing.md ├── 
course-tutorial-learning-resources.md ├── formulae │ ├── DMM_formula.png │ ├── cu_metric.png │ ├── dmm_graphical_view.png │ ├── i_index.png │ ├── lang_entropy.png │ ├── likehood_DMM.png │ ├── m_index.png │ └── scoring.png ├── general.md ├── java-jvm.md ├── library-framework-models-tools-services.md ├── measures.md ├── metaphor-detection.md ├── ner.md ├── sentiment-analysis.md └── summarise-text.md ├── notebooks ├── README.md ├── data │ ├── DSfIOT_Machine_Learning_End_to_End_Tutorial.ipynb │ └── data-processing │ │ └── Normalising-a-distribution.ipynb ├── jupyter │ └── data │ │ ├── .gitignore │ │ ├── 01_Exploratory_Data_Analysis_(Do_we_know_our_data).ipynb │ │ ├── 02_Data_Preparation_(Do_we_know_our_data).ipynb │ │ ├── 03_Feature_Engineering_(Do_we_know_our_data).ipynb │ │ ├── LICENSE.txt │ │ └── data-generation │ │ ├── LICENSE.txt │ │ ├── README.md │ │ ├── add_duplicates.py │ │ ├── add_nulls.py │ │ ├── create-archive.sh │ │ ├── make-dataset-unclean.sh │ │ └── release-to-github.sh └── nlp │ └── 20190411-spacy-and-textblob-nlp-entity-recognition-and-sentiment-analysis.ipynb ├── papers └── google-x │ ├── Page 01 - Abstract (Tensor Network).jpg │ ├── Page 02 - What is a Tensor network.jpg │ ├── Page 03 - What is a Tensor network.jpg │ ├── Page 04 - A tree Tensor network....jpg │ ├── Page 05 - Tensor Network - a library for ...jpg │ ├── Page 06 - Generalisation & no overfitting - Abstract.jpg │ ├── Page 07 - Generalisation & no overfitting.jpg │ ├── Page 08 - Dynamical systems and Deep nets.jpg │ ├── Page 09 - Generalisation bounds.jpg │ ├── Page 10 - Norm-minimising Gradient descent.jpg │ ├── Page 11 - Norm-minimising Gradient descent.jpg │ ├── Page 12 - Comparing BN and WN.jpg │ ├── Page 13 - Ranking Minimas.jpg │ ├── Page 14 - Contributions to Science & Engineering of Intelligence.jpg │ └── README.md ├── presentations ├── README.md ├── awesome-ai-ml-dl │ ├── 01-jonconf-2020 │ │ ├── README.md │ │ └── _nn_things_every_Java_Developer_should_know_about_AI_ML_DL.pdf │ ├── 
02-abhishektalks-2020 │ │ ├── Backend_developer_to_Machine_Learning_Engineer.pdf │ │ └── README.md │ ├── 03-makeitweek-2020 │ │ ├── README.md │ │ └── Tribuo_an_introduction_to_a_Java_ML_Library.pdf │ ├── 04-gba-apac-tour-2020 │ │ ├── README.md │ │ └── _nn_things_every_Java_Developer_should_know_about_AI_ML_DL.pdf │ ├── 05-ai-enterprise-virtual-user-group-2021 │ │ ├── Looking_into_Java_ML_DL_Libraries_Tribuo_and_DeepNetts.pdf │ │ └── README.md │ └── 06-the-out-of-the-box-developer-2022 │ │ ├── AI-v_s-ML-v_s-DL.pdf │ │ └── README.md ├── data │ ├── 01-mam-ml-study-group-meetup │ │ └── Introduction_to_Data_Analysis_and_Cleaning.pdf │ ├── 02-devoxx-uk-2019 │ │ ├── Do_we_know_our_data,_as_good_as_we_know_our_tools_.pdf │ │ └── README.md │ ├── 03-meetup-uk-2019 │ │ └── Towards-Explainable-AI.pdf │ ├── 04-grakn-cosmos-2020 │ │ └── README.md │ ├── 05-kaggle-days-delhi-ncr-2022 │ │ ├── README.md │ │ └── Studying-the-limitations-of-statistical-measurements.pdf │ ├── Data Visualization – How to Pick the Right Chart Type-1.pdf │ └── Trackener-physics-functions-usage-example.pptx └── nlp │ └── Natural_Language_Processing_-_MaM.pdf ├── study-notes ├── README.md ├── confusion-matrix-table-template.png └── precision-versus-recall.png ├── things-to-know.md └── time-series_anomaly-detection ├── README.md └── time-series.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: neomatrix369 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: 
# Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Virtual environment 2 | venv/ 3 | 4 | # Packages 5 | *.py[a-z] 6 | *.pyc 7 | __pycache__/* 8 | _pycache_/* 9 | .pylintrc 10 | 11 | ======= 12 | *.py[co] 13 | __pycache__ 14 | *~ 15 | *.bak 16 | \#*# 17 | .#* 18 | 19 | *.swp 20 | *.map 21 | .idea/ 22 | 23 | /.project 24 | /.pydevproject 25 | .ipynb_checkpoints 26 | *shared/ 27 | *.cache 28 | wandb/ 29 | catboost_info/ 30 | *.logs 31 | .DS_Store -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Guidelines 2 | 3 | At the moment there isn't a lot written or spoken about this subject but nevertheless we would like consider links that are useful for the wider community, so we think the below would help: 4 | 5 | (a) widely recommended, regardless of personal opinion 6 | 7 | (b) discussed in the community (mentioned via social media) 8 | 9 | (c) a good starting point for the subject or related subjects (contains examples) 10 | 11 | (d) implementations that would be a good example for everyone to follow and be inspired by 12 | 13 | 14 | ## Pull Requests 15 | 16 | There are two required criteria for a pull request: 17 | 18 | 1. If an entry has a similar scope as other entries in the same category, the description must state the unique features that distinguishes it from the other entries. 19 | 20 | 2. If an entry does not meet conditions *(a)* to *(d)* there has to be an explanation either in the description or the pull request why it should be added to the list. 
21 | 22 | Self-promotion is not encouraged, but your suggestion will of course be approved if the criteria match. Self-promotion can be done within reasons if the content shared is useful to the audience and helps the current and future community, or around those lines. These kinds of submissions will be looked into case-by-case basis. 23 | 24 | Furthermore, please ensure your pull request follows the following guidelines: 25 | 26 | * Please search previous suggestions before making a new one, as yours may be a duplicate. 27 | * Please make an individual pull request for each suggestion. 28 | * Use the following format for libraries: \[RESOURCE\]\(LINK\) - DESCRIPTION. 29 | * Entries should be sorted in ascending alphabetical order, i.e. a to z. 30 | * New categories or improvements to the existing categorization are welcome. 31 | * Keep descriptions short, simple and unbiased. 32 | * Check your spelling and grammar. 33 | * Make sure your text editor is set to remove trailing whitespace. 34 | 35 | Thank you for your suggestions! 
36 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | [CC-BY-SA-4.0](https://creativecommons.org/licenses/by-sa/4.0/legalcode) -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/.DS_Store -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/Running-keras-so-posts-python-program.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/Running-keras-so-posts-python-program.gif -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/building-the-model.md: -------------------------------------------------------------------------------- 1 | ## Building the model 2 | 3 | ### Keras Sequential model API 4 | 5 | [https://keras.io/models/sequential/]() 6 | 7 | ### Dense layer 8 | 9 | Each neuron in one layer will be fully connected to all neurons in the next layer 10 | 11 | [https://keras.io/layers/core/]() 12 | 13 | **Parameters** 14 | 15 | - the dimensionality of the layer’s output (number of neurons) 16 | - the shape of the input data 17 | 18 | **Dimensionality** 19 | 20 | 512 dimension layer: power of 2 as the number of dimensions, is a common practice 21 | 22 | **Batch size** 23 | 24 | Number of rows in the input data = number of posts to be passed to the model at each training step 25 | 26 | Number of columns = vocabulary size 
(top 1000 most commonly used words) 27 | 28 | ### Activation function 29 | 30 | Tells our model how to calculate the output of a layer, see [ReLU](http://cs231n.github.io/neural-networks-1/) 31 | 32 | **Softmax Activation function** 33 | 34 | The model will normalize the evidence for each possible label into a probability (from 0 to 1). In the context of the posts, the probabilities assigned to the 20 tags for a given comment will sum up to 1. Each tag will have a probability between 0 and 1. 35 | 36 | [https://en.wikipedia.org/wiki/Softmax_function]() 37 | 38 | ```python 39 | [continuation from previous snippet] 40 | . 41 | . 42 | . 43 | model = Sequential() 44 | model.add(Dense(512, input_shape=(vocabulary_size,))) 45 | model.add(Activation('relu')) 46 | 47 | # The model will take the vocabulary_size input, 48 | # transform it to a 512-dimensional layer, 49 | # and transform that into an output layer with 20 probability neurons 50 | # with the help of Keras, provided with shape of the input data, 51 | # the shape of the output data, and the type of each layer 52 | num_labels = 20 53 | model.add(Dense(num_labels)) 54 | model.add(Activation('softmax')) 55 | ``` 56 | 57 | Input shape v/s unit v/s dim: [https://gitlab.com/gdgcloud/tensorflow/issues/15#note_100912332]() 58 | -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/confusion_matrix_keras_posts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/confusion_matrix_keras_posts.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/01-google-account-sign-in.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/01-google-account-sign-in.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/02-google-create-project-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/02-google-create-project-01.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/03-google-create-project-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/03-google-create-project-02.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/04-add-member-to-project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/04-add-member-to-project.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/05-enable-billing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/05-enable-billing.png 
-------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/06-enable-BigQuery-API.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/06-enable-BigQuery-API.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/07-BigQuery-Web-Console.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/07-BigQuery-Web-Console.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/08-add-credentials.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/08-add-credentials.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/09-add-credentials-for-service-accounts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/09-add-credentials-for-service-accounts.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/10-BigQuery-console.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/10-BigQuery-console.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/11-credentials-confirmation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/11-credentials-confirmation.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/12-save-sql-query-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/12-save-sql-query-results.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/13-download-JSON-or-CSV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/13-download-JSON-or-CSV.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/14-create-a-dataset-01.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/14-create-a-dataset-01.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/14-create-a-dataset-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/14-create-a-dataset-02.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/15-copy-table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/15-copy-table.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/16-BigQuery-table-details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/16-BigQuery-table-details.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/17-google-cold-storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/17-google-cold-storage.png 
-------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/18-cold-storage-create-bucket-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/18-cold-storage-create-bucket-01.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/18-cold-storage-create-bucket-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/18-cold-storage-create-bucket-02.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/18-cold-storage-create-bucket-03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/18-cold-storage-create-bucket-03.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/19-export-to-cold-storage-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/19-export-to-cold-storage-01.png -------------------------------------------------------------------------------- 
/blogs/keras-bag-of-words-expanded-version/google-bigquery/19-export-to-cold-storage-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/19-export-to-cold-storage-02.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/google-bigquery/19-export-to-cold-storage-03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/google-bigquery/19-export-to-cold-storage-03.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/preprocessing-data.md: -------------------------------------------------------------------------------- 1 | ### Preprocessing data 2 | 3 | #### Wikipedia 4 | 5 | [https://en.wikipedia.org/wiki/Bag-of-words_model]() 6 | 7 | **Example of implementation** 8 | 9 | The bag-of-words model is commonly used in methods of document classification where the (frequency of) occurrence of each word is used as a feature for training a classifier. 10 | 11 | The following models a text document using bag-of-words. Here are two simple text documents: 12 | 13 | ``` 14 | (1) John likes to watch movies. Mary likes movies too. 15 | ``` 16 | ``` 17 | (2) John also likes to watch football games. 
18 | ``` 19 | Based on these two text documents, a list constructed as follows for each document: 20 | 21 | ```json 22 | "John","likes","to","watch","movies","Mary","likes","movies","too" 23 | 24 | "John","also","likes","to","watch","football","games" 25 | ``` 26 | 27 | Representing each bag-of-words as a JSON object, and attributing to the respective Javascript variable: 28 | 29 | ```json 30 | BoW1 = {"John":1,"likes":2,"to":1,"watch":1,"movies":2,"Mary":1,"too":1}; 31 | BoW2 = {"John":1,"also":1,"likes":1,"to":1,"watch":1,"football":1,"games":1}; 32 | ``` 33 | 34 | Each key is the word, and each value is the number of occurrences of that word in the given text document. 35 | 36 | The order of elements is free, so, for example `{"too":1,"Mary":1,"movies":2,"John":1,"watch":1,"likes":2,"to":1}` is also BoW1. It is also what we expect from a strict JSON object representation. 37 | 38 | Note: if another document is like a union of these two, 39 | 40 | ``` 41 | (3) John likes to watch movies. Mary likes movies too. John also likes to watch football games. 42 | ``` 43 | 44 | its Javascript representation will be: 45 | 46 | ```json 47 | BoW3 = {"John":2,"likes":3,"to":2,"watch":2,"movies":2,"Mary":1,"too":1,"also":1,"football":1,"games":1}; 48 | ``` 49 | 50 | So, as we see in the bag algebra, the "union" of two documents in the bags-of-words representation is, formally, the disjoint union, summing the multiplicities of each element. 
51 | 52 | ![image](preprocessing-data.png) -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/preprocessing-data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/preprocessing-data.png -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/processing-output-labels.md: -------------------------------------------------------------------------------- 1 | ## Processing output labels 2 | 3 | ### Representations 4 | 5 | Post: Write once, run anywhere..... 6 | 7 | Tag: Java 8 | 9 | **5-tags index** 10 | ``` 11 | [javascript, python, cobol, java, ruby] 12 | ``` 13 | **One-hot vector** 14 | 15 | Representation of tag in the post 16 | ``` 17 | [0 0 0 1 0] 18 | ``` 19 | 20 | **A vector of probabilities** 21 | ``` 22 | [ 0.08078627 0.24490279 0.21754906 0.23220219 0.22455971 ] 23 | ``` 24 | 25 | ### LabelBinarizer class of scikit-learn 26 | 27 | Makes it easy to build these one-hot vectors 28 | 29 | ```python 30 | [continuation from previous snippet] 31 | . 32 | . 33 | . 34 | encoder = LabelBinarizer() 35 | encoder.fit(train_tags) 36 | y_train = encoder.transform(train_tags) 37 | 38 | # training dataset: one-hot vectors of the tags per post 39 | # [[0 0 0 ... 0 1 0] 40 | # [0 0 0 ... 0 0 0] 41 | # [0 0 0 ... 0 0 0] 42 | # ... 43 | # [0 0 0 ... 0 0 0] 44 | # [0 0 0 ... 1 0 0] 45 | # [0 0 0 ... 0 0 0]] 46 | 47 | y_test = encoder.transform(test_tags) 48 | 49 | # test dataset: one-hot vectors of the tags per post 50 | # [[0 0 0 ... 0 0 1] 51 | # [0 0 0 ... 0 0 0] 52 | # [0 0 0 ... 0 0 0] 53 | # ... 54 | # [0 0 0 ... 0 0 0] 55 | # [0 0 0 ... 0 0 0] 56 | # [0 0 0 ... 
0 0 0]] 57 | ``` -------------------------------------------------------------------------------- /blogs/keras-bag-of-words-expanded-version/training-results-animated.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/blogs/keras-bag-of-words-expanded-version/training-results-animated.gif -------------------------------------------------------------------------------- /cloud-devops-infra/about-vast.ai.md: -------------------------------------------------------------------------------- 1 | ## vast.ai 2 | 3 | - [http://vast.ai]() 4 | - One simple interface to find the best cloud GPU rentals. Reduce cloud compute costs by 3X to 5X. Both spot (interruptible) and on-demand instances are available. 5 | - Quick and easy to start, very competitively priced, found machines with high-spec as low as $0.001 per hour of usage 6 | 7 | Additional references 8 | 9 | - https://vast.ai/faq/#Introduction 10 | - https://vast.ai/console/create/ 11 | 12 | --- 13 | 14 | - [ ] [AI/ML/DL Library / Package / Framework: applicable] 15 | - [x] **[Inexpensive crowd-sourced infrastructure sharing: applicable]** 16 | - [ ] [Data querying: manual / tools available] 17 | - [ ] [Data analytics: manual / tools available] 18 | - [ ] [Data visualisation: manual / tools available] 19 | - [ ] [Data cleaning: manual / no tools available] 20 | - [ ] [Data validation: manual / no tools available] 21 | - [ ] [Feature extraction: manual / no tools available] 22 | - [ ] [Model creation: available] 23 | - [ ] [Execute experiments: available] 24 | - [ ] [Hyper parameter tuning: available] 25 | - [ ] [Model saving: available] 26 | 27 | Back to [Data preparation, cleaning, validation, model creation, training resources](../data/README.md) -------------------------------------------------------------------------------- /cloud-devops-infra/gpus/Applications-of-GPU-Server.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/cloud-devops-infra/gpus/Applications-of-GPU-Server.jpg -------------------------------------------------------------------------------- /cloud-devops-infra/gpus/Avermedia-Box-PC-and-Carrier-1-of-2-works-with-NVidia-Jetson.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/cloud-devops-infra/gpus/Avermedia-Box-PC-and-Carrier-1-of-2-works-with-NVidia-Jetson.jpg -------------------------------------------------------------------------------- /cloud-devops-infra/gpus/Avermedia-Box-PC-and-Carrier-2-of-2-works-with-NVidia-Jetson.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/cloud-devops-infra/gpus/Avermedia-Box-PC-and-Carrier-2-of-2-works-with-NVidia-Jetson.jpg -------------------------------------------------------------------------------- /cloud-devops-infra/gpus/GPU-Server-side-1-of-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/cloud-devops-infra/gpus/GPU-Server-side-1-of-2.jpg -------------------------------------------------------------------------------- /cloud-devops-infra/gpus/GPU-Server-side-2-of-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/cloud-devops-infra/gpus/GPU-Server-side-2-of-2.jpg -------------------------------------------------------------------------------- /data/about-Google-Data-Studio.md: 
-------------------------------------------------------------------------------- 1 | ## Google Data Studio 2 | 3 | - Signup https://datastudio.google.com/ 4 | - Dashboarding & Data Visualization Tools - Google Data Studio - https://marketingplatform.google.com/about/data-studio/ 5 | - Pros 6 | - Rich with features and a slick looking UI 7 | - Leans on analytics a lot 8 | - Visualisations: lots of nice tables and charts (if you are able to load your dataset and configure the rest of it) 9 | - Can quickly get a report or chart created if you know the environment well 10 | - Links up to the rest of the assets / resources on GCP 11 | - Nice sample walkthrough, helping learn how to build reports 12 | - Google Docs like sharing / collaborating functionality available 13 | - Cons 14 | - Lots of bells and whistles: there is a learning curve 15 | - Couldn't upload most of the datasets, it seems they had issues (while they worked well with other tools) 16 | - UI wasn't intuitive enough to get from start to finish with a report or chart from the dataset uploaded 17 | - [Videos on YouTube](https://www.youtube.com/results?search_query=google+datastudio&page=&utm_source=opensearch) 18 | - Examples 19 | - https://datastudiogallery.appspot.com/gallery 20 | - https://marketingplatform.google.com/about/data-studio/gallery/ 21 | - https://blog.prototypr.io/the-6-best-free-google-data-studio-templates-9825c7f0fbd9?gi=82313a30836 22 | - Tutorials 23 | - https://www.distilled.net/google-data-studio-tutorial/ 24 | - https://www.seerinteractive.com/blog/google-data-studio-tutorial/ 25 | 26 | --- 27 | 28 | - [ ] [AI/ML/DL Library / Package / Framework: applicable] 29 | - [ ] [Inexpensive crowd-sourced infrastructure sharing: applicable] 30 | - [x] **[Data querying: manual / tools available]** 31 | - [x] **[Data analytics: manual / tools available]** 32 | - [x] **[Data visualisation: manual / tools available]** 33 | - [ ] [Data cleaning: manual / no tools available] 34 | - [ ] [Data validation: 
manual / no tools available] 35 | - [ ] [Feature extraction: manual / no tools available] 36 | - [ ] [Model creation: available] 37 | - [ ] [Execute experiments: available] 38 | - [ ] [Hyper parameter tuning: available] 39 | - [ ] [Model saving: available] 40 | 41 | Back to [Programs and Tools](./programs-and-tools.md#programs-and-tools).
42 | Back to [Data page](./README.md#data). -------------------------------------------------------------------------------- /data/about-H2O-Driverless-AI.md: -------------------------------------------------------------------------------- 1 | ## H2O Driverless AI 2 | 3 | - Download from https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/dai/rel-1.4.2-9/index.html 4 | - Installation 5 | - Linux: https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/dai/rel-1.4.2-9/docs/userguide/installing.html (See relevant Linux category) (server app via browser) 6 | - MacOS: https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/dai/rel-1.4.2-9/docs/userguide/install/mac-osx.html?highlight=mac (desktop app via Docker container via browser) 7 | - Resources 8 | - https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/dai/rel-1.4.2-9/docs/booklets/DriverlessAIBooklet.pdf 9 | - https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/dai/rel-1.4.2-9/docs/booklets/MLIBooklet.pdf 10 | - [Recipes for Driverless AI](https://github.com/h2oai/driverlessai-recipes) 11 | - Videos 12 | - https://www.youtube.com/results?search_query=h2o+driverless 13 | - https://www.youtube.com/watch?v=yzAhjinmdzk 14 | - https://www.youtube.com/watch?v=axIqeaUhow0 15 | - [H2O meetups](https://github.com/h2oai/h2o-meetups) 16 | --- 17 | 18 | - [ ] [AI/ML/DL Library / Package / Framework: applicable] 19 | - [ ] [Inexpensive crowd-sourced infrastructure sharing: applicable] 20 | - [ ] [Data querying: manual / tools available] 21 | - [ ] [Data analytics: manual / tools available] 22 | - [x] **[Data visualisation: automatic]** 23 | - [x] **[Data cleaning: can handle unclean data but does not clean data]** 24 | - [x] **[Data validation: automatic]** 25 | - [x] **[Feature extraction: automatic]** 26 | - [x] **[Model creation: automatic]** 27 | - [x] **[Experiments execution: available]** 28 | - [x] **[Hyper parameter tuning: automatic]** 29 | - [x] **[Model saving: automatic]** 30 | 31 | Back to 
[Programs and Tools](./programs-and-tools.md#programs-and-tools).
32 | Back to [Data page](./README.md#data). -------------------------------------------------------------------------------- /data/about-Microstrategy.md: -------------------------------------------------------------------------------- 1 | ## Microstrategy 2 | 3 | - Download desktop app from https://www.microstrategy.com/us/get-started/desktop (Mac or Windows) 4 | - Install app and run it 5 | - Create a new Dossier via the Create New Dossier button 6 | - Add new dataset via the New data button (under Add Data) 7 | - File From Disk > Choose files 8 | - Select one or more files 9 | - Select Prepare Data 10 | - Data wrangling 11 | - Cleaning and wrangling 12 | - https://www.youtube.com/watch?v=m1ZMxSPGM6I 13 | - Wrangling 14 | - https://www.youtube.com/watch?v=m1ZMxSPGM6I 15 | - https://www.youtube.com/watch?v=E6b6gLESXiY 16 | - Actions 17 | - Was able to do the following to the `haystack_http` dataset: 18 | - Fill the blank cells in the activity column with ‘null activity’ 19 | - Split the date column into date and time columns 20 | - Extract just the domain name from the url column and create a new column from it 21 | - Limitations 22 | - Wasn’t able to label the url or domain column (needs sophisticated function work) 23 | - Wasn’t able to transform/extract from the date column into weekday or weekend labels 24 | - Wasn’t able to transform/extract from the time column into working hour/non-working hour labels 25 | 26 | --- 27 | 28 | - [ ] [AI/ML/DL Library / Package / Framework: applicable] 29 | - [ ] [Inexpensive crowd-sourced infrastructure sharing: applicable] 30 | - [x] **[Data querying: manual / tools available]** 31 | - [ ] [Data analytics: manual / tools available] 32 | - [x] **[Data visualisation: manual / tools available]** 33 | - [x] **[Data cleaning: manual]** 34 | - [x] **[Data validation: semi-automatic/manual]** 35 | - [x] **[Feature extraction: manual/tools available]** 36 | - [ ] [Model creation: available] 37 | - [ ] [Execute experiments: 
available] 38 | - [ ] [Hyper parameter tuning: available] 39 | - [ ] [Model saving: available] 40 | 41 | Back to [Programs and Tools](./programs-and-tools.md#programs-and-tools).
42 | Back to [Data page](./README.md#data). -------------------------------------------------------------------------------- /data/about-ModeAnalytics.md: -------------------------------------------------------------------------------- 1 | ## ModeAnalytics 2 | 3 | - Mode Studio: https://modeanalytics.com/ 4 | - https://modeanalytics.com/editor/neomatrix369/reports/new 5 | - The data platform that brings everyone together, and gets out of the way. 6 | - Connect your data warehouse. Analyze with SQL, Python, or R. 7 | - Share across your organization. Choose the right language for the job: SQL, Python, and R, all in one platform. 8 | - Create beautiful charts in seconds, or customize with D3, matplotlib, ggplot, and more. 9 | - Share your work with a URL, email, or Slack for fresh data anytime, anywhere. 10 | 11 | --- 12 | 13 | - [ ] [AI/ML/DL Library / Package / Framework: applicable] 14 | - [ ] [Inexpensive crowd-sourced infrastructure sharing: applicable] 15 | - [x] **[Data querying: manual / tools available]** 16 | - [ ] [Data analytics: manual / tools available] 17 | - [x] **[Data visualisation: manual / tools available]** 18 | - [x] **[Data cleaning: manual]** 19 | - [x] **[Data validation: semi-automatic/manual]** 20 | - [x] **[Feature extraction: manual/tools available]** 21 | - [ ] [Model creation: available] 22 | - [ ] [Execute experiments: available] 23 | - [ ] [Hyper parameter tuning: available] 24 | - [ ] [Model saving: available] 25 | 26 | Back to [Programs and Tools](./programs-and-tools.md#programs-and-tools).
27 | Back to [Data page](./README.md#data). -------------------------------------------------------------------------------- /data/about-Pipeline.ai.md: -------------------------------------------------------------------------------- 1 | ## Pipeline.ai 2 | 3 | - Signup at https://pipeline.ai/ 4 | - Comparisons: https://pipeline.ai/products/ 5 | - Videos: https://www.youtube.com/c/PipelineAI/ 6 | - At a glance: PipelineAI is a real-time model-prediction platform that optimizes, scales, and explains the predictions of any cognitive decision-making system at the speed of thought. The platform clears the path from local-laptop research to global-scale production. PipelineAI is a zero-touch continuous model optimization platform that automatically generates, deploys, and compares hundreds of model variants - from just a single model. Scientists and Engineers can instantly launch any experiment using any language and any ML/AI framework. 7 | - PipelineAI automatically coordinates the validation, optimization, training and explainability of the model. VOTE involves many automated cycles of model training and testing using different sets of online hyperparameters including system configuration, network speed, and real-time user data. 
8 | - Stages covered: only from Model training to deploy 9 | - Pros 10 | - supports lots of different ML frameworks 11 | - helps find the best model to use to predict 12 | - has a lot of bells and whistles in terms of parameters for tuning 13 | - supports GPU and TPU (among very few) 14 | - Cons 15 | - UI/UX is less user-friendly, a bit slow in responding back to users and can’t seem to make out the progress of submitted tasks 16 | - slow in Community mode; might have better performance on the paid version 17 | 18 | --- 19 | 20 | - [ ] [AI/ML/DL Library / Package / Framework: applicable] 21 | - [ ] [Inexpensive crowd-sourced infrastructure sharing: applicable] 22 | - [ ] [Data querying: manual / tools available] 23 | - [ ] [Data analytics: manual / tools available] 24 | - [ ] [Data visualisation: manual / tools available] 25 | - [ ] [Data cleaning: manual / no tools available] 26 | - [ ] [Data validation: manual / no tools available] 27 | - [ ] [Feature extraction: manual / no tools available] 28 | - [x] **[Model creation: available]** 29 | - [x] **[Execute experiments: available]** 30 | - [x] **[Hyper parameter tuning: available]** 31 | - [x] **[Model saving: available]** 32 | 33 | Back to [Programs and Tools](./programs-and-tools.md#programs-and-tools).
34 | Back to [Data page](./README.md#data). -------------------------------------------------------------------------------- /data/about-Tableau-Prep.md: -------------------------------------------------------------------------------- 1 | ## Tableau Prep 2 | 3 | - Download desktop app from https://www.tableau.com/products/prep (Mac or Windows) or the public version at https://public.tableau.com/en-us/s/ 4 | - Install app and run it 5 | - Add new dataset via the (+) sign next to Connections 6 | - File From Disk > Choose files 7 | - Select one or more files 8 | - Once data is loaded click on the (+) sign next to the haystack_http icon on the top and select Add Step 9 | - Select the column you want to amend, select Edit value, change the value and click on enter 10 | - Actions 11 | - Was able to do the following to the `haystack_http` dataset (quite easily): 12 | - Fill the blank cells in the activity column with ‘null activity’ 13 | - Split the date column into date and time columns 14 | - Extract just the domain name from the url column and create a new column from it 15 | - There is a good way to transform columns in Tableau into new types 16 | - Limitations 17 | - Wasn’t able to label the url or domain column (needs sophisticated function work) 18 | - Was able to transform/extract from the date column into weekday or weekend labels with some effort - had to create a calculated field and then apply an if condition to it 19 | - Was able to transform/extract from the time column into working hour/non-working hour labels - had to create a calculated field and then apply an if condition to it 20 | 21 | --- 22 | 23 | - [ ] [AI/ML/DL Library / Package / Framework: applicable] 24 | - [ ] [Inexpensive crowd-sourced infrastructure sharing: applicable] 25 | - [x] **[Data querying: manual / tools available]** 26 | - [ ] [Data analytics: manual / tools available] 27 | - [x] **[Data visualisation: manual / tools available]** 28 | - [x] **[Data cleaning: manual]** 29 | - [x] 
**[Data validation: semi-automatic/manual]** 30 | - [x] **[Feature extraction: manual/tools available]** 31 | - [ ] [Model creation: does not] 32 | - [ ] [Execute experiments: available] 33 | - [ ] [Hyper parameter tuning: does not] 34 | - [ ] [Model saving: does not] 35 | 36 | Back to [Programs and Tools](./programs-and-tools.md#programs-and-tools).
37 | Back to [Data page](./README.md#data). -------------------------------------------------------------------------------- /data/about-fast.ai.md: -------------------------------------------------------------------------------- 1 | ## fast.ai 2 | 3 | - http://fast.ai 4 | - Free Library + course material to learn AI, ML, DL quickly 5 | - Library is based on Pytorch (previously there was a Keras version) 6 | - Has a community forum and lots of resources on the internet, good feedback and posts on medium 7 | 8 | Additional references 9 | - See [fast.ai](../courses.md#fastai) under [Courses](../courses.md#course) 10 | - https://docs.fast.ai/training.html 11 | - http://forums.fast.ai 12 | - https://forums.fast.ai/t/how-should-i-get-started-with-fast-ai-library/17627 13 | - https://forums.fast.ai/t/another-treat-early-access-to-intro-to-machine-learning-videos/6826 14 | - https://medium.com/@pierre_guillou/fastai-how-to-start-663927d4db63 15 | - https://www.kdnuggets.com/2019/01/6-most-useful-machine-learning-projects-2018.html 16 | - https://www.youtube.com/results?search_query=fast.ai 17 | - [Rachel Thomas | Fast.ai | Applied Ethics | Top Down Learning | CTDS.Show #75](https://www.youtube.com/watch?v=tq_XcFubgKo) 18 | - [Interview with Even Oldridge | Applied Research, Top Down Learning & Fast.ai | NVIDIA & Rapids.ai ](https://www.youtube.com/watch?v=-WzXIV8P_Jk) 19 | 20 | --- 21 | 22 | - [x] **[AI/ML/DL Library / Package / Framework: applicable]** 23 | - [ ] [Inexpensive crowd-sourced infrastructure sharing: applicable] 24 | - [ ] [Data querying: manual / tools available] 25 | - [ ] [Data analytics: manual / tools available] 26 | - [ ] [Data visualisation: manual / tools available] 27 | - [ ] [Data cleaning: manual / no tools available] 28 | - [ ] [Data validation: manual / no tools available] 29 | - [ ] [Feature extraction: manual / no tools available] 30 | - [ ] [Model creation: available] 31 | - [ ] [Execute experiments: available] 32 | - [ ] [Hyper parameter 
tuning: available] 33 | - [ ] [Model saving: available] 34 | 35 | Back to [Programs and Tools](./programs-and-tools.md#programs-and-tools).
36 | Back to [Data page](./README.md#data). -------------------------------------------------------------------------------- /data/bamboolib.md: -------------------------------------------------------------------------------- 1 | # Bamboolib 2 | 3 | A GUI for pandas. 4 | 5 | - [Home](https://bamboolib.com/) 6 | - GitHub: https://github.com/8080labs/bamboolib 7 | - Online documentation: https://docs.bamboolib.8080labs.com/ 8 | - Supporting / dependency library: 9 | - pyforest: [GitHub](https://github.com/8080labs/pyforest) | [pypi](https://pypi.org/project/pyforest/) 10 | - Blogs 11 | - [Introducing Bamboolib — a GUI for Pandas](https://towardsdatascience.com/introducing-bamboolib-a-gui-for-pandas-4f6c091089e3) 12 | - [Bamboolib — Learn and use Pandas without Coding](https://towardsdatascience.com/bamboolib-learn-and-use-pandas-without-coding-23a7d3a94e1b) 13 | - Videos 14 | - [Intro to bamboolib - a GUI for pandas](https://www.youtube.com/watch?v=5UR1v3uxqW4&feature=share) 15 | - [Demo on big data (1mio rows)](https://www.youtube.com/watch?v=r59Q19oCMr8) 16 | - [All other videos](https://www.youtube.com/channel/UCQLqkDYCGnqczni1IWyAYvA/videos?view=0&sort=da&flow=grid) 17 | - On Kaggle 18 | - [How to: Use bamboolib within Kaggle](https://docs.bamboolib.8080labs.com/how-tos/use-bamboolib-within-kaggle) 19 | - [Trying Bamboolib - Pandas GUI on Kaggle ](https://www.kaggle.com/nulldata/trying-bamboolib-pandas-gui-on-kaggle) 20 | - [LinkedIn post](https://www.linkedin.com/posts/parulpandeyindia_bamboolib-pandas-activity-6610109280953700352-Cuie) 21 | - Linkedin: https://www.linkedin.com/company/bamboolib/ 22 | - Twitter: https://twitter.com/bamboolib_ 23 | --- 24 | 25 | - [x] **[AI/ML/DL Library / Package / Framework: applicable]** 26 | - [ ] [Inexpensive crowd-sourced infrastructure sharing: applicable] 27 | - [x] **[Data querying]** 28 | - [ ] [Data analytics: manual / tools available] 29 | - [x] **[Data visualisation]** 30 | - [x] **[Data cleaning]** 31 | - [x] 
**[Data validation]** 32 | - [ ] [Feature extraction: manual / no tools available] 33 | - [ ] [Model creation: available] 34 | - [ ] [Execute experiments: available] 35 | - [ ] [Hyper parameter tuning: available] 36 | - [ ] [Model saving: available] 37 | 38 | Back to [Programs and Tools](./programs-and-tools.md#programs-and-tools).
39 | Back to [Data page](./README.md#data). 40 | -------------------------------------------------------------------------------- /data/courses-books.md: -------------------------------------------------------------------------------- 1 | # Courses / books 2 | 3 | ## Courses 4 | 5 | - See [Courses](../courses.md#courses) 6 | 7 | ## Books 8 | 9 | - [27 Amazing Data Science Books Every Data Scientist Should Read](https://www.analyticsvidhya.com/blog/2019/01/27-amazing-data-science-books-every-data-scientist-should-read/) 10 | - [Data Science Handbook](https://github.com/RishiSankineni/Data-Science-Swag/blob/master/The%20Data%20Science%20Handbook.pdf) 11 | 12 | # Contributing 13 | 14 | Contributions are very welcome, please share back with the wider community (and get credited for it)! 15 | 16 | Please have a look at the [CONTRIBUTING](../CONTRIBUTING.md) guidelines, also have a read about our [licensing](../LICENSE.md) policy. 17 | 18 | --- 19 | 20 | Back to [Data page (table of contents)](README.md)
21 | Back to [main page (table of contents)](../README.md) -------------------------------------------------------------------------------- /data/frameworks-checklists.md: -------------------------------------------------------------------------------- 1 | # Framework(s) / checklist(s) 2 | 3 | - [Data Science Primer](https://elitedatascience.com/primer) 4 | - [How to Prepare Data For Machine Learning](https://machinelearningmastery.com/how-to-prepare-data-for-machine-learning/) 5 | - [What is Data Mining and KDD](https://machinelearningmastery.com/what-is-data-mining-and-kdd/) 6 | - [The KDD process for extracting useful knowledge from volumes of data](http://shawndra.pbworks.com/f/The%20KDD%20process%20for%20extracting%20useful%20knowledge%20from%20volumes%20of%20data.pdf) 7 | - [Data Mining: Practical ML Tools and Techniques by Witten, Frank and Mark 3rd edition](https://www.wi.hs-wismar.de/~cleve/vorl/projects/dm/ss13/HierarClustern/Literatur/WittenFrank-DM-3rd.pdf) 8 | - [Foundational Methodology for Data Science - IBM Analytics Whitepaper](https://tdwi.org/~/media/64511A895D86457E964174EDC5C4C7B1.PDF) 9 | - See [Courses](../courses.md#courses) 10 | 11 | # Contributing 12 | 13 | Contributions are very welcome, please share back with the wider community (and get credited for it)! 14 | 15 | Please have a look at the [CONTRIBUTING](../CONTRIBUTING.md) guidelines, also have a read about our [licensing](../LICENSE.md) policy. 16 | 17 | --- 18 | 19 | Back to [Data page (table of contents)](README.md)
20 | Back to [main page (table of contents)](../README.md) 21 | -------------------------------------------------------------------------------- /data/how-to-choose-your-data-visualisations.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/data/how-to-choose-your-data-visualisations.jpg -------------------------------------------------------------------------------- /data/wandb/Activation-Function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/data/wandb/Activation-Function.png -------------------------------------------------------------------------------- /data/wandb/Quick-and-Dirty-CNN.py: -------------------------------------------------------------------------------- 1 | # normalize data 2 | X_train = X_train.astype('float32') / 255. 3 | X_test = X_test.astype('float32') / 255. 
4 | N_train = X_train.shape[0] 5 | N_test = X_test.shape[0] 6 | X_train = X_train.reshape(N_train, 28,28,1) 7 | X_test = X_test.reshape(N_test, 28,28,1) 8 | 9 | # create model 10 | print ('test dimension:....', X_train.shape) 11 | model=Sequential() 12 | #model.add(Flatten(input_shape=(img_width, img_height))) 13 | #model.add(Dense(128, activation="relu")) 14 | #model.add(Dense(num_classes, activation="softmax")) 15 | 16 | #~~~~~~~~~~~~ 17 | 18 | con_width = 16 19 | conv_height = 16 20 | model.add(Conv2D(32,(con_width, conv_height), input_shape=(28, 28,1), activation='relu')) 21 | model.add(MaxPooling2D(pool_size=(2, 2))) 22 | model.add(Flatten()) 23 | dense_layer_size = 128 24 | model.add(Dense(dense_layer_size, activation='relu')) 25 | model.add(Dense(num_classes, activation='softmax')) 26 | #~~~~~~~~~~~~~~~~ 27 | # create model 28 | #model=Sequential() 29 | #model.add(Flatten(input_shape=(img_width, img_height))) 30 | #model.add(Dense(num_classes)) 31 | #model.compile(loss=config.loss, optimizer=config.optimizer, 32 | # metrics=['accuracy']) 33 | 34 | model.compile(loss="categorical_crossentropy", optimizer="adam", 35 | metrics=['accuracy']) -------------------------------------------------------------------------------- /data/what-is-a-tensor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/data/what-is-a-tensor.jpg -------------------------------------------------------------------------------- /details/artificial-intelligence.md: -------------------------------------------------------------------------------- 1 | # Artificial intelligence 2 | 3 | - [Artificial intelligence (Wikipedia)](https://en.wikipedia.org/wiki/Artificial_intelligence) 4 | - [Artificial intelligence (Quora)](https://www.quora.com/topic/Artificial-Intelligence) 5 | - [History of Artificial 
Intelligence](https://en.wikipedia.org/wiki/History_of_artificial_intelligence) 6 | - [Artificial intelligence in video games](https://en.wikipedia.org/wiki/Artificial_intelligence_in_video_games) 7 | - [Scholarly articles for history of artificial intelligence](https://scholar.google.co.uk/scholar?q=history+of+artificial+intelligence&hl=en&as_sdt=0&as_vis=1&oi=scholart) 8 | - [Google AI Blog](https://ai.googleblog.com/) - the latest news from Google AI 9 | - [AI Experiments | Experiments with Google](https://experiments.withgoogle.com/ai) | [Experiments with Google](https://experiments.withgoogle.com/) 10 | - [People + AI Guidebook Logo](https://pair.withgoogle.com/) 11 | - [What is a tensor?](../data/what-is-a-tensor.jpg) 12 | 13 | 14 | # Contributing 15 | 16 | Contributions are very welcome, please share back with the wider community (and get credited for it)! 17 | 18 | Please have a look at the [CONTRIBUTING](../CONTRIBUTING.md) guidelines, also have a read about our [licensing](../LICENSE.md) policy. 19 | 20 | --- 21 | 22 | Back to [details page (table of contents)](../README-details.md#artificial-intelligence)
23 | Back to [main page (table of contents)](../README.md) 24 | -------------------------------------------------------------------------------- /details/julia-python-and-r/deep-learning.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/details/julia-python-and-r/deep-learning.md -------------------------------------------------------------------------------- /examples/JuPyteR/.gitignore: -------------------------------------------------------------------------------- 1 | notebooks/ 2 | vulnerabilities/ 3 | dockerfileScan/ 4 | !notebooks/MyFirstJavaNotebook.ipynb -------------------------------------------------------------------------------- /examples/JuPyteR/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2019 Mani Sarkar 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | -------------------------------------------------------------------------------- /examples/JuPyteR/beakerx-homepage-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/examples/JuPyteR/beakerx-homepage-screenshot.png -------------------------------------------------------------------------------- /examples/JuPyteR/build-docker-image/.dockerignore: -------------------------------------------------------------------------------- 1 | dockerfileScan 2 | vulnerabilities -------------------------------------------------------------------------------- /examples/JuPyteR/build-docker-image/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG GRAALVM_VERSION 2 | ARG GRAALVM_JDK_VERSION 3 | 4 | FROM oracle/graalvm-ce:${GRAALVM_VERSION}-${GRAALVM_JDK_VERSION} as graal-jdk-image 5 | 6 | FROM adoptopenjdk/openjdk11 7 | 8 | ### Install GraalVM for Java 11 9 | COPY --from=graal-jdk-image /opt/graalvm-ce-* /opt/java/graalvm 10 | 11 | RUN rm -fr /opt/java/openjdk 12 | 13 | ENV JAVA_HOME=/opt/java/graalvm 14 | ENV PATH=${JAVA_HOME}/bin:${PATH} 15 | 16 | ARG IMAGE_VERSION 17 | LABEL maintainer="Mani Sarkar" 18 | LABEL example_git_repo="https://github.com/neomatrix369/awesome-ai-ml-dl/tree/master/examples/JuPyteR" 19 | LABEL graalvm_version="${GRAALVM_VERSION}-${GRAALVM_JDK_VERSION}" 20 | LABEL version="${IMAGE_VERSION}" 21 | 22 | COPY install-jupyter-notebooks.sh install-jupyter-notebooks.sh 23 | RUN ./install-jupyter-notebooks.sh 24 | 25 | COPY install-java-kernel.sh install-java-kernel.sh 26 | RUN ./install-java-kernel.sh 27 | 28 | EXPOSE 8888 29 | 30 | RUN rm -f *.sh *.zip install.py 31 | 32 | ARG USER 33 | RUN groupadd -r ${USER} \ 34 | && useradd -r -s /bin/false \ 35 | -g ${USER} ${USER} 36 | 37 | ARG WORKDIR 38 | WORKDIR ${WORKDIR} 39 | 40 | COPY runLocal.sh runLocal.sh 41 | RUN 
mv -f /root/.local ${WORKDIR} 42 | 43 | RUN chown -R ${USER}:${USER} ${WORKDIR} 44 | 45 | USER ${USER} 46 | 47 | ENTRYPOINT ["./runLocal.sh"] -------------------------------------------------------------------------------- /examples/JuPyteR/build-docker-image/buildDockerImage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | IMAGE_NAME=${IMAGE_NAME:-jupyter-java} 24 | IMAGE_VERSION=${IMAGE_VERSION:-$(cat ../version.txt)} 25 | GRAALVM_VERSION=${GRAALVM_VERSION:-$(cat graalvm_version.txt)} 26 | GRAALVM_JDK_VERSION=${GRAALVM_JDK_VERSION:-$(cat graalvm_jdk_version.txt)} 27 | 28 | if [[ -z ${DOCKER_USER_NAME:-""} ]]; then 29 | echo "DOCKER_USER_NAME not defined as an environment variable, set to default value: neomatrix369" 30 | DOCKER_USER_NAME=neomatrix369 31 | fi 32 | 33 | DOCKER_FULL_TAG_NAME="${DOCKER_USER_NAME}/${IMAGE_NAME}" 34 | USER=jupyter 35 | JUPYTER_HOME=/home/${USER} 36 | 37 | time docker build \ 38 | --build-arg USER=jupyter \ 39 | --build-arg WORKDIR=${JUPYTER_HOME} \ 40 | --build-arg IMAGE_VERSION=${IMAGE_VERSION} \ 41 | --build-arg GRAALVM_VERSION=${GRAALVM_VERSION} \ 42 | --build-arg GRAALVM_JDK_VERSION=${GRAALVM_JDK_VERSION} \ 43 | -t ${DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION} \ 44 | . 
45 | 46 | ./removeUnusedContainersAndImages.sh 47 | ./push-jupyter-java-docker-image-to-hub.sh -------------------------------------------------------------------------------- /examples/JuPyteR/build-docker-image/graalvm_jdk_version.txt: -------------------------------------------------------------------------------- 1 | java11 -------------------------------------------------------------------------------- /examples/JuPyteR/build-docker-image/graalvm_version.txt: -------------------------------------------------------------------------------- 1 | 20.3.0 -------------------------------------------------------------------------------- /examples/JuPyteR/build-docker-image/install-jupyter-notebooks.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | echo "" 24 | echo "Installing Jupyter notebook and dependencies" 25 | 26 | echo "JAVA_TOOL_OPTIONS=${JAVA_TOOL_OPTIONS:-}" 27 | echo "Unsetting JAVA_TOOL_OPTIONS" 28 | unset JAVA_TOOL_OPTIONS 29 | 30 | export JAVA_HOME=/opt/java/openjdk/ 31 | export PATH=${JAVA_HOME}/bin:${PATH} 32 | 33 | java -version 34 | javac -version 35 | 36 | SUDO_CMD="" 37 | if [[ -f "/etc/sudoers" ]]; then 38 | SUDO_CMD=sudo 39 | fi 40 | 41 | ${SUDO_CMD} apt-get update && \ 42 | apt-get install -qy \ 43 | unzip \ 44 | curl \ 45 | python-pip \ 46 | python-setuptools \ 47 | --no-install-recommends && rm -r /var/lib/apt/lists/ 48 | 49 | python --version 50 | pip --version 51 | 52 | pip install --user --upgrade pip 53 | python -m pip install --user jupyter 54 | 55 | export PATH="${HOME}/.local/bin:${PATH}" 56 | echo "PATH=${PATH}" 57 | 58 | jupyter --version -------------------------------------------------------------------------------- /examples/JuPyteR/build-docker-image/push-jupyter-java-docker-image-to-hub.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | if [[ -z ${DOCKER_USER_NAME:-""} ]]; then 24 | echo "DOCKER_USER_NAME not defined as an environment variable, set to default value: neomatrix369" 25 | DOCKER_USER_NAME=neomatrix369 26 | fi 27 | 28 | findImage() { 29 | IMAGE_NAME=$1 30 | echo $(docker images ${IMAGE_NAME} -q | head -n1 || true) 31 | } 32 | 33 | pushImage() { 34 | language_id=$1 35 | IMAGE_NAME=${IMAGE_NAME:-jupyter-java} 36 | IMAGE_VERSION=$(cat ../version.txt) 37 | DOCKER_FULL_TAG_NAME="${DOCKER_USER_NAME}/${IMAGE_NAME}" 38 | 39 | IMAGE_FOUND="$(findImage ${DOCKER_FULL_TAG_NAME})" 40 | IS_FOUND="found" 41 | if [[ -z "${IMAGE_FOUND}" ]]; then 42 | IS_FOUND="not found" 43 | fi 44 | echo "Docker image '${DOCKER_USER_NAME}/${IMAGE_NAME}' is ${IS_FOUND} in the local repository" 45 | 46 | docker tag ${IMAGE_FOUND} ${DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION} 47 | docker push ${DOCKER_FULL_TAG_NAME} 48 | } 49 | 50 | docker login --username=${DOCKER_USER_NAME} 51 | pushImage base java-base 52 | pushImage ${1:-java} -------------------------------------------------------------------------------- /examples/JuPyteR/build-docker-image/removeUnusedContainersAndImages.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | containersToRemove=$(docker ps --quiet --filter "status=exited") 24 | [ ! -z "${containersToRemove}" ] && \ 25 | echo "Remove any stopped container from the local registry" && \ 26 | docker rm ${containersToRemove} || true 27 | 28 | imagesToRemove=$(docker images --quiet --filter "dangling=true") 29 | [ ! -z "${imagesToRemove}" ] && \ 30 | echo "Remove any dangling images from the local registry" && \ 31 | docker rmi -f ${imagesToRemove} || true 32 | -------------------------------------------------------------------------------- /examples/JuPyteR/build-docker-image/runLocal.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | echo "JAVA_TOOL_OPTIONS=${JAVA_TOOL_OPTIONS:-}" 24 | echo "Unsetting JAVA_TOOL_OPTIONS" 25 | unset JAVA_TOOL_OPTIONS 26 | 27 | java --version 28 | 29 | export PATH="${HOME}/.local/bin:${PATH}" 30 | echo "PATH=${PATH}" 31 | echo "" 32 | 33 | jupyter kernelspec list 34 | echo "" 35 | 36 | jupyter notebook --ip=0.0.0.0 --no-browser --allow-root -------------------------------------------------------------------------------- /examples/JuPyteR/installDocker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | ### Installation docs: https://docs.docker.com/install/linux/docker-ce/ubuntu/ 24 | echo "Installing Docker, target OS: Ubuntu 16.04 or higher" 25 | 26 | SUDO_CMD="" 27 | if [[ -f "/etc/sudoers" ]]; then 28 | SUDO_CMD=sudo 29 | fi 30 | 31 | ${SUDO_CMD} apt-get remove docker docker-engine docker.io containerd runc || true 32 | 33 | ${SUDO_CMD} apt-get update 34 | 35 | ${SUDO_CMD} apt-get install -y \ 36 | apt-transport-https \ 37 | ca-certificates \ 38 | curl \ 39 | gnupg-agent \ 40 | software-properties-common 41 | 42 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | ${SUDO_CMD} apt-key add - 43 | 44 | ${SUDO_CMD} apt-key fingerprint 0EBFCD88 45 | 46 | ${SUDO_CMD} add-apt-repository \ 47 | "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ 48 | $(lsb_release -cs) \ 49 | stable" 50 | 51 | ${SUDO_CMD} apt-get update 52 | 53 | ${SUDO_CMD} apt-get install -y docker-ce docker-ce-cli containerd.io 54 | 55 | apt-cache madison docker-ce 56 | 57 | ${SUDO_CMD} groupadd docker || true 58 | ${SUDO_CMD} usermod -aG docker $USER 59 | newgrp docker 60 | 61 | docker run hello-world 62 | 63 | echo "Finished installing and testing Docker" -------------------------------------------------------------------------------- /examples/JuPyteR/version.txt: -------------------------------------------------------------------------------- 1 | 0.2 -------------------------------------------------------------------------------- /examples/apache-zeppelin/.gitignore: -------------------------------------------------------------------------------- 1 | logs/ -------------------------------------------------------------------------------- /examples/apache-zeppelin/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2019 Mani Sarkar 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with 
the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /examples/apache-zeppelin/Zeppelin-Dockerfile: -------------------------------------------------------------------------------- 1 | ARG ZEPPELIN_VERSION 2 | ARG IMAGE_VERSION 3 | ARG GRAALVM_VERSION 4 | ARG GRAALVM_JDK_VERSION 5 | 6 | FROM oracle/graalvm-ce:${GRAALVM_VERSION}-${GRAALVM_JDK_VERSION} as graal-jdk-image 7 | 8 | FROM apache/zeppelin:${ZEPPELIN_VERSION:-0.8.0} 9 | 10 | ### Apache Spark installation 11 | # Workaround to "fix" https://issues.apache.org/jira/browse/ZEPPELIN-3586 12 | ARG SPARK_VERSION 13 | ENV SPARK_VERSION=${SPARK_VERSION:-2.4.3} 14 | 15 | LABEL maintainer="Mani Sarkar" 16 | LABEL example_git_repo="https://github.com/neomatrix369/awesome-ai-ml-dl/tree/master/examples/apache-zeppelin" 17 | LABEL graalvm_version=${GRAALVM_VERSION}-${GRAALVM_JDK_VERSION} 18 | LABEL version=${IMAGE_VERSION} 19 | 20 | 21 | RUN echo "$LOG_TAG Download Spark binary" && \ 22 | wget -O /tmp/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz 23 | 24 | RUN tar -zxvf /tmp/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz && \ 25 | rm -rf /tmp/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz && \ 26 | mv spark-${SPARK_VERSION}-bin-hadoop2.7 /spark-${SPARK_VERSION}-bin-hadoop2.7 27 | 28 | ENV SPARK_HOME=/spark-${SPARK_VERSION}-bin-hadoop2.7 29 | 30 | RUN rm -fr /usr/lib/jvm/java-1.8.0-openjdk-amd64 /usr/lib/jvm/java-8-openjdk-amd64 31 | 32 | ### GraalVM installation 33 | 34 | 
ENV GRAALVM_VERSION=${GRAALVM_VERSION:-20.3.0} 35 | 36 | COPY --from=graal-jdk-image /opt/graalvm-ce-* /usr/lib/jvm/graalvm 37 | 38 | ENV JAVA_HOME=/usr/lib/jvm/graalvm 39 | ENV PATH=$JAVA_HOME/bin:$PATH 40 | 41 | RUN java -version 42 | 43 | CMD ["bin/zeppelin.sh"] -------------------------------------------------------------------------------- /examples/apache-zeppelin/buildZeppelinDockerImage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -u 5 | set -o pipefail 6 | 7 | IMAGE_NAME=${IMAGE_NAME:-zeppelin} 8 | IMAGE_VERSION=${IMAGE_VERSION:-$(cat version.txt)} 9 | GRAALVM_VERSION=${GRAALVM_VERSION:-$(cat graalvm_version.txt)} 10 | GRAALVM_JDK_VERSION=${GRAALVM_JDK_VERSION:-$(cat graalvm_jdk_version.txt)} 11 | 12 | DOCKER_FULL_TAG_NAME="${DOCKER_USER_NAME}/${IMAGE_NAME}" 13 | 14 | if [[ ${IMAGE_VERSION} = "0.1" ]]; then 15 | time docker build \ 16 | -t ${DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION} \ 17 | -f Zeppelin-Dockerfile . 18 | else 19 | time docker build \ 20 | -t ${DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION} \ 21 | --build-arg ZEPPELIN_VERSION=0.8.1 \ 22 | --build-arg SPARK_VERSION=2.4.4 \ 23 | --build-arg IMAGE_VERSION=${IMAGE_VERSION} \ 24 | --build-arg GRAALVM_VERSION=${GRAALVM_VERSION} \ 25 | --build-arg GRAALVM_JDK_VERSION=${GRAALVM_JDK_VERSION} \ 26 | -f Zeppelin-Dockerfile . 
27 | fi 28 | 29 | ./removeUnusedContainersAndImages.sh 30 | ./push-apache-zeppelin-docker-image-to-hub.sh -------------------------------------------------------------------------------- /examples/apache-zeppelin/graalvm_jdk_version.txt: -------------------------------------------------------------------------------- 1 | java11 -------------------------------------------------------------------------------- /examples/apache-zeppelin/graalvm_version.txt: -------------------------------------------------------------------------------- 1 | 20.3.0 -------------------------------------------------------------------------------- /examples/apache-zeppelin/installDocker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | ### Installation docs: https://docs.docker.com/install/linux/docker-ce/ubuntu/ 24 | echo "Installing Docker, target OS: Ubuntu 16.04 or higher" 25 | 26 | SUDO_CMD="" 27 | if [[ -f "/etc/sudoers" ]]; then 28 | SUDO_CMD=sudo 29 | fi 30 | 31 | ${SUDO_CMD} apt-get remove docker docker-engine docker.io containerd runc || true 32 | 33 | ${SUDO_CMD} apt-get update 34 | 35 | ${SUDO_CMD} apt-get install -y \ 36 | apt-transport-https \ 37 | ca-certificates \ 38 | curl \ 39 | gnupg-agent \ 40 | software-properties-common 41 | 42 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | ${SUDO_CMD} apt-key add - 43 | 44 | ${SUDO_CMD} apt-key fingerprint 0EBFCD88 45 | 46 | ${SUDO_CMD} add-apt-repository \ 47 | "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ 48 | $(lsb_release -cs) \ 49 | stable" 50 | 51 | ${SUDO_CMD} apt-get update 52 | 53 | ${SUDO_CMD} apt-get install -y docker-ce docker-ce-cli containerd.io 54 | 55 | apt-cache madison docker-ce 56 | 57 | ${SUDO_CMD} groupadd docker || true 58 | ${SUDO_CMD} usermod -aG docker $USER 59 | newgrp docker 60 | 61 | docker run hello-world 62 | 63 | echo "Finished installing and testing Docker" -------------------------------------------------------------------------------- /examples/apache-zeppelin/push-apache-zeppelin-docker-image-to-hub.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | if [[ -z ${DOCKER_USER_NAME:-""} ]]; then 24 | read -p "Docker username (must exist on Docker Hub): " DOCKER_USER_NAME 25 | fi 26 | 27 | IMAGE_NAME=${IMAGE_NAME:-zeppelin} 28 | IMAGE_VERSION=${IMAGE_VERSION:-$(cat version.txt)} 29 | DOCKER_FULL_TAG_NAME="${DOCKER_USER_NAME}/${IMAGE_NAME}" 30 | 31 | findImage() { 32 | IMAGE_NAME=$1 33 | echo $(docker images ${IMAGE_NAME} -q | head -n1 || true) 34 | } 35 | 36 | IMAGE_FOUND="$(findImage ${DOCKER_USER_NAME}/${IMAGE_NAME})" 37 | if [[ -z "${IMAGE_FOUND}" ]]; then 38 | echo "Docker image '${DOCKER_USER_NAME}/${IMAGE_NAME}' not found in the local repository" 39 | IMAGE_FOUND="$(findImage ${IMAGE_NAME})" 40 | if [[ -z "${IMAGE_FOUND}" ]]; then 41 | echo "Docker image '${IMAGE_NAME}' not found in the local repository" 42 | exit 1 43 | else 44 | echo "Docker image '${IMAGE_NAME}' found in the local repository" 45 | fi 46 | else 47 | echo "Docker image '${DOCKER_USER_NAME}/${IMAGE_NAME}' found in the local repository" 48 | fi 49 | 50 | docker tag ${IMAGE_FOUND} ${DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION} 51 | docker login --username=${DOCKER_USER_NAME} 52 | docker push ${DOCKER_FULL_TAG_NAME} -------------------------------------------------------------------------------- /examples/apache-zeppelin/removeUnusedContainersAndImages.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache 
License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | containersToRemove=$(docker ps --quiet --filter "status=exited") 24 | [ ! -z "${containersToRemove}" ] && \ 25 | echo "Remove any stopped container from the local registry" && \ 26 | docker rm ${containersToRemove} || true 27 | 28 | imagesToRemove=$(docker images --quiet --filter "dangling=true") 29 | [ ! -z "${imagesToRemove}" ] && \ 30 | echo "Remove any dangling images from the local registry" && \ 31 | docker rmi -f ${imagesToRemove} || true 32 | -------------------------------------------------------------------------------- /examples/apache-zeppelin/runZeppelinDockerContainer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | IMAGE_NAME=${IMAGE_NAME:-zeppelin} 24 | IMAGE_VERSION=${IMAGE_VERSION:-$(cat version.txt)} 25 | DOCKER_USER_NAME=${DOCKER_USER_NAME:-"neomatrix369"} 26 | DOCKER_FULL_TAG_NAME="${DOCKER_USER_NAME}/${IMAGE_NAME}" 27 | 28 | mkdir -p logs notebook 29 | chown ${USER}:${GROUP} logs 30 | chown ${USER}:${GROUP} notebook 31 | 32 | echo "Please wait till the log messages stop moving, it will be a sign that the service is ready! (about a minute or so)" 33 | echo "Once the service is ready, go to http://localhost:8080 to open the Apache Zeppelin homepage" 34 | time docker run --rm \ 35 | -it \ 36 | -p 8080:8080 \ 37 | -v ${PWD}/logs:/logs \ 38 | -v ${PWD}/notebook:/notebook \ 39 | -e ZEPPELIN_NOTEBOOK_DIR='/notebook' \ 40 | -e ZEPPELIN_LOG_DIR='/logs' \ 41 | ${DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION} 42 | 43 | -------------------------------------------------------------------------------- /examples/apache-zeppelin/version.txt: -------------------------------------------------------------------------------- 1 | 0.2 -------------------------------------------------------------------------------- /examples/better-nlp/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | __pycache__/ 3 | *.json 4 | *.dot -------------------------------------------------------------------------------- /examples/better-nlp/LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache 2.0 License 2 | 3 | Copyright (c) 2019 Mani Sarkar 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 
7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. -------------------------------------------------------------------------------- /examples/better-nlp/build/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7 2 | 3 | COPY install-linux.sh install-linux.sh 4 | COPY install-dependencies.sh install-dependencies.sh 5 | 6 | RUN ./install-linux.sh 7 | 8 | EXPOSE 8888 9 | 10 | ENV PATH=/root/.local/bin/:$PATH 11 | 12 | ENTRYPOINT ["jupyter-lab", "--ip=0.0.0.0", "--allow-root", "--no-browser"] -------------------------------------------------------------------------------- /examples/better-nlp/build/buildDockerImage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | if [[ -z ${DOCKER_USER_NAME:-""} ]]; then 24 | read -p "Docker username (must exist on Docker Hub): " DOCKER_USER_NAME 25 | fi 26 | 27 | IMAGE_NAME=${IMAGE_NAME:-better-nlp} 28 | IMAGE_VERSION=${IMAGE_VERSION:-$(cat ../version.txt)} 29 | BETTER_NLP_DOCKER_FULL_TAG_NAME="${DOCKER_USER_NAME}/${IMAGE_NAME}" 30 | 31 | time docker build -t ${BETTER_NLP_DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION} . -------------------------------------------------------------------------------- /examples/better-nlp/build/install-dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | echo "Install components using python and pip" 24 | 25 | npm install -g npm 26 | 27 | # Quick review of program versions 28 | npm --version 29 | python --version 30 | pip --version 31 | 32 | # Install python packages for NLP 33 | python -m pip install spacy textacy "pytextrank>=2.0.1" nltk 34 | 35 | python -m pip install jupyterlab pandas matplotlib -------------------------------------------------------------------------------- /examples/better-nlp/build/install-linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | echo "Please check if you fulfill the requirements mentioned in the README file." 
24 | 25 | # Install and update necessary Linux packages 26 | 27 | apt-get update && apt-get install -y --fix-missing \ 28 | wget curl liblapack-dev libswscale-dev pkg-config 29 | 30 | apt-get install -y --fix-missing zip vim 31 | 32 | # Append (not overwrite) so existing sysctl settings are preserved 33 | echo "fs.inotify.max_user_watches=100000" >> /etc/sysctl.conf 34 | 35 | # Install node and update npm 36 | curl --silent --location https://deb.nodesource.com/setup_8.x | \ 37 | bash - && apt-get install nodejs -y 38 | 39 | apt-get install -y libc-ares2 libnode64 libuv1 nodejs-doc 40 | apt-get install -y npm 41 | 42 | ./install-dependencies.sh -------------------------------------------------------------------------------- /examples/better-nlp/build/install-macos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | echo "Please check if you fulfill the requirements mentioned in the README file."
24 | 25 | # Install and update necessary MacOS packages 26 | 27 | # brew has no '-y'/'--fix-missing' flags; 'lapack' and 'ffmpeg' (provides libswscale) 28 | # are the Homebrew equivalents of the Debian dev packages 29 | brew update && brew install wget curl lapack ffmpeg pkg-config 30 | 31 | brew install zip vim 32 | 33 | # Install node (the Homebrew 'node' formula bundles npm); the nodesource 34 | # setup script is Debian/Ubuntu-only and must not be used on macOS 35 | brew install node 36 | 37 | ./install-dependencies.sh -------------------------------------------------------------------------------- /examples/better-nlp/build/push-better-nlp-docker-image-to-hub.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License.
17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | if [[ -z ${DOCKER_USER_NAME:-""} ]]; then 24 | read -p "Docker username (must exist on Docker Hub): " DOCKER_USER_NAME 25 | fi 26 | 27 | IMAGE_NAME=${IMAGE_NAME:-better-nlp} 28 | IMAGE_VERSION=${IMAGE_VERSION:-$(cat ../version.txt)} 29 | BETTER_NLP_DOCKER_FULL_TAG_NAME="${DOCKER_USER_NAME}/${IMAGE_NAME}" 30 | 31 | findImage() { 32 | IMAGE_NAME=$1 33 | echo $(docker images ${IMAGE_NAME} -q | head -n1 || true) 34 | } 35 | 36 | IMAGE_FOUND="$(findImage ${DOCKER_USER_NAME}/${IMAGE_NAME})" 37 | if [[ -z "${IMAGE_FOUND}" ]]; then 38 | echo "Docker image '${DOCKER_USER_NAME}/${IMAGE_NAME}' not found in the local repository" 39 | IMAGE_FOUND="$(findImage ${IMAGE_NAME})" 40 | if [[ -z "${IMAGE_FOUND}" ]]; then 41 | echo "Docker image '${IMAGE_NAME}' not found in the local repository" 42 | exit 1 43 | else 44 | echo "Docker image '${IMAGE_NAME}' found in the local repository" 45 | fi 46 | else 47 | echo "Docker image '${DOCKER_USER_NAME}/${IMAGE_NAME}' found in the local repository" 48 | fi 49 | 50 | docker tag ${IMAGE_FOUND} ${BETTER_NLP_DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION} 51 | docker login --username=${DOCKER_USER_NAME} 52 | docker push ${BETTER_NLP_DOCKER_FULL_TAG_NAME} -------------------------------------------------------------------------------- /examples/better-nlp/docs/Better-NLP-in-Jupyter-Notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/examples/better-nlp/docs/Better-NLP-in-Jupyter-Notebook.png -------------------------------------------------------------------------------- /examples/better-nlp/docs/Docker-container-console-Jupyter-lab-loading.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/examples/better-nlp/docs/Docker-container-console-Jupyter-lab-loading.png -------------------------------------------------------------------------------- /examples/better-nlp/docs/Docker_environment.md: -------------------------------------------------------------------------------- 1 | # Docker environment 2 | 3 | ### Setting your environment 4 | 5 | Ensure your environment has the below variable set, or set it in your `.bashrc` or `.bash_profile` or the relevant startup script: 6 | 7 | ```bash 8 | export DOCKER_USER_NAME="your_docker_username" 9 | ``` 10 | 11 | You must have an account on Docker hub under the above user name. 12 | 13 | 14 | ### Build a docker image with the necessary dependencies 15 | 16 | ```bash 17 | $ cd build 18 | $ ./buildDockerImage.sh 19 | or 20 | $ DOCKER_USER_NAME="your_docker_username" ./buildDockerImage.sh 21 | or 22 | $ IMAGE_VERSION="x.y.z" ./buildDockerImage.sh 23 | ``` 24 | 25 | 26 | ### Run the docker container to start running the programs 27 | 28 | ```bash 29 | $ cd build 30 | $ ./runDockerImage.sh 31 | or 32 | $ DOCKER_USER_NAME="your_docker_username" ./runDockerImage.sh 33 | or 34 | $ IMAGE_VERSION="x.y.z" ./runDockerImage.sh 35 | or run in Debug mode 36 | $ DEBUG="true" ./runDockerImage.sh 37 | ``` 38 | 39 | 40 | ### Push built Better NLP docker image to Docker hub 41 | 42 | ```bash 43 | $ cd build 44 | $ ./push-better-nlp-docker-image-to-hub.sh 45 | or 46 | $ DOCKER_USER_NAME="your_docker_username" ./push-better-nlp-docker-image-to-hub.sh 47 | or 48 | $ IMAGE_VERSION="x.y.z" ./push-better-nlp-docker-image-to-hub.sh 49 | ``` 50 | 51 | The above will prompt the docker login name and password, before it can push your image to Docker hub (you must have an account on Docker hub). 52 | 53 | 54 | ### Docker image on Docker Hub 55 | 56 | Find the [Better NLP Image on Docker Hub](https://hub.docker.com/r/neomatrix369/better-nlp). 
The `push-better-nlp-docker-image-to-hub.sh` script pushes the image to the Docker hub and the `runDockerImage.sh` script runs it from the local repository. If absent in the local repository, it downloads this image from Docker Hub. 57 | 58 | 59 | [Return to main page](../README.md) -------------------------------------------------------------------------------- /examples/better-nlp/docs/Jupyter_notebook.md: -------------------------------------------------------------------------------- 1 | # Jupyter Notebook 2 | 3 | ### Better NLP related 4 | 5 | See [better_nlp_spacy_texacy_examples.ipynb](../notebooks/jupyter/better_nlp_spacy_texacy_examples.ipynb) or [better_nlp_summarisers.ipynb](../notebooks/jupyter/better_nlp_summarisers.ipynb) to see the examples fleshed out in the notebook. A more efficient and easy way to work with - the model needs to be loaded only once throughout the life-cycle of the kernel. 6 | 7 | The Jupyter lab instance is executed immediately on running the `runDockerImage.sh` script. Look for messages like these in the docker container terminal: 8 | 9 | ![Docker-container-console-Jupyter-lab-loading.png](Docker-container-console-Jupyter-lab-loading.png) 10 | 11 | The above listed URL can be opened in the browser to access the notebook(s) in the current folder: 12 | 13 | ![Better-NLP-in-Jupyter-Notebook.png](Better-NLP-in-Jupyter-Notebook.png) 14 | 15 | ### NLP profiler 16 | 17 | See [nlp_profiler.ipynb](https://github.com/neomatrix369/nlp_profiler/blob/master/notebooks/jupyter/nlp_profiler.ipynb) 18 | 19 | ![](https://user-images.githubusercontent.com/1570917/88475059-706a2c00-cf24-11ea-8088-7516d3c4a159.png) ![](https://user-images.githubusercontent.com/1570917/88475060-73651c80-cf24-11ea-8c44-21352f7be5bc.png) 20 | 21 | Find the [NLP Profiler library here](https://github.com/neomatrix369/nlp_profiler/).
22 | 23 | [Return to main page](../README.md) -------------------------------------------------------------------------------- /examples/better-nlp/library/examples/extract-entities-from-text.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | 4 | from org.neomatrix369.better_nlp import BetterNLP 5 | 6 | betterNLP = BetterNLP() 7 | 8 | model = betterNLP.load_nlp_model() 9 | model = model["model"] 10 | 11 | print("~~~~~~~~ Started parsing...") 12 | 13 | # Can be any factual text or any text to experiment with 14 | generic_text = """Denis Guedj (1940 – April 24, 2010) was a French novelist and 15 | a professor of the History of Science at Paris VIII University. He was born 16 | in Setif. He spent many years devising courses and games to teach adults 17 | and children math. He is the author of Numbers: The Universal Language and 18 | of the novel The Parrot's Theorem. He died in Paris. 19 | """ 20 | 21 | extracted_entities = betterNLP.extract_entities(model, generic_text) 22 | extracted_entities = extracted_entities["extracted_entities"] 23 | 24 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 25 | betterNLP.pretty_print(extract_entities) 26 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 27 | 28 | print("\nToken types legend: ", betterNLP.token_entity_types()) 29 | 30 | print("\n") 31 | print("...Finished parsing ~~~~~~~\n") -------------------------------------------------------------------------------- /examples/better-nlp/library/examples/extract-noun-chunks-from-text.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | 4 | from org.neomatrix369.better_nlp import BetterNLP 5 | 6 | betterNLP = BetterNLP() 7 | 8 | model = betterNLP.load_nlp_model() 9 | model = model["model"] 10 | 11 | print("~~~~~ Started parsing...") 12 | 
13 | # Can be any factual text or any text to experiment with 14 | generic_text = """Denis Guedj (1940 – April 24, 2010) was a French novelist and a professor of the History of Science 15 | at Paris VIII University. He was born in Setif. He spent many years devising courses and games to teach adults and children math. 16 | He is the author of Numbers: The Universal Language and of the novel The Parrot's Theorem. He died in Paris. """ 17 | 18 | chunks = betterNLP.extract_noun_chunks(model, generic_text) 19 | chunks = chunks["noun_chunks"] 20 | 21 | chunks = chunks.get("noun_chunks") 22 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 23 | betterNLP.pretty_print(chunks) 24 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 25 | print("\n") 26 | print("...Finished parsing ~~~~~~\n") -------------------------------------------------------------------------------- /examples/better-nlp/library/examples/gather-facts-from-text.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | 4 | from org.neomatrix369.better_nlp import BetterNLP 5 | 6 | print("~~~~~~~ Started parsing...") 7 | 8 | betterNLP = BetterNLP() 9 | 10 | model = betterNLP.load_nlp_model() 11 | model = model["model"] 12 | 13 | # Can be any factual text or any text to experiment with 14 | generic_text = """Denis Guedj (1940 – April 24, 2010) was a French novelist and a professor of the History of Science at Paris VIII University. He was born in Setif. He spent many years devising courses and games to teach adults and children math. He is the author of Numbers: The Universal Language and of the novel The Parrot's Theorem. He died in Paris. 
""" 15 | 16 | target_topic = "Denis Guedj" 17 | extracted_facts = betterNLP.extract_facts(model, generic_text, target_topic) 18 | extracted_facts = extracted_facts["facts"] 19 | 20 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 21 | print("Trying to gather details about " + target_topic) 22 | 23 | number_of_facts_found = 0 24 | for each_fact_statement in extracted_facts: 25 | number_of_facts_found =+ 1 26 | subject, verb, fact = each_fact_statement 27 | print(f" - {fact}") 28 | 29 | if number_of_facts_found == 0: 30 | print("There were no facts on " + target_topic) 31 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 32 | 33 | print("\n") 34 | print("...Finished parsing ~~~~~~~\n") -------------------------------------------------------------------------------- /examples/better-nlp/library/examples/obfuscate-privacy-details-in-the-text.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | 4 | from org.neomatrix369.better_nlp import BetterNLP 5 | 6 | print("~~~~~~ Started parsing...") 7 | 8 | betterNLP = BetterNLP() 9 | 10 | model = betterNLP.load_nlp_model() 11 | model = model["model"] 12 | 13 | # Can be any factual text or any text to experiment with 14 | generic_text = """Denis Guedj (1940 – April 24, 2010) was a French novelist and 15 | a professor of the History of Science at Paris VIII University. He was born 16 | in Setif. He spent many years devising courses and games to teach adults 17 | and children math. He is the author of Numbers: The Universal Language and 18 | of the novel The Parrot's Theorem. He died in Paris. 
19 | """ 20 | 21 | obfuscated_text = betterNLP.obfuscate_text(model, generic_text) 22 | obfuscated_text = obfuscated_text["obfuscated_text"] 23 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 24 | print("Obfuscated generic text: ", "".join(obfuscated_text)) 25 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 26 | print("\n") 27 | print("...Finished parsing ~~~~~~~\n") -------------------------------------------------------------------------------- /examples/better-nlp/library/org/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/examples/better-nlp/library/org/__init__.py -------------------------------------------------------------------------------- /examples/better-nlp/library/org/neomatrix369/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/examples/better-nlp/library/org/neomatrix369/__init__.py -------------------------------------------------------------------------------- /examples/better-nlp/library/org/neomatrix369/nlp_profiler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Mani Sarkar 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ### Kaggle Utility script: https://www.kaggle.com/neomatrix369/nlp-profiler-class 16 | ### Kaggle kernel: https://www.kaggle.com/neomatrix369/nlp-profiler-simple-dataset 17 | ### Library: https://github.com/neomatrix369/nlp_profiler 18 | ### Jupyter Notebook: https://github.com/neomatrix369/nlp_profiler/blob/master/notebooks/jupyter/nlp_profiler.ipynb 19 | 20 | def apply_text_profiling(dataframe, text_column, params={}): 21 | message = """ 22 | Moved to a new location, install and use from https://github.com/neomatrix369/nlp_profiler, 23 | read the README.md to learn more and also see https://github.com/neomatrix369/nlp_profiler/blob/master/notebooks/jupyter/nlp_profiler.ipynb. 24 | """ 25 | raise Exception(message) -------------------------------------------------------------------------------- /examples/better-nlp/notebooks/jupyter/nlp_profiler-granular.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Moved to a new location, see https://github.com/neomatrix369/nlp_profiler/blob/master/notebooks/jupyter/nlp_profiler-granular.ipynb" 8 | ] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Python 3", 14 | "language": "python", 15 | "name": "python3" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.7.2" 28 | } 29 | }, 30 | "nbformat": 4, 31 | "nbformat_minor": 4 32 | } -------------------------------------------------------------------------------- /examples/better-nlp/notebooks/jupyter/nlp_profiler.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Moved to a new location, see https://github.com/neomatrix369/nlp_profiler/blob/master/notebooks/jupyter/nlp_profiler.ipynb" 8 | ] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Python 3", 14 | "language": "python", 15 | "name": "python3" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.7.2" 28 | } 29 | }, 30 | "nbformat": 4, 31 | "nbformat_minor": 4 32 | } -------------------------------------------------------------------------------- /examples/better-nlp/presentations/09-Mar-2019/Better-NLP-Presentation-Slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/examples/better-nlp/presentations/09-Mar-2019/Better-NLP-Presentation-Slides.pdf -------------------------------------------------------------------------------- /examples/better-nlp/presentations/29-Jun-2019/Better-NLP-2.0-one-library-rules-them-all-Presentation-Slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/examples/better-nlp/presentations/29-Jun-2019/Better-NLP-2.0-one-library-rules-them-all-Presentation-Slides.pdf -------------------------------------------------------------------------------- /examples/better-nlp/runDockerImage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | 
# Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | DOCKER_USER_NAME=${DOCKER_USER_NAME:-"neomatrix369"} 24 | 25 | IMAGE_NAME=${IMAGE_NAME:-better-nlp} 26 | IMAGE_VERSION=${IMAGE_VERSION:-$(cat version.txt)} 27 | BETTER_NLP_DOCKER_FULL_TAG_NAME="${DOCKER_USER_NAME}/${IMAGE_NAME}" 28 | 29 | WORKDIR=better-nlp 30 | 31 | if [[ "${DEBUG:-}" = "true" ]]; then 32 | docker run --rm \ 33 | --interactive --tty \ 34 | --volume $(pwd):/better-nlp \ 35 | -p 8888:8888 \ 36 | --workdir /${WORKDIR} \ 37 | --entrypoint /bin/bash \ 38 | ${BETTER_NLP_DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION} 39 | else 40 | time docker run --rm \ 41 | --interactive --tty \ 42 | --volume $(pwd):/better-nlp \ 43 | -p 8888:8888 \ 44 | --workdir /${WORKDIR} \ 45 | ${BETTER_NLP_DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION} 46 | fi -------------------------------------------------------------------------------- /examples/better-nlp/version.txt: -------------------------------------------------------------------------------- 1 | 0.3 -------------------------------------------------------------------------------- /examples/cloud-devops-infra/valohai/MLPMnist/.gitignore: -------------------------------------------------------------------------------- 1 | .gradle 2 | .idea 3 | *.iml 4 | bin 5 | build 6 | target 7 | local.properties 8 | logs/ 9 | out 10 | *.jar 11 | .deeplearning4j/ 12 | *.pb 13 | checkpoint*.* 14 | *.tgz 15 | 
*.tar.gz 16 | dataset-scripts/artifacts/ 17 | .DS_Store -------------------------------------------------------------------------------- /examples/cloud-devops-infra/valohai/MLPMnist/README.md: -------------------------------------------------------------------------------- 1 | # mlpmnist-dl4j-example project [![MLPMNist using DL4J](https://img.shields.io/docker/pulls/neomatrix369/dl4j-mnist-single-layer.svg)](https://hub.docker.com/r/neomatrix369/dl4j-mnist-single-layer) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 2 | 3 | This area in the repo is a result of the blog post [How to do Deep Learning for Java](https://medium.com/@neomatrix369/how-to-do-deep-learning-for-java-on-the-valohai-platform-eec8ba9f71d8) | [Original post](https://blog.valohai.com/how-to-do-deep-learning-for-java-on-the-valohai-platform). Please refer to the post before considering using this repo to understand better on how to use the different aspects of it. 
4 | 5 | ### Please find the working project at [Valohai's GitHub](https://github.com/valohai/) org, see repo [mlpmnist-dl4j-example project on GitHub](https://github.com/valohai/mlpmnist-dl4j-example#mlpmnist-dl4j-example-mlpmnist-single-layer-) 6 | 7 | 8 | --- 9 | 10 | Back to [main page (table of contents)](../../../../README.md#awesome-ai-ml-dl-) -------------------------------------------------------------------------------- /examples/cloud-devops-infra/valohai/nlp-cuda/README.md: -------------------------------------------------------------------------------- 1 | # DL4J NLP examples [![NLP using DL4J (cuda)](https://img.shields.io/docker/pulls/neomatrix369/dl4j-nlp-cuda.svg)](https://hub.docker.com/r/neomatrix369/dl4j-nlp-cuda) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 2 | 3 | --- 4 | 5 | This area in the repo is a result of the blog post [Applying NLP in Java, all from the command-line](https://medium.com/@neomatrix369/applying-nlp-in-java-all-from-the-command-line-1225dd591e80) | [Original post](https://blog.valohai.com/nlp_with_dl4j_in_java_all_from_the_command-line?from=3oxenia9mtr6). Please refer to the post before considering using this repo to understand better on how to use the different aspects of it. 
6 | 7 | ### Please find the working project at [Valohai's GitHub](https://github.com/valohai/) org, see repo [dl4j-nlp-cuda-example project on GitHub](https://github.com/valohai/dl4j-nlp-cuda-example) 8 | 9 | --- 10 | 11 | Back to [main page (table of contents)](../../../../README.md#awesome-ai-ml-dl-) -------------------------------------------------------------------------------- /examples/cloud-devops-infra/wandb/sb-fx-competition/README.md: -------------------------------------------------------------------------------- 1 | # Tracking SB FX Competition experiments with Weights & Biases [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 2 | 3 | [Carlo Lepelaars](https://www.github.com/carlolepelaars) and [Mani Sarkar](https://github.com/neomatrix369) competed in the [SoftBank Forex Algorithm Challenge](https://medium.com/bitgrit-data-science-publication/bitgrit-and-datagateway-partner-to-host-an-ai-competition-for-softbank-64c704efadba) organised by [Bitgrit](https://twitter.com/@bitgrit_global) on behalf of [Softbank Corp.](https://www.softbank.jp/en/corp/), please find a sample notebook (in this folder) to see how we used [Weights and Bias](https://wandb.com) to record our parameters while running experiments from within a Jupyter notebook. 4 | 5 | - [Jupyter notebook with implementation](./Method-02-Linear-Model.ipynb) 6 | - [HTML output of the same notebook](./Method-02-Linear-Model.html) 7 | 8 | The steps are pretty simple and you can see from the output results that it does work. 9 | 10 | **Note:** we haven't provided a `train.csv` and `test.csv`, if you use your own, they should contain among other fields, fields like `id` and `target` in them. 
11 | 12 | Please support the shoutouts we shared, post the competition completion: 13 | - [LinkedIn](https://www.linkedin.com/feed/update/urn:li:activity:6620259937497030656/) ([originally posted](https://www.linkedin.com/feed/update/urn:li:activity:6618158169258508288/) by [Carlo Lepelaars](https://www.linkedin.com/in/ACoAAA607SEB4I_HlFIcUpd-RcAdMUtUp6SoPS8/)) 14 | - [Twitter](https://twitter.com/theNeomatrix369/status/1214601525856747520) ([originally posted](https://twitter.com/carlolepelaars/status/1212391518037786624) by [Carlo Lepelaars](https://www.twitter.com/carlolepelaars)) 15 | 16 | **2255 participants competed over a period of two months, we joined towards the end of the competition (the last two weeks). And our submissions were _[ranked fifth among the others on the Private leaderboard](https://pbs.twimg.com/media/ENNHp_BXkAIFswu.jpg)_. There were occassional blips on the Public leaderboards with our initial submissions.** 17 | 18 | Find out more [about W&B](../../../../data/about-Weights-and-Biases.md#weights--biases) at this resource. 
19 | 20 | --- 21 | 22 | Back to [main page (table of contents)](../../../../README.md#awesome-ai-ml-dl-) 23 | -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/.bashrc: -------------------------------------------------------------------------------- 1 | export PATH="${JAVA_HOME}/bin:${PATH}" -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/.dockerignore: -------------------------------------------------------------------------------- 1 | shared 2 | .cache -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2019 Mani Sarkar 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/build-on-the-cloud/create-project.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -u 5 | set -o pipefail 6 | 7 | PROJECT_NAME=${1:-} 8 | 9 | if [[ -z "${PROJECT_NAME}" ]]; then 10 | echo "Please specify the project name. Exiting..." 
11 | echo " Usage: " 12 | echo " $0 [project name]" 13 | exit -1 14 | fi 15 | 16 | vh --valohai-token ${VALOHAI_TOKEN} \ 17 | project create \ 18 | -n ${PROJECT_NAME} --yes -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/build-on-the-cloud/exec-step.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -u 5 | set -o pipefail 6 | 7 | STEPNAME=${1:-} 8 | 9 | if [[ -z "${STEPNAME}" ]]; then 10 | echo "Please specify step name. Exiting..." 11 | echo " Usage: " 12 | echo " $0 [stepname]" 13 | exit -1 14 | fi 15 | echo "Executing step ${STEPNAME}" 16 | 17 | vh exec run ${STEPNAME} \ 18 | --adhoc -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/build-on-the-cloud/show-final-result.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -u 5 | set -o pipefail 6 | 7 | TMPFILE=$(mktemp) 8 | EXEC_COUNTER=${1:-latest} 9 | echo "Gathering output from counter ${EXEC_COUNTER}" 10 | vh exec logs ${EXEC_COUNTER} --stdout > ${TMPFILE} 11 | 12 | grep -A20 'Started*' ${TMPFILE} || \ 13 | (true && \ 14 | echo "Nothing found, maybe it failed, maybe its still running." 
&& \ 15 | echo "Use the './watch-execution.sh ${EXEC_COUNTER}' command to find out.") 16 | 17 | rm -fr ${TMPFILE} -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/build-on-the-cloud/watch-execution.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -u 5 | set -o pipefail 6 | 7 | EXEC_COUNTER=${1:-latest} 8 | echo "Watching counter ${EXEC_COUNTER}" 9 | # Use 'timeout 5' as prefix, if you wish your script 10 | # to time out after watching for 5 seconds 11 | vh exec watch ${EXEC_COUNTER} 12 | echo "Stopped watching counter ${EXEC_COUNTER}" 13 | echo "Re-run to continue to watch or run the './show-final-result.sh ${EXEC_COUNTER}' to see the final outcome" -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/docs/build-the-grakn-docker-container.md: -------------------------------------------------------------------------------- 1 | ## Build the Grakn docker container 2 | 3 | See [Setting your environment](../README.md#setting-your-environment) before proceeding 4 | 5 | ```bash 6 | $ ./buildDockerImage.sh 7 | or 8 | $ DOCKER_USER_NAME="your_docker_username" ./buildDockerImage.sh 9 | or 10 | $ GRAKN_VERSION="x.y.z" ./buildDockerImage.sh 11 | ``` 12 | 13 | **Push built Grakn docker image to Docker hub:** 14 | 15 | See [Setting your environment](../README.md#setting-your-environment) before proceeding 16 | 17 | ```bash 18 | $ ./push-grakn-docker-image-to-hub.sh 19 | or 20 | $ DOCKER_USER_NAME="your_docker_username" ./push-grakn-docker-image-to-hub.sh 21 | or 22 | $ GRAKN_VERSION="x.y.z" ./push-grakn-docker-image-to-hub.sh 23 | ``` 24 | 25 | The above will prompt the docker login name and password, before it can push your image to Docker hub (you must have an account on Docker hub). 
26 | 27 | --- 28 | 29 | [back to README](./../README.md) -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/docs/building-grakn.md: -------------------------------------------------------------------------------- 1 | ## Building Grakn 2 | 3 | Run the Grakn docker container: 4 | 5 | ``` 6 | $ ./grakn-runner.sh --debug --runContainer 7 | ``` 8 | 9 | When the docker prompt appears, continue doing one of the actions below. 10 | 11 | #### Building uberjar inside the container 12 | 13 | It's done from inside the container, or the host machine or even on the cloud. 14 | 15 | ``` 16 | grakn@c74ed490582e:~$ ./builder.sh --buildUberJar 17 | ``` 18 | This process can take a bit of time as bazel builds our uberjar. 19 | 20 | #### Building native-image inside the container 21 | 22 | 23 | 24 | It's done in two steps and can be run from inside the container, or the host machine or even on the cloud. 25 | 26 | **Extract META-INF from the jar before proceeding with the build process** 27 | ``` 28 | grakn@c74ed490582e:~$ ./builder.sh --grakn-home [/path/to/grakn/home] --extract 29 | ``` 30 | 31 | **Building `native-image` using the extracted META-INF of the jar and the jar file** 32 | ``` 33 | grakn@c74ed490582e:~$ ./builder.sh --jarfile [/path/with/filename.jar] --buildNativeImage 34 | or 35 | grakn@c74ed490582e:~$ ./builder.sh --grakn-home [/path/to/grakn/home] --buildNativeImage 36 | ``` 37 | 38 | This process can take a bit of time as the `native-image` building process is a lengthy one. 
39 | 40 | --- 41 | 42 | [back to README](./../README.md) -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/docs/grakn-docker-container.md: -------------------------------------------------------------------------------- 1 | ### Run the Grakn docker container 2 | 3 | ```bash 4 | $ ./grakn-runner.sh --runContainer 5 | 6 | or 7 | 8 | $ ./grakn-runner.sh --dockerUserName "your_docker_username" --runContainer 9 | 10 | or 11 | 12 | $ GRAKN_VERSION="x.y.z" ./grakn-runner.sh --runContainer 13 | 14 | or in debug mode 15 | 16 | $ ./grakn-runner.sh --debug --runContainer 17 | 18 | or run Grakn server only (not run the Console: Graql) 19 | 20 | $ ./grakn-runner.sh --run-grakn-only --runContainer 21 | 22 | or run in GraalVM mode 23 | 24 | $ ./grakn-runner.sh --jdk GRAALVM --runContainer 25 | 26 | or run by switching off JVMCI flag (default: on) 27 | 28 | $ ./grakn-runner.sh --jdk GRAALVM --javaopts "-XX:-UseJVMCINativeLibrary" JDK_TO_USE="GRAALVM" --runContainer 29 | ``` 30 | 31 | ### Run the scripts in the Grakn docker container 32 | 33 | ``` 34 | $ ./grakn-runner.sh --debug --runContainer 35 | $ startGraknAndGraql.sh 36 | 37 | Exiting the Graql Console takes you into the Docker container prompt. Also another way to run Grakn server but not use the Graql console prompt. 
38 | 39 | or 40 | 41 | $ RUN_GRAKN_ONLY=true startGraknAndGraql.sh 42 | ``` 43 | 44 | 45 | --- 46 | 47 | [back to README](./../README.md) -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/docs/run-the-performance-script.md: -------------------------------------------------------------------------------- 1 | ### Run the performance script in the Grakn docker container 2 | 3 | #### Automatically 4 | 5 | ```bash 6 | Run the performance benchmarking script with default JDK 7 | 8 | $ RUN_PERFORMANCE_SCRIPT=true ./runGraknInDocker.sh 9 | 10 | or 11 | 12 | Run the performance benchmarking script with GraalVM 13 | 14 | $ JDK_TO_USE="GRAALVM" RUN_PERFORMANCE_SCRIPT=true ./runGraknInDocker.sh 15 | ``` 16 | 17 | #### Manually 18 | 19 | ```bash 20 | Run the performance benchmarking script with default JDK 21 | 22 | $ DEBUG=true ./runGraknInDocker.sh 23 | grakn@040eb5bd829c:~$ ./runPerformanceBenchmark.sh # inside the container 24 | 25 | or 26 | 27 | Run the performance benchmarking script with GraalVM 28 | 29 | $ JDK_TO_USE="GRAALVM" DEBUG=true ./runGraknInDocker.sh 30 | grakn@040eb5bd829c:~$ ./runPerformanceBenchmark.sh # inside the container 31 | ``` 32 | 33 | See [successful run console](successful-run-console.md) - includes both outputs from the traditional JDK8 and GraalVM executions. In debug mode, the docker container prompt is returned, the Grakn and Graql instances are not executed. Please check out the history of [successful run console](successful-run-console.md) to see progress with previous runs under various versions of Grakn, GraalVM and other configuration settings. 
34 | 35 | 36 | --- 37 | 38 | [back to README](./../README.md) -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/graalvm_jdk_version.txt: -------------------------------------------------------------------------------- 1 | java8 -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/graalvm_version.txt: -------------------------------------------------------------------------------- 1 | 20.1.0 -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/grakn-graalvm-version-matrix.txt: -------------------------------------------------------------------------------- 1 | GRAALVM_VERSION="19.0.0" GRAKN_VERSION=1.4.3 2 | GRAALVM_VERSION="19.1.0" GRAKN_VERSION=1.4.3 3 | GRAALVM_VERSION="19.2.0" GRAKN_VERSION=1.4.3 4 | GRAALVM_VERSION="19.3.0" GRAALVM_JDK_VERSION=java8 GRAKN_VERSION=1.4.3 5 | GRAALVM_VERSION="19.0.0" GRAKN_VERSION=1.5.2 6 | GRAALVM_VERSION="19.1.0" GRAKN_VERSION=1.5.2 7 | GRAALVM_VERSION="19.2.0" GRAKN_VERSION=1.5.2 8 | GRAALVM_VERSION="19.3.0" GRAALVM_JDK_VERSION=java8 GRAKN_VERSION=1.5.2 9 | GRAALVM_VERSION="19.0.0" GRAKN_VERSION=1.5.7 10 | GRAALVM_VERSION="19.1.0" GRAKN_VERSION=1.5.7 11 | GRAALVM_VERSION="19.2.0" GRAKN_VERSION=1.5.7 12 | GRAALVM_VERSION="19.3.0" GRAALVM_JDK_VERSION=java8 GRAKN_VERSION=1.5.7 13 | GRAALVM_VERSION="19.0.0" GRAKN_VERSION=1.6.0 14 | GRAALVM_VERSION="19.1.0" GRAKN_VERSION=1.6.0 15 | GRAALVM_VERSION="19.2.0" GRAKN_VERSION=1.6.0 16 | GRAALVM_VERSION="19.3.0" GRAALVM_JDK_VERSION=java8 GRAKN_VERSION=1.6.0 17 | GRAALVM_VERSION="19.0.0" GRAKN_VERSION=1.6.2 18 | GRAALVM_VERSION="19.1.0" GRAKN_VERSION=1.6.2 19 | GRAALVM_VERSION="19.2.0" GRAKN_VERSION=1.6.2 20 | GRAALVM_VERSION="19.3.0" GRAALVM_JDK_VERSION=java8 GRAKN_VERSION=1.6.2 21 | GRAALVM_VERSION="20.1.0" GRAALVM_JDK_VERSION=java8 GRAKN_VERSION=1.6.2 
print('Loading nltk libraries, please wait...')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
print('Finished loading nltk libraries.')

import importlib

# The module file name contains hyphens, so it cannot be loaded with a
# plain `import` statement.
queries = importlib.import_module("english-graql-queries")
main_queries_in_english = queries.main_queries_in_english

stop_words = set(stopwords.words('english'))

# Characters stripped from every query before tokenising.
# '-' (hyphen/dash) is deliberately kept.
# NOTE(review): the '\,' leaves a literal backslash in the set (harmless);
# preserved byte-for-byte to match the original behaviour.
punctuations = '''!()[]{};:'"\,<>./?@#$%^&*_~+“”'''

# Strip punctuation in a single pass per query; str.translate removes every
# occurrence of each punctuation character, equivalent to the original
# character-by-character replace() loop but O(len(query)).
_strip_punctuation = str.maketrans('', '', punctuations)
main_queries_in_english = [
    each_query.translate(_strip_punctuation)
    for each_query in main_queries_in_english
]

print(f'~~~ Tokenising schema queries (queries: {len(main_queries_in_english)})')
for each_query in main_queries_in_english:
    query_tokens = word_tokenize(each_query)
    # Drop stop words and purely numeric tokens.
    query_without_stop_words = [
        query_token
        for query_token in query_tokens
        if query_token not in stop_words and not query_token.isnumeric()
    ]
    print(f'{each_query}: {query_without_stop_words}')
fuzzywuzzy import fuzz 5 | import pandas as pd 6 | 7 | import importlib 8 | queries=importlib.import_module("english-graql-queries") 9 | main_queries_in_english = queries.main_queries_in_english 10 | 11 | print('Iterating through schema queries') 12 | comparison_results = [] 13 | for each_query in main_queries_in_english: 14 | print(f'Question/command: {each_query}') 15 | 16 | for each_similarity in main_queries_in_english[each_query]: 17 | ratio = fuzz.ratio(each_query, each_similarity) 18 | 19 | partial_ratio = fuzz.partial_ratio(each_query, each_similarity) 20 | token_sort_ratio = fuzz.token_sort_ratio(each_query, each_similarity) 21 | comparison_results.append([each_query, each_similarity, ratio, partial_ratio, token_sort_ratio]) 22 | 23 | print('Publishing results') 24 | results = pd.DataFrame(comparison_results, columns = ['each_query', 'each_similarity', 'ratio', 'partial_ratio', 'token_sort_ratio']) 25 | print(results) 26 | print() 27 | print(results.describe()) 28 | print() 29 | ratio_results = results.sort_values(by = 'ratio', ascending = False) 30 | print(ratio_results) 31 | print() 32 | transposed_results = ratio_results.drop('each_query', axis = 1).transpose() 33 | print(transposed_results) 34 | results_partial_ratio = ratio_results.sort_values(by = 'partial_ratio', ascending = False) 35 | transposed_results = results_partial_ratio.drop('each_query', axis = 1).transpose() 36 | print(transposed_results) 37 | print() 38 | token_sort_ratio = ratio_results.sort_values(by = 'token_sort_ratio', ascending = False) 39 | transposed_results = token_sort_ratio.drop('each_query', axis = 1).transpose() 40 | print(transposed_results) 41 | -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/graql/requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools 2 | fuzzywuzzy 3 | ijson==2.3 4 | pandas<1.0 5 | grakn-client==1.6.0 6 | pytictoc 7 | colorama 
#!/bin/bash
# Start an interactive Python (data-science) docker container with the
# current folder mounted as its working directory.

set -e
set -u
set -o pipefail

echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
# Fix: "python contain" -> "python container".
echo "Running python container, mapping to current folder"
WORKDIR=/home/python

echo ""; echo "Run the below command once you are in the container"
echo " $ pip3 install -r requirements.txt"; echo ""
# Fix: "pythong" -> "python".
echo "Use python3 or pip3 to run any python or pip commands"
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"; echo ""

# (removed: PREVIOUS_TO_PREVIOUS_DIR was computed but never used)

set -x
docker run --rm \
    -it \
    --volume "$(pwd)":${WORKDIR} \
    --workdir ${WORKDIR} \
    --network="host" \
    --entrypoint="/bin/bash" \
    neomatrix369/datascience:0.1
set +x
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -u 5 | set -o pipefail 6 | 7 | python -c ' 8 | 9 | import json 10 | 11 | import glob 12 | 13 | 14 | 15 | json_files = glob.glob("*.json") 16 | 17 | output = [json.load(open(file)) for file in json_files] 18 | 19 | json.dump(output, open("report.json", "w"), indent=4) 20 | 21 | ' -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/presentations/GraknCosmos2020/Naturally,-getting-productive,-my-journey-with-Grakn-and-Graql.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/examples/data/databases/graph/grakn/presentations/GraknCosmos2020/Naturally,-getting-productive,-my-journey-with-Grakn-and-Graql.pdf -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/presentations/README.md: -------------------------------------------------------------------------------- 1 | ## Naturally, getting productive, my journey with Grakn and Graql 2 | 3 | ### Slides 4 | 5 | See [slides (PDF)](./GraknCosmos2020/Naturally,-getting-productive,-my-journey-with-Grakn-and-Graql.pdf) 6 | 7 | ### Video 8 | 9 | - [YouTube video](https://www.youtube.com/watch?v=Cef2nPEmybs&list=PLUz6BqeCy21SXbOTMV5uRs5buGoYaW-Qu&index=2&t=0s) 10 | 11 | ### Speakers 12 | 13 | - [Mani Sarkar](http://github.com/neomatrix369) 14 | 15 | ### Abstract 16 | 17 | See [Sessionize page](https://sessionize.com/s/mani-sarkar/naturally_getting_productive_with_g/25601) 18 | -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/pull-docker-images-from-hub.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 
2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | source common.sh 24 | 25 | GRAALVM_VERSIONS="19.0.0 19.1.0 19.2.0 19.3.0" 26 | GRAALVM_JDK_VERSIONS="java8" # java11 is not supported by Grakn 27 | GRAKN_VERSIONS="1.4.3 1.5.2 1.5.7 1.6.0 1.6.2" 28 | 29 | DOCKER_USER_NAME="${DOCKER_USER_NAME:-neomatrix369}" 30 | IMAGE_NAME=${IMAGE_NAME:-grakn} 31 | FULL_DOCKER_REPO_NAME="${DOCKER_USER_NAME}/${IMAGE_NAME}" ### Using this field as Repository name of the image (using Docker terms) 32 | 33 | for GRAKN_VERSION in ${GRAKN_VERSIONS[@]} 34 | do 35 | for GRAALVM_VERSION in ${GRAALVM_VERSIONS[@]} 36 | do 37 | if [[ "$(isVersionGreaterThanOrEqualTo "${GRAALVM_VERSION}" "19.3.0")" = "true" ]]; then 38 | for GRAALVM_JDK_VERSION in ${GRAALVM_JDK_VERSIONS[@]} 39 | do 40 | set -x 41 | IMAGE_VERSION=${GRAKN_VERSION}-GRAALVM-CE-${GRAALVM_JDK_VERSION}-${GRAALVM_VERSION} 42 | docker pull ${FULL_DOCKER_REPO_NAME}:${IMAGE_VERSION} || true 43 | set +x 44 | done 45 | else 46 | set -x 47 | IMAGE_VERSION=${GRAKN_VERSION}-GRAALVM-CE-${GRAALVM_VERSION} ### Using this field as Tag name of the image (using Docker terms) 48 | docker pull ${FULL_DOCKER_REPO_NAME}:${IMAGE_VERSION} || true 49 | set +x 50 | fi 51 | done 52 | done -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/startGraknAndGraql.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | GRAKN_VERSION=${GRAKN_VERSION:-$(cat grakn_version.txt)} 24 | 25 | echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" 26 | 27 | echo "JAVA_HOME=${JAVA_HOME}" 28 | export PATH="${JAVA_HOME}/bin:${PATH}" 29 | echo "PATH=${PATH}" 30 | java -version 31 | 32 | echo -n "Grakn version: (see bottom of the startup text banner)" 33 | echo "" 34 | 35 | (env | grep _JAVAOPTS) || true 36 | 37 | echo "" 38 | echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" 39 | time ${GRAKN_HOME}/grakn server start --benchmark 40 | echo "^^^^^^^^^^^^^^^^^ Time taken for the Grakn server to startup" 41 | echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" 42 | echo "Grakn server is running..." 43 | 44 | if [[ "${RUN_GRAKN_ONLY:-}" = "true" ]]; then 45 | echo "Not running Graql console" 46 | /bin/bash 47 | else 48 | echo "Starting Graql console..." 
49 | echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" 50 | time ${GRAKN_HOME}/grakn console 51 | /bin/bash 52 | fi -------------------------------------------------------------------------------- /examples/data/databases/graph/grakn/valohai.yaml: -------------------------------------------------------------------------------- 1 | - step: 2 | name: run-grakn-benchmark 3 | image: neomatrix369/grakn:1.6.2-GRAALVM-CE-19.2.1 4 | environment-variables: 5 | - name: SOURCE_ROOT_DATA_FOLDER 6 | default: "/valohai/inputs/data" 7 | - name: TARGET_ROOT_DATA_FOLDER 8 | default: "/valohai/outputs" 9 | command: 10 | - cd {parameter-value:stage} 11 | - echo "~~~ Contents of the Valohai inputs folder"; ls -lash ${VALOHAI_INPUTS} 12 | - ./runPerformanceBenchmark.sh 13 | - echo "~~~ Contents of the Valohai output folder"; ls -lash ${VALOHAI_OUTPUTS} 14 | environment: aws-eu-west-1-c5-4xlarge -------------------------------------------------------------------------------- /examples/data/dataiku/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG GRAALVM_VERSION 2 | ARG GRAALVM_JDK_VERSION 3 | ARG DSS_VERSION 4 | 5 | FROM oracle/graalvm-ce:${GRAALVM_VERSION}-${GRAALVM_JDK_VERSION} as graal-jdk-image 6 | 7 | FROM dataiku/dss:${DSS_VERSION} as dataikuDss 8 | 9 | FROM buildpack-deps:stretch-scm as base 10 | 11 | COPY --from=dataikuDss / / 12 | 13 | ARG IMAGE_VERSION 14 | 15 | LABEL maintainer="Mani Sarkar" 16 | LABEL example_git_repo="https://github.com/neomatrix369/awesome-ai-ml-dl/tree/master/examples/data/dataiku" 17 | LABEL graalvm_version=${GRAALVM_VERSION}-{GRAALVM_JDK_VERSION} 18 | LABEL version=${IMAGE_VERSION} 19 | 20 | 21 | # Install Java 8 to 11 22 | RUN echo "JAVA_HOME=${JAVA_HOME}" 23 | RUN echo "PATH=${PATH}" 24 | 25 | RUN java -version 26 | 27 | ### GraalVM installation 28 | ARG GRAALVM_VERSION 29 | ENV GRAALVM_VERSION="${GRAALVM_VERSION:-20.3.0}" 30 | 31 | COPY --from=graal-jdk-image 
/opt/graalvm-ce-* /usr/lib/jvm/graalvm-ce 32 | 33 | ENV GRAALVM_HOME="/usr/lib/jvm/graalvm-ce" 34 | RUN echo "GRAALVM_HOME=${GRAALVM_HOME}" 35 | RUN ${GRAALVM_HOME}/bin/java -version 36 | RUN update-alternatives --install /usr/bin/java java ${GRAALVM_HOME}/bin/java 1 37 | 38 | EXPOSE 10000 39 | 40 | ARG WORKDIR 41 | WORKDIR ${WORKDIR} 42 | 43 | COPY runDSS.sh runDSS.sh 44 | 45 | ARG USER 46 | RUN chown -R ${USER}:${USER} ${WORKDIR} 47 | 48 | USER ${USER} 49 | 50 | ENTRYPOINT ["bash", "-c", "./runDSS.sh"] -------------------------------------------------------------------------------- /examples/data/dataiku/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2019 Mani Sarkar 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /examples/data/dataiku/buildDockerImage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright 2019, 2020 Mani Sarkar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | DSS_VERSION=${DSS_VERSION:-5.1.4} 24 | GRAALVM_VERSION=${GRAALVM_VERSION:-$(cat graalvm_version.txt)} 25 | GRAALVM_JDK_VERSION=${GRAALVM_JDK_VERSION:-$(cat graalvm_jdk_version.txt)} 26 | USER=dataiku 27 | 28 | if [[ -z ${DOCKER_USER_NAME:-""} ]]; then 29 | read -p "Docker username (must exist on Docker Hub): " DOCKER_USER_NAME 30 | fi 31 | 32 | IMAGE_NAME=${IMAGE_NAME:-dataiku-dss} 33 | IMAGE_VERSION=${IMAGE_VERSION:-${DSS_VERSION}} 34 | DSS_DOCKER_FULL_TAG_NAME="${DOCKER_USER_NAME}/${IMAGE_NAME}" 35 | 36 | echo "* Fetching docker image ${DSS_DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION} from Docker Hub" 37 | time docker pull ${DSS_DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION} || true 38 | time docker build -t ${DSS_DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION} \ 39 | --build-arg DSS_VERSION=${DSS_VERSION} \ 40 | --build-arg USER=${USER} \ 41 | --build-arg WORKDIR=/home/${USER} \ 42 | --build-arg GRAALVM_VERSION=${GRAALVM_VERSION} \ 43 | --build-arg GRAALVM_JDK_VERSION=${GRAALVM_JDK_VERSION} \ 44 | . 
#!/bin/bash

#
# Copyright 2019, 2020 Mani Sarkar
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -e
set -u
set -o pipefail

# Fix: the previous default (1.5.7) is a Grakn version pasted in by mistake;
# keep the default consistent with buildDockerImage.sh (DSS 5.1.4).
DSS_VERSION=${DSS_VERSION:-5.1.4}

if [[ -z ${DOCKER_USER_NAME:-""} ]]; then
    read -p "Docker username (must exist on Docker Hub): " DOCKER_USER_NAME
fi

IMAGE_NAME=${IMAGE_NAME:-dataiku-dss}
IMAGE_VERSION=${IMAGE_VERSION:-${DSS_VERSION}}
DSS_DOCKER_FULL_TAG_NAME="${DOCKER_USER_NAME}/${IMAGE_NAME}"

# Print the id of the newest local image matching the given name (empty
# string if none). Uses a local so the global IMAGE_NAME is not clobbered.
findImage() {
    local image_name=$1
    docker images "${image_name}" -q | head -n1 || true
}

IMAGE_FOUND="$(findImage "${DOCKER_USER_NAME}/${IMAGE_NAME}")"
if [[ -z "${IMAGE_FOUND}" ]]; then
    echo "Docker image '${DOCKER_USER_NAME}/${IMAGE_NAME}' not found in the local repository"
    IMAGE_FOUND="$(findImage "${IMAGE_NAME}")"
    if [[ -z "${IMAGE_FOUND}" ]]; then
        echo "Docker image '${IMAGE_NAME}' not found in the local repository"
        exit 1
    else
        echo "Docker image '${IMAGE_NAME}' found in the local repository"
    fi
else
    echo "Docker image '${DOCKER_USER_NAME}/${IMAGE_NAME}' found in the local repository"
fi

docker tag "${IMAGE_FOUND}" "${DSS_DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION}"
docker login --username="${DOCKER_USER_NAME}"
# Fix: push only the tag created above; a bare repository name pushes every
# local tag of that repository.
docker push "${DSS_DOCKER_FULL_TAG_NAME}:${IMAGE_VERSION}"
#!/bin/bash

#
# Copyright 2019, 2020 Mani Sarkar
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Container entrypoint: optionally switch the JVM to GraalVM, then launch
# the Dataiku DSS run script.

set -e
set -u
set -o pipefail

# Fix: JDK_TO_USE is an optional toggle; default it to empty so `set -u`
# does not abort the container when the caller has not exported it.
if [[ "${JDK_TO_USE:-}" = "GRAALVM" ]]; then
    # Fix: the dataiku Dockerfile copies GraalVM to the unversioned path
    # /usr/lib/jvm/graalvm-ce, so the version suffix must not be appended.
    GRAALVM_HOME="/usr/lib/jvm/graalvm-ce"
    JAVA_HOME=${GRAALVM_HOME}
    echo "JAVA_HOME=${JAVA_HOME}"
    PATH=${GRAALVM_HOME}/bin:${PATH}
    echo "PATH=${PATH}"

    java -version
fi

echo "Current working directory: $(pwd)"
# DSS_VERSION is a build ARG (not an ENV) in the Dockerfile; default it so
# this informational line cannot crash the script under `set -u`.
echo "DSS_VERSION=${DSS_VERSION:-unknown}"
./run.sh
/*
 * Gradle build for the DeepNetts example.
 * This file was generated by the Gradle 'init' task.
 *
 * Gradle 6.5 — Build time: 2020-06-02 20:46:21 UTC
 * Revision: a27f41e4ae5e8a41ab9b19f8dd6d86d7b384dad4
 * Kotlin: 1.3.72, Groovy: 2.5.11, Ant: Apache Ant(TM) 1.10.7
 */

plugins {
    // uberjar/shadowJar config
    id 'com.github.johnrengelman.shadow' version '6.1.0'

    id 'application'
    id 'java'
    id 'maven-publish'
}

repositories {
    mavenLocal()
    mavenCentral()
    maven {
        url = uri('https://repo.jenkins-ci.org/public/')
    }

    maven {
        // FIX(review): switched from http:// to https:// — plain-HTTP
        // repositories are rejected by Gradle 7+ and no longer served
        // by repo.maven.apache.org.
        url = uri('https://repo.maven.apache.org/maven2')
    }
}

dependencies {
    // FIX(review): the '@pom' artifact-only notation disables transitive
    // resolution unless explicitly re-enabled; this matches the sibling
    // examples/ensembler and examples/tribuo builds.
    implementation('com.deepnetts:deepnetts-core:1.12@pom') {
        transitive = true
    }
}

group = 'org.neomatrix369.deepnetts'
version = '1.0'
description = 'deepnetts-machine'
sourceCompatibility = '11'
mainClassName = group + '.DeepNettsMachine'

jar {
    manifest {
        // FIX(review): '${mainClassName}' was a single-quoted Groovy string,
        // which does NOT interpolate — the manifest literally contained the
        // text "${mainClassName}". Reference the property directly instead.
        attributes 'Implementation-Title': 'DeepNetts Machine',
                   'Implementation-Version': version,
                   'Built-By': System.getProperty('user.name'),
                   'Built-Date': new Date(),
                   'Built-JDK': System.getProperty('java.version'),
                   'Main-Class': mainClassName
    }
    from {
        // FIX(review): dependencies are declared in 'implementation', which the
        // legacy 'compile' configuration does not include — the old code packed
        // an empty set into the jar. 'runtimeClasspath' holds the real deps.
        configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
    }
}

// uberjar/shadowJar config
shadowJar {
    archiveClassifier.set('with-dependencies')
}

publishing {
    publications {
        maven(MavenPublication) {
            from(components.java)
        }
    }
}

tasks.withType(JavaCompile) {
    options.encoding = 'UTF-8'
}
#!/bin/bash

#
# Prints the Jupyter notebook URL logged by the docker container running on
# the provisioned OCI instance, polling over ssh until the URL appears.
#

set -eu
set -o pipefail

# Resolve sibling scripts relative to this script, not the caller's $PATH.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

INSTANCE_PUBLIC_IP="$("${SCRIPT_DIR}/get-instance-public-ip.sh")"

NOTEBOOK_URL=""

while [[ -z "${NOTEBOOK_URL}" ]]; do
    # FIX(review): the trailing '|| true' is required — under
    # 'set -e -o pipefail', a grep with no match (the normal state while the
    # container is still starting) made the whole script abort instead of
    # sleeping and retrying.
    NOTEBOOK_URL=$(ssh "opc@${INSTANCE_PUBLIC_IP}" \
                       'docker logs $(docker ps -q)' \
                     | grep -v "NotebookApp" \
                     | grep '127\.0\.0\.1' \
                     | awk '{print $2}' \
                     | sed "s/127\.0\.0\.1/${INSTANCE_PUBLIC_IP}/g" || true)
    if [[ -z "${NOTEBOOK_URL}" ]]; then
        sleep 2
    fi
done

printf '%s\n' "${NOTEBOOK_URL}"
#!/bin/bash

#
# Opens the cloud-hosted Jupyter notebook in the local browser using the
# platform's URL-opener (xdg-open on Linux, open on macOS).
#

set -eu
set -o pipefail

# Echo the platform-specific "open a URL" command name.
# Returns non-zero (with a message on stderr) on unsupported platforms.
getOpenCommand() {
    local os
    os="$(uname)"
    case "${os}" in
        Linux)  echo "xdg-open" ;;
        Darwin) echo "open" ;;
        *)
            # FIX(review): previously fell through silently, leaving
            # OPEN_COMMAND empty and failing later with a confusing
            # "command not found" on the URL itself.
            echo "Unsupported platform for opening a browser: ${os}" >&2
            return 1
            ;;
    esac
}

# Resolve sibling scripts relative to this script, not the caller's $PATH.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

OPEN_COMMAND=$(getOpenCommand)
NOTEBOOK_URL=$("${SCRIPT_DIR}/get-notebook-url.sh")

set -x
"${OPEN_COMMAND}" "${NOTEBOOK_URL}"
set +x
#!/bin/bash

#
# Runs the deepnetts docker container (notebook mode) on the provisioned OCI
# instance over ssh, then prints how to reach the notebook.
#

set -eu
set -o pipefail

# Resolve sibling scripts relative to this script, not the caller's $PATH.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

INSTANCE_PUBLIC_IP="$("${SCRIPT_DIR}/get-instance-public-ip.sh")"

echo "Public IP address of the cloud instance running is ${INSTANCE_PUBLIC_IP}"

# FIX(review): the old '|| exit_code=$? && true' depended on fragile
# '&&'/'||' chaining; a plain '|| exit_code=$?' captures the status
# without aborting under 'set -e'.
exit_code=0
ssh "opc@${INSTANCE_PUBLIC_IP}" \
    'cd awesome-ai-ml-dl/examples/deepnetts; ./docker-runner.sh --notebookMode --doNotOpenNotebook --runContainer' \
  || exit_code=$?

if [[ ${exit_code} -eq 0 ]]; then
    echo ""
    echo "Finished loading up the docker container in the cloud instance."
    echo ""
else
    echo ""
    echo "Failed trying to run the docker container, its possible it has already been executed, check the error logs above."
    echo "Or try to ssh into the container and investigate."
    echo ""
fi

echo "If the container has run successfully (without any errors in the console), then you will be able to do one of these:"
echo ""
echo "    $ ./open-notebook-in-browser.sh"
echo ""
echo "or, manually go to the browser at"
echo ""
echo "    $("${SCRIPT_DIR}/get-notebook-url.sh")"
echo ""
echo "To be able to open up the notebook in the browser using one of the above means."
# Sourced into the container user's ~/.bashrc: selects the active JDK
# (GraalVM or AdoptOpenJDK 11) based on the JDK_TO_USE env variable.

# Remember the pre-JDK PATH so repeated switches replace, not stack, entries.
export OLD_PATH=${PATH}

# Point JAVA_HOME/JDK_HOME/PATH at the given JDK home and show its version.
# FIX(review): extracted from switchTo11/switchToGraal, which were
# copy-paste duplicates; the redundant JAVA_HOME re-assignments that
# followed each call site were dropped (the function already exports it).
switchJdk() {
    export JAVA_HOME=$1
    export JDK_HOME=${JAVA_HOME}
    echo "Switched to ${JAVA_HOME}" 1>&2
    export PATH="${JAVA_HOME}/bin:${OLD_PATH:-}"
    java -version
}

function switchTo11 {
    switchJdk "${JAVA_11_HOME}"
}

function switchToGraal {
    switchJdk "${GRAALVM_HOME}"
}

if [[ "${JDK_TO_USE:-}" = "GRAALVM" ]]; then
    switchToGraal
else
    switchTo11
fi

echo "PATH=${PATH}" 1>&2
echo "JAVA_HOME=${JAVA_HOME}" 1>&2
#!/bin/bash

#
# Copyright 2019, 2020, 2021 Mani Sarkar
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -e
set -u
set -o pipefail

# Shallow-clone a repository unless its target folder already exists.
# Arguments: $1 - repository URL, $2 - branch (optional, default 'master')
gitClone() {
    REPO_URL=$1
    BRANCH=${2:-master}
    # 5th '/'-separated field of the URL, i.e. 'repo' in https://host/user/repo
    REPO_FOLDER=$(echo "${REPO_URL}" | awk '{split($0,a,"/"); print a[5]}')
    if [ -e "${REPO_FOLDER}" ]; then
        echo "${REPO_FOLDER} already exists, aborting process, remove folder manually to perform a fresh download/update"
    else
        git clone --depth=1 --branch "${BRANCH}" "${REPO_URL}"
    fi
}

# Download and unpack an archive (zip or tar.gz/tgz) unless the target folder
# already exists; the downloaded archive is removed after extraction.
# Arguments: $1 - download URL, $2 - archive filename, $3 - expected folder
downloadArtifact() {
    URL=$1
    ARTIFACT=${2}
    ARTIFACT_FOLDER=${3}

    if [ -e "${ARTIFACT_FOLDER}" ]; then
        echo "${ARTIFACT_FOLDER} already exists, aborting process, remove folder manually to perform a fresh download/update"
    else
        if [[ -e "${ARTIFACT}" ]]; then
            echo "${ARTIFACT} already exists, skipping to next step..."
        else
            curl -O -L -J "${URL}"
        fi

        # FIX(review): the old detection used inverted '-z "$(echo … | grep …)"'
        # checks and the BRE pattern 'tar.gz|tgz' (where '|' is literal), so the
        # "unrecognised" branch was effectively unreachable and ANY non-zip file
        # was handed to tar. Dispatch on the filename suffix instead.
        case "${ARTIFACT}" in
            *.zip)
                unzip -u "${ARTIFACT}"
                ;;
            *.tar.gz|*.tgz)
                tar -xvzf "${ARTIFACT}"
                ;;
            *)
                echo 'File format unrecognised, aborting...'
                exit 1   # FIX(review): 'exit -1' is not a valid exit status
                ;;
        esac

        rm -f "${ARTIFACT}"
    fi
}
17 | # 18 | 19 | set -e 20 | set -u 21 | set -o pipefail 22 | 23 | source common.sh 24 | 25 | gitClone https://github.com/neomatrix369/deepnetts-communityedition "add-examples-as-tutorials" 26 | cd deepnetts-communityedition 27 | 28 | echo "Downloading Maven wrapper artifacts" 29 | curl -sL https://github.com/shyiko/mvnw/releases/download/0.1.0/mvnw.tar.gz | tar xvz 30 | 31 | # Maven version can be changed with 32 | (MAVEN_VERSION=3.6.3 && 33 | sed -iEe "s/[0-9]\+[.][0-9]\+[.][0-9]\+/${MAVEN_VERSION}/g" .mvn/wrapper/maven-wrapper.properties) 34 | 35 | ## Ideally we could have just downloaded the jars from maven central 36 | ## But we need the below, and it's not clear if the Maven central Jars 37 | ## have dependencies with them: 38 | # 39 | # "deepnetts-core-1.11.jar" 40 | # or 41 | # "deepnetts-core-1.12.jar" 42 | # 43 | ## and other dependencies needed for the notebooks in the 'notebooks' folder. 44 | 45 | echo "Building DeepNetts using Maven" 46 | set -x 47 | echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" 48 | echo "Skipping test failures - not ideal, but to gain some speed" 49 | ./mvnw install package -Dmaven.compiler.source=11 -Dmaven.compiler.target=11 50 | 51 | echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" 52 | set +x 53 | 54 | DEEPNETTS_VERSION="1.11" 55 | 56 | echo "Copying the necessary jars into the notebooks folder" 57 | 58 | cp deepnetts-examples/target/deepnetts-examples-${DEEPNETTS_VERSION}.jar notebooks 59 | 60 | cd .. 
-------------------------------------------------------------------------------- /examples/deepnetts/docker-image/version.txt: -------------------------------------------------------------------------------- 1 | 0.1 -------------------------------------------------------------------------------- /examples/deepnetts/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/examples/deepnetts/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /examples/deepnetts/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.5-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /examples/deepnetts/settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 
3 | */ 4 | 5 | rootProject.name = 'deepnetts-machine' 6 | -------------------------------------------------------------------------------- /examples/deepnetts/src/main/java/org/neomatrix369/deepnetts/DeepNettsMachine.java: -------------------------------------------------------------------------------- 1 | package org.neomatrix369.deepnetts; 2 | 3 | import java.util.Arrays; 4 | import org.neomatrix369.deepnetts.ClassificationExample.*; 5 | import org.neomatrix369.deepnetts.RegressionExample.*; 6 | 7 | public class DeepNettsMachine 8 | { 9 | public static void main( String[] args ) throws Exception 10 | { 11 | System.out.println("~ Running DeepNetts Machine"); 12 | System.out.println("CLI Params: " + Arrays.toString(args)); 13 | if ((args.length > 0) && (args[0].toLowerCase().contains("regression"))) { 14 | new RegressionExample().run(); 15 | } else { 16 | new ClassificationExample().run(); 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /examples/deepnetts/src/main/java/org/neomatrix369/deepnetts/utils/ExampleDataSets.java: -------------------------------------------------------------------------------- 1 | package deepnetts.examples.util; 2 | 3 | import deepnetts.data.TabularDataSet; 4 | 5 | import deepnetts.data.DataSets; 6 | import java.io.File; 7 | import java.io.IOException; 8 | import javax.visrec.ml.data.DataSet; 9 | import deepnetts.data.ImageSet; 10 | import deepnetts.data.MLDataItem; 11 | import deepnetts.data.TabularDataSet; 12 | 13 | /** 14 | * TODO: add breast cancer, mnist and other UCI stuff 15 | * @author Zoran 16 | */ 17 | public class ExampleDataSets { 18 | 19 | public static TabularDataSet iris() throws IOException { 20 | // TODO: apply some normalization here, as a param? 
21 | return (TabularDataSet) DataSets.readCsv("datasets/iris_data_normalised.txt", 4, 3); 22 | } 23 | 24 | public static TabularDataSet xor() { 25 | TabularDataSet dataSet = new TabularDataSet(2, 1); 26 | 27 | MLDataItem item1 = new TabularDataSet.Item(new float[] {0, 0}, new float[] {0}); 28 | dataSet.add(item1); 29 | 30 | MLDataItem item2 = new TabularDataSet.Item(new float[] {0, 1}, new float[] {1}); 31 | dataSet.add(item2); 32 | 33 | MLDataItem item3 = new TabularDataSet.Item(new float[] {1, 0}, new float[] {1}); 34 | dataSet.add(item3); 35 | 36 | MLDataItem item4 = new TabularDataSet.Item(new float[] {1, 1}, new float[] {0}); 37 | dataSet.add(item4); 38 | 39 | return dataSet; 40 | } 41 | 42 | 43 | public static ImageSet mnist() { 44 | String labelsFile = "D:\\datasets\\mnist\\train\\labels.txt"; 45 | String trainingFile = "D:\\datasets\\mnist\\train\\train.txt"; // 1000 cifara - probaj sa 10 00 46 | 47 | ImageSet imageSet = new ImageSet(28, 28); 48 | imageSet.setInvertImages(true); 49 | imageSet.loadLabels(new File(labelsFile)); 50 | imageSet.loadImages(new File(trainingFile), 1000); 51 | 52 | return imageSet; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /examples/ensembler/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .gradle 3 | build 4 | deepnetts*-with-dependencies* 5 | META-INF 6 | *.iprof 7 | logs/ -------------------------------------------------------------------------------- /examples/ensembler/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2019, 2020, 2021 Mani Sarkar 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
/*
 * Gradle build for the Ensembler example.
 * This file was generated by the Gradle 'init' task.
 *
 * Gradle 6.5 — Build time: 2020-06-02 20:46:21 UTC
 * Revision: a27f41e4ae5e8a41ab9b19f8dd6d86d7b384dad4
 * Kotlin: 1.3.72, Groovy: 2.5.11, Ant: Apache Ant(TM) 1.10.7
 */

plugins {
    // uberjar/shadowJar config
    id 'com.github.johnrengelman.shadow' version '6.1.0'

    id 'application'
    id 'java'
    id 'maven-publish'
}

repositories {
    mavenLocal()
    mavenCentral()
    maven {
        url = uri('https://repo.jenkins-ci.org/public/')
    }

    maven {
        // FIX(review): switched from http:// to https:// — plain-HTTP
        // repositories are rejected by Gradle 7+ and no longer served
        // by repo.maven.apache.org.
        url = uri('https://repo.maven.apache.org/maven2')
    }
}

dependencies {
    implementation('com.deepnetts:deepnetts-core:1.12@pom') {
        transitive = true // for Groovy
        // isTransitive = true // for Kotlin
    }
}

group = 'org.neomatrix369.deepnetts'
version = '1.0'
description = 'deepnetts-machine'
sourceCompatibility = '11'
mainClassName = group + '.DeepNettsMachine'

jar {
    manifest {
        // FIX(review): '${mainClassName}' was a single-quoted Groovy string,
        // which does NOT interpolate — the manifest literally contained the
        // text "${mainClassName}". Reference the property directly instead.
        attributes 'Implementation-Title': 'DeepNetts Machine',
                   'Implementation-Version': version,
                   'Built-By': System.getProperty('user.name'),
                   'Built-Date': new Date(),
                   'Built-JDK': System.getProperty('java.version'),
                   'Main-Class': mainClassName
    }
    from {
        // FIX(review): dependencies are declared in 'implementation', which the
        // legacy 'compile' configuration does not include — the old code packed
        // an empty set into the jar. 'runtimeClasspath' holds the real deps.
        configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
    }
}

// uberjar/shadowJar config
shadowJar {
    archiveClassifier.set('with-dependencies')
}

publishing {
    publications {
        maven(MavenPublication) {
            from(components.java)
        }
    }
}

tasks.withType(JavaCompile) {
    options.encoding = 'UTF-8'
}
3 | */ 4 | 5 | rootProject.name = 'deepnetts-machine' 6 | -------------------------------------------------------------------------------- /examples/ensembler/src/main/java/org/neomatrix369/ensembler/EnsemblerMachine.java: -------------------------------------------------------------------------------- 1 | package org.neomatrix369.ensembler; 2 | 3 | import java.util.Arrays; 4 | import deepnetts.examples.util.CsvFile; 5 | import org.neomatrix369.ensembler.RegressionTribuoExample.*; 6 | import org.neomatrix369.ensembler.RegressionDeepNettsExample.*; 7 | 8 | public class EnsemblerMachine 9 | { 10 | private static String csvValidationFilename = "datasets/deepnetts-linear-regression-validation.csv"; 11 | private static int UNSEEN_DATA_COUNT = 100; 12 | 13 | public static void main( String[] args ) throws Exception 14 | { 15 | System.out.println("~ Running Ensembler Machine"); 16 | System.out.println("CLI Params: " + Arrays.toString(args)); 17 | 18 | // plot predictions for some random data 19 | double[][] data = new double[UNSEEN_DATA_COUNT][2]; 20 | 21 | for(int i=0; i 14 | Back to [main page (table of contents)](../../README.md) -------------------------------------------------------------------------------- /examples/python/frameworks/streamlit/README.md: -------------------------------------------------------------------------------- 1 | # Streamlit: Getting started crash course 2 | 3 | **Getting started crash course Video:** https://youtu.be/_9WiB2PDO7k 4 | 5 | The code snippets and example files in this folder are a result of the above video course, use them while watching the video. 
-------------------------------------------------------------------------------- /examples/python/frameworks/streamlit/example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/examples/python/frameworks/streamlit/example.jpg -------------------------------------------------------------------------------- /examples/python/frameworks/streamlit/example.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/examples/python/frameworks/streamlit/example.mp3 -------------------------------------------------------------------------------- /examples/tribuo/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .gradle 3 | build 4 | tribuo*-with-dependencies* 5 | META-INF 6 | *.iprof 7 | .vscode -------------------------------------------------------------------------------- /examples/tribuo/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Guidelines 2 | 3 | At the moment there isn't a lot written or spoken about this subject but nevertheless we would like consider links that are useful for the wider community, so we think the below would help: 4 | 5 | (a) widely recommended, regardless of personal opinion 6 | 7 | (b) discussed in the community (mentioned via social media) 8 | 9 | (c) a good starting point for the subject or related subjects (contains examples) 10 | 11 | (d) implementations that would be a good example for everyone to follow and be inspired by 12 | 13 | 14 | ## Pull Requests 15 | 16 | There are two required criteria for a pull request: 17 | 18 | 1. 
If an entry has a similar scope as other entries in the same category, the description must state the unique features that distinguishes it from the other entries. 19 | 20 | 2. If an entry does not meet conditions *(a)* to *(d)* there has to be an explanation either in the description or the pull request why it should be added to the list. 21 | 22 | Self-promotion is not encouraged, but your suggestion will of course be approved if the criteria match. 23 | 24 | Furthermore, please ensure your pull request follows the following guidelines: 25 | 26 | * Please search previous suggestions before making a new one, as yours may be a duplicate. 27 | * Please make an individual pull request for each suggestion. 28 | * Use the following format for libraries: \[RESOURCE\]\(LINK\) - DESCRIPTION. 29 | * Entries should be sorted in ascending alphabetical order, i.e. a to z. 30 | * New categories or improvements to the existing categorization are welcome. 31 | * Keep descriptions short, simple and unbiased. 32 | * Check your spelling and grammar. 33 | * Make sure your text editor is set to remove trailing whitespace. 34 | 35 | Thank you for your suggestions! 36 | -------------------------------------------------------------------------------- /examples/tribuo/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2019 Mani Sarkar 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
/*
 * Gradle build for the Tribuo example.
 * This file was generated by the Gradle 'init' task.
 *
 * Gradle 6.5 — Build time: 2020-06-02 20:46:21 UTC
 * Revision: a27f41e4ae5e8a41ab9b19f8dd6d86d7b384dad4
 * Kotlin: 1.3.72, Groovy: 2.5.11, Ant: Apache Ant(TM) 1.10.7
 */

plugins {
    // uberjar/shadowJar config
    id 'com.github.johnrengelman.shadow' version '6.1.0'

    id 'application'
    id 'java'
    id 'maven-publish'
}

repositories {
    mavenLocal()
    mavenCentral()
    maven {
        url = uri('https://repo.jenkins-ci.org/public/')
    }

    maven {
        // FIX(review): switched from http:// to https:// — plain-HTTP
        // repositories are rejected by Gradle 7+ and no longer served
        // by repo.maven.apache.org.
        url = uri('https://repo.maven.apache.org/maven2')
    }
}

dependencies {
    implementation('org.tribuo:tribuo-all:4.2.0@pom') {
        transitive = true // for Groovy
        // isTransitive = true // for Kotlin
    }
}

group = 'org.neomatrix369.tribuo'
version = '1.0'
description = 'tribuo-machine'
sourceCompatibility = '11'
mainClassName = group + '.TribuoMachine'

jar {
    manifest {
        // FIX(review): '${mainClassName}' was a single-quoted Groovy string,
        // which does NOT interpolate — the manifest literally contained the
        // text "${mainClassName}". Reference the property directly instead.
        attributes 'Implementation-Title': 'Tribuo Machine',
                   'Implementation-Version': version,
                   'Built-By': System.getProperty('user.name'),
                   'Built-Date': new Date(),
                   'Built-JDK': System.getProperty('java.version'),
                   'Main-Class': mainClassName
    }
    from {
        // FIX(review): dependencies are declared in 'implementation', which the
        // legacy 'compile' configuration does not include — the old code packed
        // an empty set into the jar. 'runtimeClasspath' holds the real deps.
        configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
    }
}

// uberjar/shadowJar config
shadowJar {
    archiveClassifier.set('with-dependencies')
}

publishing {
    publications {
        maven(MavenPublication) {
            from(components.java)
        }
    }
}

tasks.withType(JavaCompile) {
    options.encoding = 'UTF-8'
}
#!/bin/bash

# Polls the Jupyter container running on the provisioned OCI instance until it
# prints its tokenised notebook URL, then echoes that URL with the loopback
# address replaced by the instance's public IP.

set -eu
set -o pipefail

# The helper script lives alongside this one; do not rely on it being on PATH.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

INSTANCE_PUBLIC_IP="$("${SCRIPT_DIR}/get-instance-public-ip.sh")"

NOTEBOOK_URL=""

while true; do
    # '|| true' is required: until Jupyter has logged its URL the grep stages
    # match nothing and return non-zero, which under 'set -e' + 'pipefail'
    # would previously abort the whole script instead of retrying.
    NOTEBOOK_URL=$(ssh "opc@${INSTANCE_PUBLIC_IP}" \
                       'docker logs $(docker ps -q)' | \
                       grep -v "NotebookApp" | \
                       grep '127\.0\.0\.1' | \
                       awk '{print $2}' | \
                       sed 's/127\.0\.0\.1/'"${INSTANCE_PUBLIC_IP}"'/g') || true
    if [[ -z "${NOTEBOOK_URL}" ]]; then
        sleep 2
    else
        break
    fi
done

echo "${NOTEBOOK_URL}"
#!/bin/bash

# Opens the running Jupyter notebook (on the cloud instance) in the local
# default browser, using the platform-appropriate "open a URL" command.

set -eu
set -o pipefail

# Print the command used to open a URL in the default browser for this OS.
# Fails fast on unsupported platforms; previously an empty string was
# returned, which later produced a cryptic '": command not found"' error.
getOpenCommand() {
  if [[ "$(uname)" = "Linux" ]]; then
    echo "xdg-open"
  elif [[ "$(uname)" = "Darwin" ]]; then
    echo "open"
  else
    echo "Unsupported OS: $(uname), please open the notebook URL manually" >&2
    exit 1
  fi
}

# The helper script lives alongside this one; do not rely on it being on PATH.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

OPEN_COMMAND=$(getOpenCommand)
NOTEBOOK_URL=$("${SCRIPT_DIR}/get-notebook-url.sh")
set -x
"${OPEN_COMMAND}" "${NOTEBOOK_URL}"
set +x
#!/bin/bash

# Starts the tribuo docker container on the remote OCI instance over ssh and
# prints instructions for opening the notebook afterwards.

set -eu
set -o pipefail

# The helper scripts live alongside this one; do not rely on them being on PATH.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

INSTANCE_PUBLIC_IP="$("${SCRIPT_DIR}/get-instance-public-ip.sh")"

echo "Public IP address of the cloud instance running is ${INSTANCE_PUBLIC_IP}"

# Capture the remote command's exit status without tripping 'set -e'.
# The original '|| exit_code=$? && true' relied on surprising '&&'/'||'
# left-to-right chaining; a plain '||' fallback is sufficient and clearer.
exit_code=0
ssh "opc@${INSTANCE_PUBLIC_IP}" \
       'cd awesome-ai-ml-dl/examples/tribuo; ./docker-runner.sh --notebookMode --doNotOpenNotebook --runContainer' \
       || exit_code=$?

if [[ ${exit_code} -eq 0 ]]; then
	echo ""
	echo "Finished loading up the docker container in the cloud instance."
	echo ""
else
	echo ""
	echo "Failed trying to run the docker container, its possible it has already been executed, check the error logs above."
	echo "Or try to ssh into the container and investigate."
	echo ""
fi

echo "If the container has run successfully (without any errors in the console), then you will be able to do one of these:"
echo ""
echo "    $ ./open-notebook-in-browser.sh"
echo ""
echo "or, manually go to the browser at"
echo ""
echo "    $("${SCRIPT_DIR}/get-notebook-url.sh")"
echo ""
echo "To be able to open up the notebook in the browser using one of the above means."
# Jupyter datascience image extended with JDK 11, GraalVM, the IJava kernel
# and Tribuo tutorials.

# Args declared before the first FROM are only visible to FROM instructions.
ARG GRAALVM_VERSION
ARG GRAALVM_JDK_VERSION

FROM findepi/graalvm:${GRAALVM_VERSION}-${GRAALVM_JDK_VERSION} as graal-jdk-image
FROM jupyter/datascience-notebook as base-image

### Install JDK 11 from AdoptOpenJDK images
COPY --from=adoptopenjdk/openjdk11 /opt/java /opt/java

### Install GraalVM for Java 11
COPY --from=graal-jdk-image /graalvm* /opt/java/graalvm

### Install curl needed for rest of the tasks
USER root
RUN apt-get update && apt-get install -qy curl

ARG WORKDIR
WORKDIR ${WORKDIR}

ARG IMAGE_VERSION
ARG TRIBUO_VERSION

# Fixed: re-declare the pre-FROM args inside this stage; without this the
# LABEL below expanded them to empty strings.
ARG GRAALVM_VERSION
ARG GRAALVM_JDK_VERSION

LABEL maintainer="Mani Sarkar"
LABEL example_git_repo="https://github.com/neomatrix369/awesome-ai-ml-dl/tree/master/examples/tribuo"
LABEL tribuo_version=${TRIBUO_VERSION}
# Fixed: '{GRAALVM_JDK_VERSION}' was missing its '$' and was recorded literally.
LABEL graalvm_version=${GRAALVM_VERSION}-${GRAALVM_JDK_VERSION}
LABEL version=${IMAGE_VERSION}

### Java setup
ARG JAVA_11_HOME

ARG GRAALVM_HOME
ENV GRAALVM_HOME=${GRAALVM_HOME}
ENV JAVA_HOME=${GRAALVM_HOME}
ENV PATH=${JAVA_HOME}/bin:${PATH}

### Test Java (Traditional and GraalVM)
RUN ${JAVA_11_HOME}/bin/java -version

RUN ${GRAALVM_HOME}/bin/java -version

### Install packages
RUN apt-get update && apt-get install -qy unzip git vim libgomp1 glibc-*

RUN unzip -version
RUN git --version
RUN vim --version

### Install the Jupyter Java Kernel
RUN wget https://github.com/neomatrix369/awesome-ai-ml-dl/releases/download/v0.1/ijava-1.3.0.zip
COPY install-java-kernel.sh install-java-kernel.sh
RUN ./install-java-kernel.sh --installJar

### Common functions
COPY common.sh common.sh

### Install Tribuo
# Fixed: 'RUN export ...' only affects that single layer's shell and is lost
# afterwards; ENV persists the variable for later layers and the container.
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${JAVA_HOME}/lib/server/:${JAVA_HOME}/lib/amd64/server/
COPY install-tribuo.sh install-tribuo.sh
RUN ./install-tribuo.sh
RUN mv tribuo/*.gz tribuo/tutorials

### Setup user
COPY .bashrc .bashrc_custom
RUN cat .bashrc_custom >> /home/jovyan/.bashrc
RUN rm .bashrc_custom

ARG GROUP
RUN cd ${WORKDIR} && chown -R jovyan:${GROUP} .
#!/bin/bash

#
# Copyright 2019 Mani Sarkar
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -e
set -u
set -o pipefail

# Shallow-clone a git repository (single branch) unless its target folder
# already exists in the current directory.
#   $1 - repository URL (https://host/owner/repo form)
#   $2 - branch name (optional, defaults to 'master')
gitClone() {
    local repo_url=$1
    local branch=${2:-master}
    local repo_folder
    # 5th '/'-separated field of an https URL is the repository name.
    repo_folder=$(echo "${repo_url}" | awk '{split($0,a,"/"); print a[5]}')
    if [ -e "${repo_folder}" ]; then
        echo "${repo_folder} already exists, aborting process, remove folder manually to perform a fresh download/update"
    else
        git clone --depth=1 --branch "${branch}" "${repo_url}"
    fi
}

# Download (unless already present) and unpack an archive, then remove it.
#   $1 - download URL
#   $2 - archive file name (.zip, .tar.gz or .tgz)
#   $3 - folder the archive unpacks into; skipped entirely if it exists
downloadArtifact() {
    local url=$1
    local artifact=$2
    local artifact_folder=$3

    if [ -e "${artifact_folder}" ]; then
        echo "${artifact_folder} already exists, aborting process, remove folder manually to perform a fresh download/update"
        return 0
    fi

    if [[ -e "${artifact}" ]]; then
        echo "${artifact} already exists, skipping to next step..."
    else
        curl -O -L -J "${url}"
    fi

    # Fixed: the original grep-based checks were inverted and used
    # grep 'tar.gz|tgz' without -E (so the alternation never matched),
    # which tarred *any* non-zip name and made the error branch dead code.
    case "${artifact}" in
        *.zip)
            unzip -u "${artifact}"
            ;;
        *.tar.gz|*.tgz)
            tar -xvzf "${artifact}"
            ;;
        *)
            echo 'File format unrecognised, aborting...'
            exit 1   # 'exit -1' is not a valid POSIX exit status
            ;;
    esac

    rm -f "${artifact}"
}
-------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 3 | */ 4 | 5 | rootProject.name = 'tribuo-machine' 6 | -------------------------------------------------------------------------------- /examples/tribuo/src/main/java/org/neomatrix369/tribuo/TribuoMachine.java: -------------------------------------------------------------------------------- 1 | package org.neomatrix369.tribuo; 2 | 3 | import java.util.Arrays; 4 | import org.neomatrix369.tribuo.ClassificationExample.*; 5 | import org.neomatrix369.tribuo.RegressionExample.*; 6 | 7 | public class TribuoMachine 8 | { 9 | public static void main( String[] args ) throws Exception 10 | { 11 | System.out.println("~ Running Tribuo Machine"); 12 | System.out.println("CLI Params: " + Arrays.toString(args)); 13 | if ((args.length > 0) && (args[0].toLowerCase().contains("regression"))) { 14 | new RegressionExample().run(); 15 | } else { 16 | new ClassificationExample().run(); 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /natural-language-processing/course-tutorial-learning-resources.md: -------------------------------------------------------------------------------- 1 | # Courses, Tutorial, Learning resource 2 | 3 | - [Introductory: NLP for hackers](https://nlpforhackers.io/deep-learning-introduction/) 4 | - [Intermediate (by Jason Brownlee): Applications of Deep Learning for NLP](https://machinelearningmastery.com/applications-of-deep-learning-for-natural-language-processing/) 5 | - [Learn Natural Language Processing: From Beginner to Expert](https://www.commonlounge.com/discussion/c1f472553ece4d68bad9bd423fb775cf) 6 | - [Understanding Convolutional Neural Networks for NLP](http://www.wildml.com/2015/11/understanding-convolutional-neural-networks-for-nlp/) 7 | - [How to solve 90% of NLP problems: a step-by-step 
guide](https://blog.insightdatascience.com/how-to-solve-90-of-nlp-problems-a-step-by-step-guide-fda605278e4e) 8 | - [A framework for dialog research + datasets](https://parl.ai/) 9 | - [How Rossum is using deep learning to extract data from any document](https://www.linkedin.com/posts/eric-feuilleaubois-ph-d-43ab0925_how-rossum-is-using-deep-learning-to-extract-activity-6605832802078347264-ZsW8) 10 | - [Everything you need to know about Named Entity Recognition!!](https://github.com/neomatrix369/awesome-ai-ml-dl/blob/master/natural-language-processing/ner.md) 11 | - [NLP and Python books](https://www.linkedin.com/posts/inna-vogel-nlp_100daysofnlp-activity-6685064904925310976-KU-d) 12 | - [Citation Needed: A Taxonomy and Algorithmic Assessment of Wikipedia's Verifiability](https://arxiv.org/abs/1902.11116) 13 | - See [Natural Language Processing (NLP)](../courses.md#naturallanguageprocessing-nlp) in [Courses](../courses.md#courses) 14 | 15 | # Contributing 16 | 17 | Contributions are very welcome, please share back with the wider community (and get credited for it)! 18 | 19 | Please have a look at the [CONTRIBUTING](../CONTRIBUTING.md) guidelines, also have a read about our [licensing](../LICENSE.md) policy. 20 | 21 | --- 22 | 23 | Back to [NLP page (table of contents)](README.md)
24 | Back to [main page (table of contents)](../README.md) 25 | -------------------------------------------------------------------------------- /natural-language-processing/formulae/DMM_formula.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/natural-language-processing/formulae/DMM_formula.png -------------------------------------------------------------------------------- /natural-language-processing/formulae/cu_metric.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/natural-language-processing/formulae/cu_metric.png -------------------------------------------------------------------------------- /natural-language-processing/formulae/dmm_graphical_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/natural-language-processing/formulae/dmm_graphical_view.png -------------------------------------------------------------------------------- /natural-language-processing/formulae/i_index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/natural-language-processing/formulae/i_index.png -------------------------------------------------------------------------------- /natural-language-processing/formulae/lang_entropy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/natural-language-processing/formulae/lang_entropy.png 
-------------------------------------------------------------------------------- /natural-language-processing/formulae/likehood_DMM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/natural-language-processing/formulae/likehood_DMM.png -------------------------------------------------------------------------------- /natural-language-processing/formulae/m_index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/natural-language-processing/formulae/m_index.png -------------------------------------------------------------------------------- /natural-language-processing/formulae/scoring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neomatrix369/awesome-ai-ml-dl/2d9db3888e3a1cb24856a4b73a0678ef13fe0aba/natural-language-processing/formulae/scoring.png -------------------------------------------------------------------------------- /natural-language-processing/metaphor-detection.md: -------------------------------------------------------------------------------- 1 | # Metaphor detection 2 | 3 | - [Code for the paper "Neural Metaphor Detection in Context"](https://github.com/NLPAssignment/metaphor-detection) 4 | - [Grasping the Finer Point:A Supervised Similarity Network for Metaphor Detection](https://arxiv.org/pdf/1709.00575) 5 | - [Learning to Identify Metaphors from a Corpus of Proverbs](https://aclweb.org/anthology/D16-1220) 6 | - [Metaphor Detection with Cross-Lingual Model Transfer](https://www.cs.cmu.edu/~anatoleg/metaphor-acl14.pdf) 7 | - [Metaphor Interpretation Using Paraphrases Extracted from the Web](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3779216/) 8 | - [Natural Language Processing: Measuring Semantic 
Relatedness](https://www.linkedin.com/posts/data-science-central_natural-language-processing-measuring-semantic-activity-6647330201426542592-wxqU) 9 | 10 | 11 | # Contributing 12 | 13 | Contributions are very welcome, please share back with the wider community (and get credited for it)! 14 | 15 | Please have a look at the [CONTRIBUTING](../CONTRIBUTING.md) guidelines, also have a read about our [licensing](../LICENSE.md) policy. 16 | 17 | --- 18 | 19 | Back to [NLP page (table of contents)](README.md)
20 | Back to [main page (table of contents)](../README.md) 21 | -------------------------------------------------------------------------------- /notebooks/jupyter/data/.gitignore: -------------------------------------------------------------------------------- 1 | housing*.* 2 | column.header 3 | *.zip 4 | *.listing -------------------------------------------------------------------------------- /notebooks/jupyter/data/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2019 Mani Sarkar 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /notebooks/jupyter/data/data-generation/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2019 Mani Sarkar 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
"""Append randomly duplicated rows to a CSV dataset.

Usage: add_duplicates.py <column-names-file> <source-csv> <target-csv>
"""

import pandas as pd
import numpy as np
import random
import sys


def duplicate_rows(source_dataframe, num_of_rows=50, random_seed=42):
    """Return a copy of ``source_dataframe`` with ``num_of_rows`` duplicates appended.

    Rows are picked uniformly at random with replacement (the same row may be
    duplicated more than once) and appended after the original rows.

    Args:
        source_dataframe: the pandas DataFrame to duplicate rows from.
        num_of_rows: how many duplicate rows to append.
        random_seed: seed for reproducible row selection.

    Returns:
        A new DataFrame; the input is not modified.
    """
    maximum_rows = source_dataframe.shape[0]
    # Use a private Random instance so we do not reseed the process-wide RNG
    # (random.Random(seed) yields the same sequence as random.seed(seed)).
    rng = random.Random(random_seed)

    random_indices = [rng.randrange(0, maximum_rows) for _ in range(num_of_rows)]
    random_rows = source_dataframe.iloc[random_indices].copy()

    return pd.concat([source_dataframe, random_rows])


def main():
    """Parse CLI arguments, duplicate rows and write the target CSV."""
    column_names = sys.argv[1]
    source_file = sys.argv[2]
    target_file = sys.argv[3]

    # Context manager closes the header file (the original leaked the handle).
    with open(column_names, 'r') as header_file:
        names = [line.strip() for line in header_file]

    print("Column names", names)
    print("Reading source dataset {}".format(source_file))
    data = pd.read_csv(source_file, names=names)

    new_data = duplicate_rows(data)

    duplicate_entries = new_data.shape[0] - data.shape[0]
    print("Writing to target dataset {} ({} duplicate entries created)".format(target_file, duplicate_entries))
    new_data.to_csv(target_file, sep=',', index=False, header=False)


# Guard so importing this module (e.g. for reuse of duplicate_rows) does not
# crash on missing sys.argv entries -- previously all of this ran on import.
if __name__ == '__main__':
    main()
#!/bin/bash

#
# Copyright 2019 Mani Sarkar
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Bundle the Boston Housing dataset files (header + housing data files from
# the current directory) into a single zip archive, timing the operation.

set -euo pipefail

readonly ARCHIVE_NAME="boston_housing_dataset.zip"

echo "Creating archive for Boston Housing dataset."
time zip "${ARCHIVE_NAME}" \
     column.header housing*.*
echo "Finished creating archive for Boston Housing dataset."