├── .gitignore
├── LICENSE
├── README.md
├── dockerfiles
    ├── Dockerfile.common
    ├── Dockerfile.cpu
    ├── Dockerfile.gpu
    └── README.md
├── jupyter-text2code-demo.gif
├── jupyter_text2code
    ├── __init__.py
    ├── etc
    │   └── jupyter-text2code-extension.json
    ├── jupyter_text2code.css
    ├── jupyter_text2code.yaml
    ├── jupyter_text2code_lib.py
    ├── jupyter_text2code_serverextension
    │   ├── __init__.py
    │   ├── data
    │   │   ├── .gitkeep
    │   │   ├── awesome-notebooks.csv
    │   │   ├── generated_intents.csv
    │   │   ├── intent_lookup.csv
    │   │   └── ner_templates.csv
    │   └── models
    │   │   ├── .gitkeep
    │   │   ├── intent_index.idx
    │   │   ├── model-best
    │   │       ├── config.cfg
    │   │       ├── meta.json
    │   │       ├── ner
    │   │       │   ├── cfg
    │   │       │   ├── model
    │   │       │   └── moves
    │   │       ├── tok2vec
    │   │       │   ├── cfg
    │   │       │   └── model
    │   │       ├── tokenizer
    │   │       └── vocab
    │   │       │   ├── key2row
    │   │       │   ├── lookups.bin
    │   │       │   ├── strings.json
    │   │       │   ├── vectors
    │   │       │   └── vectors.cfg
    │   │   └── model-last
    │   │       ├── config.cfg
    │   │       ├── meta.json
    │   │       ├── ner
    │   │           ├── cfg
    │   │           ├── model
    │   │           └── moves
    │   │       ├── tok2vec
    │   │           ├── cfg
    │   │           └── model
    │   │       ├── tokenizer
    │   │       └── vocab
    │   │           ├── key2row
    │   │           ├── lookups.bin
    │   │           ├── strings.json
    │   │           ├── vectors
    │   │           └── vectors.cfg
    └── main.js
├── notebooks
    ├── Code Generator.ipynb
    ├── Episodes.csv
    ├── Generate Training data NER.ipynb
    └── ctds.ipynb
├── scripts
    ├── README.md
    ├── config.cfg
    ├── create_intent_index.py
    ├── create_lookup_file.py
    ├── data
    │   ├── awesome-notebooks.csv
    │   ├── awesome-notebooks.pkl
    │   ├── st_naas_intent_index.idx
    │   └── tf_naas_intent_index.idx
    ├── eval_models_performance.ipynb
    ├── generate_training_data.py
    ├── process_awesome_notebooks.py
    └── train_spacy_v3_ner.py
└── setup.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Ignore compiled Python files and notebook checkpoints
 2 | *.pyc
 3 | .ipynb_checkpoints/
 4 | 
 5 | # whitelist
 6 | # ---------
 7 | 
 8 | !.gitignore
 9 | 
10 | 
11 | # Ignore following
12 | .idea
13 | jupyter_text2code.egg-info/
14 | .pickle
15 | 
16 | ner_train_data.pickle
17 | 
18 | build/
19 | dist/
20 | venv


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Deepklarity Technologies Pvt. Ltd.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Text2Code for Jupyter notebook
  2 | ### A proof-of-concept jupyter extension which converts english queries into relevant python code. 
  3 | 
  4 | 
  5 | ![](jupyter-text2code-demo.gif)
  6 | 
  7 | ### Blog post with more details:
  8 | #### [Data analysis made easy: Text2Code for Jupyter notebook](https://towardsdatascience.com/data-analysis-made-easy-text2code-for-jupyter-notebook-5380e89bb493?source=friends_link&sk=2c46fff2c31f7fe59b667350e4596b18)
  9 | 
 10 | ### Demo Video:
 11 | #### [Text2Code for Jupyter notebook](https://www.youtube.com/watch?v=3gZ7_9W-TJs)
 12 | 
 13 | ## Supported Operating Systems:
 14 | - Ubuntu
 15 | - macOS
 16 | 
 17 | ## Installation
 18 | 
 19 | ### NOTE: We have renamed the plugin from mopp to jupyter-text2code. Uninstall mopp before installing new jupyter-text2code version.
 20 | ```
 21 | pip uninstall mopp
 22 | ```
 23 | 
 24 | #### CPU-only install:
 25 | For macOS and for Ubuntu installations without an NVIDIA GPU, explicitly set the following environment variable before installing.
 26 | ```
 27 | export JUPYTER_TEXT2CODE_MODE="cpu"
 28 | 
 29 | ```
 30 | 
 31 | #### GPU install dependencies:
 32 | ```
 33 | sudo apt-get install libopenblas-dev libomp-dev
 34 | ```
 35 | 
 36 | #### Installation commands:
 37 | 
 38 | ```
 39 | git clone https://github.com/deepklarity/jupyter-text2code.git
 40 | cd jupyter-text2code
 41 | pip install .
 42 | jupyter nbextension enable jupyter-text2code/main
 43 | 
 44 | ```
 45 | 
 46 | ## Uninstallation:
 47 | ```
 48 | pip uninstall jupyter-text2code
 49 | ```
 50 | 
 51 | ## Usage Instructions:
 52 | 
 53 | - Start Jupyter notebook server by running the following command: ``` jupyter notebook ```
 54 | - If you don't see the ``` Nbextensions```  tab in Jupyter notebook, run the following command: ``` jupyter contrib nbextension install --user ```
 55 | - You can open the sample ``` notebooks/ctds.ipynb```  notebook for testing
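 56 | - If the installation succeeded, the sentence-embedding model (SentenceTransformers `paraphrase-MiniLM-L6-v2`; older releases used the Universal Sentence Encoder from `tensorflow_hub`) is downloaded the first time the server extension loads, so the first start can take a while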
 57 | - Click on the `Terminal` Icon which appears on the menu (to activate the extension)
 58 | - Type "help" to see a list of currently supported commands in the repo
 59 | - Watch [Demo video](https://www.youtube.com/watch?v=3gZ7_9W-TJs) for some examples
 60 | 
 61 | ## Docker containers for jupyter-text2code (old version)
 62 | 
 63 | We have published CPU and GPU images to docker hub with all dependencies pre-installed.
 64 | ##### Visit https://hub.docker.com/r/deepklarity/jupyter-text2code/ to download the images and usage instructions.
 65 | 
 66 | ##### CPU image size: ``` 1.51 GB ``` 
 67 | ##### GPU image size: ``` 2.56 GB ```
 68 | 
 69 | ## Model training:
 70 | The plugin now supports pandas commands as well as quick insertion of snippets from [awesome-notebooks](https://github.com/jupyter-naas/awesome-notebooks). With this change, snippets for the most popular integrations are available from within the Jupyter tab, e.g.:
 71 | - Get followers count from twitter
 72 | - Get stats about a story from instagram
 73 | The detailed training steps are available in [scripts README](scripts/README.md) where we also evaluated performance of different models and ended up selecting SentenceTransformers `paraphrase-MiniLM-L6-v2` 
 74 | 
 75 | 
 76 | ### Steps to add more intents:
 77 | - Add more templates in `ner_templates` with a new intent_id
 78 | - Generate training data. Modify `generate_training_data.py` if different generation techniques are needed or if introducing a new entity.
 79 | - Train intent index
 80 | - Train NER model
 81 | - Modify `jupyter_text2code/jupyter_text2code_serverextension/__init__.py` to add a condition for the new intent along with the actual code for that intent
 82 | - Reinstall plugin by running: `pip install .`
 83 | 
 84 | ### TODO:
 85 | - [ ] Add Ollama support to work with local LLMs
 86 | - [x] Publish Docker image
 87 | - [X] Refactor code and make it more modular, remove duplicate code, etc
 88 | - [X] Add support for more commands
 89 | - [X] Improve intent detection and NER
 90 | - [ ] Add support for Windows
 91 | - [ ] Explore sentence Paraphrasing to generate higher-quality training data
 92 | - [ ] Gather real-world variable names, library names as opposed to randomly generating them
 93 | - [ ] Try NER with a transformer-based model
 94 | - [ ] With enough data, train a language model to directly do English->code like GPT-3 does, instead of having separate stages in the pipeline
 95 | - [ ] Create a survey to collect linguistic data
 96 | - [ ] Add Speech2Code support
 97 | 
 98 | #### Authored By:
 99 | 
100 | - [Deepak Rawat](https://twitter.com/dsr_ai)
101 | - [Kartik Godawat](https://twitter.com/kartik_godawat)
102 | - [Abdullah Meda](https://www.linkedin.com/in/abdmeda/)
103 | 


--------------------------------------------------------------------------------
/dockerfiles/Dockerfile.common:
--------------------------------------------------------------------------------
 1 | ARG WORKDIR=/home/deepklarity/jupyter-text2code
 2 | 
 3 | WORKDIR $WORKDIR
 4 | 
 5 | ENV PYTHONUNBUFFERED=1 TFHUB_CACHE_DIR=/root/.cache/tfhub_modules
 6 | 
 7 | RUN --mount=type=cache,mode=0777,target=/var/cache/apt --mount=type=cache,mode=0777,target=/var/lib/apt \
 8 |     --mount=type=cache,mode=0777,target=/root/.cache/pip \
 9 |     apt-get -y update && DEBIAN_FRONTEND=noninteractive apt-get install -y git \
10 |     && git clone https://github.com/deepklarity/jupyter-text2code.git $WORKDIR \
11 |     && apt-get purge --auto-remove -y git \
12 |     && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
13 |     && python -m pip --no-cache-dir install -U --force-reinstall pip && pip --no-cache-dir install -U . \
14 |     && jupyter contrib nbextension install --user && jupyter nbextension enable jupyter-text2code/main \
15 |     && python -c 'import tensorflow_hub as hub; hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")'
16 | 
17 | CMD jupyter notebook --ip 0.0.0.0 --port 8888 --no-browser --allow-root
18 | 


--------------------------------------------------------------------------------
/dockerfiles/Dockerfile.cpu:
--------------------------------------------------------------------------------
1 | # syntax = edrevo/dockerfile-plus
2 | FROM python:3.7-slim
3 | 
4 | ENV JUPYTER_TEXT2CODE_MODE="cpu"
5 | 
6 | INCLUDE+ dockerfiles/Dockerfile.common
7 | 


--------------------------------------------------------------------------------
/dockerfiles/Dockerfile.gpu:
--------------------------------------------------------------------------------
 1 | # syntax = edrevo/dockerfile-plus
 2 | FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
 3 | 
 4 | RUN --mount=type=cache,mode=0777,target=/var/cache/apt --mount=type=cache,target=/var/lib/apt \
 5 |     apt-get -y update && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common libopenblas-dev libomp-dev \
 6 |     && add-apt-repository ppa:deadsnakes/ppa && apt-get -y update && apt-get -y install python3.7 python3-pip \
 7 |     && update-alternatives --install /usr/bin/python python /usr/bin/python3 10
 8 | 
 9 | INCLUDE+ dockerfiles/Dockerfile.common
10 | 


--------------------------------------------------------------------------------
/dockerfiles/README.md:
--------------------------------------------------------------------------------
 1 | # About jupyter-text2code
 2 | 
 3 | ### A proof-of-concept jupyter extension which converts english queries into relevant python code. 
 4 | 
 5 | #### Github Repository: https://github.com/deepklarity/jupyter-text2code
 6 | 
 7 | ### Blog post with more details:
 8 | #### [Data analysis made easy: Text2Code for Jupyter notebook](https://towardsdatascience.com/data-analysis-made-easy-text2code-for-jupyter-notebook-5380e89bb493?source=friends_link&sk=2c46fff2c31f7fe59b667350e4596b18)
 9 | 
10 | ### Demo Video:
11 | #### [Text2Code for Jupyter notebook](https://www.youtube.com/watch?v=3gZ7_9W-TJs)
12 | 
13 | # How to Use the Images
14 | 
15 | ### Install docker from:  https://docs.docker.com/engine/install/
16 | 
17 | ### CPU image:
18 | 
19 | 1. Pull the docker image
20 | ```
21 | docker pull deepklarity/jupyter-text2code:latest
22 | ```
23 | 2. Run the Docker image
24 | ```
25 | docker run -it -p 8888:8888 deepklarity/jupyter-text2code:latest
26 | ```
27 | 
28 | ### GPU image:
29 | 1. Pull the docker image
30 | ```
31 | docker pull deepklarity/jupyter-text2code:latest-gpu
32 | ```
33 | 2. Run the Docker image
34 | ```
35 | docker run -it --gpus all -p 8888:8888 deepklarity/jupyter-text2code:latest-gpu
36 | ```
37 | 
38 | ### Open Jupyter Notebook: 
39 | 
40 | #### Once the container is running, you will see a URL with a token in the terminal/console. Open that URL in your browser. 
41 | 
42 | Example url: ``` http://127.0.0.1:8888/?token=48c6ea28c1cbce210c008f1ef8dab8fa91ad77420922e259 ```
43 | 
44 | ### Usage Instructions:
45 | 
46 | - You can open the sample ``` notebooks/ctds.ipynb```  notebook for testing
47 | - Click on the `Terminal` Icon which appears on the menu (to activate the extension)
48 | - Type "help" to see a list of currently supported commands in the repo
49 | - Watch [Demo video](https://www.youtube.com/watch?v=3gZ7_9W-TJs) for some examples
50 | 


--------------------------------------------------------------------------------
/jupyter-text2code-demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter-text2code-demo.gif


--------------------------------------------------------------------------------
/jupyter_text2code/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/__init__.py


--------------------------------------------------------------------------------
/jupyter_text2code/etc/jupyter-text2code-extension.json:
--------------------------------------------------------------------------------
 1 |  {
 2 |   "NotebookApp": {
 3 |   	"load_extensions": {
 4 |      "jupyter-text2code/main": true
 5 |   },
 6 |     "nbserver_extensions": {
 7 |       "jupyter_text2code.jupyter_text2code_serverextension": true
 8 |     }
 9 |   }
10 | }


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code.css:
--------------------------------------------------------------------------------
 1 | .notebook_app {
 2 | 	/*background:red !important;*/
 3 | }
 4 | 
 5 | #jupyter_text2code_editor {
 6 | 	position: fixed;
 7 |     bottom: 0;
 8 |     width: 61%;
 9 |     z-index: 999;
10 |     background: lightblue;
11 |     padding: 10px 35px;
12 |     margin-left: -14px;
13 | }
14 | 
15 | #jupyter_text2code_editor_history {
16 | 	position: fixed;
17 |     top: 150px;
18 |     right: 10px;
19 |     background: lightblue;
20 |     padding: 5px;
21 |     width: 15%;
22 | }
23 | 
24 | #jupyter_text2code_preset_wrapper {
25 | 	max-height: 250px;
26 | 	overflow-y: scroll;
27 | }
28 | 
29 | #jupyter_text2code_history_wrapper {
30 | 	max-height: 250px;
31 | 	overflow-y: scroll;
32 | }
33 | 
34 | #jupyter_text2code_query {
35 |     height: 50px;
36 |     width: 100%;
37 |     font-size: 20px;
38 | }
39 | #jupyter_text2code_history {
40 | 	padding: 3px;
41 | }
42 | 
43 | .jupyter_text2code_history_item {
44 | 	background: lightcyan;
45 |     margin: 10px 0px;
46 |     padding: 5px;
47 |     padding-left: 10px;
48 |     cursor: pointer;
49 | }
50 | 
51 | .jupyter_text2code_preset_item {
52 | 	background: lightcyan;
53 |     margin: 10px 0px;
54 |     padding: 5px;
55 |     padding-left: 10px;
56 |     cursor: pointer;
57 | }
58 | 
59 | .jupyter_text2code_sub_heading {
60 | 	padding: 3px;
61 | 	font-size: 14px;
62 | 	font-weight: bold;
63 | }
64 | 
65 | #jupyter_text2code_preset_content {
66 | 	padding: 3px;
67 | }
68 | 
69 | .jupyter_text2code_what_heading {
70 | 	font-weight: bold;
71 |     padding: 5px;
72 | }
73 | 
74 | #jupyter_text2code_submit {
75 |     margin: 10px;
76 | }
77 | 
78 | .jupyter_text2code_spinner {
79 | 	font-size: 22px;
80 | 	margin-left: 10px;
81 | }


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code.yaml:
--------------------------------------------------------------------------------
1 | Type: IPython Notebook Extension
2 | Compatibility: 3.x, 4.x, 5.x, 6.x
3 | Main: main.js
4 | Name: jupyter-text2code
5 | Icon: icon.jpg
6 | Description: "jupyter-text2code plugin with pandas and plotly support"
7 | 


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_lib.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | from IPython import get_ipython
 4 | from IPython.core.magics.namespace import NamespaceMagics
 5 | 
# Magics helper; its who_ls() is what dataframes_info() uses to list the names
# defined in the interactive session.
_nms = NamespaceMagics()
# Handle to the running IPython instance.
# NOTE(review): get_ipython() presumably returns None outside an IPython
# session, in which case the attribute access below would fail -- confirm this
# module is only ever loaded inside a kernel.
_Jupyter = get_ipython()
# Attach the kernel's shell to the magics helper so it has a shell to inspect.
_nms.shell = _Jupyter.kernel.shell
 9 | 
10 | 
def dataframes_info():
    """Return a JSON object mapping each DataFrame variable to its column names.

    The names come from ``who_ls()`` on the module-level magics helper, and
    each object is read once from that same shell's user namespace.  This
    avoids calling ``eval`` on the name (previously done twice per variable,
    and resolved against this module's globals rather than the namespace the
    names were listed from).

    Returns:
        str: JSON text of the form ``{"<variable name>": [<column>, ...]}``.
        Objects are matched by type name (``'DataFrame'``), so no pandas
        import is needed here.
    """
    user_ns = _nms.shell.user_ns
    info = {}
    for name in _nms.who_ls():
        obj = user_ns[name]
        if type(obj).__name__ == 'DataFrame':
            info[name] = obj.columns.tolist()
    return json.dumps(info)
15 | 


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/__init__.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import re
  3 | import json
  4 | from abc import ABC
  5 | from itertools import groupby
  6 | 
  7 | import faiss
  8 | import spacy
  9 | import numpy as np
 10 | import pandas as pd
 11 | from notebook.utils import url_path_join
 12 | from notebook.base.handlers import IPythonHandler
 13 | from sentence_transformers import SentenceTransformer
 14 | 
 15 | home = os.path.dirname(__file__)
 16 | 
 17 | SPACY_MODEL_DIR = os.path.join(home, "models/model-best")
 18 | FAISS_INDEX_PATH = os.path.join(home, "models/intent_index.idx")
 19 | INTENT_DF_PATH = os.path.join(home, "data/intent_lookup.csv")
 20 | HELP_LIST = ['Import all libraries - Example Usage: import all libraries',
 21 |              'Use plotly dark theme - Example Usage: use dark theme',
 22 |              'Load file into a dataframe - Example Usage: Load train.csv in df',
 23 |              'Show n rows of dataframe - Example Usage: Show 10 rows from df',
 24 |              'Shape of dataframe - Example Usage: Show shape of df',
 25 |              'Describe dataframe - Example Usage: Describe dataframe df',
 26 |              'List columns of dataframe - Example Usage: Show columns from df',
 27 |              'Correlation matrix of dataframe - Example Usage: Display corelation matrix of df',
 28 |              'Histogram of column in dataframe - Example Usage: Plot histogram of category from df',
 29 |              'Bar chart of columns from dataframe - Example Usage: Show bar chart of product and amount from df',
 30 |              'Pie chart of column - Example Usage: Make pie chart of fruits from df',
 31 |              'Group by aggregations of columns in dataframe - Example Usage: group df by country and show sum and mean of population',
 32 |              'Line chart of columns in dataframe - Example Usage: Line chart of price and sale from df',
 33 |              'Scatter plot of columns in dataframe - Example Usage: Show scatter plot of youtube_likes and episode_duration from df',
 34 |              'Heatmap of columns in dataframe - Example Usage: from df make heat map of recording_time and youtube_views',
 35 |              'List all files in current directory - Example Usage: List all files in current directory'
 36 |              ]
 37 | HELP_TEXT = "\n".join([f"# {s}" for s in HELP_LIST])
 38 | 
 39 | 
 40 | class CodeGenerator:
 41 | 
 42 |     def __init__(self):
 43 |         self.nlp = spacy.load(SPACY_MODEL_DIR)
 44 |         self.embedding_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
 45 |         self.intent_index = faiss.read_index(FAISS_INDEX_PATH)
 46 |         self.intent_df = pd.read_csv(INTENT_DF_PATH)
 47 |         self.intent_df = self.intent_df.set_index('intent_id')
 48 | 
 49 |     def _get_embedding(self, command):
 50 |         command = re.sub('[^A-Za-z0-9 ]+', '', command).lower()
 51 |         return list(np.array(self.embedding_model.encode([command])[0]))
 52 | 
 53 |     def _get_intent(self, query, k_nearest=1):
 54 |         query_vector = np.array([self._get_embedding(query)]).astype(np.float32)
 55 |         faiss.normalize_L2(query_vector)
 56 |         similarities, similarities_ids = self.intent_index.search(query_vector, k_nearest)
 57 |         return similarities_ids[0][0], self.intent_df['code'][similarities_ids[0][0]]
 58 | 
 59 |     def generate_code(self, query, df_info_dict={}, debug=False):
 60 |         intent_id, intent_code = self._get_intent(query)
 61 |         if 0 <= intent_id < 10000:   # Existing
 62 |             doc = self.nlp(query)
 63 |             entities = {key: list(g) for key, g in groupby(sorted(doc.ents, key=lambda x: x.label_), lambda x: x.label_)}
 64 |             for entity, labels in entities.items():
 65 |                 intent_code = re.sub(fr'\${entity.lower()}', lambda _: next(iter(map(lambda x: x.text, labels))), intent_code)
 66 |         elif 10000 <= intent_id < 20000:   # Naas
 67 |             print("Nothing yet")
 68 | 
 69 |         return re.sub(r'\$\w+', 'xxx', intent_code)
 70 | 
 71 | 
# Banner printed at import time, just before the CodeGenerator is constructed
# (its constructor loads the spaCy pipeline, the embedding model and the
# intent index).
print("*" * 20)
print("*" * 20)
print("Loading_jupyter_server_extension. First install will download SentenceTransformers, please wait...")
print("*" * 20)
print("*" * 20)
# Single module-level generator instance used by the request handler.
CG = CodeGenerator()
 78 | 
 79 | 
 80 | class JupyterText2CodeHandler(IPythonHandler, ABC):
 81 |     def __init__(self, application, request, **kwargs):
 82 |         super(JupyterText2CodeHandler, self).__init__(application, request, **kwargs)
 83 | 
 84 |     # TODO: Add logger
 85 |     def get(self):
 86 |         query = self.get_argument('query')
 87 | 
 88 |         try:
 89 |             status = "success"
 90 |             if query.lower() == 'help':
 91 |                 command = HELP_TEXT
 92 |             else:
 93 |                 df_info = self.get_argument('dataframes_info')
 94 |                 df_info_dict = json.loads(df_info[1:-1])
 95 |                 command = CG.generate_code(query, df_info_dict, debug=True)
 96 | 
 97 |             response = {"status": status, "message": command}
 98 |         except Exception as e:
 99 |             response = {"status": "error", "message": str(e)}
100 | 
101 |         response["message"] = f"#Query: {query}\n\n{response['message']}"
102 |         self.finish(json.dumps(response))
103 | 
104 | 
def load_jupyter_server_extension(nb_server_app):
    """
    Register the jupyter-text2code handler when the extension is loaded.

    The handler is mounted at ``<base_url>/jupyter-text2code`` for every host.

    Args:
        nb_server_app (NotebookWebApplication): handle to the Notebook webserver instance.
    """
    app = nb_server_app.web_app
    endpoint = url_path_join(app.settings['base_url'], '/jupyter-text2code')
    app.add_handlers('.*$', [(endpoint, JupyterText2CodeHandler)])
    print("loaded_jupyter_server_extension: jupyter-text2code")
117 | 


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/data/.gitkeep


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/data/ner_templates.csv:
--------------------------------------------------------------------------------
 1 | intent_id,template,code
 2 | 0,import $libname,import $libname
 3 | 1,import all libraries,"import pandas as pd
 4 | import numpy as np
 5 | import os
 6 | import plotly.express as px
 7 | import matplotlib.pyplot as plt
 8 | pd.options.plotting.backend = 'plotly'"
 9 | 2,load $fname,$varname = pd.read_csv('$fname')
10 | 2,load $fname in $varname,$varname = pd.read_csv('$fname')
11 | 3,show $cardinal rows from $varname,$varname.head($cardinal)
12 | 3,show $cardinal rows of $varname,$varname.head($cardinal)
13 | 3,print $cardinal rows from $varname,$varname.head($cardinal)
14 | 3,print $cardinal rows of $varname,$varname.head($cardinal)
15 | 3,print $varname head,$varname.head()
16 | 4,plot histogram of $colname column in $varname,$varname.plot.hist(x='$colname')
17 | 4,plot histogram of $colname in $varname,$varname.plot.hist(x='$colname')
18 | 4,draw histogram of $colname column in $varname,$varname.plot.hist(x='$colname')
19 | 4,get histogram of $colname in $varname,$varname.plot.hist(x='$colname')
20 | 5,get correlation matrix of $varname,$varname.corr()
21 | 6,print $varname shape,$varname.shape
22 | 6,print shape of $varname,$varname.shape
23 | 6,get size of $varname,$varname.shape
24 | 7,barplot $colname and $colname columns of $varname,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
25 | 7,plot $colname and $colname columns of $varname in a bar plot,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
26 | 7,bar plot $colname and $colname column of $varname,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
27 | 7,bar plot $colname and $colname in $varname,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
28 | 7,plot $colname and $colname of $varname in a bar plot,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
29 | 7,plot $colname $colname of $varname in a bar plot,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
30 | 7,show a bar plot with $colname on x axis over $colname in $varname,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
31 | 7,show a bar plot with $colname on x axis and $colname on y axis in $varname,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
32 | 7,show a bar plot with $colname on y axis and $colname on x axis,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
33 | 8,piechart of $colname column in $varname grouped by $colname column,"tmp = $varname['$colname'].value_counts(dropna=False)
34 | px.pie(tmp,values=tmp.values,names=tmp.index,title='CustomTitle')"
35 | 8,piechart of $colname in $varname grouped by $colname,"tmp = $varname['$colname'].value_counts(dropna=False)
36 | px.pie(tmp,values=tmp.values,names=tmp.index,title='CustomTitle')"
37 | 8,pie chart of $colname column of $varname grouped by $colname column,"tmp = $varname['$colname'].value_counts(dropna=False)
38 | px.pie(tmp,values=tmp.values,names=tmp.index,title='CustomTitle')"
39 | 8,pie chart of column $colname in $varname grouped by column $colname,"tmp = $varname['$colname'].value_counts(dropna=False)
40 | px.pie(tmp,values=tmp.values,names=tmp.index,title='CustomTitle')"
41 | 8,pie chart of $colname of $varname coloured by $colname,"tmp = $varname['$colname'].value_counts(dropna=False)
42 | px.pie(tmp,values=tmp.values,names=tmp.index,title='CustomTitle')"
43 | 9,install $libname,!pip install $libname
44 | 10,list columns of $varname,$varname.columns
45 | 10,list all columns of $varname,$varname.columns
46 | 11,describe $varname,$varname.describe()
47 | 12,group $varname by $colname and get $function of $colname,# Not supported in the current release :(
48 | 12,group the $varname by $colname and get $function of $colname,# Not supported in the current release :(
49 | 12,$varname group by $colname $function by $colname,# Not supported in the current release :(
50 | 12,find $function of $colname group by $colname from $varname,# Not supported in the current release :(
51 | 12,$function $colname group by $colname from $varname,# Not supported in the current release :(
52 | 12,$function $colname group by $colname from $varname,# Not supported in the current release :(
53 | 13,display a line plot showing $colname vs $colname in $varname,"$varname.plot.line(x='$colname', y='$colname', color=None, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
54 | 13,display a line plot showing $colname on y-axis and $colname on x-axis from $varname,"$varname.plot.line(x='$colname', y='$colname', color=None, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
55 | 13,display a line plot of $colname versus $colname in $varname,"$varname.plot.line(x='$colname', y='$colname', color=None, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
56 | 13,line plot of $colname and $colname in $varname,"$varname.plot.line(x='$colname', y='$colname', color=None, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
57 | 14,show a scatter plot of $colname over $colname in $varname,"$varname.plot.scatter(x='$colname', y='$colname', color=None, size=None, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
58 | 14,scatter plot of $colname and $colname in $varname,"$varname.plot.scatter(x='$colname', y='$colname', color=None, size=None, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
59 | 15,show a heatmap with $colname on x axis and $colname on y axis in $varname,"$varname.plot(kind='density_heatmap', x='$colname', y='$colname', title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
60 | 15,display a heatmap of $colname on y axis and $colname on x axis in $varname,"$varname.plot(kind='density_heatmap', x='$colname', y='$colname', title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
61 | 15,heatmap of $colname and $colname in $varname,"$varname.plot(kind='density_heatmap', x='$colname', y='$colname', title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
62 | 16,list all files in current directory,!ls .
63 | 17,switch to dark theme,"import plotly.io as pio
64 | pio.templates.default = 'plotly_dark'"
65 | 


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/.gitkeep


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/intent_index.idx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/intent_index.idx


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/config.cfg:
--------------------------------------------------------------------------------
  1 | [paths]
  2 | train = "corpus/train.spacy"
  3 | dev = "corpus/dev.spacy"
  4 | vectors = "en_core_web_sm"
  5 | init_tok2vec = null
  6 | 
  7 | [system]
  8 | gpu_allocator = null
  9 | seed = 0
 10 | 
 11 | [nlp]
 12 | lang = "en"
 13 | pipeline = ["tok2vec","ner"]
 14 | batch_size = 1000
 15 | disabled = []
 16 | before_creation = null
 17 | after_creation = null
 18 | after_pipeline_creation = null
 19 | tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
 20 | 
 21 | [components]
 22 | 
 23 | [components.ner]
 24 | factory = "ner"
 25 | incorrect_spans_key = null
 26 | moves = null
 27 | scorer = {"@scorers":"spacy.ner_scorer.v1"}
 28 | update_with_oracle_cut_size = 100
 29 | 
 30 | [components.ner.model]
 31 | @architectures = "spacy.TransitionBasedParser.v2"
 32 | state_type = "ner"
 33 | extra_state_tokens = false
 34 | hidden_width = 64
 35 | maxout_pieces = 2
 36 | use_upper = true
 37 | nO = null
 38 | 
 39 | [components.ner.model.tok2vec]
 40 | @architectures = "spacy.Tok2VecListener.v1"
 41 | width = ${components.tok2vec.model.encode.width}
 42 | upstream = "*"
 43 | 
 44 | [components.tok2vec]
 45 | factory = "tok2vec"
 46 | 
 47 | [components.tok2vec.model]
 48 | @architectures = "spacy.Tok2Vec.v2"
 49 | 
 50 | [components.tok2vec.model.embed]
 51 | @architectures = "spacy.MultiHashEmbed.v2"
 52 | width = ${components.tok2vec.model.encode.width}
 53 | attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
 54 | rows = [5000,2500,2500,2500]
 55 | include_static_vectors = true
 56 | 
 57 | [components.tok2vec.model.encode]
 58 | @architectures = "spacy.MaxoutWindowEncoder.v2"
 59 | width = 256
 60 | depth = 8
 61 | window_size = 1
 62 | maxout_pieces = 3
 63 | 
 64 | [corpora]
 65 | 
 66 | [corpora.dev]
 67 | @readers = "spacy.Corpus.v1"
 68 | path = ${paths.dev}
 69 | max_length = 0
 70 | gold_preproc = false
 71 | limit = 0
 72 | augmenter = null
 73 | 
 74 | [corpora.train]
 75 | @readers = "spacy.Corpus.v1"
 76 | path = ${paths.train}
 77 | max_length = 0
 78 | gold_preproc = false
 79 | limit = 0
 80 | augmenter = null
 81 | 
 82 | [training]
 83 | dev_corpus = "corpora.dev"
 84 | train_corpus = "corpora.train"
 85 | seed = ${system.seed}
 86 | gpu_allocator = ${system.gpu_allocator}
 87 | dropout = 0.1
 88 | accumulate_gradient = 1
 89 | patience = 1600
 90 | max_epochs = 0
 91 | max_steps = 20000
 92 | eval_frequency = 200
 93 | frozen_components = []
 94 | annotating_components = []
 95 | before_to_disk = null
 96 | 
 97 | [training.batcher]
 98 | @batchers = "spacy.batch_by_words.v1"
 99 | discard_oversize = false
100 | tolerance = 0.2
101 | get_length = null
102 | 
103 | [training.batcher.size]
104 | @schedules = "compounding.v1"
105 | start = 100
106 | stop = 1000
107 | compound = 1.001
108 | t = 0.0
109 | 
110 | [training.logger]
111 | @loggers = "spacy.ConsoleLogger.v1"
112 | progress_bar = false
113 | 
114 | [training.optimizer]
115 | @optimizers = "Adam.v1"
116 | beta1 = 0.9
117 | beta2 = 0.999
118 | L2_is_weight_decay = true
119 | L2 = 0.01
120 | grad_clip = 1.0
121 | use_averages = false
122 | eps = 0.00000001
123 | learn_rate = 0.001
124 | 
125 | [training.score_weights]
126 | ents_f = 1.0
127 | ents_p = 0.0
128 | ents_r = 0.0
129 | ents_per_type = null
130 | 
131 | [pretraining]
132 | 
133 | [initialize]
134 | vectors = ${paths.vectors}
135 | init_tok2vec = ${paths.init_tok2vec}
136 | vocab_data = null
137 | lookups = null
138 | before_init = null
139 | after_init = null
140 | 
141 | [initialize.components]
142 | 
143 | [initialize.tokenizer]


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/meta.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "lang":"en",
 3 |   "name":"pipeline",
 4 |   "version":"0.0.0",
 5 |   "spacy_version":">=3.3.0,<3.4.0",
 6 |   "description":"",
 7 |   "author":"",
 8 |   "email":"",
 9 |   "url":"",
10 |   "license":"",
11 |   "spacy_git_version":"497a708c7",
12 |   "vectors":{
13 |     "width":0,
14 |     "vectors":0,
15 |     "keys":0,
16 |     "name":null,
17 |     "mode":"default"
18 |   },
19 |   "labels":{
20 |     "tok2vec":[
21 | 
22 |     ],
23 |     "ner":[
24 |       "CARDINAL",
25 |       "COLNAME",
26 |       "FNAME",
27 |       "FUNCTION",
28 |       "LIBNAME",
29 |       "VARNAME"
30 |     ]
31 |   },
32 |   "pipeline":[
33 |     "tok2vec",
34 |     "ner"
35 |   ],
36 |   "components":[
37 |     "tok2vec",
38 |     "ner"
39 |   ],
40 |   "disabled":[
41 | 
42 |   ],
43 |   "performance":{
44 |     "ents_f":1.0,
45 |     "ents_p":1.0,
46 |     "ents_r":1.0,
47 |     "ents_per_type":{
48 |       "COLNAME":{
49 |         "p":1.0,
50 |         "r":1.0,
51 |         "f":1.0
52 |       },
53 |       "VARNAME":{
54 |         "p":1.0,
55 |         "r":1.0,
56 |         "f":1.0
57 |       },
58 |       "CARDINAL":{
59 |         "p":1.0,
60 |         "r":1.0,
61 |         "f":1.0
62 |       },
63 |       "FUNCTION":{
64 |         "p":1.0,
65 |         "r":1.0,
66 |         "f":1.0
67 |       },
68 |       "FNAME":{
69 |         "p":1.0,
70 |         "r":1.0,
71 |         "f":1.0
72 |       },
73 |       "LIBNAME":{
74 |         "p":1.0,
75 |         "r":1.0,
76 |         "f":1.0
77 |       }
78 |     },
79 |     "tok2vec_loss":6.8340121594,
80 |     "ner_loss":1014.5475574388
81 |   }
82 | }


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/ner/cfg:
--------------------------------------------------------------------------------
 1 | {
 2 |   "moves":null,
 3 |   "update_with_oracle_cut_size":100,
 4 |   "multitasks":[
 5 | 
 6 |   ],
 7 |   "min_action_freq":1,
 8 |   "learn_tokens":false,
 9 |   "beam_width":1,
10 |   "beam_density":0.0,
11 |   "beam_update_prob":0.0,
12 |   "incorrect_spans_key":null
13 | }


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/ner/model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/ner/model


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/ner/moves:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/ner/moves


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/tok2vec/cfg:
--------------------------------------------------------------------------------
1 | {
2 | 
3 | }


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/tok2vec/model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/tok2vec/model


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/tokenizer:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/tokenizer


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/vocab/key2row:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/vocab/key2row


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/vocab/lookups.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/vocab/lookups.bin


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/vocab/vectors:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/vocab/vectors


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/vocab/vectors.cfg:
--------------------------------------------------------------------------------
1 | {
2 |   "mode":"default"
3 | }


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/config.cfg:
--------------------------------------------------------------------------------
  1 | [paths]
  2 | train = "corpus/train.spacy"
  3 | dev = "corpus/dev.spacy"
  4 | vectors = "en_core_web_sm"
  5 | init_tok2vec = null
  6 | 
  7 | [system]
  8 | gpu_allocator = null
  9 | seed = 0
 10 | 
 11 | [nlp]
 12 | lang = "en"
 13 | pipeline = ["tok2vec","ner"]
 14 | batch_size = 1000
 15 | disabled = []
 16 | before_creation = null
 17 | after_creation = null
 18 | after_pipeline_creation = null
 19 | tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
 20 | 
 21 | [components]
 22 | 
 23 | [components.ner]
 24 | factory = "ner"
 25 | incorrect_spans_key = null
 26 | moves = null
 27 | scorer = {"@scorers":"spacy.ner_scorer.v1"}
 28 | update_with_oracle_cut_size = 100
 29 | 
 30 | [components.ner.model]
 31 | @architectures = "spacy.TransitionBasedParser.v2"
 32 | state_type = "ner"
 33 | extra_state_tokens = false
 34 | hidden_width = 64
 35 | maxout_pieces = 2
 36 | use_upper = true
 37 | nO = null
 38 | 
 39 | [components.ner.model.tok2vec]
 40 | @architectures = "spacy.Tok2VecListener.v1"
 41 | width = ${components.tok2vec.model.encode.width}
 42 | upstream = "*"
 43 | 
 44 | [components.tok2vec]
 45 | factory = "tok2vec"
 46 | 
 47 | [components.tok2vec.model]
 48 | @architectures = "spacy.Tok2Vec.v2"
 49 | 
 50 | [components.tok2vec.model.embed]
 51 | @architectures = "spacy.MultiHashEmbed.v2"
 52 | width = ${components.tok2vec.model.encode.width}
 53 | attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
 54 | rows = [5000,2500,2500,2500]
 55 | include_static_vectors = true
 56 | 
 57 | [components.tok2vec.model.encode]
 58 | @architectures = "spacy.MaxoutWindowEncoder.v2"
 59 | width = 256
 60 | depth = 8
 61 | window_size = 1
 62 | maxout_pieces = 3
 63 | 
 64 | [corpora]
 65 | 
 66 | [corpora.dev]
 67 | @readers = "spacy.Corpus.v1"
 68 | path = ${paths.dev}
 69 | max_length = 0
 70 | gold_preproc = false
 71 | limit = 0
 72 | augmenter = null
 73 | 
 74 | [corpora.train]
 75 | @readers = "spacy.Corpus.v1"
 76 | path = ${paths.train}
 77 | max_length = 0
 78 | gold_preproc = false
 79 | limit = 0
 80 | augmenter = null
 81 | 
 82 | [training]
 83 | dev_corpus = "corpora.dev"
 84 | train_corpus = "corpora.train"
 85 | seed = ${system.seed}
 86 | gpu_allocator = ${system.gpu_allocator}
 87 | dropout = 0.1
 88 | accumulate_gradient = 1
 89 | patience = 1600
 90 | max_epochs = 0
 91 | max_steps = 20000
 92 | eval_frequency = 200
 93 | frozen_components = []
 94 | annotating_components = []
 95 | before_to_disk = null
 96 | 
 97 | [training.batcher]
 98 | @batchers = "spacy.batch_by_words.v1"
 99 | discard_oversize = false
100 | tolerance = 0.2
101 | get_length = null
102 | 
103 | [training.batcher.size]
104 | @schedules = "compounding.v1"
105 | start = 100
106 | stop = 1000
107 | compound = 1.001
108 | t = 0.0
109 | 
110 | [training.logger]
111 | @loggers = "spacy.ConsoleLogger.v1"
112 | progress_bar = false
113 | 
114 | [training.optimizer]
115 | @optimizers = "Adam.v1"
116 | beta1 = 0.9
117 | beta2 = 0.999
118 | L2_is_weight_decay = true
119 | L2 = 0.01
120 | grad_clip = 1.0
121 | use_averages = false
122 | eps = 0.00000001
123 | learn_rate = 0.001
124 | 
125 | [training.score_weights]
126 | ents_f = 1.0
127 | ents_p = 0.0
128 | ents_r = 0.0
129 | ents_per_type = null
130 | 
131 | [pretraining]
132 | 
133 | [initialize]
134 | vectors = ${paths.vectors}
135 | init_tok2vec = ${paths.init_tok2vec}
136 | vocab_data = null
137 | lookups = null
138 | before_init = null
139 | after_init = null
140 | 
141 | [initialize.components]
142 | 
143 | [initialize.tokenizer]


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/meta.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "lang":"en",
 3 |   "name":"pipeline",
 4 |   "version":"0.0.0",
 5 |   "spacy_version":">=3.3.0,<3.4.0",
 6 |   "description":"",
 7 |   "author":"",
 8 |   "email":"",
 9 |   "url":"",
10 |   "license":"",
11 |   "spacy_git_version":"497a708c7",
12 |   "vectors":{
13 |     "width":0,
14 |     "vectors":0,
15 |     "keys":0,
16 |     "name":null,
17 |     "mode":"default"
18 |   },
19 |   "labels":{
20 |     "tok2vec":[
21 | 
22 |     ],
23 |     "ner":[
24 |       "CARDINAL",
25 |       "COLNAME",
26 |       "FNAME",
27 |       "FUNCTION",
28 |       "LIBNAME",
29 |       "VARNAME"
30 |     ]
31 |   },
32 |   "pipeline":[
33 |     "tok2vec",
34 |     "ner"
35 |   ],
36 |   "components":[
37 |     "tok2vec",
38 |     "ner"
39 |   ],
40 |   "disabled":[
41 | 
42 |   ],
43 |   "performance":{
44 |     "ents_f":0.9992156863,
45 |     "ents_p":1.0,
46 |     "ents_r":0.9984326019,
47 |     "ents_per_type":{
48 |       "COLNAME":{
49 |         "p":1.0,
50 |         "r":0.9970457903,
51 |         "f":0.9985207101
52 |       },
53 |       "VARNAME":{
54 |         "p":1.0,
55 |         "r":1.0,
56 |         "f":1.0
57 |       },
58 |       "CARDINAL":{
59 |         "p":1.0,
60 |         "r":1.0,
61 |         "f":1.0
62 |       },
63 |       "FUNCTION":{
64 |         "p":1.0,
65 |         "r":1.0,
66 |         "f":1.0
67 |       },
68 |       "FNAME":{
69 |         "p":1.0,
70 |         "r":1.0,
71 |         "f":1.0
72 |       },
73 |       "LIBNAME":{
74 |         "p":1.0,
75 |         "r":1.0,
76 |         "f":1.0
77 |       }
78 |     },
79 |     "tok2vec_loss":0.0000003981,
80 |     "ner_loss":0.0000006017
81 |   }
82 | }


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/ner/cfg:
--------------------------------------------------------------------------------
 1 | {
 2 |   "moves":null,
 3 |   "update_with_oracle_cut_size":100,
 4 |   "multitasks":[
 5 | 
 6 |   ],
 7 |   "min_action_freq":1,
 8 |   "learn_tokens":false,
 9 |   "beam_width":1,
10 |   "beam_density":0.0,
11 |   "beam_update_prob":0.0,
12 |   "incorrect_spans_key":null
13 | }


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/ner/model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/ner/model


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/ner/moves:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/ner/moves


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/tok2vec/cfg:
--------------------------------------------------------------------------------
1 | {
2 | 
3 | }


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/tok2vec/model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/tok2vec/model


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/tokenizer:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/tokenizer


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/vocab/key2row:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/vocab/key2row


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/vocab/lookups.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/vocab/lookups.bin


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/vocab/vectors:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/vocab/vectors


--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/vocab/vectors.cfg:
--------------------------------------------------------------------------------
1 | {
2 |   "mode":"default"
3 | }


--------------------------------------------------------------------------------
/jupyter_text2code/main.js:
--------------------------------------------------------------------------------
  1 | define([
  2 |     'jquery',
  3 |     'require',
  4 |     'base/js/namespace',
  5 |     'base/js/dialog',
  6 |     'base/js/events',
  7 | ], function (
  8 |     $,
  9 |     requirejs,
 10 |     Jupyter,
 11 |     dialog,
 12 |     events
 13 | ) {
 14 |     "use strict";
 15 | 
 16 |     var jupyter_text2code_lib = {}
 17 |     jupyter_text2code_lib.code_init = "";
 18 | 
 19 |     // define default values for config parameters
 20 |     var params = {
 21 |         jupyter_text2code_it_default_to_public: false,
 22 |     };
 23 | 
 24 |     var extension_state = {
 25 |         is_open: false,
 26 |         data: {
 27 |             query: "import all libraries",
 28 |             history: [],
 29 |             presets: [
 30 |                 "help",
 31 |                 "use dark theme",
 32 |                 "import all libraries",
 33 |                 "load (xxx.csv) in (df)",
 34 |                 "pie plot of (column) in (df)",
 35 |                 "bar plot of columns (column) & (column) in (df)",
 36 |                 "list all columns of (df)",
 37 |                 "show (x) rows of (df)"
 38 |             ]
 39 |         }
 40 |     };
 41 | 
 42 |     function code_exec_callback(query, response) {
 43 |         var generated_code = JSON.parse(response)["message"]
 44 | 
 45 |         extension_state.data.history.push({"query": query, "code": generated_code});
 46 |         update_history_display(query);
 47 | 
 48 |         var cur_cell = Jupyter.notebook.get_selected_cell();
 49 |         if (cur_cell.get_text() == ""){
 50 |             var command_cell = cur_cell;
 51 |         }else{
 52 |             var command_cell = Jupyter.notebook.insert_cell_below('code');
 53 |         }
 54 |         command_cell.select();
 55 |         command_cell.set_text(generated_code);
 56 |         command_cell.execute();
 57 |         Jupyter.notebook.insert_cell_below();
 58 |         Jupyter.notebook.select_next();
 59 |     }
 60 | 
 61 |     function jupyter_text2code_lib_callback(out_data) {
 62 |         if (out_data.msg_type === "execute_result"){
 63 |             var query = $("#jupyter_text2code_query").val();
 64 |             $.get({
 65 |                 url: '/jupyter-text2code',
 66 |                 data: {"query": query, "dataframes_info": out_data.content.data['text/plain']},
 67 |                 beforeSend: function(){
 68 |                     $("#jupyter_text2code_loader").show();
 69 |                 },
 70 |                 success: function(response) {
 71 |                     code_exec_callback(query, response);
 72 |                 },
 73 |                 error: handle_jupyter_text2code_error,
 74 |                 complete: function(){
 75 |                     $("#jupyter_text2code_loader").hide();
 76 |                 },
 77 |             });
 78 |         }
 79 |     }
 80 | 
 81 |     function read_code_init(lib) {
 82 |         var libName = Jupyter.notebook.base_url + "nbextensions/jupyter-text2code/" + lib;
 83 |         $.get(libName).done(function(data) {
 84 |             jupyter_text2code_lib.code_init = data;
 85 |              requirejs(
 86 |              [],
 87 |             function() {
 88 |                 Jupyter.notebook.kernel.execute(jupyter_text2code_lib.code_init, { iopub: { output: jupyter_text2code_lib_callback } }, { silent: false });
 89 |             })
 90 |             console.log(libName + ' loaded library');
 91 |         }).fail(function() {
 92 |             console.log(libName + 'failed to load ' + lib + ' library')
 93 |         });
 94 |     }
 95 | 
 96 |     var initialize = function () {
 97 |         Jupyter.toolbar.add_buttons_group([
 98 |             Jupyter.keyboard_manager.actions.register ({
 99 |                 help   : 'Launch jupyter-text2code',
100 |                 icon   : 'fa-terminal',
101 |                 handler: toggle_jupyter_text2code_editor
102 |             }, 'create-jupyter-text2code-from-notebook', 'Text2Code')
103 |         ]);
104 |         read_code_init("jupyter_text2code_lib.py");
105 |     };
106 | 
107 |     function toggle_jupyter_text2code_editor() {
108 |         if(extension_state.is_open) {
109 |             extension_state.is_open = false;
110 |             $(".jupyter_text2code_editor_display").hide();
111 |         }
112 |         else {
113 |             if($('#jupyter_text2code_editor').length == 0) {
114 |                 build_jupyter_text2code_editor();
115 |             }
116 |             extension_state.is_open = true;
117 |             $(".jupyter_text2code_editor_display").show();
118 |         }
119 |     }
120 | 
121 |     function build_alert(alert_class) {
122 |         return $('<div/>')
123 |             .addClass('alert alert-dismissable')
124 |             .addClass(alert_class)
125 |             .append(
126 |                 $('<button class="close" type="button" data-dismiss="alert" aria-label="Close"/>')
127 |                     .append($('<span aria-hidden="true"/>').html('&times;'))
128 |             );
129 |     }
130 | 
131 |     function handle_jupyter_text2code_error(jqXHR, textStatus, errorThrown) {
132 |         console.log('jupyter_text2code ajax error:', jqXHR, textStatus, errorThrown);
133 |         var alert = build_alert('alert-danger')
134 |             .hide()
135 |             .append(
136 |                 $('<p/>').text('Error:')
137 |             )
138 |             .append(
139 |                 $('<pre/>').text(jqXHR.responseJSON ? JSON.stringify(jqXHR.responseJSON, null, 2) : errorThrown)
140 |             );
141 |         $('#jupyter_text2code_modal').find('.modal-body').append(alert);
142 |         alert.slideDown('fast');
143 |     }
144 | 
145 | 
146 |     function add_presets(jupyter_text2code_editor) {
147 | 
148 |         var jupyter_text2code_preset = jupyter_text2code_editor.find('#jupyter_text2code_preset_content');
149 |         extension_state.data.presets.forEach(function(item, index) {
150 |             jupyter_text2code_preset.append("<div class='jupyter_text2code_preset_item'>"+ item + "</div>");
151 |         });
152 |         return jupyter_text2code_editor;
153 |     }
154 | 
155 |     function update_history_display(query) {
156 |         var jupyter_text2code_history = $('#jupyter_text2code_history');
157 |             jupyter_text2code_history.prepend("<div class='jupyter_text2code_history_item'>"+ query + "</div>");
158 |     }
159 | 
160 |     function build_jupyter_text2code_editor () {
161 |         var jupyter_text2code_editor = $('<div/>').attr('id', 'jupyter_text2code_editor').attr('class', 'jupyter_text2code_editor_display');
162 |         var jupyter_text2code_editor_history = $('<div/>').attr('id', 'jupyter_text2code_editor_history').attr('class', 'jupyter_text2code_editor_display');
163 | 
164 |         var textArea = $('<textarea id="jupyter_text2code_query"  class="form-control" />').val(extension_state.data.query).addClass('form-control');
165 | 
166 |         jupyter_text2code_editor
167 |             .append("<div class='jupyter_text2code_what_heading'>What do you want to do?</div>")
168 |             .append(textArea)
169 |             .append("<button class='btn-primary' id='jupyter_text2code_submit'>Text2Code</button>")
170 |             .append("<button id='jupyter_text2code_close'> Close </button>")
171 |             .append("<div id='jupyter_text2code_loader' class='fa fa-spinner fa-spin fa-3x jupyter_text2code_spinner' style='display: none;'></div>");
172 | 
173 |         // History section
174 |         jupyter_text2code_editor_history.append(""
175 |            + "<div class='jupyter_text2code_sub_heading'>Command History:</div> <div id='jupyter_text2code_history_wrapper'><div id='jupyter_text2code_history'> </div></div>"
176 |            + "<hr><div class='jupyter_text2code_sub_heading'>Presets:</div><div id='jupyter_text2code_preset_wrapper'><div id='jupyter_text2code_preset_content'></div></div>"
177 |         );
178 | 
179 |         jupyter_text2code_editor_history = add_presets(jupyter_text2code_editor_history);
180 | 
181 |         // Close button click event handler
182 |         $('body').on('click', '#jupyter_text2code_close', function() {
183 |             extension_state.is_open = false;
184 |             $(".jupyter_text2code_editor_display").hide();
185 |         });
186 |         // jupyter_text2code button click event handler
187 |         $('body').on('click', '#jupyter_text2code_submit', function() {
188 |             make_jupyter_text2code();
189 |         });
190 | 
191 | 
192 |         // Disable jupyter shortcuts while query is being typed(to avoid them from triggering)
193 |         $('body').on('focus', '#jupyter_text2code_query', function() {
194 |             Jupyter.keyboard_manager.disable();
195 |         });
196 |         $('body').on('blur', '#jupyter_text2code_query', function() {
197 |             Jupyter.keyboard_manager.enable();
198 |         });
199 | 
200 |         // Handler for clicking history item
201 |         $('body').on('click', '.jupyter_text2code_history_item', function() {
202 |             $("#jupyter_text2code_query").val($(this).text());
203 |         });
204 |         // Handler for clicking preset item
205 |         $('body').on('click', '.jupyter_text2code_preset_item', function() {
206 |             $("#jupyter_text2code_query").val($(this).text());
207 |         });
208 |         
209 |         $("#notebook-container").append(jupyter_text2code_editor);
210 |         $("body").append(jupyter_text2code_editor_history);
211 |     }
212 | 
213 |     var make_jupyter_text2code = function make_jupyter_text2code() {
214 |         var jupyter_text2code_lib_cmd = "dataframes_info()";
215 |         requirejs([],
216 |             function() {
217 |                 Jupyter.notebook.kernel.execute(
218 |                     jupyter_text2code_lib_cmd, { iopub: { output: jupyter_text2code_lib_callback } }, { silent: false }
219 |                 );
220 |             });
221 |     };
222 | 
223 |     function load_jupyter_extension () {
224 |         var link = document.createElement("link");
225 |         link.type = "text/css";
226 |         link.rel = "stylesheet";
227 |         link.href = requirejs.toUrl("./jupyter_text2code.css");
228 |         document.getElementsByTagName("head")[0].appendChild(link);
229 | 
230 |         // load when the kernel's ready
231 |         if (Jupyter.notebook.kernel) {
232 |           initialize();
233 |         } else {
234 |           events.on('kernel_ready.Kernel', initialize);
235 |         }
236 |         // return Jupyter.notebook.config.loaded.then(initialize);
237 |     }
238 | 
239 |     return {
240 |         load_jupyter_extension: load_jupyter_extension,
241 |         load_ipython_extension: load_jupyter_extension
242 |     };
243 | });
244 | 


--------------------------------------------------------------------------------
/notebooks/Code Generator.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "code",
   5 |    "execution_count": 1,
   6 |    "metadata": {},
   7 |    "outputs": [],
   8 |    "source": [
   9 |     "%load_ext autoreload\n",
  10 |     "%autoreload 2"
  11 |    ]
  12 |   },
  13 |   {
  14 |    "cell_type": "code",
  15 |    "execution_count": 2,
  16 |    "metadata": {},
  17 |    "outputs": [
  18 |     {
  19 |      "name": "stdout",
  20 |      "output_type": "stream",
  21 |      "text": [
  22 |       "********************\n",
  23 |       "********************\n",
  24 |       "loading_jupyter_server_extension: jupyter-text2code. First install will download universal-sentence-encoder, please wait...\n",
  25 |       "********************\n",
  26 |       "********************\n"
  27 |      ]
  28 |     }
  29 |    ],
  30 |    "source": [
  31 |     "import sys\n",
  32 |     "sys.path.insert(0,'../')\n",
  33 |     "from jupyter_text2code.jupyter_text2code_serverextension import CodeGenerator"
  34 |    ]
  35 |   },
  36 |   {
  37 |    "cell_type": "code",
  38 |    "execution_count": 3,
  39 |    "metadata": {},
  40 |    "outputs": [],
  41 |    "source": [
  42 |     "cg = CodeGenerator()"
  43 |    ]
  44 |   },
  45 |   {
  46 |    "cell_type": "markdown",
  47 |    "metadata": {},
  48 |    "source": [
  49 |     "### Intent1: import x"
  50 |    ]
  51 |   },
  52 |   {
  53 |    "cell_type": "code",
  54 |    "execution_count": 4,
  55 |    "metadata": {},
  56 |    "outputs": [
  57 |     {
  58 |      "data": {
  59 |       "text/plain": [
  60 |        "'import spacy'"
  61 |       ]
  62 |      },
  63 |      "execution_count": 4,
  64 |      "metadata": {},
  65 |      "output_type": "execute_result"
  66 |     }
  67 |    ],
  68 |    "source": [
  69 |     "cg.generate_code(\"import spacy\")"
  70 |    ]
  71 |   },
  72 |   {
  73 |    "cell_type": "code",
  74 |    "execution_count": 5,
  75 |    "metadata": {},
  76 |    "outputs": [
  77 |     {
  78 |      "data": {
  79 |       "text/plain": [
  80 |        "'import pandas'"
  81 |       ]
  82 |      },
  83 |      "execution_count": 5,
  84 |      "metadata": {},
  85 |      "output_type": "execute_result"
  86 |     }
  87 |    ],
  88 |    "source": [
  89 |     "cg.generate_code(\"import pandas\")"
  90 |    ]
  91 |   },
  92 |   {
  93 |    "cell_type": "markdown",
  94 |    "metadata": {},
  95 |    "source": [
  96 |     "### Intent2: import all libs"
  97 |    ]
  98 |   },
  99 |   {
 100 |    "cell_type": "code",
 101 |    "execution_count": 6,
 102 |    "metadata": {},
 103 |    "outputs": [
 104 |     {
 105 |      "name": "stdout",
 106 |      "output_type": "stream",
 107 |      "text": [
 108 |       "\n",
 109 |       "import pandas as pd\n",
 110 |       "import numpy as np\n",
 111 |       "import os\n",
 112 |       "import plotly.express as px\n",
 113 |       "import matplotlib.pyplot as plt\n",
 114 |       "pd.options.plotting.backend = 'plotly'\n",
 115 |       "        \n"
 116 |      ]
 117 |     }
 118 |    ],
 119 |    "source": [
 120 |     "print(cg.generate_code(\"import all libraries\"))"
 121 |    ]
 122 |   },
 123 |   {
 124 |    "cell_type": "markdown",
 125 |    "metadata": {},
 126 |    "source": [
 127 |     "### Intent3: install lib"
 128 |    ]
 129 |   },
 130 |   {
 131 |    "cell_type": "code",
 132 |    "execution_count": 7,
 133 |    "metadata": {},
 134 |    "outputs": [
 135 |     {
 136 |      "data": {
 137 |       "text/plain": [
 138 |        "'!pip install matplotlib'"
 139 |       ]
 140 |      },
 141 |      "execution_count": 7,
 142 |      "metadata": {},
 143 |      "output_type": "execute_result"
 144 |     }
 145 |    ],
 146 |    "source": [
 147 |     "cg.generate_code(\"install matplotlib\")"
 148 |    ]
 149 |   },
 150 |   {
 151 |    "cell_type": "code",
 152 |    "execution_count": 8,
 153 |    "metadata": {},
 154 |    "outputs": [
 155 |     {
 156 |      "data": {
 157 |       "text/plain": [
 158 |        "'!pip install scikitlearn'"
 159 |       ]
 160 |      },
 161 |      "execution_count": 8,
 162 |      "metadata": {},
 163 |      "output_type": "execute_result"
 164 |     }
 165 |    ],
 166 |    "source": [
 167 |     "cg.generate_code(\"install scikitlearn\")"
 168 |    ]
 169 |   },
 170 |   {
 171 |    "cell_type": "markdown",
 172 |    "metadata": {},
 173 |    "source": [
 174 |     "### Intent4: Load file into df"
 175 |    ]
 176 |   },
 177 |   {
 178 |    "cell_type": "code",
 179 |    "execution_count": 9,
 180 |    "metadata": {},
 181 |    "outputs": [
 182 |     {
 183 |      "data": {
 184 |       "text/plain": [
 185 |        "\"xxx = pd.read_csv('train.csv')\""
 186 |       ]
 187 |      },
 188 |      "execution_count": 9,
 189 |      "metadata": {},
 190 |      "output_type": "execute_result"
 191 |     }
 192 |    ],
 193 |    "source": [
 194 |     "cg.generate_code(\"load train.csv\")"
 195 |    ]
 196 |   },
 197 |   {
 198 |    "cell_type": "code",
 199 |    "execution_count": 10,
 200 |    "metadata": {},
 201 |    "outputs": [
 202 |     {
 203 |      "name": "stdout",
 204 |      "output_type": "stream",
 205 |      "text": [
 206 |       "Intent: Load file into a dataframe  Intent_id: 2  Similarity 0.88224137\n",
 207 |       "Entities:\n",
 208 |       "train.csv 5 14 FNAME\n",
 209 |       "zzzz 18 22 VARNAME\n",
 210 |       "----------\n"
 211 |      ]
 212 |     },
 213 |     {
 214 |      "data": {
 215 |       "text/plain": [
 216 |        "\"zzzz = pd.read_csv('train.csv')\""
 217 |       ]
 218 |      },
 219 |      "execution_count": 10,
 220 |      "metadata": {},
 221 |      "output_type": "execute_result"
 222 |     }
 223 |    ],
 224 |    "source": [
 225 |     "cg.generate_code(\"load train.csv in zzzz\", debug=True)"
 226 |    ]
 227 |   },
 228 |   {
 229 |    "cell_type": "markdown",
 230 |    "metadata": {},
 231 |    "source": [
 232 |     "### Intent5: Show x rows from df"
 233 |    ]
 234 |   },
 235 |   {
 236 |    "cell_type": "code",
 237 |    "execution_count": 11,
 238 |    "metadata": {},
 239 |    "outputs": [
 240 |     {
 241 |      "name": "stdout",
 242 |      "output_type": "stream",
 243 |      "text": [
 244 |       "Intent: Show x rows from df  Intent_id: 3  Similarity 0.7455359\n",
 245 |       "Entities:\n",
 246 |       "df 10 12 VARNAME\n",
 247 |       "----------\n"
 248 |      ]
 249 |     },
 250 |     {
 251 |      "data": {
 252 |       "text/plain": [
 253 |        "'df.head()'"
 254 |       ]
 255 |      },
 256 |      "execution_count": 11,
 257 |      "metadata": {},
 258 |      "output_type": "execute_result"
 259 |     }
 260 |    ],
 261 |    "source": [
 262 |     "cg.generate_code(\"Show from df\", debug=True)"
 263 |    ]
 264 |   },
 265 |   {
 266 |    "cell_type": "code",
 267 |    "execution_count": 12,
 268 |    "metadata": {},
 269 |    "outputs": [
 270 |     {
 271 |      "data": {
 272 |       "text/plain": [
 273 |        "\"#Couldn't extract variable name, replacing with default\\nxxx.head()\""
 274 |       ]
 275 |      },
 276 |      "execution_count": 12,
 277 |      "metadata": {},
 278 |      "output_type": "execute_result"
 279 |     }
 280 |    ],
 281 |    "source": [
 282 |     "cg.generate_code(\"Show alldf\")"
 283 |    ]
 284 |   },
 285 |   {
 286 |    "cell_type": "code",
 287 |    "execution_count": 13,
 288 |    "metadata": {},
 289 |    "outputs": [
 290 |     {
 291 |      "name": "stdout",
 292 |      "output_type": "stream",
 293 |      "text": [
 294 |       "Intent: Show x rows from df  Intent_id: 3  Similarity 1.0\n",
 295 |       "Entities:\n",
 296 |       "5 5 6 CARDINAL\n",
 297 |       "df 17 19 VARNAME\n",
 298 |       "----------\n"
 299 |      ]
 300 |     },
 301 |     {
 302 |      "data": {
 303 |       "text/plain": [
 304 |        "'df.head(5)'"
 305 |       ]
 306 |      },
 307 |      "execution_count": 13,
 308 |      "metadata": {},
 309 |      "output_type": "execute_result"
 310 |     }
 311 |    ],
 312 |    "source": [
 313 |     "cg.generate_code(\"Show 5 rows from df\", debug=True)"
 314 |    ]
 315 |   },
 316 |   {
 317 |    "cell_type": "code",
 318 |    "execution_count": 14,
 319 |    "metadata": {},
 320 |    "outputs": [
 321 |     {
 322 |      "data": {
 323 |       "text/plain": [
 324 |        "'df.head(20)'"
 325 |       ]
 326 |      },
 327 |      "execution_count": 14,
 328 |      "metadata": {},
 329 |      "output_type": "execute_result"
 330 |     }
 331 |    ],
 332 |    "source": [
 333 |     "cg.generate_code(\"Show 20 rows of df\")"
 334 |    ]
 335 |   },
 336 |   {
 337 |    "cell_type": "markdown",
 338 |    "metadata": {},
 339 |    "source": [
 340 |     "### Intent6: List columns of df"
 341 |    ]
 342 |   },
 343 |   {
 344 |    "cell_type": "code",
 345 |    "execution_count": 15,
 346 |    "metadata": {},
 347 |    "outputs": [
 348 |     {
 349 |      "data": {
 350 |       "text/plain": [
 351 |        "'mydf.columns'"
 352 |       ]
 353 |      },
 354 |      "execution_count": 15,
 355 |      "metadata": {},
 356 |      "output_type": "execute_result"
 357 |     }
 358 |    ],
 359 |    "source": [
 360 |     "cg.generate_code(\"list all columns of mydf\")"
 361 |    ]
 362 |   },
 363 |   {
 364 |    "cell_type": "markdown",
 365 |    "metadata": {},
 366 |    "source": [
 367 |     "### Intent7: Describe the df"
 368 |    ]
 369 |   },
 370 |   {
 371 |    "cell_type": "code",
 372 |    "execution_count": 16,
 373 |    "metadata": {},
 374 |    "outputs": [
 375 |     {
 376 |      "data": {
 377 |       "text/plain": [
 378 |        "'df.describe()'"
 379 |       ]
 380 |      },
 381 |      "execution_count": 16,
 382 |      "metadata": {},
 383 |      "output_type": "execute_result"
 384 |     }
 385 |    ],
 386 |    "source": [
 387 |     "cg.generate_code(\"Describe df\")"
 388 |    ]
 389 |   },
 390 |   {
 391 |    "cell_type": "code",
 392 |    "execution_count": 17,
 393 |    "metadata": {},
 394 |    "outputs": [
 395 |     {
 396 |      "data": {
 397 |       "text/plain": [
 398 |        "'mydf.describe()'"
 399 |       ]
 400 |      },
 401 |      "execution_count": 17,
 402 |      "metadata": {},
 403 |      "output_type": "execute_result"
 404 |     }
 405 |    ],
 406 |    "source": [
 407 |     "cg.generate_code(\"Describe mydf\")"
 408 |    ]
 409 |   },
 410 |   {
 411 |    "cell_type": "markdown",
 412 |    "metadata": {},
 413 |    "source": [
 414 |     "### Intent8: Plot histogram of column"
 415 |    ]
 416 |   },
 417 |   {
 418 |    "cell_type": "code",
 419 |    "execution_count": 18,
 420 |    "metadata": {},
 421 |    "outputs": [
 422 |     {
 423 |      "name": "stdout",
 424 |      "output_type": "stream",
 425 |      "text": [
 426 |       "Intent: histogram of column  Intent_id: 4  Similarity 0.9223975\n",
 427 |       "Entities:\n",
 428 |       "rainfall 18 26 COLNAME\n",
 429 |       "df 37 39 VARNAME\n",
 430 |       "----------\n"
 431 |      ]
 432 |     },
 433 |     {
 434 |      "data": {
 435 |       "text/plain": [
 436 |        "'df.plot.hist(x=[\"rainfall\"])'"
 437 |       ]
 438 |      },
 439 |      "execution_count": 18,
 440 |      "metadata": {},
 441 |      "output_type": "execute_result"
 442 |     }
 443 |    ],
 444 |    "source": [
 445 |     "cg.generate_code(\"Plot histogram of rainfall column in df\", debug=True)"
 446 |    ]
 447 |   },
 448 |   {
 449 |    "cell_type": "code",
 450 |    "execution_count": 19,
 451 |    "metadata": {},
 452 |    "outputs": [
 453 |     {
 454 |      "name": "stdout",
 455 |      "output_type": "stream",
 456 |      "text": [
 457 |       "Intent: histogram of column  Intent_id: 4  Similarity 0.9799768\n",
 458 |       "Entities:\n",
 459 |       "xzcx 18 22 COLNAME\n",
 460 |       "df 33 35 VARNAME\n",
 461 |       "----------\n"
 462 |      ]
 463 |     },
 464 |     {
 465 |      "data": {
 466 |       "text/plain": [
 467 |        "'df.plot.hist(x=[\"xzcx\"])'"
 468 |       ]
 469 |      },
 470 |      "execution_count": 19,
 471 |      "metadata": {},
 472 |      "output_type": "execute_result"
 473 |     }
 474 |    ],
 475 |    "source": [
 476 |     "cg.generate_code(\"Plot histogram of xzcx column in df\", debug=True)"
 477 |    ]
 478 |   },
 479 |   {
 480 |    "cell_type": "code",
 481 |    "execution_count": 20,
 482 |    "metadata": {},
 483 |    "outputs": [
 484 |     {
 485 |      "name": "stdout",
 486 |      "output_type": "stream",
 487 |      "text": [
 488 |       "Intent: histogram of column  Intent_id: 4  Similarity 0.91642696\n",
 489 |       "Entities:\n",
 490 |       "age 18 21 VARNAME\n",
 491 |       "df 25 27 VARNAME\n",
 492 |       "----------\n",
 493 |       "Error:  Didn't detect the column name\n"
 494 |      ]
 495 |     },
 496 |     {
 497 |      "data": {
 498 |       "text/plain": [
 499 |        "'#Couldn\\'t extract column names, replacing with default\\nage.plot.hist(x=[\"xxx\", \"yyy\"])'"
 500 |       ]
 501 |      },
 502 |      "execution_count": 20,
 503 |      "metadata": {},
 504 |      "output_type": "execute_result"
 505 |     }
 506 |    ],
 507 |    "source": [
 508 |     "cg.generate_code(\"Plot histogram of age in df\", debug=True)"
 509 |    ]
 510 |   },
 511 |   {
 512 |    "cell_type": "code",
 513 |    "execution_count": 21,
 514 |    "metadata": {},
 515 |    "outputs": [
 516 |     {
 517 |      "name": "stdout",
 518 |      "output_type": "stream",
 519 |      "text": [
 520 |       "Intent: histogram of column  Intent_id: 4  Similarity 0.98263717\n",
 521 |       "Entities:\n",
 522 |       "dailywage 18 27 COLNAME\n",
 523 |       "df 38 40 VARNAME\n",
 524 |       "----------\n"
 525 |      ]
 526 |     },
 527 |     {
 528 |      "data": {
 529 |       "text/plain": [
 530 |        "'df.plot.hist(x=[\"dailywage\"])'"
 531 |       ]
 532 |      },
 533 |      "execution_count": 21,
 534 |      "metadata": {},
 535 |      "output_type": "execute_result"
 536 |     }
 537 |    ],
 538 |    "source": [
 539 |     "cg.generate_code(\"Plot histogram of dailyWage column in df\", debug=True)"
 540 |    ]
 541 |   },
 542 |   {
 543 |    "cell_type": "markdown",
 544 |    "metadata": {},
 545 |    "source": [
 546 |     "### Intent9: Get correlation matrix"
 547 |    ]
 548 |   },
 549 |   {
 550 |    "cell_type": "code",
 551 |    "execution_count": 22,
 552 |    "metadata": {},
 553 |    "outputs": [
 554 |     {
 555 |      "data": {
 556 |       "text/plain": [
 557 |        "'df.corr()'"
 558 |       ]
 559 |      },
 560 |      "execution_count": 22,
 561 |      "metadata": {},
 562 |      "output_type": "execute_result"
 563 |     }
 564 |    ],
 565 |    "source": [
 566 |     "cg.generate_code(\"Get correlation matrix of df\")"
 567 |    ]
 568 |   },
 569 |   {
 570 |    "cell_type": "markdown",
 571 |    "metadata": {},
 572 |    "source": [
 573 |     "### Intent10: Print shape of df"
 574 |    ]
 575 |   },
 576 |   {
 577 |    "cell_type": "code",
 578 |    "execution_count": 23,
 579 |    "metadata": {},
 580 |    "outputs": [
 581 |     {
 582 |      "data": {
 583 |       "text/plain": [
 584 |        "'df.shape'"
 585 |       ]
 586 |      },
 587 |      "execution_count": 23,
 588 |      "metadata": {},
 589 |      "output_type": "execute_result"
 590 |     }
 591 |    ],
 592 |    "source": [
 593 |     "cg.generate_code(\"Print shape of df\")"
 594 |    ]
 595 |   },
 596 |   {
 597 |    "cell_type": "code",
 598 |    "execution_count": 24,
 599 |    "metadata": {},
 600 |    "outputs": [
 601 |     {
 602 |      "data": {
 603 |       "text/plain": [
 604 |        "'zz.shape'"
 605 |       ]
 606 |      },
 607 |      "execution_count": 24,
 608 |      "metadata": {},
 609 |      "output_type": "execute_result"
 610 |     }
 611 |    ],
 612 |    "source": [
 613 |     "cg.generate_code(\"Print shape of zz\")"
 614 |    ]
 615 |   },
 616 |   {
 617 |    "cell_type": "markdown",
 618 |    "metadata": {},
 619 |    "source": [
 620 |     "### Intent11: Barplot two columns"
 621 |    ]
 622 |   },
 623 |   {
 624 |    "cell_type": "code",
 625 |    "execution_count": 25,
 626 |    "metadata": {},
 627 |    "outputs": [
 628 |     {
 629 |      "name": "stdout",
 630 |      "output_type": "stream",
 631 |      "text": [
 632 |       "Intent: Bar of column1 and column2 from df  Intent_id: 7  Similarity 0.7900896\n",
 633 |       "Entities:\n",
 634 |       "duration 9 17 COLNAME\n",
 635 |       "age 22 25 COLNAME\n",
 636 |       "df 31 33 VARNAME\n",
 637 |       "----------\n"
 638 |      ]
 639 |     },
 640 |     {
 641 |      "data": {
 642 |       "text/plain": [
 643 |        "\"px.bar(x='duration',y='age',data_frame=df,title='CustomTitle', labels={'duration':'duration','age':'age'})\""
 644 |       ]
 645 |      },
 646 |      "execution_count": 25,
 647 |      "metadata": {},
 648 |      "output_type": "execute_result"
 649 |     }
 650 |    ],
 651 |    "source": [
 652 |     "cg.generate_code(\"Bar plot duration and age from df\", debug=True)"
 653 |    ]
 654 |   },
 655 |   {
 656 |    "cell_type": "code",
 657 |    "execution_count": 26,
 658 |    "metadata": {},
 659 |    "outputs": [
 660 |     {
 661 |      "name": "stdout",
 662 |      "output_type": "stream",
 663 |      "text": [
 664 |       "Intent: Bar of column1 and column2 from df  Intent_id: 7  Similarity 0.8381791\n",
 665 |       "Entities:\n",
 666 |       "age 9 12 COLNAME\n",
 667 |       "temperature 17 28 COLNAME\n",
 668 |       "df 42 44 VARNAME\n",
 669 |       "----------\n"
 670 |      ]
 671 |     },
 672 |     {
 673 |      "data": {
 674 |       "text/plain": [
 675 |        "\"px.bar(x='age',y='temperature',data_frame=df,title='CustomTitle', labels={'age':'age','temperature':'temperature'})\""
 676 |       ]
 677 |      },
 678 |      "execution_count": 26,
 679 |      "metadata": {},
 680 |      "output_type": "execute_result"
 681 |     }
 682 |    ],
 683 |    "source": [
 684 |     "cg.generate_code(\"Bar plot age and temperature columns from df\", debug=True)"
 685 |    ]
 686 |   },
 687 |   {
 688 |    "cell_type": "code",
 689 |    "execution_count": 27,
 690 |    "metadata": {},
 691 |    "outputs": [
 692 |     {
 693 |      "name": "stdout",
 694 |      "output_type": "stream",
 695 |      "text": [
 696 |       "Intent: Bar of column1 and column2 from df  Intent_id: 7  Similarity 0.8020855\n",
 697 |       "Entities:\n",
 698 |       "temperature 9 20 COLNAME\n",
 699 |       "humidity 25 33 COLNAME\n",
 700 |       "df 45 47 VARNAME\n",
 701 |       "----------\n"
 702 |      ]
 703 |     },
 704 |     {
 705 |      "data": {
 706 |       "text/plain": [
 707 |        "\"px.bar(x='temperature',y='humidity',data_frame=df,title='CustomTitle', labels={'temperature':'temperature','humidity':'humidity'})\""
 708 |       ]
 709 |      },
 710 |      "execution_count": 27,
 711 |      "metadata": {},
 712 |      "output_type": "execute_result"
 713 |     }
 714 |    ],
 715 |    "source": [
 716 |     "cg.generate_code(\"Bar plot temperature and humidity columns of df\", debug=True)"
 717 |    ]
 718 |   },
 719 |   {
 720 |    "cell_type": "code",
 721 |    "execution_count": 28,
 722 |    "metadata": {},
 723 |    "outputs": [
 724 |     {
 725 |      "name": "stdout",
 726 |      "output_type": "stream",
 727 |      "text": [
 728 |       "Intent: Bar of column1 and column2 from df  Intent_id: 7  Similarity 0.9778261\n",
 729 |       "Entities:\n",
 730 |       "numitems 21 29 COLNAME\n",
 731 |       "purchasemonth 44 57 COLNAME\n",
 732 |       "df 71 73 VARNAME\n",
 733 |       "----------\n"
 734 |      ]
 735 |     },
 736 |     {
 737 |      "data": {
 738 |       "text/plain": [
 739 |        "\"px.bar(x='numitems',y='purchasemonth',data_frame=df,title='CustomTitle', labels={'numitems':'numitems','purchasemonth':'purchasemonth'})\""
 740 |       ]
 741 |      },
 742 |      "execution_count": 28,
 743 |      "metadata": {},
 744 |      "output_type": "execute_result"
 745 |     }
 746 |    ],
 747 |    "source": [
 748 |     "cg.generate_code(\"show a bar plot with numItems on x axis and purchaseMonth on y axis in df\", debug=True)"
 749 |    ]
 750 |   },
 751 |   {
 752 |    "cell_type": "markdown",
 753 |    "metadata": {},
 754 |    "source": [
 755 |     "### Intent12: Pie chart of a column in df"
 756 |    ]
 757 |   },
 758 |   {
 759 |    "cell_type": "code",
 760 |    "execution_count": 29,
 761 |    "metadata": {},
 762 |    "outputs": [
 763 |     {
 764 |      "name": "stdout",
 765 |      "output_type": "stream",
 766 |      "text": [
 767 |       "Intent: Pie chart of column in   Intent_id: 8  Similarity 0.8810677\n",
 768 |       "Entities:\n",
 769 |       "age 12 15 COLNAME\n",
 770 |       "df 26 28 VARNAME\n",
 771 |       "gender 40 46 COLNAME\n",
 772 |       "----------\n"
 773 |      ]
 774 |     },
 775 |     {
 776 |      "data": {
 777 |       "text/plain": [
 778 |        "\"tmp = df['age'].value_counts(dropna=False)\\npx.pie(tmp,values=tmp.values,names=tmp.index,title='CustomTitle')\""
 779 |       ]
 780 |      },
 781 |      "execution_count": 29,
 782 |      "metadata": {},
 783 |      "output_type": "execute_result"
 784 |     }
 785 |    ],
 786 |    "source": [
 787 |     "cg.generate_code(\"piechart of age column in df grouped by gender column\", debug=True)"
 788 |    ]
 789 |   },
 790 |   {
 791 |    "cell_type": "markdown",
 792 |    "metadata": {},
 793 |    "source": [
 794 |     "### Intent13: Group df"
 795 |    ]
 796 |   },
 797 |   {
 798 |    "cell_type": "code",
 799 |    "execution_count": 38,
 800 |    "metadata": {},
 801 |    "outputs": [
 802 |     {
 803 |      "name": "stdout",
 804 |      "output_type": "stream",
 805 |      "text": [
 806 |       "Intent: group the df by column1 and get average of column2  Intent_id: 12  Similarity 0.7937273\n",
 807 |       "Entities:\n",
 808 |       "df 10 12 VARNAME\n",
 809 |       "gender 16 22 COLNAME\n",
 810 |       "average 31 38 FUNCTION\n",
 811 |       "age 42 45 VARNAME\n",
 812 |       "----------\n",
 813 |       "Use mean for synonym average\n"
 814 |      ]
 815 |     },
 816 |     {
 817 |      "data": {
 818 |       "text/plain": [
 819 |        "\"df.groupby(['gender']).agg(['mean'])\""
 820 |       ]
 821 |      },
 822 |      "execution_count": 38,
 823 |      "metadata": {},
 824 |      "output_type": "execute_result"
 825 |     }
 826 |    ],
 827 |    "source": [
 828 |     "cg.generate_code(\"Group the df by gender and get average of age\", debug=True)"
 829 |    ]
 830 |   },
 831 |   {
 832 |    "cell_type": "code",
 833 |    "execution_count": 31,
 834 |    "metadata": {},
 835 |    "outputs": [
 836 |     {
 837 |      "name": "stdout",
 838 |      "output_type": "stream",
 839 |      "text": [
 840 |       "Intent: group the df by column1 and get average of column2  Intent_id: 12  Similarity 0.79380095\n",
 841 |       "Entities:\n",
 842 |       "df 6 8 VARNAME\n",
 843 |       "gender 12 18 COLNAME\n",
 844 |       "mean 27 31 FUNCTION\n",
 845 |       "age 35 38 VARNAME\n",
 846 |       "----------\n"
 847 |      ]
 848 |     },
 849 |     {
 850 |      "data": {
 851 |       "text/plain": [
 852 |        "\"df.groupby(['gender']).agg(['mean'])\""
 853 |       ]
 854 |      },
 855 |      "execution_count": 31,
 856 |      "metadata": {},
 857 |      "output_type": "execute_result"
 858 |     }
 859 |    ],
 860 |    "source": [
 861 |     "cg.generate_code(\"Group df by gender and get mean of age\", debug=True)"
 862 |    ]
 863 |   },
 864 |   {
 865 |    "cell_type": "code",
 866 |    "execution_count": 32,
 867 |    "metadata": {},
 868 |    "outputs": [
 869 |     {
 870 |      "name": "stdout",
 871 |      "output_type": "stream",
 872 |      "text": [
 873 |       "Intent: group the df by column1 and get average of column2  Intent_id: 12  Similarity 0.77789533\n",
 874 |       "Entities:\n",
 875 |       "df 6 8 VARNAME\n",
 876 |       "gender 12 18 COLNAME\n",
 877 |       "mean 27 31 FUNCTION\n",
 878 |       "sum 36 39 FUNCTION\n",
 879 |       "age 43 46 VARNAME\n",
 880 |       "----------\n"
 881 |      ]
 882 |     },
 883 |     {
 884 |      "data": {
 885 |       "text/plain": [
 886 |        "\"df.groupby(['gender']).agg(['mean','sum'])\""
 887 |       ]
 888 |      },
 889 |      "execution_count": 32,
 890 |      "metadata": {},
 891 |      "output_type": "execute_result"
 892 |     }
 893 |    ],
 894 |    "source": [
 895 |     "cg.generate_code(\"Group df by gender and get mean and sum of age\", debug=True)"
 896 |    ]
 897 |   },
 898 |   {
 899 |    "cell_type": "code",
 900 |    "execution_count": 33,
 901 |    "metadata": {},
 902 |    "outputs": [
 903 |     {
 904 |      "name": "stdout",
 905 |      "output_type": "stream",
 906 |      "text": [
 907 |       "Intent: group the df by column1 and get average of column2  Intent_id: 12  Similarity 0.7954531\n",
 908 |       "Entities:\n",
 909 |       "df 6 8 VARNAME\n",
 910 |       "gender 12 18 COLNAME\n",
 911 |       "average 27 34 FUNCTION\n",
 912 |       "sum 36 39 FUNCTION\n",
 913 |       "age 43 46 VARNAME\n",
 914 |       "----------\n",
 915 |       "Use mean for synonym average\n"
 916 |      ]
 917 |     },
 918 |     {
 919 |      "data": {
 920 |       "text/plain": [
 921 |        "\"df.groupby(['gender']).agg(['mean','sum'])\""
 922 |       ]
 923 |      },
 924 |      "execution_count": 33,
 925 |      "metadata": {},
 926 |      "output_type": "execute_result"
 927 |     }
 928 |    ],
 929 |    "source": [
 930 |     "cg.generate_code(\"Group df by gender and get average, sum of age\", debug=True)"
 931 |    ]
 932 |   },
 933 |   {
 934 |    "cell_type": "code",
 935 |    "execution_count": 39,
 936 |    "metadata": {},
 937 |    "outputs": [
 938 |     {
 939 |      "name": "stdout",
 940 |      "output_type": "stream",
 941 |      "text": [
 942 |       "Intent: group the df by column1 and get average of column2  Intent_id: 12  Similarity 0.81393397\n",
 943 |       "Entities:\n",
 944 |       "df 6 8 VARNAME\n",
 945 |       "ozxc 12 16 COLNAME\n",
 946 |       "zxc 18 21 COLNAME\n",
 947 |       "min 30 33 FUNCTION\n",
 948 |       "max 35 38 FUNCTION\n",
 949 |       "sum 40 43 FUNCTION\n",
 950 |       "corr 47 51 COLNAME\n",
 951 |       "----------\n"
 952 |      ]
 953 |     },
 954 |     {
 955 |      "data": {
 956 |       "text/plain": [
 957 |        "\"df[['ozxc','zxc','corr']].groupby(['ozxc','zxc']).agg(['min','max','sum'])\""
 958 |       ]
 959 |      },
 960 |      "execution_count": 39,
 961 |      "metadata": {},
 962 |      "output_type": "execute_result"
 963 |     }
 964 |    ],
 965 |    "source": [
 966 |     "cg.generate_code(\"Group df by ozxc, zxc and get min, max, sum of corr\", debug=True)"
 967 |    ]
 968 |   },
 969 |   {
 970 |    "cell_type": "code",
 971 |    "execution_count": 35,
 972 |    "metadata": {},
 973 |    "outputs": [
 974 |     {
 975 |      "name": "stdout",
 976 |      "output_type": "stream",
 977 |      "text": [
 978 |       "Intent: group the df by column1 and get average of column2  Intent_id: 12  Similarity 0.6917082\n",
 979 |       "Entities:\n",
 980 |       "df 10 12 VARNAME\n",
 981 |       "gender 16 22 COLNAME\n",
 982 |       "average 31 38 FUNCTION\n",
 983 |       "age 39 42 FUNCTION\n",
 984 |       "----------\n",
 985 |       "Use mean for synonym average\n"
 986 |      ]
 987 |     },
 988 |     {
 989 |      "data": {
 990 |       "text/plain": [
 991 |        "\"df.groupby(['gender']).agg(['mean','age'])\""
 992 |       ]
 993 |      },
 994 |      "execution_count": 35,
 995 |      "metadata": {},
 996 |      "output_type": "execute_result"
 997 |     }
 998 |    ],
 999 |    "source": [
1000 |     "cg.generate_code(\"Group the df by gender and get average age\", debug=True)"
1001 |    ]
1002 |   },
1003 |   {
1004 |    "cell_type": "markdown",
1005 |    "metadata": {},
1006 |    "source": [
1007 |     "### Random"
1008 |    ]
1009 |   },
1010 |   {
1011 |    "cell_type": "code",
1012 |    "execution_count": 31,
1013 |    "metadata": {},
1014 |    "outputs": [
1015 |     {
1016 |      "name": "stdout",
1017 |      "output_type": "stream",
1018 |      "text": [
1019 |       "Intent: install lib  Intent_id: 9  Similarity 0.99999994\n",
1020 |       "Entities:\n",
1021 |       "plotly 8 14 LIBNAME\n",
1022 |       "----------\n"
1023 |      ]
1024 |     },
1025 |     {
1026 |      "data": {
1027 |       "text/plain": [
1028 |        "'!pip install plotly'"
1029 |       ]
1030 |      },
1031 |      "execution_count": 31,
1032 |      "metadata": {},
1033 |      "output_type": "execute_result"
1034 |     }
1035 |    ],
1036 |    "source": [
1037 |     "cg.generate_code(\"install plotly\", debug=True)"
1038 |    ]
1039 |   },
1040 |   {
1041 |    "cell_type": "code",
1042 |    "execution_count": 32,
1043 |    "metadata": {},
1044 |    "outputs": [
1045 |     {
1046 |      "name": "stdout",
1047 |      "output_type": "stream",
1048 |      "text": [
1049 |       "Intent: Pie chart of column in   Intent_id: 8  Similarity 0.8031008\n",
1050 |       "Entities:\n",
1051 |       "heroes_gender 18 31 COLNAME\n",
1052 |       "df 35 37 VARNAME\n",
1053 |       "----------\n"
1054 |      ]
1055 |     },
1056 |     {
1057 |      "data": {
1058 |       "text/plain": [
1059 |        "\"tmp = df['heroes_gender'].value_counts(dropna=False)\\npx.pie(tmp,values=tmp.values,names=tmp.index,title='CustomTitle')\""
1060 |       ]
1061 |      },
1062 |      "execution_count": 32,
1063 |      "metadata": {},
1064 |      "output_type": "execute_result"
1065 |     }
1066 |    ],
1067 |    "source": [
1068 |     "cg.generate_code(\"plot pie chart of heroes_gender in df\", debug=True)"
1069 |    ]
1070 |   },
1071 |   {
1072 |    "cell_type": "code",
1073 |    "execution_count": 33,
1074 |    "metadata": {},
1075 |    "outputs": [
1076 |     {
1077 |      "name": "stdout",
1078 |      "output_type": "stream",
1079 |      "text": [
1080 |       "Intent: histogram of column  Intent_id: 4  Similarity 0.8690444\n",
1081 |       "Entities:\n",
1082 |       "heroes_gender 18 31 COLNAME\n",
1083 |       "youtube_views 36 49 COLNAME\n",
1084 |       "df 61 63 VARNAME\n",
1085 |       "----------\n"
1086 |      ]
1087 |     },
1088 |     {
1089 |      "data": {
1090 |       "text/plain": [
1091 |        "'df.plot.hist(x=[\"heroes_gender\", \"youtube_views\"])'"
1092 |       ]
1093 |      },
1094 |      "execution_count": 33,
1095 |      "metadata": {},
1096 |      "output_type": "execute_result"
1097 |     }
1098 |    ],
1099 |    "source": [
1100 |     "cg.generate_code(\"plot histogram of heroes_gender and youtube_views columns of df\", debug=True)"
1101 |    ]
1102 |   },
1103 |   {
1104 |    "cell_type": "code",
1105 |    "execution_count": 34,
1106 |    "metadata": {},
1107 |    "outputs": [
1108 |     {
1109 |      "name": "stdout",
1110 |      "output_type": "stream",
1111 |      "text": [
1112 |       "Intent: histogram of column  Intent_id: 4  Similarity 0.8805045\n",
1113 |       "Entities:\n",
1114 |       "heroes_gendes 18 31 COLNAME\n",
1115 |       "zxc 36 39 COLNAME\n",
1116 |       "df 51 53 VARNAME\n",
1117 |       "----------\n"
1118 |      ]
1119 |     },
1120 |     {
1121 |      "data": {
1122 |       "text/plain": [
1123 |        "'df.plot.hist(x=[\"heroes_gendes\", \"zxc\"])'"
1124 |       ]
1125 |      },
1126 |      "execution_count": 34,
1127 |      "metadata": {},
1128 |      "output_type": "execute_result"
1129 |     }
1130 |    ],
1131 |    "source": [
1132 |     "cg.generate_code(\"plot histogram of heroes_gendes and zxc columns of df\", debug=True)"
1133 |    ]
1134 |   },
1135 |   {
1136 |    "cell_type": "code",
1137 |    "execution_count": 35,
1138 |    "metadata": {},
1139 |    "outputs": [
1140 |     {
1141 |      "name": "stdout",
1142 |      "output_type": "stream",
1143 |      "text": [
1144 |       "Intent: line chart of column1 and column2 of df  Intent_id: 13  Similarity 0.88551736\n",
1145 |       "Entities:\n",
1146 |       "release_date 10 22 COLNAME\n",
1147 |       "youtube_avg_watch_duration 27 53 COLNAME\n",
1148 |       "df 57 59 VARNAME\n",
1149 |       "----------\n"
1150 |      ]
1151 |     },
1152 |     {
1153 |      "data": {
1154 |       "text/plain": [
1155 |        "\"df.plot.line(x='release_date', y='youtube_avg_watch_duration', color=None, title='CustomTitle', labels={'release_date':'release_date', 'youtube_avg_watch_duration':'youtube_avg_watch_duration'})\""
1156 |       ]
1157 |      },
1158 |      "execution_count": 35,
1159 |      "metadata": {},
1160 |      "output_type": "execute_result"
1161 |     }
1162 |    ],
1163 |    "source": [
1164 |     "cg.generate_code(\"line plot release_date and youtube_avg_watch_duration of df\", debug=True)"
1165 |    ]
1166 |   },
1167 |   {
1168 |    "cell_type": "code",
1169 |    "execution_count": 36,
1170 |    "metadata": {},
1171 |    "outputs": [
1172 |     {
1173 |      "name": "stdout",
1174 |      "output_type": "stream",
1175 |      "text": [
1176 |       "Intent: scatter plot of column1 and column2 of df  Intent_id: 14  Similarity 0.90176094\n",
1177 |       "Entities:\n",
1178 |       "spotify_streams 16 31 COLNAME\n",
1179 |       "youtube_views 36 49 COLNAME\n",
1180 |       "df 53 55 VARNAME\n",
1181 |       "----------\n"
1182 |      ]
1183 |     },
1184 |     {
1185 |      "data": {
1186 |       "text/plain": [
1187 |        "\"df.plot.scatter(x='spotify_streams', y='youtube_views', color=None, size=None, title='CustomTitle', labels={'spotify_streams':'spotify_streams', 'youtube_views':'youtube_views'})\""
1188 |       ]
1189 |      },
1190 |      "execution_count": 36,
1191 |      "metadata": {},
1192 |      "output_type": "execute_result"
1193 |     }
1194 |    ],
1195 |    "source": [
1196 |     "cg.generate_code(\"scatter plot of spotify_streams and youtube_views of df\", debug=True)"
1197 |    ]
1198 |   },
1199 |   {
1200 |    "cell_type": "code",
1201 |    "execution_count": 37,
1202 |    "metadata": {},
1203 |    "outputs": [
1204 |     {
1205 |      "name": "stdout",
1206 |      "output_type": "stream",
1207 |      "text": [
1208 |       "Intent: Bar of column1 and column2 from df  Intent_id: 7  Similarity 0.84774184\n",
1209 |       "Entities:\n",
1210 |       "release_date 18 30 COLNAME\n",
1211 |       "df 63 65 VARNAME\n",
1212 |       "----------\n",
1213 |       "Error:  Didn't detect the column name\n"
1214 |      ]
1215 |     },
1216 |     {
1217 |      "data": {
1218 |       "text/plain": [
1219 |        "\"#Couldn't extract column names, replacing with default\\npx.bar(x='xxx',y='yyy',data_frame=df,title='CustomTitle', labels={'xxx':'xxx','yyy':'yyy'})\""
1220 |       ]
1221 |      },
1222 |      "execution_count": 37,
1223 |      "metadata": {},
1224 |      "output_type": "execute_result"
1225 |     }
1226 |    ],
1227 |    "source": [
1228 |     "cg.generate_code(\"plot line plot of release_date & youtube_avg_watch_duration of df\", debug=True)"
1229 |    ]
1230 |   },
1231 |   {
1232 |    "cell_type": "code",
1233 |    "execution_count": 6,
1234 |    "metadata": {},
1235 |    "outputs": [
1236 |     {
1237 |      "name": "stdout",
1238 |      "output_type": "stream",
1239 |      "text": [
1240 |       "Intent: line chart of column1 and column2 of df  Intent_id: 13  Similarity 0.83657575\n",
1241 |       "Entities:\n",
1242 |       "release_date 13 25 COLNAME\n",
1243 |       "df 58 60 VARNAME\n",
1244 |       "----------\n",
1245 |       "Error:  Didn't detect the column name\n"
1246 |      ]
1247 |     },
1248 |     {
1249 |      "data": {
1250 |       "text/plain": [
1251 |        "\"#Couldn't extract column names, replacing with default\\ndf.plot.line(x='xxx', y='yyy', color=None, title='CustomTitle', labels={'xxx':'xxx', 'yyy':'yyy'})\""
1252 |       ]
1253 |      },
1254 |      "execution_count": 6,
1255 |      "metadata": {},
1256 |      "output_type": "execute_result"
1257 |     }
1258 |    ],
1259 |    "source": [
1260 |     "cg.generate_code(\"line plot of release_date & youtube_avg_watch_duration of df\", debug=True)"
1261 |    ]
1262 |   },
1263 |   {
1264 |    "cell_type": "code",
1265 |    "execution_count": 7,
1266 |    "metadata": {},
1267 |    "outputs": [
1268 |     {
1269 |      "name": "stdout",
1270 |      "output_type": "stream",
1271 |      "text": [
1272 |       "Intent: line chart of column1 and column2 of df  Intent_id: 13  Similarity 0.84694755\n",
1273 |       "Entities:\n",
1274 |       "release_date 20 32 COLNAME\n",
1275 |       "df 65 67 VARNAME\n",
1276 |       "----------\n",
1277 |       "Error:  Didn't detect the column name\n"
1278 |      ]
1279 |     },
1280 |     {
1281 |      "data": {
1282 |       "text/plain": [
1283 |        "\"#Couldn't extract column names, replacing with default\\ndf.plot.line(x='xxx', y='yyy', color=None, title='CustomTitle', labels={'xxx':'xxx', 'yyy':'yyy'})\""
1284 |       ]
1285 |      },
1286 |      "execution_count": 7,
1287 |      "metadata": {},
1288 |      "output_type": "execute_result"
1289 |     }
1290 |    ],
1291 |    "source": [
1292 |     "cg.generate_code(\"show a line plot of release_date & youtube_avg_watch_duration of df\", debug=True)"
1293 |    ]
1294 |   },
1295 |   {
1296 |    "cell_type": "code",
1297 |    "execution_count": 5,
1298 |    "metadata": {},
1299 |    "outputs": [
1300 |     {
1301 |      "name": "stdout",
1302 |      "output_type": "stream",
1303 |      "text": [
1304 |       "Intent: line chart of column1 and column2 of df  Intent_id: 13  Similarity 0.8588614\n",
1305 |       "Entities:\n",
1306 |       "release_date 5 17 CARDINAL\n",
1307 |       "df 50 52 VARNAME\n",
1308 |       "----------\n",
1309 |       "Error:  Didn't detect the column name\n"
1310 |      ]
1311 |     },
1312 |     {
1313 |      "data": {
1314 |       "text/plain": [
1315 |        "\"#Couldn't extract column names, replacing with default\\ndf.plot.line(x='xxx', y='yyy', color=None, title='CustomTitle', labels={'xxx':'xxx', 'yyy':'yyy'})\""
1316 |       ]
1317 |      },
1318 |      "execution_count": 5,
1319 |      "metadata": {},
1320 |      "output_type": "execute_result"
1321 |     }
1322 |    ],
1323 |    "source": [
1324 |     "cg.generate_code(\"show release_date & youtube_avg_watch_duration of df in a line plot\", debug=True)"
1325 |    ]
1326 |   },
1327 |   {
1328 |    "cell_type": "code",
1329 |    "execution_count": 5,
1330 |    "metadata": {},
1331 |    "outputs": [
1332 |     {
1333 |      "name": "stdout",
1334 |      "output_type": "stream",
1335 |      "text": [
1336 |       "Intent: barplot $colname and $colname columns of $varname  Intent_id: 7  Similarity 0.89604545\n",
1337 |       "Entities:\n",
1338 |       "release_date 5 17 CARDINAL\n",
1339 |       "df 50 52 VARNAME\n",
1340 |       "----------\n",
1341 |       "Error:  Didn't detect the column name\n"
1342 |      ]
1343 |     },
1344 |     {
1345 |      "data": {
1346 |       "text/plain": [
1347 |        "\"#Couldn't extract column names, replacing with default\\npx.bar(x='xxx',y='yyy',data_frame=df,title='CustomTitle', labels={'xxx':'xxx','yyy':'yyy'})\""
1348 |       ]
1349 |      },
1350 |      "execution_count": 5,
1351 |      "metadata": {},
1352 |      "output_type": "execute_result"
1353 |     }
1354 |    ],
1355 |    "source": [
1356 |     "cg.generate_code(\"show release_date & youtube_avg_watch_duration of df in a bar plot\", debug=True)"
1357 |    ]
1358 |   },
1359 |   {
1360 |    "cell_type": "code",
1361 |    "execution_count": 6,
1362 |    "metadata": {},
1363 |    "outputs": [
1364 |     {
1365 |      "name": "stdout",
1366 |      "output_type": "stream",
1367 |      "text": [
1368 |       "Intent: barplot $colname and $colname columns of $varname  Intent_id: 7  Similarity 0.8960455\n",
1369 |       "Entities:\n",
1370 |       "release_date 5 17 CARDINAL\n",
1371 |       "df 50 52 VARNAME\n",
1372 |       "----------\n",
1373 |       "Error:  Didn't detect the column name\n"
1374 |      ]
1375 |     },
1376 |     {
1377 |      "data": {
1378 |       "text/plain": [
1379 |        "\"#Couldn't extract column names, replacing with default\\npx.bar(x='xxx',y='yyy',data_frame=df,title='CustomTitle', labels={'xxx':'xxx','yyy':'yyy'})\""
1380 |       ]
1381 |      },
1382 |      "execution_count": 6,
1383 |      "metadata": {},
1384 |      "output_type": "execute_result"
1385 |     }
1386 |    ],
1387 |    "source": [
1388 |     "cg.generate_code(\"show release_date & youtube_avg_watch_duration of df in a bar plot\", debug=True)"
1389 |    ]
1390 |   },
1391 |   {
1392 |    "cell_type": "code",
1393 |    "execution_count": 7,
1394 |    "metadata": {},
1395 |    "outputs": [
1396 |     {
1397 |      "name": "stdout",
1398 |      "output_type": "stream",
1399 |      "text": [
1400 |       "Intent: switch to dark theme  Intent_id: 17  Similarity 0.9999999\n",
1401 |       "Entities:\n",
1402 |       "----------\n"
1403 |      ]
1404 |     },
1405 |     {
1406 |      "data": {
1407 |       "text/plain": [
1408 |        "\"import plotly.io as pio\\npio.templates.default = 'plotly_dark'\""
1409 |       ]
1410 |      },
1411 |      "execution_count": 7,
1412 |      "metadata": {},
1413 |      "output_type": "execute_result"
1414 |     }
1415 |    ],
1416 |    "source": [
1417 |     "cg.generate_code(\"switch to dark theme\", debug=True)"
1418 |    ]
1419 |   },
1420 |   {
1421 |    "cell_type": "code",
1422 |    "execution_count": null,
1423 |    "metadata": {},
1424 |    "outputs": [],
1425 |    "source": []
1426 |   }
1427 |  ],
1428 |  "metadata": {
1429 |   "kernelspec": {
1430 |    "display_name": "Python 3",
1431 |    "language": "python",
1432 |    "name": "python3"
1433 |   },
1434 |   "language_info": {
1435 |    "codemirror_mode": {
1436 |     "name": "ipython",
1437 |     "version": 3
1438 |    },
1439 |    "file_extension": ".py",
1440 |    "mimetype": "text/x-python",
1441 |    "name": "python",
1442 |    "nbconvert_exporter": "python",
1443 |    "pygments_lexer": "ipython3",
1444 |    "version": "3.6.9"
1445 |   }
1446 |  },
1447 |  "nbformat": 4,
1448 |  "nbformat_minor": 4
1449 | }
1450 | 


--------------------------------------------------------------------------------
/notebooks/Episodes.csv:
--------------------------------------------------------------------------------
 1 | episode_id,episode_name,heroes,heroes_gender,heroes_location,heroes_nationality,heroes_kaggle_username,heroes_twitter_handle,category,flavour_of_tea,recording_date,recording_time,release_date,episode_duration,youtube_url,youtube_thumbnail_type,youtube_impressions,youtube_impression_views,youtube_ctr,youtube_nonimpression_views,youtube_views,youtube_watch_hours,youtube_avg_watch_duration,youtube_likes,youtube_dislikes,youtube_comments,youtube_subscribers,anchor_url,anchor_thumbnail_type,anchor_plays,spotify_starts,spotify_streams,spotify_listeners,apple_listeners,apple_listened_hours,apple_avg_listen_duration
 2 | E0,Chai Time Data Science Launch Announcement,,,,,,,Other,Masala Chai,2019-07-15,Evening,2019-07-21,157,https://www.youtube.com/watch?v=Ko_gxs42lM8,1,4433,86,1.94,45,131,3,82,4,0,2,3,https://anchor.fm/chaitimedatascience/episodes/Chai-Time-Data-Science-Launch-Announcement-e4mas9,0,553,491,262,359,29,1,117
 3 | E1,"Kaggle Triple Grandmaster, Abhishek Thakur Interview",Abhishek Thakur,Male,Norway,India,abhishek,abhi1thakur,Kaggle,Ginger Chai,2019-07-14,Evening,2019-07-22,2995,https://www.youtube.com/watch?v=Ezbo57Z33N8,0,25212,845,3.35,683,1528,142,335,55,0,5,60,https://anchor.fm/chaitimedatascience/episodes/Kaggle-Triple-Grandmaster--Abhishek-Thakur-Interview-e4mjoi,0,1271,826,608,456,56,25,1621
 4 | E2,"Interview with Kaggle Master, ML Engineer: Ryan Chesler",Ryan Chesler,Male,USA,USA,ryches,ryan_chesler,Kaggle,Masala Chai,2019-07-20,Afternoon,2019-07-26,2118,https://www.youtube.com/watch?v=SJVMSKig14k,0,3282,84,2.56,44,128,14,394,7,0,1,3,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Kaggle-Master--ML-Engineer-Ryan-Chesler--Chai-Time-Data-Science-e4ntbt,0,681,398,274,214,19,10,1879
 5 | E3,"Interview with CEO of SharpestMinds, Edouard Harris",Edouard Harris,Male,Canada,Canada,,neutronsNeurons,Industry,Kashmiri Kahwa,2019-07-23,Night,2019-07-29,3072,https://www.youtube.com/watch?v=69urmSt34Ac,0,2376,38,1.60,57,95,11,417,2,0,0,1,https://anchor.fm/chaitimedatascience/episodes/Interview-with-CEO-of-SharpestMinds--Edouard-Harris--Chai-Time-Data-Science-e4nti6,0,638,334,230,169,10,4,1344
 6 | E4,Data Science for Good: City of LA Kaggle Winning Solution Interview with Kaggle Kernels Grandmaster Shivam Bansal,Shivam Bansal,Male,Singapore,India,shivamb,shivamshaz,Kaggle,Apple Cinnamon,2019-07-14,Morning,2019-08-02,1048,https://www.youtube.com/watch?v=wMYX3KABHCk,0,3884,116,2.99,36,152,9,213,4,0,0,4,https://anchor.fm/chaitimedatascience/episodes/Data-Science-for-Good-City-of-LA-Kaggle-Winning-Solution-Interview-with-Kaggle-Kernels-Grandmaster-Shivam-Bansal-e4qc36,0,495,201,139,123,17,3,633
 7 | E5,"Deep Learning Research, Hardware, Kaggle | Interview with Tim Dettmers",Tim Dettmers,Male,USA,Germany,timdettmers,Tim_Dettmers,Research,Kashmiri Kahwa,2019-07-24,Night,2019-08-05,6242,https://www.youtube.com/watch?v=8Fp9m4fNDQ4,0,2937,60,2.04,76,136,22,582,4,0,1,6,https://anchor.fm/chaitimedatascience/episodes/Deep-Learning-Research--Hardware--Kaggle--Interview-with-Tim-Dettmers-e4qcad,0,668,279,198,125,10,6,2213
 8 | E6,Interview with Kaggle Kernels GM: Shivam Bansal,Shivam Bansal,Male,Singapore,India,shivamb,shivamshaz,Kaggle,Apple Cinnamon,2019-07-14,Morning,2019-08-09,2776,https://www.youtube.com/watch?v=X73CzKIhqs8,0,5664,146,2.58,118,264,32,436,17,0,3,10,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Kaggle-Kernels-GM-Shivam-Bansal-e4qcbe,0,602,248,180,129,17,7,1516
 9 | E7,Interview with Kaggle Kernels Grandmaster #1: Artgor | Andrew Lukyanenko,Andrey Lukyanenko,Male,Russia,Russia,artgor,AndLukyane,Kaggle,Tulsi Chai,2019-07-10,Evening,2019-08-13,2459,https://www.youtube.com/watch?v=rpClh8WmTdo,0,2694,37,1.37,33,70,5,257,3,0,0,0,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Kaggle-Kernels-Grandmaster-1-Artgor--Andrew-Lukyanenko-e4r6du,0,1283,431,304,251,38,15,1432
10 | E8,"NVIDIA's DALI Library, Image Augmentations Discussion: James Dellinger",James Dellinger,Male,USA,USA,jamesdellinger,jamrdell,Industry,Masala Chai,2019-06-23,Morning,2019-08-16,1949,https://www.youtube.com/watch?v=4kMEdDcBt00,0,2862,84,2.94,45,129,12,335,1,0,0,3,https://anchor.fm/chaitimedatascience/episodes/NVIDIAs-DALI-Library--Image-Augmentations-Discussion-Interview-with-James-Dellinger-e4r6f7,0,324,98,62,66,13,4,1242
11 | E9,Albumentations Framework: a fast image augmentations library | Interview with Dr. Vladimir Iglovikov,Vladimir Iglovikov,Male,USA,Russia,iglovikov,viglovikov,Kaggle,Ginger Chai,2019-07-18,Night,2019-08-20,3182,https://www.youtube.com/watch?v=JS9xol0VmA4,0,3152,92,2.92,63,155,13,302,2,1,0,0,https://anchor.fm/chaitimedatascience/episodes/Albumentations-Framework-a-fast-image-augmentations-library--Interview-with-Dr--Vladimir-Iglovikov-e4r6e4,0,385,125,74,63,8,4,1758
12 | E10,"Interview with the Leader of mlcourse.ai, Dr. Yury Kashnitsky",Yury Kashnitsky,Male,USA,Russia,kashnitsky,ykashnitsky,Kaggle,Ginger Chai,2019-08-12,Morning,2019-08-24,3727,https://www.youtube.com/watch?v=guvFOjxdeeA,0,3714,63,1.70,45,108,13,433,4,0,0,3,https://anchor.fm/chaitimedatascience/episodes/Interview-with-the-Leader-of-mlcourse-ai--Dr--Yury-Kashnitsky--Chai-Time-Data-Science-e52r5u,0,803,225,150,90,36,23,2319
13 | E11,"MuseNet, OpenAI and Deep Learning Research: Interview with Christine Payne",Christine Payne,Female,USA,USA,,mcleavey,Research,Kashmiri Kahwa,2019-07-29,Night,2019-08-26,3640,https://www.youtube.com/watch?v=xwXIaDrQvwE,0,5475,158,2.89,121,279,32,413,12,0,2,7,https://anchor.fm/chaitimedatascience/episodes/MuseNet--OpenAI-and-Deep-Learning-Research-Interview-with-Christine-Payne-e4r6hb,0,502,191,108,102,12,7,2012
14 | E13,"Deep Learning Applied to Audio, Self Studying ML | Interview with fast.ai fellow Robert Bracco",Robert Bracco,Male,USA,USA,madeupmasters,MadeUpMasters,Kaggle,Tulsi Chai,2019-07-12,Night,2019-09-02,2950,https://www.youtube.com/watch?v=k-gZAyg5ib8,0,4153,95,2.29,66,161,21,470,6,0,0,1,https://anchor.fm/chaitimedatascience/episodes/Deep-Learning-Applied-to-Audio--Self-Studying-ML--Interview-with-fast-ai-fellow-Robert-Bracco-e4r6d9,0,670,209,146,117,20,12,2190
15 | E14,"And the Bit Goes Down, Deep Learning Research, Research at FAIR | Interview with Pierre Stock",Pierre Stock,Male,France,France,,PierreStock,Research,Masala Chai,2019-08-12,Night,2019-09-06,2794,https://www.youtube.com/watch?v=I1xf8lPU3cs,0,3206,29,0.90,14,43,2,167,1,0,0,0,https://anchor.fm/chaitimedatascience/episodes/And-the-Bit-Goes-Down--Deep-Learning-Research--Research-at-FAIR--Interview-with-Pierre-Stock-e52rpk,0,391,114,61,70,19,6,1130
16 | E15,"Medical Science, Open Source and AI | Interview with Dr. Judy Gichoya",Judy Gichoya,Female,USA,Africa,,judywawira,Industry,Apple Cinnamon,2019-08-14,Night,2019-09-09,3797,https://www.youtube.com/watch?v=X9k7TWUc4Og,0,3704,27,0.73,120,147,9,220,4,0,0,2,https://anchor.fm/chaitimedatascience/episodes/Medical-Science--Open-Source-and-AI--Interview-with-Dr--Judy-Gichoya-e52rst,0,514,148,97,97,17,8,1610
17 | E16,"Producing a Chainsmokers Remix with AI, DAWNBench & fastai | Interview with Andrew Shaw",Andrew Shaw,Male,USA,USA,,bearpelican,Industry,Tulsi Chai,2019-08-27,Night,2019-09-29,2243,https://www.youtube.com/watch?v=zbYeCWG9GIU,0,4602,58,1.26,49,107,8,269,8,0,0,1,https://anchor.fm/chaitimedatascience/episodes/Producing-a-Chainsmokers-Remix-with-AI-MusicAutobot--DAWNBench--fast-ai--Interview-with-Andrew-Shaw-e5kvdq,0,382,112,65,74,7,2,1191
18 | E17,"Hugging Face, Transformers | NLP Research and Open Source | Interview with Julien Chaumond",Julien Chaumond,Male,USA,France,,julien_c,Industry,Tulsi Chai,2019-08-29,Night,2019-10-03,3861,https://www.youtube.com/watch?v=ejWkDviM5QM,0,17892,514,2.87,308,822,68,298,21,1,0,19,https://anchor.fm/chaitimedatascience/episodes/Hugging-Face--Transformers--NLP-Research-and-Open-Source--Interview-with-Julien-Chaumond-e5o819,0,849,245,164,129,35,17,1730
19 | E18,"Generative Deep Learning, Technical Writing & Data Science Consulting | Interview with David Foster",David Foster,Male,UK,UK,,,Industry,Tulsi Chai,2019-08-25,Night,2019-10-16,2592,https://www.youtube.com/watch?v=4gXzka76AJk,0,3317,59,1.78,159,218,20,330,4,0,0,4,https://anchor.fm/chaitimedatascience/episodes/Generative-Deep-Learning--Technical-Writing--Data-Science-Consulting--Interview-with-David-Foster-e7kqkd,0,576,152,91,85,18,5,1066
20 | E19,Chip Huyen Interview: Machine Learning Interviews | MOOCS and Deep Learning at NVIDIA,Chip Huyen,Female,USA,Vietnam,,chipro,Industry,Masala Chai,2019-08-21,Morning,2019-10-19,3603,https://www.youtube.com/watch?v=cWwlou9aYUA,0,5841,494,8.46,246,740,40,195,13,0,0,7,https://anchor.fm/chaitimedatascience/episodes/Chip-Huyen-Interview-Machine-Learning-Interviews--MOOCS-and-Deep-Learning-at-NVIDIA-e7kqun,0,951,254,176,150,50,22,1574
21 | E20,Gold Medalling in First Kaggle Comp | Predicting Molecular Prop: 4 GM & The Brain | Boris Dorado,Boris Dorado,Male,France,France,borisdee,,Kaggle,Ginger Chai,2019-09-22,Afternoon,2019-10-22,3152,https://www.youtube.com/watch?v=WaCBeiyQw4Q,0,2628,47,1.79,17,64,5,281,2,0,0,1,https://anchor.fm/chaitimedatascience/episodes/Boris-Dorado-Interview--Gold-Medalling-in-First-Kaggle-Comp--Predicting-Molecular-Prop-Kaggle-Comp-Solution-e7m6s3,0,576,121,80,71,26,13,1822
22 | E21,Swift For Tensorflow | Software Engineering Internship at Google | Interview with Bart Chrazaszcz,Bart Chrazaszcz,Male,Canada,Canada,,bart_chr,Research,Masala Chai,2019-09-28,Morning,2019-10-26,2141,https://www.youtube.com/watch?v=OTXKRPGhQu8,0,3970,113,2.85,192,305,24,283,8,0,0,8,https://anchor.fm/chaitimedatascience/episodes/Swift-For-Tensorflow--Software-Engineering-Internship-at-Google--Interview-with-Bart-Chrazaszcz-e88ll2,0,435,94,51,61,17,7,1471
23 | E22,DeepMind & AlphaGo | Deep Learning Research | Swift For Tensorflow | Interview with Dr. Marc Lanctot,Marc Lactot,Male,Canada,Canada,,sharky6000,Research,Apple Cinnamon,2019-08-27,Night,2019-10-27,4284,https://www.youtube.com/watch?v=-uUHR5VALB0,0,5959,168,2.82,476,644,66,369,19,0,1,17,https://anchor.fm/chaitimedatascience/episodes/DeepMind--AlphaGo--Deep-Learning-Research--Swift-For-Tensorflow--Interview-with-Dr--Marc-Lanctot-e7ks0s,0,693,177,119,110,42,28,2384
24 | E12,Freelancing in Machine Learning | Interview with Tuatini Godard,Tuatini Godard,Male,France,France,ekami66,,Industry,Kashmiri Kahwa,2019-07-11,Morning,2019-10-29,2684,https://www.youtube.com/watch?v=AwJpKBMog6c,0,3659,61,1.67,53,114,17,537,4,0,0,2,,,,,,,,,
25 | E23,Predicting Molecular Prop Kaggle Comp 2nd Place Sol | Interview with Kaggle Master Andres Torrubia,Andres Torrubia,Male,Spain,Spain,antorsae,antor,Kaggle,Apple Cinnamon,2019-09-01,Night,2019-11-01,7876,https://www.youtube.com/watch?v=sqo2h7aYPPk,0,7213,150,2.08,546,696,68,352,17,0,3,24,https://anchor.fm/chaitimedatascience/episodes/Predicting-Molecular-Prop-Kaggle-Comp-2nd-Place-Sol--Interview-with-Kaggle-Master-Andres-Torrubia-e8h0tj,0,673,166,109,81,22,31,5122
26 | E24,Medalling in all Entered Kaggle Comp | IEEE-CIS Comp 6th Pos Sol | Interview with Dr Philipp Singer,Philipp Singer,Male,Austria,Austria,philippsinger,ph_singer,Kaggle,Masala Chai,2019-10-12,Evening,2019-11-04,3825,https://www.youtube.com/watch?v=7sh5QrUIAHI,0,8570,272,3.17,622,894,115,463,22,1,3,19,https://anchor.fm/chaitimedatascience/episodes/Medalling-in-all-Entered-Kaggle-Comp--IEEE-CIS-Comp-6th-Pos-Sol--Interview-with-Dr-Philipp-Singer-e7m6s8,0,753,217,136,98,29,22,2687
27 | E25,"Kaggle Discussions Rank #1, x2 Grandmaster: Dr. Jean Francois Puget | IEEE-CIS Comp 2nd Pos Sol",Jean Francois Puget,Male,France,France,cpmpml,jfpuget,Kaggle,Masala Chai,2019-10-11,Afternoon,2019-11-21,3462,https://www.youtube.com/watch?v=wqHlAOFSFuQ,0,8071,329,4.08,635,964,103,385,29,1,1,30,https://anchor.fm/chaitimedatascience/episodes/Kaggle-Discussions-Rank-1--x2-Grandmaster-Dr--Jean-Francois-Puget--IEEE-CIS-Comp-2nd-Pos-Sol-e7m6s7,1,776,205,148,111,48,23,1719
28 | E26,DistilBERT | Research at Hugging Face | NLP and Open Source | Interview with Victor Sanh,Victor Sanh,Male,USA,France,,sanhestpasmoi,Research,Masala Chai,2019-10-14,Morning,2019-12-05,2972,https://www.youtube.com/watch?v=n7zZzUwqBig,0,14915,601,4.03,633,1234,86,251,20,0,1,28,https://anchor.fm/chaitimedatascience/episodes/DistilBERT--Research-at-Hugging-Face--NLP-and-Open-Source--Interview-with-Victor-Sanh-e9bp0n,1,684,130,86,83,41,17,1503
29 | E27,Interview with Jeremy Howard | fast.ai | Kaggle | Machine Learning Research,Jeremy Howard,Male,USA,Australia,jhoward,jeremyphoward,Industry,Sulemani Chai,2019-11-18,Night,2019-12-08,4851,https://www.youtube.com/watch?v=205j37G1cxw,0,27596,1297,4.70,3205,4502,704,563,183,3,16,139,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Jeremy-Howard--fast-ai--Kaggle--Machine-Learning-Research-e9ddbh,1,1527,466,373,204,96,83,3117
30 | E28,Interview with Arno Candel | AutoML | Physics | H2O.ai,Arno Candel,Male,USA,Switzerland,arnocandel,arnocandel,Industry,Sulemani Chai,2019-11-12,Afternoon,2019-12-12,4070,https://www.youtube.com/watch?v=72nXl0tvgc0,0,12900,245,1.90,577,822,71,312,10,0,1,6,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Arno-Candel--AutoML--Physics--H2O-ai--CTDS-Show-e9doo2,1,950,204,137,101,51,25,1743
31 | E29,"Eugene Khvedchenya | Kaggle, Computer Vision & Best Code Practises | Severstal Steel, 4th Pos Sol",Eugene Khvedchenya,Male,Ukraine,Ukraine,bloodaxe,cvtalks,Kaggle,Sulemani Chai,2019-10-28,Evening,2020-01-10,3127,https://www.youtube.com/watch?v=TwhJfZHx10s,1,7121,213,2.99,458,671,46,247,30,1,1,14,https://anchor.fm/chaitimedatascience/episodes/Eugene-Khvedchenya--Kaggle--Computer-Vision--Best-Code-Practises--Severstal-Steel--4th-Pos-Sol-ea34a8,1,624,100,67,57,34,13,1370
32 | E30,"Interview with x2 Kaggle GM: Dr. Olivier Grellier | Kaggle, Data Science & H2O.ai",Olivier Grellier,Male,France,France,ogrellier,,Kaggle,Ginger Chai,2019-11-18,Evening,2020-01-13,2276,https://www.youtube.com/watch?v=96YP13cxoq4,0,10500,189,1.80,568,757,58,277,17,0,2,18,https://anchor.fm/chaitimedatascience/episodes/Interview-with-x2-Kaggle-GM-Dr--Olivier-Grellier--Kaggle--Data-Science--H2O-ai-ea5ekr,1,564,81,49,50,32,12,1306
33 | E31,"Interview with Even Oldridge | Applied Research, Top Down Learning & Fast.ai | NVIDIA & Rapids.ai",Even Oldridge,Male,Canada,Canada,evenoldridge,Even_Oldridge,Industry,Masala Chai,2019-11-28,Morning,2020-01-16,5213,https://www.youtube.com/watch?v=-WzXIV8P_Jk,0,7371,130,1.76,190,320,39,439,5,2,2,5,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Even-Oldridge--Applied-Research--Top-Down-Learning--Fast-ai--NVIDIA--Rapids-ai-ea7hon,1,561,83,56,42,32,24,2714
34 | E32,"Interview with Leland Wilkinson | Grammar of Graphics | Open Source, Statistics & Software Dev",Leland Wilkinson,Male,USA,USA,,,Industry,Sulemani Chai,2019-12-16,Night,2020-01-19,3751,https://www.youtube.com/watch?v=j5dxrptSBYw,0,12300,148,1.20,321,469,51,394,17,0,1,11,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Leland-Wilkinson--Grammar-of-Graphics--Open-Source--Statistics--Software-Dev-ea9fj4,1,530,61,41,34,24,10,1559
35 | E33,"Interview with Kaggle Legend: Gilberto Titericz | Giba, Former #1 | Data Science & Kaggle",Gilberto Titericz,Male,Brazil,Brazil,titericz,giba1,Kaggle,Sulemani Chai,2019-11-30,Morning,2020-01-23,3844,https://www.youtube.com/watch?v=MpYeDKw8EOg,1,9232,328,3.55,1084,1412,181,461,51,1,4,35,https://anchor.fm/chaitimedatascience/episodes/Kaggle-Legend-Gilberto-Titericz--Giba--Former-1--Data-Science--Kaggle-ea9vdn,1,618,112,84,54,29,19,2343
36 | E34,Dmitry Gordeev & Philipp Singer | What does it take to win a Kaggle Comp? | NFL Data Bowl Win Sol,Dmitry Gordeev | Philipp Singer,Male,Austria,Austria,dott1718 | philippsinger,dott1718 | ph_singer,Kaggle,Sulemani Chai,2020-01-15,Night,2020-01-26,4657,https://www.youtube.com/watch?v=_Srv0bKmfjY,1,7382,256,3.47,834,1090,144,476,30,1,0,18,https://anchor.fm/chaitimedatascience/episodes/Dmitry-Gordeev--Philipp-Singer--What-does-it-take-to-win-a-Kaggle-Comp---NFL-Data-Bowl-Win-Sol-eaaihi,1,621,108,69,54,35,28,2887
37 | E35,"Rohan Rao | Numbers, Data Science & Kaggle | ASHRAE - Great Energy Predictor 2nd Pos Sol",Rohan Rao,Male,India,India,rohanrao,vopani,Kaggle,Ginger Chai,2020-01-10,Morning,2020-01-30,6151,https://www.youtube.com/watch?v=4nVL4ICMNcw,0,14000,420,3.00,735,1155,132,413,36,0,1,26,https://anchor.fm/chaitimedatascience/episodes/Rohan-Rao--Numbers--Data-Science--Kaggle--ASHRAE---Great-Energy-Predictor-2nd-Pos-Sol-eaa0db,1,590,114,74,56,22,12,1962
38 | E36,DeOldify | Fast.ai & NoGAN | Machine Learning & Software Engineering | Interview with Jason Antic,Jason Antic,Male,USA,USA,,citnaj,Industry,Herbal Tea,2019-12-21,Night,2020-02-02,3916,https://www.youtube.com/watch?v=A5Cq8SWudts,0,7696,189,2.46,586,775,71,330,31,1,8,21,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Jason-Antic--DeOldify--Fast-ai--NoGAN--Machine-Learning--Software-Engineering-eaj1t4,1,596,93,60,52,28,22,2816
39 | E37,The Story of Kaggle & Kaggle's Evolution | Interview with the CEO of Kaggle: Anthony Goldbloom,Anthony Goldbloom,Male,USA,Australia,antgoldbloom,antgoldbloom,Kaggle,Ginger Chai,2020-01-07,Morning,2020-02-06,3909,https://www.youtube.com/watch?v=jw2Z-IMyFYw,0,5883,127,2.16,340,467,48,370,32,1,7,11,https://anchor.fm/chaitimedatascience/episodes/The-Story-of-Kaggle--Kaggles-Evolution--Interview-with-the-CEO-of-Kaggle-Anthony-Goldbloom-ea9fj6,1,595,102,58,56,29,19,2414
40 | E38,Becoming The Youngest Kaggle Grandmaster | ML For Japanese Literature | Anokas: Mikel Bober-Irizar,Mikel Bober-Irizar,Male,UK,UK,anokas,mikb0b,Kaggle,Ginger Chai,2020-01-25,Evening,2020-02-09,3559,https://www.youtube.com/watch?v=maR9ibJ2r7g,0,9558,465,4.87,903,1368,148,389,51,0,5,29,https://anchor.fm/chaitimedatascience/episodes/Anokas-Mikel-Bober-Irizar--Becoming-The-Youngest-Kaggle-Grandmaster--ML-For-Japanese-Literature--Kaggle-eanr0n,1,674,160,118,55,32,23,2632
41 | E39,"Machine Learning, H2O.ai & Machine Learning Interpretability | Interview with Patrick Hall",Patrick Hall,Male,USA,USA,,jpatrickhall,Industry,Herbal Tea,2020-01-14,Evening,2020-02-13,3490,https://www.youtube.com/watch?v=TSmSBWnVSzc,0,10000,190,1.90,638,828,47,204,7,0,0,10,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Sr--Director-of-Product-at-H2O-ai-Patrick-Hall--Machine-Learning--H2O-ai--Machine-Learning-Interpretability-ea9ff0,1,530,75,53,47,21,12,2011
42 | E40,Interview with Zachary Mueller | Fast.ai: The course and New Library | SGs and Top Down Learning,Zachary Mueller,Male,USA,USA,muellerzr,TheZachMueller,Industry,Herbal Tea,2020-01-12,Evening,2020-02-16,3758,https://www.youtube.com/watch?v=AXr8pzXXUDQ,0,5528,166,3.00,377,543,61,404,24,0,0,15,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Zachary-Mueller--Fast-ai-The-course-and-New-Library--SGs-and-Top-Down-Learning-easari,1,602,99,75,51,26,18,2519
43 | E41,Software Engineering & Data Science | Machine Learning Interpretability | Open Source | Navdeep Gill,Navdeep Gill,Male,USA,USA,,Navdeep_Gill_,Industry,Herbal Tea,2020-02-05,Night,2020-02-20,3563,https://www.youtube.com/watch?v=9l8D8Ktlmpo,0,8900,125,1.40,199,324,20,222,10,0,1,3,https://anchor.fm/chaitimedatascience/episodes/Navdeep-Gill--Software-Engineering--Data-Science--Machine-Learning-Interpretability--Open-Source-eauhps,1,548,68,48,35,21,9,1565
44 | E42,"Statistics, Open Source & ML Research | Python for ML | Interview with Sebastian Raschka",Sebastian Raschka,Male,USA,Germany,,rasbt,Industry,Herbal Tea,2020-01-28,Morning,2020-02-23,4101,https://www.youtube.com/watch?v=beSLA-wO2T4,1,9478,275,2.90,577,852,61,258,39,1,1,40,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Sebastian-Raschka--Statistics--Open-Source--ML-Research--Python-for-ML-Book-eauidk,1,650,111,90,51,37,19,1891
45 | E43,Cyber-Security & Anti-Money Laundering | Applied AI & H2O AI | Interview with Dr. Ashrith Barthur,Ashrith Barthur,Male,USA,India,,cyberbaggage,Industry,Kesar Rose Chai,2020-02-21,Night,2020-02-27,2649,https://www.youtube.com/watch?v=pVhGyVr61ps,0,8400,126,1.50,434,560,42,270,10,0,1,6,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Dr--Ashrith-Barthur--Cyber-Security--Anti-Money-Laundering--Applied-AI--H2O-AI-eb1jti,1,469,25,14,16,22,8,1231
46 | E44,"Fast.ai, Learning to Learn | Machine Learning, Kaggle & Blogging | Interview with Radek Osmulski",Radek Osmulski,Male,Poland,Poland,radek1,radekosmulski,Industry,Herbal Tea,2020-02-20,Afternoon,2020-03-01,3936,https://www.youtube.com/watch?v=4h41v07bYYI,0,7607,260,3.42,588,848,122,518,49,2,5,24,https://anchor.fm/chaitimedatascience/episodes/Interview-with-fast-ai-hero-Radek-Osmulski--Fast-ai--Learning-to-Learn--Machine-Learning--Kaggle--Blogging-eav36v,1,657,113,88,55,30,25,3031
47 | E45,"Interview with Marios Michailidis | What does it take to become #1 on Kaggle | DSB 2019, 14th Pos Sol",Marios Michailidis,Male,UK,Greece,kazanova,stacknet_,Kaggle,Sulemani Chai,2020-02-14,Afternoon,2020-03-05,3276,https://www.youtube.com/watch?v=A3GvuHqGGZI,0,9700,213,2.20,545,758,62,294,13,0,3,10,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Marios-Michailidis--What-does-it-take-to-become-1-on-Kaggle--DSB-2019--14th-Pos-Sol-eb401g,2,470,49,32,32,13,1,230
48 | M0,00 Introduction & About: fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,624,https://www.youtube.com/watch?v=rzuIkj8lymc,2,3789,139,3.67,162,301,15,179,15,0,2,10,https://anchor.fm/chaitimedatascience/episodes/00-fast-ai-2019-Summaries--Things-Jeremy-Howard-says-to-do-eb97el,2,308,49,33,35,6,1,463
49 | M1,01: Lesson-1 Image Classification | fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,341,https://www.youtube.com/watch?v=RKtfgXz7Qo0,2,4643,163,3.51,56,219,7,115,8,0,2,1,https://anchor.fm/chaitimedatascience/episodes/01-fast-ai-Lesson-1-Image-Classification--fast-ai-2019--Things-Jeremy-Howard-says-to-do-eb97ek,2,368,37,32,29,10,1,504
50 | M2,02: Lesson-2 Production & SGD From Scratch | fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,316,https://www.youtube.com/watch?v=ahdybq2V-38,2,3144,63,2.00,37,100,3,108,2,1,0,0,https://anchor.fm/chaitimedatascience/episodes/02-fast-ai-Lesson-2-Production--SGD-From-Scratch--fast-ai-2019--Things-Jeremy-Howard-says-to-do-eb97eu,2,317,33,21,24,8,1,312
51 | M3,03: Lesson-3 Multi-label; SGD from scratch | fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,332,https://www.youtube.com/watch?v=Z-waVKLcLJE,2,2436,52,2.13,28,80,3,135,2,0,0,0,https://anchor.fm/chaitimedatascience/episodes/03-fast-ai-Lesson-3-Multi-label-SGD-from-scratch--fast-ai-2019--Things-Jeremy-Howard-says-to-do-eb97em,2,276,20,13,16,11,1,260
52 | M4,04: Lesson-4 NLP:Tabular Data; Recsys | fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,281,https://www.youtube.com/watch?v=5CW3QdGdr8c,2,2592,40,1.54,23,63,2,114,3,0,0,1,https://anchor.fm/chaitimedatascience/episodes/04-fast-ai-Lesson-4-NLPTabular-Data-Recsys--fast-ai-2019--Things-Jeremy-Howard-says-to-do-eb97f0,2,301,24,17,17,10,7,2547
53 | M5,05: Lesson 5: Backprop; Neural Nets from scratch | fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,311,https://www.youtube.com/watch?v=RIGlXwvUo_Q,2,2536,26,1.03,11,37,1,97,0,0,0,0,https://anchor.fm/chaitimedatascience/episodes/05-fast-ai-Lesson-5-Backprop-Neural-Nets-from-scratch--fast-ai-2019--Things-Jeremy-Howard-says-to-do-eb97f8,2,279,18,16,15,15,2,479
54 | M6,06: Lesson-6 CNN Deep Dive; Ethics | fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,412,https://www.youtube.com/watch?v=nAE8tq_SIXo,2,3572,49,1.37,33,82,2,88,2,0,0,0,https://anchor.fm/chaitimedatascience/episodes/06-fast-ai-Lesson-6-CNN-Deep-Dive-Ethics--fast-ai-2019--Things-Jeremy-Howard-says-to-do-eb97fn,2,275,27,13,17,11,2,515
55 | M7,07: Lesson-7 ResNet; U-Net; GANs | fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,467,https://www.youtube.com/watch?v=0eWG6apI1iY,2,2381,22,0.92,20,42,2,171,1,0,0,0,https://anchor.fm/chaitimedatascience/episodes/07-fast-ai-Lesson-7-ResNet-U-Net-GANs--fast-ai-2019--Things-Jeremy-Howard-says-to-do-eb97fq,2,281,19,14,17,9,1,288
56 | M8,"08: Where to go from here, General fast.ai advice",,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,605,https://www.youtube.com/watch?v=oOr-7hYaU8o,2,2133,33,1.55,11,44,2,164,1,0,0,0,https://anchor.fm/chaitimedatascience/episodes/08-Where-to-go-from-here--General-fast-ai-advice-eb97g2,2,376,26,17,22,8,1,301
57 | E46,Classical Japanese Lit & ML | Kuzushiji recog kaggle comp | Interview with Tarin Clanuwat,Tarin Clanuwat,Female,Japan,Japan,,tkasasagi,Research,Sulemani Chai,2020-02-03,Evening,2020-03-08,2162,https://www.youtube.com/watch?v=9E5JnTj8df0,1,4221,115,2.72,73,188,2,38,3,0,1,0,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Tarin-Clanuwat--Classical-Japanese-Literature--ML--Kuzushiji-recognition-kaggle-comp-eb4rh6,3,605,43,29,33,17,7,1472
58 | E47,"NFL 1st and Future: Analytics Winning Sol | ""Real World Data Sci"" & Kaggle | Interview with John Miller",John Miller,Male,USA,USA,jpmiller,johnmillertx,Kaggle,Herbal Tea,2020-02-03,Evening,2020-03-12,2936,https://www.youtube.com/watch?v=Ic__3zG-ab4,0,7400,96,1.30,141,237,19,295,13,0,0,1,https://anchor.fm/chaitimedatascience/episodes/Interview-w-John-Miller--NFL-1st-and-Future-Analytics-Winning-Sol--Real-World-Data-Sci--Kaggle-eb7p33,1,419,26,19,17,12,6,1652
59 | E48,Interview with ChristOf Henkel | Google Quest Q&A Labelling Comp 2nd Pos Sol | Rapids.ai & Kaggle,Christof Henkel,Male,Germany,Germany,christofhenkel,kagglingdieter,Kaggle,Ginger Chai,2020-02-12,Afternoon,2020-03-15,3180,https://www.youtube.com/watch?v=Q0_Xajic_9U,0,5196,162,3.12,731,893,88,355,32,1,2,23,https://anchor.fm/chaitimedatascience/episodes/Interview-with-ChristOf-Henkel--Google-Quest-QA-Labelling-Comp-2nd-Pos-Sol--Rapids-ai--Kaggle-eb4rpr,1,461,48,36,24,12,7,2119
60 | E49,Interview with Parul Pandey | Getting Started with Data Science & Blogging | Women in Data Science,Parul Pandey,Female,India,India,parulpandey,pandeyparul,Industry,Ginger Chai,2020-02-26,Morning,2020-03-19,3570,https://www.youtube.com/watch?v=DjBgB_fNXl0,0,17600,722,4.10,1439,2161,116,193,54,5,12,66,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Parul-Pandey--Getting-Started-with-Data-Science--Blogging--Women-in-Data-Science-eb4tr8,1,472,38,28,28,8,2,767
61 | E50,Inversion: Walter Reade | Data Science at Kaggle | Becoming a Data Scientist & Kaggle Grandmaster,Walter Reade,Male,USA,USA,inversion,walterreade,Kaggle,Ginger Chai,2020-02-03,Night,2020-03-22,2746,https://www.youtube.com/watch?v=OoB_LQpgDCk,0,5273,95,1.80,107,202,26,463,11,0,2,3,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Inversion-Walter-Reade--Data-Science-at-Kaggle--Becoming-a-Data-Scientist--Kaggle-Grandmaster-eb7k60,1,406,29,20,17,10,5,1799
62 | E51,"Interview with Sergey Kolesnikov | Catalyst: PyTorch Framework for DL & RL | Open Source, Soft. Engg",Sergey Kolesnikov,Male,Russia,Russia,scitator,scitator,Industry,Herbal Tea,2020-02-10,Morning,2020-03-26,4489,https://www.youtube.com/watch?v=1g6BpItJdJA,0,4856,140,2.88,303,443,22,179,22,2,0,8,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Sergey-Kolesnikov--Catalyst-PyTorch-Framework-for-DL--RL--Open-Source--Soft--Engg--Community-eb5c3j,1,426,20,12,15,9,4,1532
63 | E52,"Interview with Russ Wolfinger | Statistics, Data Science & Kaggle | NFL Big Data Bowl #14 Pos Sol",Russ Wolfinger,Male,USA,USA,sasrdw,,Kaggle,Ginger Chai,2020-02-10,Night,2020-03-29,4462,https://www.youtube.com/watch?v=akYeBUTXmT4,0,5882,99,1.68,286,385,27,252,18,0,1,9,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Russ-Wolfinger--Statistics--Data-Science--Kaggle--NFL-Big-Data-Bowl-14-Pos-Sol-eb5cnu,1,438,38,27,27,5,4,2849
64 | E53,Interview with Erin LeDell | H2O-AutoML & H2O.ai | Open Source | RLadies & WiMLDS Community,Erin LeDell,Female,USA,USA,ledell,ledell,Industry,Ginger Chai,2020-02-13,Night,2020-04-02,3428,https://www.youtube.com/watch?v=i2K42HNAoFM,0,7700,154,2.00,290,444,46,371,4,1,1,3,https://anchor.fm/chaitimedatascience/episodes/Interview-w-Erin-LeDell--H2O-AutoML--H2O-ai--Open-Source--RLadies--WiMLDS-Community-eb5irc,1,392,30,20,17,0,0,0
65 | E54,Interview with Sylvain Gugger | fast.ai: The new Framework & course | FastBook & Research at fast.ai,Sylvain Gugger,Male,USA,France,,guggersylvain,Industry,Herbal Tea,2020-02-13,Night,2020-04-05,2043,https://www.youtube.com/watch?v=-3fw9hxiop0,0,5417,175,3.23,522,697,68,351,35,1,3,15,https://anchor.fm/chaitimedatascience/episodes/Interview-w-Sylvain-Gugger--fast-ai-The-new-Framework--course--FastBook--Research-at-fast-ai-eb5a6a,1,510,49,37,29,17,5,1077
66 | E55,"SharpestMinds Team on Learning to Learn | Data Science, Startups & Hiring",Edouard Harris | Jeremie Harris | Russell Pollari,Male,Canada,Canada,,neutronsNeurons | russ_poll | jeremiecharris,Industry,Herbal Tea,2020-02-07,Morning,2020-04-09,4032,https://www.youtube.com/watch?v=vaWOS9GHB9c,1,5096,145,2.85,237,382,53,499,26,0,5,19,https://anchor.fm/chaitimedatascience/episodes/SharpestMinds-Team-on-Learning-to-Learn--Data-Science--Startups--Hiring-eb61lb,1,473,54,39,36,11,7,2284
67 | E56,"Interview with Dmytro Mushkin | Computer Vision Research | Kaggle, ML & Education",Dmytro Mushkin,Male,Czech Republic,Ukraine,oldufo,ducha_aiki,Kaggle,Herbal Tea,2020-02-10,Afternoon,2020-04-12,3164,https://www.youtube.com/watch?v=lWwkbiufwNE,1,5321,99,1.86,143,242,18,268,16,0,3,2,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Dmytro-Mushkin--Computer-Vision-Research--Kaggle--ML--Education-eb40j2,1,443,39,32,21,8,4,1585
68 | E57,"Interview with Mark Landry | Data Science, Kaggle, H2O.ai | AutoML",Mark Landry,Male,USA,USA,mlandry,mark_a_landry,Kaggle,Ginger Chai,2020-02-21,Morning,2020-04-16,3833,https://www.youtube.com/watch?v=kBcVi4p-ruY,1,9700,194,2.00,214,408,41,358,5,1,0,11,https://anchor.fm/chaitimedatascience/episodes/Interview-w-Mark-Landry--Data-Science--Kaggle--H2O-ai--AutoML-eb63se,1,397,33,26,21,7,7,3366
69 | E58,CEO of Decision.ai: Dan Becker | What does it take to become a Data Scientist? | Kaggle Learn,Dan Becker,Male,USA,USA,dansbecker,dan_s_becker,Industry,Sulemani Chai,2020-02-11,Night,2020-04-19,3375,https://www.youtube.com/watch?v=eEYvgsUeEgw,0,4510,143,3.17,336,479,49,368,24,0,4,17,https://anchor.fm/chaitimedatascience/episodes/CEO-of-Decision-ai-Dan-Becker--What-does-it-take-to-become-a-Data-Scientist---Kaggle-Learn--Data-Science-Portfolio-eb631q,1,464,58,42,31,15,11,2564
70 | E59,Suzana Ilić | Democratising AI with Communities | Machine Learning Tokyo | Inclusivity in AI,Suzana Illić,Female,Japan,Austria,,suzatweet,Research,Masala Chai,2020-02-12,Morning,2020-04-23,2192,https://www.youtube.com/watch?v=TzgHNJN8D3I,1,4237,126,2.97,218,344,24,251,14,0,2,4,https://anchor.fm/chaitimedatascience/episodes/Suzana-Ili--Democratising-AI-w-Communities--Machine-Learning-Tokyo--Inclusivity-in-AI-eb58p3,1,361,33,21,21,8,2,1033
71 | E60,"Interview with Ines Montani | Spacy, NLP & Open Source Frameworks | Explosion.ai, Thinc.ai & Prodi.gy",Ines Montani,Female,Germany,Germany,,_inesmontani,Industry,Ginger Chai,2020-02-24,Night,2020-04-26,3119,https://www.youtube.com/watch?v=C5DGFSDlMBM,3,6810,340,4.99,822,1162,82,254,31,0,3,30,https://anchor.fm/chaitimedatascience/episodes/Interview-w-Ines-Montani--Spacy--NLP--Open-Source-Frameworks--Explosion-ai--Thinc-ai--Prodi-gy-ed2fvt,1,417,47,31,19,17,8,1751
72 | E61,Daniel Bourke | Learning to Learn | Creating AI Content | Fitness & Machine Learning,Daniel Bourke,Male,Australia,Australia,,mrdbourke,Industry,Sulemani Chai,2020-02-20,Night,2020-04-30,5597,https://www.youtube.com/watch?v=r5_SuLF5UWY,3,4464,143,3.20,197,340,50,529,27,0,4,12,https://anchor.fm/chaitimedatascience/episodes/Daniel-Bourke--Learning-to-Learn--Creating-AI-Content--Fitness--Machine-Learning-eb65ap,1,437,53,43,27,13,9,2505
73 | E62,"Pablo Samuel Castro | ML Research, Google Brain & Creative AI | Learning ML with the community | LatinX",Pablo Samuel Castro,Male,Canada,Ecuador,,pcastr,Research,Sulemani Chai,2020-02-20,Night,2020-05-03,3560,https://www.youtube.com/watch?v=muiM5SQxTIA,3,4585,75,1.64,136,211,22,375,17,0,3,4,https://anchor.fm/chaitimedatascience/episodes/Pablo-Samuel-Castro--ML-Research--Google-Brain--Creative-AI--Learning-ML-w-the-community--LatinX-eb7kfp,1,405,43,36,24,15,6,1525
74 | E63,"Robert Bracco | Learning to Learn | Approaching Fast.ai Materials, Kaggle & Blogging",Robert Bracco,Male,USA,USA,madeupmasters,madeupmasters,Industry,Kesar Rose Chai,2020-02-14,Night,2020-05-07,7103,https://www.youtube.com/watch?v=CYYvQ-5V3xA,3,6163,180,2.92,196,376,61,584,12,1,6,2,https://anchor.fm/chaitimedatascience/episodes/Robert-Bracco--Learning-to-Learn--Approaching-Fast-ai-Materials--Kaggle--Blogging-eb7rqd,1,472,79,55,38,12,12,3562
75 | E64,"Hamel Husain | Fastpages, Open Source | ML at Github | fastai",Hamel Husain,Male,USA,USA,hamelhusain,HamelHusain,Industry,Ginger Chai,2020-02-25,Night,2020-05-10,2976,https://www.youtube.com/watch?v=-pYMXSThpvc,3,4350,97,2.23,169,266,28,379,12,0,5,7,https://anchor.fm/chaitimedatascience/episodes/Hamel-Husain--Fastpages--Open-Source--ML-at-Github--fastai-eds2e6,1,426,36,24,21,13,8,2164
76 | E65,Dmitry Danevskiy | Google Quest Q&A Labelling Comp: Winning Sol | Becoming Kaggle Grandmaster,Dmitry Danevskiy,Male,Ukraine,Ukraine,ddanevskyi,DanevskiyD,Kaggle,Masala Chai,2020-03-03,Morning,2020-05-14,2019,https://www.youtube.com/watch?v=pQL892iT-dM,3,4362,117,2.68,230,347,22,228,22,0,1,4,https://anchor.fm/chaitimedatascience/episodes/Dmitry-Danevskiy--Google-Quest-QA-Labelling-Comp-Winning-Sol--Becoming-Kaggle-Grandmaster-eb7jtv,1,358,24,15,18,9,3,1326
77 | E66,Goku Mohandas | MadeWithML | AI Research | Healthcare | Education,Goku Mohandas,Male,USA,USA,,GokuMohandas,Industry,Paan Rose Green Tea,2020-05-06,Night,2020-05-17,5734,https://www.youtube.com/watch?v=VqysJmIqko8,3,9903,203,2.05,326,529,69,470,28,0,4,16,https://anchor.fm/chaitimedatascience/episodes/Goku-Mohandas--MadeWithML--AI-Research--Healthcare--Education-ee609r,1,402,51,36,24,15,15,3494
78 | E67,"Eli Stevens, Luca Antiga, and Thomas Viehmann | Deep Learning with PyTorch",Eli Stevens | Luca Antiga | Thomas Viehmann,Male,USA | Italy | Germany,USA | Italy | Germany,,eli0stevens | lantiga | thomasviehmann,Industry,Paan Rose Green Tea,2020-05-01,Night,2020-05-21,4667,https://www.youtube.com/watch?v=f5Qv3eSZpug,3,4503,144,3.20,407,551,56,366,29,0,1,12,https://anchor.fm/chaitimedatascience/episodes/Eli-Stevens--Luca-Antiga--and-Thomas-Viehmann--Deep-Learning-with-PyTorch-eec4qk,1,403,24,18,16,19,11,2156
79 | E68,Emmanuel Ameisen | Building Machine Learning Powered Apps,Emmanuel Ameisen,Male,USA,USA,,mlpowered,Industry,Masala Chai,2020-05-20,Morning,2020-05-24,3491,https://www.youtube.com/watch?v=ctss0hcD9SE,3,4734,137,2.89,191,328,35,384,21,1,2,4,https://anchor.fm/chaitimedatascience/episodes/Emmanuel-Ameisen--Building-Machine-Learning-Powered-Apps-eegaf3,3,418,54,42,28,21,10,1704
80 | E69,Birthday Special AMA: Answering Questions from my ML Heroes | CTDS.News Launch,,,,,,,Other,Masala Chai,2020-05-27,Morning,2020-05-27,3984,https://www.youtube.com/watch?v=hyJhwWshfbY,3,3698,163,4.41,338,501,55,395,36,1,3,15,https://anchor.fm/chaitimedatascience/episodes/Birthday-Special-AMA-Answering-Questions-from-my-ML-Heroes--CTDS-News-Launch-eekt01,3,342,24,16,16,17,9,1992
81 | E70,"Interview with Yauhen Babakhin | Kaggle, Computer Vision and AutoML",Yauhen Babakhin,Male,Belarus,Belarus,ybabakhin,,Kaggle,Paan Rose Green Tea,2020-05-28,Afternoon,2020-05-31,3952,https://www.youtube.com/watch?v=n_IUOeiKwnE,3,5200,99,2,201,300,14,168,5,0,2,4,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Yauhen-Babakhin--Kaggle--Computer-Vision-and-AutoML--CTDS-Show-eeqdom,3,372,32,20,15,14,9,2229
82 | E71,"Martin Henze, Heads Or Tails, First Kaggle Kernel GM | Astronomy | Story-Telling with Data",Martin Henze,Male,USA,USA,headsortails,heads0rtai1s,Kaggle,Paan Rose Green Tea,2020-05-24,Night,2020-06-04,4176,https://www.youtube.com/watch?v=2dpaSTWdhSk&list=PLLvvXm0q8zUbiNdoIazGzlENMXvZ9bd3x,3,3670,95,2.59,124,219,22,362,12,0,0,2,https://anchor.fm/chaitimedatascience/episodes/Martin-Henze--Heads-Or-Tails--First-Kaggle-Kernel-GM--Astronomy--Story-Telling-with-Data-eet4j9,3,359,33,25,18,10,5,1960
83 | E72,Andreas Mueller | Scikit-Learn | ML and Open Source,Andreas Mueller,Male,USA,Germany,amuellerml,amuellerml,Industry,Paan Rose Green Tea,2020-05-25,Night,2020-06-07,3997,https://www.youtube.com/watch?v=iNZd_5T8tCI&list=PLLvvXm0q8zUbiNdoIazGzlENMXvZ9bd3x,3,3480,142,4.08,291,433,40,333,21,0,2,9,https://anchor.fm/chaitimedatascience/episodes/Andreas-Mueller--Scikit-Learn--ML-and-Open-Source--CTDS-Show-72-eet4j1,3,461,54,39,28,18,11,2147
84 | E73,"Maximilian Jeblick | Physics, Math and Data Science | Kaggle and H2O.ai",Maximilian Jeblick,Male,Germany,Germany,maxjeblick,,Kaggle,Paan Rose Green Tea,2020-06-11,Afternoon,2020-06-11,2372,https://www.youtube.com/watch?v=VeM1T7UaYTk,3,3200,54,2,86,140,9,231,5,0,1,1,https://anchor.fm/chaitimedatascience/episodes/Maximilian-Jeblick--Physics--Math-and-Data-Science--Kaggle-and-H2O-ai--CTDS-Show-73-ef9jjk,3,327,18,16,13,11,4,1174
85 | E74,"Dmitry Larko | H2O.ai | Kaggle, Applying Kaggle to Real world | AutoML",Dmitry Larko,Male,USA,Russia,dmitrylarko,DmitryLarko,Kaggle,Masala Chai,2020-06-05,Night,2020-06-14,4031,https://www.youtube.com/watch?v=aC9t9D7HpYE,3,4200,118,3,194,312,29,335,11,0,0,5,https://anchor.fm/chaitimedatascience/episodes/Dmitry-Larko--H2O-ai--Kaggle--Applying-Kaggle-to-Real-world--AutoML--CTDS-Show-74-efdhmt,3,352,35,26,22,13,7,1958
86 | E75,Rachel Thomas | Fast.ai | Applied Ethics | Top Down Learning,Rachel Thomas ,Female,USA,USA,,math_rachel,Industry,Masala Chai,2020-06-16,Night,2020-06-18,2214,https://www.youtube.com/watch?v=tq_XcFubgKo&list=PLLvvXm0q8zUbiNdoIazGzlENMXvZ9bd3x,3,1931,115,5.96,164,279,23,297,20,0,1,3,https://anchor.fm/chaitimedatascience/episodes/Rachel-Thomas--Fast-ai--Applied-Ethics--Top-Down-Learning--CTDS-Show-75-efjj5d,3,247,17,10,13,,,


--------------------------------------------------------------------------------
/notebooks/Generate Training data NER.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "%load_ext autoreload\n",
 10 |     "%autoreload 2"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 2,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "import sys\n",
 20 |     "sys.path.insert(0,'../scripts')\n",
 21 |     "from generate_training_data import TrainDataGenerator"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 3,
 27 |    "metadata": {},
 28 |    "outputs": [
 29 |     {
 30 |      "name": "stdout",
 31 |      "output_type": "stream",
 32 |      "text": [
 33 |       "**********\n",
 34 |       "51  templates loaded\n",
 35 |       "**********\n"
 36 |      ]
 37 |     }
 38 |    ],
 39 |    "source": [
 40 |     "tdg = TrainDataGenerator(mode=\"ner\")"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 4,
 46 |    "metadata": {},
 47 |    "outputs": [
 48 |     {
 49 |      "name": "stderr",
 50 |      "output_type": "stream",
 51 |      "text": [
 52 |       "100%|██████████| 10/10 [00:00<00:00, 1203.74it/s]\n"
 53 |      ]
 54 |     },
 55 |     {
 56 |      "data": {
 57 |       "text/plain": [
 58 |        "[('pie chart of yrlw_ot column of zzz grouped by dz_k column',\n",
 59 |        "  {'entities': [(13, 20, 'COLNAME'),\n",
 60 |        "    (31, 34, 'VARNAME'),\n",
 61 |        "    (46, 50, 'COLNAME')]}),\n",
 62 |        " ('plot fqi_hczeud and vakdbgvkxfgomzbgg of df in a bar plot',\n",
 63 |        "  {'entities': [(5, 15, 'COLNAME'),\n",
 64 |        "    (20, 37, 'COLNAME'),\n",
 65 |        "    (41, 43, 'VARNAME')]}),\n",
 66 |        " ('scatter plot of wzvapheuza and jsbefivugeekdv in tempdf',\n",
 67 |        "  {'entities': [(16, 26, 'COLNAME'),\n",
 68 |        "    (31, 45, 'COLNAME'),\n",
 69 |        "    (49, 55, 'VARNAME')]}),\n",
 70 |        " ('plot histogram of dhqwz_ydyfw in df',\n",
 71 |        "  {'entities': [(18, 29, 'COLNAME'), (33, 35, 'VARNAME')]}),\n",
 72 |        " ('find min,minimum of csgc,hlbrbnql group by wuwgxhrsgvrcqaezot from mydf',\n",
 73 |        "  {'entities': [(5, 8, 'FUNCTION'),\n",
 74 |        "    (9, 16, 'FUNCTION'),\n",
 75 |        "    (20, 24, 'COLNAME'),\n",
 76 |        "    (25, 33, 'COLNAME'),\n",
 77 |        "    (43, 61, 'COLNAME'),\n",
 78 |        "    (67, 71, 'VARNAME')]}),\n",
 79 |        " ('import plotly', {'entities': [(7, 13, 'LIBNAME')]}),\n",
 80 |        " ('load test.csv', {'entities': [(5, 13, 'FNAME')]}),\n",
 81 |        " ('print df head', {'entities': [(6, 8, 'VARNAME')]}),\n",
 82 |        " ('describe df', {'entities': [(9, 11, 'VARNAME')]}),\n",
 83 |        " ('pie chart of oqayvfaukwgykiomp column of tempdf grouped by txtnmhqhfqts column',\n",
 84 |        "  {'entities': [(13, 30, 'COLNAME'),\n",
 85 |        "    (41, 47, 'VARNAME'),\n",
 86 |        "    (59, 71, 'COLNAME')]})]"
 87 |       ]
 88 |      },
 89 |      "execution_count": 4,
 90 |      "metadata": {},
 91 |      "output_type": "execute_result"
 92 |     }
 93 |    ],
 94 |    "source": [
 95 |     "tdg.generate_training_rows(n_rows=10)"
 96 |    ]
 97 |   }
 98 |  ],
 99 |  "metadata": {
100 |   "kernelspec": {
101 |    "display_name": "Python 3",
102 |    "language": "python",
103 |    "name": "python3"
104 |   },
105 |   "language_info": {
106 |    "codemirror_mode": {
107 |     "name": "ipython",
108 |     "version": 3
109 |    },
110 |    "file_extension": ".py",
111 |    "mimetype": "text/x-python",
112 |    "name": "python",
113 |    "nbconvert_exporter": "python",
114 |    "pygments_lexer": "ipython3",
115 |    "version": "3.6.9"
116 |   }
117 |  },
118 |  "nbformat": 4,
119 |  "nbformat_minor": 4
120 | }
121 | 


--------------------------------------------------------------------------------
/scripts/README.md:
--------------------------------------------------------------------------------
 1 | # `Scripts` dir - A Walk-through
 2 | 
  3 | ### Note: Make sure to run all the following commands from the `/scripts` directory in your terminal
 4 | 
 5 | ### Preferred Tools:
 6 | - Python version: 3.7
 7 | - Python environment: conda
 8 | - Python package installer: pip
 9 | 
10 | ## Processing awesome-notebooks
11 | 
12 | To start off, run the following command to download the awesome-notebooks repo into the `input/` sub-directory
13 | ```
14 | git clone https://github.com/jupyter-naas/awesome-notebooks.git ./input/
15 | ```
16 | 
17 | Then, in order to extract the tasks and code and create a `.pkl` out of it, run the following command
18 | ```
19 | python3 process_awesome_notebooks.py create_pkl_file
20 | ```
21 | 
 22 | and in order to create the faiss flat index from the embeddings stored in the `.pkl` file, run
23 | ```
24 | python3 process_awesome_notebooks.py create_faiss_index
25 | ```
26 | 
27 | and in order to get an intent, run the following command where `<query>` is your query and `<nearest_k>` is the number of nearest neighbours from your query's embedding
28 | ```
29 | python3 process_awesome_notebooks.py get_intent <query> <nearest_k>
30 | ```
31 | 
32 | To evaluate the outputs of both tensorflow_hub and sentence_transformers embeddings, run the following command to create 2 `.csv` files, one for each type of encoder
33 | ```
34 | python3 process_awesome_notebooks.py eval_models
35 | ```
36 | To get speed benchmarks for each encoder over a fixed number of repetitions, run:
37 | ```
38 | python3 process_awesome_notebooks.py get_benchmark_data <repetitions>
39 | ```
40 | 
41 | ## Training NER model using spaCy v3
42 | 
43 | To generate training or validation data:
44 | ```
45 | python3 generate_training_data.py <number of rows>
46 | ```
47 | 
48 | To convert the `.json` files to `.spacy` objects:
49 | ```
 50 | python3 train_spacy3_ner.py <input_path> <output_path>
51 | ```
52 | 
53 | To create the default config file:
54 | ```
55 | python3 train_spacy3_ner.py create_default_config_file
56 | ```
57 | 
58 | To train the NER model:
59 | ```
60 | python3 train_spacy3_ner.py train_model
61 | ```
62 | 
63 | You now have a trained NER Model!


--------------------------------------------------------------------------------
/scripts/config.cfg:
--------------------------------------------------------------------------------
  1 | [paths]
  2 | train = null
  3 | dev = null
  4 | vectors = "en_core_web_sm"
  5 | init_tok2vec = null
  6 | 
  7 | [system]
  8 | gpu_allocator = null
  9 | seed = 0
 10 | 
 11 | [nlp]
 12 | lang = "en"
 13 | pipeline = ["tok2vec","ner"]
 14 | batch_size = 1000
 15 | disabled = []
 16 | before_creation = null
 17 | after_creation = null
 18 | after_pipeline_creation = null
 19 | tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
 20 | 
 21 | [components]
 22 | 
 23 | [components.ner]
 24 | factory = "ner"
 25 | incorrect_spans_key = null
 26 | moves = null
 27 | scorer = {"@scorers":"spacy.ner_scorer.v1"}
 28 | update_with_oracle_cut_size = 100
 29 | 
 30 | [components.ner.model]
 31 | @architectures = "spacy.TransitionBasedParser.v2"
 32 | state_type = "ner"
 33 | extra_state_tokens = false
 34 | hidden_width = 64
 35 | maxout_pieces = 2
 36 | use_upper = true
 37 | nO = null
 38 | 
 39 | [components.ner.model.tok2vec]
 40 | @architectures = "spacy.Tok2VecListener.v1"
 41 | width = ${components.tok2vec.model.encode.width}
 42 | upstream = "*"
 43 | 
 44 | [components.tok2vec]
 45 | factory = "tok2vec"
 46 | 
 47 | [components.tok2vec.model]
 48 | @architectures = "spacy.Tok2Vec.v2"
 49 | 
 50 | [components.tok2vec.model.embed]
 51 | @architectures = "spacy.MultiHashEmbed.v2"
 52 | width = ${components.tok2vec.model.encode.width}
 53 | attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
 54 | rows = [5000,2500,2500,2500]
 55 | include_static_vectors = true
 56 | 
 57 | [components.tok2vec.model.encode]
 58 | @architectures = "spacy.MaxoutWindowEncoder.v2"
 59 | width = 256
 60 | depth = 8
 61 | window_size = 1
 62 | maxout_pieces = 3
 63 | 
 64 | [corpora]
 65 | 
 66 | [corpora.dev]
 67 | @readers = "spacy.Corpus.v1"
 68 | path = ${paths.dev}
 69 | max_length = 0
 70 | gold_preproc = false
 71 | limit = 0
 72 | augmenter = null
 73 | 
 74 | [corpora.train]
 75 | @readers = "spacy.Corpus.v1"
 76 | path = ${paths.train}
 77 | max_length = 0
 78 | gold_preproc = false
 79 | limit = 0
 80 | augmenter = null
 81 | 
 82 | [training]
 83 | dev_corpus = "corpora.dev"
 84 | train_corpus = "corpora.train"
 85 | seed = ${system.seed}
 86 | gpu_allocator = ${system.gpu_allocator}
 87 | dropout = 0.1
 88 | accumulate_gradient = 1
 89 | patience = 1600
 90 | max_epochs = 0
 91 | max_steps = 20000
 92 | eval_frequency = 200
 93 | frozen_components = []
 94 | annotating_components = []
 95 | before_to_disk = null
 96 | 
 97 | [training.batcher]
 98 | @batchers = "spacy.batch_by_words.v1"
 99 | discard_oversize = false
100 | tolerance = 0.2
101 | get_length = null
102 | 
103 | [training.batcher.size]
104 | @schedules = "compounding.v1"
105 | start = 100
106 | stop = 1000
107 | compound = 1.001
108 | t = 0.0
109 | 
110 | [training.logger]
111 | @loggers = "spacy.ConsoleLogger.v1"
112 | progress_bar = false
113 | 
114 | [training.optimizer]
115 | @optimizers = "Adam.v1"
116 | beta1 = 0.9
117 | beta2 = 0.999
118 | L2_is_weight_decay = true
119 | L2 = 0.01
120 | grad_clip = 1.0
121 | use_averages = false
122 | eps = 0.00000001
123 | learn_rate = 0.001
124 | 
125 | [training.score_weights]
126 | ents_f = 1.0
127 | ents_p = 0.0
128 | ents_r = 0.0
129 | ents_per_type = null
130 | 
131 | [pretraining]
132 | 
133 | [initialize]
134 | vectors = ${paths.vectors}
135 | init_tok2vec = ${paths.init_tok2vec}
136 | vocab_data = null
137 | lookups = null
138 | before_init = null
139 | after_init = null
140 | 
141 | [initialize.components]
142 | 
143 | [initialize.tokenizer]


--------------------------------------------------------------------------------
/scripts/create_intent_index.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | import faiss
 4 | import numpy as np
 5 | import pandas as pd
 6 | from sentence_transformers import SentenceTransformer
 7 | 
 8 | model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
 9 | backend_dir = "../jupyter_text2code/jupyter_text2code_serverextension"
10 | 
11 | 
12 | def _get_embedding(command):
13 |     command = re.sub('[^A-Za-z0-9 ]+', '', command).lower()
14 |     return list(np.array(model.encode([command])[0]))
15 | 
16 | 
17 | # Make intent df
18 | jt2c = pd.read_csv(f'{backend_dir}/data/generated_intents.csv')
19 | naas = pd.read_csv(f'{backend_dir}/data/awesome-notebooks.csv')[['intent_id', 'task', 'st_embedding']]
20 | naas.columns = ['intent_id', 'intent', 'embedding']
21 | 
22 | jt2c['embedding'] = jt2c['intent'].apply(_get_embedding)
23 | naas['embedding'] = naas['intent'].apply(_get_embedding)
24 | jt2c = jt2c[['intent_id', 'intent', 'embedding']]
25 | 
26 | intent_df = pd.concat([jt2c, naas], axis=0)
27 | intent_df.to_csv('testing.csv', index=False)
28 | 
29 | for x, y in zip(intent_df["intent_id"].values, intent_df["embedding"].values):
30 |     if len(y) != 384:
31 |         print(x)
32 | 
33 | # Make faiss index
34 | db_ids = intent_df['intent_id'].values
35 | db_vectors = np.stack(intent_df["embedding"].values).astype(np.float32)
36 | faiss.normalize_L2(db_vectors)
37 | intent_index = faiss.IndexIDMap(faiss.IndexFlatIP(384))
38 | intent_index.add_with_ids(db_vectors, db_ids)
39 | faiss.write_index(intent_index, f"{backend_dir}/models/intent_index.idx")
40 | 


--------------------------------------------------------------------------------
/scripts/create_lookup_file.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | backend_dir = "../jupyter_text2code/jupyter_text2code_serverextension"
 4 | 
 5 | jt2c = pd.read_csv(f'{backend_dir}/data/ner_templates.csv')[['intent_id', 'template', 'code']]
 6 | jt2c.columns = ['intent_id', 'task', 'code']
 7 | naas = pd.read_csv(f'{backend_dir}/data/awesome-notebooks.csv')[['intent_id', 'task', 'code']]
 8 | 
 9 | lookup_df = pd.concat([jt2c, naas], axis=0)
10 | lookup_df.columns = ['intent_id', 'intent', 'code']
11 | lookup_df = lookup_df.drop_duplicates('intent_id')
12 | lookup_df.to_csv(f'{backend_dir}/data/intent_lookup.csv', index=False)
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/scripts/data/awesome-notebooks.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/scripts/data/awesome-notebooks.pkl


--------------------------------------------------------------------------------
/scripts/data/st_naas_intent_index.idx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/scripts/data/st_naas_intent_index.idx


--------------------------------------------------------------------------------
/scripts/data/tf_naas_intent_index.idx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/scripts/data/tf_naas_intent_index.idx


--------------------------------------------------------------------------------
/scripts/eval_models_performance.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "pycharm": {
  8 |      "name": "#%%\n"
  9 |     }
 10 |    },
 11 |    "outputs": [],
 12 |    "source": [
 13 |     "import pandas as pd\n",
 14 |     "import matplotlib.pyplot as plt"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "metadata": {
 21 |     "pycharm": {
 22 |      "name": "#%%\n"
 23 |     }
 24 |    },
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "tf_data = pd.read_csv('output/tf_eval_df.csv')\n",
 28 |     "st_data = pd.read_csv('output/st_eval_df.csv')"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 3,
 34 |    "metadata": {
 35 |     "pycharm": {
 36 |      "name": "#%%\n"
 37 |     }
 38 |    },
 39 |    "outputs": [
 40 |     {
 41 |      "name": "stdout",
 42 |      "output_type": "stream",
 43 |      "text": [
 44 |       "<class 'pandas.core.frame.DataFrame'>\n",
 45 |       "RangeIndex: 352 entries, 0 to 351\n",
 46 |       "Data columns (total 9 columns):\n",
 47 |       " #   Column                           Non-Null Count  Dtype \n",
 48 |       "---  ------                           --------------  ----- \n",
 49 |       " 0   category                         352 non-null    object\n",
 50 |       " 1   intent_id                        352 non-null    int64 \n",
 51 |       " 2   task                             352 non-null    object\n",
 52 |       " 3   tf_matched_intent_id             352 non-null    int64 \n",
 53 |       " 4   tf_matched_intent_text           352 non-null    object\n",
 54 |       " 5   tf_is_intent_matched             352 non-null    bool  \n",
 55 |       " 6   tf_matched_intent_id_shuffled    352 non-null    int64 \n",
 56 |       " 7   tf_matched_intent_text_shuffled  352 non-null    object\n",
 57 |       " 8   tf_is_intent_matched_shuffled    352 non-null    bool  \n",
 58 |       "dtypes: bool(2), int64(3), object(4)\n",
 59 |       "memory usage: 20.1+ KB\n"
 60 |      ]
 61 |     }
 62 |    ],
 63 |    "source": [
 64 |     "tf_data.info()"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 4,
 70 |    "metadata": {
 71 |     "pycharm": {
 72 |      "name": "#%%\n"
 73 |     }
 74 |    },
 75 |    "outputs": [
 76 |     {
 77 |      "name": "stdout",
 78 |      "output_type": "stream",
 79 |      "text": [
 80 |       "<class 'pandas.core.frame.DataFrame'>\n",
 81 |       "RangeIndex: 352 entries, 0 to 351\n",
 82 |       "Data columns (total 9 columns):\n",
 83 |       " #   Column                           Non-Null Count  Dtype \n",
 84 |       "---  ------                           --------------  ----- \n",
 85 |       " 0   category                         352 non-null    object\n",
 86 |       " 1   intent_id                        352 non-null    int64 \n",
 87 |       " 2   task                             352 non-null    object\n",
 88 |       " 3   st_matched_intent_id             352 non-null    int64 \n",
 89 |       " 4   st_matched_intent_text           352 non-null    object\n",
 90 |       " 5   st_is_intent_matched             352 non-null    bool  \n",
 91 |       " 6   st_matched_intent_id_shuffled    352 non-null    int64 \n",
 92 |       " 7   st_matched_intent_text_shuffled  352 non-null    object\n",
 93 |       " 8   st_is_intent_matched_shuffled    352 non-null    bool  \n",
 94 |       "dtypes: bool(2), int64(3), object(4)\n",
 95 |       "memory usage: 20.1+ KB\n"
 96 |      ]
 97 |     }
 98 |    ],
 99 |    "source": [
100 |     "st_data.info()"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": 5,
106 |    "metadata": {
107 |     "pycharm": {
108 |      "name": "#%%\n"
109 |     }
110 |    },
111 |    "outputs": [
112 |     {
113 |      "data": {
114 |       "text/plain": [
115 |        "True    352\n",
116 |        "Name: tf_is_intent_matched, dtype: int64"
117 |       ]
118 |      },
119 |      "execution_count": 5,
120 |      "metadata": {},
121 |      "output_type": "execute_result"
122 |     }
123 |    ],
124 |    "source": [
125 |     "tf_data['tf_is_intent_matched'].value_counts()"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 6,
131 |    "metadata": {
132 |     "pycharm": {
133 |      "name": "#%%\n"
134 |     }
135 |    },
136 |    "outputs": [
137 |     {
138 |      "data": {
139 |       "text/plain": [
140 |        "True    352\n",
141 |        "Name: st_is_intent_matched, dtype: int64"
142 |       ]
143 |      },
144 |      "execution_count": 6,
145 |      "metadata": {},
146 |      "output_type": "execute_result"
147 |     }
148 |    ],
149 |    "source": [
150 |     "st_data['st_is_intent_matched'].value_counts()"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": 7,
156 |    "metadata": {
157 |     "pycharm": {
158 |      "name": "#%%\n"
159 |     }
160 |    },
161 |    "outputs": [
162 |     {
163 |      "data": {
164 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEDCAYAAADOc0QpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAO+ElEQVR4nO3dX2ydd33H8feHpCtoRSJVnSokYYlQ0EiYSJEXkLhhLVozdpFy0SmVhnJRKb1IJdDQpJaLAReemMQfadJaKYiKaGJkmQA1AraRRSBUCTW4XShNQ4RFu8YkSsw/QXeRLel3F36iHOxj+9jHp15+vF+SdZ7nd57n+Oubd44eP8dJVSFJasvr1noASdLqM+6S1CDjLkkNMu6S1CDjLkkNWr/WAwDccccdtW3btrUeQ5JuKs8888zPqmqs33P/L+K+bds2Jicn13oMSbqpJPmvhZ7zsowkNci4S1KDjLskNci4S1KDjLskNci4S1KDjLskNWjJuCd5fZJTSX6Q5EyST3brn0jy0ySnu68P9JzzaJKpJOeS3DvKH0CSNN8gH2K6AtxdVa8kuQV4Ksm/ds99rqo+3Xtwkp3AfmAX8GbgP5K8raqurebgkqSFLRn3mv3fPF7pdm/pvhb7Hz72AUer6grwYpIpYA/wvSFn1YC2PfKNtR5B6uulT/35Wo/wO2Oga+5J1iU5DVwGTlTV091TDyd5LskTSTZ0a5uB8z2nT3drc1/zYJLJJJMzMzMr/wkkSfMMFPequlZVu4EtwJ4k7wAeB94K7AYuAp/pDk+/l+jzmoeraryqxsfG+v7dG0nSCi3rbpmq+hXwHWBvVV3qov8q8HlmL73A7Dv1rT2nbQEuDD+qJGlQg9wtM5bkTd32G4D3Az9KsqnnsA8Cz3fbx4H9SW5Nsh3YAZxa1aklSYsa5G6ZTcCRJOuY/cfgWFV9Pck/JtnN7CWXl4CHAKrqTJJjwAvAVeCQd8pI0mtrkLtlngPu6rP+oUXOmQAmhhtNkrRSfkJVkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQUvGPcnrk5xK8oMkZ5J8slu/PcmJJD/uHjf0nPNokqkk55LcO8ofQJI03yDv3K8Ad1fVO4HdwN4k7wEeAU5W1Q7gZLdPkp3AfmAXsBd4LMm6EcwuSVrAknGvWa90u7d0XwXsA45060eA+7rtfcDRqrpSVS8CU8Ce1RxakrS4ga65J1mX5DRwGThRVU8Dd1bVRYDucWN3+GbgfM/p093a3Nc8mGQyyeTMzMwQP4Ikaa6B4l5V16pqN7AF2JPkHYscnn4v0ec1D1fVeFWNj42NDTSsJGkwy7pbpqp+BXyH2Wvpl5JsAugeL3eHTQNbe07bAlwYdlBJ0uAGuVtmLMmbuu03AO8HfgQcBw50hx0Anuy2jwP7k9yaZDuwAzi1ynNLkhaxfoBjNgFHujteXgccq6qvJ/kecCzJg8DLwP0AVXUmyTHgBeAqcKiqro1mfElSP0vGvaqeA+7qs/5z4J4FzpkAJoaeTpK0In5CVZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIatGTck2xN8u0kZ5OcSfLhbv0TSX6a5HT39YGecx5NMpXkXJJ7R/kDSJLmWz/AMVeBj1bVs0neCDyT5ET33Oeq6tO9ByfZCewHdgFvBv4jyduq6tpqDi5JWtiS79yr6mJVPdtt/wY4C2xe5JR9wNGqulJVLwJTwJ7VGFaSNJhlXXNPsg24C3i6W3o4yXNJnkiyoVvbDJzvOW2aPv8YJDmYZDLJ5MzMzPInlyQtaOC4J7
kN+Arwkar6NfA48FZgN3AR+Mz1Q/ucXvMWqg5X1XhVjY+NjS13bknSIgaKe5JbmA37l6rqqwBVdamqrlXVq8DnuXHpZRrY2nP6FuDC6o0sSVrKIHfLBPgCcLaqPtuzvqnnsA8Cz3fbx4H9SW5Nsh3YAZxavZElSUsZ5G6Z9wIfAn6Y5HS39jHggSS7mb3k8hLwEEBVnUlyDHiB2TttDnmnjCS9tpaMe1U9Rf/r6N9c5JwJYGKIuSRJQ/ATqpLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ1aMu5Jtib5dpKzSc4k+XC3fnuSE0l+3D1u6Dnn0SRTSc4luXeUP4Akab5B3rlfBT5aVW8H3gMcSrITeAQ4WVU7gJPdPt1z+4FdwF7gsSTrRjG8JKm/JeNeVRer6tlu+zfAWWAzsA840h12BLiv294HHK2qK1X1IjAF7FnluSVJi1jWNfck24C7gKeBO6vqIsz+AwBs7A7bDJzvOW26W5v7WgeTTCaZnJmZWcHokqSFDBz3JLcBXwE+UlW/XuzQPms1b6HqcFWNV9X42NjYoGNIkgYwUNyT3MJs2L9UVV/tli8l2dQ9vwm43K1PA1t7Tt8CXFidcSVJgxjkbpkAXwDOVtVne546Dhzotg8AT/as709ya5LtwA7g1OqNLElayvoBjnkv8CHgh0lOd2sfAz4FHEvyIPAycD9AVZ1Jcgx4gdk7bQ5V1bXVHlyStLAl415VT9H/OjrAPQucMwFMDDGXJGkIfkJVkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQUvGPckTSS4neb5n7RNJfprkdPf1gZ7nHk0yleRckntHNbgkaWGDvHP/IrC3z/rnqmp39/VNgCQ7gf3Aru6cx5KsW61hJUmDWTLuVfVd4BcDvt4+4GhVXamqF4EpYM8Q80mSVmCYa+4PJ3muu2yzoVvbDJzvOWa6W5snycEkk0kmZ2ZmhhhDkjTXSuP+OPBWYDdwEfhMt54+x1a/F6iqw1U1XlXjY2NjKxxDktTPiuJeVZeq6lpVvQp8nhuXXqaBrT2HbgEuDDeiJGm5VhT3JJt6dj8IXL+T5jiwP8mtSbYDO4BTw40oSVqu9UsdkOTLwPuAO5JMAx8H3pdkN7OXXF4CHgKoqjNJjgEvAFeBQ1V1bSSTS5IWtGTcq+qBPstfWOT4CWBimKEkScPxE6qS1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1KAl457kiSSXkzzfs3Z7khNJftw9buh57tEkU0nOJbl3VINLkhY2yDv3LwJ756w9Apysqh3AyW6fJDuB/cCu7pzHkqxbtWklSQNZMu5V9V3gF3OW9wFHuu0jwH0960er6kpVvQhMAXtWZ1RJ0qBWes39zqq6CNA9buzWNwPne46b7tbmSXIwyWSSyZmZmRWOIUnqZ7V/oZo+a9XvwKo6XFXjVTU+Nja2ymNI0u+2lcb9UpJNAN3j5W59Gtjac9wW4MLKx5MkrcRK434cONBtHwCe7Fnfn+TWJNuBHcCp4UaUJC3X+qUOSPJl4H3AHUmmgY8DnwKOJXkQeBm4H6CqziQ5BrwAXAUOVdW1Ec0uSVrAknGvqgcWeOqeBY6fACaGGUqSNBw/oSpJDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktSgJf
+D7MUkeQn4DXANuFpV40luB/4Z2Aa8BPxFVf1yuDElScuxGu/c/6SqdlfVeLf/CHCyqnYAJ7t9SdJraBSXZfYBR7rtI8B9I/gekqRFDBv3Ar6V5JkkB7u1O6vqIkD3uLHfiUkOJplMMjkzMzPkGJKkXkNdcwfeW1UXkmwETiT50aAnVtVh4DDA+Ph4DTmHJKnHUO/cq+pC93gZ+BqwB7iUZBNA93h52CElScuz4rgn+f0kb7y+Dfwp8DxwHDjQHXYAeHLYISVJyzPMZZk7ga8luf46/1RV/5bk+8CxJA8CLwP3Dz+mJGk5Vhz3qvoJ8M4+6z8H7hlmKEnScPyEqiQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1aGRxT7I3ybkkU0keGdX3kSTNN5K4J1kH/APwZ8BO4IEkO0fxvSRJ843qnfseYKqqflJV/wMcBfaN6HtJkuZYP6LX3Qyc79mfBt7de0CSg8DBbveVJOdGNIs0rDuAn631EC3I3631BM35g4WeGFXc02etfmun6jBweETfX1o1SSaranyt55CWY1SXZaaBrT37W4ALI/pekqQ5RhX37wM7kmxP8nvAfuD4iL6XJGmOkVyWqaqrSR4G/h1YBzxRVWdG8b2k14CXD3XTSVUtfZQk6abiJ1QlqUHGXZIaZNwlqUHGXZojs/4yyd90+29Jsmet55KWw1+oSnMkeRx4Fbi7qt6eZAPwrar64zUeTRrYqD6hKt3M3l1V70rynwBV9cvu8xrSTcPLMtJ8/9v9ZdMCSDLG7Dt56aZh3KX5/h74GrAxyQTwFPC3azuStDxec5f6SPKHwD3M/hG8k1V1do1HkpbFuEtzJHlLv/Wqevm1nkVaKeMuzZHkh8xebw/wemA7cK6qdq3pYNIyeLeMNEdV/VHvfpJ3AQ+t0TjSivgLVWkJVfUs4D3uuqn4zl2aI8lf9ey+DngXMLNG40grYtyl+d7Ys30V+AbwlTWaRVoR4y716D68dFtV/fVazyINw2vuUifJ+qq6xuxlGOmm5jt36YZTzIb9dJLjwL8A/339yar66loNJi2XcZfmux34OXA3N+53L8C466Zh3KUbNnZ3yjzPjahf56f9dFMx7tIN64Db+O2oX2fcdVPxzw9InSTPVpW/TFUTvFtGuqHfO3bppuQ7d6mT5Paq+sVazyGtBuMuSQ3ysowkNci4S1KDjLskNci4S1KD/g9SHrdAdDylhAAAAABJRU5ErkJggg==\n",
165 |       "text/plain": [
166 |        "<Figure size 432x288 with 1 Axes>"
167 |       ]
168 |      },
169 |      "metadata": {
170 |       "needs_background": "light"
171 |      },
172 |      "output_type": "display_data"
173 |     }
174 |    ],
175 |    "source": [
176 |     "tf_data['tf_is_intent_matched'].value_counts().plot.bar()\n",
177 |     "plt.show()"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": 8,
183 |    "metadata": {
184 |     "pycharm": {
185 |      "name": "#%%\n"
186 |     }
187 |    },
188 |    "outputs": [
189 |     {
190 |      "data": {
191 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEDCAYAAADOc0QpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAO+ElEQVR4nO3dX2ydd33H8feHpCtoRSJVnSokYYlQ0EiYSJEXkLhhLVozdpFy0SmVhnJRKb1IJdDQpJaLAReemMQfadJaKYiKaGJkmQA1AraRRSBUCTW4XShNQ4RFu8YkSsw/QXeRLel3F36iHOxj+9jHp15+vF+SdZ7nd57n+Oubd44eP8dJVSFJasvr1noASdLqM+6S1CDjLkkNMu6S1CDjLkkNWr/WAwDccccdtW3btrUeQ5JuKs8888zPqmqs33P/L+K+bds2Jicn13oMSbqpJPmvhZ7zsowkNci4S1KDjLskNci4S1KDjLskNci4S1KDjLskNWjJuCd5fZJTSX6Q5EyST3brn0jy0ySnu68P9JzzaJKpJOeS3DvKH0CSNN8gH2K6AtxdVa8kuQV4Ksm/ds99rqo+3Xtwkp3AfmAX8GbgP5K8raqurebgkqSFLRn3mv3fPF7pdm/pvhb7Hz72AUer6grwYpIpYA/wvSFn1YC2PfKNtR5B6uulT/35Wo/wO2Oga+5J1iU5DVwGTlTV091TDyd5LskTSTZ0a5uB8z2nT3drc1/zYJLJJJMzMzMr/wkkSfMMFPequlZVu4EtwJ4k7wAeB94K7AYuAp/pDk+/l+jzmoeraryqxsfG+v7dG0nSCi3rbpmq+hXwHWBvVV3qov8q8HlmL73A7Dv1rT2nbQEuDD+qJGlQg9wtM5bkTd32G4D3Az9KsqnnsA8Cz3fbx4H9SW5Nsh3YAZxa1aklSYsa5G6ZTcCRJOuY/cfgWFV9Pck/JtnN7CWXl4CHAKrqTJJjwAvAVeCQd8pI0mtrkLtlngPu6rP+oUXOmQAmhhtNkrRSfkJVkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQUvGPcnrk5xK8oMkZ5J8slu/PcmJJD/uHjf0nPNokqkk55LcO8ofQJI03yDv3K8Ad1fVO4HdwN4k7wEeAU5W1Q7gZLdPkp3AfmAXsBd4LMm6EcwuSVrAknGvWa90u7d0XwXsA45060eA+7rtfcDRqrpSVS8CU8Ce1RxakrS4ga65J1mX5DRwGThRVU8Dd1bVRYDucWN3+GbgfM/p093a3Nc8mGQyyeTMzMwQP4Ikaa6B4l5V16pqN7AF2JPkHYscnn4v0ec1D1fVeFWNj42NDTSsJGkwy7pbpqp+BXyH2Wvpl5JsAugeL3eHTQNbe07bAlwYdlBJ0uAGuVtmLMmbuu03AO8HfgQcBw50hx0Anuy2jwP7k9yaZDuwAzi1ynNLkhaxfoBjNgFHujteXgccq6qvJ/kecCzJg8DLwP0AVXUmyTHgBeAqcKiqro1mfElSP0vGvaqeA+7qs/5z4J4FzpkAJoaeTpK0In5CVZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIatGTck2xN8u0kZ5OcSfLhbv0TSX6a5HT39YGecx5NMpXkXJJ7R/kDSJLmWz/AMVeBj1bVs0neCDyT5ET33Oeq6tO9ByfZCewHdgFvBv4jyduq6tpqDi5JWtiS79yr6mJVPdtt/wY4C2xe5JR9wNGqulJVLwJTwJ7VGFaSNJhlXXNPsg24C3i6W3o4yXNJnkiyoVvbDJzvOW2aPv8YJDmYZDLJ5MzMzPInlyQtaOC4J7
kN+Arwkar6NfA48FZgN3AR+Mz1Q/ucXvMWqg5X1XhVjY+NjS13bknSIgaKe5JbmA37l6rqqwBVdamqrlXVq8DnuXHpZRrY2nP6FuDC6o0sSVrKIHfLBPgCcLaqPtuzvqnnsA8Cz3fbx4H9SW5Nsh3YAZxavZElSUsZ5G6Z9wIfAn6Y5HS39jHggSS7mb3k8hLwEEBVnUlyDHiB2TttDnmnjCS9tpaMe1U9Rf/r6N9c5JwJYGKIuSRJQ/ATqpLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ1aMu5Jtib5dpKzSc4k+XC3fnuSE0l+3D1u6Dnn0SRTSc4luXeUP4Akab5B3rlfBT5aVW8H3gMcSrITeAQ4WVU7gJPdPt1z+4FdwF7gsSTrRjG8JKm/JeNeVRer6tlu+zfAWWAzsA840h12BLiv294HHK2qK1X1IjAF7FnluSVJi1jWNfck24C7gKeBO6vqIsz+AwBs7A7bDJzvOW26W5v7WgeTTCaZnJmZWcHokqSFDBz3JLcBXwE+UlW/XuzQPms1b6HqcFWNV9X42NjYoGNIkgYwUNyT3MJs2L9UVV/tli8l2dQ9vwm43K1PA1t7Tt8CXFidcSVJgxjkbpkAXwDOVtVne546Dhzotg8AT/as709ya5LtwA7g1OqNLElayvoBjnkv8CHgh0lOd2sfAz4FHEvyIPAycD9AVZ1Jcgx4gdk7bQ5V1bXVHlyStLAl415VT9H/OjrAPQucMwFMDDGXJGkIfkJVkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQUvGPckTSS4neb5n7RNJfprkdPf1gZ7nHk0yleRckntHNbgkaWGDvHP/IrC3z/rnqmp39/VNgCQ7gf3Aru6cx5KsW61hJUmDWTLuVfVd4BcDvt4+4GhVXamqF4EpYM8Q80mSVmCYa+4PJ3muu2yzoVvbDJzvOWa6W5snycEkk0kmZ2ZmhhhDkjTXSuP+OPBWYDdwEfhMt54+x1a/F6iqw1U1XlXjY2NjKxxDktTPiuJeVZeq6lpVvQp8nhuXXqaBrT2HbgEuDDeiJGm5VhT3JJt6dj8IXL+T5jiwP8mtSbYDO4BTw40oSVqu9UsdkOTLwPuAO5JMAx8H3pdkN7OXXF4CHgKoqjNJjgEvAFeBQ1V1bSSTS5IWtGTcq+qBPstfWOT4CWBimKEkScPxE6qS1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1KAl457kiSSXkzzfs3Z7khNJftw9buh57tEkU0nOJbl3VINLkhY2yDv3LwJ756w9Apysqh3AyW6fJDuB/cCu7pzHkqxbtWklSQNZMu5V9V3gF3OW9wFHuu0jwH0960er6kpVvQhMAXtWZ1RJ0qBWes39zqq6CNA9buzWNwPne46b7tbmSXIwyWSSyZmZmRWOIUnqZ7V/oZo+a9XvwKo6XFXjVTU+Nja2ymNI0u+2lcb9UpJNAN3j5W59Gtjac9wW4MLKx5MkrcRK434cONBtHwCe7Fnfn+TWJNuBHcCp4UaUJC3X+qUOSPJl4H3AHUmmgY8DnwKOJXkQeBm4H6CqziQ5BrwAXAUOVdW1Ec0uSVrAknGvqgcWeOqeBY6fACaGGUqSNBw/oSpJDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktSgJf
+D7MUkeQn4DXANuFpV40luB/4Z2Aa8BPxFVf1yuDElScuxGu/c/6SqdlfVeLf/CHCyqnYAJ7t9SdJraBSXZfYBR7rtI8B9I/gekqRFDBv3Ar6V5JkkB7u1O6vqIkD3uLHfiUkOJplMMjkzMzPkGJKkXkNdcwfeW1UXkmwETiT50aAnVtVh4DDA+Ph4DTmHJKnHUO/cq+pC93gZ+BqwB7iUZBNA93h52CElScuz4rgn+f0kb7y+Dfwp8DxwHDjQHXYAeHLYISVJyzPMZZk7ga8luf46/1RV/5bk+8CxJA8CLwP3Dz+mJGk5Vhz3qvoJ8M4+6z8H7hlmKEnScPyEqiQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1aGRxT7I3ybkkU0keGdX3kSTNN5K4J1kH/APwZ8BO4IEkO0fxvSRJ843qnfseYKqqflJV/wMcBfaN6HtJkuZYP6LX3Qyc79mfBt7de0CSg8DBbveVJOdGNIs0rDuAn631EC3I3631BM35g4WeGFXc02etfmun6jBweETfX1o1SSaranyt55CWY1SXZaaBrT37W4ALI/pekqQ5RhX37wM7kmxP8nvAfuD4iL6XJGmOkVyWqaqrSR4G/h1YBzxRVWdG8b2k14CXD3XTSVUtfZQk6abiJ1QlqUHGXZIaZNwlqUHGXZojs/4yyd90+29Jsmet55KWw1+oSnMkeRx4Fbi7qt6eZAPwrar64zUeTRrYqD6hKt3M3l1V70rynwBV9cvu8xrSTcPLMtJ8/9v9ZdMCSDLG7Dt56aZh3KX5/h74GrAxyQTwFPC3azuStDxec5f6SPKHwD3M/hG8k1V1do1HkpbFuEtzJHlLv/Wqevm1nkVaKeMuzZHkh8xebw/wemA7cK6qdq3pYNIyeLeMNEdV/VHvfpJ3AQ+t0TjSivgLVWkJVfUs4D3uuqn4zl2aI8lf9ey+DngXMLNG40grYtyl+d7Ys30V+AbwlTWaRVoR4y716D68dFtV/fVazyINw2vuUifJ+qq6xuxlGOmm5jt36YZTzIb9dJLjwL8A/339yar66loNJi2XcZfmux34OXA3N+53L8C466Zh3KUbNnZ3yjzPjahf56f9dFMx7tIN64Db+O2oX2fcdVPxzw9InSTPVpW/TFUTvFtGuqHfO3bppuQ7d6mT5Paq+sVazyGtBuMuSQ3ysowkNci4S1KDjLskNci4S1KD/g9SHrdAdDylhAAAAABJRU5ErkJggg==\n",
192 |       "text/plain": [
193 |        "<Figure size 432x288 with 1 Axes>"
194 |       ]
195 |      },
196 |      "metadata": {
197 |       "needs_background": "light"
198 |      },
199 |      "output_type": "display_data"
200 |     }
201 |    ],
202 |    "source": [
203 |     "st_data['st_is_intent_matched'].value_counts().plot.bar()\n",
204 |     "plt.show()"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": 9,
210 |    "metadata": {
211 |     "pycharm": {
212 |      "name": "#%%\n"
213 |     }
214 |    },
215 |    "outputs": [
216 |     {
217 |      "data": {
218 |       "text/plain": [
219 |        "True     351\n",
220 |        "False      1\n",
221 |        "Name: tf_is_intent_matched_shuffled, dtype: int64"
222 |       ]
223 |      },
224 |      "execution_count": 9,
225 |      "metadata": {},
226 |      "output_type": "execute_result"
227 |     }
228 |    ],
229 |    "source": [
230 |     "tf_data['tf_is_intent_matched_shuffled'].value_counts()"
231 |    ]
232 |   },
233 |   {
234 |    "cell_type": "code",
235 |    "execution_count": 10,
236 |    "metadata": {
237 |     "pycharm": {
238 |      "name": "#%%\n"
239 |     }
240 |    },
241 |    "outputs": [
242 |     {
243 |      "data": {
244 |       "text/html": [
245 |        "<div>\n",
246 |        "<style scoped>\n",
247 |        "    .dataframe tbody tr th:only-of-type {\n",
248 |        "        vertical-align: middle;\n",
249 |        "    }\n",
250 |        "\n",
251 |        "    .dataframe tbody tr th {\n",
252 |        "        vertical-align: top;\n",
253 |        "    }\n",
254 |        "\n",
255 |        "    .dataframe thead th {\n",
256 |        "        text-align: right;\n",
257 |        "    }\n",
258 |        "</style>\n",
259 |        "<table border=\"1\" class=\"dataframe\">\n",
260 |        "  <thead>\n",
261 |        "    <tr style=\"text-align: right;\">\n",
262 |        "      <th></th>\n",
263 |        "      <th>category</th>\n",
264 |        "      <th>intent_id</th>\n",
265 |        "      <th>task</th>\n",
266 |        "      <th>tf_matched_intent_id</th>\n",
267 |        "      <th>tf_matched_intent_text</th>\n",
268 |        "      <th>tf_is_intent_matched</th>\n",
269 |        "      <th>tf_matched_intent_id_shuffled</th>\n",
270 |        "      <th>tf_matched_intent_text_shuffled</th>\n",
271 |        "      <th>tf_is_intent_matched_shuffled</th>\n",
272 |        "    </tr>\n",
273 |        "  </thead>\n",
274 |        "  <tbody>\n",
275 |        "    <tr>\n",
276 |        "      <th>169</th>\n",
277 |        "      <td>Airtable</td>\n",
278 |        "      <td>169</td>\n",
279 |        "      <td>Airtable - Get data</td>\n",
280 |        "      <td>169</td>\n",
281 |        "      <td>Airtable - Get data</td>\n",
282 |        "      <td>True</td>\n",
283 |        "      <td>50</td>\n",
284 |        "      <td>Newsapi - Get data</td>\n",
285 |        "      <td>False</td>\n",
286 |        "    </tr>\n",
287 |        "  </tbody>\n",
288 |        "</table>\n",
289 |        "</div>"
290 |       ],
291 |       "text/plain": [
292 |        "     category  intent_id                 task  tf_matched_intent_id  \\\n",
293 |        "169  Airtable        169  Airtable - Get data                   169   \n",
294 |        "\n",
295 |        "    tf_matched_intent_text  tf_is_intent_matched  \\\n",
296 |        "169    Airtable - Get data                  True   \n",
297 |        "\n",
298 |        "     tf_matched_intent_id_shuffled tf_matched_intent_text_shuffled  \\\n",
299 |        "169                             50              Newsapi - Get data   \n",
300 |        "\n",
301 |        "     tf_is_intent_matched_shuffled  \n",
302 |        "169                          False  "
303 |       ]
304 |      },
305 |      "execution_count": 10,
306 |      "metadata": {},
307 |      "output_type": "execute_result"
308 |     }
309 |    ],
310 |    "source": [
311 |     "tf_data[tf_data['tf_is_intent_matched_shuffled'] == False]"
312 |    ]
313 |   },
314 |   {
315 |    "cell_type": "code",
316 |    "execution_count": 11,
317 |    "metadata": {
318 |     "pycharm": {
319 |      "name": "#%%\n"
320 |     }
321 |    },
322 |    "outputs": [
323 |     {
324 |      "data": {
325 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEICAYAAACktLTqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQmUlEQVR4nO3dcaxedX3H8feHFpEJmzBuSW2L7VjNVtws5q66kCxO3EBdVszCVjJdl5DUP0qimTEBs03d1gU30WgiJCU6u4XJuqihUbaJjcawTOqFIFJqZyMItR29okZQV2357o97Ks/a59779N779Gl/vF/Jk3PO7/x+5/ne5OZzz/0955wnVYUkqS1njboASdLCM9wlqUGGuyQ1yHCXpAYZ7pLUIMNdkhq0eNQFAFx00UW1cuXKUZchSWeU+++//ztVNdZv32kR7itXrmRiYmLUZUjSGSXJt6bb57SMJDXIcJekBhnuktQgw12SGmS4S1KDZg33JC9MsivJV5PsTvLerv09Sb6d5MHu9YaeMTcl2Zdkb5KrhvkDSJJONMilkIeB11bVM0nOBu5N8m/dvg9W1ft7OydZA2wALgNeAnw+ycuq6uhCFi5Jmt6sZ+415Zlu8+zuNdND4NcDd1bV4ap6FNgHrJt3pZKkgQ10E1OSRcD9wC8DH6mq+5K8HrghyZ8AE8A7qup7wDLgyz3D93dtZ7yVN3521CU05bGb3zjqEqRmDfSBalUdraq1wHJgXZKXA7cBlwJrgYPALV339DvE8Q1JNiWZSDIxOTk5h9IlSdM5qatlqur7wBeBq6vqyS70nwVu57mpl/3Aip5hy4EDfY61tarGq2p8bKzvoxEkSXM0yNUyY0le3K2fC7wO+HqSpT3d3gQ83K3vADYkOSfJKmA1sGtBq5YkzWiQOfelwLZu3v0sYHtVfSbJPyVZy9SUy2PAWwGqaneS7cAjwBFgs1fKSNKpNWu4V9VDwOV92t8yw5gtwJb5lSZJmivvUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lq0KzhnuSFSXYl+WqS3Une27VfmOSeJN/olhf0jLkpyb4ke5NcNcwfQJJ0okHO3A8Dr62qVwBrgauTvBq4EdhZVauBnd02SdYAG4DLgKuBW5MsGkLtkqRpzBruNeWZbvPs7lXAemBb174NuKZbXw/cWVWHq+pRYB+wbiGLliTNbKA59ySLkjwIHALuqar7gIur6iBAt1zSdV8GPNEzfH/XJkk6RQYK96o6WlVrgeXAuiQvn6F7+h3ihE7JpiQTSSYmJycHKlaSNJiTulqmqr4PfJGpufQnkywF6JaHum77gRU9w5YDB/oca2tVjVfV+NjY2MlXLkma1iBXy4wleXG3fi7wOuDrwA5gY9dtI3BXt74D2JDknCSrgNXArgWuW5I0g8UD9FkKbOuueDkL2F5Vn0nyX8D2JNcDjwPXAlTV7iTbgUeAI8Dmqjo6nPIlSf3MGu5V9RBweZ/2p4ArpxmzBdgy7+okSXPiHaqS1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWrQrOGeZEWSLyTZk2R3krd17e9J8u0kD3avN/SMuSnJviR7k1w1zB9AknSixQP0OQK8o6oeSHI+cH+Se7p9H6yq9/d2TrIG2ABcBrwE+HySl1XV0YUsXJI0vVnP3KvqYFU90K0/DewBls0wZD1wZ1UdrqpHgX3AuoUoVpI0mJOac0+yErgcuK9ruiHJQ0k+luSCrm0Z8ETPsP3M/MdAkrTABg73JOcBnwTeXlU/AG4DLgXWAgeBW4
517TO8+hxvU5KJJBOTk5MnW7ckaQYDhXuSs5kK9juq6lMAVfVkVR2tqmeB23lu6mU/sKJn+HLgwPHHrKqtVTVeVeNjY2Pz+RkkSccZ5GqZAB8F9lTVB3ral/Z0exPwcLe+A9iQ5Jwkq4DVwK6FK1mSNJtBrpa5AngL8LUkD3Zt7wKuS7KWqSmXx4C3AlTV7iTbgUeYutJms1fKSNKpNWu4V9W99J9Hv3uGMVuALfOoS5I0D96hKkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSgWcM9yYokX0iyJ8nuJG/r2i9Mck+Sb3TLC3rG3JRkX5K9Sa4a5g8gSTrRIGfuR4B3VNWvAq8GNidZA9wI7Kyq1cDObptu3wbgMuBq4NYki4ZRvCSpv1nDvaoOVtUD3frTwB5gGbAe2NZ12wZc062vB+6sqsNV9SiwD1i3wHVLkmZwUnPuSVYClwP3ARdX1UGY+gMALOm6LQOe6Bm2v2uTJJ0iA4d7kvOATwJvr6ofzNS1T1v1Od6mJBNJJiYnJwctQ5I0gIHCPcnZTAX7HVX1qa75ySRLu/1LgUNd+35gRc/w5cCB449ZVVuraryqxsfGxuZavySpj0GulgnwUWBPVX2gZ9cOYGO3vhG4q6d9Q5JzkqwCVgO7Fq5kSdJsFg/Q5wrgLcDXkjzYtb0LuBnYnuR64HHgWoCq2p1kO/AIU1fabK6qowtduCRperOGe1XdS/95dIArpxmzBdgyj7okSfPgHaqS1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWrQrOGe5GNJDiV5uKftPUm+neTB7vWGnn03JdmXZG+Sq4ZVuCRpeoOcuX8cuLpP+weram33uhsgyRpgA3BZN+bWJIsWqlhJ0mBmDfeq+hLw3QGPtx64s6oOV9WjwD5g3TzqkyTNwXzm3G9I8lA3bXNB17YMeKKnz/6uTZJ0Cs013G8DLgXWAgeBW7r29Olb/Q6QZFOSiSQTk5OTcyxDktTPnMK9qp6sqqNV9SxwO89NvewHVvR0XQ4cmOYYW6tqvKrGx8bG5lKGJGkacwr3JEt7Nt8EHLuSZgewIck5SVYBq4Fd8ytRknSyFs/WIckngNcAFyXZD7wbeE2StUxNuTwGvBWgqnYn2Q48AhwBNlfV0aFULkma1qzhXlXX9Wn+6Az9twBb5lOUJGl+vENVkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGzhnuSjyU5lOThnrYLk9yT5Bvd8oKefTcl2Zdkb5KrhlW4JGl6g5y5fxy4+ri2G4GdVbUa2Nltk2QNsAG4rBtza5JFC1atJGkgs4Z7VX0J+O5xzeuBbd36NuCanvY7q+pwVT0K7APWLUypkqRBzXXO/eKqOgjQLZd07cuAJ3r67e/aJEmn0EJ/oJo+bdW3Y7IpyUSSicnJyQUuQ5Ke3+Ya7k8mWQrQLQ917fuBFT39lgMH+h2gqrZW1XhVjY+Njc2xDElSP3MN9x3Axm59I3BXT/uGJOckWQWsBnbNr0RJ0slaPFuHJJ8AXgNclGQ/8G7gZmB7kuuBx4FrAapqd5LtwCPAEWBzVR0dUu2SpGnMGu5Vdd00u66cpv8WYMt8ipIkzY93qEpSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMt
wlqUGzfkH2TJI8BjwNHAWOVNV4kguBfwFWAo8Bf1hV35tfmZKkk7EQZ+6/XVVrq2q8274R2FlVq4Gd3bYk6RQaxrTMemBbt74NuGYI7yFJmsF8w72AzyW5P8mmru3iqjoI0C2XzPM9JEknaV5z7sAVVXUgyRLgniRfH3Rg98dgE8All1wyzzIkSb3mdeZeVQe65SHg08A64MkkSwG65aFpxm6tqvGqGh8bG5tPGZKk48w53JO8KMn5x9aB3wUeBnYAG7tuG4G75lukJOnkzGda5mLg00mOHeefq+rfk3wF2J7keuBx4Nr5lylJOhlzDveq+ibwij7tTwFXzqcoSdL8eIeqJDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoOGFu5Jrk6yN8m+JDcO630kSScaSrgnWQR8BHg9sAa4LsmaYbyXJOlEwzpzXwfsq6pvVtVPgDuB9UN6L0nScRYP6bjLgCd6tvcDrxrSe0kCVt742VGX0IzHbn7jqEuYt2GFe/q01f/rkGwCNnWbzyTZO6Rano8uAr4z6iJmk/eNugKNgL+bC+ul0+0YVrjvB1b0bC8HDvR2qKqtwNYhvf/zWpKJqhofdR3S8fzdPHWGNef+FWB1klVJXgBsAHYM6b0kSccZypl7VR1JcgPwH8Ai4GNVtXsY7yVJOtGwpmWoqruBu4d1fM3I6S6drvzdPEVSVbP3kiSdUXz8gCQ1yHCXpAYZ7g3IlDcn+ctu+5Ik60Zdl6TRMdzbcCvwm8B13fbTTD3bRxq5JD+X5C+S3N5tr07ye6Ouq3WGexteVVWbgf8FqKrvAS8YbUnSz/wDcJipExCYusnxb0ZXzvOD4d6Gn3ZP4iyAJGPAs6MtSfqZS6vq74CfAlTVj+n/iBItIMO9DR8GPg0sSbIFuBf429GWJP3MT5Kcy3MnH5cydSavIfI690Yk+RXgSqbOiHZW1Z4RlyQBkOR3gD9n6rsdPgdcAfxpVX1xlHW1znBvQJJL+rVX1eOnuhapnyS/CLyaqZOPL1fVaf9kyDOd4d6AJF9j6l/eAC8EVgF7q+qykRYmAUmuAB6sqh8meTPwSuBDVfWtEZfWNOfcG1BVv1ZVv94tVzP1TVj3jrouqXMb8KMkrwDeCXwL+MfRltQ+w71BVfUA8BujrkPqHKmpKYL1wIer6kPA+SOuqXlDeyqkTp0kf9azeRZT//ZOjqgc6XhPJ7kJeDPwW91lu2ePuKbmeebehvN7XucAn8UvJNfp44+YuvTx+qr6H6a+Y/nvR1tS+/xA9QzXnQXdXFXvHHUtkk4fTsucwZIs7r716pWjrkU6XpKn6W5cOn4XUFX186e4pOcVw/3Mtoup+fUHk+wA/hX44bGdVfWpURUmVZUfmo6Q4d6GC4GngNfy3PXuBRjuOm0kWcLUfRiAN9kNm+F+ZlvSXSnzMM+F+jF+mKLTQpLfB24BXgIcAl4K7AG8yW6IvFrmzLYIOK97nd+zfuwlnQ7+mqlHD/x3Va1i6hlI/znaktrnmfuZ7WBV/dWoi5Bm8dOqeirJWUnOqqovJHnfqItqneF+ZvOZ2DoTfD/JecCXgDuSHAKOjLim5nmd+xksyYVV9d1R1yH1k+SSqno8yYuAHzM1DfzHwC8Ad1TVUyMtsHGGu6ShSPJAVb2yW/9kVf3BqGt6PvEDVUnD0jtt+Esjq+J5ynCXNCw1zbpOAadlJA1FkqNM3TEd4FzgR8d24eMHhs5wl6QGOS0jSQ0y3CWpQYa7JDXIcJekBhnuktSg/wOzS016e64AqAAAAABJRU5ErkJggg==\n",
326 |       "text/plain": [
327 |        "<Figure size 432x288 with 1 Axes>"
328 |       ]
329 |      },
330 |      "metadata": {
331 |       "needs_background": "light"
332 |      },
333 |      "output_type": "display_data"
334 |     }
335 |    ],
336 |    "source": [
337 |     "tf_data['tf_is_intent_matched_shuffled'].value_counts().plot.bar()\n",
338 |     "plt.show()"
339 |    ]
340 |   },
341 |   {
342 |    "cell_type": "code",
343 |    "execution_count": 12,
344 |    "metadata": {
345 |     "pycharm": {
346 |      "name": "#%%\n"
347 |     }
348 |    },
349 |    "outputs": [
350 |     {
351 |      "data": {
352 |       "text/plain": [
353 |        "True     350\n",
354 |        "False      2\n",
355 |        "Name: st_is_intent_matched_shuffled, dtype: int64"
356 |       ]
357 |      },
358 |      "execution_count": 12,
359 |      "metadata": {},
360 |      "output_type": "execute_result"
361 |     }
362 |    ],
363 |    "source": [
364 |     "st_data['st_is_intent_matched_shuffled'].value_counts()"
365 |    ]
366 |   },
367 |   {
368 |    "cell_type": "code",
369 |    "execution_count": 13,
370 |    "metadata": {
371 |     "pycharm": {
372 |      "name": "#%%\n"
373 |     }
374 |    },
375 |    "outputs": [
376 |     {
377 |      "data": {
378 |       "text/html": [
379 |        "<div>\n",
380 |        "<style scoped>\n",
381 |        "    .dataframe tbody tr th:only-of-type {\n",
382 |        "        vertical-align: middle;\n",
383 |        "    }\n",
384 |        "\n",
385 |        "    .dataframe tbody tr th {\n",
386 |        "        vertical-align: top;\n",
387 |        "    }\n",
388 |        "\n",
389 |        "    .dataframe thead th {\n",
390 |        "        text-align: right;\n",
391 |        "    }\n",
392 |        "</style>\n",
393 |        "<table border=\"1\" class=\"dataframe\">\n",
394 |        "  <thead>\n",
395 |        "    <tr style=\"text-align: right;\">\n",
396 |        "      <th></th>\n",
397 |        "      <th>category</th>\n",
398 |        "      <th>intent_id</th>\n",
399 |        "      <th>task</th>\n",
400 |        "      <th>st_matched_intent_id</th>\n",
401 |        "      <th>st_matched_intent_text</th>\n",
402 |        "      <th>st_is_intent_matched</th>\n",
403 |        "      <th>st_matched_intent_id_shuffled</th>\n",
404 |        "      <th>st_matched_intent_text_shuffled</th>\n",
405 |        "      <th>st_is_intent_matched_shuffled</th>\n",
406 |        "    </tr>\n",
407 |        "  </thead>\n",
408 |        "  <tbody>\n",
409 |        "    <tr>\n",
410 |        "      <th>34</th>\n",
411 |        "      <td>Google Sheets</td>\n",
412 |        "      <td>34</td>\n",
413 |        "      <td>Google Sheets - Send data to MongoDB</td>\n",
414 |        "      <td>34</td>\n",
415 |        "      <td>Google Sheets - Send data to MongoDB</td>\n",
416 |        "      <td>True</td>\n",
417 |        "      <td>126</td>\n",
418 |        "      <td>MongoDB - Send data to Google Sheets</td>\n",
419 |        "      <td>False</td>\n",
420 |        "    </tr>\n",
421 |        "    <tr>\n",
422 |        "      <th>126</th>\n",
423 |        "      <td>MongoDB</td>\n",
424 |        "      <td>126</td>\n",
425 |        "      <td>MongoDB - Send data to Google Sheets</td>\n",
426 |        "      <td>126</td>\n",
427 |        "      <td>MongoDB - Send data to Google Sheets</td>\n",
428 |        "      <td>True</td>\n",
429 |        "      <td>34</td>\n",
430 |        "      <td>Google Sheets - Send data to MongoDB</td>\n",
431 |        "      <td>False</td>\n",
432 |        "    </tr>\n",
433 |        "  </tbody>\n",
434 |        "</table>\n",
435 |        "</div>"
436 |       ],
437 |       "text/plain": [
438 |        "          category  intent_id                                  task  \\\n",
439 |        "34   Google Sheets         34  Google Sheets - Send data to MongoDB   \n",
440 |        "126        MongoDB        126  MongoDB - Send data to Google Sheets   \n",
441 |        "\n",
442 |        "     st_matched_intent_id                st_matched_intent_text  \\\n",
443 |        "34                     34  Google Sheets - Send data to MongoDB   \n",
444 |        "126                   126  MongoDB - Send data to Google Sheets   \n",
445 |        "\n",
446 |        "     st_is_intent_matched  st_matched_intent_id_shuffled  \\\n",
447 |        "34                   True                            126   \n",
448 |        "126                  True                             34   \n",
449 |        "\n",
450 |        "          st_matched_intent_text_shuffled  st_is_intent_matched_shuffled  \n",
451 |        "34   MongoDB - Send data to Google Sheets                          False  \n",
452 |        "126  Google Sheets - Send data to MongoDB                          False  "
453 |       ]
454 |      },
455 |      "execution_count": 13,
456 |      "metadata": {},
457 |      "output_type": "execute_result"
458 |     }
459 |    ],
460 |    "source": [
461 |     "st_data[st_data['st_is_intent_matched_shuffled'] == False]"
462 |    ]
463 |   },
464 |   {
465 |    "cell_type": "code",
466 |    "execution_count": 14,
467 |    "metadata": {
468 |     "pycharm": {
469 |      "name": "#%%\n"
470 |     }
471 |    },
472 |    "outputs": [
473 |     {
474 |      "data": {
475 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEICAYAAACktLTqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQmUlEQVR4nO3df6xfdX3H8eeLFpEJmzBuSe0P27GarbhZzF3nQrI4cQN1WTELW8l0XUJS/yiJZsYEzDZ1WxfcRKOJkJTI7BYm66KGRtkmNhrDMqmFVKTUzkYQru3oFTWCus6W9/64p/Jd+7293957v/22H56P5Jtzzud8Puf7/iY3r3vu53vOuakqJEltOWfUBUiS5p/hLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoIWjLgDgkksuqRUrVoy6DEk6qzz44IPfqaqxfvvOiHBfsWIFu3btGnUZknRWSfKt6fY5LSNJDTLcJalBhrskNchwl6QGGe6S1KAZwz3Ji5PsTPLVJHuSvK9rf2+SbyfZ3b3e2DPm5iT7k+xLcvUwP4Ak6USDXAp5GHhdVT2b5Fzg/iT/2u37UFV9oLdzktXAeuBy4GXA55O8oqqOzmfhkqTpzXjmXlOe7TbP7V4newj8OuDuqjpcVY8B+4G1c65UkjSwgW5iSrIAeBD4ReCjVfVAkjcANyb5Y2AX8M6q+h6wBPhyz/CJru34Y24ENgIsX758Th/idFlx02dHXUJTHr/lTaMuQWrWQF+oVtXRqloDLAXWJnklcDtwGbAGOAjc2nVPv0P0OeaWqhqvqvGxsb53z0qSZumUrpapqu8DXwSuqaqnutB/DriD56deJoBlPcOWAgfmXqokaVCDXC0zluSl3fr5wOuBrydZ3NPtzcAj3fp2YH2S85KsBFYBO+e1aknSSQ0y574Y2NrNu58DbKuqzyT5xyRrmJpyeRx4G0BV7UmyDXgUOAJs8koZSTq9Zgz3qnoYuKJP+1tPMmYzsHlupUmSZss7VCWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1KAZwz3Ji5PsTPLVJHuSvK9rvzjJfUm+0S0v6hlzc5L9SfYluXqYH0CSdKJBztwPA6+rqlcBa4BrkrwGuAnYUVWrgB3dNklWA+uBy4FrgNuSLBhC7ZKkacwY7jXl2W7z3O5VwDpga9e+Fbi2W18H3F1Vh6vqMWA/sHY+i5YkndxAc+5JFiTZDRwC7quqB4BLq+ogQLdc1HVfAjzZM3yia5MknSYDhXtVHa2qNcBSYG2SV56ke/od4oROycYku5LsmpycHKhYSdJgTulqmar6PvBFpubSn0qyGKBbHuq6TQDLeoYtBQ70OdaWqhqvqvGxsbFTr1ySNK1BrpYZS/LSbv184PXA14HtwIau2wbgnm59O7A+yXlJVgKrgJ3zXLck6SQWDtBnMbC1u+LlHGBbVX0myX8C25LcADwBXAdQVXuSbAMeBY4Am6rq6HDKlyT1M2O4V9XDwBV92p8GrppmzGZg85yrkyTNineoSlKDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDVoxnBPsizJF5LsTbInydu79vcm+XaS3d3rjT1jbk6yP8m+JFcP8wNIkk60cIA+R4B3VtVDSS4EHkxyX7fvQ1X1gd7OSVYD64HLgZcBn0/yiqo6Op+FS5KmN+OZe1UdrKqHuvVngL3AkpMMWQfcXVWHq+oxYD+wdj6KlSQN5pTm3JOsAK4AHuiabkzycJI7k1zUtS0BnuwZNsHJfxlIkubZwOGe5ALgk8A7quoHwO3AZcAa4C
Bw67GufYZXn+NtTLIrya7JyclTrVuSdBIDhXuSc5kK9ruq6lMAVfVUVR2tqueAO3h+6mUCWNYzfClw4PhjVtWWqhqvqvGxsbG5fAZJ0nEGuVomwMeAvVX1wZ72xT3d3gw80q1vB9YnOS/JSmAVsHP+SpYkzWSQq2WuBN4KfC3J7q7t3cD1SdYwNeXyOPA2gKrak2Qb8ChTV9ps8koZSTq9Zgz3qrqf/vPo955kzGZg8xzqkiTNgXeoSlKDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQTOGe5JlSb6QZG+SPUne3rVfnOS+JN/olhf1jLk5yf4k+5JcPcwPIEk60SBn7keAd1bVLwOvATYlWQ3cBOyoqlXAjm6bbt964HLgGuC2JAuGUbwkqb8Zw72qDlbVQ936M8BeYAmwDtjaddsKXNutrwPurqrDVfUYsB9YO891S5JO4pTm3JOsAK4AHgAuraqDMPULAFjUdVsCPNkzbKJrkySdJgOHe5ILgE8C76iqH5ysa5+26nO8jUl2Jdk1OTk5aBmSpAEMFO5JzmUq2O+qqk91zU8lWdztXwwc6tongGU9w5cCB44/ZlVtqarxqhofGxubbf2SpD4GuVomwMeAvVX1wZ5d24EN3foG4J6e9vVJzkuyElgF7Jy/kiVJM1k4QJ8rgbcCX0uyu2t7N3ALsC3JDcATwHUAVbUnyTbgUaautNlUVUfnu3BJ0vRmDPequp/+8+gAV00zZjOweQ51SZLmwDtUJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkho0Y7gnuTPJoSSP9LS9N8m3k+zuXm/s2Xdzkv1J9iW5eliFS5KmN8iZ+8eBa/q0f6iq1nSvewGSrAbWA5d3Y25LsmC+ipUkDWbGcK+qLwHfHfB464C7q+pwVT0G7AfWzqE+SdIszGXO/cYkD3fTNhd1bUuAJ3v6THRtkqTTaLbhfjtwGbAGOAjc2rWnT9/qd4AkG5PsSrJrcnJylmVIkvqZVbhX1VNVdbSqngPu4PmplwlgWU/XpcCBaY6xparGq2p8bGxsNmVIkqYxq3BPsrhn883AsStptgPrk5yXZCWwCtg5txIlSadq4UwdknwCeC1wSZIJ4D3Aa5OsYWrK5XHgbQBVtSfJNuBR4AiwqaqODqVySdK0Zgz3qrq+T/PHTtJ/M7B5LkVJkubGO1QlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSgGcM9yZ1JDiV5pKft4iT3JflGt7yoZ9/NSfYn2Zfk6mEVLkma3iBn7h8Hrjmu7SZgR1WtAnZ02yRZDawHLu/G3JZkwbxVK0kayIzhXlVfAr57XPM6YGu3vhW4tqf97qo6XFWPAfuBtfNTqiRpULOdc7+0qg4CdMtFXfsS4MmefhNdmyTpNJrvL1TTp636dkw2JtmVZNfk5OQ8lyFJL2yzDfenkiwG6JaHuvYJYFlPv6XAgX4HqKotVTVeVeNjY2OzLEOS1M9sw307sKFb3wDc09O+Psl5SVYCq4CdcytRknSqFs7UIckngNcClySZAN4D3AJsS3ID8ARwHUBV7UmyDXgUOAJsqqqjQ6pdkjSNGcO9qq6fZtdV0/TfDGyeS1GSpLnxDlVJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JD
XIcJekBi2cy+AkjwPPAEeBI1U1nuRi4J+BFcDjwB9U1ffmVqYk6VTMx5n7b1XVmqoa77ZvAnZU1SpgR7ctSTqNhjEtsw7Y2q1vBa4dwntIkk5iruFewOeSPJhkY9d2aVUdBOiWi+b4HpKkUzSnOXfgyqo6kGQRcF+Srw86sPtlsBFg+fLlcyxDktRrTmfuVXWgWx4CPg2sBZ5KshigWx6aZuyWqhqvqvGxsbG5lCFJOs6swz3JS5JceGwd+B3gEWA7sKHrtgG4Z65FSpJOzVymZS4FPp3k2HH+qar+LclXgG1JbgCeAK6be5mSpFMx63Cvqm8Cr+rT/jRw1VyKkiTNjXeoSlKDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDVoaOGe5Jok+5LsT3LTsN5HknSioYR7kgXAR4E3AKuB65OsHsZ7SZJOtHBIx10L7K+qbwIkuRtYBzw6pPeTXvBW3PTZUZfQjMdvedOoS5izYU3LLAGe7Nme6NokSafBsM7c06et/l+HZCOwsdt8Nsm+IdXyQnQJ8J1RFzGTvH/UFWgE/NmcXy+fbsewwn0CWNazvRQ40NuhqrYAW4b0/i9oSXZV1fio65CO58/m6TOsaZmvAKuSrEzyImA9sH1I7yVJOs5Qztyr6kiSG4F/BxYAd1bVnmG8lyTpRMOalqGq7gXuHdbxdVJOd+lM5c/maZKqmrmXJOms4uMHJKlBhrskNchwb0CmvCXJX3Tby5OsHXVdkkbHcG/DbcBvANd3288w9WwfaeSS/EySP09yR7e9Ksnvjrqu1hnubfj1qtoE/A9AVX0PeNFoS5J+6u+Bw0ydgMDUTY5/PbpyXhgM9zb8pHsSZwEkGQOeG21J0k9dVlV/C/wEoKp+TP9HlGgeGe5t+AjwaWBRks3A/cDfjLYk6af+N8n5PH/ycRlTZ/IaIq9zb0SSXwKuYuqMaEdV7R1xSRIASX4b+DOm/rfD54ArgT+pqi+Osq7WGe4NSLK8X3tVPXG6a5H6SfLzwGuYOvn4clWd8U+GPNsZ7g1I8jWm/uQN8GJgJbCvqi4faWESkORKYHdV/TDJW4BXAx+uqm+NuLSmOefegKr6lar61W65iqn/hHX/qOuSOrcDP0ryKuBdwLeAfxhtSe0z3BtUVQ8BvzbqOqTOkZqaIlgHfKSqPgxcOOKamje0p0Lq9Enypz2b5zD1Z+/kiMqRjvdMkpuBtwC/2V22e+6Ia2qeZ+5tuLDndR7wWabOkqQzwR8ydenjDVX130z9P+W/G21J7fML1bNcdxZ0S1W9a9S1SDpzOC1zFkuysPuvV68edS3S8ZI8Q3fj0vG7gKqqnz3NJb2gGO5nt51Mza/vTrId+Bfgh8d2VtWnRlWYVFV+aTpChnsbLgaeBl7H89e7F2C464yRZBFT92EA3mQ3bIb72W1Rd6XMIzwf6sf4ZYrOCEl+D7gVeBlwCHg5sBfwJrsh8mqZs9sC4ILudWHP+rGXdCb4K6YePfBfVbWSqWcg/cdoS2qfZ+5nt4NV9ZejLkKawU+q6ukk5yQ5p6q+kOT9oy6qdYb72c1nYuts8P0kFwBfAu5Kcgg4MuKamud17mexJBdX1XdHXYfUT5LlVfVEkpcAP2ZqGviPgJ8D7qqqp0daYOMMd0lDkeShqnp1t/7Jqvr9Udf0QuIXqpKGpXfa8BdGVsULlOEuaVhqmnWdBk7LSBqKJEeZumM6wPnAj47twscPDJ3hLkkNclpGkhpkuEtSgwx3SWqQ4S5JDTLcJalB/wf5ekahV2pYlwAAAABJRU5ErkJggg==\n",
476 |       "text/plain": [
477 |        "<Figure size 432x288 with 1 Axes>"
478 |       ]
479 |      },
480 |      "metadata": {
481 |       "needs_background": "light"
482 |      },
483 |      "output_type": "display_data"
484 |     }
485 |    ],
486 |    "source": [
487 |     "st_data['st_is_intent_matched_shuffled'].value_counts().plot.bar()\n",
488 |     "plt.show()"
489 |    ]
490 |   },
491 |   {
492 |    "cell_type": "code",
493 |    "execution_count": 27,
494 |    "metadata": {
495 |     "pycharm": {
496 |      "name": "#%%\n"
497 |     }
498 |    },
499 |    "outputs": [
500 |     {
501 |      "data": {
502 |       "text/html": [
503 |        "<div>\n",
504 |        "<style scoped>\n",
505 |        "    .dataframe tbody tr th:only-of-type {\n",
506 |        "        vertical-align: middle;\n",
507 |        "    }\n",
508 |        "\n",
509 |        "    .dataframe tbody tr th {\n",
510 |        "        vertical-align: top;\n",
511 |        "    }\n",
512 |        "\n",
513 |        "    .dataframe thead th {\n",
514 |        "        text-align: right;\n",
515 |        "    }\n",
516 |        "</style>\n",
517 |        "<table border=\"1\" class=\"dataframe\">\n",
518 |        "  <thead>\n",
519 |        "    <tr style=\"text-align: right;\">\n",
520 |        "      <th></th>\n",
521 |        "      <th>repetitions</th>\n",
522 |        "      <th>tf_time_elapsed</th>\n",
523 |        "      <th>st_time_elapsed</th>\n",
524 |        "    </tr>\n",
525 |        "  </thead>\n",
526 |        "  <tbody>\n",
527 |        "    <tr>\n",
528 |        "      <th>0</th>\n",
529 |        "      <td>0</td>\n",
530 |        "      <td>0.000003</td>\n",
531 |        "      <td>0.000002</td>\n",
532 |        "    </tr>\n",
533 |        "    <tr>\n",
534 |        "      <th>1</th>\n",
535 |        "      <td>1</td>\n",
536 |        "      <td>1.755822</td>\n",
537 |        "      <td>8.272556</td>\n",
538 |        "    </tr>\n",
539 |        "    <tr>\n",
540 |        "      <th>2</th>\n",
541 |        "      <td>2</td>\n",
542 |        "      <td>3.255506</td>\n",
543 |        "      <td>17.061706</td>\n",
544 |        "    </tr>\n",
545 |        "    <tr>\n",
546 |        "      <th>3</th>\n",
547 |        "      <td>3</td>\n",
548 |        "      <td>4.404537</td>\n",
549 |        "      <td>26.071062</td>\n",
550 |        "    </tr>\n",
551 |        "    <tr>\n",
552 |        "      <th>4</th>\n",
553 |        "      <td>4</td>\n",
554 |        "      <td>6.189593</td>\n",
555 |        "      <td>35.721578</td>\n",
556 |        "    </tr>\n",
557 |        "    <tr>\n",
558 |        "      <th>5</th>\n",
559 |        "      <td>5</td>\n",
560 |        "      <td>7.772967</td>\n",
561 |        "      <td>44.145934</td>\n",
562 |        "    </tr>\n",
563 |        "    <tr>\n",
564 |        "      <th>6</th>\n",
565 |        "      <td>6</td>\n",
566 |        "      <td>9.517264</td>\n",
567 |        "      <td>56.337967</td>\n",
568 |        "    </tr>\n",
569 |        "    <tr>\n",
570 |        "      <th>7</th>\n",
571 |        "      <td>7</td>\n",
572 |        "      <td>10.562992</td>\n",
573 |        "      <td>72.224635</td>\n",
574 |        "    </tr>\n",
575 |        "    <tr>\n",
576 |        "      <th>8</th>\n",
577 |        "      <td>8</td>\n",
578 |        "      <td>12.480063</td>\n",
579 |        "      <td>78.887859</td>\n",
580 |        "    </tr>\n",
581 |        "    <tr>\n",
582 |        "      <th>9</th>\n",
583 |        "      <td>9</td>\n",
584 |        "      <td>13.984340</td>\n",
585 |        "      <td>87.984332</td>\n",
586 |        "    </tr>\n",
587 |        "    <tr>\n",
588 |        "      <th>10</th>\n",
589 |        "      <td>10</td>\n",
590 |        "      <td>15.344909</td>\n",
591 |        "      <td>103.883669</td>\n",
592 |        "    </tr>\n",
593 |        "  </tbody>\n",
594 |        "</table>\n",
595 |        "</div>"
596 |       ],
597 |       "text/plain": [
598 |        "    repetitions  tf_time_elapsed  st_time_elapsed\n",
599 |        "0             0         0.000003         0.000002\n",
600 |        "1             1         1.755822         8.272556\n",
601 |        "2             2         3.255506        17.061706\n",
602 |        "3             3         4.404537        26.071062\n",
603 |        "4             4         6.189593        35.721578\n",
604 |        "5             5         7.772967        44.145934\n",
605 |        "6             6         9.517264        56.337967\n",
606 |        "7             7        10.562992        72.224635\n",
607 |        "8             8        12.480063        78.887859\n",
608 |        "9             9        13.984340        87.984332\n",
609 |        "10           10        15.344909       103.883669"
610 |       ]
611 |      },
612 |      "execution_count": 27,
613 |      "metadata": {},
614 |      "output_type": "execute_result"
615 |     }
616 |    ],
617 |    "source": [
618 |     "speed_benchmark = pd.read_csv('output/speed_benchmark.csv')\n",
619 |     "speed_benchmark"
620 |    ]
621 |   },
622 |   {
623 |    "cell_type": "code",
624 |    "execution_count": 28,
625 |    "metadata": {
626 |     "pycharm": {
627 |      "name": "#%%\n"
628 |     }
629 |    },
630 |    "outputs": [
631 |     {
632 |      "data": {
633 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAA3IklEQVR4nO3dd3hUZfbA8e9JQgokJBA6SF1AeoAASo2KiIJgx4q9rsrqomBHXBRddO26rIio7G9R7MoqKF0RpCkCuigoRCIQSoCQnvP7496MAQIMSSbTzud58szcOzP3PaG8Z+5933teUVWMMcYYgAh/B2CMMSZwWFIwxhjjYUnBGGOMhyUFY4wxHpYUjDHGeET5O4CKqFOnjjZv3tzfYRhjTFBZsWJFpqrWLeu1oE4KzZs3Z/ny5f4OwxhjgoqI/Hqk1+zykTHGGA9LCsYYYzwsKRhjjPEI6jGFshQUFJCenk5ubq6/QzFBKjY2liZNmlCtWjV/h2JMlQu5pJCenk5CQgLNmzdHRPwdjgkyqsrOnTtJT0+nRYsW/g7HmCoXcpePcnNzSU5OtoRgykVESE5OtjNNE7ZCLikAlhBMhdi/HxPOQjIpGGNMSPtmCvw8zyeHtqRQyXbu3ElKSgopKSk0aNCAxo0be7bz8/OrJIa3336bdu3accoppzB//nyGDh1aacf+5Zdf6Nix40H7xo0bx6RJk476uddee41bb7210uIwJmwtnQyf3Amr3vDJ4UNuoNnfkpOTWb16NeB0lvHx8YwePbpK2lZVVJUpU6bw4osvepKCMSZEfP0yfDoGThwK57zskybsTKEKrFixggEDBtC9e3fOOOMMMjIyAEhLS2PMmDH07NmTNm3asGjRIgDWrl1Lz549SUlJoXPnzmzYsAGAp556io4dO9KxY0eefvppwPnm3q5dO2655Ra6devGI488wuLFi7npppu46667Dopj165dnHPOOXTu3JmTTjqJ7777DoBOnTqxZ88eVJXk5GRef/11AK644go+//zz4/pd09LSPKVHMjMzKV2basuWLQwePJi2bdvy8MMPH98fojHh7uuX/kgIF0yFqGifNBPSZwoPf7SWdVv3Vuox2zeqyUNnd/D6/arKbbfdxgcffEDdunWZMWMG9913H6+++ioAhYWFLFu2jFmzZvHwww/z+eef8/LLLzNq1Cguu+wy8vPzKSoqYsWKFUydOpWlS5eiqvTq1YsBAwZQq1YtfvzxR6ZOncqLL74IwLx585g0aRKpqakHnSk89NBDdO3alffff5+5c+cycuRIVq9eTZ8+ffjyyy9p1qwZLVu2ZNGiRYwcOZKvv/6al1566bDf6eeffyYlJcWz/fvvv3t1NrRs2TK+//57qlevTo8ePRgyZAipqale/1kaE7aWvACf3QvtznYSQqTv7qHxWVIQkVeBocB2Ve3o7qsNzACaA78AF6nqbve1e4BrgSLgdlX9zFexVaW8vDy+//57Tj/9dACKiopo2LCh5/XzzjsPgO7du/PLL78AcPLJJzNhwgTS09M577zzaN26NYsXL+bcc8+lRo0ans8tWrSIYcOG0axZM0466aRjxrJ48WLeeecdAE499VR27txJVlYW/fr1Y+HChTRr1oybb76ZyZMn89tvv1G7dm3i4+MPO06rVq08l8jAuUzmjdNPP53k5GRP/IsXL7akYMyxfPU8zL4P2g2DC171aUIA354pvAY8D7xeat9Y4AtVnSgiY93tMSLSHrgY6AA0Aj4XkTaqWlSRAI7nG72vqCodOnRgyZIlZb4eExMDQGRkJIWFhQBceuml9OrVi08++YQzzjiDV155BVU9YhslicKbWA4lIvTv358XXniBzZs3M2HCBN577z1mzpxJv379vDpuaVFRURQXFwMcNtf/0KmeNvXTmGP48lmY8wC0PwfOf8XnCQF8OKagqguBXYfsHg5Mc59PA84ptf8/qpqnqpuAn4
CevoqtKsXExLBjxw5PUigoKGDt2rVH/czGjRtp2bIlt99+O8OGDeO7776jf//+vP/++xw4cIDs7Gzee++94+60+/fvz/Tp0wGYP38+derUoWbNmpxwwglkZmayYcMGWrZsSd++fZk0aVK5kkLz5s1ZsWIFADNnzjzotTlz5rBr1y5ycnJ4//336dOnz3Ef35iw8eUzTkLocG6VJQSo+oHm+qqaAeA+1nP3Nwa2lHpfursv6EVERDBz5kzGjBlDly5dSElJ4auvvjrqZ2bMmEHHjh1JSUnhhx9+YOTIkXTr1o2rrrqKnj170qtXL6677jq6du16XLGMGzeO5cuX07lzZ8aOHcu0adM8r/Xq1Ys2bdoA0K9fP3777Tf69u173L/v6NGjeemll+jduzeZmZkHvda3b1+uuOIKUlJSOP/88+3SkTFHsvgfMOdB6Hg+nFd1CQFAjnZZosIHF2kOfFxqTGGPqiaVen23qtYSkReAJar6prt/CjBLVd8p45g3ADcANG3atPuvvx68VsT69etp166dj34jEy7s35Hxm0VPwRcPQ8cL4Nx/QmTlX+UXkRWqWua3sqo+U9gmIg0B3Mft7v504IRS72sCbC3rAKo6WVVTVTW1bt0yV5MzxpjgtHCSkxA6XeizhHAsVZ0UPgSudJ9fCXxQav/FIhIjIi2A1sCyKo7NGGP8Z+HfYe4j0OkivyUE8O2U1P8D0oA6IpIOPARMBN4SkWuBzcCFAKq6VkTeAtYBhcCfKzrzyBhjgsaCJ2DeBOh8MZzzIkRE+i0UnyUFVb3kCC+ddoT3TwAm+CoeY4wJSPMnwvzHoMslMPwFvyYECPE7mo0xJqDNewwWTIQul8Lw5/2eEMCSgjHGVD1V5+xgweOQcjkMezYgEgJYQTyfmDBhAh06dKBz586kpKSwdOnSch1n9erVzJo1q5KjO7Y1a9Z4yn3Xrl2bFi1akJKSwsCBA33S3qJFi+jQoQMpKSnk5OT4pA1jAoYqzHvUSQhdL4dhzwVMQgA7U6h0S5Ys4eOPP2blypXExMSQmZlZ7nUUVq9ezfLlyznrrLMqOcqj69Spk6e20VVXXcXQoUO54IILDnpPYWEhUVGV889n+vTpjB49mquvvtqr9xcVFREZWXn/iSrzdzHmqFRh7t9g0SToNhKGPgMRgfXdPLCiCQEZGRnUqVPHU9OoTp06NGrUCDi+Etr5+fk8+OCDzJgxg5SUFGbMmEF2djbXXHMNPXr0oGvXrnzwgTOj97XXXuO8885j8ODBtG7dmrvvvtsTz6effkq3bt3o0qULp53mjPEf6TjHkpaWxr333suAAQN45pln+Oijj+jVqxddu3Zl4MCBbNu2DXDunL7mmmtIS0ujZcuWPPvss552hwwZQpcuXejYsSMzZszglVde4a233mL8+PFcdtllqCp33XUXHTt2pFOnTsyYMQNwynKccsopXHrppXTq1In58+czYMAALrroItq0acPYsWOZPn06PXv2pFOnTvz8888A7Nixg/PPP58ePXrQo0cPvvzyS0+MN9xwA4MGDWLkyJHl/ws3xluqzpTTRZOg25UBmRAg1M8U/jsWfl9Tucds0AnOnHjElwcNGsT48eNp06YNAwcOZMSIEQwYMICCgoLjLqE9fvx4li9fzvPPPw/Avffey6mnnsqrr77Knj176Nmzp+eSzurVq1m1ahUxMTG0bduW2267jdjYWK6//noWLlxIixYt2LXLKUU1YcKEMo/jTWG9PXv2sGDBAgB2797N119/jYjwyiuv8MQTT/Dkk08C8MMPPzBv3jz27dtH27Ztufnmm/n0009p1KgRn3zyCQBZWVkkJiayePFiz9nIO++8w+rVq/n222/JzMykR48e9O/fH/ij9HaLFi2YP38+3377LevXr6d27dq0bNmS6667jmXLlvHMM8/w3HPP8fTTTzNq1CjuuOMO+vbty+bNmznjjDNYv3494CTpxYsXExcXd9z/DIw5LqrOTW
mL/wHdr4Ih/wjIhAChnhT8ID4+nhUrVrBo0SLmzZvHiBEjmDhxIqmpqcddQvtQs2fP5sMPP/QsfZmbm8vmzZsBOO2000hMTASgffv2/Prrr+zevZv+/fvTokULAGrXrn3U43hT1mHEiBGe5+np6YwYMYKMjAzy8/M97QAMGTKEmJgYYmJiqFevHtu2baNTp06MHj2aMWPGMHTo0DIL7i1evJhLLrmEyMhI6tevz4ABA/jmm2+oWbMmPXv2PKiNHj16eP4MW7VqxaBBgwDn8te8ec76tZ9//jnr1q3zfGbv3r3s27cPgGHDhllCML6nCp+Pgy+fhtRr4KwnAzYhQKgnhaN8o/elyMhI0tLSSEtLo1OnTkybNo3u3bsfdwntQ6kq77zzDm3btj1o/9KlSz2fL30MVS2zPPWRjuON0mcTt912G3feeSfDhg1j/vz5B62rUFY8bdq0YcWKFcyaNYt77rmHQYMG8eCDDx4WmzdtH9pGRESEZzsiIsLzZ1hcXMySJUvK7Py9LTluTLmpOoXtvnoWUq+FsyYFdEIAG1OodD/++KNn+UxwLus0a9aMtm3bHncJ7YSEBM+3WoAzzjiD5557ztNxrlq16qifP/nkk1mwYAGbNm0C8Fw+Ot7jHElWVhaNGzvFbEtXXD2SrVu3Ur16dS6//HJGjx7NypUrD3tP//79mTFjBkVFRezYsYOFCxfSs2f5q6gPGjTIc/kNOGhxIGN8ShVm3+8khB7XwZDAPkMoEfgRBpn9+/dz5ZVX0r59ezp37sy6desYN24c0dHRx11C+5RTTmHdunWegeYHHniAgoICOnfuTMeOHXnggQeO+vm6desyefJkzjvvPLp06eK59HO8xzmScePGceGFF9KvXz/q1KlzzPevWbPGs/b0hAkTuP/++w97z7nnnkvnzp3p0qULp556Kk888QQNGjQoV3wAzz77rKdcePv27Xn5Zd8sdm7MQUoSwpLnoecNzhlCkCwq5dPS2b6WmpqqJYvEl7CSx6Yy2L8jU26qznrKX78IvW6CwRMDLiEcrXR2aI8pGGNMVVKFT++BpS9Br5th8GMBlxCOxZKCMcZUBlX4dCwsfRlOugXOeDToEgKE6JhCMF8SM/5n/37McVOF/97tJISTbw3ahAAhmBRiY2PZuXOn/cc25aKq7Ny5k9jYWH+HYoKFKswaDcsmOwlh0N+CNiFACF4+atKkCenp6ezYscPfoZggFRsbS5MmTfwdhgkGxcVOQlg+BXrfDqePD+qEACGYFKpVq3bQXa/GGOMTxUVuQngV+oyCgQ8HfUKAEEwKxhjjc5sWwWf3OLXV+t4Bpz0UEgkBLCkYY4z3dm2E2Q/ADx9D4glwwavQ4byQSQhgScEYY44tNwsW/h2+fhkio+HUB+DkP0O10CuoaEnBGGOOpKgQVk5zVko7sBO6XuYkhITyl14JdJYUjDGmLD/Phc/ug+3roFkf596DRin+jsrnLCkYY0xpmRucYnb/+xSSmsFFb0C7s0Nq3OBoLCkYYwzAgV2w4An45l8QFefcc9DrJoiKOfZnQ4glBWNMeCsqcO41mP+YM6DcbSScch/E1/N3ZH5hScEYE742zHHKXGf+D1oMcKqa1u/g76j86phJQURaAemqmiciaUBn4HVV3ePb0Iwxxke2r3cGkX/+Amq3gkv+A20Gh824wdF4c6bwDpAqIn8CpgAfAv8GzvJlYMYYU+myd8L8R2H5VIiJhzMec5bKjIr2d2QBw5ukUKyqhSJyLvC0qj4nIuVb1NcYY/yhMN8ZQJ7/OOTvh9RrIO0eqJHs78gCjjdJoUBELgGuBM5291XzXUjGGFNJVOHH/zpTTHf9DH8aCIMmQL0T/R1ZwPImKVwN3ARMUNVNItICeLMijYrIHcB1gAJr3DaqAzOA5sAvwEWqursi7Rhjwtjv3zuDyJsWQJ22cNlMaH26v6MKeFLVi9GISGNgMdBeVXNE5C1gFtAe2KWqE0VkLFBLVccc7V
ipqam6fPly3wdtjAke+7fD3L/BqjcgNhHS7oXUqyHSLnCUEJEVqppa1mtHPFMQkTU43+TLpKqdKxBTFBAnIgU4ZwhbgXuANPf1acB84KhJwRhjPArz4OuXYOEkKMxxbjwbcDfE1fJ3ZEHlaJePhrqPf3Yf33AfLwMOlLdBVf1NRCYBm4EcYLaqzhaR+qqa4b4nQ0TKvHNERG4AbgBo2rRpecMwxoQKVVj/oVPSes+v0OZMZ0nMOn/yd2RB6YhJQVV/BRCRPqrap9RLY0XkS2B8eRoUkVrAcKAFsAd4W0Qu9/bzqjoZmAzO5aPyxGCMCQEFubD2XWdt5K2roF57uOJ9aHWKvyMLat4MNNcQkb6quhhARHoDNSrQ5kBgk6rucI/3LtAb2CYiDd2zhIbA9gq0YYwJVbt/ccpSrHwDcnZBnTZw9jOQcjlEWpGGivLmT/Ba4FURSXS39wDXVKDNzcBJIlId5/LRacByIBtn2utE9/GDCrRhjAklxcXw0+fwzSuwYTZIBJx4FvS4Hlr0tzuRK9Exk4KqrgC6iEhNnNlKWRVpUFWXishMYCVQCKzCuRwUD7wlItfiJI4LK9KOMSYEHNgFq96E5VOcM4Qa9aD/XdD9Kkhs7O/oQpI3tY9igPNx7h+IEjcjq2q5xhTczz4EPHTI7jycswZjTLj7baVzVvD9O1CYC017w2kPwolnW0kKH/Pm8tEHQBawAqfjNsaYyleQA2vfg2X/gq0roVoNSLnUqU0U5pVLq5I3SaGJqg72eSTGmPC0+xf4Zopzs1nObmfg+My/Q5cRzs1npkp5kxS+EpFOqrrG59EYY8KDZ+D4X86aBhIBJw5xzgps4NivvEkKfYGrRGQTzuUjAbSCdzQbY8LRgV3OGcE3U5wbzWzgOOB4kxTO9HkUxpjQ9tsKJxGUDBw36wMDH7KB4wDkzZTUX0WkC9DP3bVIVb/1bVjGmKBXkAPfv+vMIrKB46DhzZTUUcD1wLvurjdFZLKqPufTyIwxwWnXJueOY8/AcVt34PhiiK3p7+jMMXh7R3MvVc0GEJHHgSWAJQVjzB9++hyW/vPggeOe10PzfjZwHES8SQoCFJXaLnL3GWOMY8ETMG8CxNd3ylV3u9IGjoOUN0lhKrBURN5zt88BpvgsImNMcJn/OMx/FDpfDMOes4HjIOfNQPNTIjIfZ2qqAFer6ipfB2aMCQLzHoMFE6HLpTD8eYiI9HdEpoK8GWg+CVirqivd7QQR6aWqS30enTEmMKnC/MdgweNOyephz1pCCBERXrznJWB/qe1sd58xJhypOuMHCx6Hrpc7l4wsIYQMb5KCqKpnhTNVLca7sQhjTKhRhbl/g4V/h24j4eznIMKbbsQEC2/+NjeKyO0iUs39GQVs9HVgxpgAowpfjIdFk5zZRUOfsYQQgrz5G70JZ7nM34B0oBdwgy+DMsYEGFX4fBwsfgq6Xw1Dn7aEEKK8mX20Hbi4CmIxxgQiVZjzIHz1LKReC2dNsoQQwo75NysibUTkCxH53t3uLCL3+z40Y4zfqcKcB5yE0OM6GPKkJYQQ583f7r+Ae4ACAFX9DjtzMCb0qcLs++Gr56DnDc4ZgpWrCHneJIXqqrrskH2FvgjGGBMgVOGze2HJ89DrJjjzCUsIYcKbqaWZItIKUAARuQDI8GlUxhj/UYVP74GlL0Gvm2HwY5YQwog3SeHPwGTgRBH5DdgEXO7TqIwx/qEK/x0Dy/4JJ90CZzxqCSHMeDP7aCMwUERqABGqus/3YRljqpwqzLrLWTf55Fth0N8sIYQhb2YfjRKRmsAB4B8islJEBvk+NGNMlSkuhk/+6iSE3rdZQghj3gw0X6Oqe4FBQD3gamCiT6MyxlSd4mKY9VdYPgX6jILTH7GEEMa8qn3kPp4FTHXXZ7Z/McaEguJi+OROZ/nMPn+BgQ9bQghz3iSFFSIyGycpfCYiCUCxb8MyxvhccTF8/BdYMRX63gkDx1lCMF6v0ZwCbFTVAy
KSjHMJyRgTrIqL4aPbYdUb0G80nHq/JQQDeDf7qBhYWWp7J7DTl0EZY3youBg+ug1WvQn974ZT7rWEYDz8UsRERJJEZKaI/CAi60XkZBGpLSJzRGSD+1jLH7EZE9KKi+DDW52EMGCMJQRzGH9VtnoG+FRVTwS6AOuBscAXqtoa+MLdNsZUluIi+OBWWD0d0u6xhGDK5NUKaiISCdQv/X5V3VyeBt17HvoDV7nHyQfyRWQ4kOa+bRowHxhTnjaMMYcoLoL3b4Hv/gNp90Ka/dcyZTtmUhCR24CHgG38MetIgc7lbLMlsAOYKiJdgBXAKKC+qmYAqGqGiNQ7Qjw34C7y07Rp03KGYEwYKS6C92+G72bAKffDgLv8HZEJYN5cPhoFtFXVDqrayf0pb0IAJxF1A15S1a5ANsdxqUhVJ6tqqqqm1q1btwJhGBMGigrhvRudhHDqA5YQzDF5kxS2AFmV2GY6kK6qS93tmThJYpuINARwH7dXYpvGhJ+ShLDmbTjtQeg/2t8RmSDgzZjCRmC+iHwC5JXsVNWnytOgqv4uIltEpK2q/gicBqxzf67EKaFxJfBBeY5vjMFJCO9eD2vfdW5K63uHvyMyQcKbpLDZ/Yl2fyrDbcB0EYnGSTpX45y1vCUi17rtXVhJbRkTXooK4d3rYO17cPp4p56RMV7y5ua1hyu7UVVdDaSW8dJpld2WMWGlqADeuQ7Wve9UOu19m78jMkHmiElBRJ5W1b+IyEe4q66VpqrDfBqZMeb4ZO90zhB+nguDJkDvW/0dkQlCRztTeMN9nFQVgRhjKmDLMnj7KsjeAWc/C92v9HdEJkgdMSmo6gr3cUHVhWOMOS6q8PWLMOdBqNkYrp0DjVL8HZUJYl7d0WyMCUC5WfDBn2H9R3DiUBj+AsQl+TsqE+QsKRgTjDK+g7dGwp7NzoDyybdaHSNTKSwpGBNMVGHlNJh1N1RPhqtnQdOT/B2VCSFHm31U5qyjEjb7yJgqlp8NH9/pFLVreQqc/wrUqOPvqEyIOdqZQsmso/OABsCb7vYlwC8+jMkYc6gd/3MuF+34wSl73f8uiIj0d1QmBB1t9tECABF5RFX7l3rpIxFZ6PPIjDGONTPho1EQFQNXvAutTvV3RCaEeTOmUFdEWqrqRgARaQFYeVJjfK0wDz67F755BU7oBRdMhcTG/o7KhDhvksIdOAXxNrrbzYEbfRaRMQZ2/wpvXwlbVzkziwaOg8hq/o7KhAFvah99KiKtgRPdXT+oat7RPmOMqYAf/+uUvFZgxHRoN9TfEZkw4s3Ka9WBO4Fmqnq9iLR2y15/7PvwjAkjRYUwdzx8+Qw06AwXTYPaLf0dlQkz3lw+moqzZObJ7nY68DZgScGYyrLvd5h5Dfz6JXS/GgZPhGqx/o7KhCFvkkIrVR0hIpcAqGqOiN06aUyl2bgA3rnWuQ/h3MnQZYS/IzJhzJukkC8icbg3solIK0qtwGaMKafiYlj0JMx/FJJbw5UfQ70Tj/05Y3zIm6TwEPApcIKITAf6AFf5MihjQl72TnjvBvjpc+h0IQx9GmLi/R2VMV7NPpojIiuBkwABRqlqps8jMyZUbfnGXftgOwx5ClKvsWJ2JmBEHOsNItIHyFXVT4Ak4F4RaebrwIwJOarw9UswdbBTouLa2dDjWksIJqAcMykALwEHRKQLcBfwK/C6T6MyJtTkZjm1iz4dC60HwY0LoFFXf0dlzGG8GVMoVFUVkeHAs6o6RURsrT9jvPX7Gich7P4VTh8PvW+3swMTsLxJCvtE5B7gCqCfiEQCdr+9Md5Y+QbMGg1xteCqj6FZb39HZMxReXP5aATOFNRrVPV3oDHwd59GZUywyz8A798CH97qFLO7cZElBBMUjpkU3ETwb6CWiJwN5KuqjSkYUxZV+N9s+NcpsPrfMGAMXPEexFthYRMcvJl9dB2wDGexnQuAr0XkGl8HZkzQ+fUrmHom/PtCp+z15e/AKffaYjgmqHgzpn
AX0FVVdwKISDLwFfCqLwMzJmhkfAtfPAI/zYH4Bs69B91GWqlrE5S8SQrpwL5S2/uALb4Jx5ggkvkTzJsAa9+F2CRnZlGP6yG6ur8jM6bcvEkKvwFLReQDnPpHw4FlInIngKo+5cP4jAk8Wemw4HFYNR2iYp31knvfBrGJ/o7MmArzJin87P6U+MB9TKj8cIwJYNmZsOgpZ3lMFHpeD/3+CvH1/B2ZMZXGm9pHD/uiYfd+h+XAb6o6VERqAzNwlvv8BbhIVXf7om1jjkvuXljyAix5HgoOQJdLIW0MJDX1d2TGVDpvVl6rC9wNdAA8q36o6qkVbHsUsB6o6W6PBb5Q1YkiMtbdHlPBNowpv4Jc56xg0ZOQswvaDYNT74e6bf0dmTE+483Na9OBH4AWwMM43+K/qUijItIEGAK8Umr3cGCa+3wacE5F2jCm3IoKYcVr8Fw3mH0fNOwC18+DEW9YQjAhz5sxhWS33tEoVV0ALBCRBRVs92mcs4/S4xL1VTUDQFUzRKTMC7UicgNwA0DTpnb6bipRcTGsew/mToBdP0PjVDj3ZWjR39+RGVNlvEkKBe5jhogMAbYCTcrboIgMBbar6goRSTvez6vqZGAyQGpqqpY3DmM8VGHDHJg73ileV689XPx/0PZMK1xnwo43SeFvIpII/BV4DmcM4I4KtNkHGCYiZ+GMUdQUkTeBbSLS0D1LaAhsr0Abxnjn1yXwxcOweQkkNXPWSO50gd2FbMKWN7OPPnafZgGnVLRBVb0HuAfAPVMYraqXi8jfgSuBie7jB0c6hjEVlvEdzH0ENsyG+Pow5EnoOhKiov0dmTF+dcSkICLP4dysViZVvb2SY5kIvCUi1wKbgQsr+fjGwM6fnbuQv3/HuQt54DjoeaPdhWyM62hnCst93biqzgfmu893Aqf5uk0TprJ+c+9CfhOiYpybznrfDnFJ/o7MmIByxKSgqtNKb4tIDVXN9n1IxlSi7J2w+ClY9i/QYuhxnZMQEur7OzJjApI3N6+dDEwB4oGm7lrNN6rqLb4OzphyK8yHr1+EhZOgIBs6XwxpY6FWM39HZkxA82b20dPAGcCHAKr6rYjYxG0TuDYugFl3QeaP0GYwDHwY6p3o76iMCQreJAVUdYscPF+7yDfhGFMBe7fC7PudQeSkZnDJDGg72N9RGRNUvEkKW0SkN6AiEg3cjlOzyJjAUFQAS/8J8x9zng8YC33/AtXi/B2ZMUHHm6RwE/AM0BhnwZ3ZwJ99GZQxXvvlS5g1Gravg9aD4MzHoXZLf0dlTNDy5ua1TOCyKojFGO/t2wZzHoDvZkBiU7j439D2LCtLYUwFeTWmYEzAKCp0ylnPmwAFOc700n6j7eYzYyqJJQUTPDYvhU/+CtvWQMtT4KxJUOdP/o7KmJBiScEEvv074PNxsPpNqNkYLpwG7YfbpSJjfMCbm9fqA48CjVT1TBFpD5ysqlN8Hp0Jb8VFsGIqfDEe8rOhzyjofzfExPs7MmNCljdnCq8BU4H73O3/4aylbEnB+E76CvjkTshYDc37OVVMbdUzY3zOm+U466jqW0AxgKoWYjevGV85sAs+vB1eOQ32/Q7nT4ErP7KEYEwV8eZMIVtEknHLaIvISThrKxhTeYqLYdXrzthB7l44+c8wYAzE1vR3ZMaEFW+Swp04dY9aiciXQF3gAp9GZcLL1lXOrKLfVkDT3jBkEtTv4O+ojAlL3ty8tlJEBgBtAQF+VNWCY3zMmGPL2Q1z/wbfTIEadeDcf0LnETaryBg/8mb2USRwFtDcff8gEUFVn/JxbCZUFRfDt/+GOQ86iaHXjZB2jy14Y0wA8Oby0UdALrAGd7DZmHLL+M6pVbRlKTTp6cwqatjZ31EZY1zeJIUmqmr/a03F5GbBvEdh2WSIqwXDX4Aul0KENxPgjDFVxZuk8F8RGaSqs30ejQk9xcWw5i2Y/QBk74DUa+DU+6F6bX9HZowpgzdJ4WvgPRGJAApwBptVVW
2uoDm6X5fAZ/fC1pXQqBtcOgMad/N3VMaYo/AmKTwJnAysUVX1cTwmFOzaBJ8/BOs+gIRGzqyiThfZpSJjgoA3SWED8L0lBHNMOXtg0SRnFbSIKEi7F3rfCtE1/B2ZMcZL3iSFDGC+iPwXyCvZaVNSjUdRoVO4bv5jTpmKlMuccYOaDf0dmTHmOHmTFDa5P9HujzEOVdgwB2bfD5k/OoXrzpgADbv4OzJjTDl5c0fzw1URiAky29bCZ/fBxnlQu5Uth2lMiDhiUhCR51X1VhH5CLcYXmmqOsynkZnAtH+7sxTmytchpiYMngip10KUnUQaEwqOdqYwErgVmFRFsZhAVpADX78Ii56CwlzoeSMMuNvuNzAmxBwtKfwMoKoLqigWE4hU4ft3nJLWWVug7RA4fbytjWxMiDpaUqgrInce6cXyzj4SkROA14EGOLWUJqvqMyJSG2dFt+bAL8BFqrq7PG2YSrJlmXPzWfo30KATnPMitOjv76iMMT50tKQQCcTj3MFcmQqBv7oluROAFSIyB7gK+EJVJ4rIWGAsMKaS2zbe2P2rc2aw9l2IbwDDX4QuF0NEpL8jM8b42NGSQoaqjq/sBlU1A+feB1R1n4isBxoDw4E0923TgPlYUqhauXth8VOw5EWQCGfls963Q0y8vyMzxlSRoyUFn88tFJHmQFdgKVDfTRioaoaI1DvCZ24AbgBo2rSpr0MMD0WFzlKYcyfAgUzocgmc+gAkNvZ3ZMaYKna0pHCaLxsWkXjgHeAvqrpXvJzfrqqTgckAqampVnqjon76HD67H3asd5bCPONtK1pnTBg7YlJQ1V2+alREquEkhOmq+q67e5uINHTPEhoC233VvgG2/+DcifzTHKjVAi56A9qdbTefGRPmvClzUanEOSWYAqw/ZAbTh8CVwET38YOqji0sZGc6i92seA2i42HQBOh5PUTF+DsyY0wAqPKkAPQBrgDWiMhqd9+9OMngLRG5FtgMXOiH2EJXQS4s+ycsnAT52dDjOmcguUayvyMzxgSQKk8KqrqYIw9i+3QcIywVF8P3M+GLRyBrM7Q507n5rG4bf0dmjAlA/jhTMFVl4wKY8wBkfAsNOsPw56Blmr+jMsYEMEsKoWj7epjzIGyYDYknwLmTodOFtvKZMSGisKiY/KJiqkdXfhduSSGU7M1wKpiung7RCc5lop43QrVYf0dmjDkKVWVvTiGZ2Xns3J/Pzv15ZGbns2t/PjvdfZn789iVnc/O7Hx2H8hneJdGPH1x10qPxZJCKMjbB18+C0ueh6IC6HUz9B9tFUyN8RNV5UB+kdOZZ+d5OvfM/flOp5/tdPCZbgLYlZ1PYXHZt10lxlUjOT6aOjViaFU3np4tokmOj6FT40SfxG5JIZgVFcDKaTB/ImTvgI7nO3ci127h78iMCTnFxcqenAJ27MtzfvbnsmOf+80+2+ncnUen088tKC7zODWiI6kdH01yjRgaJ8XSuXEiyfHR1K4RTZ34GJLd15Ljo6lVPZroqKq97GtJIRipwg+fOEXrdm6AZn3gkhnQpLu/IzMm6BzIL/yjo9+Xx479eWVuZ+7Po6Do8G/z0ZERTkfuduZ/qhvvbseQ7Hb0tWv88XpcdGAXlrSkEGy2fOPMKNq8BOq0gUv+A20G253IxpRSWFTMzuz8wzr37XtzD+v0s/OLDvt8hEByfAx142OomxBDm/oJ1E34Y7v0T0JMFN6W6QkGlhSCxa6N8PnDsO59qFEPhv4Duo6ESPsrNOElO6+Q3/bk8NvuHOdxTw7bsg7u7HcdyEfLuESfEBtFPbcz79Qk6fBO3t2uXSOayIjQ6eiPh/UogS57Jyx8Ar6ZApHVYMBY6H2blbM2IUlVydyfz1a3sy/d8Zc8z8opOOgzURFC/Zqx1E2I4YTa1enWrNYRO/vYaoF96SYQWFIIVAU5sPRlZ03k/P3QbSSk3QMJDfwdmTHlVlBUzO9ZuaTvzjms4y/Zzis8eIA2PiaKxk
lxNEqKpVuzJBonVadRUixNasXROKk6dRNiwvZbvS9YUgg0xcXw3QyY+zfYm+6MFwx8GOqd6O/IjDmm/XmFTue+O4d097F0579tX+5hl3XqJsTQKCmOdg1rMrB9fRolxtK4VnUaJ8XROCmOmnGhdc0+0FlSCCQ/z4XZD8K2NdAwBc59GVr083dUJswUFBWzN6eArJwC9uYWkuU+z8opYK/749nO/eP5ngMF7MstPOhY1SKFholO5963dR0aJcXRJCmOxrXiaJQUR8PEWLukE2AsKQSC3793ylL8/AUkNYXzp0CH86wshSkXVSWvsPiwzvyP54d09LkHv36gjNk4pcVERZAYV42acdVIjKtGvYRYWtdLIDGuGg0SY2nkfsNvUiuOuvExRNilnaBiScGfsn5zy1L8G2ITbW0Dc0wFRcVs35fH71m5/J6VS0ZWjvO4N9ezb8f+PPILy75xqkR8TBQ1Y6M8HXvT2tU9z0t+asZFHbwd6yQC+2Yf2iwp+ENuFix+Gr5+EbQYet8K/f4KcbX8HZnxo9yCIrbvzXM6+r25ZBza8bsd/qHX5GOrRdAwMY4GNWPp1aI2dRNiDuvgS3+zrxkbRVSknYWasllSqEqFebB8qjPF9MBOp3LpqQ9ArWb+jsz42IH8wlKdfC6/Z+V4tn93v+XvzM4/7HMJMVE0SIylQWIsbRsk0CDRuQ7fIDGWhomxNKxpA7GmcllSqApFBU7l0gV/d2YUNe8Hgx6BRpVf4dBUrYKiYnbsy2Pb3ly278tzfvbmsn1vnqezz8jKYe8hA7AAtapX83TyXU5IomHNks4+zpMI4mPsv6ipWvYvzpeKi2DNTJj/GOzeBI1T4ZwXoMUAK0sR4PIKnUs52/flsWNfLtv25rF9n9PZbyvp+Pc51S0PVVIioWFiLM2Sq3NSy9oHfcNv4Hb+dm3eBCJLCr5QXAw/fATzHoUdP0D9Tk7BujZnWDLws5Lr9ttLd/TuN/3S3/j3HCg47LOREULd+Bjq14yhSS3nztn6CbHUqxlDvYQY6teMpV5CDMnxdjOVCV6WFCqTKmyYA3Mfgd+/cwrWXfgatBtu00t9TFXZlZ1PhnvNPqPUNXvPpZ29uWVexqkWKdRLcMoktKhTg14tkj2dfN1SHX7t6tE2vdKEPEsKlWXTQucu5C1LIakZnPMydL4IIuwSQUWpKrsPFLB1T47nGn1J5791zx8zdQ6dhlkt0qmJU79mLK3rxdP3T3Wom1DqW33NGOolxFKrejUbqDXGZUmhorYsc84MNi2EhEZu9dIrnOJ15phUlT0HCtjqTrvcWjIzZ0+uZ19GVu5h9XBKOvyGibF0aZLE4I6xNKwZS0P3LtmGiXEk17Bv9sYcL0sK5ZXxLcydABs+gxp1YfBE6H61rYfsUlWy84vYcyCfPQcK2L4vl617ct2O/4/OPiMr57AVqkqqXjZMjKVTkyQGdYj1dPQNE2NpmBRLnRp2p6wxvmBJ4Xht/wHmPwrrPoDYJDjtIeh5Q8iWsi5Za3ZPToGng99zoIA9OSXP89nt7svKOfh5WatURUYI9RNiaJgUR4dGNRnYrl6pzj6ORomxNlBrjB9ZUvDWro3OWsjfvQXRNWDAGDjpFohL8ndkXsvJL2J3Scee80cHv/tAvlvQzOnUs9zXS57nFx25ZEJctUiSqlcjqXo0SXHVaFM/nsS4aJKqV6NW9WokxUWTWL0a9RJiaJgYZ2WOjQlwlhSOJSsdFjwBq96EyGhngZs+f4Eayf6ODHBuntq5P/+ghcSPtNZsWcsOloiJiqBW9Wi3g69Gyzrxf3T2bgf/R2fvPCZaHRxjQo4lhSPZtw0WPwXLX3Wmmva41qlPVAWL3JQMvh5pAfHS22XdPAVQMzbKs+pUybKDyfHR1K4RXWYHb527MQYsKRzuwC748hlYNtmpVZRyKQy42ylpXUEld8mW3Ch1pE4/c39emdfjY6IiPB19s+TqpDavddhyg3UTYqgTb8sOGmPKx5
JCidy9TtXSJS9A3j7odIGz/GVyK+8+XlD0R7GzvTmemTYl279n5ZK5/8glEUo69Tb1Ew7r5Et+EmKs8JkxxrcCLimIyGDgGSASeEVVJ/q0wfxs56zgy2cgZze0OxvS7oX67T1vObTCZcaeHE/9+pKKl7vLKIuQGFfNnUoZS6fGSZ7aN/USYjx30NauEW0Dr8aYgBFQSUFEIoEXgNOBdOAbEflQVddVemNuGeviRU8Skb2dXQ0HsKL7LayjFRmLc8jIWnbUCpfJNaJpkBhL46RYujdL8tSz9xQ9S4ylenRA/fEaY8wxBVqv1RP4SVU3AojIf4DhQKUmhZ9XLyThg6upp5ksK27HpIKbWL7pRNhUBPyPuglHrnDZMNEpm2DX7I0xoSjQkkJjYEup7XSgV+k3iMgNwA0ATZuWb/A3tv6fyIxtzqdNHiC3SV9GJlVnjFvSuH7NWKKjrHidMSY8BVpSKOvi+kHTcFR1MjAZIDU19fApOl5o3LARjcd8Qftjv9UYY8JKoH0lTgdOKLXdBNjqp1iMMSbsBFpS+AZoLSItRCQauBj40M8xGWNM2Aioy0eqWigitwKf4UxJfVVV1/o5LGOMCRsBlRQAVHUWMMvfcRhjTDgKtMtHxhhj/MiSgjHGGA9LCsYYYzwsKRhjjPEQ1XLd/xUQRGQH8GsFDlEHyKykcIJBuP2+YL9zuLDf+fg0U9W6Zb0Q1EmhokRkuaqm+juOqhJuvy/Y7xwu7HeuPHb5yBhjjIclBWOMMR7hnhQm+zuAKhZuvy/Y7xwu7HeuJGE9pmCMMeZg4X6mYIwxphRLCsYYYzzCMimIyGAR+VFEfhKRsf6Ox9dE5AQRmSci60VkrYiM8ndMVUVEIkVklYh87O9YqoKIJInITBH5wf37PtnfMfmSiNzh/pv+XkT+T0Ri/R2TL4jIqyKyXUS+L7WvtojMEZEN7mOtymgr7JKCiEQCLwBnAu2BS0Qk1BdhKwT+qqrtgJOAP4fB71xiFLDe30FUoWeAT1X1RKALIfy7i0hj4HYgVVU74pTbv9i/UfnMa8DgQ/aNBb5Q1dbAF+52hYVdUgB6Aj+p6kZVzQf+Awz3c0w+paoZqrrSfb4Pp6No7N+ofE9EmgBDgFf8HUtVEJGaQH9gCoCq5qvqHr8G5XtRQJyIRAHVCdGVGlV1IbDrkN3DgWnu82nAOZXRVjgmhcbAllLb6YRBB1lCRJoDXYGlfg6lKjwN3A0U+zmOqtIS2AFMdS+ZvSIiNfwdlK+o6m/AJGAzkAFkqeps/0ZVpeqragY4X/yAepVx0HBMClLGvrCYlysi8cA7wF9Uda+/4/ElERkKbFfVFf6OpQpFAd2Al1S1K5BNJV1SCETuNfThQAugEVBDRC73b1TBLxyTQjpwQqntJoToKWdpIlINJyFMV9V3/R1PFegDDBORX3AuEZ4qIm/6NySfSwfSVbXkLHAmTpIIVQOBTaq6Q1ULgHeB3n6OqSptE5GGAO7j9so4aDgmhW+A1iLSQkSicQamPvRzTD4lIoJznXm9qj7l73iqgqreo6pNVLU5zt/xXFUN6W+Rqvo7sEVE2rq7TgPW+TEkX9sMnCQi1d1/46cRwgPrZfgQuNJ9fiXwQWUcNODWaPY1VS0UkVuBz3BmK7yqqmv9HJav9QGuANaIyGp3373uetgmtNwGTHe/8GwErvZzPD6jqktFZCawEmeG3SpCtNyFiPwfkAbUEZF04CFgIvCWiFyLkyAvrJS2rMyFMcaYEuF4+cgYY8wRWFIwxhjjYUnBGGOMhyUFY4wxHpYUjDHGeFhSMEFHRFREniy1PVpExvmgnf8Tke9E5I7KPvYR2msuIpeW2k4VkWfd52ki0rvUazeJyMiqiMuEl7C7T8GEhDzgPBF5TFUzfdGAiDQAeqtqs2O8L0pVCyup2ebApcC/AVR1ObDcfS0N2A985b72ciW1acxB7EzBBKNCnJuUDvsGLyLNROQL9xv+FyLS9GgHEpFYEZkqIm
vcInKnuC/NBuqJyGoR6XfIZ14TkadEZB7wuIi0EpFPRWSFiCwSkRNLve9ld9//3HpMJWs8/F1EvnHjvNE99ESgn9vmHe7ZwcduEcObgDtK4hGRcSIy2j1eioh87R7rvZK6+iIyX0QeF5Flbvv93P0d3H2r3c+0Ls9fgglNlhRMsHoBuExEEg/Z/zzwuqp2BqYDzx7jOH8GUNVOwCXANHehlmHAz6qaoqqLyvhcG2Cgqv4VJ0HdpqrdgdHAi6Xe1xwYgFPC+2X32NfiVPTsAfQArheRFjjF6xa5bf6j5ACq+gvwMvCPI8TzOjDG/Z3X4NztWiJKVXsCfym1/ybgGVVNAVJxaiYZA9jlIxOkVHWviLyOs8hKTqmXTgbOc5+/ATxxjEP1BZ5zj/mDiPyK0+Efq4rs26pa5Fae7Q287ZTfASCm1PveUtViYIOIbAROBAYBnUXkAvc9iUBrIP8YbR7GTYpJqrrA3TUNeLvUW0qKH67ASVAAS4D73PUm3lXVDcfbrgldlhRMMHsap+7N1KO851h1XMoqpe6NbPcxAtjjfuv2pn1127xNVT87KBCRtHLGcjR57mMR7v93Vf23iCzFOXv5TESuU9W5PmjbBCG7fGSClqruAt7CuRxT4iv+WJLxMmDxMQ6z0H0fItIGaAr8eBwx7AU2iciF7jFERLqUesuFIhIhIq1wFsH5EacY481uOXNEpI27GM4+IOEITZX5mqpmAbtLjXtcASw49H2liUhLYKOqPotTabOzd7+tCQeWFEywexKoU2r7duBqEfkOp4McBZ4pnDeV8fkXgUgRWQPMAK5S1bwy3nc0lwHXisi3wFoOXt71R5xO+r/ATaqai7M86DpgpTgLsf8T51v8d0ChiHxbxjTYj4Bzyxr4ximb/Hf3d04Bxh8j3hHA927F3BNxxiSMAaxKqjE+IyKvAR+r6kx/x2KMt+xMwRhjjIedKRhjjPGwMwVjjDEelhSMMcZ4WFIwxhjjYUnBGGOMhyUFY4wxHv8P/ozgIXOy7MUAAAAASUVORK5CYII=\n",
634 |       "text/plain": [
635 |        "<Figure size 432x288 with 1 Axes>"
636 |       ]
637 |      },
638 |      "metadata": {
639 |       "needs_background": "light"
640 |      },
641 |      "output_type": "display_data"
642 |     }
643 |    ],
644 |    "source": [
645 |     "plt.plot(speed_benchmark['repetitions'], speed_benchmark['tf_time_elapsed'], label='Tensorflow Hub')\n",
646 |     "plt.plot(speed_benchmark['repetitions'], speed_benchmark['st_time_elapsed'], label='Sentence Transformer')\n",
647 |     "\n",
648 |     "plt.xlabel('No. of repetitions')\n",
649 |     "plt.ylabel('Time elapsed in seconds')\n",
650 |     "plt.legend(loc=\"upper left\")\n",
651 |     "\n",
652 |     "plt.show()"
653 |    ]
654 |   },
655 |   {
656 |    "cell_type": "code",
657 |    "execution_count": 10,
658 |    "metadata": {
659 |     "pycharm": {
660 |      "name": "#%%\n"
661 |     }
662 |    },
663 |    "outputs": [
664 |     {
665 |      "data": {
666 |       "text/plain": "<IPython.core.display.HTML object>",
667 |       "text/html": "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">make scatter plot of \n<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n    time_elapsed\n    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">COLNAME</span>\n</mark>\n and \n<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n    height_from_ground_level\n    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">COLNAME</span>\n</mark>\n from \n<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n    df\n    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">VARNAME</span>\n</mark>\n</div></span>"
668 |      },
669 |      "metadata": {},
670 |      "output_type": "display_data"
671 |     }
672 |    ],
673 |    "source": [
674 |     "import spacy\n",
675 |     "nlp = spacy.load(\"training/model-best\")\n",
676 |     "doc = nlp('make scatter plot of time_elapsed and height_from_ground_level from df')\n",
677 |     "spacy.displacy.render(doc, style=\"ent\", jupyter=True)"
678 |    ]
679 |   }
680 |  ],
681 |  "metadata": {
682 |   "kernelspec": {
683 |    "display_name": "Python 3 (ipykernel)",
684 |    "language": "python",
685 |    "name": "python3"
686 |   },
687 |   "language_info": {
688 |    "codemirror_mode": {
689 |     "name": "ipython",
690 |     "version": 3
691 |    },
692 |    "file_extension": ".py",
693 |    "mimetype": "text/x-python",
694 |    "name": "python",
695 |    "nbconvert_exporter": "python",
696 |    "pygments_lexer": "ipython3",
697 |    "version": "3.9.12"
698 |   }
699 |  },
700 |  "nbformat": 4,
701 |  "nbformat_minor": 1
702 | }


--------------------------------------------------------------------------------
/scripts/generate_training_data.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import string
  3 | import pickle
  4 | import random
  5 | from enum import Enum
  6 | from random import randint
  7 | 
  8 | import fire
  9 | import pandas as pd
 10 | from tqdm import tqdm
 11 | 
 12 | 
class Entities(Enum):
    """Entity labels attached to placeholder replacements in generated text.

    ``TrainDataGenerator._get_entity_type`` looks for the lower-cased value of
    each member inside a template placeholder (members are tried in the
    declaration order below), and the matching value is stored as the label of
    the replaced character span.
    """

    # Replaced with a short variable-style name (e.g. "df", "mydf").
    VARNAME = "VARNAME"
    # Replaced with a randomly generated lowercase column name.
    COLNAME = "COLNAME"
    # Replaced with a file name (e.g. "train.csv").
    FNAME = "FNAME"
    # Replaced with a library name (e.g. "pandas").
    LIBNAME = "LIBNAME"
    # Replaced with a random integer rendered as a string.
    CARDINAL = "CARDINAL"
    # Replaced with an aggregate-function word (e.g. "mean", "count").
    FUNCTION = "FUNCTION"
 20 | 
 21 | 
 22 | class TrainDataGenerator:
 23 | 
 24 |     def __init__(self, mode):
 25 |         template_file = "../jupyter_text2code/jupyter_text2code_serverextension/data/ner_templates.csv"
 26 |         self.templates_df = pd.read_csv(template_file)
 27 |         self.mode = mode  # intent or ner
 28 | 
 29 |         self.num_templates = self.templates_df.shape[0]
 30 |         print("*" * 10)
 31 |         print(self.num_templates, " templates loaded")
 32 |         print("*" * 10)
 33 | 
 34 |     def _get_entity_type(self, entity_str):
 35 |         for entity in Entities:
 36 |             if entity.value.lower() in entity_str:
 37 |                 return entity.value
 38 |         print("Cannot find entity in db", entity_str)
 39 |         return None
 40 | 
 41 |     def _get_replacement_word(self, entity_type, debug):
 42 |         if entity_type == "VARNAME":
 43 |             choices = ["mydf", "df", "zzz", "tempdf"]
 44 |             return random.choice(choices)
 45 |         elif entity_type == "FUNCTION":
 46 |             choices = ["average", "sum", "min", "max", "maximum", "minimum", "mean", "avg", "count"]
 47 |             return random.choice(choices)
 48 |         elif entity_type == "COLNAME":
 49 |             #             choices = ["age", "temperature", "humidity"]
 50 |             #             return random.choice(choices)
 51 |             # Generate random columns
 52 |             col_len = randint(3, 20)
 53 |             cols = ''.join(random.choices(string.ascii_lowercase, k=col_len))
 54 |             if randint(1, 10) < 4:
 55 |                 replace = randint(1, len(cols) - 1)
 56 |                 cols = cols[:replace] + "_" + cols[replace:]
 57 |             return cols
 58 | 
 59 |         elif entity_type == "FNAME":
 60 |             choices = ["train.csv", "train.json", "test.csv", "validation.csv", "data.csv", "data.xls"]
 61 |             return random.choice(choices)
 62 |         elif entity_type == "LIBNAME":
 63 |             choices = ["spacy", "matplotlib", "pandas", "numpy", "seaborn", "plotly", "tensorflow", "torch",
 64 |                        "transformers"]
 65 |             return random.choice(choices)
 66 |         elif entity_type == "CARDINAL":
 67 |             return str(randint(1, 100))
 68 | 
 69 |     def _replace_var(self, template, entity_dict, intent_id, debug):
 70 |         sign_idx = template.find("$")
 71 |         if sign_idx == -1:
 72 |             print("Error: No symbol $ found to replace")
 73 | 
 74 |         start_idx = sign_idx
 75 |         entity_str = template.split("$")[1].split()[0]
 76 |         entity_type = self._get_entity_type(entity_str)
 77 | 
 78 |         # Allow multiple column syntaxes for group by 
 79 |         if intent_id not in [12] or entity_type in ["VARNAME", "FNAME", "LIBNAME", "CARDINAL"]:
 80 |             replacement_word = self._get_replacement_word(entity_type, debug)
 81 |             end_idx = start_idx + len(replacement_word)
 82 |             entity_dict["entities"].append((start_idx, end_idx, entity_type))
 83 | 
 84 |             template = template[:start_idx] + replacement_word + template[start_idx + len(entity_str) + 1:]
 85 |             return template, entity_dict
 86 | 
 87 |         n = randint(1, 9)
 88 |         if n < 5:
 89 |             # One word
 90 |             n = 1
 91 |         elif n < 8:
 92 |             # Two words
 93 |             n = 2
 94 |         elif n < 10:
 95 |             # 3-5 words
 96 |             n = randint(3, 5)
 97 | 
 98 |         replacement_word_all = ""
 99 |         for i in range(n):
100 |             replacement_word = self._get_replacement_word(entity_type, debug)
101 |             end_idx = start_idx + len(replacement_word)
102 |             entity_dict["entities"].append((start_idx, end_idx, entity_type))
103 |             start_idx = end_idx
104 |             replacement_word_all += replacement_word
105 |             if i != n - 1:
106 |                 zzz = randint(1, 2)
107 |                 if zzz == 1:
108 |                     replacement_word_all += ","
109 |                     start_idx += 1
110 |                 elif zzz == 2:
111 |                     replacement_word_all += ", "
112 |                     start_idx += 2
113 | 
114 |         template = template[:sign_idx] + replacement_word_all + template[sign_idx + len(entity_str) + 1:]
115 | 
116 |         if debug:
117 |             print("Modified template=>", template)
118 |         return template, entity_dict
119 | 
120 |     def generate_training_row(self, intent_id=None, debug=False):
121 |         if intent_id:
122 |             try:
123 |                 template = self.templates_df[self.templates_df["intent_id"] == intent_id].sample(1)["template"].values[
124 |                     0]
125 |             except:
126 |                 print("Intent id ", intent_id, " not found")
127 |         else:
128 |             tmp = self.templates_df.sample(1)
129 |             template = tmp["template"].values[0]
130 |             intent_id = tmp["intent_id"].values[0]
131 |         if debug:
132 |             print("Template=>", template)
133 | 
134 |         entity_dict = {"entities": []}
135 |         while True:
136 |             if template.find("$") == -1:
137 |                 break
138 |             template, entity_dict = self._replace_var(template, entity_dict, intent_id, debug=debug)
139 |         if debug:
140 |             print("Generated text=> ", template)
141 |             print("Entities=>", entity_dict)
142 | 
143 |         if self.mode.lower() == "ner":
144 |             return template, entity_dict
145 |         else:
146 |             return {"intent_id": intent_id, "intent": template}
147 | 
148 |     def generate_training_rows(self, n_rows=10, debug=False):
149 |         rows = []
150 |         for _ in tqdm(range(n_rows)):
151 |             rows.append(self.generate_training_row(debug=debug))
152 |         return rows
153 | 
154 | 
def ner_data(n_rows=1000):
    """Generate NER training rows and write them to ``assets/train.json``.

    Args:
        n_rows: Number of rows to generate.

    The output directory is created when missing, so the script does not fail
    with ``FileNotFoundError`` on a fresh checkout.
    """
    import os  # local import: this block must not rely on the file-level import list

    tdg = TrainDataGenerator(mode="ner")
    rows = tdg.generate_training_rows(n_rows=n_rows)

    output_path = 'assets/train.json'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(rows, f, ensure_ascii=False, indent=4)
    print("Generated ner data")
164 | 
165 | 
def intent_data(n_rows=1000):
    """Generate intent training rows and save them as ``generated_intents.csv``.

    Args:
        n_rows: Number of rows to generate.
    """
    generator = TrainDataGenerator(mode="intent")
    generated = generator.generate_training_rows(n_rows=n_rows)
    pd.DataFrame(generated).to_csv(
        "../jupyter_text2code/jupyter_text2code_serverextension/data/generated_intents.csv",
        index=False,
    )
    print("Generated intent data")
172 | 
173 | 
def main(generate_ner_data="yes", generate_intent_data="yes", n_rows=1000):
    """Command-line entry point: generate NER and/or intent training data.

    Args:
        generate_ner_data: ``"yes"`` (case-insensitive) or a truthy non-string
            value to generate NER data.
        generate_intent_data: Same convention, for intent data.
        n_rows: Number of rows to generate for each data set.
    """

    def _enabled(flag):
        # The CLI layer may hand over real booleans/ints instead of strings
        # (e.g. a bare ``--generate_ner_data`` flag), which have no ``.lower()``.
        if isinstance(flag, str):
            return flag.lower() == "yes"
        return bool(flag)

    if _enabled(generate_ner_data):
        ner_data(n_rows=n_rows)
    if _enabled(generate_intent_data):
        intent_data(n_rows=n_rows)


if __name__ == '__main__':
    fire.Fire(main)
183 | 


--------------------------------------------------------------------------------
/scripts/process_awesome_notebooks.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import re
  3 | import time
  4 | import json
  5 | from pathlib import Path
  6 | 
  7 | import fire
  8 | import faiss
  9 | import numpy as np
 10 | import pandas as pd
 11 | import tensorflow_hub as hub
 12 | from sentence_transformers import SentenceTransformer
 13 | 
# Lower TensorFlow's native (C++) log verbosity; level '1' is expected to hide INFO messages.
# NOTE(review): this runs after `tensorflow_hub` has already been imported above -
# confirm the setting is still picked up, or move it ahead of the TensorFlow imports.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
 15 | 
 16 | 
 17 | class NaasProcessor(object):
 18 |     """code entry class"""
 19 | 
 20 |     exclude = ['.github']
 21 |     rootdir = 'input/awesome-notebooks-master'
 22 |     model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
 23 |     embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
 24 | 
 25 |     def _get_categories(self, root, exclude):
 26 |         for _, dirs, _ in os.walk(root, topdown=True):
 27 |             return [d for d in dirs if d not in exclude]
 28 | 
 29 |     def _get_files(self, path):
 30 |         for _, _, files in os.walk(path, topdown=True):
 31 |             return [os.path.join(path, file) for file in files if file.endswith('.ipynb')]
 32 | 
 33 |     def _make_tasks_json(self, intent_id=10000):
 34 |         tasks = []
 35 |         for category in self._get_categories(self.rootdir, self.exclude):
 36 |             for file in self._get_files(os.path.join(self.rootdir, category)):
 37 |                 task = {'category': category}
 38 |                 with open(file, 'r') as handle:
 39 |                     data = json.load(handle)
 40 |                     task['intent_id'] = intent_id
 41 |                     task['task'] = data['cells'][1]['source'][0][2:-1]
 42 |                     task['st_embedding'] = self._get_embedding(task['task'], 'st')
 43 |                     task['tf_embedding'] = self._get_embedding(task['task'], 'tf')
 44 |                     task['code'] = "\n".join(["".join(cell['source']) for cell in data['cells'] if cell['cell_type'] == 'code'])
 45 |                 intent_id += 1
 46 |                 tasks.append(task)
 47 |         return tasks
 48 | 
 49 |     def create_intent_df_file(self):
 50 |         tasks = pd.DataFrame(self._make_tasks_json())
 51 |         tasks = tasks.set_index('intent_id')
 52 |         tasks.to_csv('data/awesome-notebooks.csv')
 53 |         tasks.to_pickle('data/awesome-notebooks.pkl')
 54 | 
 55 |     def _get_embedding(self, command, encoder):
 56 |         command = re.sub('[^A-Za-z0-9 ]+', '', command).lower()
 57 |         if encoder == 'tf':
 58 |             return list(np.array(self.embed([command])[0]))
 59 |         elif encoder == 'st':
 60 |             return list(np.array(self.model.encode([command])[0]))
 61 | 
 62 |     def create_naas_faiss_index(self):
 63 |         intent_df = pd.read_pickle('data/awesome-notebooks.pkl').reset_index()
 64 |         db_ids = intent_df["intent_id"].values
 65 | 
 66 |         for prefix, dimension in zip(['tf', 'st'], [512, 384]):
 67 |             db_vectors = np.stack(intent_df[f"{prefix}_embedding"].values).astype(np.float32)
 68 |             faiss.normalize_L2(db_vectors)
 69 |             intent_index = faiss.IndexIDMap(faiss.IndexFlatIP(dimension))
 70 |             intent_index.add_with_ids(db_vectors, db_ids)
 71 |             faiss.write_index(intent_index, f"data/{prefix}_naas_intent_index.idx")
 72 | 
 73 |     def get_intent(self, query, prefix, tasks, k_nearest=1):
 74 |         index = faiss.read_index(f"data/{prefix}_intent_index.idx")
 75 |         query_vector = np.array([self._get_embedding(query, prefix)]).astype(np.float32)
 76 |         faiss.normalize_L2(query_vector)
 77 |         similarities, similarities_ids = index.search(query_vector, k_nearest)
 78 |         return similarities_ids[0][0], tasks['task'][similarities_ids[0][0]]
 79 | 
 80 |     def eval_models(self):
 81 |         for prefix in ['tf', 'st']:
 82 |             tasks = pd.read_pickle('data/awesome-notebooks.pkl')
 83 |             tasks = tasks.set_index('intent_id')
 84 |             tasks.drop([i + '_embedding' for i in ['tf', 'st']] + ['code'], axis=1, inplace=True)
 85 | 
 86 |             tasks[[f"{prefix}_matched_intent_id", f"{prefix}_matched_intent_text"]] = tasks['task'].apply(lambda x: pd.Series(self.get_intent(x, prefix, tasks)))
 87 |             tasks[f"{prefix}_is_intent_matched"] = tasks[f"{prefix}_matched_intent_id"] == tasks["intent_id"]
 88 | 
 89 |             tasks[[f"{prefix}_matched_intent_id_shuffled", f"{prefix}_matched_intent_text_shuffled"]] = tasks['task'].apply(lambda x: pd.Series(self.get_intent(self._shuffle_word(x), prefix, tasks)))
 90 |             tasks[f"{prefix}_is_intent_matched_shuffled"] = tasks[f"{prefix}_matched_intent_id_shuffled"] == tasks["intent_id"]
 91 | 
 92 |             Path("output").mkdir(parents=True, exist_ok=True)
 93 |             tasks.to_csv(f'output/{prefix}_eval_df.csv', index=False)
 94 | 
 95 |     def speed_benchmark(self, prefix, repetitions):
 96 |         tasks = pd.read_pickle('data/awesome-notebooks.pkl')
 97 |         tasks = tasks.set_index('intent_id')
 98 |         tasks.drop([i + '_embedding' for i in ['tf', 'st']] + ['code'], axis=1, inplace=True)
 99 | 
100 |         start = time.time()
101 | 
102 |         for i in range(repetitions):
103 |             tasks[[f"{prefix}_matched_intent_id", f"{prefix}_matched_intent_text"]] = tasks['task'].apply(lambda x: pd.Series(self.get_intent(x, prefix, tasks)))
104 |             tasks[f"{prefix}_is_intent_matched"] = tasks[f"{prefix}_matched_intent_id"] == tasks["intent_id"]
105 | 
106 |             tasks[[f"{prefix}_matched_intent_id_shuffled", f"{prefix}_matched_intent_text_shuffled"]] = tasks['task'].apply(lambda x: pd.Series(self.get_intent(self._shuffle_word(x), prefix, tasks)))
107 |             tasks[f"{prefix}_is_intent_matched_shuffled"] = tasks[f"{prefix}_matched_intent_id_shuffled"] == tasks[ "intent_id"]
108 | 
109 |         end = time.time()
110 |         return end - start
111 | 
112 |     def get_benchmark_data(self, repetitions):
113 |         data = pd.DataFrame([[i] for i in range(repetitions + 1)], columns=['repetitions'])
114 |         for prefix in ['tf', 'st']:
115 |             data[f'{prefix}_time_elapsed'] = data['repetitions'].apply(lambda x: self.speed_benchmark(prefix, x))
116 |         Path("output").mkdir(parents=True, exist_ok=True)
117 |         data.to_csv('output/speed_benchmarks.csv', index=False)
118 | 
119 |     def _shuffle_word(self, sentence):
120 |         sub = sentence.split(' - ', maxsplit=1)
121 |         return "".join([sub[1], ' - ', sub[0]])
122 | 
123 | 
124 | if __name__ == '__main__':
125 |     fire.Fire(NaasProcessor)
126 | 


--------------------------------------------------------------------------------
/scripts/train_spacy_v3_ner.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import fire
 4 | import spacy
 5 | import srsly
 6 | from spacy.tokens import DocBin
 7 | 
 8 | 
 9 | class SpaCy3NERTrainer:
10 | 
11 |     @staticmethod
12 |     def convert(input_path, output_path, lang='en'):
13 |         nlp = spacy.blank(lang)
14 |         db = DocBin()
15 |         for text, annot in srsly.read_json(input_path):
16 |             doc = nlp.make_doc(text)
17 |             ents = []
18 |             for start, end, label in annot["entities"]:
19 |                 span = doc.char_span(start, end, label=label)
20 |                 if span is None:
21 |                     print("Skipping entity")
22 |                 else:
23 |                     ents.append(span)
24 |             doc.ents = ents
25 |             db.add(doc)
26 |         db.to_disk(output_path)
27 | 
28 |     @staticmethod
29 |     def create_default_config_file(lang='en', pipeline='ner', output='config.cfg', optimize='accuracy'):
30 |         os.system(f'python -m spacy init config --lang {lang} --pipeline {pipeline} --optimize {optimize} {output}')
31 | 
32 |     @staticmethod
33 |     def train_model(config='config.cfg', output='training/', train='corpus/train.spacy', dev='corpus/dev.spacy', vectors='sm'):
34 |         os.system(f'python -m spacy download en_core_web_{vectors}')
35 |         os.system(f'python -m spacy train {config} --output {output} --paths.train {train} --paths.dev {dev}')
36 | 
37 | 
38 | if __name__ == '__main__':
39 |     fire.Fire(SpaCy3NERTrainer)
40 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from glob import glob
 3 | 
 4 | import setuptools
 5 | 
 6 | MODE = os.environ.get("JUPYTER_TEXT2CODE_MODE")
 7 | INSTALL_LIBS = [
 8 |     "numpy",
 9 |     "jupyter",
10 |     "jupyter_contrib_nbextensions",
11 |     "pandas",
12 |     "spacy==3.3.1",
13 |     "sentence_transformers",
14 |     "absl-py",
15 |     "plotly",
16 |     "matplotlib",
17 | ]
18 | 
19 | if MODE and MODE.lower() == "cpu":
20 |     INSTALL_LIBS.append("faiss-cpu")
21 | else:
22 |     INSTALL_LIBS.append("faiss-gpu")
23 | 
24 | 
def get_serverextension_files():
    """Collect ``data_files`` entries for every file of the server extension.

    Each file below ``jupyter_text2code/jupyter_text2code_serverextension`` is
    mapped to the same relative directory below
    ``share/jupyter/nbextensions/jupyter-text2code``.

    Returns:
        A list of ``(target_directory, [source_file])`` tuples.
    """
    data_files = []
    pattern = "jupyter_text2code/jupyter_text2code_serverextension/**"
    for f in glob(pattern, recursive=True):
        if not os.path.isfile(f):
            continue
        # Split on the platform separator: glob may return back-slashed paths on
        # Windows, where splitting on "/" alone leaves nothing to rewrite.
        frags = os.path.normpath(f).split(os.sep)[:-1]
        frags[0] = "jupyter-text2code"  # package directory -> nbextension name
        relative_common_path = "/".join(frags)
        data_files.append(
            (os.path.join("share/jupyter/nbextensions/", relative_common_path), [f])
        )
    return data_files
38 | 
39 | 
# (target directory, [source files]) pairs handed to setuptools as ``data_files``:
# the notebook-extension assets and the Jupyter config snippet.
data_files = [
    (
        "share/jupyter/nbextensions/jupyter-text2code",
        [
            "jupyter_text2code/__init__.py",
            "jupyter_text2code/jupyter_text2code.yaml",
            "jupyter_text2code/main.js",
            "jupyter_text2code/jupyter_text2code.css",
            "jupyter_text2code/jupyter_text2code_lib.py",
        ],
    ),
    (
        "etc/jupyter/jupyter_notebook_config.d",
        ["jupyter_text2code/etc/jupyter-text2code-extension.json"],
    ),
]

# Add one entry per file found under the server-extension directory.
data_files.extend(get_serverextension_files())

# Package metadata; install_requires uses the INSTALL_LIBS list built above.
setuptools.setup(
    name="jupyter-text2code",
    version="0.0.2",
    url="https://github.com/deepklarity/jupyter-text2code",
    author="Deepak Rawat and Kartik Godawat",
    license="MIT License",
    description="Jupyter server extension to assist with data science EDA",
    packages=setuptools.find_packages(),
    install_requires=INSTALL_LIBS,
    python_requires=">=3.7",
    classifiers=[
        "Framework :: Jupyter",
    ],
    data_files=data_files,
    include_package_data=True,
    zip_safe=False,
)
76 | 


--------------------------------------------------------------------------------