├── .gitignore
├── LICENSE
├── README.md
├── dockerfiles
├── Dockerfile.common
├── Dockerfile.cpu
├── Dockerfile.gpu
└── README.md
├── jupyter-text2code-demo.gif
├── jupyter_text2code
├── __init__.py
├── etc
│ └── jupyter-text2code-extension.json
├── jupyter_text2code.css
├── jupyter_text2code.yaml
├── jupyter_text2code_lib.py
├── jupyter_text2code_serverextension
│ ├── __init__.py
│ ├── data
│ │ ├── .gitkeep
│ │ ├── awesome-notebooks.csv
│ │ ├── generated_intents.csv
│ │ ├── intent_lookup.csv
│ │ └── ner_templates.csv
│ └── models
│ │ ├── .gitkeep
│ │ ├── intent_index.idx
│ │ ├── model-best
│ │ ├── config.cfg
│ │ ├── meta.json
│ │ ├── ner
│ │ │ ├── cfg
│ │ │ ├── model
│ │ │ └── moves
│ │ ├── tok2vec
│ │ │ ├── cfg
│ │ │ └── model
│ │ ├── tokenizer
│ │ └── vocab
│ │ │ ├── key2row
│ │ │ ├── lookups.bin
│ │ │ ├── strings.json
│ │ │ ├── vectors
│ │ │ └── vectors.cfg
│ │ └── model-last
│ │ ├── config.cfg
│ │ ├── meta.json
│ │ ├── ner
│ │ ├── cfg
│ │ ├── model
│ │ └── moves
│ │ ├── tok2vec
│ │ ├── cfg
│ │ └── model
│ │ ├── tokenizer
│ │ └── vocab
│ │ ├── key2row
│ │ ├── lookups.bin
│ │ ├── strings.json
│ │ ├── vectors
│ │ └── vectors.cfg
└── main.js
├── notebooks
├── Code Generator.ipynb
├── Episodes.csv
├── Generate Training data NER.ipynb
└── ctds.ipynb
├── scripts
├── README.md
├── config.cfg
├── create_intent_index.py
├── create_lookup_file.py
├── data
│ ├── awesome-notebooks.csv
│ ├── awesome-notebooks.pkl
│ ├── st_naas_intent_index.idx
│ └── tf_naas_intent_index.idx
├── eval_models_performance.ipynb
├── generate_training_data.py
├── process_awesome_notebooks.py
└── train_spacy_v3_ner.py
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore everything
2 | *.pyc
3 | .ipynb_checkpoints/
4 |
5 | # whitelist
6 | # ---------
7 |
8 | !.gitignore
9 |
10 |
11 | # Ignore following
12 | .idea
13 | jupyter_text2code.egg-info/
14 | .pickle
15 |
16 | ner_train_data.pickle
17 |
18 | build/
19 | dist/
20 | venv
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Deepklarity Technologies Pvt. Ltd.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Text2Code for Jupyter notebook
2 | ### A proof-of-concept Jupyter extension that converts English queries into relevant Python code.
3 |
4 |
5 | 
6 |
7 | ### Blog post with more details:
8 | #### [Data analysis made easy: Text2Code for Jupyter notebook](https://towardsdatascience.com/data-analysis-made-easy-text2code-for-jupyter-notebook-5380e89bb493?source=friends_link&sk=2c46fff2c31f7fe59b667350e4596b18)
9 |
10 | ### Demo Video:
11 | #### [Text2Code for Jupyter notebook](https://www.youtube.com/watch?v=3gZ7_9W-TJs)
12 |
13 | ## Supported Operating Systems:
14 | - Ubuntu
15 | - macOS
16 |
17 | ## Installation
18 |
19 | ### NOTE: We have renamed the plugin from mopp to jupyter-text2code. Uninstall mopp before installing new jupyter-text2code version.
20 | ```
21 | pip uninstall mopp
22 | ```
23 |
24 | #### CPU-only install:
25 | For macOS and for Ubuntu installations without an NVIDIA GPU, explicitly set the following environment variable before installing.
26 | ```
27 | export JUPYTER_TEXT2CODE_MODE="cpu"
28 |
29 | ```
30 |
31 | #### GPU install dependencies:
32 | ```
33 | sudo apt-get install libopenblas-dev libomp-dev
34 | ```
35 |
36 | #### Installation commands:
37 |
38 | ```
39 | git clone https://github.com/deepklarity/jupyter-text2code.git
40 | cd jupyter-text2code
41 | pip install .
42 | jupyter nbextension enable jupyter-text2code/main
43 |
44 | ```
45 |
46 | ## Uninstallation:
47 | ```
48 | pip uninstall jupyter-text2code
49 | ```
50 |
51 | ## Usage Instructions:
52 |
53 | - Start Jupyter notebook server by running the following command: ``` jupyter notebook ```
54 | - If you don't see the ``` Nbextensions``` tab in Jupyter notebook, run the following command: ``` jupyter contrib nbextension install --user ```
55 | - You can open the sample ``` notebooks/ctds.ipynb``` notebook for testing
56 | - If the installation succeeded, the SentenceTransformers `paraphrase-MiniLM-L6-v2` model will be downloaded the first time the extension is loaded
57 | - Click on the `Terminal` Icon which appears on the menu (to activate the extension)
58 | - Type "help" to see a list of currently supported commands in the repo
59 | - Watch [Demo video](https://www.youtube.com/watch?v=3gZ7_9W-TJs) for some examples
60 |
61 | ## Docker containers for jupyter-text2code (old version)
62 |
63 | We have published CPU and GPU images to docker hub with all dependencies pre-installed.
64 | ##### Visit https://hub.docker.com/r/deepklarity/jupyter-text2code/ to download the images and usage instructions.
65 |
66 | ##### CPU image size: ``` 1.51 GB ```
67 | ##### GPU image size: ``` 2.56 GB ```
68 |
69 | ## Model training:
70 | The plugin now supports pandas commands + quick snippet insertion of available snippets from [awesome-notebooks](https://github.com/jupyter-naas/awesome-notebooks). With this change, snippets for the most popular integrations are available from within the Jupyter tab, e.g.:
71 | - Get followers count from twitter
72 | - Get stats about a story from instagram
73 | The detailed training steps are available in [scripts README](scripts/README.md) where we also evaluated performance of different models and ended up selecting SentenceTransformers `paraphrase-MiniLM-L6-v2`
74 |
75 |
76 | ### Steps to add more intents:
77 | - Add more templates in `ner_templates` with a new intent_id
78 | - Generate training data. Modify `generate_training_data.py` if different generation techniques are needed or if introducing a new entity.
79 | - Train intent index
80 | - Train NER model
81 | - modify `jupyter_text2code/jupyter_text2code_serverextension/__init__.py` with new intent's condition and add actual code for the intent
82 | - Reinstall plugin by running: `pip install .`
83 |
84 | ### TODO:
85 | - [ ] Add Ollama support to work with local LLMs
86 | - [x] Publish Docker image
87 | - [X] Refactor code and make it more modular, remove duplicate code, etc
88 | - [X] Add support for more commands
89 | - [X] Improve intent detection and NER
90 | - [ ] Add support for Windows
91 | - [ ] Explore sentence Paraphrasing to generate higher-quality training data
92 | - [ ] Gather real-world variable names, library names as opposed to randomly generating them
93 | - [ ] Try NER with a transformer-based model
94 | - [ ] With enough data, train a language model to directly do English->code like GPT-3 does, instead of having separate stages in the pipeline
95 | - [ ] Create a survey to collect linguistic data
96 | - [ ] Add Speech2Code support
97 |
98 | #### Authored By:
99 |
100 | - [Deepak Rawat](https://twitter.com/dsr_ai)
101 | - [Kartik Godawat](https://twitter.com/kartik_godawat)
102 | - [Abdullah Meda](https://www.linkedin.com/in/abdmeda/)
103 |
--------------------------------------------------------------------------------
/dockerfiles/Dockerfile.common:
--------------------------------------------------------------------------------
# Shared build steps, pulled into Dockerfile.cpu and Dockerfile.gpu via `INCLUDE+`.

# Build-time argument: directory the repository is cloned into and installed from.
ARG WORKDIR=/home/deepklarity/jupyter-text2code

WORKDIR $WORKDIR

# PYTHONUNBUFFERED=1 turns off Python output buffering.
# TFHUB_CACHE_DIR is presumably where tensorflow_hub stores the module loaded at the end of the RUN step below.
ENV PYTHONUNBUFFERED=1 TFHUB_CACHE_DIR=/root/.cache/tfhub_modules

# One layer that: installs git, clones the repository into $WORKDIR, purges git and apt leftovers,
# upgrades pip, installs the package, installs/enables the notebook extension, and finally loads the
# Universal Sentence Encoder through tensorflow_hub at build time.
RUN --mount=type=cache,mode=0777,target=/var/cache/apt --mount=type=cache,mode=0777,target=/var/lib/apt \
    --mount=type=cache,mode=0777,target=/root/.cache/pip \
    apt-get -y update && DEBIAN_FRONTEND=noninteractive apt-get install -y git \
    && git clone https://github.com/deepklarity/jupyter-text2code.git $WORKDIR \
    && apt-get purge --auto-remove -y git \
    && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
    && python -m pip --no-cache-dir install -U --force-reinstall pip && pip --no-cache-dir install -U . \
    && jupyter contrib nbextension install --user && jupyter nbextension enable jupyter-text2code/main \
    && python -c 'import tensorflow_hub as hub; hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")'

# Serve the notebook on all interfaces, port 8888, without opening a browser; root is allowed to run it.
CMD jupyter notebook --ip 0.0.0.0 --port 8888 --no-browser --allow-root
18 |
--------------------------------------------------------------------------------
/dockerfiles/Dockerfile.cpu:
--------------------------------------------------------------------------------
# syntax = edrevo/dockerfile-plus

# CPU image: Python 3.7 slim base followed by the shared steps in Dockerfile.common.
FROM python:3.7-slim

# The README documents setting this variable to "cpu" for installs without an NVIDIA GPU.
ENV JUPYTER_TEXT2CODE_MODE="cpu"

# INCLUDE+ is not a standard Dockerfile instruction; presumably it is supplied by the
# dockerfile-plus syntax declared on the first line.
INCLUDE+ dockerfiles/Dockerfile.common
7 |
--------------------------------------------------------------------------------
/dockerfiles/Dockerfile.gpu:
--------------------------------------------------------------------------------
# syntax = edrevo/dockerfile-plus

# GPU image: CUDA 10.1 / cuDNN 7 runtime base followed by the shared steps in Dockerfile.common.
FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04

# Install the GPU build dependencies listed in the README (libopenblas-dev, libomp-dev), add the
# deadsnakes PPA, install python3.7 and pip, and register a `python` alternative.
# NOTE(review): python3.7 is installed, but `python` is aliased to /usr/bin/python3 — confirm that
# this resolves to the intended interpreter on this base image.
RUN --mount=type=cache,mode=0777,target=/var/cache/apt --mount=type=cache,target=/var/lib/apt \
    apt-get -y update && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common libopenblas-dev libomp-dev \
    && add-apt-repository ppa:deadsnakes/ppa && apt-get -y update && apt-get -y install python3.7 python3-pip \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3 10

# INCLUDE+ is not a standard Dockerfile instruction; presumably it is supplied by the
# dockerfile-plus syntax declared on the first line.
INCLUDE+ dockerfiles/Dockerfile.common
10 |
--------------------------------------------------------------------------------
/dockerfiles/README.md:
--------------------------------------------------------------------------------
1 | # About jupyter-text2code
2 |
3 | ### A proof-of-concept Jupyter extension that converts English queries into relevant Python code.
4 |
5 | #### Github Repository: https://github.com/deepklarity/jupyter-text2code
6 |
7 | ### Blog post with more details:
8 | #### [Data analysis made easy: Text2Code for Jupyter notebook](https://towardsdatascience.com/data-analysis-made-easy-text2code-for-jupyter-notebook-5380e89bb493?source=friends_link&sk=2c46fff2c31f7fe59b667350e4596b18)
9 |
10 | ### Demo Video:
11 | #### [Text2Code for Jupyter notebook](https://www.youtube.com/watch?v=3gZ7_9W-TJs)
12 |
13 | # How to Use the Images
14 |
15 | ### Install docker from: https://docs.docker.com/engine/install/
16 |
17 | ### CPU image:
18 |
19 | 1. Pull the docker image
20 | ```
21 | docker pull deepklarity/jupyter-text2code:latest
22 | ```
23 | 2. Run the Docker image
24 | ```
25 | docker run -it -p 8888:8888 deepklarity/jupyter-text2code:latest
26 | ```
27 |
28 | ### GPU image:
29 | 1. Pull the docker image
30 | ```
31 | docker pull deepklarity/jupyter-text2code:latest-gpu
32 | ```
33 | 2. Run the Docker image
34 | ```
35 | docker run -it --gpus all -p 8888:8888 deepklarity/jupyter-text2code:latest-gpu
36 | ```
37 |
38 | ### Open Jupyter Notebook:
39 |
40 | #### Once the container is running, you will see a URL with a token in the terminal/console. Open that URL in your browser.
41 |
42 | Example url: ``` http://127.0.0.1:8888/?token=48c6ea28c1cbce210c008f1ef8dab8fa91ad77420922e259 ```
43 |
44 | ### Usage Instructions:
45 |
46 | - You can open the sample ``` notebooks/ctds.ipynb``` notebook for testing
47 | - Click on the `Terminal` Icon which appears on the menu (to activate the extension)
48 | - Type "help" to see a list of currently supported commands in the repo
49 | - Watch [Demo video](https://www.youtube.com/watch?v=3gZ7_9W-TJs) for some examples
50 |
--------------------------------------------------------------------------------
/jupyter-text2code-demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter-text2code-demo.gif
--------------------------------------------------------------------------------
/jupyter_text2code/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/__init__.py
--------------------------------------------------------------------------------
/jupyter_text2code/etc/jupyter-text2code-extension.json:
--------------------------------------------------------------------------------
1 | {
2 | "NotebookApp": {
3 | "load_extensions": {
4 | "jupyter-text2code/main": true
5 | },
6 | "nbserver_extensions": {
7 | "jupyter_text2code.jupyter_text2code_serverextension": true
8 | }
9 | }
10 | }
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code.css:
--------------------------------------------------------------------------------
1 | .notebook_app {
2 | /*background:red !important;*/
3 | }
4 |
5 | #jupyter_text2code_editor {
6 | position: fixed;
7 | bottom: 0;
8 | width: 61%;
9 | z-index: 999;
10 | background: lightblue;
11 | padding: 10px 35px;
12 | margin-left: -14px;
13 | }
14 |
15 | #jupyter_text2code_editor_history {
16 | position: fixed;
17 | top: 150px;
18 | right: 10px;
19 | background: lightblue;
20 | padding: 5px;
21 | width: 15%;
22 | }
23 |
24 | #jupyter_text2code_preset_wrapper {
25 | max-height: 250px;
26 | overflow-y: scroll;
27 | }
28 |
29 | #jupyter_text2code_history_wrapper {
30 | max-height: 250px;
31 | overflow-y: scroll;
32 | }
33 |
34 | #jupyter_text2code_query {
35 | height: 50px;
36 | width: 100%;
37 | font-size: 20px;
38 | }
39 | #jupyter_text2code_history {
40 | padding: 3px;
41 | }
42 |
43 | .jupyter_text2code_history_item {
44 | background: lightcyan;
45 | margin: 10px 0px;
46 | padding: 5px;
47 | padding-left: 10px;
48 | cursor: pointer;
49 | }
50 |
51 | .jupyter_text2code_preset_item {
52 | background: lightcyan;
53 | margin: 10px 0px;
54 | padding: 5px;
55 | padding-left: 10px;
56 | cursor: pointer;
57 | }
58 |
59 | .jupyter_text2code_sub_heading {
60 | padding: 3px;
61 | font-size: 14px;
62 | font-weight: bold;
63 | }
64 |
65 | #jupyter_text2code_preset_content {
66 | padding: 3px;
67 | }
68 |
69 | .jupyter_text2code_what_heading {
70 | font-weight: bold;
71 | padding: 5px;
72 | }
73 |
74 | #jupyter_text2code_submit {
75 | margin: 10px;
76 | }
77 |
78 | .jupyter_text2code_spinner {
79 | font-size: 22px;
80 | margin-left: 10px;
81 | }
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code.yaml:
--------------------------------------------------------------------------------
1 | Type: IPython Notebook Extension
2 | Compatibility: 3.x, 4.x, 5.x, 6.x
3 | Main: main.js
4 | Name: jupyter-text2code
5 | Icon: icon.jpg
6 | Description: "jupyter-text2code plugin with pandas and plotly support"
7 |
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_lib.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from IPython import get_ipython
4 | from IPython.core.magics.namespace import NamespaceMagics
5 |
# Namespace-magics helper; dataframes_info() calls its who_ls() to list variable names.
_nms = NamespaceMagics()
# Handle returned by IPython's get_ipython(); only its kernel.shell attribute is used below.
_Jupyter = get_ipython()
# Attach the kernel's shell to the magics object.
_nms.shell = _Jupyter.kernel.shell
9 |
10 |
def dataframes_info():
    """Return a JSON object mapping each DataFrame variable to its column names.

    Variable names come from ``_nms.who_ls()`` and are resolved in the shell's
    user namespace (``_nms.shell.user_ns``). Resolving them there, instead of
    calling ``eval`` on each name, looks every variable up once, cannot be
    shadowed by this function's own local names, and avoids evaluating
    strings as code.

    Returns:
        str: JSON text of the form ``{"<variable name>": ["<column>", ...]}``.
    """
    namespace = _nms.shell.user_ns
    info = {}
    for name in _nms.who_ls():
        candidate = namespace[name]
        # Match on the type name so this module does not need to import pandas.
        if type(candidate).__name__ == 'DataFrame':
            info[name] = candidate.columns.tolist()
    return json.dumps(info)
15 |
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import json
4 | from abc import ABC
5 | from itertools import groupby
6 |
7 | import faiss
8 | import spacy
9 | import numpy as np
10 | import pandas as pd
11 | from notebook.utils import url_path_join
12 | from notebook.base.handlers import IPythonHandler
13 | from sentence_transformers import SentenceTransformer
14 |
# Directory containing this module; the bundled models and data live beneath it.
home = os.path.dirname(__file__)

SPACY_MODEL_DIR = os.path.join(home, "models/model-best")
FAISS_INDEX_PATH = os.path.join(home, "models/intent_index.idx")
INTENT_DF_PATH = os.path.join(home, "data/intent_lookup.csv")

# (description, example query) pairs shown when the user types "help".
_HELP_ENTRIES = (
    ('Import all libraries', 'import all libraries'),
    ('Use plotly dark theme', 'use dark theme'),
    ('Load file into a dataframe', 'Load train.csv in df'),
    ('Show n rows of dataframe', 'Show 10 rows from df'),
    ('Shape of dataframe', 'Show shape of df'),
    ('Describe dataframe', 'Describe dataframe df'),
    ('List columns of dataframe', 'Show columns from df'),
    ('Correlation matrix of dataframe', 'Display corelation matrix of df'),
    ('Histogram of column in dataframe', 'Plot histogram of category from df'),
    ('Bar chart of columns from dataframe', 'Show bar chart of product and amount from df'),
    ('Pie chart of column', 'Make pie chart of fruits from df'),
    ('Group by aggregations of columns in dataframe',
     'group df by country and show sum and mean of population'),
    ('Line chart of columns in dataframe', 'Line chart of price and sale from df'),
    ('Scatter plot of columns in dataframe',
     'Show scatter plot of youtube_likes and episode_duration from df'),
    ('Heatmap of columns in dataframe',
     'from df make heat map of recording_time and youtube_views'),
    ('List all files in current directory', 'List all files in current directory'),
)
HELP_LIST = [f"{summary} - Example Usage: {example}" for summary, example in _HELP_ENTRIES]
# Each help entry becomes one "# ..." comment line of the help text.
HELP_TEXT = "\n".join(f"# {entry}" for entry in HELP_LIST)
38 |
39 |
class CodeGenerator:
    """Translate an English query into a Python code snippet.

    The query is embedded with a SentenceTransformer model and looked up in a
    FAISS index to select an intent. The code template stored for that intent
    is then filled in with the entities a spaCy pipeline finds in the query.
    """

    def __init__(self):
        """Load the spaCy pipeline, the embedding model, the FAISS index and the intent table."""
        self.nlp = spacy.load(SPACY_MODEL_DIR)
        self.embedding_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
        self.intent_index = faiss.read_index(FAISS_INDEX_PATH)
        # Index the table by intent_id so ids returned by the index search can be used as labels.
        self.intent_df = pd.read_csv(INTENT_DF_PATH).set_index('intent_id')

    def _get_embedding(self, command):
        """Return the embedding of *command* as a list of its components.

        Every character other than ASCII letters, digits and spaces is removed
        and the text is lower-cased before it is encoded.
        """
        command = re.sub('[^A-Za-z0-9 ]+', '', command).lower()
        return list(np.array(self.embedding_model.encode([command])[0]))

    def _get_intent(self, query, k_nearest=1):
        """Return ``(intent_id, code_template)`` for the best match of *query*.

        Args:
            query: The user's English query.
            k_nearest: Number of neighbours requested from the index; only the
                first result is used.
        """
        query_vector = np.array([self._get_embedding(query)]).astype(np.float32)
        # In-place L2 normalisation of the query vector before searching.
        faiss.normalize_L2(query_vector)
        _, neighbour_ids = self.intent_index.search(query_vector, k_nearest)
        best_id = neighbour_ids[0][0]
        return best_id, self.intent_df['code'][best_id]

    def generate_code(self, query, df_info_dict=None, debug=False):
        """Return the code generated for *query*.

        Args:
            query: The user's English query.
            df_info_dict: Accepted for interface compatibility; currently
                unused. Defaults to ``None`` rather than a shared mutable dict.
            debug: Accepted for interface compatibility; currently unused.

        Returns:
            str: The intent's code template with ``$<label>`` placeholders
            replaced by entity text; any placeholder left over becomes ``xxx``.
        """
        intent_id, intent_code = self._get_intent(query)
        if 0 <= intent_id < 10000:  # Existing
            doc = self.nlp(query)
            # groupby needs its input sorted by the grouping key; sorted() is
            # stable, so the first entity of each group is the first one with
            # that label in doc.ents.
            ordered = sorted(doc.ents, key=lambda ent: ent.label_)
            for label, group in groupby(ordered, key=lambda ent: ent.label_):
                first_text = next(group).text
                # NOTE(review): every occurrence of the placeholder receives
                # the first entity with this label, so a template that repeats
                # a placeholder (e.g. two `$colname`) gets the same value in
                # each spot — confirm this is intended.
                # Literal replacement: neither the label nor the entity text
                # is interpreted as a regular expression.
                intent_code = intent_code.replace(f'${label.lower()}', first_text)
        elif 10000 <= intent_id < 20000:  # Naas
            print("Nothing yet")

        return re.sub(r'\$\w+', 'xxx', intent_code)
70 |
71 |
# Start-up banner: the notice is framed by two rows of asterisks above and below.
print(
    "*" * 20,
    "*" * 20,
    "Loading_jupyter_server_extension. First install will download SentenceTransformers, please wait...",
    "*" * 20,
    "*" * 20,
    sep="\n",
)
# Module-level generator instance, created at import time and used by the request handler.
CG = CodeGenerator()
78 |
79 |
class JupyterText2CodeHandler(IPythonHandler, ABC):
    """Request handler that answers a text query with generated code."""

    def __init__(self, application, request, **kwargs):
        super().__init__(application, request, **kwargs)

    # TODO: Add logger
    def get(self):
        """Handle GET: reply with JSON of the form ``{"status": ..., "message": ...}``.

        The ``query`` argument is required. The literal query ``help`` (in any
        letter case) returns the help text; any other query also requires the
        ``dataframes_info`` argument and is passed to the code generator.
        Any exception raised while producing the answer is reported with
        status ``error`` and the exception text as the message.
        """
        query = self.get_argument('query')

        try:
            if query.lower() == 'help':
                message = HELP_TEXT
            else:
                raw_info = self.get_argument('dataframes_info')
                # The first and last characters are dropped before the JSON is parsed.
                df_info_dict = json.loads(raw_info[1:-1])
                message = CG.generate_code(query, df_info_dict, debug=True)
            outcome = "success"
        except Exception as err:
            outcome, message = "error", str(err)

        # The message always echoes the query as a leading comment line.
        payload = {"status": outcome, "message": f"#Query: {query}\n\n{message}"}
        self.finish(json.dumps(payload))
103 |
104 |
def load_jupyter_server_extension(nb_server_app):
    """
    Register the jupyter-text2code handler when the extension is loaded.

    Args:
        nb_server_app (NotebookWebApplication): handle to the Notebook webserver instance.
    """
    app = nb_server_app.web_app
    # The endpoint is placed under the server's configured base URL.
    endpoint = url_path_join(app.settings['base_url'], '/jupyter-text2code')
    # '.*$' as the host pattern registers the route for every host name.
    app.add_handlers('.*$', [(endpoint, JupyterText2CodeHandler)])
    print("loaded_jupyter_server_extension: jupyter-text2code")
117 |
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/data/.gitkeep
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/data/ner_templates.csv:
--------------------------------------------------------------------------------
1 | intent_id,template,code
2 | 0,import $libname,import $libname
3 | 1,import all libraries,"import pandas as pd
4 | import numpy as np
5 | import os
6 | import plotly.express as px
7 | import matplotlib.pyplot as plt
8 | pd.options.plotting.backend = 'plotly'"
9 | 2,load $fname,$varname = pd.read_csv('$fname')
10 | 2,load $fname in $varname,$varname = pd.read_csv('$fname')
11 | 3,show $cardinal rows from $varname,$varname.head($cardinal)
12 | 3,show $cardinal rows of $varname,$varname.head($cardinal)
13 | 3,print $cardinal rows from $varname,$varname.head($cardinal)
14 | 3,print $cardinal rows of $varname,$varname.head($cardinal)
15 | 3,print $varname head,$varname.head()
16 | 4,plot histogram of $colname column in $varname,$varname.plot.hist(x='$colname')
17 | 4,plot histogram of $colname in $varname,$varname.plot.hist(x='$colname')
18 | 4,draw histogram of $colname column in $varname,$varname.plot.hist(x='$colname')
19 | 4,get histogram of $colname in $varname,$varname.plot.hist(x='$colname')
20 | 5,get correlation matrix of $varname,$varname.corr()
21 | 6,print $varname shape,$varname.shape
22 | 6,print shape of $varname,$varname.shape
23 | 6,get size of $varname,$varname.shape
24 | 7,barplot $colname and $colname columns of $varname,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
25 | 7,plot $colname and $colname columns of $varname in a bar plot,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
26 | 7,bar plot $colname and $colname column of $varname,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
27 | 7,bar plot $colname and $colname in $varname,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
28 | 7,plot $colname and $colname of $varname in a bar plot,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
29 | 7,plot $colname $colname of $varname in a bar plot,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
30 | 7,show a bar plot with $colname on x axis over $colname in $varname,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
31 | 7,show a bar plot with $colname on x axis and $colname on y axis in $varname,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
32 | 7,show a bar plot with $colname on y axis and $colname on x axis,"px.bar(x='$colname', y='$colname', data_frame=$varname, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
33 | 8,piechart of $colname column in $varname grouped by $colname column,"tmp = $varname['$colname'].value_counts(dropna=False)
34 | px.pie(tmp,values=tmp.values,names=tmp.index,title='CustomTitle')"
35 | 8,piechart of $colname in $varname grouped by $colname,"tmp = $varname['$colname'].value_counts(dropna=False)
36 | px.pie(tmp,values=tmp.values,names=tmp.index,title='CustomTitle')"
37 | 8,pie chart of $colname column of $varname grouped by $colname column,"tmp = $varname['$colname'].value_counts(dropna=False)
38 | px.pie(tmp,values=tmp.values,names=tmp.index,title='CustomTitle')"
39 | 8,pie chart of column $colname in $varname grouped by column $colname,"tmp = $varname['$colname'].value_counts(dropna=False)
40 | px.pie(tmp,values=tmp.values,names=tmp.index,title='CustomTitle')"
41 | 8,pie chart of $colname of $varname coloured by $colname,"tmp = $varname['$colname'].value_counts(dropna=False)
42 | px.pie(tmp,values=tmp.values,names=tmp.index,title='CustomTitle')"
43 | 9,install $libname,!pip install $libname
44 | 10,list columns of $varname,$varname.columns
45 | 10,list all columns of $varname,$varname.columns
46 | 11,describe $varname,$varname.describe()
47 | 12,group $varname by $colname and get $function of $colname,# Not supported in the current release :(
48 | 12,group the $varname by $colname and get $function of $colname,# Not supported in the current release :(
49 | 12,$varname group by $colname $function by $colname,# Not supported in the current release :(
50 | 12,find $function of $colname group by $colname from $varname,# Not supported in the current release :(
51 | 12,$function $colname group by $colname from $varname,# Not supported in the current release :(
52 | 12,$function $colname group by $colname from $varname,# Not supported in the current release :(
53 | 13,display a line plot showing $colname vs $colname in $varname,"$varname.plot.line(x='$colname', y='$colname', color=None, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
54 | 13,display a line plot showing $colname on y-axis and $colname on x-axis from $varname,"$varname.plot.line(x='$colname', y='$colname', color=None, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
55 | 13,display a line plot of $colname versus $colname in $varname,"$varname.plot.line(x='$colname', y='$colname', color=None, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
56 | 13,line plot of $colname and $colname in $varname,"$varname.plot.line(x='$colname', y='$colname', color=None, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
57 | 14,show a scatter plot of $colname over $colname in $varname,"$varname.plot.scatter(x='$colname', y='$colname', color=None, size=None, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
58 | 14,scatter plot of $colname and $colname in $varname,"$varname.plot.scatter(x='$colname', y='$colname', color=None, size=None, title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
59 | 15,show a heatmap with $colname on x axis and $colname on y axis in $varname,"$varname.plot(kind='density_heatmap', x='$colname', y='$colname', title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
60 | 15,display a heatmap of $colname on y axis and $colname on x axis in $varname,"$varname.plot(kind='density_heatmap', x='$colname', y='$colname', title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
61 | 15,heatmap of $colname and $colname in $varname,"$varname.plot(kind='density_heatmap', x='$colname', y='$colname', title='CustomTitle', labels={'$colname':'$colname', '$colname':'$colname'})"
62 | 16,list all files in current directory,!ls .
63 | 17,switch to dark theme,"import plotly.io as pio
64 | pio.templates.default = 'plotly_dark'"
65 |
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/.gitkeep
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/intent_index.idx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/intent_index.idx
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/config.cfg:
--------------------------------------------------------------------------------
1 | [paths]
2 | train = "corpus/train.spacy"
3 | dev = "corpus/dev.spacy"
4 | vectors = "en_core_web_sm"
5 | init_tok2vec = null
6 |
7 | [system]
8 | gpu_allocator = null
9 | seed = 0
10 |
11 | [nlp]
12 | lang = "en"
13 | pipeline = ["tok2vec","ner"]
14 | batch_size = 1000
15 | disabled = []
16 | before_creation = null
17 | after_creation = null
18 | after_pipeline_creation = null
19 | tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20 |
21 | [components]
22 |
23 | [components.ner]
24 | factory = "ner"
25 | incorrect_spans_key = null
26 | moves = null
27 | scorer = {"@scorers":"spacy.ner_scorer.v1"}
28 | update_with_oracle_cut_size = 100
29 |
30 | [components.ner.model]
31 | @architectures = "spacy.TransitionBasedParser.v2"
32 | state_type = "ner"
33 | extra_state_tokens = false
34 | hidden_width = 64
35 | maxout_pieces = 2
36 | use_upper = true
37 | nO = null
38 |
39 | [components.ner.model.tok2vec]
40 | @architectures = "spacy.Tok2VecListener.v1"
41 | width = ${components.tok2vec.model.encode.width}
42 | upstream = "*"
43 |
44 | [components.tok2vec]
45 | factory = "tok2vec"
46 |
47 | [components.tok2vec.model]
48 | @architectures = "spacy.Tok2Vec.v2"
49 |
50 | [components.tok2vec.model.embed]
51 | @architectures = "spacy.MultiHashEmbed.v2"
52 | width = ${components.tok2vec.model.encode.width}
53 | attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
54 | rows = [5000,2500,2500,2500]
55 | include_static_vectors = true
56 |
57 | [components.tok2vec.model.encode]
58 | @architectures = "spacy.MaxoutWindowEncoder.v2"
59 | width = 256
60 | depth = 8
61 | window_size = 1
62 | maxout_pieces = 3
63 |
64 | [corpora]
65 |
66 | [corpora.dev]
67 | @readers = "spacy.Corpus.v1"
68 | path = ${paths.dev}
69 | max_length = 0
70 | gold_preproc = false
71 | limit = 0
72 | augmenter = null
73 |
74 | [corpora.train]
75 | @readers = "spacy.Corpus.v1"
76 | path = ${paths.train}
77 | max_length = 0
78 | gold_preproc = false
79 | limit = 0
80 | augmenter = null
81 |
82 | [training]
83 | dev_corpus = "corpora.dev"
84 | train_corpus = "corpora.train"
85 | seed = ${system.seed}
86 | gpu_allocator = ${system.gpu_allocator}
87 | dropout = 0.1
88 | accumulate_gradient = 1
89 | patience = 1600
90 | max_epochs = 0
91 | max_steps = 20000
92 | eval_frequency = 200
93 | frozen_components = []
94 | annotating_components = []
95 | before_to_disk = null
96 |
97 | [training.batcher]
98 | @batchers = "spacy.batch_by_words.v1"
99 | discard_oversize = false
100 | tolerance = 0.2
101 | get_length = null
102 |
103 | [training.batcher.size]
104 | @schedules = "compounding.v1"
105 | start = 100
106 | stop = 1000
107 | compound = 1.001
108 | t = 0.0
109 |
110 | [training.logger]
111 | @loggers = "spacy.ConsoleLogger.v1"
112 | progress_bar = false
113 |
114 | [training.optimizer]
115 | @optimizers = "Adam.v1"
116 | beta1 = 0.9
117 | beta2 = 0.999
118 | L2_is_weight_decay = true
119 | L2 = 0.01
120 | grad_clip = 1.0
121 | use_averages = false
122 | eps = 0.00000001
123 | learn_rate = 0.001
124 |
125 | [training.score_weights]
126 | ents_f = 1.0
127 | ents_p = 0.0
128 | ents_r = 0.0
129 | ents_per_type = null
130 |
131 | [pretraining]
132 |
133 | [initialize]
134 | vectors = ${paths.vectors}
135 | init_tok2vec = ${paths.init_tok2vec}
136 | vocab_data = null
137 | lookups = null
138 | before_init = null
139 | after_init = null
140 |
141 | [initialize.components]
142 |
143 | [initialize.tokenizer]
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/meta.json:
--------------------------------------------------------------------------------
1 | {
2 | "lang":"en",
3 | "name":"pipeline",
4 | "version":"0.0.0",
5 | "spacy_version":">=3.3.0,<3.4.0",
6 | "description":"",
7 | "author":"",
8 | "email":"",
9 | "url":"",
10 | "license":"",
11 | "spacy_git_version":"497a708c7",
12 | "vectors":{
13 | "width":0,
14 | "vectors":0,
15 | "keys":0,
16 | "name":null,
17 | "mode":"default"
18 | },
19 | "labels":{
20 | "tok2vec":[
21 |
22 | ],
23 | "ner":[
24 | "CARDINAL",
25 | "COLNAME",
26 | "FNAME",
27 | "FUNCTION",
28 | "LIBNAME",
29 | "VARNAME"
30 | ]
31 | },
32 | "pipeline":[
33 | "tok2vec",
34 | "ner"
35 | ],
36 | "components":[
37 | "tok2vec",
38 | "ner"
39 | ],
40 | "disabled":[
41 |
42 | ],
43 | "performance":{
44 | "ents_f":1.0,
45 | "ents_p":1.0,
46 | "ents_r":1.0,
47 | "ents_per_type":{
48 | "COLNAME":{
49 | "p":1.0,
50 | "r":1.0,
51 | "f":1.0
52 | },
53 | "VARNAME":{
54 | "p":1.0,
55 | "r":1.0,
56 | "f":1.0
57 | },
58 | "CARDINAL":{
59 | "p":1.0,
60 | "r":1.0,
61 | "f":1.0
62 | },
63 | "FUNCTION":{
64 | "p":1.0,
65 | "r":1.0,
66 | "f":1.0
67 | },
68 | "FNAME":{
69 | "p":1.0,
70 | "r":1.0,
71 | "f":1.0
72 | },
73 | "LIBNAME":{
74 | "p":1.0,
75 | "r":1.0,
76 | "f":1.0
77 | }
78 | },
79 | "tok2vec_loss":6.8340121594,
80 | "ner_loss":1014.5475574388
81 | }
82 | }
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/ner/cfg:
--------------------------------------------------------------------------------
1 | {
2 | "moves":null,
3 | "update_with_oracle_cut_size":100,
4 | "multitasks":[
5 |
6 | ],
7 | "min_action_freq":1,
8 | "learn_tokens":false,
9 | "beam_width":1,
10 | "beam_density":0.0,
11 | "beam_update_prob":0.0,
12 | "incorrect_spans_key":null
13 | }
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/ner/model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/ner/model
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/ner/moves:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/ner/moves
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/tok2vec/cfg:
--------------------------------------------------------------------------------
1 | {
2 |
3 | }
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/tok2vec/model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/tok2vec/model
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/tokenizer:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/tokenizer
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/vocab/key2row:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/vocab/key2row
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/vocab/lookups.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/vocab/lookups.bin
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/vocab/vectors:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/vocab/vectors
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-best/vocab/vectors.cfg:
--------------------------------------------------------------------------------
1 | {
2 | "mode":"default"
3 | }
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/config.cfg:
--------------------------------------------------------------------------------
1 | [paths]
2 | train = "corpus/train.spacy"
3 | dev = "corpus/dev.spacy"
4 | vectors = "en_core_web_sm"
5 | init_tok2vec = null
6 |
7 | [system]
8 | gpu_allocator = null
9 | seed = 0
10 |
11 | [nlp]
12 | lang = "en"
13 | pipeline = ["tok2vec","ner"]
14 | batch_size = 1000
15 | disabled = []
16 | before_creation = null
17 | after_creation = null
18 | after_pipeline_creation = null
19 | tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20 |
21 | [components]
22 |
23 | [components.ner]
24 | factory = "ner"
25 | incorrect_spans_key = null
26 | moves = null
27 | scorer = {"@scorers":"spacy.ner_scorer.v1"}
28 | update_with_oracle_cut_size = 100
29 |
30 | [components.ner.model]
31 | @architectures = "spacy.TransitionBasedParser.v2"
32 | state_type = "ner"
33 | extra_state_tokens = false
34 | hidden_width = 64
35 | maxout_pieces = 2
36 | use_upper = true
37 | nO = null
38 |
39 | [components.ner.model.tok2vec]
40 | @architectures = "spacy.Tok2VecListener.v1"
41 | width = ${components.tok2vec.model.encode.width}
42 | upstream = "*"
43 |
44 | [components.tok2vec]
45 | factory = "tok2vec"
46 |
47 | [components.tok2vec.model]
48 | @architectures = "spacy.Tok2Vec.v2"
49 |
50 | [components.tok2vec.model.embed]
51 | @architectures = "spacy.MultiHashEmbed.v2"
52 | width = ${components.tok2vec.model.encode.width}
53 | attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
54 | rows = [5000,2500,2500,2500]
55 | include_static_vectors = true
56 |
57 | [components.tok2vec.model.encode]
58 | @architectures = "spacy.MaxoutWindowEncoder.v2"
59 | width = 256
60 | depth = 8
61 | window_size = 1
62 | maxout_pieces = 3
63 |
64 | [corpora]
65 |
66 | [corpora.dev]
67 | @readers = "spacy.Corpus.v1"
68 | path = ${paths.dev}
69 | max_length = 0
70 | gold_preproc = false
71 | limit = 0
72 | augmenter = null
73 |
74 | [corpora.train]
75 | @readers = "spacy.Corpus.v1"
76 | path = ${paths.train}
77 | max_length = 0
78 | gold_preproc = false
79 | limit = 0
80 | augmenter = null
81 |
82 | [training]
83 | dev_corpus = "corpora.dev"
84 | train_corpus = "corpora.train"
85 | seed = ${system.seed}
86 | gpu_allocator = ${system.gpu_allocator}
87 | dropout = 0.1
88 | accumulate_gradient = 1
89 | patience = 1600
90 | max_epochs = 0
91 | max_steps = 20000
92 | eval_frequency = 200
93 | frozen_components = []
94 | annotating_components = []
95 | before_to_disk = null
96 |
97 | [training.batcher]
98 | @batchers = "spacy.batch_by_words.v1"
99 | discard_oversize = false
100 | tolerance = 0.2
101 | get_length = null
102 |
103 | [training.batcher.size]
104 | @schedules = "compounding.v1"
105 | start = 100
106 | stop = 1000
107 | compound = 1.001
108 | t = 0.0
109 |
110 | [training.logger]
111 | @loggers = "spacy.ConsoleLogger.v1"
112 | progress_bar = false
113 |
114 | [training.optimizer]
115 | @optimizers = "Adam.v1"
116 | beta1 = 0.9
117 | beta2 = 0.999
118 | L2_is_weight_decay = true
119 | L2 = 0.01
120 | grad_clip = 1.0
121 | use_averages = false
122 | eps = 0.00000001
123 | learn_rate = 0.001
124 |
125 | [training.score_weights]
126 | ents_f = 1.0
127 | ents_p = 0.0
128 | ents_r = 0.0
129 | ents_per_type = null
130 |
131 | [pretraining]
132 |
133 | [initialize]
134 | vectors = ${paths.vectors}
135 | init_tok2vec = ${paths.init_tok2vec}
136 | vocab_data = null
137 | lookups = null
138 | before_init = null
139 | after_init = null
140 |
141 | [initialize.components]
142 |
143 | [initialize.tokenizer]
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/meta.json:
--------------------------------------------------------------------------------
1 | {
2 | "lang":"en",
3 | "name":"pipeline",
4 | "version":"0.0.0",
5 | "spacy_version":">=3.3.0,<3.4.0",
6 | "description":"",
7 | "author":"",
8 | "email":"",
9 | "url":"",
10 | "license":"",
11 | "spacy_git_version":"497a708c7",
12 | "vectors":{
13 | "width":0,
14 | "vectors":0,
15 | "keys":0,
16 | "name":null,
17 | "mode":"default"
18 | },
19 | "labels":{
20 | "tok2vec":[
21 |
22 | ],
23 | "ner":[
24 | "CARDINAL",
25 | "COLNAME",
26 | "FNAME",
27 | "FUNCTION",
28 | "LIBNAME",
29 | "VARNAME"
30 | ]
31 | },
32 | "pipeline":[
33 | "tok2vec",
34 | "ner"
35 | ],
36 | "components":[
37 | "tok2vec",
38 | "ner"
39 | ],
40 | "disabled":[
41 |
42 | ],
43 | "performance":{
44 | "ents_f":0.9992156863,
45 | "ents_p":1.0,
46 | "ents_r":0.9984326019,
47 | "ents_per_type":{
48 | "COLNAME":{
49 | "p":1.0,
50 | "r":0.9970457903,
51 | "f":0.9985207101
52 | },
53 | "VARNAME":{
54 | "p":1.0,
55 | "r":1.0,
56 | "f":1.0
57 | },
58 | "CARDINAL":{
59 | "p":1.0,
60 | "r":1.0,
61 | "f":1.0
62 | },
63 | "FUNCTION":{
64 | "p":1.0,
65 | "r":1.0,
66 | "f":1.0
67 | },
68 | "FNAME":{
69 | "p":1.0,
70 | "r":1.0,
71 | "f":1.0
72 | },
73 | "LIBNAME":{
74 | "p":1.0,
75 | "r":1.0,
76 | "f":1.0
77 | }
78 | },
79 | "tok2vec_loss":0.0000003981,
80 | "ner_loss":0.0000006017
81 | }
82 | }
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/ner/cfg:
--------------------------------------------------------------------------------
1 | {
2 | "moves":null,
3 | "update_with_oracle_cut_size":100,
4 | "multitasks":[
5 |
6 | ],
7 | "min_action_freq":1,
8 | "learn_tokens":false,
9 | "beam_width":1,
10 | "beam_density":0.0,
11 | "beam_update_prob":0.0,
12 | "incorrect_spans_key":null
13 | }
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/ner/model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/ner/model
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/ner/moves:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/ner/moves
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/tok2vec/cfg:
--------------------------------------------------------------------------------
1 | {
2 |
3 | }
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/tok2vec/model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/tok2vec/model
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/tokenizer:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/tokenizer
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/vocab/key2row:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/vocab/key2row
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/vocab/lookups.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/vocab/lookups.bin
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/vocab/vectors:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/vocab/vectors
--------------------------------------------------------------------------------
/jupyter_text2code/jupyter_text2code_serverextension/models/model-last/vocab/vectors.cfg:
--------------------------------------------------------------------------------
1 | {
2 | "mode":"default"
3 | }
--------------------------------------------------------------------------------
/jupyter_text2code/main.js:
--------------------------------------------------------------------------------
1 | define([
2 | 'jquery',
3 | 'require',
4 | 'base/js/namespace',
5 | 'base/js/dialog',
6 | 'base/js/events',
7 | ], function (
8 | $,
9 | requirejs,
10 | Jupyter,
11 | dialog,
12 | events
13 | ) {
14 | "use strict";
15 |
// Holder for the Python helper source; `code_init` is filled in by
// read_code_init once the library file has been fetched.
var jupyter_text2code_lib = { code_init: "" };

// Default values for config parameters.
var params = {
    jupyter_text2code_it_default_to_public: false
};

// UI state shared by the handlers in this module:
//   is_open - whether the editor/history panels are currently shown
//   data    - the initial query text, the list of executed queries, and
//             the preset queries offered in the side panel
var extension_state = {
    is_open: false,
    data: {
        query: "import all libraries",
        history: [],
        presets: [
            "help",
            "use dark theme",
            "import all libraries",
            "load (xxx.csv) in (df)",
            "pie plot of (column) in (df)",
            "bar plot of columns (column) & (column) in (df)",
            "list all columns of (df)",
            "show (x) rows of (df)"
        ]
    }
};
41 |
/**
 * Record a finished query and run the code generated for it.
 *
 * The generated code is written into the selected cell when that cell is
 * empty, otherwise into a new code cell inserted below it. The cell is then
 * executed, and a fresh cell is inserted and selected for the next command.
 *
 * @param {string} query - The natural-language query that was submitted.
 * @param {string|Object} response - Server reply carrying the generated code
 *     under the "message" key, either as JSON text or as a parsed object.
 */
function code_exec_callback(query, response) {
    // jQuery passes an already-parsed object when the reply has a JSON
    // content type; JSON.parse on an object would throw, so only parse text.
    var payload = (typeof response === "string") ? JSON.parse(response) : response;
    var generated_code = payload["message"];

    extension_state.data.history.push({"query": query, "code": generated_code});
    update_history_display(query);

    var cur_cell = Jupyter.notebook.get_selected_cell();
    // Reuse the selected cell only when it is empty, so existing content is
    // never overwritten.
    var command_cell = (cur_cell.get_text() === "")
        ? cur_cell
        : Jupyter.notebook.insert_cell_below('code');

    command_cell.select();
    command_cell.set_text(generated_code);
    command_cell.execute();
    Jupyter.notebook.insert_cell_below();
    Jupyter.notebook.select_next();
}
60 |
/**
 * iopub output callback for kernel executions started by this extension.
 *
 * When the kernel produces an `execute_result`, its text/plain payload is
 * sent together with the current query text to the server endpoint, and the
 * reply is handed to code_exec_callback. Other message types are ignored.
 *
 * @param {Object} out_data - iopub message received from the kernel.
 */
function jupyter_text2code_lib_callback(out_data) {
    if (out_data.msg_type !== "execute_result") {
        return;
    }
    var query = $("#jupyter_text2code_query").val();
    $.get({
        // Resolve against the notebook's base URL (as read_code_init does for
        // the helper library) so the request also reaches the server when
        // Jupyter is not served from "/".
        // NOTE(review): assumes the server handler is registered under
        // base_url - confirm against the server extension.
        url: Jupyter.notebook.base_url + 'jupyter-text2code',
        data: {"query": query, "dataframes_info": out_data.content.data['text/plain']},
        beforeSend: function () {
            $("#jupyter_text2code_loader").show();
        },
        success: function (response) {
            code_exec_callback(query, response);
        },
        error: handle_jupyter_text2code_error,
        complete: function () {
            $("#jupyter_text2code_loader").hide();
        }
    });
}
80 |
/**
 * Fetch a helper library from the extension's nbextensions directory and
 * execute its source in the notebook kernel.
 *
 * The fetched source is kept in `jupyter_text2code_lib.code_init`. Kernel
 * output from the execution is routed to jupyter_text2code_lib_callback.
 *
 * @param {string} lib - File name of the library, relative to
 *     `nbextensions/jupyter-text2code/`.
 */
function read_code_init(lib) {
    var libName = Jupyter.notebook.base_url + "nbextensions/jupyter-text2code/" + lib;
    $.get(libName).done(function (data) {
        jupyter_text2code_lib.code_init = data;
        requirejs(
            [],
            function () {
                Jupyter.notebook.kernel.execute(
                    jupyter_text2code_lib.code_init,
                    { iopub: { output: jupyter_text2code_lib_callback } },
                    { silent: false }
                );
            });
        console.log(libName + ' loaded library');
    }).fail(function () {
        // The original message glued the URL straight onto "failed to load";
        // keep the pieces separated and report it as an error.
        console.error('failed to load ' + lib + ' library from ' + libName);
    });
}
95 |
/**
 * Register the "Text2Code" toolbar action and load the Python helper library
 * into the kernel.
 */
var initialize = function () {
    var launch_action = Jupyter.keyboard_manager.actions.register(
        {
            help: 'Launch jupyter-text2code',
            icon: 'fa-terminal',
            handler: toggle_jupyter_text2code_editor
        },
        'create-jupyter-text2code-from-notebook',
        'Text2Code'
    );
    Jupyter.toolbar.add_buttons_group([launch_action]);

    read_code_init("jupyter_text2code_lib.py");
};
106 |
/**
 * Toolbar handler: hide the editor panels when they are open, otherwise
 * build them on first use and show them.
 */
function toggle_jupyter_text2code_editor() {
    var panels = ".jupyter_text2code_editor_display";

    if (extension_state.is_open) {
        extension_state.is_open = false;
        $(panels).hide();
        return;
    }

    // The panels are created lazily, the first time they are opened.
    if ($('#jupyter_text2code_editor').length == 0) {
        build_jupyter_text2code_editor();
    }
    extension_state.is_open = true;
    $(panels).show();
}
120 |
/**
 * Build a dismissable Bootstrap alert element.
 *
 * @param {string} alert_class - Extra class for the alert, e.g.
 *     'alert-danger'.
 * @returns {jQuery} A detached alert <div> containing a close button.
 */
function build_alert(alert_class) {
    // NOTE(review): the element markup was missing from this copy of the
    // file (empty selectors and a string literal broken across lines); it is
    // restored here following the usual Bootstrap dismissable-alert structure
    // - confirm against upstream.
    var close_button = $('<button/>')
        .attr({'type': 'button', 'data-dismiss': 'alert', 'aria-label': 'Close'})
        .addClass('close')
        .append($('<span/>').attr('aria-hidden', 'true').html('&times;'));

    return $('<div/>')
        .addClass('alert alert-dismissable')
        .addClass(alert_class)
        .append(close_button);
}
130 |
/**
 * jQuery ajax error handler: log the failure and show it in a danger alert.
 *
 * @param {Object} jqXHR - The jqXHR object of the failed request.
 * @param {string} textStatus - Status string supplied by jQuery.
 * @param {string} errorThrown - Textual portion of the HTTP status.
 */
function handle_jupyter_text2code_error(jqXHR, textStatus, errorThrown) {
    console.log('jupyter_text2code ajax error:', jqXHR, textStatus, errorThrown);

    // Prefer the structured JSON body when the server sent one.
    var details = jqXHR.responseJSON
        ? JSON.stringify(jqXHR.responseJSON, null, 2)
        : errorThrown;

    // Real elements are needed here: an empty selector yields an empty jQuery
    // set, so .text() and .append() would silently add nothing.
    var alert = build_alert('alert-danger')
        .hide()
        .append($('<p/>').text('Error:'))
        .append($('<pre/>').text(details));

    // Nothing in this file creates #jupyter_text2code_modal; fall back to the
    // editor panel so the error is visible to the user.
    var container = $('#jupyter_text2code_modal').find('.modal-body');
    if (container.length === 0) {
        container = $('#jupyter_text2code_editor');
    }
    container.append(alert);
    alert.slideDown('fast');
}
144 |
145 |
/**
 * Fill the preset list of the given panel with one clickable entry per
 * preset query in `extension_state.data.presets`.
 *
 * @param {jQuery} jupyter_text2code_editor - Panel containing the
 *     `#jupyter_text2code_preset_content` element.
 * @returns {jQuery} The same panel, for chaining.
 */
function add_presets(jupyter_text2code_editor) {
    var jupyter_text2code_preset = jupyter_text2code_editor.find('#jupyter_text2code_preset_content');
    extension_state.data.presets.forEach(function (item) {
        // Each entry carries the class the delegated click handler looks for
        // (.jupyter_text2code_preset_item) and is filled via .text() so the
        // preset is shown literally.
        // NOTE(review): the original wrapper tag is not visible in this copy
        // of the file; <div> is assumed - confirm against the stylesheet.
        jupyter_text2code_preset.append(
            $('<div/>').addClass('jupyter_text2code_preset_item').text(item)
        );
    });
    return jupyter_text2code_editor;
}
154 |
/**
 * Add a query to the top of the command-history list.
 *
 * @param {string} query - The query text that was just executed.
 */
function update_history_display(query) {
    var jupyter_text2code_history = $('#jupyter_text2code_history');
    // The query is user input: insert it with .text() rather than as an HTML
    // string so markup typed into the query box is not interpreted. The class
    // is the one the delegated history click handler matches on.
    // NOTE(review): the original wrapper tag is not visible in this copy of
    // the file; <div> is assumed - confirm against the stylesheet.
    jupyter_text2code_history.prepend(
        $('<div/>').addClass('jupyter_text2code_history_item').text(query)
    );
}
159 |
// Build the query editor panel and the history/presets panel, wire up their
// event handlers, and attach both panels to the page.
//
// Both panels get the class `jupyter_text2code_editor_display`, which is the
// selector used elsewhere in this file to show and hide them together.
//
// NOTE(review): the element markup inside the `$('')` and `.append("")`
// literals below appears empty or broken in this copy of the file -
// presumably HTML that was lost; confirm against the upstream source before
// changing this function.
160 | function build_jupyter_text2code_editor () {
161 | var jupyter_text2code_editor = $('').attr('id', 'jupyter_text2code_editor').attr('class', 'jupyter_text2code_editor_display');
162 | var jupyter_text2code_editor_history = $('').attr('id', 'jupyter_text2code_editor_history').attr('class', 'jupyter_text2code_editor_display');
163 |
// Query input, pre-filled with the initial query from extension_state.
164 | var textArea = $('').val(extension_state.data.query).addClass('form-control');
165 |
166 | jupyter_text2code_editor
167 | .append("What do you want to do?
")
168 | .append(textArea)
169 | .append("")
170 | .append("")
171 | .append("");
172 |
173 | // History panel: command history followed by the preset queries
174 | jupyter_text2code_editor_history.append(""
175 | + "Command History:
"
176 | + "
Presets:
"
177 | );
178 |
179 | jupyter_text2code_editor_history = add_presets(jupyter_text2code_editor_history);
180 |
// All handlers below are delegated from <body>, so they can be registered
// before the panels are attached to the document.
181 | // Close button: mark the editor closed and hide both panels
182 | $('body').on('click', '#jupyter_text2code_close', function() {
183 | extension_state.is_open = false;
184 | $(".jupyter_text2code_editor_display").hide();
185 | });
186 | // Submit button: run the current query via make_jupyter_text2code
187 | $('body').on('click', '#jupyter_text2code_submit', function() {
188 | make_jupyter_text2code();
189 | });
190 |
191 |
192 | // Disable Jupyter keyboard shortcuts while the query box has focus so that typing does not trigger them; re-enable them on blur
193 | $('body').on('focus', '#jupyter_text2code_query', function() {
194 | Jupyter.keyboard_manager.disable();
195 | });
196 | $('body').on('blur', '#jupyter_text2code_query', function() {
197 | Jupyter.keyboard_manager.enable();
198 | });
199 |
200 | // Clicking a history item copies its text into the query box
201 | $('body').on('click', '.jupyter_text2code_history_item', function() {
202 | $("#jupyter_text2code_query").val($(this).text());
203 | });
204 | // Clicking a preset item copies its text into the query box
205 | $('body').on('click', '.jupyter_text2code_preset_item', function() {
206 | $("#jupyter_text2code_query").val($(this).text());
207 | });
208 |
// The editor goes inside the notebook container; the history panel is
// appended directly to <body>.
209 | $("#notebook-container").append(jupyter_text2code_editor);
210 | $("body").append(jupyter_text2code_editor_history);
211 | }
212 |
/**
 * Start a query: run `dataframes_info()` in the kernel and route its output
 * to jupyter_text2code_lib_callback.
 */
var make_jupyter_text2code = function make_jupyter_text2code() {
    var callbacks = { iopub: { output: jupyter_text2code_lib_callback } };
    var options = { silent: false };

    var run_in_kernel = function () {
        Jupyter.notebook.kernel.execute("dataframes_info()", callbacks, options);
    };
    requirejs([], run_in_kernel);
};
222 |
/**
 * Extension entry point: attach the extension stylesheet to the page and run
 * `initialize` as soon as a kernel is available.
 */
function load_jupyter_extension() {
    var stylesheet = document.createElement("link");
    stylesheet.rel = "stylesheet";
    stylesheet.type = "text/css";
    stylesheet.href = requirejs.toUrl("./jupyter_text2code.css");
    document.getElementsByTagName("head")[0].appendChild(stylesheet);

    // Without a kernel yet, defer initialization until it reports ready.
    if (!Jupyter.notebook.kernel) {
        events.on('kernel_ready.Kernel', initialize);
        return;
    }
    initialize();
}
238 |
239 | return {
240 | load_jupyter_extension: load_jupyter_extension,
241 | load_ipython_extension: load_jupyter_extension
242 | };
243 | });
244 |
--------------------------------------------------------------------------------
/notebooks/Code Generator.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "%load_ext autoreload\n",
10 | "%autoreload 2"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [
18 | {
19 | "name": "stdout",
20 | "output_type": "stream",
21 | "text": [
22 | "********************\n",
23 | "********************\n",
24 | "loading_jupyter_server_extension: jupyter-text2code. First install will download universal-sentence-encoder, please wait...\n",
25 | "********************\n",
26 | "********************\n"
27 | ]
28 | }
29 | ],
30 | "source": [
31 | "import sys\n",
32 | "sys.path.insert(0,'../')\n",
33 | "from jupyter_text2code.jupyter_text2code_serverextension import CodeGenerator"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 3,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "cg = CodeGenerator()"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "### Intent 1: import x"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 4,
55 | "metadata": {},
56 | "outputs": [
57 | {
58 | "data": {
59 | "text/plain": [
60 | "'import spacy'"
61 | ]
62 | },
63 | "execution_count": 4,
64 | "metadata": {},
65 | "output_type": "execute_result"
66 | }
67 | ],
68 | "source": [
69 | "cg.generate_code(\"import spacy\")"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 5,
75 | "metadata": {},
76 | "outputs": [
77 | {
78 | "data": {
79 | "text/plain": [
80 | "'import pandas'"
81 | ]
82 | },
83 | "execution_count": 5,
84 | "metadata": {},
85 | "output_type": "execute_result"
86 | }
87 | ],
88 | "source": [
89 | "cg.generate_code(\"import pandas\")"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "### Intent 2: import all libs"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 6,
102 | "metadata": {},
103 | "outputs": [
104 | {
105 | "name": "stdout",
106 | "output_type": "stream",
107 | "text": [
108 | "\n",
109 | "import pandas as pd\n",
110 | "import numpy as np\n",
111 | "import os\n",
112 | "import plotly.express as px\n",
113 | "import matplotlib.pyplot as plt\n",
114 | "pd.options.plotting.backend = 'plotly'\n",
115 | " \n"
116 | ]
117 | }
118 | ],
119 | "source": [
120 | "print(cg.generate_code(\"import all libraries\"))"
121 | ]
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "metadata": {},
126 | "source": [
127 | "### Intent 3: install lib"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 7,
133 | "metadata": {},
134 | "outputs": [
135 | {
136 | "data": {
137 | "text/plain": [
138 | "'!pip install matplotlib'"
139 | ]
140 | },
141 | "execution_count": 7,
142 | "metadata": {},
143 | "output_type": "execute_result"
144 | }
145 | ],
146 | "source": [
147 | "cg.generate_code(\"install matplotlib\")"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 8,
153 | "metadata": {},
154 | "outputs": [
155 | {
156 | "data": {
157 | "text/plain": [
158 | "'!pip install scikitlearn'"
159 | ]
160 | },
161 | "execution_count": 8,
162 | "metadata": {},
163 | "output_type": "execute_result"
164 | }
165 | ],
166 | "source": [
167 | "cg.generate_code(\"install scikitlearn\")"
168 | ]
169 | },
170 | {
171 | "cell_type": "markdown",
172 | "metadata": {},
173 | "source": [
174 | "### Intent 4: Load file into df"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": 9,
180 | "metadata": {},
181 | "outputs": [
182 | {
183 | "data": {
184 | "text/plain": [
185 | "\"xxx = pd.read_csv('train.csv')\""
186 | ]
187 | },
188 | "execution_count": 9,
189 | "metadata": {},
190 | "output_type": "execute_result"
191 | }
192 | ],
193 | "source": [
194 | "cg.generate_code(\"load train.csv\")"
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "execution_count": 10,
200 | "metadata": {},
201 | "outputs": [
202 | {
203 | "name": "stdout",
204 | "output_type": "stream",
205 | "text": [
206 | "Intent: Load file into a dataframe Intent_id: 2 Similarity 0.88224137\n",
207 | "Entities:\n",
208 | "train.csv 5 14 FNAME\n",
209 | "zzzz 18 22 VARNAME\n",
210 | "----------\n"
211 | ]
212 | },
213 | {
214 | "data": {
215 | "text/plain": [
216 | "\"zzzz = pd.read_csv('train.csv')\""
217 | ]
218 | },
219 | "execution_count": 10,
220 | "metadata": {},
221 | "output_type": "execute_result"
222 | }
223 | ],
224 | "source": [
225 | "cg.generate_code(\"load train.csv in zzzz\", debug=True)"
226 | ]
227 | },
228 | {
229 | "cell_type": "markdown",
230 | "metadata": {},
231 | "source": [
232 | "### Intent 5: Show x rows from df"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": 11,
238 | "metadata": {},
239 | "outputs": [
240 | {
241 | "name": "stdout",
242 | "output_type": "stream",
243 | "text": [
244 | "Intent: Show x rows from df Intent_id: 3 Similarity 0.7455359\n",
245 | "Entities:\n",
246 | "df 10 12 VARNAME\n",
247 | "----------\n"
248 | ]
249 | },
250 | {
251 | "data": {
252 | "text/plain": [
253 | "'df.head()'"
254 | ]
255 | },
256 | "execution_count": 11,
257 | "metadata": {},
258 | "output_type": "execute_result"
259 | }
260 | ],
261 | "source": [
262 | "cg.generate_code(\"Show from df\", debug=True)"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": 12,
268 | "metadata": {},
269 | "outputs": [
270 | {
271 | "data": {
272 | "text/plain": [
273 | "\"#Couldn't extract variable name, replacing with default\\nxxx.head()\""
274 | ]
275 | },
276 | "execution_count": 12,
277 | "metadata": {},
278 | "output_type": "execute_result"
279 | }
280 | ],
281 | "source": [
282 | "cg.generate_code(\"Show alldf\")"
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": 13,
288 | "metadata": {},
289 | "outputs": [
290 | {
291 | "name": "stdout",
292 | "output_type": "stream",
293 | "text": [
294 | "Intent: Show x rows from df Intent_id: 3 Similarity 1.0\n",
295 | "Entities:\n",
296 | "5 5 6 CARDINAL\n",
297 | "df 17 19 VARNAME\n",
298 | "----------\n"
299 | ]
300 | },
301 | {
302 | "data": {
303 | "text/plain": [
304 | "'df.head(5)'"
305 | ]
306 | },
307 | "execution_count": 13,
308 | "metadata": {},
309 | "output_type": "execute_result"
310 | }
311 | ],
312 | "source": [
313 | "cg.generate_code(\"Show 5 rows from df\", debug=True)"
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": 14,
319 | "metadata": {},
320 | "outputs": [
321 | {
322 | "data": {
323 | "text/plain": [
324 | "'df.head(20)'"
325 | ]
326 | },
327 | "execution_count": 14,
328 | "metadata": {},
329 | "output_type": "execute_result"
330 | }
331 | ],
332 | "source": [
333 | "cg.generate_code(\"Show 20 rows of df\")"
334 | ]
335 | },
336 | {
337 | "cell_type": "markdown",
338 | "metadata": {},
339 | "source": [
340 | "### Intent 6: List columns of df"
341 | ]
342 | },
343 | {
344 | "cell_type": "code",
345 | "execution_count": 15,
346 | "metadata": {},
347 | "outputs": [
348 | {
349 | "data": {
350 | "text/plain": [
351 | "'mydf.columns'"
352 | ]
353 | },
354 | "execution_count": 15,
355 | "metadata": {},
356 | "output_type": "execute_result"
357 | }
358 | ],
359 | "source": [
360 | "cg.generate_code(\"list all columns of mydf\")"
361 | ]
362 | },
363 | {
364 | "cell_type": "markdown",
365 | "metadata": {},
366 | "source": [
367 | "### Intent7: Describe the df"
368 | ]
369 | },
370 | {
371 | "cell_type": "code",
372 | "execution_count": 16,
373 | "metadata": {},
374 | "outputs": [
375 | {
376 | "data": {
377 | "text/plain": [
378 | "'df.describe()'"
379 | ]
380 | },
381 | "execution_count": 16,
382 | "metadata": {},
383 | "output_type": "execute_result"
384 | }
385 | ],
386 | "source": [
387 | "cg.generate_code(\"Describe df\")"
388 | ]
389 | },
390 | {
391 | "cell_type": "code",
392 | "execution_count": 17,
393 | "metadata": {},
394 | "outputs": [
395 | {
396 | "data": {
397 | "text/plain": [
398 | "'mydf.describe()'"
399 | ]
400 | },
401 | "execution_count": 17,
402 | "metadata": {},
403 | "output_type": "execute_result"
404 | }
405 | ],
406 | "source": [
407 | "cg.generate_code(\"Describe mydf\")"
408 | ]
409 | },
410 | {
411 | "cell_type": "markdown",
412 | "metadata": {},
413 | "source": [
414 | "### Intent8: Plot histogram of column"
415 | ]
416 | },
417 | {
418 | "cell_type": "code",
419 | "execution_count": 18,
420 | "metadata": {},
421 | "outputs": [
422 | {
423 | "name": "stdout",
424 | "output_type": "stream",
425 | "text": [
426 | "Intent: histogram of column Intent_id: 4 Similarity 0.9223975\n",
427 | "Entities:\n",
428 | "rainfall 18 26 COLNAME\n",
429 | "df 37 39 VARNAME\n",
430 | "----------\n"
431 | ]
432 | },
433 | {
434 | "data": {
435 | "text/plain": [
436 | "'df.plot.hist(x=[\"rainfall\"])'"
437 | ]
438 | },
439 | "execution_count": 18,
440 | "metadata": {},
441 | "output_type": "execute_result"
442 | }
443 | ],
444 | "source": [
445 | "cg.generate_code(\"Plot histogram of rainfall column in df\", debug=True)"
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": 19,
451 | "metadata": {},
452 | "outputs": [
453 | {
454 | "name": "stdout",
455 | "output_type": "stream",
456 | "text": [
457 | "Intent: histogram of column Intent_id: 4 Similarity 0.9799768\n",
458 | "Entities:\n",
459 | "xzcx 18 22 COLNAME\n",
460 | "df 33 35 VARNAME\n",
461 | "----------\n"
462 | ]
463 | },
464 | {
465 | "data": {
466 | "text/plain": [
467 | "'df.plot.hist(x=[\"xzcx\"])'"
468 | ]
469 | },
470 | "execution_count": 19,
471 | "metadata": {},
472 | "output_type": "execute_result"
473 | }
474 | ],
475 | "source": [
476 | "cg.generate_code(\"Plot histogram of xzcx column in df\", debug=True)"
477 | ]
478 | },
479 | {
480 | "cell_type": "code",
481 | "execution_count": 20,
482 | "metadata": {},
483 | "outputs": [
484 | {
485 | "name": "stdout",
486 | "output_type": "stream",
487 | "text": [
488 | "Intent: histogram of column Intent_id: 4 Similarity 0.91642696\n",
489 | "Entities:\n",
490 | "age 18 21 VARNAME\n",
491 | "df 25 27 VARNAME\n",
492 | "----------\n",
493 | "Error: Didn't detect the column name\n"
494 | ]
495 | },
496 | {
497 | "data": {
498 | "text/plain": [
499 | "'#Couldn\\'t extract column names, replacing with default\\nage.plot.hist(x=[\"xxx\", \"yyy\"])'"
500 | ]
501 | },
502 | "execution_count": 20,
503 | "metadata": {},
504 | "output_type": "execute_result"
505 | }
506 | ],
507 | "source": [
508 | "cg.generate_code(\"Plot histogram of age in df\", debug=True)"
509 | ]
510 | },
511 | {
512 | "cell_type": "code",
513 | "execution_count": 21,
514 | "metadata": {},
515 | "outputs": [
516 | {
517 | "name": "stdout",
518 | "output_type": "stream",
519 | "text": [
520 | "Intent: histogram of column Intent_id: 4 Similarity 0.98263717\n",
521 | "Entities:\n",
522 | "dailywage 18 27 COLNAME\n",
523 | "df 38 40 VARNAME\n",
524 | "----------\n"
525 | ]
526 | },
527 | {
528 | "data": {
529 | "text/plain": [
530 | "'df.plot.hist(x=[\"dailywage\"])'"
531 | ]
532 | },
533 | "execution_count": 21,
534 | "metadata": {},
535 | "output_type": "execute_result"
536 | }
537 | ],
538 | "source": [
539 | "cg.generate_code(\"Plot histogram of dailyWage column in df\", debug=True)"
540 | ]
541 | },
542 | {
543 | "cell_type": "markdown",
544 | "metadata": {},
545 | "source": [
546 | "### Intent9: Get correlation matrix"
547 | ]
548 | },
549 | {
550 | "cell_type": "code",
551 | "execution_count": 22,
552 | "metadata": {},
553 | "outputs": [
554 | {
555 | "data": {
556 | "text/plain": [
557 | "'df.corr()'"
558 | ]
559 | },
560 | "execution_count": 22,
561 | "metadata": {},
562 | "output_type": "execute_result"
563 | }
564 | ],
565 | "source": [
566 | "cg.generate_code(\"Get correlation matrix of df\")"
567 | ]
568 | },
569 | {
570 | "cell_type": "markdown",
571 | "metadata": {},
572 | "source": [
573 | "### Intent10: Print shape of df"
574 | ]
575 | },
576 | {
577 | "cell_type": "code",
578 | "execution_count": 23,
579 | "metadata": {},
580 | "outputs": [
581 | {
582 | "data": {
583 | "text/plain": [
584 | "'df.shape'"
585 | ]
586 | },
587 | "execution_count": 23,
588 | "metadata": {},
589 | "output_type": "execute_result"
590 | }
591 | ],
592 | "source": [
593 | "cg.generate_code(\"Print shape of df\")"
594 | ]
595 | },
596 | {
597 | "cell_type": "code",
598 | "execution_count": 24,
599 | "metadata": {},
600 | "outputs": [
601 | {
602 | "data": {
603 | "text/plain": [
604 | "'zz.shape'"
605 | ]
606 | },
607 | "execution_count": 24,
608 | "metadata": {},
609 | "output_type": "execute_result"
610 | }
611 | ],
612 | "source": [
613 | "cg.generate_code(\"Print shape of zz\")"
614 | ]
615 | },
616 | {
617 | "cell_type": "markdown",
618 | "metadata": {},
619 | "source": [
620 | "### Intent11: Barplot two columns"
621 | ]
622 | },
623 | {
624 | "cell_type": "code",
625 | "execution_count": 25,
626 | "metadata": {},
627 | "outputs": [
628 | {
629 | "name": "stdout",
630 | "output_type": "stream",
631 | "text": [
632 | "Intent: Bar of column1 and column2 from df Intent_id: 7 Similarity 0.7900896\n",
633 | "Entities:\n",
634 | "duration 9 17 COLNAME\n",
635 | "age 22 25 COLNAME\n",
636 | "df 31 33 VARNAME\n",
637 | "----------\n"
638 | ]
639 | },
640 | {
641 | "data": {
642 | "text/plain": [
643 | "\"px.bar(x='duration',y='age',data_frame=df,title='CustomTitle', labels={'duration':'duration','age':'age'})\""
644 | ]
645 | },
646 | "execution_count": 25,
647 | "metadata": {},
648 | "output_type": "execute_result"
649 | }
650 | ],
651 | "source": [
652 | "cg.generate_code(\"Bar plot duration and age from df\", debug=True)"
653 | ]
654 | },
655 | {
656 | "cell_type": "code",
657 | "execution_count": 26,
658 | "metadata": {},
659 | "outputs": [
660 | {
661 | "name": "stdout",
662 | "output_type": "stream",
663 | "text": [
664 | "Intent: Bar of column1 and column2 from df Intent_id: 7 Similarity 0.8381791\n",
665 | "Entities:\n",
666 | "age 9 12 COLNAME\n",
667 | "temperature 17 28 COLNAME\n",
668 | "df 42 44 VARNAME\n",
669 | "----------\n"
670 | ]
671 | },
672 | {
673 | "data": {
674 | "text/plain": [
675 | "\"px.bar(x='age',y='temperature',data_frame=df,title='CustomTitle', labels={'age':'age','temperature':'temperature'})\""
676 | ]
677 | },
678 | "execution_count": 26,
679 | "metadata": {},
680 | "output_type": "execute_result"
681 | }
682 | ],
683 | "source": [
684 | "cg.generate_code(\"Bar plot age and temperature columns from df\", debug=True)"
685 | ]
686 | },
687 | {
688 | "cell_type": "code",
689 | "execution_count": 27,
690 | "metadata": {},
691 | "outputs": [
692 | {
693 | "name": "stdout",
694 | "output_type": "stream",
695 | "text": [
696 | "Intent: Bar of column1 and column2 from df Intent_id: 7 Similarity 0.8020855\n",
697 | "Entities:\n",
698 | "temperature 9 20 COLNAME\n",
699 | "humidity 25 33 COLNAME\n",
700 | "df 45 47 VARNAME\n",
701 | "----------\n"
702 | ]
703 | },
704 | {
705 | "data": {
706 | "text/plain": [
707 | "\"px.bar(x='temperature',y='humidity',data_frame=df,title='CustomTitle', labels={'temperature':'temperature','humidity':'humidity'})\""
708 | ]
709 | },
710 | "execution_count": 27,
711 | "metadata": {},
712 | "output_type": "execute_result"
713 | }
714 | ],
715 | "source": [
716 | "cg.generate_code(\"Bar plot temperature and humidity columns of df\", debug=True)"
717 | ]
718 | },
719 | {
720 | "cell_type": "code",
721 | "execution_count": 28,
722 | "metadata": {},
723 | "outputs": [
724 | {
725 | "name": "stdout",
726 | "output_type": "stream",
727 | "text": [
728 | "Intent: Bar of column1 and column2 from df Intent_id: 7 Similarity 0.9778261\n",
729 | "Entities:\n",
730 | "numitems 21 29 COLNAME\n",
731 | "purchasemonth 44 57 COLNAME\n",
732 | "df 71 73 VARNAME\n",
733 | "----------\n"
734 | ]
735 | },
736 | {
737 | "data": {
738 | "text/plain": [
739 | "\"px.bar(x='numitems',y='purchasemonth',data_frame=df,title='CustomTitle', labels={'numitems':'numitems','purchasemonth':'purchasemonth'})\""
740 | ]
741 | },
742 | "execution_count": 28,
743 | "metadata": {},
744 | "output_type": "execute_result"
745 | }
746 | ],
747 | "source": [
748 | "cg.generate_code(\"show a bar plot with numItems on x axis and purchaseMonth on y axis in df\", debug=True)"
749 | ]
750 | },
751 | {
752 | "cell_type": "markdown",
753 | "metadata": {},
754 | "source": [
755 | "### Intent12: Pie chart of a column in df"
756 | ]
757 | },
758 | {
759 | "cell_type": "code",
760 | "execution_count": 29,
761 | "metadata": {},
762 | "outputs": [
763 | {
764 | "name": "stdout",
765 | "output_type": "stream",
766 | "text": [
767 | "Intent: Pie chart of column in Intent_id: 8 Similarity 0.8810677\n",
768 | "Entities:\n",
769 | "age 12 15 COLNAME\n",
770 | "df 26 28 VARNAME\n",
771 | "gender 40 46 COLNAME\n",
772 | "----------\n"
773 | ]
774 | },
775 | {
776 | "data": {
777 | "text/plain": [
778 | "\"tmp = df['age'].value_counts(dropna=False)\\npx.pie(tmp,values=tmp.values,names=tmp.index,title='CustomTitle')\""
779 | ]
780 | },
781 | "execution_count": 29,
782 | "metadata": {},
783 | "output_type": "execute_result"
784 | }
785 | ],
786 | "source": [
787 | "cg.generate_code(\"piechart of age column in df grouped by gender column\", debug=True)"
788 | ]
789 | },
790 | {
791 | "cell_type": "markdown",
792 | "metadata": {},
793 | "source": [
794 | "### Intent13: Group df "
795 | ]
796 | },
797 | {
798 | "cell_type": "code",
799 | "execution_count": 38,
800 | "metadata": {},
801 | "outputs": [
802 | {
803 | "name": "stdout",
804 | "output_type": "stream",
805 | "text": [
806 | "Intent: group the df by column1 and get average of column2 Intent_id: 12 Similarity 0.7937273\n",
807 | "Entities:\n",
808 | "df 10 12 VARNAME\n",
809 | "gender 16 22 COLNAME\n",
810 | "average 31 38 FUNCTION\n",
811 | "age 42 45 VARNAME\n",
812 | "----------\n",
813 | "Use mean for synonym average\n"
814 | ]
815 | },
816 | {
817 | "data": {
818 | "text/plain": [
819 | "\"df.groupby(['gender']).agg(['mean'])\""
820 | ]
821 | },
822 | "execution_count": 38,
823 | "metadata": {},
824 | "output_type": "execute_result"
825 | }
826 | ],
827 | "source": [
828 | "cg.generate_code(\"Group the df by gender and get average of age\", debug=True)"
829 | ]
830 | },
831 | {
832 | "cell_type": "code",
833 | "execution_count": 31,
834 | "metadata": {},
835 | "outputs": [
836 | {
837 | "name": "stdout",
838 | "output_type": "stream",
839 | "text": [
840 | "Intent: group the df by column1 and get average of column2 Intent_id: 12 Similarity 0.79380095\n",
841 | "Entities:\n",
842 | "df 6 8 VARNAME\n",
843 | "gender 12 18 COLNAME\n",
844 | "mean 27 31 FUNCTION\n",
845 | "age 35 38 VARNAME\n",
846 | "----------\n"
847 | ]
848 | },
849 | {
850 | "data": {
851 | "text/plain": [
852 | "\"df.groupby(['gender']).agg(['mean'])\""
853 | ]
854 | },
855 | "execution_count": 31,
856 | "metadata": {},
857 | "output_type": "execute_result"
858 | }
859 | ],
860 | "source": [
861 | "cg.generate_code(\"Group df by gender and get mean of age\", debug=True)"
862 | ]
863 | },
864 | {
865 | "cell_type": "code",
866 | "execution_count": 32,
867 | "metadata": {},
868 | "outputs": [
869 | {
870 | "name": "stdout",
871 | "output_type": "stream",
872 | "text": [
873 | "Intent: group the df by column1 and get average of column2 Intent_id: 12 Similarity 0.77789533\n",
874 | "Entities:\n",
875 | "df 6 8 VARNAME\n",
876 | "gender 12 18 COLNAME\n",
877 | "mean 27 31 FUNCTION\n",
878 | "sum 36 39 FUNCTION\n",
879 | "age 43 46 VARNAME\n",
880 | "----------\n"
881 | ]
882 | },
883 | {
884 | "data": {
885 | "text/plain": [
886 | "\"df.groupby(['gender']).agg(['mean','sum'])\""
887 | ]
888 | },
889 | "execution_count": 32,
890 | "metadata": {},
891 | "output_type": "execute_result"
892 | }
893 | ],
894 | "source": [
895 | "cg.generate_code(\"Group df by gender and get mean and sum of age\", debug=True)"
896 | ]
897 | },
898 | {
899 | "cell_type": "code",
900 | "execution_count": 33,
901 | "metadata": {},
902 | "outputs": [
903 | {
904 | "name": "stdout",
905 | "output_type": "stream",
906 | "text": [
907 | "Intent: group the df by column1 and get average of column2 Intent_id: 12 Similarity 0.7954531\n",
908 | "Entities:\n",
909 | "df 6 8 VARNAME\n",
910 | "gender 12 18 COLNAME\n",
911 | "average 27 34 FUNCTION\n",
912 | "sum 36 39 FUNCTION\n",
913 | "age 43 46 VARNAME\n",
914 | "----------\n",
915 | "Use mean for synonym average\n"
916 | ]
917 | },
918 | {
919 | "data": {
920 | "text/plain": [
921 | "\"df.groupby(['gender']).agg(['mean','sum'])\""
922 | ]
923 | },
924 | "execution_count": 33,
925 | "metadata": {},
926 | "output_type": "execute_result"
927 | }
928 | ],
929 | "source": [
930 | "cg.generate_code(\"Group df by gender and get average, sum of age\", debug=True)"
931 | ]
932 | },
933 | {
934 | "cell_type": "code",
935 | "execution_count": 39,
936 | "metadata": {},
937 | "outputs": [
938 | {
939 | "name": "stdout",
940 | "output_type": "stream",
941 | "text": [
942 | "Intent: group the df by column1 and get average of column2 Intent_id: 12 Similarity 0.81393397\n",
943 | "Entities:\n",
944 | "df 6 8 VARNAME\n",
945 | "ozxc 12 16 COLNAME\n",
946 | "zxc 18 21 COLNAME\n",
947 | "min 30 33 FUNCTION\n",
948 | "max 35 38 FUNCTION\n",
949 | "sum 40 43 FUNCTION\n",
950 | "corr 47 51 COLNAME\n",
951 | "----------\n"
952 | ]
953 | },
954 | {
955 | "data": {
956 | "text/plain": [
957 | "\"df[['ozxc','zxc','corr']].groupby(['ozxc','zxc']).agg(['min','max','sum'])\""
958 | ]
959 | },
960 | "execution_count": 39,
961 | "metadata": {},
962 | "output_type": "execute_result"
963 | }
964 | ],
965 | "source": [
966 | "cg.generate_code(\"Group df by ozxc, zxc and get min, max, sum of corr\", debug=True)"
967 | ]
968 | },
969 | {
970 | "cell_type": "code",
971 | "execution_count": 35,
972 | "metadata": {},
973 | "outputs": [
974 | {
975 | "name": "stdout",
976 | "output_type": "stream",
977 | "text": [
978 | "Intent: group the df by column1 and get average of column2 Intent_id: 12 Similarity 0.6917082\n",
979 | "Entities:\n",
980 | "df 10 12 VARNAME\n",
981 | "gender 16 22 COLNAME\n",
982 | "average 31 38 FUNCTION\n",
983 | "age 39 42 FUNCTION\n",
984 | "----------\n",
985 | "Use mean for synonym average\n"
986 | ]
987 | },
988 | {
989 | "data": {
990 | "text/plain": [
991 | "\"df.groupby(['gender']).agg(['mean','age'])\""
992 | ]
993 | },
994 | "execution_count": 35,
995 | "metadata": {},
996 | "output_type": "execute_result"
997 | }
998 | ],
999 | "source": [
1000 | "cg.generate_code(\"Group the df by gender and get average age\", debug=True)"
1001 | ]
1002 | },
1003 | {
1004 | "cell_type": "markdown",
1005 | "metadata": {},
1006 | "source": [
1007 | "### Random"
1008 | ]
1009 | },
1010 | {
1011 | "cell_type": "code",
1012 | "execution_count": 31,
1013 | "metadata": {},
1014 | "outputs": [
1015 | {
1016 | "name": "stdout",
1017 | "output_type": "stream",
1018 | "text": [
1019 | "Intent: install lib Intent_id: 9 Similarity 0.99999994\n",
1020 | "Entities:\n",
1021 | "plotly 8 14 LIBNAME\n",
1022 | "----------\n"
1023 | ]
1024 | },
1025 | {
1026 | "data": {
1027 | "text/plain": [
1028 | "'!pip install plotly'"
1029 | ]
1030 | },
1031 | "execution_count": 31,
1032 | "metadata": {},
1033 | "output_type": "execute_result"
1034 | }
1035 | ],
1036 | "source": [
1037 | "cg.generate_code(\"install plotly\", debug=True)"
1038 | ]
1039 | },
1040 | {
1041 | "cell_type": "code",
1042 | "execution_count": 32,
1043 | "metadata": {},
1044 | "outputs": [
1045 | {
1046 | "name": "stdout",
1047 | "output_type": "stream",
1048 | "text": [
1049 | "Intent: Pie chart of column in Intent_id: 8 Similarity 0.8031008\n",
1050 | "Entities:\n",
1051 | "heroes_gender 18 31 COLNAME\n",
1052 | "df 35 37 VARNAME\n",
1053 | "----------\n"
1054 | ]
1055 | },
1056 | {
1057 | "data": {
1058 | "text/plain": [
1059 | "\"tmp = df['heroes_gender'].value_counts(dropna=False)\\npx.pie(tmp,values=tmp.values,names=tmp.index,title='CustomTitle')\""
1060 | ]
1061 | },
1062 | "execution_count": 32,
1063 | "metadata": {},
1064 | "output_type": "execute_result"
1065 | }
1066 | ],
1067 | "source": [
1068 | "cg.generate_code(\"plot pie chart of heroes_gender in df\", debug=True)"
1069 | ]
1070 | },
1071 | {
1072 | "cell_type": "code",
1073 | "execution_count": 33,
1074 | "metadata": {},
1075 | "outputs": [
1076 | {
1077 | "name": "stdout",
1078 | "output_type": "stream",
1079 | "text": [
1080 | "Intent: histogram of column Intent_id: 4 Similarity 0.8690444\n",
1081 | "Entities:\n",
1082 | "heroes_gender 18 31 COLNAME\n",
1083 | "youtube_views 36 49 COLNAME\n",
1084 | "df 61 63 VARNAME\n",
1085 | "----------\n"
1086 | ]
1087 | },
1088 | {
1089 | "data": {
1090 | "text/plain": [
1091 | "'df.plot.hist(x=[\"heroes_gender\", \"youtube_views\"])'"
1092 | ]
1093 | },
1094 | "execution_count": 33,
1095 | "metadata": {},
1096 | "output_type": "execute_result"
1097 | }
1098 | ],
1099 | "source": [
1100 | "cg.generate_code(\"plot histogram of heroes_gender and youtube_views columns of df\", debug=True)"
1101 | ]
1102 | },
1103 | {
1104 | "cell_type": "code",
1105 | "execution_count": 34,
1106 | "metadata": {},
1107 | "outputs": [
1108 | {
1109 | "name": "stdout",
1110 | "output_type": "stream",
1111 | "text": [
1112 | "Intent: histogram of column Intent_id: 4 Similarity 0.8805045\n",
1113 | "Entities:\n",
1114 | "heroes_gendes 18 31 COLNAME\n",
1115 | "zxc 36 39 COLNAME\n",
1116 | "df 51 53 VARNAME\n",
1117 | "----------\n"
1118 | ]
1119 | },
1120 | {
1121 | "data": {
1122 | "text/plain": [
1123 | "'df.plot.hist(x=[\"heroes_gendes\", \"zxc\"])'"
1124 | ]
1125 | },
1126 | "execution_count": 34,
1127 | "metadata": {},
1128 | "output_type": "execute_result"
1129 | }
1130 | ],
1131 | "source": [
1132 | "cg.generate_code(\"plot histogram of heroes_gendes and zxc columns of df\", debug=True)"
1133 | ]
1134 | },
1135 | {
1136 | "cell_type": "code",
1137 | "execution_count": 35,
1138 | "metadata": {},
1139 | "outputs": [
1140 | {
1141 | "name": "stdout",
1142 | "output_type": "stream",
1143 | "text": [
1144 | "Intent: line chart of column1 and column2 of df Intent_id: 13 Similarity 0.88551736\n",
1145 | "Entities:\n",
1146 | "release_date 10 22 COLNAME\n",
1147 | "youtube_avg_watch_duration 27 53 COLNAME\n",
1148 | "df 57 59 VARNAME\n",
1149 | "----------\n"
1150 | ]
1151 | },
1152 | {
1153 | "data": {
1154 | "text/plain": [
1155 | "\"df.plot.line(x='release_date', y='youtube_avg_watch_duration', color=None, title='CustomTitle', labels={'release_date':'release_date', 'youtube_avg_watch_duration':'youtube_avg_watch_duration'})\""
1156 | ]
1157 | },
1158 | "execution_count": 35,
1159 | "metadata": {},
1160 | "output_type": "execute_result"
1161 | }
1162 | ],
1163 | "source": [
1164 | "cg.generate_code(\"line plot release_date and youtube_avg_watch_duration of df\", debug=True)"
1165 | ]
1166 | },
1167 | {
1168 | "cell_type": "code",
1169 | "execution_count": 36,
1170 | "metadata": {},
1171 | "outputs": [
1172 | {
1173 | "name": "stdout",
1174 | "output_type": "stream",
1175 | "text": [
1176 | "Intent: scatter plot of column1 and column2 of df Intent_id: 14 Similarity 0.90176094\n",
1177 | "Entities:\n",
1178 | "spotify_streams 16 31 COLNAME\n",
1179 | "youtube_views 36 49 COLNAME\n",
1180 | "df 53 55 VARNAME\n",
1181 | "----------\n"
1182 | ]
1183 | },
1184 | {
1185 | "data": {
1186 | "text/plain": [
1187 | "\"df.plot.scatter(x='spotify_streams', y='youtube_views', color=None, size=None, title='CustomTitle', labels={'spotify_streams':'spotify_streams', 'youtube_views':'youtube_views'})\""
1188 | ]
1189 | },
1190 | "execution_count": 36,
1191 | "metadata": {},
1192 | "output_type": "execute_result"
1193 | }
1194 | ],
1195 | "source": [
1196 | "cg.generate_code(\"scatter plot of spotify_streams and youtube_views of df\", debug=True)"
1197 | ]
1198 | },
1199 | {
1200 | "cell_type": "code",
1201 | "execution_count": 37,
1202 | "metadata": {},
1203 | "outputs": [
1204 | {
1205 | "name": "stdout",
1206 | "output_type": "stream",
1207 | "text": [
1208 | "Intent: Bar of column1 and column2 from df Intent_id: 7 Similarity 0.84774184\n",
1209 | "Entities:\n",
1210 | "release_date 18 30 COLNAME\n",
1211 | "df 63 65 VARNAME\n",
1212 | "----------\n",
1213 | "Error: Didn't detect the column name\n"
1214 | ]
1215 | },
1216 | {
1217 | "data": {
1218 | "text/plain": [
1219 | "\"#Couldn't extract column names, replacing with default\\npx.bar(x='xxx',y='yyy',data_frame=df,title='CustomTitle', labels={'xxx':'xxx','yyy':'yyy'})\""
1220 | ]
1221 | },
1222 | "execution_count": 37,
1223 | "metadata": {},
1224 | "output_type": "execute_result"
1225 | }
1226 | ],
1227 | "source": [
1228 | "cg.generate_code(\"plot line plot of release_date & youtube_avg_watch_duration of df\", debug=True)"
1229 | ]
1230 | },
1231 | {
1232 | "cell_type": "code",
1233 | "execution_count": 6,
1234 | "metadata": {},
1235 | "outputs": [
1236 | {
1237 | "name": "stdout",
1238 | "output_type": "stream",
1239 | "text": [
1240 | "Intent: line chart of column1 and column2 of df Intent_id: 13 Similarity 0.83657575\n",
1241 | "Entities:\n",
1242 | "release_date 13 25 COLNAME\n",
1243 | "df 58 60 VARNAME\n",
1244 | "----------\n",
1245 | "Error: Didn't detect the column name\n"
1246 | ]
1247 | },
1248 | {
1249 | "data": {
1250 | "text/plain": [
1251 | "\"#Couldn't extract column names, replacing with default\\ndf.plot.line(x='xxx', y='yyy', color=None, title='CustomTitle', labels={'xxx':'xxx', 'yyy':'yyy'})\""
1252 | ]
1253 | },
1254 | "execution_count": 6,
1255 | "metadata": {},
1256 | "output_type": "execute_result"
1257 | }
1258 | ],
1259 | "source": [
1260 | "cg.generate_code(\"line plot of release_date & youtube_avg_watch_duration of df\", debug=True)"
1261 | ]
1262 | },
1263 | {
1264 | "cell_type": "code",
1265 | "execution_count": 7,
1266 | "metadata": {},
1267 | "outputs": [
1268 | {
1269 | "name": "stdout",
1270 | "output_type": "stream",
1271 | "text": [
1272 | "Intent: line chart of column1 and column2 of df Intent_id: 13 Similarity 0.84694755\n",
1273 | "Entities:\n",
1274 | "release_date 20 32 COLNAME\n",
1275 | "df 65 67 VARNAME\n",
1276 | "----------\n",
1277 | "Error: Didn't detect the column name\n"
1278 | ]
1279 | },
1280 | {
1281 | "data": {
1282 | "text/plain": [
1283 | "\"#Couldn't extract column names, replacing with default\\ndf.plot.line(x='xxx', y='yyy', color=None, title='CustomTitle', labels={'xxx':'xxx', 'yyy':'yyy'})\""
1284 | ]
1285 | },
1286 | "execution_count": 7,
1287 | "metadata": {},
1288 | "output_type": "execute_result"
1289 | }
1290 | ],
1291 | "source": [
1292 | "cg.generate_code(\"show a line plot of release_date & youtube_avg_watch_duration of df\", debug=True)"
1293 | ]
1294 | },
1295 | {
1296 | "cell_type": "code",
1297 | "execution_count": 5,
1298 | "metadata": {},
1299 | "outputs": [
1300 | {
1301 | "name": "stdout",
1302 | "output_type": "stream",
1303 | "text": [
1304 | "Intent: line chart of column1 and column2 of df Intent_id: 13 Similarity 0.8588614\n",
1305 | "Entities:\n",
1306 | "release_date 5 17 CARDINAL\n",
1307 | "df 50 52 VARNAME\n",
1308 | "----------\n",
1309 | "Error: Didn't detect the column name\n"
1310 | ]
1311 | },
1312 | {
1313 | "data": {
1314 | "text/plain": [
1315 | "\"#Couldn't extract column names, replacing with default\\ndf.plot.line(x='xxx', y='yyy', color=None, title='CustomTitle', labels={'xxx':'xxx', 'yyy':'yyy'})\""
1316 | ]
1317 | },
1318 | "execution_count": 5,
1319 | "metadata": {},
1320 | "output_type": "execute_result"
1321 | }
1322 | ],
1323 | "source": [
1324 | "cg.generate_code(\"show release_date & youtube_avg_watch_duration of df in a line plot\", debug=True)"
1325 | ]
1326 | },
1327 | {
1328 | "cell_type": "code",
1329 | "execution_count": 5,
1330 | "metadata": {},
1331 | "outputs": [
1332 | {
1333 | "name": "stdout",
1334 | "output_type": "stream",
1335 | "text": [
1336 | "Intent: barplot $colname and $colname columns of $varname Intent_id: 7 Similarity 0.89604545\n",
1337 | "Entities:\n",
1338 | "release_date 5 17 CARDINAL\n",
1339 | "df 50 52 VARNAME\n",
1340 | "----------\n",
1341 | "Error: Didn't detect the column name\n"
1342 | ]
1343 | },
1344 | {
1345 | "data": {
1346 | "text/plain": [
1347 | "\"#Couldn't extract column names, replacing with default\\npx.bar(x='xxx',y='yyy',data_frame=df,title='CustomTitle', labels={'xxx':'xxx','yyy':'yyy'})\""
1348 | ]
1349 | },
1350 | "execution_count": 5,
1351 | "metadata": {},
1352 | "output_type": "execute_result"
1353 | }
1354 | ],
1355 | "source": [
1356 | "cg.generate_code(\"show release_date & youtube_avg_watch_duration of df in a bar plot\", debug=True)"
1357 | ]
1358 | },
1359 | {
1360 | "cell_type": "code",
1361 | "execution_count": 6,
1362 | "metadata": {},
1363 | "outputs": [
1364 | {
1365 | "name": "stdout",
1366 | "output_type": "stream",
1367 | "text": [
1368 | "Intent: barplot $colname and $colname columns of $varname Intent_id: 7 Similarity 0.8960455\n",
1369 | "Entities:\n",
1370 | "release_date 5 17 CARDINAL\n",
1371 | "df 50 52 VARNAME\n",
1372 | "----------\n",
1373 | "Error: Didn't detect the column name\n"
1374 | ]
1375 | },
1376 | {
1377 | "data": {
1378 | "text/plain": [
1379 | "\"#Couldn't extract column names, replacing with default\\npx.bar(x='xxx',y='yyy',data_frame=df,title='CustomTitle', labels={'xxx':'xxx','yyy':'yyy'})\""
1380 | ]
1381 | },
1382 | "execution_count": 6,
1383 | "metadata": {},
1384 | "output_type": "execute_result"
1385 | }
1386 | ],
1387 | "source": [
1388 | "cg.generate_code(\"show release_date & youtube_avg_watch_duration of df in a bar plot\", debug=True)"
1389 | ]
1390 | },
1391 | {
1392 | "cell_type": "code",
1393 | "execution_count": 7,
1394 | "metadata": {},
1395 | "outputs": [
1396 | {
1397 | "name": "stdout",
1398 | "output_type": "stream",
1399 | "text": [
1400 | "Intent: switch to dark theme Intent_id: 17 Similarity 0.9999999\n",
1401 | "Entities:\n",
1402 | "----------\n"
1403 | ]
1404 | },
1405 | {
1406 | "data": {
1407 | "text/plain": [
1408 | "\"import plotly.io as pio\\npio.templates.default = 'plotly_dark'\""
1409 | ]
1410 | },
1411 | "execution_count": 7,
1412 | "metadata": {},
1413 | "output_type": "execute_result"
1414 | }
1415 | ],
1416 | "source": [
1417 | "cg.generate_code(\"switch to dark theme\", debug=True)"
1418 | ]
1419 | },
1420 | {
1421 | "cell_type": "code",
1422 | "execution_count": null,
1423 | "metadata": {},
1424 | "outputs": [],
1425 | "source": []
1426 | }
1427 | ],
1428 | "metadata": {
1429 | "kernelspec": {
1430 | "display_name": "Python 3",
1431 | "language": "python",
1432 | "name": "python3"
1433 | },
1434 | "language_info": {
1435 | "codemirror_mode": {
1436 | "name": "ipython",
1437 | "version": 3
1438 | },
1439 | "file_extension": ".py",
1440 | "mimetype": "text/x-python",
1441 | "name": "python",
1442 | "nbconvert_exporter": "python",
1443 | "pygments_lexer": "ipython3",
1444 | "version": "3.6.9"
1445 | }
1446 | },
1447 | "nbformat": 4,
1448 | "nbformat_minor": 4
1449 | }
1450 |
--------------------------------------------------------------------------------
/notebooks/Episodes.csv:
--------------------------------------------------------------------------------
1 | episode_id,episode_name,heroes,heroes_gender,heroes_location,heroes_nationality,heroes_kaggle_username,heroes_twitter_handle,category,flavour_of_tea,recording_date,recording_time,release_date,episode_duration,youtube_url,youtube_thumbnail_type,youtube_impressions,youtube_impression_views,youtube_ctr,youtube_nonimpression_views,youtube_views,youtube_watch_hours,youtube_avg_watch_duration,youtube_likes,youtube_dislikes,youtube_comments,youtube_subscribers,anchor_url,anchor_thumbnail_type,anchor_plays,spotify_starts,spotify_streams,spotify_listeners,apple_listeners,apple_listened_hours,apple_avg_listen_duration
2 | E0,Chai Time Data Science Launch Announcement,,,,,,,Other,Masala Chai,2019-07-15,Evening,2019-07-21,157,https://www.youtube.com/watch?v=Ko_gxs42lM8,1,4433,86,1.94,45,131,3,82,4,0,2,3,https://anchor.fm/chaitimedatascience/episodes/Chai-Time-Data-Science-Launch-Announcement-e4mas9,0,553,491,262,359,29,1,117
3 | E1,"Kaggle Triple Grandmaster, Abhishek Thakur Interview",Abhishek Thakur,Male,Norway,India,abhishek,abhi1thakur,Kaggle,Ginger Chai,2019-07-14,Evening,2019-07-22,2995,https://www.youtube.com/watch?v=Ezbo57Z33N8,0,25212,845,3.35,683,1528,142,335,55,0,5,60,https://anchor.fm/chaitimedatascience/episodes/Kaggle-Triple-Grandmaster--Abhishek-Thakur-Interview-e4mjoi,0,1271,826,608,456,56,25,1621
4 | E2,"Interview with Kaggle Master, ML Engineer: Ryan Chesler",Ryan Chesler,Male,USA,USA,ryches,ryan_chesler,Kaggle,Masala Chai,2019-07-20,Afternoon,2019-07-26,2118,https://www.youtube.com/watch?v=SJVMSKig14k,0,3282,84,2.56,44,128,14,394,7,0,1,3,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Kaggle-Master--ML-Engineer-Ryan-Chesler--Chai-Time-Data-Science-e4ntbt,0,681,398,274,214,19,10,1879
5 | E3,"Interview with CEO of SharpestMinds, Edouard Harris",Edouard Harris,Male,Canada,Canada,,neutronsNeurons,Industry,Kashmiri Kahwa,2019-07-23,Night,2019-07-29,3072,https://www.youtube.com/watch?v=69urmSt34Ac,0,2376,38,1.60,57,95,11,417,2,0,0,1,https://anchor.fm/chaitimedatascience/episodes/Interview-with-CEO-of-SharpestMinds--Edouard-Harris--Chai-Time-Data-Science-e4nti6,0,638,334,230,169,10,4,1344
6 | E4,Data Science for Good: City of LA Kaggle Winning Solution Interview with Kaggle Kernels Grandmaster Shivam Bansal,Shivam Bansal,Male,Singapore,India,shivamb,shivamshaz,Kaggle,Apple Cinnamon,2019-07-14,Morning,2019-08-02,1048,https://www.youtube.com/watch?v=wMYX3KABHCk,0,3884,116,2.99,36,152,9,213,4,0,0,4,https://anchor.fm/chaitimedatascience/episodes/Data-Science-for-Good-City-of-LA-Kaggle-Winning-Solution-Interview-with-Kaggle-Kernels-Grandmaster-Shivam-Bansal-e4qc36,0,495,201,139,123,17,3,633
7 | E5,"Deep Learning Research, Hardware, Kaggle | Interview with Tim Dettmers",Tim Dettmers,Male,USA,Germany,timdettmers,Tim_Dettmers,Research,Kashmiri Kahwa,2019-07-24,Night,2019-08-05,6242,https://www.youtube.com/watch?v=8Fp9m4fNDQ4,0,2937,60,2.04,76,136,22,582,4,0,1,6,https://anchor.fm/chaitimedatascience/episodes/Deep-Learning-Research--Hardware--Kaggle--Interview-with-Tim-Dettmers-e4qcad,0,668,279,198,125,10,6,2213
8 | E6,Interview with Kaggle Kernels GM: Shivam Bansal,Shivam Bansal,Male,Singapore,India,shivamb,shivamshaz,Kaggle,Apple Cinnamon,2019-07-14,Morning,2019-08-09,2776,https://www.youtube.com/watch?v=X73CzKIhqs8,0,5664,146,2.58,118,264,32,436,17,0,3,10,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Kaggle-Kernels-GM-Shivam-Bansal-e4qcbe,0,602,248,180,129,17,7,1516
9 | E7,Interview with Kaggle Kernels Grandmaster #1: Artgor | Andrew Lukyanenko,Andrey Lukyanenko,Male,Russia,Russia,artgor,AndLukyane,Kaggle,Tulsi Chai,2019-07-10,Evening,2019-08-13,2459,https://www.youtube.com/watch?v=rpClh8WmTdo,0,2694,37,1.37,33,70,5,257,3,0,0,0,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Kaggle-Kernels-Grandmaster-1-Artgor--Andrew-Lukyanenko-e4r6du,0,1283,431,304,251,38,15,1432
10 | E8,"NVIDIA's DALI Library, Image Augmentations Discussion: James Dellinger",James Dellinger,Male,USA,USA,jamesdellinger,jamrdell,Industry,Masala Chai,2019-06-23,Morning,2019-08-16,1949,https://www.youtube.com/watch?v=4kMEdDcBt00,0,2862,84,2.94,45,129,12,335,1,0,0,3,https://anchor.fm/chaitimedatascience/episodes/NVIDIAs-DALI-Library--Image-Augmentations-Discussion-Interview-with-James-Dellinger-e4r6f7,0,324,98,62,66,13,4,1242
11 | E9,Albumentations Framework: a fast image augmentations library | Interview with Dr. Vladimir Iglovikov,Vladimir Iglovikov,Male,USA,Russia,iglovikov,viglovikov,Kaggle,Ginger Chai,2019-07-18,Night,2019-08-20,3182,https://www.youtube.com/watch?v=JS9xol0VmA4,0,3152,92,2.92,63,155,13,302,2,1,0,0,https://anchor.fm/chaitimedatascience/episodes/Albumentations-Framework-a-fast-image-augmentations-library--Interview-with-Dr--Vladimir-Iglovikov-e4r6e4,0,385,125,74,63,8,4,1758
12 | E10,"Interview with the Leader of mlcourse.ai, Dr. Yury Kashnitsky",Yury Kashnitsky,Male,USA,Russia,kashnitsky,ykashnitsky,Kaggle,Ginger Chai,2019-08-12,Morning,2019-08-24,3727,https://www.youtube.com/watch?v=guvFOjxdeeA,0,3714,63,1.70,45,108,13,433,4,0,0,3,https://anchor.fm/chaitimedatascience/episodes/Interview-with-the-Leader-of-mlcourse-ai--Dr--Yury-Kashnitsky--Chai-Time-Data-Science-e52r5u,0,803,225,150,90,36,23,2319
13 | E11,"MuseNet, OpenAI and Deep Learning Research: Interview with Christine Payne",Christine Payne,Female,USA,USA,,mcleavey,Research,Kashmiri Kahwa,2019-07-29,Night,2019-08-26,3640,https://www.youtube.com/watch?v=xwXIaDrQvwE,0,5475,158,2.89,121,279,32,413,12,0,2,7,https://anchor.fm/chaitimedatascience/episodes/MuseNet--OpenAI-and-Deep-Learning-Research-Interview-with-Christine-Payne-e4r6hb,0,502,191,108,102,12,7,2012
14 | E13,"Deep Learning Applied to Audio, Self Studying ML | Interview with fast.ai fellow Robert Bracco",Robert Bracco,Male,USA,USA,madeupmasters,MadeUpMasters,Kaggle,Tulsi Chai,2019-07-12,Night,2019-09-02,2950,https://www.youtube.com/watch?v=k-gZAyg5ib8,0,4153,95,2.29,66,161,21,470,6,0,0,1,https://anchor.fm/chaitimedatascience/episodes/Deep-Learning-Applied-to-Audio--Self-Studying-ML--Interview-with-fast-ai-fellow-Robert-Bracco-e4r6d9,0,670,209,146,117,20,12,2190
15 | E14,"And the Bit Goes Down, Deep Learning Research, Research at FAIR | Interview with Pierre Stock",Pierre Stock,Male,France,France,,PierreStock,Research,Masala Chai,2019-08-12,Night,2019-09-06,2794,https://www.youtube.com/watch?v=I1xf8lPU3cs,0,3206,29,0.90,14,43,2,167,1,0,0,0,https://anchor.fm/chaitimedatascience/episodes/And-the-Bit-Goes-Down--Deep-Learning-Research--Research-at-FAIR--Interview-with-Pierre-Stock-e52rpk,0,391,114,61,70,19,6,1130
16 | E15,"Medical Science, Open Source and AI | Interview with Dr. Judy Gichoya",Judy Gichoya,Female,USA,Africa,,judywawira,Industry,Apple Cinnamon,2019-08-14,Night,2019-09-09,3797,https://www.youtube.com/watch?v=X9k7TWUc4Og,0,3704,27,0.73,120,147,9,220,4,0,0,2,https://anchor.fm/chaitimedatascience/episodes/Medical-Science--Open-Source-and-AI--Interview-with-Dr--Judy-Gichoya-e52rst,0,514,148,97,97,17,8,1610
17 | E16,"Producing a Chainsmokers Remix with AI, DAWNBench & fastai | Interview with Andrew Shaw",Andrew Shaw,Male,USA,USA,,bearpelican,Industry,Tulsi Chai,2019-08-27,Night,2019-09-29,2243,https://www.youtube.com/watch?v=zbYeCWG9GIU,0,4602,58,1.26,49,107,8,269,8,0,0,1,https://anchor.fm/chaitimedatascience/episodes/Producing-a-Chainsmokers-Remix-with-AI-MusicAutobot--DAWNBench--fast-ai--Interview-with-Andrew-Shaw-e5kvdq,0,382,112,65,74,7,2,1191
18 | E17,"Hugging Face, Transformers | NLP Research and Open Source | Interview with Julien Chaumond",Julien Chaumond,Male,USA,France,,julien_c,Industry,Tulsi Chai,2019-08-29,Night,2019-10-03,3861,https://www.youtube.com/watch?v=ejWkDviM5QM,0,17892,514,2.87,308,822,68,298,21,1,0,19,https://anchor.fm/chaitimedatascience/episodes/Hugging-Face--Transformers--NLP-Research-and-Open-Source--Interview-with-Julien-Chaumond-e5o819,0,849,245,164,129,35,17,1730
19 | E18,"Generative Deep Learning, Technical Writing & Data Science Consulting | Interview with David Foster",David Foster,Male,UK,UK,,,Industry,Tulsi Chai,2019-08-25,Night,2019-10-16,2592,https://www.youtube.com/watch?v=4gXzka76AJk,0,3317,59,1.78,159,218,20,330,4,0,0,4,https://anchor.fm/chaitimedatascience/episodes/Generative-Deep-Learning--Technical-Writing--Data-Science-Consulting--Interview-with-David-Foster-e7kqkd,0,576,152,91,85,18,5,1066
20 | E19,Chip Huyen Interview: Machine Learning Interviews | MOOCS and Deep Learning at NVIDIA,Chip Huyen,Female,USA,Vietnam,,chipro,Industry,Masala Chai,2019-08-21,Morning,2019-10-19,3603,https://www.youtube.com/watch?v=cWwlou9aYUA,0,5841,494,8.46,246,740,40,195,13,0,0,7,https://anchor.fm/chaitimedatascience/episodes/Chip-Huyen-Interview-Machine-Learning-Interviews--MOOCS-and-Deep-Learning-at-NVIDIA-e7kqun,0,951,254,176,150,50,22,1574
21 | E20,Gold Medalling in First Kaggle Comp | Predicting Molecular Prop: 4 GM & The Brain | Boris Dorado,Boris Dorado,Male,France,France,borisdee,,Kaggle,Ginger Chai,2019-09-22,Afternoon,2019-10-22,3152,https://www.youtube.com/watch?v=WaCBeiyQw4Q,0,2628,47,1.79,17,64,5,281,2,0,0,1,https://anchor.fm/chaitimedatascience/episodes/Boris-Dorado-Interview--Gold-Medalling-in-First-Kaggle-Comp--Predicting-Molecular-Prop-Kaggle-Comp-Solution-e7m6s3,0,576,121,80,71,26,13,1822
22 | E21,Swift For Tensorflow | Software Engineering Internship at Google | Interview with Bart Chrazaszcz,Bart Chrazaszcz,Male,Canada,Canada,,bart_chr,Research,Masala Chai,2019-09-28,Morning,2019-10-26,2141,https://www.youtube.com/watch?v=OTXKRPGhQu8,0,3970,113,2.85,192,305,24,283,8,0,0,8,https://anchor.fm/chaitimedatascience/episodes/Swift-For-Tensorflow--Software-Engineering-Internship-at-Google--Interview-with-Bart-Chrazaszcz-e88ll2,0,435,94,51,61,17,7,1471
23 | E22,DeepMind & AlphaGo | Deep Learning Research | Swift For Tensorflow | Interview with Dr. Marc Lanctot,Marc Lactot,Male,Canada,Canada,,sharky6000,Research,Apple Cinnamon,2019-08-27,Night,2019-10-27,4284,https://www.youtube.com/watch?v=-uUHR5VALB0,0,5959,168,2.82,476,644,66,369,19,0,1,17,https://anchor.fm/chaitimedatascience/episodes/DeepMind--AlphaGo--Deep-Learning-Research--Swift-For-Tensorflow--Interview-with-Dr--Marc-Lanctot-e7ks0s,0,693,177,119,110,42,28,2384
24 | E12,Freelancing in Machine Learning | Interview with Tuatini Godard,Tuatini Godard,Male,France,France,ekami66,,Industry,Kashmiri Kahwa,2019-07-11,Morning,2019-10-29,2684,https://www.youtube.com/watch?v=AwJpKBMog6c,0,3659,61,1.67,53,114,17,537,4,0,0,2,,,,,,,,,
25 | E23,Predicting Molecular Prop Kaggle Comp 2nd Place Sol | Interview with Kaggle Master Andres Torrubia,Andres Torrubia,Male,Spain,Spain,antorsae,antor,Kaggle,Apple Cinnamon,2019-09-01,Night,2019-11-01,7876,https://www.youtube.com/watch?v=sqo2h7aYPPk,0,7213,150,2.08,546,696,68,352,17,0,3,24,https://anchor.fm/chaitimedatascience/episodes/Predicting-Molecular-Prop-Kaggle-Comp-2nd-Place-Sol--Interview-with-Kaggle-Master-Andres-Torrubia-e8h0tj,0,673,166,109,81,22,31,5122
26 | E24,Medalling in all Entered Kaggle Comp | IEEE-CIS Comp 6th Pos Sol | Interview with Dr Philipp Singer,Philipp Singer,Male,Austria,Austria,philippsinger,ph_singer,Kaggle,Masala Chai,2019-10-12,Evening,2019-11-04,3825,https://www.youtube.com/watch?v=7sh5QrUIAHI,0,8570,272,3.17,622,894,115,463,22,1,3,19,https://anchor.fm/chaitimedatascience/episodes/Medalling-in-all-Entered-Kaggle-Comp--IEEE-CIS-Comp-6th-Pos-Sol--Interview-with-Dr-Philipp-Singer-e7m6s8,0,753,217,136,98,29,22,2687
27 | E25,"Kaggle Discussions Rank #1, x2 Grandmaster: Dr. Jean Francois Puget | IEEE-CIS Comp 2nd Pos Sol",Jean Francois Puget,Male,France,France,cpmpml,jfpuget,Kaggle,Masala Chai,2019-10-11,Afternoon,2019-11-21,3462,https://www.youtube.com/watch?v=wqHlAOFSFuQ,0,8071,329,4.08,635,964,103,385,29,1,1,30,https://anchor.fm/chaitimedatascience/episodes/Kaggle-Discussions-Rank-1--x2-Grandmaster-Dr--Jean-Francois-Puget--IEEE-CIS-Comp-2nd-Pos-Sol-e7m6s7,1,776,205,148,111,48,23,1719
28 | E26,DistilBERT | Research at Hugging Face | NLP and Open Source | Interview with Victor Sanh,Victor Sanh,Male,USA,France,,sanhestpasmoi,Research,Masala Chai,2019-10-14,Morning,2019-12-05,2972,https://www.youtube.com/watch?v=n7zZzUwqBig,0,14915,601,4.03,633,1234,86,251,20,0,1,28,https://anchor.fm/chaitimedatascience/episodes/DistilBERT--Research-at-Hugging-Face--NLP-and-Open-Source--Interview-with-Victor-Sanh-e9bp0n,1,684,130,86,83,41,17,1503
29 | E27,Interview with Jeremy Howard | fast.ai | Kaggle | Machine Learning Research,Jeremy Howard,Male,USA,Australia,jhoward,jeremyphoward,Industry,Sulemani Chai,2019-11-18,Night,2019-12-08,4851,https://www.youtube.com/watch?v=205j37G1cxw,0,27596,1297,4.70,3205,4502,704,563,183,3,16,139,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Jeremy-Howard--fast-ai--Kaggle--Machine-Learning-Research-e9ddbh,1,1527,466,373,204,96,83,3117
30 | E28,Interview with Arno Candel | AutoML | Physics | H2O.ai,Arno Candel,Male,USA,Switzerland,arnocandel,arnocandel,Industry,Sulemani Chai,2019-11-12,Afternoon,2019-12-12,4070,https://www.youtube.com/watch?v=72nXl0tvgc0,0,12900,245,1.90,577,822,71,312,10,0,1,6,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Arno-Candel--AutoML--Physics--H2O-ai--CTDS-Show-e9doo2,1,950,204,137,101,51,25,1743
31 | E29,"Eugene Khvedchenya | Kaggle, Computer Vision & Best Code Practises | Severstal Steel, 4th Pos Sol",Eugene Khvedchenya,Male,Ukraine,Ukraine,bloodaxe,cvtalks,Kaggle,Sulemani Chai,2019-10-28,Evening,2020-01-10,3127,https://www.youtube.com/watch?v=TwhJfZHx10s,1,7121,213,2.99,458,671,46,247,30,1,1,14,https://anchor.fm/chaitimedatascience/episodes/Eugene-Khvedchenya--Kaggle--Computer-Vision--Best-Code-Practises--Severstal-Steel--4th-Pos-Sol-ea34a8,1,624,100,67,57,34,13,1370
32 | E30,"Interview with x2 Kaggle GM: Dr. Olivier Grellier | Kaggle, Data Science & H2O.ai",Olivier Grellier,Male,France,France,ogrellier,,Kaggle,Ginger Chai,2019-11-18,Evening,2020-01-13,2276,https://www.youtube.com/watch?v=96YP13cxoq4,0,10500,189,1.80,568,757,58,277,17,0,2,18,https://anchor.fm/chaitimedatascience/episodes/Interview-with-x2-Kaggle-GM-Dr--Olivier-Grellier--Kaggle--Data-Science--H2O-ai-ea5ekr,1,564,81,49,50,32,12,1306
33 | E31,"Interview with Even Oldridge | Applied Research, Top Down Learning & Fast.ai | NVIDIA & Rapids.ai",Even Oldridge,Male,Canada,Canada,evenoldridge,Even_Oldridge,Industry,Masala Chai,2019-11-28,Morning,2020-01-16,5213,https://www.youtube.com/watch?v=-WzXIV8P_Jk,0,7371,130,1.76,190,320,39,439,5,2,2,5,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Even-Oldridge--Applied-Research--Top-Down-Learning--Fast-ai--NVIDIA--Rapids-ai-ea7hon,1,561,83,56,42,32,24,2714
34 | E32,"Interview with Leland Wilkinson | Grammar of Graphics | Open Source, Statistics & Software Dev",Leland Wilkinson,Male,USA,USA,,,Industry,Sulemani Chai,2019-12-16,Night,2020-01-19,3751,https://www.youtube.com/watch?v=j5dxrptSBYw,0,12300,148,1.20,321,469,51,394,17,0,1,11,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Leland-Wilkinson--Grammar-of-Graphics--Open-Source--Statistics--Software-Dev-ea9fj4,1,530,61,41,34,24,10,1559
35 | E33,"Interview with Kaggle Legend: Gilberto Titericz | Giba, Former #1 | Data Science & Kaggle",Gilberto Titericz,Male,Brazil,Brazil,titericz,giba1,Kaggle,Sulemani Chai,2019-11-30,Morning,2020-01-23,3844,https://www.youtube.com/watch?v=MpYeDKw8EOg,1,9232,328,3.55,1084,1412,181,461,51,1,4,35,https://anchor.fm/chaitimedatascience/episodes/Kaggle-Legend-Gilberto-Titericz--Giba--Former-1--Data-Science--Kaggle-ea9vdn,1,618,112,84,54,29,19,2343
36 | E34,Dmitry Gordeev & Philipp Singer | What does it take to win a Kaggle Comp? | NFL Data Bowl Win Sol,Dmitry Gordeev | Philipp Singer,Male,Austria,Austria,dott1718 | philippsinger,dott1718 | ph_singer,Kaggle,Sulemani Chai,2020-01-15,Night,2020-01-26,4657,https://www.youtube.com/watch?v=_Srv0bKmfjY,1,7382,256,3.47,834,1090,144,476,30,1,0,18,https://anchor.fm/chaitimedatascience/episodes/Dmitry-Gordeev--Philipp-Singer--What-does-it-take-to-win-a-Kaggle-Comp---NFL-Data-Bowl-Win-Sol-eaaihi,1,621,108,69,54,35,28,2887
37 | E35,"Rohan Rao | Numbers, Data Science & Kaggle | ASHRAE - Great Energy Predictor 2nd Pos Sol",Rohan Rao,Male,India,India,rohanrao,vopani,Kaggle,Ginger Chai,2020-01-10,Morning,2020-01-30,6151,https://www.youtube.com/watch?v=4nVL4ICMNcw,0,14000,420,3.00,735,1155,132,413,36,0,1,26,https://anchor.fm/chaitimedatascience/episodes/Rohan-Rao--Numbers--Data-Science--Kaggle--ASHRAE---Great-Energy-Predictor-2nd-Pos-Sol-eaa0db,1,590,114,74,56,22,12,1962
38 | E36,DeOldify | Fast.ai & NoGAN | Machine Learning & Software Engineering | Interview with Jason Antic,Jason Antic,Male,USA,USA,,citnaj,Industry,Herbal Tea,2019-12-21,Night,2020-02-02,3916,https://www.youtube.com/watch?v=A5Cq8SWudts,0,7696,189,2.46,586,775,71,330,31,1,8,21,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Jason-Antic--DeOldify--Fast-ai--NoGAN--Machine-Learning--Software-Engineering-eaj1t4,1,596,93,60,52,28,22,2816
39 | E37,The Story of Kaggle & Kaggle's Evolution | Interview with the CEO of Kaggle: Anthony Goldbloom,Anthony Goldbloom,Male,USA,Australia,antgoldbloom,antgoldbloom,Kaggle,Ginger Chai,2020-01-07,Morning,2020-02-06,3909,https://www.youtube.com/watch?v=jw2Z-IMyFYw,0,5883,127,2.16,340,467,48,370,32,1,7,11,https://anchor.fm/chaitimedatascience/episodes/The-Story-of-Kaggle--Kaggles-Evolution--Interview-with-the-CEO-of-Kaggle-Anthony-Goldbloom-ea9fj6,1,595,102,58,56,29,19,2414
40 | E38,Becoming The Youngest Kaggle Grandmaster | ML For Japanese Literature | Anokas: Mikel Bober-Irizar,Mikel Bober-Irizar,Male,UK,UK,anokas,mikb0b,Kaggle,Ginger Chai,2020-01-25,Evening,2020-02-09,3559,https://www.youtube.com/watch?v=maR9ibJ2r7g,0,9558,465,4.87,903,1368,148,389,51,0,5,29,https://anchor.fm/chaitimedatascience/episodes/Anokas-Mikel-Bober-Irizar--Becoming-The-Youngest-Kaggle-Grandmaster--ML-For-Japanese-Literature--Kaggle-eanr0n,1,674,160,118,55,32,23,2632
41 | E39,"Machine Learning, H2O.ai & Machine Learning Interpretability | Interview with Patrick Hall",Patrick Hall,Male,USA,USA,,jpatrickhall,Industry,Herbal Tea,2020-01-14,Evening,2020-02-13,3490,https://www.youtube.com/watch?v=TSmSBWnVSzc,0,10000,190,1.90,638,828,47,204,7,0,0,10,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Sr--Director-of-Product-at-H2O-ai-Patrick-Hall--Machine-Learning--H2O-ai--Machine-Learning-Interpretability-ea9ff0,1,530,75,53,47,21,12,2011
42 | E40,Interview with Zachary Mueller | Fast.ai: The course and New Library | SGs and Top Down Learning,Zachary Mueller,Male,USA,USA,muellerzr,TheZachMueller,Industry,Herbal Tea,2020-01-12,Evening,2020-02-16,3758,https://www.youtube.com/watch?v=AXr8pzXXUDQ,0,5528,166,3.00,377,543,61,404,24,0,0,15,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Zachary-Mueller--Fast-ai-The-course-and-New-Library--SGs-and-Top-Down-Learning-easari,1,602,99,75,51,26,18,2519
43 | E41,Software Engineering & Data Science | Machine Learning Interpretability | Open Source | Navdeep Gill,Navdeep Gill,Male,USA,USA,,Navdeep_Gill_,Industry,Herbal Tea,2020-02-05,Night,2020-02-20,3563,https://www.youtube.com/watch?v=9l8D8Ktlmpo,0,8900,125,1.40,199,324,20,222,10,0,1,3,https://anchor.fm/chaitimedatascience/episodes/Navdeep-Gill--Software-Engineering--Data-Science--Machine-Learning-Interpretability--Open-Source-eauhps,1,548,68,48,35,21,9,1565
44 | E42,"Statistics, Open Source & ML Research | Python for ML | Interview with Sebastian Raschka",Sebastian Raschka,Male,USA,Germany,,rasbt,Industry,Herbal Tea,2020-01-28,Morning,2020-02-23,4101,https://www.youtube.com/watch?v=beSLA-wO2T4,1,9478,275,2.90,577,852,61,258,39,1,1,40,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Sebastian-Raschka--Statistics--Open-Source--ML-Research--Python-for-ML-Book-eauidk,1,650,111,90,51,37,19,1891
45 | E43,Cyber-Security & Anti-Money Laundering | Applied AI & H2O AI | Interview with Dr. Ashrith Barthur,Ashrith Barthur,Male,USA,India,,cyberbaggage,Industry,Kesar Rose Chai,2020-02-21,Night,2020-02-27,2649,https://www.youtube.com/watch?v=pVhGyVr61ps,0,8400,126,1.50,434,560,42,270,10,0,1,6,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Dr--Ashrith-Barthur--Cyber-Security--Anti-Money-Laundering--Applied-AI--H2O-AI-eb1jti,1,469,25,14,16,22,8,1231
46 | E44,"Fast.ai, Learning to Learn | Machine Learning, Kaggle & Blogging | Interview with Radek Osmulski",Radek Osmulski,Male,Poland,Poland,radek1,radekosmulski,Industry,Herbal Tea,2020-02-20,Afternoon,2020-03-01,3936,https://www.youtube.com/watch?v=4h41v07bYYI,0,7607,260,3.42,588,848,122,518,49,2,5,24,https://anchor.fm/chaitimedatascience/episodes/Interview-with-fast-ai-hero-Radek-Osmulski--Fast-ai--Learning-to-Learn--Machine-Learning--Kaggle--Blogging-eav36v,1,657,113,88,55,30,25,3031
47 | E45,"Interview with Marios Michailidis | What does it take to become #1 on Kaggle | DSB 2019, 14th Pos Sol",Marios Michailidis,Male,UK,Greece,kazanova,stacknet_,Kaggle,Sulemani Chai,2020-02-14,Afternoon,2020-03-05,3276,https://www.youtube.com/watch?v=A3GvuHqGGZI,0,9700,213,2.20,545,758,62,294,13,0,3,10,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Marios-Michailidis--What-does-it-take-to-become-1-on-Kaggle--DSB-2019--14th-Pos-Sol-eb401g,2,470,49,32,32,13,1,230
48 | M0,00 Introduction & About: fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,624,https://www.youtube.com/watch?v=rzuIkj8lymc,2,3789,139,3.67,162,301,15,179,15,0,2,10,https://anchor.fm/chaitimedatascience/episodes/00-fast-ai-2019-Summaries--Things-Jeremy-Howard-says-to-do-eb97el,2,308,49,33,35,6,1,463
49 | M1,01: Lesson-1 Image Classification | fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,341,https://www.youtube.com/watch?v=RKtfgXz7Qo0,2,4643,163,3.51,56,219,7,115,8,0,2,1,https://anchor.fm/chaitimedatascience/episodes/01-fast-ai-Lesson-1-Image-Classification--fast-ai-2019--Things-Jeremy-Howard-says-to-do-eb97ek,2,368,37,32,29,10,1,504
50 | M2,02: Lesson-2 Production & SGD From Scratch | fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,316,https://www.youtube.com/watch?v=ahdybq2V-38,2,3144,63,2.00,37,100,3,108,2,1,0,0,https://anchor.fm/chaitimedatascience/episodes/02-fast-ai-Lesson-2-Production--SGD-From-Scratch--fast-ai-2019--Things-Jeremy-Howard-says-to-do-eb97eu,2,317,33,21,24,8,1,312
51 | M3,03: Lesson-3 Multi-label; SGD from scratch | fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,332,https://www.youtube.com/watch?v=Z-waVKLcLJE,2,2436,52,2.13,28,80,3,135,2,0,0,0,https://anchor.fm/chaitimedatascience/episodes/03-fast-ai-Lesson-3-Multi-label-SGD-from-scratch--fast-ai-2019--Things-Jeremy-Howard-says-to-do-eb97em,2,276,20,13,16,11,1,260
52 | M4,04: Lesson-4 NLP:Tabular Data; Recsys | fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,281,https://www.youtube.com/watch?v=5CW3QdGdr8c,2,2592,40,1.54,23,63,2,114,3,0,0,1,https://anchor.fm/chaitimedatascience/episodes/04-fast-ai-Lesson-4-NLPTabular-Data-Recsys--fast-ai-2019--Things-Jeremy-Howard-says-to-do-eb97f0,2,301,24,17,17,10,7,2547
53 | M5,05: Lesson 5: Backprop; Neural Nets from scratch | fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,311,https://www.youtube.com/watch?v=RIGlXwvUo_Q,2,2536,26,1.03,11,37,1,97,0,0,0,0,https://anchor.fm/chaitimedatascience/episodes/05-fast-ai-Lesson-5-Backprop-Neural-Nets-from-scratch--fast-ai-2019--Things-Jeremy-Howard-says-to-do-eb97f8,2,279,18,16,15,15,2,479
54 | M6,06: Lesson-6 CNN Deep Dive; Ethics | fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,412,https://www.youtube.com/watch?v=nAE8tq_SIXo,2,3572,49,1.37,33,82,2,88,2,0,0,0,https://anchor.fm/chaitimedatascience/episodes/06-fast-ai-Lesson-6-CNN-Deep-Dive-Ethics--fast-ai-2019--Things-Jeremy-Howard-says-to-do-eb97fn,2,275,27,13,17,11,2,515
55 | M7,07: Lesson-7 ResNet; U-Net; GANs | fast.ai 2019 & Things Jeremy Howard says to do,,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,467,https://www.youtube.com/watch?v=0eWG6apI1iY,2,2381,22,0.92,20,42,2,171,1,0,0,0,https://anchor.fm/chaitimedatascience/episodes/07-fast-ai-Lesson-7-ResNet-U-Net-GANs--fast-ai-2019--Things-Jeremy-Howard-says-to-do-eb97fq,2,281,19,14,17,9,1,288
56 | M8,"08: Where to go from here, General fast.ai advice",,,,,,,Other,Kesar Rose Chai,2020-02-26,Night,2020-03-07,605,https://www.youtube.com/watch?v=oOr-7hYaU8o,2,2133,33,1.55,11,44,2,164,1,0,0,0,https://anchor.fm/chaitimedatascience/episodes/08-Where-to-go-from-here--General-fast-ai-advice-eb97g2,2,376,26,17,22,8,1,301
57 | E46,Classical Japanese Lit & ML | Kuzushiji recog kaggle comp | Interview with Tarin Clanuwat,Tarin Clanuwat,Female,Japan,Japan,,tkasasagi,Research,Sulemani Chai,2020-02-03,Evening,2020-03-08,2162,https://www.youtube.com/watch?v=9E5JnTj8df0,1,4221,115,2.72,73,188,2,38,3,0,1,0,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Tarin-Clanuwat--Classical-Japanese-Literature--ML--Kuzushiji-recognition-kaggle-comp-eb4rh6,3,605,43,29,33,17,7,1472
58 | E47,"NFL 1st and Future: Analytics Winning Sol | ""Real World Data Sci"" & Kaggle | Interview with John Miller",John Miller,Male,USA,USA,jpmiller,johnmillertx,Kaggle,Herbal Tea,2020-02-03,Evening,2020-03-12,2936,https://www.youtube.com/watch?v=Ic__3zG-ab4,0,7400,96,1.30,141,237,19,295,13,0,0,1,https://anchor.fm/chaitimedatascience/episodes/Interview-w-John-Miller--NFL-1st-and-Future-Analytics-Winning-Sol--Real-World-Data-Sci--Kaggle-eb7p33,1,419,26,19,17,12,6,1652
59 | E48,Interview with ChristOf Henkel | Google Quest Q&A Labelling Comp 2nd Pos Sol | Rapids.ai & Kaggle,Christof Henkel,Male,Germany,Germany,christofhenkel,kagglingdieter,Kaggle,Ginger Chai,2020-02-12,Afternoon,2020-03-15,3180,https://www.youtube.com/watch?v=Q0_Xajic_9U,0,5196,162,3.12,731,893,88,355,32,1,2,23,https://anchor.fm/chaitimedatascience/episodes/Interview-with-ChristOf-Henkel--Google-Quest-QA-Labelling-Comp-2nd-Pos-Sol--Rapids-ai--Kaggle-eb4rpr,1,461,48,36,24,12,7,2119
60 | E49,Interview with Parul Pandey | Getting Started with Data Science & Blogging | Women in Data Science,Parul Pandey,Female,India,India,parulpandey,pandeyparul,Industry,Ginger Chai,2020-02-26,Morning,2020-03-19,3570,https://www.youtube.com/watch?v=DjBgB_fNXl0,0,17600,722,4.10,1439,2161,116,193,54,5,12,66,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Parul-Pandey--Getting-Started-with-Data-Science--Blogging--Women-in-Data-Science-eb4tr8,1,472,38,28,28,8,2,767
61 | E50,Inversion: Walter Reade | Data Science at Kaggle | Becoming a Data Scientist & Kaggle Grandmaster,Walter Reade,Male,USA,USA,inversion,walterreade,Kaggle,Ginger Chai,2020-02-03,Night,2020-03-22,2746,https://www.youtube.com/watch?v=OoB_LQpgDCk,0,5273,95,1.80,107,202,26,463,11,0,2,3,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Inversion-Walter-Reade--Data-Science-at-Kaggle--Becoming-a-Data-Scientist--Kaggle-Grandmaster-eb7k60,1,406,29,20,17,10,5,1799
62 | E51,"Interview with Sergey Kolesnikov | Catalyst: PyTorch Framework for DL & RL | Open Source, Soft. Engg",Sergey Kolesnikov,Male,Russia,Russia,scitator,scitator,Industry,Herbal Tea,2020-02-10,Morning,2020-03-26,4489,https://www.youtube.com/watch?v=1g6BpItJdJA,0,4856,140,2.88,303,443,22,179,22,2,0,8,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Sergey-Kolesnikov--Catalyst-PyTorch-Framework-for-DL--RL--Open-Source--Soft--Engg--Community-eb5c3j,1,426,20,12,15,9,4,1532
63 | E52,"Interview with Russ Wolfinger | Statistics, Data Science & Kaggle | NFL Big Data Bowl #14 Pos Sol",Russ Wolfinger,Male,USA,USA,sasrdw,,Kaggle,Ginger Chai,2020-02-10,Night,2020-03-29,4462,https://www.youtube.com/watch?v=akYeBUTXmT4,0,5882,99,1.68,286,385,27,252,18,0,1,9,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Russ-Wolfinger--Statistics--Data-Science--Kaggle--NFL-Big-Data-Bowl-14-Pos-Sol-eb5cnu,1,438,38,27,27,5,4,2849
64 | E53,Interview with Erin LeDell | H2O-AutoML & H2O.ai | Open Source | RLadies & WiMLDS Community,Erin LeDell,Female,USA,USA,ledell,ledell,Industry,Ginger Chai,2020-02-13,Night,2020-04-02,3428,https://www.youtube.com/watch?v=i2K42HNAoFM,0,7700,154,2.00,290,444,46,371,4,1,1,3,https://anchor.fm/chaitimedatascience/episodes/Interview-w-Erin-LeDell--H2O-AutoML--H2O-ai--Open-Source--RLadies--WiMLDS-Community-eb5irc,1,392,30,20,17,0,0,0
65 | E54,Interview with Sylvain Gugger | fast.ai: The new Framework & course | FastBook & Research at fast.ai,Sylvain Gugger,Male,USA,France,,guggersylvain,Industry,Herbal Tea,2020-02-13,Night,2020-04-05,2043,https://www.youtube.com/watch?v=-3fw9hxiop0,0,5417,175,3.23,522,697,68,351,35,1,3,15,https://anchor.fm/chaitimedatascience/episodes/Interview-w-Sylvain-Gugger--fast-ai-The-new-Framework--course--FastBook--Research-at-fast-ai-eb5a6a,1,510,49,37,29,17,5,1077
66 | E55,"SharpestMinds Team on Learning to Learn | Data Science, Startups & Hiring",Edouard Harris | Jeremie Harris | Russell Pollari,Male,Canada,Canada,,neutronsNeurons | russ_poll | jeremiecharris,Industry,Herbal Tea,2020-02-07,Morning,2020-04-09,4032,https://www.youtube.com/watch?v=vaWOS9GHB9c,1,5096,145,2.85,237,382,53,499,26,0,5,19,https://anchor.fm/chaitimedatascience/episodes/SharpestMinds-Team-on-Learning-to-Learn--Data-Science--Startups--Hiring-eb61lb,1,473,54,39,36,11,7,2284
67 | E56,"Interview with Dmytro Mushkin | Computer Vision Research | Kaggle, ML & Education",Dmytro Mushkin,Male,Czech Republic,Ukraine,oldufo,ducha_aiki,Kaggle,Herbal Tea,2020-02-10,Afternoon,2020-04-12,3164,https://www.youtube.com/watch?v=lWwkbiufwNE,1,5321,99,1.86,143,242,18,268,16,0,3,2,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Dmytro-Mushkin--Computer-Vision-Research--Kaggle--ML--Education-eb40j2,1,443,39,32,21,8,4,1585
68 | E57,"Interview with Mark Landry | Data Science, Kaggle, H2O.ai | AutoML",Mark Landry,Male,USA,USA,mlandry,mark_a_landry,Kaggle,Ginger Chai,2020-02-21,Morning,2020-04-16,3833,https://www.youtube.com/watch?v=kBcVi4p-ruY,1,9700,194,2.00,214,408,41,358,5,1,0,11,https://anchor.fm/chaitimedatascience/episodes/Interview-w-Mark-Landry--Data-Science--Kaggle--H2O-ai--AutoML-eb63se,1,397,33,26,21,7,7,3366
69 | E58,CEO of Decision.ai: Dan Becker | What does it take to become a Data Scientist? | Kaggle Learn,Dan Becker,Male,USA,USA,dansbecker,dan_s_becker,Industry,Sulemani Chai,2020-02-11,Night,2020-04-19,3375,https://www.youtube.com/watch?v=eEYvgsUeEgw,0,4510,143,3.17,336,479,49,368,24,0,4,17,https://anchor.fm/chaitimedatascience/episodes/CEO-of-Decision-ai-Dan-Becker--What-does-it-take-to-become-a-Data-Scientist---Kaggle-Learn--Data-Science-Portfolio-eb631q,1,464,58,42,31,15,11,2564
70 | E59,Suzana Ilić | Democratising AI with Communities | Machine Learning Tokyo | Inclusivity in AI,Suzana Illić,Female,Japan,Austria,,suzatweet,Research,Masala Chai,2020-02-12,Morning,2020-04-23,2192,https://www.youtube.com/watch?v=TzgHNJN8D3I,1,4237,126,2.97,218,344,24,251,14,0,2,4,https://anchor.fm/chaitimedatascience/episodes/Suzana-Ili--Democratising-AI-w-Communities--Machine-Learning-Tokyo--Inclusivity-in-AI-eb58p3,1,361,33,21,21,8,2,1033
71 | E60,"Interview with Ines Montani | Spacy, NLP & Open Source Frameworks | Explosion.ai, Thinc.ai & Prodi.gy",Ines Montani,Female,Germany,Germany,,_inesmontani,Industry,Ginger Chai,2020-02-24,Night,2020-04-26,3119,https://www.youtube.com/watch?v=C5DGFSDlMBM,3,6810,340,4.99,822,1162,82,254,31,0,3,30,https://anchor.fm/chaitimedatascience/episodes/Interview-w-Ines-Montani--Spacy--NLP--Open-Source-Frameworks--Explosion-ai--Thinc-ai--Prodi-gy-ed2fvt,1,417,47,31,19,17,8,1751
72 | E61,Daniel Bourke | Learning to Learn | Creating AI Content | Fitness & Machine Learning,Daniel Bourke,Male,Australia,Australia,,mrdbourke,Industry,Sulemani Chai,2020-02-20,Night,2020-04-30,5597,https://www.youtube.com/watch?v=r5_SuLF5UWY,3,4464,143,3.20,197,340,50,529,27,0,4,12,https://anchor.fm/chaitimedatascience/episodes/Daniel-Bourke--Learning-to-Learn--Creating-AI-Content--Fitness--Machine-Learning-eb65ap,1,437,53,43,27,13,9,2505
73 | E62,"Pablo Samuel Castro | ML Research, Google Brain & Creative AI | Learning ML with the community | LatinX",Pablo Samuel Castro,Male,Canada,Ecuador,,pcastr,Research,Sulemani Chai,2020-02-20,Night,2020-05-03,3560,https://www.youtube.com/watch?v=muiM5SQxTIA,3,4585,75,1.64,136,211,22,375,17,0,3,4,https://anchor.fm/chaitimedatascience/episodes/Pablo-Samuel-Castro--ML-Research--Google-Brain--Creative-AI--Learning-ML-w-the-community--LatinX-eb7kfp,1,405,43,36,24,15,6,1525
74 | E63,"Robert Bracco | Learning to Learn | Approaching Fast.ai Materials, Kaggle & Blogging",Robert Bracco,Male,USA,USA,madeupmasters,madeupmasters,Industry,Kesar Rose Chai,2020-02-14,Night,2020-05-07,7103,https://www.youtube.com/watch?v=CYYvQ-5V3xA,3,6163,180,2.92,196,376,61,584,12,1,6,2,https://anchor.fm/chaitimedatascience/episodes/Robert-Bracco--Learning-to-Learn--Approaching-Fast-ai-Materials--Kaggle--Blogging-eb7rqd,1,472,79,55,38,12,12,3562
75 | E64,"Hamel Husain | Fastpages, Open Source | ML at Github | fastai",Hamel Husain,Male,USA,USA,hamelhusain,HamelHusain,Industry,Ginger Chai,2020-02-25,Night,2020-05-10,2976,https://www.youtube.com/watch?v=-pYMXSThpvc,3,4350,97,2.23,169,266,28,379,12,0,5,7,https://anchor.fm/chaitimedatascience/episodes/Hamel-Husain--Fastpages--Open-Source--ML-at-Github--fastai-eds2e6,1,426,36,24,21,13,8,2164
76 | E65,Dmitry Danevskiy | Google Quest Q&A Labelling Comp: Winning Sol | Becoming Kaggle Grandmaster,Dmitry Danevskiy,Male,Ukraine,Ukraine,ddanevskyi,DanevskiyD,Kaggle,Masala Chai,2020-03-03,Morning,2020-05-14,2019,https://www.youtube.com/watch?v=pQL892iT-dM,3,4362,117,2.68,230,347,22,228,22,0,1,4,https://anchor.fm/chaitimedatascience/episodes/Dmitry-Danevskiy--Google-Quest-QA-Labelling-Comp-Winning-Sol--Becoming-Kaggle-Grandmaster-eb7jtv,1,358,24,15,18,9,3,1326
77 | E66,Goku Mohandas | MadeWithML | AI Research | Healthcare | Education,Goku Mohandas,Male,USA,USA,,GokuMohandas,Industry,Paan Rose Green Tea,2020-05-06,Night,2020-05-17,5734,https://www.youtube.com/watch?v=VqysJmIqko8,3,9903,203,2.05,326,529,69,470,28,0,4,16,https://anchor.fm/chaitimedatascience/episodes/Goku-Mohandas--MadeWithML--AI-Research--Healthcare--Education-ee609r,1,402,51,36,24,15,15,3494
78 | E67,"Eli Stevens, Luca Antiga, and Thomas Viehmann | Deep Learning with PyTorch",Eli Stevens | Luca Antiga | Thomas Viehmann,Male,USA | Italy | Germany,USA | Italy | Germany,,eli0stevens | lantiga | thomasviehmann,Industry,Paan Rose Green Tea,2020-05-01,Night,2020-05-21,4667,https://www.youtube.com/watch?v=f5Qv3eSZpug,3,4503,144,3.20,407,551,56,366,29,0,1,12,https://anchor.fm/chaitimedatascience/episodes/Eli-Stevens--Luca-Antiga--and-Thomas-Viehmann--Deep-Learning-with-PyTorch-eec4qk,1,403,24,18,16,19,11,2156
79 | E68,Emmanuel Ameisen | Building Machine Learning Powered Apps,Emmanuel Ameisen,Male,USA,USA,,mlpowered,Industry,Masala Chai,2020-05-20,Morning,2020-05-24,3491,https://www.youtube.com/watch?v=ctss0hcD9SE,3,4734,137,2.89,191,328,35,384,21,1,2,4,https://anchor.fm/chaitimedatascience/episodes/Emmanuel-Ameisen--Building-Machine-Learning-Powered-Apps-eegaf3,3,418,54,42,28,21,10,1704
80 | E69,Birthday Special AMA: Answering Questions from my ML Heroes | CTDS.News Launch,,,,,,,Other,Masala Chai,2020-05-27,Morning,2020-05-27,3984,https://www.youtube.com/watch?v=hyJhwWshfbY,3,3698,163,4.41,338,501,55,395,36,1,3,15,https://anchor.fm/chaitimedatascience/episodes/Birthday-Special-AMA-Answering-Questions-from-my-ML-Heroes--CTDS-News-Launch-eekt01,3,342,24,16,16,17,9,1992
81 | E70,"Interview with Yauhen Babakhin | Kaggle, Computer Vision and AutoML",Yauhen Babakhin,Male,Belarus,Belarus,ybabakhin,,Kaggle,Paan Rose Green Tea,2020-05-28,Afternoon,2020-05-31,3952,https://www.youtube.com/watch?v=n_IUOeiKwnE,3,5200,99,2,201,300,14,168,5,0,2,4,https://anchor.fm/chaitimedatascience/episodes/Interview-with-Yauhen-Babakhin--Kaggle--Computer-Vision-and-AutoML--CTDS-Show-eeqdom,3,372,32,20,15,14,9,2229
82 | E71,"Martin Henze, Heads Or Tails, First Kaggle Kernel GM | Astronomy | Story-Telling with Data",Martin Henze,Male,USA,USA,headsortails,heads0rtai1s,Kaggle,Paan Rose Green Tea,2020-05-24,Night,2020-06-04,4176,https://www.youtube.com/watch?v=2dpaSTWdhSk&list=PLLvvXm0q8zUbiNdoIazGzlENMXvZ9bd3x,3,3670,95,2.59,124,219,22,362,12,0,0,2,https://anchor.fm/chaitimedatascience/episodes/Martin-Henze--Heads-Or-Tails--First-Kaggle-Kernel-GM--Astronomy--Story-Telling-with-Data-eet4j9,3,359,33,25,18,10,5,1960
83 | E72,Andreas Mueller | Scikit-Learn | ML and Open Source,Andreas Mueller,Male,USA,Germany,amuellerml,amuellerml,Industry,Paan Rose Green Tea,2020-05-25,Night,2020-06-07,3997,https://www.youtube.com/watch?v=iNZd_5T8tCI&list=PLLvvXm0q8zUbiNdoIazGzlENMXvZ9bd3x,3,3480,142,4.08,291,433,40,333,21,0,2,9,https://anchor.fm/chaitimedatascience/episodes/Andreas-Mueller--Scikit-Learn--ML-and-Open-Source--CTDS-Show-72-eet4j1,3,461,54,39,28,18,11,2147
84 | E73,"Maximilian Jeblick | Physics, Math and Data Science | Kaggle and H2O.ai",Maximilian Jeblick,Male,Germany,Germany,maxjeblick,,Kaggle,Paan Rose Green Tea,2020-06-11,Afternoon,2020-06-11,2372,https://www.youtube.com/watch?v=VeM1T7UaYTk,3,3200,54,2,86,140,9,231,5,0,1,1,https://anchor.fm/chaitimedatascience/episodes/Maximilian-Jeblick--Physics--Math-and-Data-Science--Kaggle-and-H2O-ai--CTDS-Show-73-ef9jjk,3,327,18,16,13,11,4,1174
85 | E74,"Dmitry Larko | H2O.ai | Kaggle, Applying Kaggle to Real world | AutoML",Dmitry Larko,Male,USA,Russia,dmitrylarko,DmitryLarko,Kaggle,Masala Chai,2020-06-05,Night,2020-06-14,4031,https://www.youtube.com/watch?v=aC9t9D7HpYE,3,4200,118,3,194,312,29,335,11,0,0,5,https://anchor.fm/chaitimedatascience/episodes/Dmitry-Larko--H2O-ai--Kaggle--Applying-Kaggle-to-Real-world--AutoML--CTDS-Show-74-efdhmt,3,352,35,26,22,13,7,1958
86 | E75,Rachel Thomas | Fast.ai | Applied Ethics | Top Down Learning,Rachel Thomas ,Female,USA,USA,,math_rachel,Industry,Masala Chai,2020-06-16,Night,2020-06-18,2214,https://www.youtube.com/watch?v=tq_XcFubgKo&list=PLLvvXm0q8zUbiNdoIazGzlENMXvZ9bd3x,3,1931,115,5.96,164,279,23,297,20,0,1,3,https://anchor.fm/chaitimedatascience/episodes/Rachel-Thomas--Fast-ai--Applied-Ethics--Top-Down-Learning--CTDS-Show-75-efjj5d,3,247,17,10,13,,,
--------------------------------------------------------------------------------
/notebooks/Generate Training data NER.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "%load_ext autoreload\n",
10 | "%autoreload 2"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "import sys\n",
20 | "sys.path.insert(0,'../scripts')\n",
21 | "from generate_training_data import TrainDataGenerator"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 3,
27 | "metadata": {},
28 | "outputs": [
29 | {
30 | "name": "stdout",
31 | "output_type": "stream",
32 | "text": [
33 | "**********\n",
34 | "51 templates loaded\n",
35 | "**********\n"
36 | ]
37 | }
38 | ],
39 | "source": [
40 | "tdg = TrainDataGenerator(mode=\"ner\")"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 4,
46 | "metadata": {},
47 | "outputs": [
48 | {
49 | "name": "stderr",
50 | "output_type": "stream",
51 | "text": [
52 | "100%|██████████| 10/10 [00:00<00:00, 1203.74it/s]\n"
53 | ]
54 | },
55 | {
56 | "data": {
57 | "text/plain": [
58 | "[('pie chart of yrlw_ot column of zzz grouped by dz_k column',\n",
59 | " {'entities': [(13, 20, 'COLNAME'),\n",
60 | " (31, 34, 'VARNAME'),\n",
61 | " (46, 50, 'COLNAME')]}),\n",
62 | " ('plot fqi_hczeud and vakdbgvkxfgomzbgg of df in a bar plot',\n",
63 | " {'entities': [(5, 15, 'COLNAME'),\n",
64 | " (20, 37, 'COLNAME'),\n",
65 | " (41, 43, 'VARNAME')]}),\n",
66 | " ('scatter plot of wzvapheuza and jsbefivugeekdv in tempdf',\n",
67 | " {'entities': [(16, 26, 'COLNAME'),\n",
68 | " (31, 45, 'COLNAME'),\n",
69 | " (49, 55, 'VARNAME')]}),\n",
70 | " ('plot histogram of dhqwz_ydyfw in df',\n",
71 | " {'entities': [(18, 29, 'COLNAME'), (33, 35, 'VARNAME')]}),\n",
72 | " ('find min,minimum of csgc,hlbrbnql group by wuwgxhrsgvrcqaezot from mydf',\n",
73 | " {'entities': [(5, 8, 'FUNCTION'),\n",
74 | " (9, 16, 'FUNCTION'),\n",
75 | " (20, 24, 'COLNAME'),\n",
76 | " (25, 33, 'COLNAME'),\n",
77 | " (43, 61, 'COLNAME'),\n",
78 | " (67, 71, 'VARNAME')]}),\n",
79 | " ('import plotly', {'entities': [(7, 13, 'LIBNAME')]}),\n",
80 | " ('load test.csv', {'entities': [(5, 13, 'FNAME')]}),\n",
81 | " ('print df head', {'entities': [(6, 8, 'VARNAME')]}),\n",
82 | " ('describe df', {'entities': [(9, 11, 'VARNAME')]}),\n",
83 | " ('pie chart of oqayvfaukwgykiomp column of tempdf grouped by txtnmhqhfqts column',\n",
84 | " {'entities': [(13, 30, 'COLNAME'),\n",
85 | " (41, 47, 'VARNAME'),\n",
86 | " (59, 71, 'COLNAME')]})]"
87 | ]
88 | },
89 | "execution_count": 4,
90 | "metadata": {},
91 | "output_type": "execute_result"
92 | }
93 | ],
94 | "source": [
95 | "tdg.generate_training_rows(n_rows=10)"
96 | ]
97 | }
98 | ],
99 | "metadata": {
100 | "kernelspec": {
101 | "display_name": "Python 3",
102 | "language": "python",
103 | "name": "python3"
104 | },
105 | "language_info": {
106 | "codemirror_mode": {
107 | "name": "ipython",
108 | "version": 3
109 | },
110 | "file_extension": ".py",
111 | "mimetype": "text/x-python",
112 | "name": "python",
113 | "nbconvert_exporter": "python",
114 | "pygments_lexer": "ipython3",
115 | "version": "3.6.9"
116 | }
117 | },
118 | "nbformat": 4,
119 | "nbformat_minor": 4
120 | }
121 |
--------------------------------------------------------------------------------
/scripts/README.md:
--------------------------------------------------------------------------------
1 | # `Scripts` dir - A Walk-through
2 |
3 | ### Note: Make sure to run all the following commands at `/scripts` level in your terminal
4 |
5 | ### Preferred Tools:
6 | - Python version: 3.7
7 | - Python environment: conda
8 | - Python package installer: pip
9 |
10 | ## Processing awesome-notebooks
11 |
12 | To start off, run the following command to download the awesome-notebooks repo into the `input/` sub-directory
13 | ```
14 | git clone https://github.com/jupyter-naas/awesome-notebooks.git ./input/
15 | ```
16 |
17 | Then, in order to extract the tasks and code and create a `.pkl` out of it, run the following command
18 | ```
19 | python3 process_awesome_notebooks.py create_pkl_file
20 | ```
21 |
22 | and in order to create the faiss FlatIndex using the embeddings from the `.pkl` file, run
23 | ```
24 | python3 process_awesome_notebooks.py create_faiss_index
25 | ```
26 |
27 | and in order to get an intent, run the following command where `<query>` is your query and `<k>` is the number of nearest neighbours from your query's embedding
28 | ```
29 | python3 process_awesome_notebooks.py get_intent <query> <k>
30 | ```
31 |
32 | To evaluate the outputs of both tensorflow_hub and sentence_transformers embeddings, run the following command to create 2 `.csv` files, one for each type of encoder
33 | ```
34 | python3 process_awesome_notebooks.py eval_models
35 | ```
36 | To get speed benchmarks for each encoder over a fixed number of repetitions, run:
37 | ```
38 | python3 process_awesome_notebooks.py get_benchmark_data
39 | ```
40 |
41 | ## Training NER model using spaCy v3
42 |
43 | To generate training or validation data:
44 | ```
45 | python3 generate_training_data.py
46 | ```
47 |
48 | To convert the `.json` files to `.spacy` objects:
49 | ```
50 | python3 train_spacy_v3_ner.py
51 | ```
52 |
53 | To create the default config file:
54 | ```
55 | python3 train_spacy_v3_ner.py create_default_config_file
56 | ```
57 |
58 | To train the NER model:
59 | ```
60 | python3 train_spacy_v3_ner.py train_model
61 | ```
62 |
63 | You now have a trained NER Model!
--------------------------------------------------------------------------------
/scripts/config.cfg:
--------------------------------------------------------------------------------
1 | [paths]
2 | train = null
3 | dev = null
4 | vectors = "en_core_web_sm"
5 | init_tok2vec = null
6 |
7 | [system]
8 | gpu_allocator = null
9 | seed = 0
10 |
11 | [nlp]
12 | lang = "en"
13 | pipeline = ["tok2vec","ner"]
14 | batch_size = 1000
15 | disabled = []
16 | before_creation = null
17 | after_creation = null
18 | after_pipeline_creation = null
19 | tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20 |
21 | [components]
22 |
23 | [components.ner]
24 | factory = "ner"
25 | incorrect_spans_key = null
26 | moves = null
27 | scorer = {"@scorers":"spacy.ner_scorer.v1"}
28 | update_with_oracle_cut_size = 100
29 |
30 | [components.ner.model]
31 | @architectures = "spacy.TransitionBasedParser.v2"
32 | state_type = "ner"
33 | extra_state_tokens = false
34 | hidden_width = 64
35 | maxout_pieces = 2
36 | use_upper = true
37 | nO = null
38 |
39 | [components.ner.model.tok2vec]
40 | @architectures = "spacy.Tok2VecListener.v1"
41 | width = ${components.tok2vec.model.encode.width}
42 | upstream = "*"
43 |
44 | [components.tok2vec]
45 | factory = "tok2vec"
46 |
47 | [components.tok2vec.model]
48 | @architectures = "spacy.Tok2Vec.v2"
49 |
50 | [components.tok2vec.model.embed]
51 | @architectures = "spacy.MultiHashEmbed.v2"
52 | width = ${components.tok2vec.model.encode.width}
53 | attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
54 | rows = [5000,2500,2500,2500]
55 | include_static_vectors = true
56 |
57 | [components.tok2vec.model.encode]
58 | @architectures = "spacy.MaxoutWindowEncoder.v2"
59 | width = 256
60 | depth = 8
61 | window_size = 1
62 | maxout_pieces = 3
63 |
64 | [corpora]
65 |
66 | [corpora.dev]
67 | @readers = "spacy.Corpus.v1"
68 | path = ${paths.dev}
69 | max_length = 0
70 | gold_preproc = false
71 | limit = 0
72 | augmenter = null
73 |
74 | [corpora.train]
75 | @readers = "spacy.Corpus.v1"
76 | path = ${paths.train}
77 | max_length = 0
78 | gold_preproc = false
79 | limit = 0
80 | augmenter = null
81 |
82 | [training]
83 | dev_corpus = "corpora.dev"
84 | train_corpus = "corpora.train"
85 | seed = ${system.seed}
86 | gpu_allocator = ${system.gpu_allocator}
87 | dropout = 0.1
88 | accumulate_gradient = 1
89 | patience = 1600
90 | max_epochs = 0
91 | max_steps = 20000
92 | eval_frequency = 200
93 | frozen_components = []
94 | annotating_components = []
95 | before_to_disk = null
96 |
97 | [training.batcher]
98 | @batchers = "spacy.batch_by_words.v1"
99 | discard_oversize = false
100 | tolerance = 0.2
101 | get_length = null
102 |
103 | [training.batcher.size]
104 | @schedules = "compounding.v1"
105 | start = 100
106 | stop = 1000
107 | compound = 1.001
108 | t = 0.0
109 |
110 | [training.logger]
111 | @loggers = "spacy.ConsoleLogger.v1"
112 | progress_bar = false
113 |
114 | [training.optimizer]
115 | @optimizers = "Adam.v1"
116 | beta1 = 0.9
117 | beta2 = 0.999
118 | L2_is_weight_decay = true
119 | L2 = 0.01
120 | grad_clip = 1.0
121 | use_averages = false
122 | eps = 0.00000001
123 | learn_rate = 0.001
124 |
125 | [training.score_weights]
126 | ents_f = 1.0
127 | ents_p = 0.0
128 | ents_r = 0.0
129 | ents_per_type = null
130 |
131 | [pretraining]
132 |
133 | [initialize]
134 | vectors = ${paths.vectors}
135 | init_tok2vec = ${paths.init_tok2vec}
136 | vocab_data = null
137 | lookups = null
138 | before_init = null
139 | after_init = null
140 |
141 | [initialize.components]
142 |
143 | [initialize.tokenizer]
--------------------------------------------------------------------------------
/scripts/create_intent_index.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | import faiss
4 | import numpy as np
5 | import pandas as pd
6 | from sentence_transformers import SentenceTransformer
7 |
# Sentence encoder shared by every call to `_get_embedding`.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
# Server-extension directory, given relative to the current working directory.
backend_dir = "../jupyter_text2code/jupyter_text2code_serverextension"


def _get_embedding(command):
    """Encode ``command`` with the module-level ``model``.

    Every character other than ASCII letters, digits and spaces is removed and
    the text is lower-cased before it is encoded.

    Returns:
        A Python list holding the components of the embedding vector.
    """
    cleaned = re.sub('[^A-Za-z0-9 ]+', '', command)
    normalised = cleaned.lower()
    vector = model.encode([normalised])[0]
    return list(np.array(vector))
15 |
16 |
# Assemble the intent table from the generated intents and the awesome-notebooks tasks.
jt2c = pd.read_csv(f'{backend_dir}/data/generated_intents.csv')
naas = pd.read_csv(f'{backend_dir}/data/awesome-notebooks.csv')[['intent_id', 'task', 'st_embedding']]
naas.columns = ['intent_id', 'intent', 'embedding']

# Embed the intent text of both sources; for `naas` this overwrites the
# embedding column that was read from the CSV.
for source_df in (jt2c, naas):
    source_df['embedding'] = source_df['intent'].apply(_get_embedding)
jt2c = jt2c[['intent_id', 'intent', 'embedding']]

intent_df = pd.concat([jt2c, naas])
intent_df.to_csv('testing.csv', index=False)

# Print the id of every intent whose embedding does not have 384 components.
for intent_id, embedding in zip(intent_df["intent_id"].values, intent_df["embedding"].values):
    if len(embedding) == 384:
        continue
    print(intent_id)

# Build the faiss index: L2-normalised vectors stored in an inner-product
# index and addressed by their intent id.
db_ids = intent_df['intent_id'].values
db_vectors = np.stack(intent_df["embedding"].values)
db_vectors = db_vectors.astype(np.float32)
faiss.normalize_L2(db_vectors)
flat_index = faiss.IndexFlatIP(384)
intent_index = faiss.IndexIDMap(flat_index)
intent_index.add_with_ids(db_vectors, db_ids)
faiss.write_index(intent_index, f"{backend_dir}/models/intent_index.idx")
40 |
--------------------------------------------------------------------------------
/scripts/create_lookup_file.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
# Server-extension directory, given relative to the current working directory.
backend_dir = "../jupyter_text2code/jupyter_text2code_serverextension"

# NER templates: keep id, template text and code, exposing the template as `task`
# so both sources share the same column names.
jt2c = pd.read_csv(f'{backend_dir}/data/ner_templates.csv')[['intent_id', 'template', 'code']]
jt2c = jt2c.rename(columns={'template': 'task'})
naas = pd.read_csv(f'{backend_dir}/data/awesome-notebooks.csv')[['intent_id', 'task', 'code']]

# Stack both sources, publish the text column as `intent`, keep the first row
# seen for each intent id and write the lookup table.
lookup_df = (
    pd.concat([jt2c, naas])
    .rename(columns={'task': 'intent'})
    .drop_duplicates(subset='intent_id')
)
lookup_df.to_csv(f'{backend_dir}/data/intent_lookup.csv', index=False)
13 |
14 |
15 |
--------------------------------------------------------------------------------
/scripts/data/awesome-notebooks.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/scripts/data/awesome-notebooks.pkl
--------------------------------------------------------------------------------
/scripts/data/st_naas_intent_index.idx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/scripts/data/st_naas_intent_index.idx
--------------------------------------------------------------------------------
/scripts/data/tf_naas_intent_index.idx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepklarity/jupyter-text2code/f8f01f4ed3eee935f3ebbd696c9fdbd743cbecc1/scripts/data/tf_naas_intent_index.idx
--------------------------------------------------------------------------------
/scripts/eval_models_performance.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "pycharm": {
8 | "name": "#%%\n"
9 | }
10 | },
11 | "outputs": [],
12 | "source": [
13 | "import pandas as pd\n",
14 | "import matplotlib.pyplot as plt"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "metadata": {
21 | "pycharm": {
22 | "name": "#%%\n"
23 | }
24 | },
25 | "outputs": [],
26 | "source": [
27 | "tf_data = pd.read_csv('output/tf_eval_df.csv')\n",
28 | "st_data = pd.read_csv('output/st_eval_df.csv')"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 3,
34 | "metadata": {
35 | "pycharm": {
36 | "name": "#%%\n"
37 | }
38 | },
39 | "outputs": [
40 | {
41 | "name": "stdout",
42 | "output_type": "stream",
43 | "text": [
44 | "\n",
45 | "RangeIndex: 352 entries, 0 to 351\n",
46 | "Data columns (total 9 columns):\n",
47 | " # Column Non-Null Count Dtype \n",
48 | "--- ------ -------------- ----- \n",
49 | " 0 category 352 non-null object\n",
50 | " 1 intent_id 352 non-null int64 \n",
51 | " 2 task 352 non-null object\n",
52 | " 3 tf_matched_intent_id 352 non-null int64 \n",
53 | " 4 tf_matched_intent_text 352 non-null object\n",
54 | " 5 tf_is_intent_matched 352 non-null bool \n",
55 | " 6 tf_matched_intent_id_shuffled 352 non-null int64 \n",
56 | " 7 tf_matched_intent_text_shuffled 352 non-null object\n",
57 | " 8 tf_is_intent_matched_shuffled 352 non-null bool \n",
58 | "dtypes: bool(2), int64(3), object(4)\n",
59 | "memory usage: 20.1+ KB\n"
60 | ]
61 | }
62 | ],
63 | "source": [
64 | "tf_data.info()"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 4,
70 | "metadata": {
71 | "pycharm": {
72 | "name": "#%%\n"
73 | }
74 | },
75 | "outputs": [
76 | {
77 | "name": "stdout",
78 | "output_type": "stream",
79 | "text": [
80 | "\n",
81 | "RangeIndex: 352 entries, 0 to 351\n",
82 | "Data columns (total 9 columns):\n",
83 | " # Column Non-Null Count Dtype \n",
84 | "--- ------ -------------- ----- \n",
85 | " 0 category 352 non-null object\n",
86 | " 1 intent_id 352 non-null int64 \n",
87 | " 2 task 352 non-null object\n",
88 | " 3 st_matched_intent_id 352 non-null int64 \n",
89 | " 4 st_matched_intent_text 352 non-null object\n",
90 | " 5 st_is_intent_matched 352 non-null bool \n",
91 | " 6 st_matched_intent_id_shuffled 352 non-null int64 \n",
92 | " 7 st_matched_intent_text_shuffled 352 non-null object\n",
93 | " 8 st_is_intent_matched_shuffled 352 non-null bool \n",
94 | "dtypes: bool(2), int64(3), object(4)\n",
95 | "memory usage: 20.1+ KB\n"
96 | ]
97 | }
98 | ],
99 | "source": [
100 | "st_data.info()"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": 5,
106 | "metadata": {
107 | "pycharm": {
108 | "name": "#%%\n"
109 | }
110 | },
111 | "outputs": [
112 | {
113 | "data": {
114 | "text/plain": [
115 | "True 352\n",
116 | "Name: tf_is_intent_matched, dtype: int64"
117 | ]
118 | },
119 | "execution_count": 5,
120 | "metadata": {},
121 | "output_type": "execute_result"
122 | }
123 | ],
124 | "source": [
125 | "tf_data['tf_is_intent_matched'].value_counts()"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 6,
131 | "metadata": {
132 | "pycharm": {
133 | "name": "#%%\n"
134 | }
135 | },
136 | "outputs": [
137 | {
138 | "data": {
139 | "text/plain": [
140 | "True 352\n",
141 | "Name: st_is_intent_matched, dtype: int64"
142 | ]
143 | },
144 | "execution_count": 6,
145 | "metadata": {},
146 | "output_type": "execute_result"
147 | }
148 | ],
149 | "source": [
150 | "st_data['st_is_intent_matched'].value_counts()"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": 7,
156 | "metadata": {
157 | "pycharm": {
158 | "name": "#%%\n"
159 | }
160 | },
161 | "outputs": [
162 | {
163 | "data": {
164 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEDCAYAAADOc0QpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAO+ElEQVR4nO3dX2ydd33H8feHpCtoRSJVnSokYYlQ0EiYSJEXkLhhLVozdpFy0SmVhnJRKb1IJdDQpJaLAReemMQfadJaKYiKaGJkmQA1AraRRSBUCTW4XShNQ4RFu8YkSsw/QXeRLel3F36iHOxj+9jHp15+vF+SdZ7nd57n+Oubd44eP8dJVSFJasvr1noASdLqM+6S1CDjLkkNMu6S1CDjLkkNWr/WAwDccccdtW3btrUeQ5JuKs8888zPqmqs33P/L+K+bds2Jicn13oMSbqpJPmvhZ7zsowkNci4S1KDjLskNci4S1KDjLskNci4S1KDjLskNWjJuCd5fZJTSX6Q5EyST3brn0jy0ySnu68P9JzzaJKpJOeS3DvKH0CSNN8gH2K6AtxdVa8kuQV4Ksm/ds99rqo+3Xtwkp3AfmAX8GbgP5K8raqurebgkqSFLRn3mv3fPF7pdm/pvhb7Hz72AUer6grwYpIpYA/wvSFn1YC2PfKNtR5B6uulT/35Wo/wO2Oga+5J1iU5DVwGTlTV091TDyd5LskTSTZ0a5uB8z2nT3drc1/zYJLJJJMzMzMr/wkkSfMMFPequlZVu4EtwJ4k7wAeB94K7AYuAp/pDk+/l+jzmoeraryqxsfG+v7dG0nSCi3rbpmq+hXwHWBvVV3qov8q8HlmL73A7Dv1rT2nbQEuDD+qJGlQg9wtM5bkTd32G4D3Az9KsqnnsA8Cz3fbx4H9SW5Nsh3YAZxa1aklSYsa5G6ZTcCRJOuY/cfgWFV9Pck/JtnN7CWXl4CHAKrqTJJjwAvAVeCQd8pI0mtrkLtlngPu6rP+oUXOmQAmhhtNkrRSfkJVkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQUvGPcnrk5xK8oMkZ5J8slu/PcmJJD/uHjf0nPNokqkk55LcO8ofQJI03yDv3K8Ad1fVO4HdwN4k7wEeAU5W1Q7gZLdPkp3AfmAXsBd4LMm6EcwuSVrAknGvWa90u7d0XwXsA45060eA+7rtfcDRqrpSVS8CU8Ce1RxakrS4ga65J1mX5DRwGThRVU8Dd1bVRYDucWN3+GbgfM/p093a3Nc8mGQyyeTMzMwQP4Ikaa6B4l5V16pqN7AF2JPkHYscnn4v0ec1D1fVeFWNj42NDTSsJGkwy7pbpqp+BXyH2Wvpl5JsAugeL3eHTQNbe07bAlwYdlBJ0uAGuVtmLMmbuu03AO8HfgQcBw50hx0Anuy2jwP7k9yaZDuwAzi1ynNLkhaxfoBjNgFHujteXgccq6qvJ/kecCzJg8DLwP0AVXUmyTHgBeAqcKiqro1mfElSP0vGvaqeA+7qs/5z4J4FzpkAJoaeTpK0In5CVZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIatGTck2xN8u0kZ5OcSfLhbv0TSX6a5HT39YGecx5NMpXkXJJ7R/kDSJLmWz/AMVeBj1bVs0neCDyT5ET33Oeq6tO9ByfZCewHdgFvBv4jyduq6tpqDi5JWtiS79yr6mJVPdtt/wY4C2xe5JR9wNGqulJVLwJTwJ7VGFaSNJhlXXNPsg24C3i6W3o4yXNJnkiyoVvbDJzvOW2aPv8YJDmYZDLJ5MzMzPInlyQtaOC4J7kN+Arw
kar6NfA48FZgN3AR+Mz1Q/ucXvMWqg5X1XhVjY+NjS13bknSIgaKe5JbmA37l6rqqwBVdamqrlXVq8DnuXHpZRrY2nP6FuDC6o0sSVrKIHfLBPgCcLaqPtuzvqnnsA8Cz3fbx4H9SW5Nsh3YAZxavZElSUsZ5G6Z9wIfAn6Y5HS39jHggSS7mb3k8hLwEEBVnUlyDHiB2TttDnmnjCS9tpaMe1U9Rf/r6N9c5JwJYGKIuSRJQ/ATqpLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ1aMu5Jtib5dpKzSc4k+XC3fnuSE0l+3D1u6Dnn0SRTSc4luXeUP4Akab5B3rlfBT5aVW8H3gMcSrITeAQ4WVU7gJPdPt1z+4FdwF7gsSTrRjG8JKm/JeNeVRer6tlu+zfAWWAzsA840h12BLiv294HHK2qK1X1IjAF7FnluSVJi1jWNfck24C7gKeBO6vqIsz+AwBs7A7bDJzvOW26W5v7WgeTTCaZnJmZWcHokqSFDBz3JLcBXwE+UlW/XuzQPms1b6HqcFWNV9X42NjYoGNIkgYwUNyT3MJs2L9UVV/tli8l2dQ9vwm43K1PA1t7Tt8CXFidcSVJgxjkbpkAXwDOVtVne546Dhzotg8AT/as709ya5LtwA7g1OqNLElayvoBjnkv8CHgh0lOd2sfAz4FHEvyIPAycD9AVZ1Jcgx4gdk7bQ5V1bXVHlyStLAl415VT9H/OjrAPQucMwFMDDGXJGkIfkJVkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQUvGPckTSS4neb5n7RNJfprkdPf1gZ7nHk0yleRckntHNbgkaWGDvHP/IrC3z/rnqmp39/VNgCQ7gf3Aru6cx5KsW61hJUmDWTLuVfVd4BcDvt4+4GhVXamqF4EpYM8Q80mSVmCYa+4PJ3muu2yzoVvbDJzvOWa6W5snycEkk0kmZ2ZmhhhDkjTXSuP+OPBWYDdwEfhMt54+x1a/F6iqw1U1XlXjY2NjKxxDktTPiuJeVZeq6lpVvQp8nhuXXqaBrT2HbgEuDDeiJGm5VhT3JJt6dj8IXL+T5jiwP8mtSbYDO4BTw40oSVqu9UsdkOTLwPuAO5JMAx8H3pdkN7OXXF4CHgKoqjNJjgEvAFeBQ1V1bSSTS5IWtGTcq+qBPstfWOT4CWBimKEkScPxE6qS1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1KAl457kiSSXkzzfs3Z7khNJftw9buh57tEkU0nOJbl3VINLkhY2yDv3LwJ756w9Apysqh3AyW6fJDuB/cCu7pzHkqxbtWklSQNZMu5V9V3gF3OW9wFHuu0jwH0960er6kpVvQhMAXtWZ1RJ0qBWes39zqq6CNA9buzWNwPne46b7tbmSXIwyWSSyZmZmRWOIUnqZ7V/oZo+a9XvwKo6XFXjVTU+Nja2ymNI0u+2lcb9UpJNAN3j5W59Gtjac9wW4MLKx5MkrcRK434cONBtHwCe7Fnfn+TWJNuBHcCp4UaUJC3X+qUOSPJl4H3AHUmmgY8DnwKOJXkQeBm4H6CqziQ5BrwAXAUOVdW1Ec0uSVrAknGvqgcWeOqeBY6fACaGGUqSNBw/oSpJDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktSgJf+D7MUk
eQn4DXANuFpV40luB/4Z2Aa8BPxFVf1yuDElScuxGu/c/6SqdlfVeLf/CHCyqnYAJ7t9SdJraBSXZfYBR7rtI8B9I/gekqRFDBv3Ar6V5JkkB7u1O6vqIkD3uLHfiUkOJplMMjkzMzPkGJKkXkNdcwfeW1UXkmwETiT50aAnVtVh4DDA+Ph4DTmHJKnHUO/cq+pC93gZ+BqwB7iUZBNA93h52CElScuz4rgn+f0kb7y+Dfwp8DxwHDjQHXYAeHLYISVJyzPMZZk7ga8luf46/1RV/5bk+8CxJA8CLwP3Dz+mJGk5Vhz3qvoJ8M4+6z8H7hlmKEnScPyEqiQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1aGRxT7I3ybkkU0keGdX3kSTNN5K4J1kH/APwZ8BO4IEkO0fxvSRJ843qnfseYKqqflJV/wMcBfaN6HtJkuZYP6LX3Qyc79mfBt7de0CSg8DBbveVJOdGNIs0rDuAn631EC3I3631BM35g4WeGFXc02etfmun6jBweETfX1o1SSaranyt55CWY1SXZaaBrT37W4ALI/pekqQ5RhX37wM7kmxP8nvAfuD4iL6XJGmOkVyWqaqrSR4G/h1YBzxRVWdG8b2k14CXD3XTSVUtfZQk6abiJ1QlqUHGXZIaZNwlqUHGXZojs/4yyd90+29Jsmet55KWw1+oSnMkeRx4Fbi7qt6eZAPwrar64zUeTRrYqD6hKt3M3l1V70rynwBV9cvu8xrSTcPLMtJ8/9v9ZdMCSDLG7Dt56aZh3KX5/h74GrAxyQTwFPC3azuStDxec5f6SPKHwD3M/hG8k1V1do1HkpbFuEtzJHlLv/Wqevm1nkVaKeMuzZHkh8xebw/wemA7cK6qdq3pYNIyeLeMNEdV/VHvfpJ3AQ+t0TjSivgLVWkJVfUs4D3uuqn4zl2aI8lf9ey+DngXMLNG40grYtyl+d7Ys30V+AbwlTWaRVoR4y716D68dFtV/fVazyINw2vuUifJ+qq6xuxlGOmm5jt36YZTzIb9dJLjwL8A/339yar66loNJi2XcZfmux34OXA3N+53L8C466Zh3KUbNnZ3yjzPjahf56f9dFMx7tIN64Db+O2oX2fcdVPxzw9InSTPVpW/TFUTvFtGuqHfO3bppuQ7d6mT5Paq+sVazyGtBuMuSQ3ysowkNci4S1KDjLskNci4S1KD/g9SHrdAdDylhAAAAABJRU5ErkJggg==\n",
165 | "text/plain": [
166 | ""
167 | ]
168 | },
169 | "metadata": {
170 | "needs_background": "light"
171 | },
172 | "output_type": "display_data"
173 | }
174 | ],
175 | "source": [
176 | "tf_data['tf_is_intent_matched'].value_counts().plot.bar()\n",
177 | "plt.show()"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 8,
183 | "metadata": {
184 | "pycharm": {
185 | "name": "#%%\n"
186 | }
187 | },
188 | "outputs": [
189 | {
190 | "data": {
191 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEDCAYAAADOc0QpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAO+ElEQVR4nO3dX2ydd33H8feHpCtoRSJVnSokYYlQ0EiYSJEXkLhhLVozdpFy0SmVhnJRKb1IJdDQpJaLAReemMQfadJaKYiKaGJkmQA1AraRRSBUCTW4XShNQ4RFu8YkSsw/QXeRLel3F36iHOxj+9jHp15+vF+SdZ7nd57n+Oubd44eP8dJVSFJasvr1noASdLqM+6S1CDjLkkNMu6S1CDjLkkNWr/WAwDccccdtW3btrUeQ5JuKs8888zPqmqs33P/L+K+bds2Jicn13oMSbqpJPmvhZ7zsowkNci4S1KDjLskNci4S1KDjLskNci4S1KDjLskNWjJuCd5fZJTSX6Q5EyST3brn0jy0ySnu68P9JzzaJKpJOeS3DvKH0CSNN8gH2K6AtxdVa8kuQV4Ksm/ds99rqo+3Xtwkp3AfmAX8GbgP5K8raqurebgkqSFLRn3mv3fPF7pdm/pvhb7Hz72AUer6grwYpIpYA/wvSFn1YC2PfKNtR5B6uulT/35Wo/wO2Oga+5J1iU5DVwGTlTV091TDyd5LskTSTZ0a5uB8z2nT3drc1/zYJLJJJMzMzMr/wkkSfMMFPequlZVu4EtwJ4k7wAeB94K7AYuAp/pDk+/l+jzmoeraryqxsfG+v7dG0nSCi3rbpmq+hXwHWBvVV3qov8q8HlmL73A7Dv1rT2nbQEuDD+qJGlQg9wtM5bkTd32G4D3Az9KsqnnsA8Cz3fbx4H9SW5Nsh3YAZxa1aklSYsa5G6ZTcCRJOuY/cfgWFV9Pck/JtnN7CWXl4CHAKrqTJJjwAvAVeCQd8pI0mtrkLtlngPu6rP+oUXOmQAmhhtNkrRSfkJVkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQUvGPcnrk5xK8oMkZ5J8slu/PcmJJD/uHjf0nPNokqkk55LcO8ofQJI03yDv3K8Ad1fVO4HdwN4k7wEeAU5W1Q7gZLdPkp3AfmAXsBd4LMm6EcwuSVrAknGvWa90u7d0XwXsA45060eA+7rtfcDRqrpSVS8CU8Ce1RxakrS4ga65J1mX5DRwGThRVU8Dd1bVRYDucWN3+GbgfM/p093a3Nc8mGQyyeTMzMwQP4Ikaa6B4l5V16pqN7AF2JPkHYscnn4v0ec1D1fVeFWNj42NDTSsJGkwy7pbpqp+BXyH2Wvpl5JsAugeL3eHTQNbe07bAlwYdlBJ0uAGuVtmLMmbuu03AO8HfgQcBw50hx0Anuy2jwP7k9yaZDuwAzi1ynNLkhaxfoBjNgFHujteXgccq6qvJ/kecCzJg8DLwP0AVXUmyTHgBeAqcKiqro1mfElSP0vGvaqeA+7qs/5z4J4FzpkAJoaeTpK0In5CVZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIatGTck2xN8u0kZ5OcSfLhbv0TSX6a5HT39YGecx5NMpXkXJJ7R/kDSJLmWz/AMVeBj1bVs0neCDyT5ET33Oeq6tO9ByfZCewHdgFvBv4jyduq6tpqDi5JWtiS79yr6mJVPdtt/wY4C2xe5JR9wNGqulJVLwJTwJ7VGFaSNJhlXXNPsg24C3i6W3o4yXNJnkiyoVvbDJzvOW2aPv8YJDmYZDLJ5MzMzPInlyQtaOC4J7kN+Arw
kar6NfA48FZgN3AR+Mz1Q/ucXvMWqg5X1XhVjY+NjS13bknSIgaKe5JbmA37l6rqqwBVdamqrlXVq8DnuXHpZRrY2nP6FuDC6o0sSVrKIHfLBPgCcLaqPtuzvqnnsA8Cz3fbx4H9SW5Nsh3YAZxavZElSUsZ5G6Z9wIfAn6Y5HS39jHggSS7mb3k8hLwEEBVnUlyDHiB2TttDnmnjCS9tpaMe1U9Rf/r6N9c5JwJYGKIuSRJQ/ATqpLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ1aMu5Jtib5dpKzSc4k+XC3fnuSE0l+3D1u6Dnn0SRTSc4luXeUP4Akab5B3rlfBT5aVW8H3gMcSrITeAQ4WVU7gJPdPt1z+4FdwF7gsSTrRjG8JKm/JeNeVRer6tlu+zfAWWAzsA840h12BLiv294HHK2qK1X1IjAF7FnluSVJi1jWNfck24C7gKeBO6vqIsz+AwBs7A7bDJzvOW26W5v7WgeTTCaZnJmZWcHokqSFDBz3JLcBXwE+UlW/XuzQPms1b6HqcFWNV9X42NjYoGNIkgYwUNyT3MJs2L9UVV/tli8l2dQ9vwm43K1PA1t7Tt8CXFidcSVJgxjkbpkAXwDOVtVne546Dhzotg8AT/as709ya5LtwA7g1OqNLElayvoBjnkv8CHgh0lOd2sfAz4FHEvyIPAycD9AVZ1Jcgx4gdk7bQ5V1bXVHlyStLAl415VT9H/OjrAPQucMwFMDDGXJGkIfkJVkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQUvGPckTSS4neb5n7RNJfprkdPf1gZ7nHk0yleRckntHNbgkaWGDvHP/IrC3z/rnqmp39/VNgCQ7gf3Aru6cx5KsW61hJUmDWTLuVfVd4BcDvt4+4GhVXamqF4EpYM8Q80mSVmCYa+4PJ3muu2yzoVvbDJzvOWa6W5snycEkk0kmZ2ZmhhhDkjTXSuP+OPBWYDdwEfhMt54+x1a/F6iqw1U1XlXjY2NjKxxDktTPiuJeVZeq6lpVvQp8nhuXXqaBrT2HbgEuDDeiJGm5VhT3JJt6dj8IXL+T5jiwP8mtSbYDO4BTw40oSVqu9UsdkOTLwPuAO5JMAx8H3pdkN7OXXF4CHgKoqjNJjgEvAFeBQ1V1bSSTS5IWtGTcq+qBPstfWOT4CWBimKEkScPxE6qS1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1KAl457kiSSXkzzfs3Z7khNJftw9buh57tEkU0nOJbl3VINLkhY2yDv3LwJ756w9Apysqh3AyW6fJDuB/cCu7pzHkqxbtWklSQNZMu5V9V3gF3OW9wFHuu0jwH0960er6kpVvQhMAXtWZ1RJ0qBWes39zqq6CNA9buzWNwPne46b7tbmSXIwyWSSyZmZmRWOIUnqZ7V/oZo+a9XvwKo6XFXjVTU+Nja2ymNI0u+2lcb9UpJNAN3j5W59Gtjac9wW4MLKx5MkrcRK434cONBtHwCe7Fnfn+TWJNuBHcCp4UaUJC3X+qUOSPJl4H3AHUmmgY8DnwKOJXkQeBm4H6CqziQ5BrwAXAUOVdW1Ec0uSVrAknGvqgcWeOqeBY6fACaGGUqSNBw/oSpJDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktSgJf+D7MUk
eQn4DXANuFpV40luB/4Z2Aa8BPxFVf1yuDElScuxGu/c/6SqdlfVeLf/CHCyqnYAJ7t9SdJraBSXZfYBR7rtI8B9I/gekqRFDBv3Ar6V5JkkB7u1O6vqIkD3uLHfiUkOJplMMjkzMzPkGJKkXkNdcwfeW1UXkmwETiT50aAnVtVh4DDA+Ph4DTmHJKnHUO/cq+pC93gZ+BqwB7iUZBNA93h52CElScuz4rgn+f0kb7y+Dfwp8DxwHDjQHXYAeHLYISVJyzPMZZk7ga8luf46/1RV/5bk+8CxJA8CLwP3Dz+mJGk5Vhz3qvoJ8M4+6z8H7hlmKEnScPyEqiQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1aGRxT7I3ybkkU0keGdX3kSTNN5K4J1kH/APwZ8BO4IEkO0fxvSRJ843qnfseYKqqflJV/wMcBfaN6HtJkuZYP6LX3Qyc79mfBt7de0CSg8DBbveVJOdGNIs0rDuAn631EC3I3631BM35g4WeGFXc02etfmun6jBweETfX1o1SSaranyt55CWY1SXZaaBrT37W4ALI/pekqQ5RhX37wM7kmxP8nvAfuD4iL6XJGmOkVyWqaqrSR4G/h1YBzxRVWdG8b2k14CXD3XTSVUtfZQk6abiJ1QlqUHGXZIaZNwlqUHGXZojs/4yyd90+29Jsmet55KWw1+oSnMkeRx4Fbi7qt6eZAPwrar64zUeTRrYqD6hKt3M3l1V70rynwBV9cvu8xrSTcPLMtJ8/9v9ZdMCSDLG7Dt56aZh3KX5/h74GrAxyQTwFPC3azuStDxec5f6SPKHwD3M/hG8k1V1do1HkpbFuEtzJHlLv/Wqevm1nkVaKeMuzZHkh8xebw/wemA7cK6qdq3pYNIyeLeMNEdV/VHvfpJ3AQ+t0TjSivgLVWkJVfUs4D3uuqn4zl2aI8lf9ey+DngXMLNG40grYtyl+d7Ys30V+AbwlTWaRVoR4y716D68dFtV/fVazyINw2vuUifJ+qq6xuxlGOmm5jt36YZTzIb9dJLjwL8A/339yar66loNJi2XcZfmux34OXA3N+53L8C466Zh3KUbNnZ3yjzPjahf56f9dFMx7tIN64Db+O2oX2fcdVPxzw9InSTPVpW/TFUTvFtGuqHfO3bppuQ7d6mT5Paq+sVazyGtBuMuSQ3ysowkNci4S1KDjLskNci4S1KD/g9SHrdAdDylhAAAAABJRU5ErkJggg==\n",
192 | "text/plain": [
193 | ""
194 | ]
195 | },
196 | "metadata": {
197 | "needs_background": "light"
198 | },
199 | "output_type": "display_data"
200 | }
201 | ],
202 | "source": [
203 | "st_data['st_is_intent_matched'].value_counts().plot.bar()\n",
204 | "plt.show()"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 9,
210 | "metadata": {
211 | "pycharm": {
212 | "name": "#%%\n"
213 | }
214 | },
215 | "outputs": [
216 | {
217 | "data": {
218 | "text/plain": [
219 | "True 351\n",
220 | "False 1\n",
221 | "Name: tf_is_intent_matched_shuffled, dtype: int64"
222 | ]
223 | },
224 | "execution_count": 9,
225 | "metadata": {},
226 | "output_type": "execute_result"
227 | }
228 | ],
229 | "source": [
230 | "tf_data['tf_is_intent_matched_shuffled'].value_counts()"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 10,
236 | "metadata": {
237 | "pycharm": {
238 | "name": "#%%\n"
239 | }
240 | },
241 | "outputs": [
242 | {
243 | "data": {
244 | "text/html": [
245 | "\n",
246 | "\n",
259 | "
\n",
260 | " \n",
261 | " \n",
262 | " | \n",
263 | " category | \n",
264 | " intent_id | \n",
265 | " task | \n",
266 | " tf_matched_intent_id | \n",
267 | " tf_matched_intent_text | \n",
268 | " tf_is_intent_matched | \n",
269 | " tf_matched_intent_id_shuffled | \n",
270 | " tf_matched_intent_text_shuffled | \n",
271 | " tf_is_intent_matched_shuffled | \n",
272 | "
\n",
273 | " \n",
274 | " \n",
275 | " \n",
276 | " 169 | \n",
277 | " Airtable | \n",
278 | " 169 | \n",
279 | " Airtable - Get data | \n",
280 | " 169 | \n",
281 | " Airtable - Get data | \n",
282 | " True | \n",
283 | " 50 | \n",
284 | " Newsapi - Get data | \n",
285 | " False | \n",
286 | "
\n",
287 | " \n",
288 | "
\n",
289 | "
"
290 | ],
291 | "text/plain": [
292 | " category intent_id task tf_matched_intent_id \\\n",
293 | "169 Airtable 169 Airtable - Get data 169 \n",
294 | "\n",
295 | " tf_matched_intent_text tf_is_intent_matched \\\n",
296 | "169 Airtable - Get data True \n",
297 | "\n",
298 | " tf_matched_intent_id_shuffled tf_matched_intent_text_shuffled \\\n",
299 | "169 50 Newsapi - Get data \n",
300 | "\n",
301 | " tf_is_intent_matched_shuffled \n",
302 | "169 False "
303 | ]
304 | },
305 | "execution_count": 10,
306 | "metadata": {},
307 | "output_type": "execute_result"
308 | }
309 | ],
310 | "source": [
311 | "tf_data[tf_data['tf_is_intent_matched_shuffled'] == False]"
312 | ]
313 | },
314 | {
315 | "cell_type": "code",
316 | "execution_count": 11,
317 | "metadata": {
318 | "pycharm": {
319 | "name": "#%%\n"
320 | }
321 | },
322 | "outputs": [
323 | {
324 | "data": {
325 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEICAYAAACktLTqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQmUlEQVR4nO3dcaxedX3H8feHFpEJmzBuSW2L7VjNVtws5q66kCxO3EBdVszCVjJdl5DUP0qimTEBs03d1gU30WgiJCU6u4XJuqihUbaJjcawTOqFIFJqZyMItR29okZQV2357o97Ks/a59779N779Gl/vF/Jk3PO7/x+5/ne5OZzz/0955wnVYUkqS1njboASdLCM9wlqUGGuyQ1yHCXpAYZ7pLUIMNdkhq0eNQFAFx00UW1cuXKUZchSWeU+++//ztVNdZv32kR7itXrmRiYmLUZUjSGSXJt6bb57SMJDXIcJekBhnuktQgw12SGmS4S1KDZg33JC9MsivJV5PsTvLerv09Sb6d5MHu9YaeMTcl2Zdkb5KrhvkDSJJONMilkIeB11bVM0nOBu5N8m/dvg9W1ft7OydZA2wALgNeAnw+ycuq6uhCFi5Jmt6sZ+415Zlu8+zuNdND4NcDd1bV4ap6FNgHrJt3pZKkgQ10E1OSRcD9wC8DH6mq+5K8HrghyZ8AE8A7qup7wDLgyz3D93dtZ7yVN3521CU05bGb3zjqEqRmDfSBalUdraq1wHJgXZKXA7cBlwJrgYPALV339DvE8Q1JNiWZSDIxOTk5h9IlSdM5qatlqur7wBeBq6vqyS70nwVu57mpl/3Aip5hy4EDfY61tarGq2p8bKzvoxEkSXM0yNUyY0le3K2fC7wO+HqSpT3d3gQ83K3vADYkOSfJKmA1sGtBq5YkzWiQOfelwLZu3v0sYHtVfSbJPyVZy9SUy2PAWwGqaneS7cAjwBFgs1fKSNKpNWu4V9VDwOV92t8yw5gtwJb5lSZJmivvUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lq0KzhnuSFSXYl+WqS3Une27VfmOSeJN/olhf0jLkpyb4ke5NcNcwfQJJ0okHO3A8Dr62qVwBrgauTvBq4EdhZVauBnd02SdYAG4DLgKuBW5MsGkLtkqRpzBruNeWZbvPs7lXAemBb174NuKZbXw/cWVWHq+pRYB+wbiGLliTNbKA59ySLkjwIHALuqar7gIur6iBAt1zSdV8GPNEzfH/XJkk6RQYK96o6WlVrgeXAuiQvn6F7+h3ihE7JpiQTSSYmJycHKlaSNJiTulqmqr4PfJGpufQnkywF6JaHum77gRU9w5YDB/oca2tVjVfV+NjY2MlXLkma1iBXy4wleXG3fi7wOuDrwA5gY9dtI3BXt74D2JDknCSrgNXArgWuW5I0g8UD9FkKbOuueDkL2F5Vn0nyX8D2JNcDjwPXAlTV7iTbgUeAI8Dmqjo6nPIlSf3MGu5V9RBweZ/2p4ArpxmzBdgy7+okSXPiHaqS1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWrQrOGeZEWSLyTZk2R3krd17e9J8u0kD3avN/SMuSnJviR7k1w1zB9AknSixQP0OQK8o6oeSHI+cH+Se7p9H6yq9/d2TrIG2ABcBrwE+HySl1XV0YUsXJI0vVnP3KvqYFU90K0/DewBls0wZD1wZ1UdrqpHgX3AuoUoVpI0mJOac0+yErgcuK9ruiHJQ0k+luSCrm0Z8ETPsP3M/MdAkrTABg73JOcBnwTeXlU/AG4DLgXWAgeBW4517TO8
+hxvU5KJJBOTk5MnW7ckaQYDhXuSs5kK9juq6lMAVfVkVR2tqmeB23lu6mU/sKJn+HLgwPHHrKqtVTVeVeNjY2Pz+RkkSccZ5GqZAB8F9lTVB3ral/Z0exPwcLe+A9iQ5Jwkq4DVwK6FK1mSNJtBrpa5AngL8LUkD3Zt7wKuS7KWqSmXx4C3AlTV7iTbgUeYutJms1fKSNKpNWu4V9W99J9Hv3uGMVuALfOoS5I0D96hKkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSgWcM9yYokX0iyJ8nuJG/r2i9Mck+Sb3TLC3rG3JRkX5K9Sa4a5g8gSTrRIGfuR4B3VNWvAq8GNidZA9wI7Kyq1cDObptu3wbgMuBq4NYki4ZRvCSpv1nDvaoOVtUD3frTwB5gGbAe2NZ12wZc062vB+6sqsNV9SiwD1i3wHVLkmZwUnPuSVYClwP3ARdX1UGY+gMALOm6LQOe6Bm2v2uTJJ0iA4d7kvOATwJvr6ofzNS1T1v1Od6mJBNJJiYnJwctQ5I0gIHCPcnZTAX7HVX1qa75ySRLu/1LgUNd+35gRc/w5cCB449ZVVuraryqxsfGxuZavySpj0GulgnwUWBPVX2gZ9cOYGO3vhG4q6d9Q5JzkqwCVgO7Fq5kSdJsFg/Q5wrgLcDXkjzYtb0LuBnYnuR64HHgWoCq2p1kO/AIU1fabK6qowtduCRperOGe1XdS/95dIArpxmzBdgyj7okSfPgHaqS1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWrQrOGe5GNJDiV5uKftPUm+neTB7vWGnn03JdmXZG+Sq4ZVuCRpeoOcuX8cuLpP+weram33uhsgyRpgA3BZN+bWJIsWqlhJ0mBmDfeq+hLw3QGPtx64s6oOV9WjwD5g3TzqkyTNwXzm3G9I8lA3bXNB17YMeKKnz/6uTZJ0Cs013G8DLgXWAgeBW7r29Olb/Q6QZFOSiSQTk5OTcyxDktTPnMK9qp6sqqNV9SxwO89NvewHVvR0XQ4cmOYYW6tqvKrGx8bG5lKGJGkacwr3JEt7Nt8EHLuSZgewIck5SVYBq4Fd8ytRknSyFs/WIckngNcAFyXZD7wbeE2StUxNuTwGvBWgqnYn2Q48AhwBNlfV0aFULkma1qzhXlXX9Wn+6Az9twBb5lOUJGl+vENVkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGzhnuSjyU5lOThnrYLk9yT5Bvd8oKefTcl2Zdkb5KrhlW4JGl6g5y5fxy4+ri2G4GdVbUa2Nltk2QNsAG4rBtza5JFC1atJGkgs4Z7VX0J+O5xzeuBbd36NuCanvY7q+pwVT0K7APWLUypkqRBzXXO/eKqOgjQLZd07cuAJ3r67e/aJEmn0EJ/oJo+bdW3Y7IpyUSSicnJyQUuQ5Ke3+Ya7k8mWQrQLQ917fuBFT39lgMH+h2gqrZW1XhVjY+Njc2xDElSP3MN9x3Axm59I3BXT/uGJOckWQWsBnbNr0RJ0slaPFuHJJ8AXgNclGQ/8G7gZmB7kuuBx4FrAapqd5LtwCPAEWBzVR0dUu2SpGnMGu5Vdd00u66cpv8WYMt8ipIkzY93qEpSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGz
fkH2TJI8BjwNHAWOVNV4kguBfwFWAo8Bf1hV35tfmZKkk7EQZ+6/XVVrq2q8274R2FlVq4Gd3bYk6RQaxrTMemBbt74NuGYI7yFJmsF8w72AzyW5P8mmru3iqjoI0C2XzPM9JEknaV5z7sAVVXUgyRLgniRfH3Rg98dgE8All1wyzzIkSb3mdeZeVQe65SHg08A64MkkSwG65aFpxm6tqvGqGh8bG5tPGZKk48w53JO8KMn5x9aB3wUeBnYAG7tuG4G75lukJOnkzGda5mLg00mOHeefq+rfk3wF2J7keuBx4Nr5lylJOhlzDveq+ibwij7tTwFXzqcoSdL8eIeqJDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoOGFu5Jrk6yN8m+JDcO630kSScaSrgnWQR8BHg9sAa4LsmaYbyXJOlEwzpzXwfsq6pvVtVPgDuB9UN6L0nScRYP6bjLgCd6tvcDrxrSe0kCVt742VGX0IzHbn7jqEuYt2GFe/q01f/rkGwCNnWbzyTZO6Rano8uAr4z6iJmk/eNugKNgL+bC+ul0+0YVrjvB1b0bC8HDvR2qKqtwNYhvf/zWpKJqhofdR3S8fzdPHWGNef+FWB1klVJXgBsAHYM6b0kSccZypl7VR1JcgPwH8Ai4GNVtXsY7yVJOtGwpmWoqruBu4d1fM3I6S6drvzdPEVSVbP3kiSdUXz8gCQ1yHCXpAYZ7g3IlDcn+ctu+5Ik60Zdl6TRMdzbcCvwm8B13fbTTD3bRxq5JD+X5C+S3N5tr07ye6Ouq3WGexteVVWbgf8FqKrvAS8YbUnSz/wDcJipExCYusnxb0ZXzvOD4d6Gn3ZP4iyAJGPAs6MtSfqZS6vq74CfAlTVj+n/iBItIMO9DR8GPg0sSbIFuBf429GWJP3MT5Kcy3MnH5cydSavIfI690Yk+RXgSqbOiHZW1Z4RlyQBkOR3gD9n6rsdPgdcAfxpVX1xlHW1znBvQJJL+rVX1eOnuhapnyS/CLyaqZOPL1fVaf9kyDOd4d6AJF9j6l/eAC8EVgF7q+qykRYmAUmuAB6sqh8meTPwSuBDVfWtEZfWNOfcG1BVv1ZVv94tVzP1TVj3jrouqXMb8KMkrwDeCXwL+MfRltQ+w71BVfUA8BujrkPqHKmpKYL1wIer6kPA+SOuqXlDeyqkTp0kf9azeRZT//ZOjqgc6XhPJ7kJeDPwW91lu2ePuKbmeebehvN7XucAn8UvJNfp44+YuvTx+qr6H6a+Y/nvR1tS+/xA9QzXnQXdXFXvHHUtkk4fTsucwZIs7r716pWjrkU6XpKn6W5cOn4XUFX186e4pOcVw/3Mtoup+fUHk+wA/hX44bGdVfWpURUmVZUfmo6Q4d6GC4GngNfy3PXuBRjuOm0kWcLUfRiAN9kNm+F+ZlvSXSnzMM+F+jF+mKLTQpLfB24BXgIcAl4K7AG8yW6IvFrmzLYIOK97nd+zfuwlnQ7+mqlHD/x3Va1i6hlI/znaktrnmfuZ7WBV/dWoi5Bm8dOqeirJWUnOqqovJHnfqItqneF+ZvOZ2DoTfD/JecCXgDuSHAKOjLim5nmd+xksyYVV9d1R1yH1k+SSqno8yYuAHzM1DfzHwC8Ad1TVUyMtsHGGu6ShSPJAVb2yW/9kVf3BqGt6PvEDVUnD0jtt+Esjq+J5ynCXNCw1zbpOAadlJA1FkqNM3TEd4FzgR8d24eMHhs5wl6QGOS0jSQ0y3CWpQYa7JDXIcJekBhnuktSg/wOzS016e64AqAAAAABJRU5ErkJggg==\n",
326 | "text/plain": [
327 | ""
328 | ]
329 | },
330 | "metadata": {
331 | "needs_background": "light"
332 | },
333 | "output_type": "display_data"
334 | }
335 | ],
336 | "source": [
337 | "tf_data['tf_is_intent_matched_shuffled'].value_counts().plot.bar()\n",
338 | "plt.show()"
339 | ]
340 | },
341 | {
342 | "cell_type": "code",
343 | "execution_count": 12,
344 | "metadata": {
345 | "pycharm": {
346 | "name": "#%%\n"
347 | }
348 | },
349 | "outputs": [
350 | {
351 | "data": {
352 | "text/plain": [
353 | "True 350\n",
354 | "False 2\n",
355 | "Name: st_is_intent_matched_shuffled, dtype: int64"
356 | ]
357 | },
358 | "execution_count": 12,
359 | "metadata": {},
360 | "output_type": "execute_result"
361 | }
362 | ],
363 | "source": [
364 | "st_data['st_is_intent_matched_shuffled'].value_counts()"
365 | ]
366 | },
367 | {
368 | "cell_type": "code",
369 | "execution_count": 13,
370 | "metadata": {
371 | "pycharm": {
372 | "name": "#%%\n"
373 | }
374 | },
375 | "outputs": [
376 | {
377 | "data": {
378 | "text/html": [
379 | "\n",
380 | "\n",
393 | "
\n",
394 | " \n",
395 | " \n",
396 | " | \n",
397 | " category | \n",
398 | " intent_id | \n",
399 | " task | \n",
400 | " st_matched_intent_id | \n",
401 | " st_matched_intent_text | \n",
402 | " st_is_intent_matched | \n",
403 | " st_matched_intent_id_shuffled | \n",
404 | " st_matched_intent_text_shuffled | \n",
405 | " st_is_intent_matched_shuffled | \n",
406 | "
\n",
407 | " \n",
408 | " \n",
409 | " \n",
410 | " 34 | \n",
411 | " Google Sheets | \n",
412 | " 34 | \n",
413 | " Google Sheets - Send data to MongoDB | \n",
414 | " 34 | \n",
415 | " Google Sheets - Send data to MongoDB | \n",
416 | " True | \n",
417 | " 126 | \n",
418 | " MongoDB - Send data to Google Sheets | \n",
419 | " False | \n",
420 | "
\n",
421 | " \n",
422 | " 126 | \n",
423 | " MongoDB | \n",
424 | " 126 | \n",
425 | " MongoDB - Send data to Google Sheets | \n",
426 | " 126 | \n",
427 | " MongoDB - Send data to Google Sheets | \n",
428 | " True | \n",
429 | " 34 | \n",
430 | " Google Sheets - Send data to MongoDB | \n",
431 | " False | \n",
432 | "
\n",
433 | " \n",
434 | "
\n",
435 | "
"
436 | ],
437 | "text/plain": [
438 | " category intent_id task \\\n",
439 | "34 Google Sheets 34 Google Sheets - Send data to MongoDB \n",
440 | "126 MongoDB 126 MongoDB - Send data to Google Sheets \n",
441 | "\n",
442 | " st_matched_intent_id st_matched_intent_text \\\n",
443 | "34 34 Google Sheets - Send data to MongoDB \n",
444 | "126 126 MongoDB - Send data to Google Sheets \n",
445 | "\n",
446 | " st_is_intent_matched st_matched_intent_id_shuffled \\\n",
447 | "34 True 126 \n",
448 | "126 True 34 \n",
449 | "\n",
450 | " st_matched_intent_text_shuffled st_is_intent_matched_shuffled \n",
451 | "34 MongoDB - Send data to Google Sheets False \n",
452 | "126 Google Sheets - Send data to MongoDB False "
453 | ]
454 | },
455 | "execution_count": 13,
456 | "metadata": {},
457 | "output_type": "execute_result"
458 | }
459 | ],
460 | "source": [
461 | "st_data[st_data['st_is_intent_matched_shuffled'] == False]"
462 | ]
463 | },
464 | {
465 | "cell_type": "code",
466 | "execution_count": 14,
467 | "metadata": {
468 | "pycharm": {
469 | "name": "#%%\n"
470 | }
471 | },
472 | "outputs": [
473 | {
474 | "data": {
475 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEICAYAAACktLTqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQmUlEQVR4nO3df6xfdX3H8eeLFpEJmzBuSe0P27GarbhZzF3nQrI4cQN1WTELW8l0XUJS/yiJZsYEzDZ1WxfcRKOJkJTI7BYm66KGRtkmNhrDMqmFVKTUzkYQru3oFTWCus6W9/64p/Jd+7293957v/22H56P5Jtzzud8Puf7/iY3r3vu53vOuakqJEltOWfUBUiS5p/hLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoIWjLgDgkksuqRUrVoy6DEk6qzz44IPfqaqxfvvOiHBfsWIFu3btGnUZknRWSfKt6fY5LSNJDTLcJalBhrskNchwl6QGGe6S1KAZwz3Ji5PsTPLVJHuSvK9rf2+SbyfZ3b3e2DPm5iT7k+xLcvUwP4Ak6USDXAp5GHhdVT2b5Fzg/iT/2u37UFV9oLdzktXAeuBy4GXA55O8oqqOzmfhkqTpzXjmXlOe7TbP7V4newj8OuDuqjpcVY8B+4G1c65UkjSwgW5iSrIAeBD4ReCjVfVAkjcANyb5Y2AX8M6q+h6wBPhyz/CJru34Y24ENgIsX758Th/idFlx02dHXUJTHr/lTaMuQWrWQF+oVtXRqloDLAXWJnklcDtwGbAGOAjc2nVPv0P0OeaWqhqvqvGxsb53z0qSZumUrpapqu8DXwSuqaqnutB/DriD56deJoBlPcOWAgfmXqokaVCDXC0zluSl3fr5wOuBrydZ3NPtzcAj3fp2YH2S85KsBFYBO+e1aknSSQ0y574Y2NrNu58DbKuqzyT5xyRrmJpyeRx4G0BV7UmyDXgUOAJs8koZSTq9Zgz3qnoYuKJP+1tPMmYzsHlupUmSZss7VCWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1KAZwz3Ji5PsTPLVJHuSvK9rvzjJfUm+0S0v6hlzc5L9SfYluXqYH0CSdKJBztwPA6+rqlcBa4BrkrwGuAnYUVWrgB3dNklWA+uBy4FrgNuSLBhC7ZKkacwY7jXl2W7z3O5VwDpga9e+Fbi2W18H3F1Vh6vqMWA/sHY+i5YkndxAc+5JFiTZDRwC7quqB4BLq+ogQLdc1HVfAjzZM3yia5MknSYDhXtVHa2qNcBSYG2SV56ke/od4oROycYku5LsmpycHKhYSdJgTulqmar6PvBFpubSn0qyGKBbHuq6TQDLeoYtBQ70OdaWqhqvqvGxsbFTr1ySNK1BrpYZS/LSbv184PXA14HtwIau2wbgnm59O7A+yXlJVgKrgJ3zXLck6SQWDtBnMbC1u+LlHGBbVX0myX8C25LcADwBXAdQVXuSbAMeBY4Am6rq6HDKlyT1M2O4V9XDwBV92p8GrppmzGZg85yrkyTNineoSlKDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDVoxnBPsizJF5LsTbInydu79vcm+XaS3d3rjT1jbk6yP8m+JFcP8wNIkk60cIA+R4B3VtVDSS4EHkxyX7fvQ1X1gd7OSVYD64HLgZcBn0/yiqo6Op+FS5KmN+OZe1UdrKqHuvVngL3AkpMMWQfcXVWHq+oxYD+wdj6KlSQN5pTm3JOsAK4AHuiabkzycJI7k1zUtS0BnuwZNsHJfxlIkubZwOGe5ALgk8A7quoHwO3AZcAa4CBw67Gu
fYZXn+NtTLIrya7JyclTrVuSdBIDhXuSc5kK9ruq6lMAVfVUVR2tqueAO3h+6mUCWNYzfClw4PhjVtWWqhqvqvGxsbG5fAZJ0nEGuVomwMeAvVX1wZ72xT3d3gw80q1vB9YnOS/JSmAVsHP+SpYkzWSQq2WuBN4KfC3J7q7t3cD1SdYwNeXyOPA2gKrak2Qb8ChTV9ps8koZSTq9Zgz3qrqf/vPo955kzGZg8xzqkiTNgXeoSlKDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQTOGe5JlSb6QZG+SPUne3rVfnOS+JN/olhf1jLk5yf4k+5JcPcwPIEk60SBn7keAd1bVLwOvATYlWQ3cBOyoqlXAjm6bbt964HLgGuC2JAuGUbwkqb8Zw72qDlbVQ936M8BeYAmwDtjaddsKXNutrwPurqrDVfUYsB9YO891S5JO4pTm3JOsAK4AHgAuraqDMPULAFjUdVsCPNkzbKJrkySdJgOHe5ILgE8C76iqH5ysa5+26nO8jUl2Jdk1OTk5aBmSpAEMFO5JzmUq2O+qqk91zU8lWdztXwwc6tongGU9w5cCB44/ZlVtqarxqhofGxubbf2SpD4GuVomwMeAvVX1wZ5d24EN3foG4J6e9vVJzkuyElgF7Jy/kiVJM1k4QJ8rgbcCX0uyu2t7N3ALsC3JDcATwHUAVbUnyTbgUaautNlUVUfnu3BJ0vRmDPequp/+8+gAV00zZjOweQ51SZLmwDtUJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkho0Y7gnuTPJoSSP9LS9N8m3k+zuXm/s2Xdzkv1J9iW5eliFS5KmN8iZ+8eBa/q0f6iq1nSvewGSrAbWA5d3Y25LsmC+ipUkDWbGcK+qLwHfHfB464C7q+pwVT0G7AfWzqE+SdIszGXO/cYkD3fTNhd1bUuAJ3v6THRtkqTTaLbhfjtwGbAGOAjc2rWnT9/qd4AkG5PsSrJrcnJylmVIkvqZVbhX1VNVdbSqngPu4PmplwlgWU/XpcCBaY6xparGq2p8bGxsNmVIkqYxq3BPsrhn883AsStptgPrk5yXZCWwCtg5txIlSadq4UwdknwCeC1wSZIJ4D3Aa5OsYWrK5XHgbQBVtSfJNuBR4AiwqaqODqVySdK0Zgz3qrq+T/PHTtJ/M7B5LkVJkubGO1QlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSgGcM9yZ1JDiV5pKft4iT3JflGt7yoZ9/NSfYn2Zfk6mEVLkma3iBn7h8Hrjmu7SZgR1WtAnZ02yRZDawHLu/G3JZkwbxVK0kayIzhXlVfAr57XPM6YGu3vhW4tqf97qo6XFWPAfuBtfNTqiRpULOdc7+0qg4CdMtFXfsS4MmefhNdmyTpNJrvL1TTp636dkw2JtmVZNfk5OQ8lyFJL2yzDfenkiwG6JaHuvYJYFlPv6XAgX4HqKotVTVeVeNjY2OzLEOS1M9sw307sKFb3wDc09O+Psl5SVYCq4CdcytRknSqFs7UIckngNcClySZAN4D3AJsS3ID8ARwHUBV7UmyDXgUOAJsqqqjQ6pdkjSNGcO9qq6fZtdV0/TfDGyeS1GSpLnxDlVJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJek
Bi2cy+AkjwPPAEeBI1U1nuRi4J+BFcDjwB9U1ffmVqYk6VTMx5n7b1XVmqoa77ZvAnZU1SpgR7ctSTqNhjEtsw7Y2q1vBa4dwntIkk5iruFewOeSPJhkY9d2aVUdBOiWi+b4HpKkUzSnOXfgyqo6kGQRcF+Srw86sPtlsBFg+fLlcyxDktRrTmfuVXWgWx4CPg2sBZ5KshigWx6aZuyWqhqvqvGxsbG5lCFJOs6swz3JS5JceGwd+B3gEWA7sKHrtgG4Z65FSpJOzVymZS4FPp3k2HH+qar+LclXgG1JbgCeAK6be5mSpFMx63Cvqm8Cr+rT/jRw1VyKkiTNjXeoSlKDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDVoaOGe5Jok+5LsT3LTsN5HknSioYR7kgXAR4E3AKuB65OsHsZ7SZJOtHBIx10L7K+qbwIkuRtYBzw6pPeTXvBW3PTZUZfQjMdvedOoS5izYU3LLAGe7Nme6NokSafBsM7c06et/l+HZCOwsdt8Nsm+IdXyQnQJ8J1RFzGTvH/UFWgE/NmcXy+fbsewwn0CWNazvRQ40NuhqrYAW4b0/i9oSXZV1fio65CO58/m6TOsaZmvAKuSrEzyImA9sH1I7yVJOs5Qztyr6kiSG4F/BxYAd1bVnmG8lyTpRMOalqGq7gXuHdbxdVJOd+lM5c/maZKqmrmXJOms4uMHJKlBhrskNchwb0CmvCXJX3Tby5OsHXVdkkbHcG/DbcBvANd3288w9WwfaeSS/EySP09yR7e9Ksnvjrqu1hnubfj1qtoE/A9AVX0PeNFoS5J+6u+Bw0ydgMDUTY5/PbpyXhgM9zb8pHsSZwEkGQOeG21J0k9dVlV/C/wEoKp+TP9HlGgeGe5t+AjwaWBRks3A/cDfjLYk6af+N8n5PH/ycRlTZ/IaIq9zb0SSXwKuYuqMaEdV7R1xSRIASX4b+DOm/rfD54ArgT+pqi+Osq7WGe4NSLK8X3tVPXG6a5H6SfLzwGuYOvn4clWd8U+GPNsZ7g1I8jWm/uQN8GJgJbCvqi4faWESkORKYHdV/TDJW4BXAx+uqm+NuLSmOefegKr6lar61W65iqn/hHX/qOuSOrcDP0ryKuBdwLeAfxhtSe0z3BtUVQ8BvzbqOqTOkZqaIlgHfKSqPgxcOOKamje0p0Lq9Enypz2b5zD1Z+/kiMqRjvdMkpuBtwC/2V22e+6Ia2qeZ+5tuLDndR7wWabOkqQzwR8ydenjDVX130z9P+W/G21J7fML1bNcdxZ0S1W9a9S1SDpzOC1zFkuysPuvV68edS3S8ZI8Q3fj0vG7gKqqnz3NJb2gGO5nt51Mza/vTrId+Bfgh8d2VtWnRlWYVFV+aTpChnsbLgaeBl7H89e7F2C464yRZBFT92EA3mQ3bIb72W1Rd6XMIzwf6sf4ZYrOCEl+D7gVeBlwCHg5sBfwJrsh8mqZs9sC4ILudWHP+rGXdCb4K6YePfBfVbWSqWcg/cdoS2qfZ+5nt4NV9ZejLkKawU+q6ukk5yQ5p6q+kOT9oy6qdYb72c1nYuts8P0kFwBfAu5Kcgg4MuKamud17mexJBdX1XdHXYfUT5LlVfVEkpcAP2ZqGviPgJ8D7qqqp0daYOMMd0lDkeShqnp1t/7Jqvr9Udf0QuIXqpKGpXfa8BdGVsULlOEuaVhqmnWdBk7LSBqKJEeZumM6wPnAj47twscPDJ3hLkkNclpGkhpkuEtSgwx3SWqQ4S5JDTLcJalB/wf5ekahV2pYlwAAAABJRU5ErkJggg==\n",
476 | "text/plain": [
477 | ""
478 | ]
479 | },
480 | "metadata": {
481 | "needs_background": "light"
482 | },
483 | "output_type": "display_data"
484 | }
485 | ],
486 | "source": [
487 | "st_data['st_is_intent_matched_shuffled'].value_counts().plot.bar()\n",
488 | "plt.show()"
489 | ]
490 | },
491 | {
492 | "cell_type": "code",
493 | "execution_count": 27,
494 | "metadata": {
495 | "pycharm": {
496 | "name": "#%%\n"
497 | }
498 | },
499 | "outputs": [
500 | {
501 | "data": {
502 | "text/html": [
503 | "\n",
504 | "\n",
517 | "
\n",
518 | " \n",
519 | " \n",
520 | " | \n",
521 | " repetitions | \n",
522 | " tf_time_elapsed | \n",
523 | " st_time_elapsed | \n",
524 | "
\n",
525 | " \n",
526 | " \n",
527 | " \n",
528 | " 0 | \n",
529 | " 0 | \n",
530 | " 0.000003 | \n",
531 | " 0.000002 | \n",
532 | "
\n",
533 | " \n",
534 | " 1 | \n",
535 | " 1 | \n",
536 | " 1.755822 | \n",
537 | " 8.272556 | \n",
538 | "
\n",
539 | " \n",
540 | " 2 | \n",
541 | " 2 | \n",
542 | " 3.255506 | \n",
543 | " 17.061706 | \n",
544 | "
\n",
545 | " \n",
546 | " 3 | \n",
547 | " 3 | \n",
548 | " 4.404537 | \n",
549 | " 26.071062 | \n",
550 | "
\n",
551 | " \n",
552 | " 4 | \n",
553 | " 4 | \n",
554 | " 6.189593 | \n",
555 | " 35.721578 | \n",
556 | "
\n",
557 | " \n",
558 | " 5 | \n",
559 | " 5 | \n",
560 | " 7.772967 | \n",
561 | " 44.145934 | \n",
562 | "
\n",
563 | " \n",
564 | " 6 | \n",
565 | " 6 | \n",
566 | " 9.517264 | \n",
567 | " 56.337967 | \n",
568 | "
\n",
569 | " \n",
570 | " 7 | \n",
571 | " 7 | \n",
572 | " 10.562992 | \n",
573 | " 72.224635 | \n",
574 | "
\n",
575 | " \n",
576 | " 8 | \n",
577 | " 8 | \n",
578 | " 12.480063 | \n",
579 | " 78.887859 | \n",
580 | "
\n",
581 | " \n",
582 | " 9 | \n",
583 | " 9 | \n",
584 | " 13.984340 | \n",
585 | " 87.984332 | \n",
586 | "
\n",
587 | " \n",
588 | " 10 | \n",
589 | " 10 | \n",
590 | " 15.344909 | \n",
591 | " 103.883669 | \n",
592 | "
\n",
593 | " \n",
594 | "
\n",
595 | "
"
596 | ],
597 | "text/plain": [
598 | " repetitions tf_time_elapsed st_time_elapsed\n",
599 | "0 0 0.000003 0.000002\n",
600 | "1 1 1.755822 8.272556\n",
601 | "2 2 3.255506 17.061706\n",
602 | "3 3 4.404537 26.071062\n",
603 | "4 4 6.189593 35.721578\n",
604 | "5 5 7.772967 44.145934\n",
605 | "6 6 9.517264 56.337967\n",
606 | "7 7 10.562992 72.224635\n",
607 | "8 8 12.480063 78.887859\n",
608 | "9 9 13.984340 87.984332\n",
609 | "10 10 15.344909 103.883669"
610 | ]
611 | },
612 | "execution_count": 27,
613 | "metadata": {},
614 | "output_type": "execute_result"
615 | }
616 | ],
617 | "source": [
618 | "speed_benchmark = pd.read_csv('output/speed_benchmark.csv')\n",
619 | "speed_benchmark"
620 | ]
621 | },
622 | {
623 | "cell_type": "code",
624 | "execution_count": 28,
625 | "metadata": {
626 | "pycharm": {
627 | "name": "#%%\n"
628 | }
629 | },
630 | "outputs": [
631 | {
632 | "data": {
633 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAA3IklEQVR4nO3dd3hUZfbA8e9JQgokJBA6SF1AeoAASo2KiIJgx4q9rsrqomBHXBRddO26rIio7G9R7MoqKF0RpCkCuigoRCIQSoCQnvP7496MAQIMSSbTzud58szcOzP3PaG8Z+5933teUVWMMcYYgAh/B2CMMSZwWFIwxhjjYUnBGGOMhyUFY4wxHpYUjDHGeET5O4CKqFOnjjZv3tzfYRhjTFBZsWJFpqrWLeu1oE4KzZs3Z/ny5f4OwxhjgoqI/Hqk1+zykTHGGA9LCsYYYzwsKRhjjPEI6jGFshQUFJCenk5ubq6/QzFBKjY2liZNmlCtWjV/h2JMlQu5pJCenk5CQgLNmzdHRPwdjgkyqsrOnTtJT0+nRYsW/g7HmCoXcpePcnNzSU5OtoRgykVESE5OtjNNE7ZCLikAlhBMhdi/HxPOQjIpGGNMSPtmCvw8zyeHtqRQyXbu3ElKSgopKSk0aNCAxo0be7bz8/OrJIa3336bdu3accoppzB//nyGDh1aacf+5Zdf6Nix40H7xo0bx6RJk476uddee41bb7210uIwJmwtnQyf3Amr3vDJ4UNuoNnfkpOTWb16NeB0lvHx8YwePbpK2lZVVJUpU6bw4osvepKCMSZEfP0yfDoGThwK57zskybsTKEKrFixggEDBtC9e3fOOOMMMjIyAEhLS2PMmDH07NmTNm3asGjRIgDWrl1Lz549SUlJoXPnzmzYsAGAp556io4dO9KxY0eefvppwPnm3q5dO2655Ra6devGI488wuLFi7npppu46667Dopj165dnHPOOXTu3JmTTjqJ7777DoBOnTqxZ88eVJXk5GRef/11AK644go+//zz4/pd09LSPKVHMjMzKV2basuWLQwePJi2bdvy8MMPH98fojHh7uuX/kgIF0yFqGifNBPSZwoPf7SWdVv3Vuox2zeqyUNnd/D6/arKbbfdxgcffEDdunWZMWMG9913H6+++ioAhYWFLFu2jFmzZvHwww/z+eef8/LLLzNq1Cguu+wy8vPzKSoqYsWKFUydOpWlS5eiqvTq1YsBAwZQq1YtfvzxR6ZOncqLL74IwLx585g0aRKpqakHnSk89NBDdO3alffff5+5c+cycuRIVq9eTZ8+ffjyyy9p1qwZLVu2ZNGiRYwcOZKvv/6al1566bDf6eeffyYlJcWz/fvvv3t1NrRs2TK+//57qlevTo8ePRgyZAipqale/1kaE7aWvACf3QvtznYSQqTv7qHxWVIQkVeBocB2Ve3o7qsNzACaA78AF6nqbve1e4BrgSLgdlX9zFexVaW8vDy+//57Tj/9dACKiopo2LCh5/XzzjsPgO7du/PLL78AcPLJJzNhwgTS09M577zzaN26NYsXL+bcc8+lRo0ans8tWrSIYcOG0axZM0466aRjxrJ48WLeeecdAE499VR27txJVlYW/fr1Y+HChTRr1oybb76ZyZMn89tvv1G7dm3i4+MPO06rVq08l8jAuUzmjdNPP53k5GRP/IsXL7akYMyxfPU8zL4P2g2DC171aUIA354pvAY8D7xeat9Y4AtVnSgiY93tMSLSHrgY6AA0Aj4XkTaqWlSRAI7nG72vqCodOnRgyZIlZb4eExMDQGRkJIWFhQBceuml9OrVi08++YQzzjiDV155BVU9YhslicKbWA4lIvTv358XXniBzZs3M2HCBN577z1mzpxJv379vDpuaVFRURQXFwMcNtf/0KmeNvXTmGP48lmY8wC0PwfOf8XnCQF8OKagqguBXYfsHg5Mc59PA84ptf8/qpqnqpuAn4Cevoqt
KsXExLBjxw5PUigoKGDt2rVH/czGjRtp2bIlt99+O8OGDeO7776jf//+vP/++xw4cIDs7Gzee++94+60+/fvz/Tp0wGYP38+derUoWbNmpxwwglkZmayYcMGWrZsSd++fZk0aVK5kkLz5s1ZsWIFADNnzjzotTlz5rBr1y5ycnJ4//336dOnz3Ef35iw8eUzTkLocG6VJQSo+oHm+qqaAeA+1nP3Nwa2lHpfursv6EVERDBz5kzGjBlDly5dSElJ4auvvjrqZ2bMmEHHjh1JSUnhhx9+YOTIkXTr1o2rrrqKnj170qtXL6677jq6du16XLGMGzeO5cuX07lzZ8aOHcu0adM8r/Xq1Ys2bdoA0K9fP3777Tf69u173L/v6NGjeemll+jduzeZmZkHvda3b1+uuOIKUlJSOP/88+3SkTFHsvgfMOdB6Hg+nFd1CQFAjnZZosIHF2kOfFxqTGGPqiaVen23qtYSkReAJar6prt/CjBLVd8p45g3ADcANG3atPuvvx68VsT69etp166dj34jEy7s35Hxm0VPwRcPQ8cL4Nx/QmTlX+UXkRWqWua3sqo+U9gmIg0B3Mft7v504IRS72sCbC3rAKo6WVVTVTW1bt0yV5MzxpjgtHCSkxA6XeizhHAsVZ0UPgSudJ9fCXxQav/FIhIjIi2A1sCyKo7NGGP8Z+HfYe4j0OkivyUE8O2U1P8D0oA6IpIOPARMBN4SkWuBzcCFAKq6VkTeAtYBhcCfKzrzyBhjgsaCJ2DeBOh8MZzzIkRE+i0UnyUFVb3kCC+ddoT3TwAm+CoeY4wJSPMnwvzHoMslMPwFvyYECPE7mo0xJqDNewwWTIQul8Lw5/2eEMCSgjHGVD1V5+xgweOQcjkMezYgEgJYQTyfmDBhAh06dKBz586kpKSwdOnSch1n9erVzJo1q5KjO7Y1a9Z4yn3Xrl2bFi1akJKSwsCBA33S3qJFi+jQoQMpKSnk5OT4pA1jAoYqzHvUSQhdL4dhzwVMQgA7U6h0S5Ys4eOPP2blypXExMSQmZlZ7nUUVq9ezfLlyznrrLMqOcqj69Spk6e20VVXXcXQoUO54IILDnpPYWEhUVGV889n+vTpjB49mquvvtqr9xcVFREZWXn/iSrzdzHmqFRh7t9g0SToNhKGPgMRgfXdPLCiCQEZGRnUqVPHU9OoTp06NGrUCDi+Etr5+fk8+OCDzJgxg5SUFGbMmEF2djbXXHMNPXr0oGvXrnzwgTOj97XXXuO8885j8ODBtG7dmrvvvtsTz6effkq3bt3o0qULp53mjPEf6TjHkpaWxr333suAAQN45pln+Oijj+jVqxddu3Zl4MCBbNu2DXDunL7mmmtIS0ujZcuWPPvss552hwwZQpcuXejYsSMzZszglVde4a233mL8+PFcdtllqCp33XUXHTt2pFOnTsyYMQNwynKccsopXHrppXTq1In58+czYMAALrroItq0acPYsWOZPn06PXv2pFOnTvz8888A7Nixg/PPP58ePXrQo0cPvvzyS0+MN9xwA4MGDWLkyJHl/ws3xluqzpTTRZOg25UBmRAg1M8U/jsWfl9Tucds0AnOnHjElwcNGsT48eNp06YNAwcOZMSIEQwYMICCgoLjLqE9fvx4li9fzvPPPw/Avffey6mnnsqrr77Knj176Nmzp+eSzurVq1m1ahUxMTG0bduW2267jdjYWK6//noWLlxIixYt2LXLKUU1YcKEMo/jTWG9PXv2sGDBAgB2797N119/jYjwyiuv8MQTT/Dkk08C8MMPPzBv3jz27dtH27Ztufnmm/n0009p1KgRn3zyCQBZWVkkJiayePFiz9nIO++8w+rVq/n222/JzMykR48e9O/fH/ij9HaLFi2YP38+3377LevXr6d27dq0bNmS6667jmXLlvHMM8/w3HPP8fTTTzNq1CjuuOMO+vbty+bNmznjjDNYv3494CTpxYsXExcXd9z/DIw5LqrOTWmL/wHd
r4Ih/wjIhAChnhT8ID4+nhUrVrBo0SLmzZvHiBEjmDhxIqmpqcddQvtQs2fP5sMPP/QsfZmbm8vmzZsBOO2000hMTASgffv2/Prrr+zevZv+/fvTokULAGrXrn3U43hT1mHEiBGe5+np6YwYMYKMjAzy8/M97QAMGTKEmJgYYmJiqFevHtu2baNTp06MHj2aMWPGMHTo0DIL7i1evJhLLrmEyMhI6tevz4ABA/jmm2+oWbMmPXv2PKiNHj16eP4MW7VqxaBBgwDn8te8ec76tZ9//jnr1q3zfGbv3r3s27cPgGHDhllCML6nCp+Pgy+fhtRr4KwnAzYhQKgnhaN8o/elyMhI0tLSSEtLo1OnTkybNo3u3bsfdwntQ6kq77zzDm3btj1o/9KlSz2fL30MVS2zPPWRjuON0mcTt912G3feeSfDhg1j/vz5B62rUFY8bdq0YcWKFcyaNYt77rmHQYMG8eCDDx4WmzdtH9pGRESEZzsiIsLzZ1hcXMySJUvK7Py9LTluTLmpOoXtvnoWUq+FsyYFdEIAG1OodD/++KNn+UxwLus0a9aMtm3bHncJ7YSEBM+3WoAzzjiD5557ztNxrlq16qifP/nkk1mwYAGbNm0C8Fw+Ot7jHElWVhaNGzvFbEtXXD2SrVu3Ur16dS6//HJGjx7NypUrD3tP//79mTFjBkVFRezYsYOFCxfSs2f5q6gPGjTIc/kNOGhxIGN8ShVm3+8khB7XwZDAPkMoEfgRBpn9+/dz5ZVX0r59ezp37sy6desYN24c0dHRx11C+5RTTmHdunWegeYHHniAgoICOnfuTMeOHXnggQeO+vm6desyefJkzjvvPLp06eK59HO8xzmScePGceGFF9KvXz/q1KlzzPevWbPGs/b0hAkTuP/++w97z7nnnkvnzp3p0qULp556Kk888QQNGjQoV3wAzz77rKdcePv27Xn5Zd8sdm7MQUoSwpLnoecNzhlCkCwq5dPS2b6WmpqqJYvEl7CSx6Yy2L8jU26qznrKX78IvW6CwRMDLiEcrXR2aI8pGGNMVVKFT++BpS9Br5th8GMBlxCOxZKCMcZUBlX4dCwsfRlOugXOeDToEgKE6JhCMF8SM/5n/37McVOF/97tJISTbw3ahAAhmBRiY2PZuXOn/cc25aKq7Ny5k9jYWH+HYoKFKswaDcsmOwlh0N+CNiFACF4+atKkCenp6ezYscPfoZggFRsbS5MmTfwdhgkGxcVOQlg+BXrfDqePD+qEACGYFKpVq3bQXa/GGOMTxUVuQngV+oyCgQ8HfUKAEEwKxhjjc5sWwWf3OLXV+t4Bpz0UEgkBLCkYY4z3dm2E2Q/ADx9D4glwwavQ4byQSQhgScEYY44tNwsW/h2+fhkio+HUB+DkP0O10CuoaEnBGGOOpKgQVk5zVko7sBO6XuYkhITyl14JdJYUjDGmLD/Phc/ug+3roFkf596DRin+jsrnLCkYY0xpmRucYnb/+xSSmsFFb0C7s0Nq3OBoLCkYYwzAgV2w4An45l8QFefcc9DrJoiKOfZnQ4glBWNMeCsqcO41mP+YM6DcbSScch/E1/N3ZH5hScEYE742zHHKXGf+D1oMcKqa1u/g76j86phJQURaAemqmiciaUBn4HVV3ePb0Iwxxke2r3cGkX/+Amq3gkv+A20Gh824wdF4c6bwDpAqIn8CpgAfAv8GzvJlYMYYU+myd8L8R2H5VIiJhzMec5bKjIr2d2QBw5ukUKyqhSJyLvC0qj4nIuVb1NcYY/yhMN8ZQJ7/OOTvh9RrIO0eqJHs78gCjjdJoUBELgGuBM5291XzXUjGGFNJVOHH/zpTTHf9DH8aCIMmQL0T/R1ZwPImKVwN3ARMUNVNItICeLMijYrIHcB1gAJr3DaqAzOA5sAvwEWqursi7Rhjwtjv3zuDyJsWQJ22cNlMaH26v6MKeFLVi9GISGNgMdBeVXNE5C1gFtAe2KWqE0VkLFBLVccc7Vipqam6
fPly3wdtjAke+7fD3L/BqjcgNhHS7oXUqyHSLnCUEJEVqppa1mtHPFMQkTU43+TLpKqdKxBTFBAnIgU4ZwhbgXuANPf1acB84KhJwRhjPArz4OuXYOEkKMxxbjwbcDfE1fJ3ZEHlaJePhrqPf3Yf33AfLwMOlLdBVf1NRCYBm4EcYLaqzhaR+qqa4b4nQ0TKvHNERG4AbgBo2rRpecMwxoQKVVj/oVPSes+v0OZMZ0nMOn/yd2RB6YhJQVV/BRCRPqrap9RLY0XkS2B8eRoUkVrAcKAFsAd4W0Qu9/bzqjoZmAzO5aPyxGCMCQEFubD2XWdt5K2roF57uOJ9aHWKvyMLat4MNNcQkb6quhhARHoDNSrQ5kBgk6rucI/3LtAb2CYiDd2zhIbA9gq0YYwJVbt/ccpSrHwDcnZBnTZw9jOQcjlEWpGGivLmT/Ba4FURSXS39wDXVKDNzcBJIlId5/LRacByIBtn2utE9/GDCrRhjAklxcXw0+fwzSuwYTZIBJx4FvS4Hlr0tzuRK9Exk4KqrgC6iEhNnNlKWRVpUFWXishMYCVQCKzCuRwUD7wlItfiJI4LK9KOMSYEHNgFq96E5VOcM4Qa9aD/XdD9Kkhs7O/oQpI3tY9igPNx7h+IEjcjq2q5xhTczz4EPHTI7jycswZjTLj7baVzVvD9O1CYC017w2kPwolnW0kKH/Pm8tEHQBawAqfjNsaYyleQA2vfg2X/gq0roVoNSLnUqU0U5pVLq5I3SaGJqg72eSTGmPC0+xf4Zopzs1nObmfg+My/Q5cRzs1npkp5kxS+EpFOqrrG59EYY8KDZ+D4X86aBhIBJw5xzgps4NivvEkKfYGrRGQTzuUjAbSCdzQbY8LRgV3OGcE3U5wbzWzgOOB4kxTO9HkUxpjQ9tsKJxGUDBw36wMDH7KB4wDkzZTUX0WkC9DP3bVIVb/1bVjGmKBXkAPfv+vMIrKB46DhzZTUUcD1wLvurjdFZLKqPufTyIwxwWnXJueOY8/AcVt34PhiiK3p7+jMMXh7R3MvVc0GEJHHgSWAJQVjzB9++hyW/vPggeOe10PzfjZwHES8SQoCFJXaLnL3GWOMY8ETMG8CxNd3ylV3u9IGjoOUN0lhKrBURN5zt88BpvgsImNMcJn/OMx/FDpfDMOes4HjIOfNQPNTIjIfZ2qqAFer6ipfB2aMCQLzHoMFE6HLpTD8eYiI9HdEpoK8GWg+CVirqivd7QQR6aWqS30enTEmMKnC/MdgweNOyephz1pCCBERXrznJWB/qe1sd58xJhypOuMHCx6Hrpc7l4wsIYQMb5KCqKpnhTNVLca7sQhjTKhRhbl/g4V/h24j4eznIMKbbsQEC2/+NjeKyO0iUs39GQVs9HVgxpgAowpfjIdFk5zZRUOfsYQQgrz5G70JZ7nM34B0oBdwgy+DMsYEGFX4fBwsfgq6Xw1Dn7aEEKK8mX20Hbi4CmIxxgQiVZjzIHz1LKReC2dNsoQQwo75NysibUTkCxH53t3uLCL3+z40Y4zfqcKcB5yE0OM6GPKkJYQQ583f7r+Ae4ACAFX9DjtzMCb0qcLs++Gr56DnDc4ZgpWrCHneJIXqqrrskH2FvgjGGBMgVOGze2HJ89DrJjjzCUsIYcKbqaWZItIKUAARuQDI8GlUxhj/UYVP74GlL0Gvm2HwY5YQwog3SeHPwGTgRBH5DdgEXO7TqIwx/qEK/x0Dy/4JJ90CZzxqCSHMeDP7aCMwUERqABGqus/3YRljqpwqzLrLWTf55Fth0N8sIYQhb2YfjRKRmsAB4B8islJEBvk+NGNMlSkuhk/+6iSE3rdZQghj3gw0X6Oqe4FBQD3gamCiT6MyxlSd4mKY9VdYPgX6jILTH7GEEMa8qn3kPp4FTHXXZ7Z/McaEguJi+OROZ/nMPn+BgQ9bQghz3iSFFSIyGycpfCYiCUCxb8MyxvhccTF8/BdYMRX63gkDx1lCMF6v0ZwCbFTVAyKSjHMJ
yRgTrIqL4aPbYdUb0G80nHq/JQQDeDf7qBhYWWp7J7DTl0EZY3youBg+ug1WvQn974ZT7rWEYDz8UsRERJJEZKaI/CAi60XkZBGpLSJzRGSD+1jLH7EZE9KKi+DDW52EMGCMJQRzGH9VtnoG+FRVTwS6AOuBscAXqtoa+MLdNsZUluIi+OBWWD0d0u6xhGDK5NUKaiISCdQv/X5V3VyeBt17HvoDV7nHyQfyRWQ4kOa+bRowHxhTnjaMMYcoLoL3b4Hv/gNp90Ka/dcyZTtmUhCR24CHgG38MetIgc7lbLMlsAOYKiJdgBXAKKC+qmYAqGqGiNQ7Qjw34C7y07Rp03KGYEwYKS6C92+G72bAKffDgLv8HZEJYN5cPhoFtFXVDqrayf0pb0IAJxF1A15S1a5ANsdxqUhVJ6tqqqqm1q1btwJhGBMGigrhvRudhHDqA5YQzDF5kxS2AFmV2GY6kK6qS93tmThJYpuINARwH7dXYpvGhJ+ShLDmbTjtQeg/2t8RmSDgzZjCRmC+iHwC5JXsVNWnytOgqv4uIltEpK2q/gicBqxzf67EKaFxJfBBeY5vjMFJCO9eD2vfdW5K63uHvyMyQcKbpLDZ/Yl2fyrDbcB0EYnGSTpX45y1vCUi17rtXVhJbRkTXooK4d3rYO17cPp4p56RMV7y5ua1hyu7UVVdDaSW8dJpld2WMWGlqADeuQ7Wve9UOu19m78jMkHmiElBRJ5W1b+IyEe4q66VpqrDfBqZMeb4ZO90zhB+nguDJkDvW/0dkQlCRztTeMN9nFQVgRhjKmDLMnj7KsjeAWc/C92v9HdEJkgdMSmo6gr3cUHVhWOMOS6q8PWLMOdBqNkYrp0DjVL8HZUJYl7d0WyMCUC5WfDBn2H9R3DiUBj+AsQl+TsqE+QsKRgTjDK+g7dGwp7NzoDyybdaHSNTKSwpGBNMVGHlNJh1N1RPhqtnQdOT/B2VCSFHm31U5qyjEjb7yJgqlp8NH9/pFLVreQqc/wrUqOPvqEyIOdqZQsmso/OABsCb7vYlwC8+jMkYc6gd/3MuF+34wSl73f8uiIj0d1QmBB1t9tECABF5RFX7l3rpIxFZ6PPIjDGONTPho1EQFQNXvAutTvV3RCaEeTOmUFdEWqrqRgARaQFYeVJjfK0wDz67F755BU7oBRdMhcTG/o7KhDhvksIdOAXxNrrbzYEbfRaRMQZ2/wpvXwlbVzkziwaOg8hq/o7KhAFvah99KiKtgRPdXT+oat7RPmOMqYAf/+uUvFZgxHRoN9TfEZkw4s3Ka9WBO4Fmqnq9iLR2y15/7PvwjAkjRYUwdzx8+Qw06AwXTYPaLf0dlQkz3lw+moqzZObJ7nY68DZgScGYyrLvd5h5Dfz6JXS/GgZPhGqx/o7KhCFvkkIrVR0hIpcAqGqOiN06aUyl2bgA3rnWuQ/h3MnQZYS/IzJhzJukkC8icbg3solIK0qtwGaMKafiYlj0JMx/FJJbw5UfQ70Tj/05Y3zIm6TwEPApcIKITAf6AFf5MihjQl72TnjvBvjpc+h0IQx9GmLi/R2VMV7NPpojIiuBkwABRqlqps8jMyZUbfnGXftgOwx5ClKvsWJ2JmBEHOsNItIHyFXVT4Ak4F4RaebrwIwJOarw9UswdbBTouLa2dDjWksIJqAcMykALwEHRKQLcBfwK/C6T6MyJtTkZjm1iz4dC60HwY0LoFFXf0dlzGG8GVMoVFUVkeHAs6o6RURsrT9jvPX7Gich7P4VTh8PvW+3swMTsLxJCvtE5B7gCqCfiEQCdr+9Md5Y+QbMGg1xteCqj6FZb39HZMxReXP5aATOFNRrVPV3oDHwd59GZUywyz8A798CH97qFLO7cZElBBMUjpkU3ETwb6CWiJwN5KuqjSkYUxZV+N9s+NcpsPrfMGAMXPEexFthYRMcvJl9dB2wDGexnQuAr0XkGl8HZkzQ+fUrmHom/PtCp+z15e/AKffaYjgmqHgzpnAX0FVV
dwKISDLwFfCqLwMzJmhkfAtfPAI/zYH4Bs69B91GWqlrE5S8SQrpwL5S2/uALb4Jx5ggkvkTzJsAa9+F2CRnZlGP6yG6ur8jM6bcvEkKvwFLReQDnPpHw4FlInIngKo+5cP4jAk8Wemw4HFYNR2iYp31knvfBrGJ/o7MmArzJin87P6U+MB9TKj8cIwJYNmZsOgpZ3lMFHpeD/3+CvH1/B2ZMZXGm9pHD/uiYfd+h+XAb6o6VERqAzNwlvv8BbhIVXf7om1jjkvuXljyAix5HgoOQJdLIW0MJDX1d2TGVDpvVl6rC9wNdAA8q36o6qkVbHsUsB6o6W6PBb5Q1YkiMtbdHlPBNowpv4Jc56xg0ZOQswvaDYNT74e6bf0dmTE+483Na9OBH4AWwMM43+K/qUijItIEGAK8Umr3cGCa+3wacE5F2jCm3IoKYcVr8Fw3mH0fNOwC18+DEW9YQjAhz5sxhWS33tEoVV0ALBCRBRVs92mcs4/S4xL1VTUDQFUzRKTMC7UicgNwA0DTpnb6bipRcTGsew/mToBdP0PjVDj3ZWjR39+RGVNlvEkKBe5jhogMAbYCTcrboIgMBbar6goRSTvez6vqZGAyQGpqqpY3DmM8VGHDHJg73ileV689XPx/0PZMK1xnwo43SeFvIpII/BV4DmcM4I4KtNkHGCYiZ+GMUdQUkTeBbSLS0D1LaAhsr0Abxnjn1yXwxcOweQkkNXPWSO50gd2FbMKWN7OPPnafZgGnVLRBVb0HuAfAPVMYraqXi8jfgSuBie7jB0c6hjEVlvEdzH0ENsyG+Pow5EnoOhKiov0dmTF+dcSkICLP4dysViZVvb2SY5kIvCUi1wKbgQsr+fjGwM6fnbuQv3/HuQt54DjoeaPdhWyM62hnCst93biqzgfmu893Aqf5uk0TprJ+c+9CfhOiYpybznrfDnFJ/o7MmIByxKSgqtNKb4tIDVXN9n1IxlSi7J2w+ClY9i/QYuhxnZMQEur7OzJjApI3N6+dDEwB4oGm7lrNN6rqLb4OzphyK8yHr1+EhZOgIBs6XwxpY6FWM39HZkxA82b20dPAGcCHAKr6rYjYxG0TuDYugFl3QeaP0GYwDHwY6p3o76iMCQreJAVUdYscPF+7yDfhGFMBe7fC7PudQeSkZnDJDGg72N9RGRNUvEkKW0SkN6AiEg3cjlOzyJjAUFQAS/8J8x9zng8YC33/AtXi/B2ZMUHHm6RwE/AM0BhnwZ3ZwJ99GZQxXvvlS5g1Gravg9aD4MzHoXZLf0dlTNDy5ua1TOCyKojFGO/t2wZzHoDvZkBiU7j439D2LCtLYUwFeTWmYEzAKCp0ylnPmwAFOc700n6j7eYzYyqJJQUTPDYvhU/+CtvWQMtT4KxJUOdP/o7KmJBiScEEvv074PNxsPpNqNkYLpwG7YfbpSJjfMCbm9fqA48CjVT1TBFpD5ysqlN8Hp0Jb8VFsGIqfDEe8rOhzyjofzfExPs7MmNCljdnCq8BU4H73O3/4aylbEnB+E76CvjkTshYDc37OVVMbdUzY3zOm+U466jqW0AxgKoWYjevGV85sAs+vB1eOQ32/Q7nT4ErP7KEYEwV8eZMIVtEknHLaIvISThrKxhTeYqLYdXrzthB7l44+c8wYAzE1vR3ZMaEFW+Swp04dY9aiciXQF3gAp9GZcLL1lXOrKLfVkDT3jBkEtTv4O+ojAlL3ty8tlJEBgBtAQF+VNWCY3zMmGPL2Q1z/wbfTIEadeDcf0LnETaryBg/8mb2USRwFtDcff8gEUFVn/JxbCZUFRfDt/+GOQ86iaHXjZB2jy14Y0wA8Oby0UdALrAGd7DZmHLL+M6pVbRlKTTp6cwqatjZ31EZY1zeJIUmqmr/a03F5GbBvEdh2WSIqwXDX4Aul0KENxPgjDFVxZuk8F8RGaSqs30ejQk9xcWw5i2Y/QBk74DUa+DU+6F6bX9HZowpgzdJ4WvgPRGJAApwBptVVW2uoDm6
X5fAZ/fC1pXQqBtcOgMad/N3VMaYo/AmKTwJnAysUVX1cTwmFOzaBJ8/BOs+gIRGzqyiThfZpSJjgoA3SWED8L0lBHNMOXtg0SRnFbSIKEi7F3rfCtE1/B2ZMcZL3iSFDGC+iPwXyCvZaVNSjUdRoVO4bv5jTpmKlMuccYOaDf0dmTHmOHmTFDa5P9HujzEOVdgwB2bfD5k/OoXrzpgADbv4OzJjTDl5c0fzw1URiAky29bCZ/fBxnlQu5Uth2lMiDhiUhCR51X1VhH5CLcYXmmqOsynkZnAtH+7sxTmytchpiYMngip10KUnUQaEwqOdqYwErgVmFRFsZhAVpADX78Ii56CwlzoeSMMuNvuNzAmxBwtKfwMoKoLqigWE4hU4ft3nJLWWVug7RA4fbytjWxMiDpaUqgrInce6cXyzj4SkROA14EGOLWUJqvqMyJSG2dFt+bAL8BFqrq7PG2YSrJlmXPzWfo30KATnPMitOjv76iMMT50tKQQCcTj3MFcmQqBv7oluROAFSIyB7gK+EJVJ4rIWGAsMKaS2zbe2P2rc2aw9l2IbwDDX4QuF0NEpL8jM8b42NGSQoaqjq/sBlU1A+feB1R1n4isBxoDw4E0923TgPlYUqhauXth8VOw5EWQCGfls963Q0y8vyMzxlSRoyUFn88tFJHmQFdgKVDfTRioaoaI1DvCZ24AbgBo2rSpr0MMD0WFzlKYcyfAgUzocgmc+gAkNvZ3ZMaYKna0pHCaLxsWkXjgHeAvqrpXvJzfrqqTgckAqampVnqjon76HD67H3asd5bCPONtK1pnTBg7YlJQ1V2+alREquEkhOmq+q67e5uINHTPEhoC233VvgG2/+DcifzTHKjVAi56A9qdbTefGRPmvClzUanEOSWYAqw/ZAbTh8CVwET38YOqji0sZGc6i92seA2i42HQBOh5PUTF+DsyY0wAqPKkAPQBrgDWiMhqd9+9OMngLRG5FtgMXOiH2EJXQS4s+ycsnAT52dDjOmcguUayvyMzxgSQKk8KqrqYIw9i+3QcIywVF8P3M+GLRyBrM7Q507n5rG4bf0dmjAlA/jhTMFVl4wKY8wBkfAsNOsPw56Blmr+jMsYEMEsKoWj7epjzIGyYDYknwLmTodOFtvKZMSGisKiY/KJiqkdXfhduSSGU7M1wKpiung7RCc5lop43QrVYf0dmjDkKVWVvTiGZ2Xns3J/Pzv15ZGbns2t/PjvdfZn789iVnc/O7Hx2H8hneJdGPH1x10qPxZJCKMjbB18+C0ueh6IC6HUz9B9tFUyN8RNV5UB+kdOZZ+d5OvfM/flOp5/tdPCZbgLYlZ1PYXHZt10lxlUjOT6aOjViaFU3np4tokmOj6FT40SfxG5JIZgVFcDKaTB/ImTvgI7nO3ci127h78iMCTnFxcqenAJ27MtzfvbnsmOf+80+2+ncnUen088tKC7zODWiI6kdH01yjRgaJ8XSuXEiyfHR1K4RTZ34GJLd15Ljo6lVPZroqKq97GtJIRipwg+fOEXrdm6AZn3gkhnQpLu/IzMm6BzIL/yjo9+Xx479eWVuZ+7Po6Do8G/z0ZERTkfuduZ/qhvvbseQ7Hb0tWv88XpcdGAXlrSkEGy2fOPMKNq8BOq0gUv+A20G253IxpRSWFTMzuz8wzr37XtzD+v0s/OLDvt8hEByfAx142OomxBDm/oJ1E34Y7v0T0JMFN6W6QkGlhSCxa6N8PnDsO59qFEPhv4Duo6ESPsrNOElO6+Q3/bk8NvuHOdxTw7bsg7u7HcdyEfLuESfEBtFPbcz79Qk6fBO3t2uXSOayIjQ6eiPh/UogS57Jyx8Ar6ZApHVYMBY6H2blbM2IUlVydyfz1a3sy/d8Zc8z8opOOgzURFC/Zqx1E2I4YTa1enWrNYRO/vYaoF96SYQWFIIVAU5sPRlZ03k/P3QbSSk3QMJDfwdmTHlVlBUzO9ZuaTvzjms4y/Zzis8eIA2PiaKxklxNEqK
pVuzJBonVadRUixNasXROKk6dRNiwvZbvS9YUgg0xcXw3QyY+zfYm+6MFwx8GOqd6O/IjDmm/XmFTue+O4d097F0579tX+5hl3XqJsTQKCmOdg1rMrB9fRolxtK4VnUaJ8XROCmOmnGhdc0+0FlSCCQ/z4XZD8K2NdAwBc59GVr083dUJswUFBWzN6eArJwC9uYWkuU+z8opYK/749nO/eP5ngMF7MstPOhY1SKFholO5963dR0aJcXRJCmOxrXiaJQUR8PEWLukE2AsKQSC3793ylL8/AUkNYXzp0CH86wshSkXVSWvsPiwzvyP54d09LkHv36gjNk4pcVERZAYV42acdVIjKtGvYRYWtdLIDGuGg0SY2nkfsNvUiuOuvExRNilnaBiScGfsn5zy1L8G2ITbW0Dc0wFRcVs35fH71m5/J6VS0ZWjvO4N9ezb8f+PPILy75xqkR8TBQ1Y6M8HXvT2tU9z0t+asZFHbwd6yQC+2Yf2iwp+ENuFix+Gr5+EbQYet8K/f4KcbX8HZnxo9yCIrbvzXM6+r25ZBza8bsd/qHX5GOrRdAwMY4GNWPp1aI2dRNiDuvgS3+zrxkbRVSknYWasllSqEqFebB8qjPF9MBOp3LpqQ9ArWb+jsz42IH8wlKdfC6/Z+V4tn93v+XvzM4/7HMJMVE0SIylQWIsbRsk0CDRuQ7fIDGWhomxNKxpA7GmcllSqApFBU7l0gV/d2YUNe8Hgx6BRpVf4dBUrYKiYnbsy2Pb3ly278tzfvbmsn1vnqezz8jKYe8hA7AAtapX83TyXU5IomHNks4+zpMI4mPsv6ipWvYvzpeKi2DNTJj/GOzeBI1T4ZwXoMUAK0sR4PIKnUs52/flsWNfLtv25rF9n9PZbyvp+Pc51S0PVVIioWFiLM2Sq3NSy9oHfcNv4Hb+dm3eBCJLCr5QXAw/fATzHoUdP0D9Tk7BujZnWDLws5Lr9ttLd/TuN/3S3/j3HCg47LOREULd+Bjq14yhSS3nztn6CbHUqxlDvYQY6teMpV5CDMnxdjOVCV6WFCqTKmyYA3Mfgd+/cwrWXfgatBtu00t9TFXZlZ1PhnvNPqPUNXvPpZ29uWVexqkWKdRLcMoktKhTg14tkj2dfN1SHX7t6tE2vdKEPEsKlWXTQucu5C1LIakZnPMydL4IIuwSQUWpKrsPFLB1T47nGn1J5791zx8zdQ6dhlkt0qmJU79mLK3rxdP3T3Wom1DqW33NGOolxFKrejUbqDXGZUmhorYsc84MNi2EhEZu9dIrnOJ15phUlT0HCtjqTrvcWjIzZ0+uZ19GVu5h9XBKOvyGibF0aZLE4I6xNKwZS0P3LtmGiXEk17Bv9sYcL0sK5ZXxLcydABs+gxp1YfBE6H61rYfsUlWy84vYcyCfPQcK2L4vl617ct2O/4/OPiMr57AVqkqqXjZMjKVTkyQGdYj1dPQNE2NpmBRLnRp2p6wxvmBJ4Xht/wHmPwrrPoDYJDjtIeh5Q8iWsi5Za3ZPToGng99zoIA9OSXP89nt7svKOfh5WatURUYI9RNiaJgUR4dGNRnYrl6pzj6ORomxNlBrjB9ZUvDWro3OWsjfvQXRNWDAGDjpFohL8ndkXsvJL2J3Scee80cHv/tAvlvQzOnUs9zXS57nFx25ZEJctUiSqlcjqXo0SXHVaFM/nsS4aJKqV6NW9WokxUWTWL0a9RJiaJgYZ2WOjQlwlhSOJSsdFjwBq96EyGhngZs+f4Eayf6ODHBuntq5P/+ghcSPtNZsWcsOloiJiqBW9Wi3g69Gyzrxf3T2bgf/R2fvPCZaHRxjQo4lhSPZtw0WPwXLX3Wmmva41qlPVAWL3JQMvh5pAfHS22XdPAVQMzbKs+pUybKDyfHR1K4RXWYHb527MQYsKRzuwC748hlYNtmpVZRyKQy42ylpXUEld8mW3Ch1pE4/c39emdfjY6IiPB19s+TqpDavddhyg3UTYqgTb8sOGmPKx5JCidy9
TtXSJS9A3j7odIGz/GVyK+8+XlD0R7GzvTmemTYl279n5ZK5/8glEUo69Tb1Ew7r5Et+EmKs8JkxxrcCLimIyGDgGSASeEVVJ/q0wfxs56zgy2cgZze0OxvS7oX67T1vObTCZcaeHE/9+pKKl7vLKIuQGFfNnUoZS6fGSZ7aN/USYjx30NauEW0Dr8aYgBFQSUFEIoEXgNOBdOAbEflQVddVemNuGeviRU8Skb2dXQ0HsKL7LayjFRmLc8jIWnbUCpfJNaJpkBhL46RYujdL8tSz9xQ9S4ylenRA/fEaY8wxBVqv1RP4SVU3AojIf4DhQKUmhZ9XLyThg6upp5ksK27HpIKbWL7pRNhUBPyPuglHrnDZMNEpm2DX7I0xoSjQkkJjYEup7XSgV+k3iMgNwA0ATZuWb/A3tv6fyIxtzqdNHiC3SV9GJlVnjFvSuH7NWKKjrHidMSY8BVpSKOvi+kHTcFR1MjAZIDU19fApOl5o3LARjcd8Qftjv9UYY8JKoH0lTgdOKLXdBNjqp1iMMSbsBFpS+AZoLSItRCQauBj40M8xGWNM2Aioy0eqWigitwKf4UxJfVVV1/o5LGOMCRsBlRQAVHUWMMvfcRhjTDgKtMtHxhhj/MiSgjHGGA9LCsYYYzwsKRhjjPEQ1XLd/xUQRGQH8GsFDlEHyKykcIJBuP2+YL9zuLDf+fg0U9W6Zb0Q1EmhokRkuaqm+juOqhJuvy/Y7xwu7HeuPHb5yBhjjIclBWOMMR7hnhQm+zuAKhZuvy/Y7xwu7HeuJGE9pmCMMeZg4X6mYIwxphRLCsYYYzzCMimIyGAR+VFEfhKRsf6Ox9dE5AQRmSci60VkrYiM8ndMVUVEIkVklYh87O9YqoKIJInITBH5wf37PtnfMfmSiNzh/pv+XkT+T0Ri/R2TL4jIqyKyXUS+L7WvtojMEZEN7mOtymgr7JKCiEQCLwBnAu2BS0Qk1BdhKwT+qqrtgJOAP4fB71xiFLDe30FUoWeAT1X1RKALIfy7i0hj4HYgVVU74pTbv9i/UfnMa8DgQ/aNBb5Q1dbAF+52hYVdUgB6Aj+p6kZVzQf+Awz3c0w+paoZqrrSfb4Pp6No7N+ofE9EmgBDgFf8HUtVEJGaQH9gCoCq5qvqHr8G5XtRQJyIRAHVCdGVGlV1IbDrkN3DgWnu82nAOZXRVjgmhcbAllLb6YRBB1lCRJoDXYGlfg6lKjwN3A0U+zmOqtIS2AFMdS+ZvSIiNfwdlK+o6m/AJGAzkAFkqeps/0ZVpeqragY4X/yAepVx0HBMClLGvrCYlysi8cA7wF9Uda+/4/ElERkKbFfVFf6OpQpFAd2Al1S1K5BNJV1SCETuNfThQAugEVBDRC73b1TBLxyTQjpwQqntJoToKWdpIlINJyFMV9V3/R1PFegDDBORX3AuEZ4qIm/6NySfSwfSVbXkLHAmTpIIVQOBTaq6Q1ULgHeB3n6OqSptE5GGAO7j9so4aDgmhW+A1iLSQkSicQamPvRzTD4lIoJznXm9qj7l73iqgqreo6pNVLU5zt/xXFUN6W+Rqvo7sEVE2rq7TgPW+TEkX9sMnCQi1d1/46cRwgPrZfgQuNJ9fiXwQWUcNODWaPY1VS0UkVuBz3BmK7yqqmv9HJav9QGuANaIyGp3373uetgmtNwGTHe/8GwErvZzPD6jqktFZCawEmeG3SpCtNyFiPwfkAbUEZF04CFgIvCWiFyLkyAvrJS2rMyFMcaYEuF4+cgYY8wRWFIwxhjjYUnBGGOMhyUFY4wxHpYUjDHGeFhSMEFHRFREniy1PVpExvmgnf8Tke9E5I7KPvYR2msuIpeW2k4VkWfd52ki0rvUazeJyMiqiMuEl7C7T8GEhDzgPBF5TFUzfdGAiDQAeqtqs2O8L0pVCyup2ebApcC/AVR1ObDcfS0N2A985b72ciW1acxB7EzBBKNCnJuUDvsGLyLNROQL9xv+FyLS9GgHEpFYEZkqImvcInKn
uC/NBuqJyGoR6XfIZ14TkadEZB7wuIi0EpFPRWSFiCwSkRNLve9ld9//3HpMJWs8/F1EvnHjvNE99ESgn9vmHe7ZwcduEcObgDtK4hGRcSIy2j1eioh87R7rvZK6+iIyX0QeF5Flbvv93P0d3H2r3c+0Ls9fgglNlhRMsHoBuExEEg/Z/zzwuqp2BqYDzx7jOH8GUNVOwCXANHehlmHAz6qaoqqLyvhcG2Cgqv4VJ0HdpqrdgdHAi6Xe1xwYgFPC+2X32NfiVPTsAfQArheRFjjF6xa5bf6j5ACq+gvwMvCPI8TzOjDG/Z3X4NztWiJKVXsCfym1/ybgGVVNAVJxaiYZA9jlIxOkVHWviLyOs8hKTqmXTgbOc5+/ATxxjEP1BZ5zj/mDiPyK0+Efq4rs26pa5Fae7Q287ZTfASCm1PveUtViYIOIbAROBAYBnUXkAvc9iUBrIP8YbR7GTYpJqrrA3TUNeLvUW0qKH67ASVAAS4D73PUm3lXVDcfbrgldlhRMMHsap+7N1KO851h1XMoqpe6NbPcxAtjjfuv2pn1127xNVT87KBCRtHLGcjR57mMR7v93Vf23iCzFOXv5TESuU9W5PmjbBCG7fGSClqruAt7CuRxT4iv+WJLxMmDxMQ6z0H0fItIGaAr8eBwx7AU2iciF7jFERLqUesuFIhIhIq1wFsH5EacY481uOXNEpI27GM4+IOEITZX5mqpmAbtLjXtcASw49H2liUhLYKOqPotTabOzd7+tCQeWFEywexKoU2r7duBqEfkOp4McBZ4pnDeV8fkXgUgRWQPMAK5S1bwy3nc0lwHXisi3wFoOXt71R5xO+r/ATaqai7M86DpgpTgLsf8T51v8d0ChiHxbxjTYj4Bzyxr4ximb/Hf3d04Bxh8j3hHA927F3BNxxiSMAaxKqjE+IyKvAR+r6kx/x2KMt+xMwRhjjIedKRhjjPGwMwVjjDEelhSMMcZ4WFIwxhjjYUnBGGOMhyUFY4wxHv8P/ozgIXOy7MUAAAAASUVORK5CYII=\n",
634 | "text/plain": [
635 | ""
636 | ]
637 | },
638 | "metadata": {
639 | "needs_background": "light"
640 | },
641 | "output_type": "display_data"
642 | }
643 | ],
644 | "source": [
645 | "plt.plot(speed_benchmark['repetitions'], speed_benchmark['tf_time_elapsed'], label='Tensorflow Hub')\n",
646 | "plt.plot(speed_benchmark['repetitions'], speed_benchmark['st_time_elapsed'], label='Sentence Transformer')\n",
647 | "\n",
648 | "plt.xlabel('No. of repetitions')\n",
649 | "plt.ylabel('Time elapsed in seconds')\n",
650 | "plt.legend(loc=\"upper left\")\n",
651 | "\n",
652 | "plt.show()"
653 | ]
654 | },
655 | {
656 | "cell_type": "code",
657 | "execution_count": 10,
658 | "metadata": {
659 | "pycharm": {
660 | "name": "#%%\n"
661 | }
662 | },
663 | "outputs": [
664 | {
665 | "data": {
666 | "text/plain": "",
667 | "text/html": "make scatter plot of \n\n time_elapsed\n COLNAME\n\n and \n\n height_from_ground_level\n COLNAME\n\n from \n\n df\n VARNAME\n\n
"
668 | },
669 | "metadata": {},
670 | "output_type": "display_data"
671 | }
672 | ],
673 | "source": [
674 | "import spacy\n",
675 | "nlp = spacy.load(\"training/model-best\")\n",
676 | "doc = nlp('make scatter plot of time_elapsed and height_from_ground_level from df')\n",
677 | "spacy.displacy.render(doc, style=\"ent\", jupyter=True)"
678 | ]
679 | }
680 | ],
681 | "metadata": {
682 | "kernelspec": {
683 | "display_name": "Python 3 (ipykernel)",
684 | "language": "python",
685 | "name": "python3"
686 | },
687 | "language_info": {
688 | "codemirror_mode": {
689 | "name": "ipython",
690 | "version": 3
691 | },
692 | "file_extension": ".py",
693 | "mimetype": "text/x-python",
694 | "name": "python",
695 | "nbconvert_exporter": "python",
696 | "pygments_lexer": "ipython3",
697 | "version": "3.9.12"
698 | }
699 | },
700 | "nbformat": 4,
701 | "nbformat_minor": 1
702 | }
--------------------------------------------------------------------------------
/scripts/generate_training_data.py:
--------------------------------------------------------------------------------
1 | import json
2 | import string
3 | import pickle
4 | import random
5 | from enum import Enum
6 | from random import randint
7 |
8 | import fire
9 | import pandas as pd
10 | from tqdm import tqdm
11 |
12 |
class Entities(Enum):
    """Entity labels that can appear as ``$placeholder`` tokens in templates.

    Every member's value equals its name.  The lower-cased value is what gets
    searched for inside a placeholder when its entity type is resolved.
    """

    VARNAME = "VARNAME"    # variable name
    COLNAME = "COLNAME"    # column name
    FNAME = "FNAME"        # file name
    LIBNAME = "LIBNAME"    # library name
    CARDINAL = "CARDINAL"  # number
    FUNCTION = "FUNCTION"  # aggregate function name
20 |
21 |
class TrainDataGenerator:
    """Generate synthetic NER / intent training samples from text templates.

    Templates come from a CSV file that provides ``intent_id`` and
    ``template`` columns.  Every ``$<entity>`` placeholder in a template is
    replaced by a randomly generated word; the character span of each
    replacement is recorded so the result can be used as NER training data.
    """

    # Location of the template CSV, relative to the current working directory.
    DEFAULT_TEMPLATE_FILE = "../jupyter_text2code/jupyter_text2code_serverextension/data/ner_templates.csv"

    # Intents whose placeholders may expand into a comma separated list
    # ("Allow multiple column syntaxes for group by").
    _MULTI_VALUE_INTENTS = (12,)
    # Entity types that are always replaced by exactly one word.
    _SINGLE_WORD_ENTITIES = ("VARNAME", "FNAME", "LIBNAME", "CARDINAL")

    # Fixed vocabularies; COLNAME and CARDINAL are generated on the fly.
    _WORD_CHOICES = {
        "VARNAME": ["mydf", "df", "zzz", "tempdf"],
        "FUNCTION": ["average", "sum", "min", "max", "maximum", "minimum", "mean", "avg", "count"],
        "FNAME": ["train.csv", "train.json", "test.csv", "validation.csv", "data.csv", "data.xls"],
        "LIBNAME": ["spacy", "matplotlib", "pandas", "numpy", "seaborn", "plotly", "tensorflow", "torch",
                    "transformers"],
    }

    def __init__(self, mode, template_file=None):
        """Load the templates.

        Args:
            mode: ``"ner"`` to produce ``(text, entities)`` pairs, anything
                else to produce intent rows (compared case-insensitively).
            template_file: optional path of the template CSV; defaults to
                ``DEFAULT_TEMPLATE_FILE``.
        """
        if template_file is None:
            template_file = self.DEFAULT_TEMPLATE_FILE
        self.templates_df = pd.read_csv(template_file)
        self.mode = mode  # intent or ner

        self.num_templates = self.templates_df.shape[0]
        print("*" * 10)
        print(self.num_templates, " templates loaded")
        print("*" * 10)

    def _get_entity_type(self, entity_str):
        """Return the entity label whose lower-cased name occurs in ``entity_str``.

        Prints a message and returns ``None`` when no label matches.
        """
        for entity in Entities:
            if entity.value.lower() in entity_str:
                return entity.value
        print("Cannot find entity in db", entity_str)
        return None

    def _get_replacement_word(self, entity_type, debug):
        """Return a random replacement word for ``entity_type``.

        Returns ``None`` for an entity type that is not supported.  ``debug``
        is accepted for call-site compatibility and is not used.
        """
        if entity_type in self._WORD_CHOICES:
            return random.choice(self._WORD_CHOICES[entity_type])
        if entity_type == "COLNAME":
            # Random lower-case column name of 3-20 letters ...
            col_len = randint(3, 20)
            cols = ''.join(random.choices(string.ascii_lowercase, k=col_len))
            # ... which gets an underscore inserted in roughly 3 of 10 cases.
            if randint(1, 10) < 4:
                split_at = randint(1, len(cols) - 1)
                cols = cols[:split_at] + "_" + cols[split_at:]
            return cols
        if entity_type == "CARDINAL":
            return str(randint(1, 100))
        return None

    def _replace_var(self, template, entity_dict, intent_id, debug):
        """Replace the first ``$placeholder`` of ``template``.

        The span(s) of the inserted word(s) are appended to
        ``entity_dict["entities"]`` as ``(start, end, entity_type)`` tuples.

        Returns:
            The ``(template, entity_dict)`` pair after the replacement.

        Raises:
            ValueError: if the template has no ``$``, the ``$`` is not
                directly followed by a name, or the name maps to no entity.
        """
        sign_idx = template.find("$")
        if sign_idx == -1:
            raise ValueError(f"No symbol $ found to replace in template: {template!r}")

        # Text between the first "$" and the next one (or the end of string).
        segment = template.split("$")[1]
        if not segment or segment[0].isspace():
            raise ValueError(f"'$' is not followed by an entity name in template: {template!r}")
        entity_str = segment.split()[0]

        entity_type = self._get_entity_type(entity_str)
        if entity_type is None:
            raise ValueError(f"Unknown entity placeholder ${entity_str} in template: {template!r}")

        # Everything after "$<entity_str>" is kept untouched.
        suffix = template[sign_idx + len(entity_str) + 1:]

        if intent_id not in self._MULTI_VALUE_INTENTS or entity_type in self._SINGLE_WORD_ENTITIES:
            word = self._get_replacement_word(entity_type, debug)
            entity_dict["entities"].append((sign_idx, sign_idx + len(word), entity_type))
            return template[:sign_idx] + word + suffix, entity_dict

        # Decide how many words to generate: mostly one, sometimes two,
        # occasionally three to five.
        n = randint(1, 9)
        if n < 5:
            n_words = 1
        elif n < 8:
            n_words = 2
        else:
            n_words = randint(3, 5)

        pieces = []
        cursor = sign_idx  # offset in the final text where the next word starts
        for i in range(n_words):
            word = self._get_replacement_word(entity_type, debug)
            entity_dict["entities"].append((cursor, cursor + len(word), entity_type))
            cursor += len(word)
            pieces.append(word)
            if i != n_words - 1:
                # Words are separated by either "," or ", ".
                separator = "," if randint(1, 2) == 1 else ", "
                pieces.append(separator)
                cursor += len(separator)

        template = template[:sign_idx] + "".join(pieces) + suffix

        if debug:
            print("Modified template=>", template)
        return template, entity_dict

    def generate_training_row(self, intent_id=None, debug=False):
        """Generate one training sample.

        Args:
            intent_id: restrict the template choice to this intent; a random
                template is used when ``None``.
            debug: print intermediate results.

        Returns:
            ``(text, {"entities": [...]})`` in ``ner`` mode, otherwise
            ``{"intent_id": ..., "intent": text}``.

        Raises:
            ValueError: if no template exists for ``intent_id``.
        """
        if intent_id is not None:
            candidates = self.templates_df[self.templates_df["intent_id"] == intent_id]
            if candidates.empty:
                raise ValueError(f"Intent id {intent_id} not found")
            template = candidates.sample(1)["template"].values[0]
        else:
            picked = self.templates_df.sample(1)
            template = picked["template"].values[0]
            intent_id = picked["intent_id"].values[0]
        if debug:
            print("Template=>", template)

        entity_dict = {"entities": []}
        # Placeholders are substituted one at a time, left to right.
        while template.find("$") != -1:
            template, entity_dict = self._replace_var(template, entity_dict, intent_id, debug=debug)
        if debug:
            print("Generated text=> ", template)
            print("Entities=>", entity_dict)

        if self.mode.lower() == "ner":
            return template, entity_dict
        return {"intent_id": intent_id, "intent": template}

    def generate_training_rows(self, n_rows=10, debug=False):
        """Return a list of ``n_rows`` samples, showing a progress bar."""
        return [self.generate_training_row(debug=debug) for _ in tqdm(range(n_rows))]
153 |
154 |
def ner_data(n_rows=1000):
    """Generate ``n_rows`` NER samples and write them to ``assets/train.json``.

    Each sample is a ``(text, {"entities": [...]})`` pair; the JSON encoder
    stores tuples as arrays.  The ``assets`` directory is created when it
    does not exist yet, so the script also works on a fresh checkout.
    """
    import os

    output_path = 'assets/train.json'
    tdg = TrainDataGenerator(mode="ner")
    rows = tdg.generate_training_rows(n_rows=n_rows)

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(rows, f, ensure_ascii=False, indent=4)
    print("Generated ner data")
164 |
165 |
def intent_data(n_rows=1000):
    """Generate ``n_rows`` intent samples and store them as a CSV file.

    The rows are dictionaries with ``intent_id`` and ``intent`` keys; they
    are written without the dataframe index.
    """
    generator = TrainDataGenerator(mode="intent")
    samples = generator.generate_training_rows(n_rows=n_rows)
    pd.DataFrame(samples).to_csv(
        "../jupyter_text2code/jupyter_text2code_serverextension/data/generated_intents.csv",
        index=False,
    )
    print("Generated intent data")
172 |
173 |
def main(generate_ner_data="yes", generate_intent_data="yes", n_rows=1000):
    """Command line entry point: generate NER and/or intent training data.

    Args:
        generate_ner_data: ``"yes"`` (case-insensitive) to generate NER data.
        generate_intent_data: ``"yes"`` (case-insensitive) to generate intent
            data.
        n_rows: number of samples to generate for each data set.

    The two switches may also arrive as non-string values (python-fire turns
    a bare ``--flag`` into ``True``), so they are converted with ``str``
    before comparing; ``True`` / ``"true"`` count as "yes".
    """

    def _enabled(flag):
        return str(flag).strip().lower() in ("yes", "true")

    if _enabled(generate_ner_data):
        ner_data(n_rows=n_rows)
    if _enabled(generate_intent_data):
        intent_data(n_rows=n_rows)
179 |
180 |
# Expose ``main`` and its keyword arguments as a command line interface.
if __name__ == "__main__":
    fire.Fire(main)
183 |
--------------------------------------------------------------------------------
/scripts/process_awesome_notebooks.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import time
4 | import json
5 | from pathlib import Path
6 |
7 | import fire
8 | import faiss
9 | import numpy as np
10 | import pandas as pd
11 | import tensorflow_hub as hub
12 | from sentence_transformers import SentenceTransformer
13 |
14 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
15 |
16 |
class NaasProcessor(object):
    """Build and evaluate intent search indexes for the awesome-notebooks set.

    Every notebook becomes one "task" (its title) with two embeddings: one
    from Sentence Transformers (prefix ``st``) and one from the TensorFlow Hub
    universal sentence encoder (prefix ``tf``).  The public methods are the
    commands exposed through python-fire.
    """

    exclude = ['.github']
    rootdir = 'input/awesome-notebooks-master'
    # NOTE: both encoders are loaded while the class body executes, i.e. as
    # soon as this module is imported.
    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

    def _get_categories(self, root, exclude):
        """Return the sub-directories directly below ``root`` not in ``exclude``.

        Returns an empty list when ``root`` cannot be walked.
        """
        _, dirs, _ = next(os.walk(root, topdown=True), (root, [], []))
        return [d for d in dirs if d not in exclude]

    def _get_files(self, path):
        """Return the paths of the ``.ipynb`` files directly inside ``path``.

        Returns an empty list when ``path`` cannot be walked.
        """
        _, _, files = next(os.walk(path, topdown=True), (path, [], []))
        return [os.path.join(path, file) for file in files if file.endswith('.ipynb')]

    def _make_tasks_json(self, intent_id=10000):
        """Return one task dictionary per notebook found below ``rootdir``.

        Intent ids are assigned consecutively, starting at ``intent_id``.
        """
        tasks = []
        for category in self._get_categories(self.rootdir, self.exclude):
            for file in self._get_files(os.path.join(self.rootdir, category)):
                # Notebooks are JSON documents; read them as UTF-8 regardless
                # of the platform's default encoding.
                with open(file, 'r', encoding='utf-8') as handle:
                    data = json.load(handle)
                # The title is the first source line of the second cell,
                # without its first two characters and its last character.
                title = data['cells'][1]['source'][0][2:-1]
                tasks.append({
                    'category': category,
                    'intent_id': intent_id,
                    'task': title,
                    'st_embedding': self._get_embedding(title, 'st'),
                    'tf_embedding': self._get_embedding(title, 'tf'),
                    'code': "\n".join("".join(cell['source']) for cell in data['cells']
                                      if cell['cell_type'] == 'code'),
                })
                intent_id += 1
        return tasks

    def create_intent_df_file(self):
        """Write all tasks, indexed by ``intent_id``, to ``data/`` as CSV and pickle."""
        tasks = pd.DataFrame(self._make_tasks_json())
        tasks = tasks.set_index('intent_id')
        Path("data").mkdir(parents=True, exist_ok=True)
        tasks.to_csv('data/awesome-notebooks.csv')
        tasks.to_pickle('data/awesome-notebooks.pkl')

    def _get_embedding(self, command, encoder):
        """Return the embedding of ``command`` as a list.

        ``command`` is lower-cased and stripped of every character that is
        not a letter, digit or space before it is encoded.

        Raises:
            ValueError: if ``encoder`` is neither ``'tf'`` nor ``'st'``.
        """
        command = re.sub('[^A-Za-z0-9 ]+', '', command).lower()
        if encoder == 'tf':
            return list(np.array(self.embed([command])[0]))
        if encoder == 'st':
            return list(np.array(self.model.encode([command])[0]))
        raise ValueError(f"Unknown encoder {encoder!r}; expected 'tf' or 'st'")

    def create_naas_faiss_index(self):
        """Build one inner-product faiss index per encoder from the task pickle.

        The vectors are L2-normalised first and stored under their intent id
        in ``data/{prefix}_naas_intent_index.idx``.
        """
        intent_df = pd.read_pickle('data/awesome-notebooks.pkl').reset_index()
        db_ids = intent_df["intent_id"].values.astype(np.int64)

        for prefix in ['tf', 'st']:
            db_vectors = np.stack(intent_df[f"{prefix}_embedding"].values).astype(np.float32)
            faiss.normalize_L2(db_vectors)
            # Dimension follows the data (the original hard-coded 512 for
            # 'tf' and 384 for 'st').
            intent_index = faiss.IndexIDMap(faiss.IndexFlatIP(db_vectors.shape[1]))
            intent_index.add_with_ids(db_vectors, db_ids)
            faiss.write_index(intent_index, f"data/{prefix}_naas_intent_index.idx")

    def get_intent(self, query, prefix, tasks, k_nearest=1):
        """Return ``(intent_id, task_text)`` of the best match for ``query``.

        Args:
            query: free text to look up.
            prefix: encoder / index to use, ``'tf'`` or ``'st'``.
            tasks: dataframe indexed by intent id with a ``task`` column.
            k_nearest: number of neighbours requested from the index; only
                the first one is returned.
        """
        # Same file name that create_naas_faiss_index() writes.
        index = faiss.read_index(f"data/{prefix}_naas_intent_index.idx")
        query_vector = np.array([self._get_embedding(query, prefix)]).astype(np.float32)
        faiss.normalize_L2(query_vector)
        similarities, similarities_ids = index.search(query_vector, k_nearest)
        best_id = similarities_ids[0][0]
        return best_id, tasks['task'][best_id]

    def _load_eval_tasks(self):
        """Load the task pickle indexed by intent id, without embeddings and code."""
        tasks = pd.read_pickle('data/awesome-notebooks.pkl')
        # create_intent_df_file() stores intent_id as the index already;
        # only move it there when it is still a regular column.
        if 'intent_id' in tasks.columns:
            tasks = tasks.set_index('intent_id')
        return tasks.drop(columns=['tf_embedding', 'st_embedding', 'code'])

    def _match_intents(self, tasks, prefix):
        """Add match columns for the original and the shuffled titles to ``tasks``."""
        variants = (("", lambda title: title), ("_shuffled", self._shuffle_word))
        for suffix, transform in variants:
            matches = [self.get_intent(transform(title), prefix, tasks) for title in tasks['task']]
            id_column = f"{prefix}_matched_intent_id{suffix}"
            tasks[id_column] = [match[0] for match in matches]
            tasks[f"{prefix}_matched_intent_text{suffix}"] = [match[1] for match in matches]
            # The true intent id of every row is its index label.
            tasks[f"{prefix}_is_intent_matched{suffix}"] = (
                tasks[id_column].to_numpy() == tasks.index.to_numpy()
            )

    def eval_models(self):
        """Look every task title up in both indexes and save the results.

        One file per encoder is written to ``output/{prefix}_eval_df.csv``;
        ``intent_id`` is included as a regular column.
        """
        Path("output").mkdir(parents=True, exist_ok=True)
        for prefix in ['tf', 'st']:
            tasks = self._load_eval_tasks()
            self._match_intents(tasks, prefix)
            tasks.reset_index().to_csv(f'output/{prefix}_eval_df.csv', index=False)

    def speed_benchmark(self, prefix, repetitions):
        """Return the seconds needed to run the evaluation ``repetitions`` times."""
        tasks = self._load_eval_tasks()

        start = time.perf_counter()
        for _ in range(repetitions):
            self._match_intents(tasks, prefix)
        return time.perf_counter() - start

    def get_benchmark_data(self, repetitions):
        """Time 0..``repetitions`` evaluation runs for both encoders.

        The timings are written to ``output/speed_benchmarks.csv``.
        """
        data = pd.DataFrame({'repetitions': list(range(repetitions + 1))})
        for prefix in ['tf', 'st']:
            data[f'{prefix}_time_elapsed'] = [self.speed_benchmark(prefix, count)
                                              for count in data['repetitions']]
        Path("output").mkdir(parents=True, exist_ok=True)
        data.to_csv('output/speed_benchmarks.csv', index=False)

    def _shuffle_word(self, sentence):
        """Swap the parts before and after the first ``' - '`` of ``sentence``.

        A sentence without that separator is returned unchanged.
        """
        head, separator, tail = sentence.partition(' - ')
        if not separator:
            return sentence
        return "".join([tail, ' - ', head])
122 |
123 |
# Expose the public methods of ``NaasProcessor`` as command line commands.
if __name__ == "__main__":
    fire.Fire(NaasProcessor)
126 |
--------------------------------------------------------------------------------
/scripts/train_spacy_v3_ner.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import fire
4 | import spacy
5 | import srsly
6 | from spacy.tokens import DocBin
7 |
8 |
class SpaCy3NERTrainer:
    """Command line helpers to prepare data for and train a spaCy v3 NER model."""

    @staticmethod
    def _run_spacy(*args, check=True):
        """Run ``<current interpreter> -m spacy <args>`` and return its exit code.

        The command is started without a shell and with the interpreter that
        runs this script, so the spaCy of the active environment is used and
        arguments containing spaces are passed through unchanged.

        Raises:
            subprocess.CalledProcessError: if ``check`` is true and the
                command exits with a non-zero status.
        """
        import subprocess
        import sys

        command = [sys.executable, "-m", "spacy", *(str(arg) for arg in args)]
        return subprocess.run(command, check=check).returncode

    @staticmethod
    def convert(input_path, output_path, lang='en'):
        """Convert JSON training data into a serialized spaCy ``DocBin``.

        Args:
            input_path: JSON file holding ``[text, {"entities": [[start, end,
                label], ...]}]`` items.
            output_path: destination file; missing parent directories are
                created.
            lang: language code of the blank pipeline used for tokenizing.
        """
        nlp = spacy.blank(lang)
        db = DocBin()
        for text, annot in srsly.read_json(input_path):
            doc = nlp.make_doc(text)
            ents = []
            for start, end, label in annot["entities"]:
                span = doc.char_span(start, end, label=label)
                # char_span() gave no span for these offsets; leave the
                # entity out rather than failing the whole conversion.
                if span is None:
                    print("Skipping entity")
                else:
                    ents.append(span)
            doc.ents = ents
            db.add(doc)

        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        db.to_disk(output_path)

    @staticmethod
    def create_default_config_file(lang='en', pipeline='ner', output='config.cfg', optimize='accuracy'):
        """Create a training config with ``spacy init config``.

        Raises:
            subprocess.CalledProcessError: if the spaCy command fails.
        """
        SpaCy3NERTrainer._run_spacy(
            "init", "config",
            "--lang", lang,
            "--pipeline", pipeline,
            "--optimize", optimize,
            output,
        )

    @staticmethod
    def train_model(config='config.cfg', output='training/', train='corpus/train.spacy', dev='corpus/dev.spacy', vectors='sm'):
        """Download ``en_core_web_<vectors>`` and run ``spacy train``.

        A failed download is only reported, as before; training is still
        attempted afterwards.

        Raises:
            subprocess.CalledProcessError: if the training command fails.
        """
        model_name = f"en_core_web_{vectors}"
        if SpaCy3NERTrainer._run_spacy("download", model_name, check=False) != 0:
            print(f"Could not download {model_name}; continuing with training")
        SpaCy3NERTrainer._run_spacy(
            "train", config,
            "--output", output,
            "--paths.train", train,
            "--paths.dev", dev,
        )
36 |
37 |
# Expose the static methods of ``SpaCy3NERTrainer`` as command line commands.
if __name__ == "__main__":
    fire.Fire(SpaCy3NERTrainer)
40 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | from glob import glob
3 |
4 | import setuptools
5 |
# Set JUPYTER_TEXT2CODE_MODE=cpu (any letter case) to install the CPU build
# of faiss; every other value, or no value at all, selects the GPU build.
MODE = os.environ.get("JUPYTER_TEXT2CODE_MODE")

# Dependencies shared by both modes; the faiss flavour is appended last.
INSTALL_LIBS = [
    "numpy",
    "jupyter",
    "jupyter_contrib_nbextensions",
    "pandas",
    "spacy==3.3.1",
    "sentence_transformers",
    "absl-py",
    "plotly",
    "matplotlib",
]
INSTALL_LIBS.append("faiss-cpu" if (MODE or "").lower() == "cpu" else "faiss-gpu")
23 |
24 |
def get_serverextension_files():
    """Collect the server-extension files as ``data_files`` entries.

    Every file below ``jupyter_text2code/jupyter_text2code_serverextension``
    yields one ``(target_directory, [source_file])`` tuple.  The target is
    the file's directory below ``share/jupyter/nbextensions/`` with the
    leading ``jupyter_text2code`` component replaced by ``jupyter-text2code``.

    Returns:
        list of ``(str, [str])`` tuples, in the order glob reports the files.
    """
    data_files = []
    for f in glob(
        "jupyter_text2code/jupyter_text2code_serverextension/**", recursive=True
    ):
        if not os.path.isfile(f):
            continue
        # glob() reports paths with the platform separator; normalise to "/"
        # so the directory components are split correctly everywhere.
        frags = f.replace(os.sep, "/").split("/")[:-1]
        frags[0] = "jupyter-text2code"
        target_dir = "/".join(["share/jupyter/nbextensions"] + frags)
        data_files.append((target_dir, [f]))
    return data_files
38 |
39 |
# Files copied into the Jupyter data directories on installation: the
# notebook extension itself, the config snippet that refers to the server
# extension, and finally every file of the server extension package.
data_files = [
    (
        "share/jupyter/nbextensions/jupyter-text2code",
        [
            "jupyter_text2code/__init__.py",
            "jupyter_text2code/jupyter_text2code.yaml",
            "jupyter_text2code/main.js",
            "jupyter_text2code/jupyter_text2code.css",
            "jupyter_text2code/jupyter_text2code_lib.py",
        ],
    ),
    (
        "etc/jupyter/jupyter_notebook_config.d",
        ["jupyter_text2code/etc/jupyter-text2code-extension.json"],
    ),
]
data_files += get_serverextension_files()

setuptools.setup(
    # Project metadata
    name="jupyter-text2code",
    version="0.0.2",
    description="Jupyter server extension to assist with data science EDA",
    author="Deepak Rawat and Kartik Godawat",
    url="https://github.com/deepklarity/jupyter-text2code",
    license="MIT License",
    classifiers=[
        "Framework :: Jupyter",
    ],
    # Requirements
    python_requires=">=3.7",
    install_requires=INSTALL_LIBS,
    # Package contents
    packages=setuptools.find_packages(),
    data_files=data_files,
    include_package_data=True,
    zip_safe=False,
)
76 |
--------------------------------------------------------------------------------