├── .gitattributes
├── .github
    └── mergeable.yml
├── .gitignore
├── LICENSE.md
├── README.md
├── analysis
    └── plot_metrics_graph.ipynb
├── data_exploration
    ├── eda_curekart.ipynb
    ├── eda_powerplay11.ipynb
    ├── eda_sofmattress.ipynb
    └── stats.ipynb
├── dataset
    ├── v1
    │   ├── test
    │   │   ├── curekart_test.csv
    │   │   ├── powerplay11_test.csv
    │   │   └── sofmattress_test.csv
    │   └── train
    │   │   ├── curekart_subset_train.csv
    │   │   ├── curekart_train.csv
    │   │   ├── powerplay11_subset_train.csv
    │   │   ├── powerplay11_train.csv
    │   │   ├── sofmattress_subset_train.csv
    │   │   └── sofmattress_train.csv
    └── v2
    │   ├── test
    │       ├── curekart_test.csv
    │       ├── powerplay11_test.csv
    │       └── sofmattress_test.csv
    │   └── train
    │       ├── curekart_train.csv
    │       ├── powerplay11_train.csv
    │       └── sofmattress_train.csv
├── env-requirements.txt
├── platforms
    ├── bert
    │   ├── bert-bot-only-data-es.py
    │   ├── bert_models
    │   │   ├── curekart
    │   │   │   ├── config.json
    │   │   │   ├── eval_results.txt
    │   │   │   ├── model_args.json
    │   │   │   ├── predictions.csv
    │   │   │   ├── predictions.jsonl
    │   │   │   ├── special_tokens_map.json
    │   │   │   ├── tblogs
    │   │   │   │   └── events.out.tfevents.1597350081.haptik-ai-research-mum-ml-2-vm
    │   │   │   ├── tokenizer_config.json
    │   │   │   ├── training_args.bin
    │   │   │   ├── training_progress_scores.csv
    │   │   │   └── vocab.txt
    │   │   ├── curekart_subset
    │   │   │   ├── best_model
    │   │   │   │   ├── config.json
    │   │   │   │   ├── eval_results.txt
    │   │   │   │   ├── model_args.json
    │   │   │   │   ├── special_tokens_map.json
    │   │   │   │   ├── tokenizer_config.json
    │   │   │   │   ├── training_args.bin
    │   │   │   │   └── vocab.txt
    │   │   │   ├── config.json
    │   │   │   ├── eval_results.txt
    │   │   │   ├── model_args.json
    │   │   │   ├── predictions.csv
    │   │   │   ├── predictions.jsonl
    │   │   │   ├── special_tokens_map.json
    │   │   │   ├── tblogs
    │   │   │   │   └── events.out.tfevents.1597351105.haptik-ai-research-mum-ml-2-vm
    │   │   │   ├── tokenizer_config.json
    │   │   │   ├── training_args.bin
    │   │   │   ├── training_progress_scores.csv
    │   │   │   └── vocab.txt
    │   │   ├── powerplay11
    │   │   │   ├── best_model
    │   │   │   │   ├── config.json
    │   │   │   │   ├── eval_results.txt
    │   │   │   │   ├── model_args.json
    │   │   │   │   ├── special_tokens_map.json
    │   │   │   │   ├── tokenizer_config.json
    │   │   │   │   ├── training_args.bin
    │   │   │   │   └── vocab.txt
    │   │   │   ├── config.json
    │   │   │   ├── eval_results.txt
    │   │   │   ├── model_args.json
    │   │   │   ├── predictions.csv
    │   │   │   ├── predictions.jsonl
    │   │   │   ├── special_tokens_map.json
    │   │   │   ├── tblogs
    │   │   │   │   └── events.out.tfevents.1597160596.haptik-ai-research-mum-ml-2-vm
    │   │   │   ├── tokenizer_config.json
    │   │   │   ├── training_args.bin
    │   │   │   ├── training_progress_scores.csv
    │   │   │   └── vocab.txt
    │   │   ├── powerplay11_subset
    │   │   │   ├── best_model
    │   │   │   │   ├── config.json
    │   │   │   │   ├── eval_results.txt
    │   │   │   │   ├── model_args.json
    │   │   │   │   ├── special_tokens_map.json
    │   │   │   │   ├── tokenizer_config.json
    │   │   │   │   ├── training_args.bin
    │   │   │   │   └── vocab.txt
    │   │   │   ├── config.json
    │   │   │   ├── eval_results.txt
    │   │   │   ├── model_args.json
    │   │   │   ├── predictions.csv
    │   │   │   ├── predictions.jsonl
    │   │   │   ├── special_tokens_map.json
    │   │   │   ├── tblogs
    │   │   │   │   └── events.out.tfevents.1597163469.haptik-ai-research-mum-ml-2-vm
    │   │   │   ├── tokenizer_config.json
    │   │   │   ├── training_args.bin
    │   │   │   ├── training_progress_scores.csv
    │   │   │   └── vocab.txt
    │   │   ├── sofmattress
    │   │   │   ├── best_model
    │   │   │   │   ├── config.json
    │   │   │   │   ├── eval_results.txt
    │   │   │   │   ├── model_args.json
    │   │   │   │   ├── special_tokens_map.json
    │   │   │   │   ├── tokenizer_config.json
    │   │   │   │   ├── training_args.bin
    │   │   │   │   └── vocab.txt
    │   │   │   ├── config.json
    │   │   │   ├── eval_results.txt
    │   │   │   ├── model_args.json
    │   │   │   ├── predictions.csv
    │   │   │   ├── predictions.jsonl
    │   │   │   ├── special_tokens_map.json
    │   │   │   ├── tblogs
    │   │   │   │   └── events.out.tfevents.1597159859.haptik-ai-research-mum-ml-2-vm
    │   │   │   ├── tokenizer_config.json
    │   │   │   ├── training_args.bin
    │   │   │   ├── training_progress_scores.csv
    │   │   │   └── vocab.txt
    │   │   └── sofmattress_subset
    │   │   │   ├── best_model
    │   │   │       ├── config.json
    │   │   │       ├── eval_results.txt
    │   │   │       ├── model_args.json
    │   │   │       ├── special_tokens_map.json
    │   │   │       ├── tokenizer_config.json
    │   │   │       ├── training_args.bin
    │   │   │       └── vocab.txt
    │   │   │   ├── config.json
    │   │   │   ├── eval_results.txt
    │   │   │   ├── model_args.json
    │   │   │   ├── predictions.csv
    │   │   │   ├── predictions.jsonl
    │   │   │   ├── special_tokens_map.json
    │   │   │   ├── tblogs
    │   │   │       └── events.out.tfevents.1597162813.haptik-ai-research-mum-ml-2-vm
    │   │   │   ├── tokenizer_config.json
    │   │   │   ├── training_args.bin
    │   │   │   ├── training_progress_scores.csv
    │   │   │   └── vocab.txt
    │   ├── down-requirements.txt
    │   ├── run_bert_experiments.sh
    │   └── up-requirements.txt
    ├── dialogflow
    │   ├── agent_template
    │   │   ├── agent.json
    │   │   └── package.json
    │   ├── generate_preds.ipynb
    │   └── training_data_conversion.ipynb
    ├── haptik
    │   └── convert_data.py
    ├── luis
    │   ├── generate_preds.ipynb
    │   └── training_data_conversion.ipynb
    └── rasa
    │   ├── __init__.py
    │   ├── actions.py
    │   ├── config.yml
    │   ├── credentials.yml
    │   ├── data
    │       └── curekart.json
    │   ├── domain.yml
    │   ├── endpoints.yml
    │   ├── generate_preds.ipynb
    │   ├── tests
    │       └── conversation_tests.md
    │   └── training_data_conversion.ipynb
├── preds
    ├── bert_curekart.csv
    ├── bert_curekart_subset.csv
    ├── bert_powerplay11.csv
    ├── bert_powerplay11_subset.csv
    ├── bert_sofmattress.csv
    ├── bert_sofmattress_subset.csv
    ├── dialogflow_curekart.csv
    ├── dialogflow_curekart_subset.csv
    ├── dialogflow_powerplay11.csv
    ├── dialogflow_powerplay11_subset.csv
    ├── dialogflow_sofmattress.csv
    ├── dialogflow_sofmattress_subset.csv
    ├── haptik_curekart.csv
    ├── haptik_curekart_subset.csv
    ├── haptik_powerplay11.csv
    ├── haptik_powerplay11_subset.csv
    ├── haptik_sofmattress.csv
    ├── haptik_sofmattress_subset.csv
    ├── luis_curekart.csv
    ├── luis_curekart_subset.csv
    ├── luis_powerplay11.csv
    ├── luis_powerplay11_subset.csv
    ├── luis_sofmattress.csv
    ├── luis_sofmattress_subset.csv
    ├── rasa_curekart.csv
    ├── rasa_curekart_subset.csv
    ├── rasa_powerplay11.csv
    ├── rasa_powerplay11_subset.csv
    ├── rasa_sofmattress.csv
    └── rasa_sofmattress_subset.csv
├── prepare_subset_of_data.ipynb
├── results
    ├── bert_curekart.csv
    ├── bert_curekart_subset.csv
    ├── bert_powerplay11.csv
    ├── bert_powerplay11_subset.csv
    ├── bert_sofmattress.csv
    ├── bert_sofmattress_subset.csv
    ├── dialogflow_curekart.csv
    ├── dialogflow_curekart_subset.csv
    ├── dialogflow_powerplay11.csv
    ├── dialogflow_powerplay11_subset.csv
    ├── dialogflow_sofmattress.csv
    ├── dialogflow_sofmattress_subset.csv
    ├── haptik_curekart.csv
    ├── haptik_curekart_subset.csv
    ├── haptik_powerplay11.csv
    ├── haptik_powerplay11_subset.csv
    ├── haptik_sofmattress.csv
    ├── haptik_sofmattress_subset.csv
    ├── luis_curekart.csv
    ├── luis_curekart_subset.csv
    ├── luis_powerplay11.csv
    ├── luis_powerplay11_subset.csv
    ├── luis_sofmattress.csv
    ├── luis_sofmattress_subset.csv
    ├── rasa_curekart.csv
    ├── rasa_curekart_subset.csv
    ├── rasa_powerplay11.csv
    ├── rasa_powerplay11_subset.csv
    ├── rasa_sofmattress.csv
    └── rasa_sofmattress_subset.csv
└── run_evaluation.ipynb


/.gitattributes:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/.gitattributes


--------------------------------------------------------------------------------
/.github/mergeable.yml:
--------------------------------------------------------------------------------
 1 | mergeable:
 2 |   pull_requests:
 3 |     stale:
 4 |       days: 14
 5 |       message: 'This PR is stale. Please follow up!'
 6 | 
 7 |     label:
 8 |       must_include:
 9 |         regex: '(new-feature)|(documentation)|(bug-fixes)|(enhancement)|(needs-migration)|(packages-updated)|(miscellaneous)|(superman)'
10 |         message: 'Can you please add a valid label! [One of (new-feature) / (documentation) / (bug-fixes) / (enhancement) / (needs-migration) / (packages-updated) / (miscellaneous)]'
11 |       must_exclude:
12 |         regex: '(do-not-merge)'
13 |         message: 'This PR is work in progress. Cannot be merged yet.'
14 | 
15 |     description:
16 |       no_empty:
17 |         enabled: true
18 |         message: 'Can you please add a description!'
19 |       must_exclude:
20 |         regex: 'do not merge'
21 |         message: 'This PR is work in progress. Cannot be merged yet.'
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | .ipynb_checkpoints
132 | .idea
133 | 
134 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | LICENSE
2 | ==============
3 | HINT3 dataset is made available under the Open Database License: http://opendatacommons.org/licenses/odbl/1.0/.
4 | Any rights in individual contents of the database are licensed under the Database Contents License: http://opendatacommons.org/licenses/dbcl/1.0/
5 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## HINT3: Raising the bar for Intent Detection in the Wild
  2 | 
  3 | This repository contains datasets and code for the paper
  4 |  "HINT3: Raising the bar for Intent Detection in the Wild" 
  5 |  accepted at EMNLP-2020's
  6 |   [Insights workshop](https://insights-workshop.github.io/)
  7 |   
  8 | Published paper is available [here](https://www.aclweb.org/anthology/2020.insights-1.16/)
  9 | 
 10 | **Update Feb 2021: We noticed in our analysis of the results that
 11 |  a few of the ground truth labels are incorrect. Hence, we're releasing 
 12 |  a new version, v2 of the dataset, present inside dataset/v2 folder. All the
 13 |   results in the paper were obtained on the earlier version of the dataset 
 14 |   present inside dataset/v1, which should be used to exactly reproduce
 15 |    the results presented in the paper.**
 16 | 
 17 | 
 18 | ### Dataset
 19 | 
 20 | - Train and Test sets for SOFMattress, Curekart and Powerplay11
 21 |  are available in `dataset` folder for both Full and Subset variations.
 22 | - You can also use `prepare_subset_of_data.ipynb` notebook to generate
 23 |  subset variations of full datasets.  All the entailment assets
 24 |   generated can be downloaded from [here](https://drive.google.com/drive/folders/1Un97REmtSbxmcNlDgg5qX0awxFoGz0n4?usp=sharing).
 25 | 
 26 | 
 27 | ### EDA
 28 | 
 29 | We have performed exploratory data analysis (EDA) on the datasets; the notebooks are available
 30 |  in the `data_exploration` folder.
 31 | 
 32 | 
 33 | ### Test set predictions
 34 | 
 35 | Predictions from BERT and 4 NLU platforms on test sets used for
 36 |  analysis in the paper are present in `preds` folder. Feel free to
 37 |   do further analysis on these predictions if you want.
 38 | 
 39 | ### Test set metrics
 40 | 
 41 | All the metrics from BERT and 4 NLU platforms on test sets
 42 |  are present in `results` folder for further analysis. Graphs plotted in
 43 |   the paper can be reproduced using the `analysis/plot_metrics_graph.ipynb`
 44 |   notebook.
 45 | 
 46 | 
 47 | ### Reproducibility Instructions
 48 | 
 49 | The scripts to generate training data and predict intents
 50 |  based on the testing data for all the 4 platforms and BERT 
 51 |  based classifier are inside `platforms` folder within
 52 |   their named directories.
 53 | 
 54 | 
 55 | #### Rasa
 56 | 
 57 | - The `training_data_conversion.ipynb` notebook is used to
 58 |  convert the training set into a JSON format that Rasa
 59 |   mandates in order to train its model. The generated JSON
 60 |    file is created inside the `data` directory
 61 | 
 62 | - In order to train a model for one particular bot, keep only
 63 |  that bot's JSON file inside the `data` directory
 64 | 
 65 | - Train the model using this command: `rasa train nlu`
 66 | 
 67 | - Once the model is trained, its tar.gz file will be stored
 68 |  inside the `models` directory based on the current timestamp
 69 | 
 70 | - In order to start the NLU server, run the following command:
 71 |  `rasa run --enable-api -m models/nlu-<timestamp>.tar.gz` where
 72 |   `nlu-<timestamp>.tar.gz` is the name of the model's file
 73 |    created in the previous step
 74 | 
 75 | - In order to generate a report against a testing set file,
 76 |  run the `generate_preds.ipynb` notebook after specifying the
 77 |   name of the bot. Generated predictions will be stored inside
 78 |    `preds` folder
 79 | 
 80 | 
 81 | 
 82 | #### Dialogflow
 83 | - The `training_data_conversion.ipynb` file is used to convert
 84 |  the training set into a bunch of JSON files that Dialogflow
 85 |   mandates in order to train its model. The generated JSON files
 86 |    are stored inside the `intents` directory
 87 | 
 88 | - Log in to the Dialogflow dashboard using a Gmail account
 89 |  and visit `https://dialogflow.cloud.google.com`
 90 | 
 91 | - Dialogflow allows bulk upload of the training set by
 92 |  importing a zip file. The compressed folder has a predefined
 93 |   structure. In order to create this folder, create a copy of
 94 |    the `agent_template` directory and rename the folder as
 95 |     per your bot name. Then, copy all the JSON files created
 96 |      in step 1 and paste them inside the `intents` folder of your
 97 |       agent directory. Then, open the `agent.json` file and edit
 98 |        the `displayName` property to specify the name of the
 99 |         agent of your bot. An agent is analogous to an app or
100 |          a bot. Once these changes are done, compress the agent
101 |           directory into a zip file
102 | 
103 | - Create a new agent on the Dialogflow dashboard
104 |  here: `https://dialogflow.cloud.google.com/?authuser=1#/newAgent`
105 | 
106 | - Delete `Default Fallback Intent` from the intents dashboard
107 | 
108 | - Edit the agent: `https://dialogflow.cloud.google.com/?authuser=1#/editAgent/mt11-agent-ugmx/` (replace `mt11-agent-ugmx` with your own agent's identifier) -> Export & Import -> Import from zip -> upload the agent zip file. This will allow us to bulk upload all intents along with their respective utterances
109 | 
110 | - Go to Edit agent -> ML settings. The default threshold value
111 |  is 0.3. Change it to 0.05 and Train the model
112 | 
113 | - Copy the CURL request from the API playground. We can get
114 |  the authentication token and the model's API endpoint from
115 |   this CURL request
116 | 
117 | - The `generate_preds.ipynb` file will help generate predictions
118 |  for the bot.
119 | 
120 | 
121 | #### LUIS
122 | - The `training_data_conversion.ipynb` file will generate
123 |  a JSON file based on the training set's CSV file
124 | 
125 | - Login to `luis.ai`, go to `https://www.luis.ai/applications`
126 |  and click on `New app for conversation` -> `Import as JSON`.
127 |   Upload the JSON file generated in the first step
128 | 
129 | - Once all the intents are uploaded, click on the `Train` button
130 |  to train the model. Once the model is trained, click on
131 |   `Publish` followed by selecting `Production slot`
132 | 
133 | - Now, go to the `Manage` section of the app and copy the
134 |  App ID. We will be using this App ID in the `generate_preds.ipynb` file to generate our prediction reports
135 | 
136 | - Go to the settings page of your account in order to get
137 |  the `PREDICTION_KEY` and `PREDICTION_ENDPOINT` used in
138 |   `generate_preds.ipynb` file
139 | 
140 | #### Haptik
141 | 
142 | - Access requests for signup on Haptik are processed via the contact
143 |  form at https://haptik.ai/contact-us/
144 | 
145 | - Once you get the access, you'll be able to create bots
146 |  and run predictions using the scripts provided in
147 |   `platforms/haptik`
148 | 
149 | 
150 | #### BERT
151 | 
152 | - Results on BERT can be reproduced using scripts in the
153 |  folder `platforms/bert`
154 | 
155 | - The folder also contains config for each of the models
156 |  trained on Full and Subset variations of datasets
157 |  
158 |  
159 |  ### Citation
160 |  
161 |  If you use this in your research, please consider citing:
162 |  
163 |  ```latex
164 |  @inproceedings{arora-etal-2020-hint3,
165 |     title = "{HINT}3: Raising the bar for Intent Detection in the Wild",
166 |     author = "Arora, Gaurav  and
167 |       Jain, Chirag  and
168 |       Chaturvedi, Manas  and
169 |       Modi, Krupal",
170 |     booktitle = "Proceedings of the First Workshop on Insights from Negative Results in NLP",
171 |     month = nov,
172 |     year = "2020",
173 |     address = "Online",
174 |     publisher = "Association for Computational Linguistics",
175 |     url = "https://www.aclweb.org/anthology/2020.insights-1.16",
176 |     doi = "10.18653/v1/2020.insights-1.16",
177 |     pages = "100--105",
178 |     abstract = "Intent Detection systems in the real world are exposed to complexities of imbalanced datasets containing varying perception of intent, unintended correlations and domain-specific aberrations. To facilitate benchmarking which can reflect near real-world scenarios, we introduce 3 new datasets created from live chatbots in diverse domains. Unlike most existing datasets that are crowdsourced, our datasets contain real user queries received by the chatbots and facilitates penalising unwanted correlations grasped during the training process. We evaluate 4 NLU platforms and a BERT based classifier and find that performance saturates at inadequate levels on test sets because all systems latch on to unintended patterns in training data.",
179 | }
180 |  ```
181 |  
182 |  
183 | 


--------------------------------------------------------------------------------
/data_exploration/stats.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import nltk\n",
 10 |     "import numpy as np\n",
 11 |     "import pandas as pd\n",
 12 |     "import spacy\n",
 13 |     "import pickle\n",
 14 |     "from pathlib import Path\n",
 15 |     "from typing import List\n",
 16 |     "from fastai.text import SpacyTokenizer"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "metadata": {},
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "OOS_CLASS = 'NO_NODES_DETECTED'"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 3,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "train_dir = Path('../train/')\n",
 35 |     "test_dir = Path('../test/')"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 4,
 41 |    "metadata": {},
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "nlp = spacy.load('en_core_web_lg',  disable=['ner', 'parser'])"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 5,
 50 |    "metadata": {},
 51 |    "outputs": [
 52 |     {
 53 |      "name": "stdout",
 54 |      "output_type": "stream",
 55 |      "text": [
 56 |       "60000\n"
 57 |      ]
 58 |     }
 59 |    ],
 60 |    "source": [
 61 |     "with open(\"/home/ubuntu/gaurav/.fastai/models/wt103-fwd/itos_wt103.pkl\", \"rb\") as input_file:\n",
 62 |     "    wiki_vocab = set(pickle.load(input_file))\n",
 63 |     "print(len(wiki_vocab))\n",
 64 |     "tokenizer = SpacyTokenizer('en')"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 6,
 70 |    "metadata": {},
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "def dist(a: List[str], b: List[str], n: int = 3) -> float:\n",
 74 |     "    answer = 1.0\n",
 75 |     "    for i in range(1, n + 1):\n",
 76 |     "        a_i = set(nltk.ngrams(a, i))\n",
 77 |     "        b_i = set(nltk.ngrams(b, i))\n",
 78 |     "        intersection = len(a_i & b_i)\n",
 79 |     "        union = len(a_i | b_i)\n",
 80 |     "        if union:\n",
 81 |     "            answer -= (intersection / union)\n",
 82 |     "    return answer\n",
 83 |     "#     return max(0, answer)"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 7,
 89 |    "metadata": {},
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "def diversity(train_df: pd.DataFrame) -> float:\n",
 93 |     "    div, labelsc = 0, 0\n",
 94 |     "    for label, group_df in train_df.groupby('label'):\n",
 95 |     "        acc = 0\n",
 96 |     "        labelsc += 1\n",
 97 |     "        for text_a in group_df['sentence_tokens']:\n",
 98 |     "            for text_b in group_df['sentence_tokens']:\n",
 99 |     "                d = dist(text_a, text_b)\n",
100 |     "                acc += d\n",
101 |     "        div += (acc / (len(group_df) * len(group_df)))\n",
102 |     "    return div / labelsc"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 8,
108 |    "metadata": {},
109 |    "outputs": [],
110 |    "source": [
111 |     "def coverage(train_df: pd.DataFrame, test_df: pd.DataFrame) -> float:\n",
112 |     "    cov, labelsc = 0, 0\n",
113 |     "    for label, group_df in test_df.groupby('label'):\n",
114 |     "        if label == OOS_CLASS:\n",
115 |     "            continue\n",
116 |     "        labelsc += 1\n",
117 |     "        train_group = train_df[train_df['label'] == label]\n",
118 |     "        acc = 0\n",
119 |     "        for text_b in group_df['sentence_tokens']:\n",
120 |     "            acc += max(1.0 - dist(text_a, text_b) for text_a in train_group['sentence_tokens'])\n",
121 |     "        cov += (acc / len(group_df))\n",
122 |     "    return cov / labelsc"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": 9,
128 |    "metadata": {},
129 |    "outputs": [],
130 |    "source": [
131 |     "def read_file(path: Path) -> pd.DataFrame:\n",
132 |     "    print(f'Reading file {path}')\n",
133 |     "    df = pd.read_csv(str(path))\n",
134 |     "    df['sentence_tokens'] = df['sentence'].apply(lambda sent: [tok.text for tok in nlp(sent.lower().strip())])\n",
135 |     "    return df"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": 10,
141 |    "metadata": {},
142 |    "outputs": [],
143 |    "source": [
144 |     "def df_stats(df):\n",
145 |     "    all_toks = set()\n",
146 |     "    oov_toks = set()\n",
147 |     "    tok_lens = []\n",
148 |     "    for sentence in df['sentence']:\n",
149 |     "        doc = tokenizer.tokenizer(sentence.lower().strip())\n",
150 |     "        tok_lens.append(len(doc))\n",
151 |     "        for tok in doc:\n",
152 |     "            all_toks.add(tok)\n",
153 |     "            if tok not in wiki_vocab:\n",
154 |     "                oov_toks.add(tok)\n",
155 |     "    return {\n",
156 |     "        'len': len(df),\n",
157 |     "        'in-scope': len(df[df['label'] != OOS_CLASS]),\n",
158 |     "        'oos': len(df[df['label'] == OOS_CLASS]),\n",
159 |     "        'labels': len(df[df['label'] != OOS_CLASS]['label'].unique()),\n",
160 |     "        'tok_min': min(tok_lens),\n",
161 |     "        'tok_max': max(tok_lens),\n",
162 |     "        'tok_mean': np.mean(tok_lens),\n",
163 |     "        'tok_std': np.std(tok_lens),\n",
164 |     "        'oov_percentage': len(oov_toks) / len(all_toks),\n",
165 |     "    }"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "code",
170 |    "execution_count": 11,
171 |    "metadata": {},
172 |    "outputs": [
173 |     {
174 |      "name": "stdout",
175 |      "output_type": "stream",
176 |      "text": [
177 |       "Reading file ../train/sofmattress_train.csv\n",
178 |       "Reading file ../test/sofmattress_test.csv\n",
179 |       "train stats: {'len': 328, 'in-scope': 328, 'oos': 0, 'labels': 21, 'tok_min': 1, 'tok_max': 28, 'tok_mean': 4.414634146341464, 'tok_std': 2.542090648688811, 'oov_percentage': 0.05084745762711865}\n",
180 |       "test stats: {'len': 397, 'in-scope': 231, 'oos': 166, 'labels': 20, 'tok_min': 1, 'tok_max': 53, 'tok_mean': 6.607052896725441, 'tok_std': 5.770746222932882, 'oov_percentage': 0.20937042459736457}\n",
181 |       "Diversity: 0.6168521547770577\n",
182 |       "Coverage: 0.4411860589631133\n",
183 |       "Reading file ../train/sofmattress_subset_train.csv\n",
184 |       "Reading file ../test/sofmattress_test.csv\n",
185 |       "train stats: {'len': 180, 'in-scope': 180, 'oos': 0, 'labels': 21, 'tok_min': 1, 'tok_max': 28, 'tok_mean': 5.338888888888889, 'tok_std': 2.828749014609518, 'oov_percentage': 0.049429657794676805}\n",
186 |       "test stats: {'len': 397, 'in-scope': 231, 'oos': 166, 'labels': 20, 'tok_min': 1, 'tok_max': 53, 'tok_mean': 6.607052896725441, 'tok_std': 5.770746222932882, 'oov_percentage': 0.20937042459736457}\n",
187 |       "Diversity: 0.44642491013275915\n",
188 |       "Coverage: 0.3599558172984878\n",
189 |       "Reading file ../train/powerplay11_train.csv\n",
190 |       "Reading file ../test/powerplay11_test.csv\n",
191 |       "train stats: {'len': 471, 'in-scope': 471, 'oos': 0, 'labels': 59, 'tok_min': 1, 'tok_max': 31, 'tok_mean': 5.021231422505308, 'tok_std': 3.5458141003642187, 'oov_percentage': 0.05102040816326531}\n",
192 |       "test stats: {'len': 983, 'in-scope': 275, 'oos': 708, 'labels': 58, 'tok_min': 1, 'tok_max': 73, 'tok_mean': 7.2868769074262465, 'tok_std': 6.8174198446968575, 'oov_percentage': 0.3639822447685479}\n",
193 |       "Diversity: -0.014979018179958047\n",
194 |       "Coverage: 0.507159939637793\n",
195 |       "Reading file ../train/powerplay11_subset_train.csv\n",
196 |       "Reading file ../test/powerplay11_test.csv\n",
197 |       "train stats: {'len': 261, 'in-scope': 261, 'oos': 0, 'labels': 59, 'tok_min': 1, 'tok_max': 31, 'tok_mean': 6.0344827586206895, 'tok_std': 4.166409028895613, 'oov_percentage': 0.05070422535211268}\n",
198 |       "test stats: {'len': 983, 'in-scope': 275, 'oos': 708, 'labels': 58, 'tok_min': 1, 'tok_max': 73, 'tok_mean': 7.2868769074262465, 'tok_std': 6.8174198446968575, 'oov_percentage': 0.3639822447685479}\n",
199 |       "Diversity: -0.3480264198264641\n",
200 |       "Coverage: 0.4201842473694509\n",
201 |       "Reading file ../train/curekart_train.csv\n",
202 |       "Reading file ../test/curekart_test.csv\n",
203 |       "train stats: {'len': 600, 'in-scope': 600, 'oos': 0, 'labels': 28, 'tok_min': 1, 'tok_max': 27, 'tok_mean': 6.29, 'tok_std': 4.032273965560045, 'oov_percentage': 0.10204081632653061}\n",
204 |       "test stats: {'len': 991, 'in-scope': 452, 'oos': 539, 'labels': 21, 'tok_min': 1, 'tok_max': 44, 'tok_mean': 6.424823410696266, 'tok_std': 5.02407416696985, 'oov_percentage': 0.30474934036939316}\n",
205 |       "Diversity: 0.5431161766997563\n",
206 |       "Coverage: 0.7164532041212871\n",
207 |       "Reading file ../train/curekart_subset_train.csv\n",
208 |       "Reading file ../test/curekart_test.csv\n",
209 |       "train stats: {'len': 413, 'in-scope': 413, 'oos': 0, 'labels': 28, 'tok_min': 1, 'tok_max': 27, 'tok_mean': 7.196125907990314, 'tok_std': 4.236676538359367, 'oov_percentage': 0.09433962264150944}\n",
210 |       "test stats: {'len': 991, 'in-scope': 452, 'oos': 539, 'labels': 21, 'tok_min': 1, 'tok_max': 44, 'tok_mean': 6.424823410696266, 'tok_std': 5.02407416696985, 'oov_percentage': 0.30474934036939316}\n",
211 |       "Diversity: 0.4406516266397046\n",
212 |       "Coverage: 0.5727839744833852\n"
213 |      ]
214 |     }
215 |    ],
216 |    "source": [
217 |     "datasets = ['sofmattress', 'powerplay11', 'curekart']\n",
218 |     "for dataset in datasets:\n",
219 |     "    for suf in ['', '_subset']:\n",
220 |     "        train_df = read_file(train_dir / f'{dataset}{suf}_train.csv')\n",
221 |     "        test_df = read_file(test_dir / f'{dataset}_test.csv')\n",
222 |     "        print('train stats:', df_stats(train_df))\n",
223 |     "        print('test stats:', df_stats(test_df))\n",
224 |     "        print('Diversity:', diversity(train_df))\n",
225 |     "        print('Coverage:', coverage(train_df, test_df))"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": null,
231 |    "metadata": {},
232 |    "outputs": [],
233 |    "source": []
234 |   }
235 |  ],
236 |  "metadata": {
237 |   "kernelspec": {
238 |    "display_name": "Python (py36)",
239 |    "language": "python",
240 |    "name": "py36"
241 |   },
242 |   "language_info": {
243 |    "codemirror_mode": {
244 |     "name": "ipython",
245 |     "version": 3
246 |    },
247 |    "file_extension": ".py",
248 |    "mimetype": "text/x-python",
249 |    "name": "python",
250 |    "nbconvert_exporter": "python",
251 |    "pygments_lexer": "ipython3",
252 |    "version": "3.6.8"
253 |   }
254 |  },
255 |  "nbformat": 4,
256 |  "nbformat_minor": 4
257 | }
258 | 


--------------------------------------------------------------------------------
/dataset/v1/train/sofmattress_subset_train.csv:
--------------------------------------------------------------------------------
  1 | sentence,label
  2 | How do I know the size of my bed?,WHAT_SIZE_TO_ORDER
  3 | How do I know what size to order?,WHAT_SIZE_TO_ORDER
  4 | Can I please have the size chart?,WHAT_SIZE_TO_ORDER
  5 | Want to know the custom size chart,WHAT_SIZE_TO_ORDER
  6 | Can you help with the size?,WHAT_SIZE_TO_ORDER
  7 | Help me with the size chart,WHAT_SIZE_TO_ORDER
  8 | What are the available mattress sizes,WHAT_SIZE_TO_ORDER
  9 | What are the available sizes?,WHAT_SIZE_TO_ORDER
 10 | What are the sizes available?,WHAT_SIZE_TO_ORDER
 11 | Show me all available sizes,WHAT_SIZE_TO_ORDER
 12 | Share the size structure,WHAT_SIZE_TO_ORDER
 13 |  What size to order?,WHAT_SIZE_TO_ORDER
 14 | What are the sizes,WHAT_SIZE_TO_ORDER
 15 | Mattress size,WHAT_SIZE_TO_ORDER
 16 | King Size,WHAT_SIZE_TO_ORDER
 17 | Inches,WHAT_SIZE_TO_ORDER
 18 | Length,WHAT_SIZE_TO_ORDER
 19 | 6*3,WHAT_SIZE_TO_ORDER
 20 | How much does a SOF mattress cost,MATTRESS_COST
 21 | Want to know the price,MATTRESS_COST
 22 | What will be the price,MATTRESS_COST
 23 | What is the cost,MATTRESS_COST
 24 | Price of mattress,MATTRESS_COST
 25 | I need price,MATTRESS_COST
 26 | Cost of Bed,MATTRESS_COST
 27 | Price Range,MATTRESS_COST
 28 | Low price,MATTRESS_COST
 29 | MRP,MATTRESS_COST
 30 | Not happy with the product please help me to return,RETURN_EXCHANGE
 31 | I want to return my mattress,RETURN_EXCHANGE
 32 | How can I replace the mattress.,RETURN_EXCHANGE
 33 | Help me with exchange process,RETURN_EXCHANGE
 34 | How do I return it,RETURN_EXCHANGE
 35 | Need my money back,RETURN_EXCHANGE
 36 | I want refund,RETURN_EXCHANGE
 37 | Replacement policy,RETURN_EXCHANGE
 38 | What is the price for size (x ft x y ft)? What is the price for size (x inches x y inches)?,DISTRIBUTORS
 39 |  Where can I see the product before I buy,DISTRIBUTORS
 40 | Do you have any showrooms in Delhi state,DISTRIBUTORS
 41 | Do you have any distributors in Mumbai city,DISTRIBUTORS
 42 | Do you have any retailers in Pune city,DISTRIBUTORS
 43 | Can I visit SOF mattress showroom,DISTRIBUTORS
 44 | Any shop that I can visit,DISTRIBUTORS
 45 | You have any branch,DISTRIBUTORS
 46 | We want dealer ship,DISTRIBUTORS
 47 | Nearby Show room,DISTRIBUTORS
 48 | Shop near by,DISTRIBUTORS
 49 | Demo store,DISTRIBUTORS
 50 | Need dealership,DISTRIBUTORS
 51 | Distributors/Retailers/Showrooms,DISTRIBUTORS
 52 | What is the difference between the Ergo & Ortho variants,COMPARISON
 53 | Is the mattress good for my back,COMPARISON
 54 | I wanna know the difference,COMPARISON
 55 | Difference between the products,COMPARISON
 56 | Compare the 2 mattresses,COMPARISON
 57 | Which mattress to buy?,COMPARISON
 58 |  It's been 30 days my product haven't received,DELAY_IN_DELIVERY
 59 | I did not receive my order yet,DELAY_IN_DELIVERY
 60 | It's too late to get delivered,DELAY_IN_DELIVERY
 61 | It's been so many days,DELAY_IN_DELIVERY
 62 | Why so many days,DELAY_IN_DELIVERY
 63 | Almost 1 month over,DELAY_IN_DELIVERY
 64 | Why so long?,DELAY_IN_DELIVERY
 65 | Delivery is delayed,DELAY_IN_DELIVERY
 66 | It's delayed,DELAY_IN_DELIVERY
 67 | Do you offer Zero Percent EMI payment options?,EMI
 68 | I want to buy this in installments,EMI
 69 | I want it on 0% interest,EMI
 70 | I want to buy on EMI,EMI
 71 | Do you accept Paisa EMI card,EMI
 72 | Can we buy through Paisa finance,EMI
 73 | You guys provide EMI option?,EMI
 74 | How to get in EMI,EMI
 75 | What is minimum down payment,EMI
 76 | No cost EMI is available?,EMI
 77 | I want in installment,EMI
 78 | what about emi options,EMI
 79 | Paisa finance service available,EMI
 80 | 0% EMI.,EMI
 81 | Down payments,EMI
 82 | Installments,EMI
 83 | Can I buy pillows from here,PILLOWS
 84 | I want to buy pillows,PILLOWS
 85 | Can I also have pillows,PILLOWS
 86 | Can I get pillows?,PILLOWS
 87 | May I please know about the offers,OFFERS
 88 | Tell me about the latest offers,OFFERS
 89 | What are the available offers,OFFERS
 90 | Want to know the discount ,OFFERS
 91 | Give me some discount,OFFERS
 92 | Discount,OFFERS
 93 | Can you make delivery on this pin code?,CHECK_PINCODE
 94 | Will you be able to deliver here,CHECK_PINCODE
 95 | Can you please deliver on my pincode,CHECK_PINCODE
 96 | Check pincode,CHECK_PINCODE
 97 | How long is the warranty you offer on your mattresses and what does it cover,WARRANTY
 98 | Does mattress cover is included in warranty,WARRANTY
 99 | Tell me about the product warranty,WARRANTY
100 | What is the warranty period?,WARRANTY
101 | Want to know about warranty,WARRANTY
102 | would interested in warranty details,WARRANTY
103 | Do you offer warranty ,WARRANTY
104 | Is the 100 night return trial applicable for custom size as well,100_NIGHT_TRIAL_OFFER
105 | How does the 100 night trial work,100_NIGHT_TRIAL_OFFER
106 |  Can you give me 100 night trial ,100_NIGHT_TRIAL_OFFER
107 | Can I try a mattress first,100_NIGHT_TRIAL_OFFER
108 | What is 100 Night trial offer,100_NIGHT_TRIAL_OFFER
109 | What is the 100-night offer,100_NIGHT_TRIAL_OFFER
110 | How to enroll for trial,100_NIGHT_TRIAL_OFFER
111 | I want to check offers,100_NIGHT_TRIAL_OFFER
112 | Can I get free trial,100_NIGHT_TRIAL_OFFER
113 | Trial offer on customisation,100_NIGHT_TRIAL_OFFER
114 | do you provide exchange,100_NIGHT_TRIAL_OFFER
115 | Need 100 days trial,100_NIGHT_TRIAL_OFFER
116 | 100 Nights trial version,100_NIGHT_TRIAL_OFFER
117 | 100 night trial,100_NIGHT_TRIAL_OFFER
118 | 100 days trial,100_NIGHT_TRIAL_OFFER
119 | 100 free Nights,100_NIGHT_TRIAL_OFFER
120 | 100 night,100_NIGHT_TRIAL_OFFER
121 | What are the key features of the SOF Ortho mattress,ORTHO_FEATURES
122 | I m looking mattress for slip disc problem,ORTHO_FEATURES
123 | I am cervical and Lombard section problem,ORTHO_FEATURES
124 | Tell me about SOF Ortho mattress,ORTHO_FEATURES
125 | Do we have anything for backache,ORTHO_FEATURES
126 | I have back pain issue,ORTHO_FEATURES
127 | Is there orthopedic mattress available,ORTHO_FEATURES
128 | Have a back problem,ORTHO_FEATURES
129 | ortho,ORTHO_FEATURES
130 | How is SOF different from other mattress brands,ABOUT_SOF_MATTRESS
131 | How is SOF mattress different from,ABOUT_SOF_MATTRESS
132 | Tell me about SOF mattresses,ABOUT_SOF_MATTRESS
133 | Who are SOF mattress,ABOUT_SOF_MATTRESS
134 | Tell me about company,ABOUT_SOF_MATTRESS
135 | What is SOF,ABOUT_SOF_MATTRESS
136 | What are the key features of the SOF Ergo mattress,ERGO_FEATURES
137 | Tell me about SOF Ergo mattress,ERGO_FEATURES
138 | Does this have ergonomic support?,ERGO_FEATURES
139 | Features of Ergo mattress,ERGO_FEATURES
140 | What about ergo,ERGO_FEATURES
141 | Want to talk to an live agent,LEAD_GEN
142 | Need a call from your representative,LEAD_GEN
143 | Want to get in touch,LEAD_GEN
144 | I want to buy this,LEAD_GEN
145 | I want to order,LEAD_GEN
146 | Connect to an agent,LEAD_GEN
147 | Get in Touch,LEAD_GEN
148 |  Schedule a callback ,LEAD_GEN
149 | Interested in buying,LEAD_GEN
150 | I want to cancel my order,CANCEL_ORDER
151 | How can I cancel my order,CANCEL_ORDER
152 | Cancellation status,CANCEL_ORDER
153 | Do you offer COD to my pincode?,COD
154 | Is it possible to COD,COD
155 | Cash on delivery is acceptable?,COD
156 | Can pay later on delivery ,COD
157 | I want to change the size of the mattress.,SIZE_CUSTOMIZATION
158 | Need some help in changing size of the mattress,SIZE_CUSTOMIZATION
159 | Will I get an option to Customise the size,SIZE_CUSTOMIZATION
160 | How can I order a custom sized mattress,SIZE_CUSTOMIZATION
161 | Can mattress size be customised?,SIZE_CUSTOMIZATION
162 | Customisation is possible?,SIZE_CUSTOMIZATION
163 | Custom size,SIZE_CUSTOMIZATION
164 | What are the SOF mattress products,PRODUCT_VARIANTS
165 | I want to buy a mattress,PRODUCT_VARIANTS
166 | Tell me about SOF mattress features,PRODUCT_VARIANTS
167 | What are the product variants,PRODUCT_VARIANTS
168 | Help me with different products,PRODUCT_VARIANTS
169 | I want to check products,PRODUCT_VARIANTS
170 | I am looking the mattress,PRODUCT_VARIANTS
171 | Which product is best,PRODUCT_VARIANTS
172 |  Which mattress is best,PRODUCT_VARIANTS
173 | Type of foam used,PRODUCT_VARIANTS
174 | Show me products,PRODUCT_VARIANTS
175 | Show more mattress,PRODUCT_VARIANTS
176 | When will the order be delivered to me?,ORDER_STATUS
177 | I want updates of my order,ORDER_STATUS
178 | What is my order status?,ORDER_STATUS
179 | Where is my product,ORDER_STATUS
180 | When can we expect,ORDER_STATUS
181 | Order related,ORDER_STATUS
182 | 


--------------------------------------------------------------------------------
/env-requirements.txt:
--------------------------------------------------------------------------------
  1 | # This file may be used to create an environment using:
  2 | # $ conda create --name <env> --file <this file>
  3 | # platform: linux-64
  4 | _libgcc_mutex=0.1=main
  5 | absl-py=0.9.0=pypi_0
  6 | adal=1.2.2=pypi_0
  7 | aiofiles=0.5.0=pypi_0
  8 | aiohttp=3.6.2=pypi_0
  9 | alibi=0.3.2=pypi_0
 10 | allennlp=1.0.0=pypi_0
 11 | allennlp-models=1.0.0=pypi_0
 12 | ansiwrap=0.8.4=pypi_0
 13 | apex=0.1=pypi_0
 14 | applicationinsights=0.11.9=pypi_0
 15 | apscheduler=3.6.3=pypi_0
 16 | asn1crypto=0.24.0=pypi_0
 17 | astor=0.8.0=pypi_0
 18 | astunparse=1.6.3=pypi_0
 19 | async-generator=1.10=pypi_0
 20 | async-timeout=3.0.1=pypi_0
 21 | attrs=19.3.0=py_0
 22 | awscli=1.16.255=pypi_0
 23 | azure-common=1.1.23=pypi_0
 24 | azure-graphrbac=0.61.1=pypi_0
 25 | azure-mgmt-authorization=0.60.0=pypi_0
 26 | azure-mgmt-containerregistry=2.8.0=pypi_0
 27 | azure-mgmt-keyvault=2.0.0=pypi_0
 28 | azure-mgmt-resource=3.0.0=pypi_0
 29 | azure-mgmt-storage=4.0.0=pypi_0
 30 | azure-storage-blob=2.0.1=pypi_0
 31 | azure-storage-common=2.0.0=pypi_0
 32 | azureml-automl-core=1.0.45.1=pypi_0
 33 | azureml-contrib-explain-model=1.0.45=pypi_0
 34 | azureml-contrib-notebook=1.0.45=pypi_0
 35 | azureml-contrib-opendatasets=1.0.45=pypi_0
 36 | azureml-contrib-server=1.0.45=pypi_0
 37 | azureml-contrib-services=1.0.45=pypi_0
 38 | azureml-core=1.0.45=pypi_0
 39 | azureml-dataprep=1.1.7=pypi_0
 40 | azureml-dataprep-native=13.0.0=pypi_0
 41 | azureml-explain-model=1.0.45=pypi_0
 42 | azureml-pipeline=1.0.45=pypi_0
 43 | azureml-pipeline-core=1.0.45=pypi_0
 44 | azureml-pipeline-steps=1.0.45=pypi_0
 45 | azureml-sdk=1.0.45=pypi_0
 46 | azureml-telemetry=1.0.45=pypi_0
 47 | azureml-train=1.0.45=pypi_0
 48 | azureml-train-automl=1.0.45.1=pypi_0
 49 | azureml-train-core=1.0.45=pypi_0
 50 | azureml-train-restclients-hyperdrive=1.0.45=pypi_0
 51 | azureml-widgets=1.0.45.1=pypi_0
 52 | backcall=0.2.0=py_0
 53 | backports-csv=1.0.7=pypi_0
 54 | backports-tempfile=1.0=pypi_0
 55 | backports-weakref=1.0.post1=pypi_0
 56 | beautifulsoup4=4.9.1=py36_0
 57 | blas=1.0=mkl
 58 | bleach=3.1.5=py_0
 59 | blessings=1.7=pypi_0
 60 | boto=2.49.0=pypi_0
 61 | boto3=1.14.39=pypi_0
 62 | botocore=1.17.39=pypi_0
 63 | bottleneck=1.3.2=py36heb32a55_1
 64 | brotlipy=0.7.0=py36h7b6447c_1000
 65 | bs4=0.0.1=pypi_0
 66 | ca-certificates=2020.6.24=0
 67 | cachetools=4.1.1=pypi_0
 68 | captum=0.2.0=pypi_0
 69 | certifi=2020.6.20=py36_0
 70 | cffi=1.14.0=py36h2e261b9_0
 71 | chainer=6.2.0=pypi_0
 72 | chardet=3.0.4=py36_1003
 73 | cheroot=8.2.1=pypi_0
 74 | cherrypy=18.5.0=pypi_0
 75 | click=7.0=pypi_0
 76 | cloudpickle=1.2.1=pypi_0
 77 | colorama=0.4.1=pypi_0
 78 | colorclass=2.2.0=pypi_0
 79 | coloredlogs=10.0=pypi_0
 80 | colorhash=1.0.2=pypi_0
 81 | conllu=3.0=pypi_0
 82 | contextlib2=0.5.5=pypi_0
 83 | contextvars=2.4=pypi_0
 84 | cryptography=2.9.2=py36h1ba5d50_0
 85 | cudatoolkit=10.1.243=h6bb024c_0
 86 | cycler=0.10.0=py36_0
 87 | cymem=2.0.2=py36he1b5a44_0
 88 | cython=0.29.11=py36he6710b0_0
 89 | cython-blis=0.2.4=py36h516909a_1
 90 | dataclasses=0.7=pypi_0
 91 | dbus=1.13.16=hb2f20db_0
 92 | decorator=4.4.2=py_0
 93 | defusedxml=0.6.0=py_0
 94 | distro=1.4.0=pypi_0
 95 | dnspython=1.16.0=pypi_0
 96 | docker=4.0.2=pypi_0
 97 | docopt=0.6.2=pypi_0
 98 | docutils=0.14=pypi_0
 99 | docx=0.2.4=pypi_0
100 | dotnetcore2=2.1.8=pypi_0
101 | en-core-web-lg=2.1.0=pypi_0
102 | en-core-web-md=2.1.0=pypi_0
103 | en-core-web-sm=2.1.0=pypi_0
104 | entrypoints=0.3=py36_0
105 | expat=2.2.9=he6710b0_2
106 | fastai=1.0.61=pypi_0
107 | fastprogress=0.2.2=py_0
108 | fbmessenger=6.0.0=pypi_0
109 | feedparser=5.2.1=pypi_0
110 | filelock=3.0.12=pypi_0
111 | flask=1.1.0=pypi_0
112 | fontconfig=2.13.0=h9420a91_0
113 | freetype=2.10.2=h5ab3b9f_0
114 | future=0.17.1=pypi_0
115 | gast=0.3.3=pypi_0
116 | gdown=3.8.3=pypi_0
117 | gensim=3.7.3=pypi_0
118 | gevent=1.5.0=pypi_0
119 | glib=2.63.1=h5a9c865_0
120 | google-auth=1.20.1=pypi_0
121 | google-auth-oauthlib=0.4.1=pypi_0
122 | google-pasta=0.2.0=pypi_0
123 | gpustat=1.0.0.dev0=pypi_0
124 | greenlet=0.4.16=pypi_0
125 | grpcio=1.22.0=pypi_0
126 | gst-plugins-base=1.14.0=hbbd80ab_1
127 | gstreamer=1.14.0=hb453b48_1
128 | h11=0.8.1=pypi_0
129 | h2=3.2.0=pypi_0
130 | h5py=2.10.0=pypi_0
131 | horovod=0.16.4=pypi_0
132 | hpack=3.0.0=pypi_0
133 | hstspreload=2020.8.11=pypi_0
134 | httplib2=0.18.1=pypi_0
135 | httptools=0.1.1=pypi_0
136 | httpx=0.9.3=pypi_0
137 | humanfriendly=8.2=pypi_0
138 | hyperframe=5.2.0=pypi_0
139 | icu=58.2=he6710b0_3
140 | idna=2.8=pypi_0
141 | idna-ssl=1.1.0=pypi_0
142 | imageio=2.5.0=pypi_0
143 | immutables=0.14=pypi_0
144 | importlib-metadata=1.1.0=pypi_0
145 | importlib-resources=1.0.2=pypi_0
146 | importlib_metadata=1.7.0=0
147 | inltk=0.8.1=pypi_0
148 | intel-openmp=2020.1=217
149 | ipdb=0.12.2=pypi_0
150 | ipykernel=5.1.1=py36h39e3cac_0
151 | ipython=7.16.1=py36h5ca1d4c_0
152 | ipython_genutils=0.2.0=py36_0
153 | ipywidgets=7.5.0=pypi_0
154 | isodate=0.6.0=pypi_0
155 | itsdangerous=1.1.0=pypi_0
156 | jamspell=0.0.11=pypi_0
157 | jaraco-classes=2.0=pypi_0
158 | jaraco-collections=2.1=pypi_0
159 | jaraco-functools=2.0=pypi_0
160 | jaraco-text=3.2.0=pypi_0
161 | jedi=0.17.1=py36_0
162 | jeepney=0.4=pypi_0
163 | jinja2=2.11.2=py_0
164 | jmespath=0.9.4=pypi_0
165 | joblib=0.13.2=py36_0
166 | jpeg=9b=h024ee3a_2
167 | jsonform=0.0.2=pypi_0
168 | jsonnet=0.16.0=pypi_0
169 | jsonpickle=1.4.1=pypi_0
170 | jsonschema=3.2.0=py36_0
171 | jsonsir=0.0.2=pypi_0
172 | jupyter=1.0.0=pypi_0
173 | jupyter-console=6.0.0=pypi_0
174 | jupyter_client=6.1.5=py_0
175 | jupyter_core=4.6.3=py36_0
176 | jupyterlab=1.2.2=pypi_0
177 | kafka-python=1.4.7=pypi_0
178 | keras=2.2.4=pypi_0
179 | keras-applications=1.0.8=pypi_0
180 | keras-preprocessing=1.1.0=pypi_0
181 | keras2onnx=1.5.0=pypi_0
182 | kiwisolver=1.2.0=py36hfd86e86_0
183 | lab=4.2=pypi_0
184 | lcms2=2.11=h396b838_0
185 | libedit=3.1.20191231=h14c3975_1
186 | libffi=3.2.1=hd88cf55_4
187 | libgcc-ng=9.1.0=hdf63c60_0
188 | libgfortran-ng=7.3.0=hdf63c60_0
189 | libpng=1.6.37=hbc83047_0
190 | libsodium=1.0.18=h7b6447c_0
191 | libstdcxx-ng=9.1.0=hdf63c60_0
192 | libtiff=4.1.0=h2733197_1
193 | libuuid=1.0.3=h1bed415_2
194 | libxcb=1.14=h7b6447c_0
195 | libxml2=2.9.10=he19cac6_1
196 | lightgbm=2.2.1=pypi_0
197 | lime=0.2.0.1=pypi_0
198 | lxml=4.4.2=pypi_0
199 | lz4-c=1.9.2=he6710b0_0
200 | markdown=3.1.1=pypi_0
201 | markupsafe=1.1.1=py36h7b6447c_0
202 | matplotlib=3.1.0=py36h5429711_0
203 | mattermostwrapper=2.2=pypi_0
204 | mistune=0.8.4=py36h7b6447c_0
205 | mkl=2020.1=217
206 | mkl-service=2.3.0=py36he904b0f_0
207 | mkl_fft=1.1.0=py36h23d657b_0
208 | mkl_random=1.1.1=py36h0573a6f_0
209 | more-itertools=8.0.0=pypi_0
210 | msgpack=0.5.6=pypi_0
211 | msrest=0.6.8=pypi_0
212 | msrestazure=0.6.1=pypi_0
213 | multidict=4.7.6=pypi_0
214 | multimethods=1.0.0=pypi_0
215 | murmurhash=1.0.2=py36he6710b0_0
216 | mysqlclient=1.4.6=pypi_0
217 | nb_conda=2.2.1=py36_0
218 | nb_conda_kernels=2.2.3=py36_0
219 | nbconvert=5.6.1=py36_0
220 | nbformat=5.0.7=py_0
221 | ncurses=6.2=he6710b0_1
222 | ndg-httpsclient=0.5.1=pypi_0
223 | networkx=2.4=pypi_0
224 | newrelic=5.2.3.131=pypi_0
225 | nimbusml=0.6.5=pypi_0
226 | ninja=1.9.0=py36hfd86e86_0
227 | nlpaug=0.0.9=pypi_0
228 | nltk=3.4.5=pypi_0
229 | nodejs=12.4.0=he1b5a44_0
230 | notebook=6.0.3=py36_0
231 | numexpr=2.7.1=py36h423224d_0
232 | numpy=1.16.2=pypi_0
233 | numpy-base=1.16.4=py36hde5b4d6_0
234 | nvidia-ml-py3=7.352.0=pypi_0
235 | oauth2client=4.1.3=pypi_0
236 | oauthlib=3.0.2=pypi_0
237 | olefile=0.46=py36_0
238 | onnx=1.5.0=pypi_0
239 | onnx-chainer=1.5.0=pypi_0
240 | onnxconverter-common=1.5.0=pypi_0
241 | onnxmltools=1.4.1=pypi_0
242 | onnxruntime=0.5.0=pypi_0
243 | openssl=1.1.1g=h7b6447c_0
244 | opt-einsum=3.1.0=pypi_0
245 | overrides=3.0.0=pypi_0
246 | packaging=20.4=py_0
247 | pandas=1.0.5=py36h0573a6f_0
248 | pandoc=2.9.2.1=0
249 | pandocfilters=1.4.2=py36_1
250 | papermill=1.0.1=pypi_0
251 | parso=0.7.0=py_0
252 | pathspec=0.5.9=pypi_0
253 | patsy=0.5.1=pypi_0
254 | pattern=3.6.1=pypi_0
255 | pcre=8.44=he6710b0_0
256 | pdfminer-six=20191110=pypi_0
257 | pdfminer3k=1.3.1=pypi_0
258 | pexpect=4.8.0=py36_0
259 | pickleshare=0.7.5=py36_0
260 | pika=1.1.0=pypi_0
261 | pillow=7.2.0=py36hb39fc2d_0
262 | pip=19.2.3=py36_0
263 | pip-autoremove=0.9.1=pypi_0
264 | plac=0.9.6=py36_1
265 | pluggy=0.13.1=pypi_0
266 | ply=3.11=pypi_0
267 | portend=2.6=pypi_0
268 | preshed=2.0.1=py36he6710b0_0
269 | prometheus_client=0.8.0=py_0
270 | prompt-toolkit=2.0.10=pypi_0
271 | protobuf=3.12.4=pypi_0
272 | psutil=5.6.3=pypi_0
273 | psycopg2-binary=2.8.5=pypi_0
274 | ptyprocess=0.6.0=py36_0
275 | py=1.8.0=pypi_0
276 | py-rouge=1.1=pypi_0
277 | py4j=0.10.7=pypi_0
278 | pyarrow=0.14.0=pypi_0
279 | pyasn1=0.4.8=pypi_0
280 | pyasn1-modules=0.2.8=pypi_0
281 | pycparser=2.20=py_2
282 | pycryptodome=3.9.4=pypi_0
283 | pydot=1.4.1=pypi_0
284 | pygments=2.6.1=py_0
285 | pyjwt=1.7.1=pypi_0
286 | pykwalify=1.7.0=pypi_0
287 | pymongo=3.8.0=pypi_0
288 | pyopenssl=19.0.0=pypi_0
289 | pyparsing=2.4.7=py_0
290 | pyqt=5.9.2=py36h05f1152_2
291 | pyrsistent=0.16.0=py36h7b6447c_0
292 | pysocks=1.7.1=py36_0
293 | pyspark=2.4.3=pypi_0
294 | pyspellchecker=0.5.1=pypi_0
295 | pytest=5.3.1=pypi_0
296 | python=3.6.8=h0371630_0
297 | python-crfsuite=0.9.7=pypi_0
298 | python-dateutil=2.8.1=py_0
299 | python-docx=0.8.10=pypi_0
300 | python-dotenv=0.10.3=pypi_0
301 | python-easyconfig=0.1.7=pypi_0
302 | python-engineio=3.12.1=pypi_0
303 | python-socketio=4.5.1=pypi_0
304 | python-telegram-bot=12.8=pypi_0
305 | pytorch=1.5.1=py3.6_cuda10.1.243_cudnn7.6.3_0
306 | pytorch-lightning=0.7.5=pypi_0
307 | pytorch-pretrained-bert=0.6.2=pypi_0
308 | pytorch-transformers=1.2.0=pypi_0
309 | pytz=2019.3=pypi_0
310 | pywavelets=1.0.3=pypi_0
311 | pyyaml=5.1.1=pypi_0
312 | pyzmq=19.0.1=py36he6710b0_1
313 | qt=5.9.7=h5867ecd_1
314 | qtconsole=4.6.0=pypi_0
315 | questionary=1.5.2=pypi_0
316 | rasa=1.10.10=pypi_0
317 | rasa-sdk=1.10.2=pypi_0
318 | readline=7.0=h7b6447c_5
319 | redis=3.5.3=pypi_0
320 | regex=2020.6.8=pypi_0
321 | requests=2.24.0=py_0
322 | requests-oauthlib=1.2.0=pypi_0
323 | requests-toolbelt=0.9.1=pypi_0
324 | resource=0.2.1=pypi_0
325 | rfc3986=1.4.0=pypi_0
326 | rocketchat-api=1.3.1=pypi_0
327 | rsa=3.4.2=pypi_0
328 | ruamel-yaml=0.16.10=pypi_0
329 | ruamel-yaml-clib=0.2.0=pypi_0
330 | s3transfer=0.3.3=pypi_0
331 | sacremoses=0.0.33=pypi_0
332 | sanic=19.12.2=pypi_0
333 | sanic-cors=0.10.0.post3=pypi_0
334 | sanic-jwt=1.4.1=pypi_0
335 | sanic-plugins-framework=0.9.3=pypi_0
336 | scikit-image=0.15.0=pypi_0
337 | scikit-learn=0.22.2.post1=pypi_0
338 | scipy=1.1.0=pypi_0
339 | secretstorage=3.1.1=pypi_0
340 | send2trash=1.5.0=py36_0
341 | sentencepiece=0.1.83=pypi_0
342 | seqeval=0.0.12=pypi_0
343 | setuptools=47.3.1=py36_0
344 | shap=0.29.1=pypi_0
345 | simplejson=3.17.0=pypi_0
346 | simpletransformers=0.43.6=pypi_0
347 | sip=4.19.8=py36hf484d3e_0
348 | six=1.15.0=py_0
349 | skl2onnx=1.4.9=pypi_0
350 | sklearn-crfsuite=0.3.6=pypi_0
351 | sklearn-pandas=1.7.0=pypi_0
352 | slackclient=2.8.0=pypi_0
353 | smart-open=1.8.4=pypi_0
354 | sniffio=1.1.0=pypi_0
355 | sortedcontainers=2.1.0=pypi_0
356 | soupsieve=2.0.1=py_0
357 | spacy=2.1.8=py36hc9558a2_0
358 | sqlalchemy=1.3.18=pypi_0
359 | sqlite=3.32.3=h62c20be_0
360 | srsly=0.1.0=py36he1b5a44_0
361 | statsmodels=0.10.0=pypi_0
362 | tabulate=0.8.7=pypi_0
363 | tempora=1.14.1=pypi_0
364 | tenacity=5.0.4=pypi_0
365 | tensorboard=2.1.1=pypi_0
366 | tensorboard-plugin-wit=1.7.0=pypi_0
367 | tensorboardx=2.1=pypi_0
368 | tensorflow=2.1.1=pypi_0
369 | tensorflow-addons=0.7.1=pypi_0
370 | tensorflow-estimator=2.1.0=pypi_0
371 | tensorflow-gpu=1.14.0=pypi_0
372 | tensorflow-hub=0.8.0=pypi_0
373 | tensorflow-probability=0.9.0=pypi_0
374 | tensorflow-text=2.1.0rc0=pypi_0
375 | termcolor=1.1.0=pypi_0
376 | terminado=0.8.3=py36_0
377 | terminaltables=3.1.0=pypi_0
378 | testpath=0.4.4=py_0
379 | textwrap3=0.9.2=pypi_0
380 | tf2onnx=1.5.3=pypi_0
381 | thinc=7.0.8=py36hc9558a2_0
382 | tk=8.6.10=hbc83047_0
383 | tokenizers=0.7.0=pypi_0
384 | torch=1.4.0=pypi_0
385 | torchvision=0.6.1=py36_cu101
386 | tornado=6.0.4=py36h7b6447c_1
387 | tqdm=4.45.0=pypi_0
388 | traitlets=4.3.3=py36_0
389 | transformers=2.11.0=pypi_0
390 | twilio=6.26.3=pypi_0
391 | typing=3.7.4=pypi_0
392 | typing-extensions=3.7.4=pypi_0
393 | tzlocal=2.1=pypi_0
394 | ujson=2.0.3=pypi_0
395 | urllib3=1.25.3=pypi_0
396 | uvloop=0.14.0=pypi_0
397 | wasabi=0.2.2=py_0
398 | wcwidth=0.2.5=py_0
399 | webencodings=0.5.1=py36_1
400 | webexteamssdk=1.3=pypi_0
401 | websocket-client=0.56.0=pypi_0
402 | websockets=8.1=pypi_0
403 | werkzeug=0.15.4=pypi_0
404 | wheel=0.30.0=pypi_0
405 | widgetsnbextension=3.5.0=pypi_0
406 | word2number=1.1=pypi_0
407 | wrapt=1.12.1=pypi_0
408 | xlrd=1.2.0=pypi_0
409 | xz=5.2.5=h7b6447c_0
410 | yaml=0.2.5=h7b6447c_0
411 | yarl=1.4.2=pypi_0
412 | zc-lockfile=2.0=pypi_0
413 | zeromq=4.3.2=he6710b0_2
414 | zipp=0.6.0=pypi_0
415 | zlib=1.2.11=h7b6447c_3
416 | zstd=1.4.4=h0b5b093_3
417 | 


--------------------------------------------------------------------------------
/platforms/bert/bert-bot-only-data-es.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import collections
  3 | import datetime
  4 | import logging
  5 | import os
  6 | import random
  7 | import sys
  8 | from logging.handlers import WatchedFileHandler
  9 | from typing import Any
 10 | 
 11 | import numpy as np
 12 | import pandas as pd
 13 | from scipy.special import softmax
 14 | from simpletransformers.classification import ClassificationModel
 15 | from sklearn.metrics import f1_score
 16 | from sklearn.model_selection import train_test_split
 17 | from sklearn.utils.class_weight import compute_class_weight
 18 | import torch
 19 | 
# Fail at import time when no CUDA device is visible: run_experiment() and
# main() both call random_seed(..., use_cuda=True) unconditionally.
# NOTE(review): a bare assert is stripped under `python -O` — confirm this
# guard is not relied upon in optimized runs.
assert(torch.cuda.is_available())

# Module-level logger; its handlers are (re)attached by setup_logging().
logger = logging.getLogger(__name__)
# Timestamp captured once at import. In this file it is only mentioned in a
# commented-out output-directory suffix inside main().
now_ts = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
# Label that f1_at_threshold() assigns when the top score is below the threshold.
OOS_CLASS = 'NO_NODES_DETECTED'
 25 | 
 26 | 
 27 | def str2bool(v: Any):
 28 |     # https://stackoverflow.com/q/15008758/3697191
 29 |     if isinstance(v, bool):
 30 |         return v
 31 |     if v.lower() in ('yes', 'true', 't', 'y', '1'):
 32 |         return True
 33 |     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
 34 |         return False
 35 |     else:
 36 |         raise argparse.ArgumentTypeError('Boolean value expected.')
 37 | 
 38 | 
 39 | def random_seed(seed_value: int, use_cuda: bool):
 40 |     np.random.seed(seed_value)  # cpu vars
 41 |     torch.manual_seed(seed_value)  # cpu  vars
 42 |     random.seed(seed_value)  # Python
 43 |     if use_cuda:
 44 |         torch.cuda.manual_seed(seed_value)
 45 |         torch.cuda.manual_seed_all(seed_value)  # gpu vars
 46 |         torch.backends.cudnn.deterministic = True  # needed
 47 |         torch.backends.cudnn.benchmark = False
 48 | 
 49 | 
 50 | def setup_logging(output_dir: str):
 51 |     global logger
 52 |     for handler in logger.handlers:
 53 |         logger.removeHandler(handler)
 54 |     logger.setLevel(logging.DEBUG)
 55 |     handler = WatchedFileHandler(f'{output_dir}/run_logs.log')
 56 |     handler.setLevel(logging.DEBUG)
 57 |     logger.addHandler(handler)
 58 |     handler = logging.StreamHandler(sys.stdout)
 59 |     handler.setLevel(logging.DEBUG)
 60 |     logger.addHandler(handler)
 61 | 
 62 | 
 63 | def log(*parts):
 64 |     logger.info(' '.join([str(part) for part in parts]))
 65 |     logger.info("=" * 80)
 66 | 
 67 | 
 68 | def make_st_args(cmd_args):
 69 |     args = [
 70 |         ('fp16', False),
 71 |         ('output_dir', f'{cmd_args.output_dir}/'),
 72 |         ('best_model_dir', f'{cmd_args.output_dir}/best_model/'),
 73 |         ('tensorboard_dir', f'{cmd_args.output_dir}/tblogs/'),
 74 |         ('manual_seed', cmd_args.seed),
 75 |         ('do_lower_case', cmd_args.do_lower_case),
 76 |         ('learning_rate', cmd_args.learning_rate),
 77 |         ('train_batch_size', cmd_args.batch_size),
 78 |         ('eval_batch_size', cmd_args.batch_size),
 79 |         ('num_train_epochs', cmd_args.epochs),
 80 |         ('gradient_accumulation_steps', 1),
 81 |         ('max_seq_length', 512),
 82 |         ('overwrite_output_dir', True),
 83 |         ('reprocess_input_data', True),
 84 |         ('save_best_model', True),
 85 |         ('save_eval_checkpoints', False),
 86 |         ('save_model_every_epoch', False),
 87 |         ('save_optimizer_and_scheduler', True),
 88 |         ('save_steps', -1),
 89 |         ('evaluate_during_training', True),
 90 |         ('evaluate_during_training_silent', False),
 91 |         ('evaluate_during_training_steps', cmd_args.eval_every_n_steps),
 92 |         ('evaluate_during_training_verbose', True),
 93 |     ]
 94 | 
 95 |     if cmd_args.use_early_stopping:
 96 |         args.append(('use_early_stopping', True))
 97 |         args.append(('early_stopping_consider_epochs', True))
 98 |         args.append(('early_stopping_metric', 'eval_loss'))
 99 |         args.append(('early_stopping_metric_minimize', True))
100 |         args.append(('early_stopping_delta', cmd_args.early_stopping_delta))
101 |         args.append(('early_stopping_patience', cmd_args.early_stopping_patience))
102 |     else:
103 |         args.append(('use_early_stopping', False))
104 | 
105 |     return dict(args)
106 | 
107 | 
def read_data(path: str) -> pd.DataFrame:
    """Load a CSV and return only its text and label columns under new names.

    The ``sentence`` column is renamed to ``text`` and ``label`` to ``labels``;
    every other column is dropped.

    Args:
        path: Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns:
        A DataFrame with exactly the columns ``text`` and ``labels``.
    """
    column_names = {'sentence': 'text', 'label': 'labels'}
    frame = pd.read_csv(path).rename(columns=column_names)
    return frame[['text', 'labels']]
113 | 
114 | 
def get_labels_map(df):
    """Map each distinct value of ``df['labels']`` to an integer id.

    Ids are assigned 0..n-1 following the sorted order of the distinct labels.

    Args:
        df: DataFrame with a ``labels`` column.

    Returns:
        ``collections.OrderedDict`` of label -> id, in sorted label order.
    """
    unique_labels = df['labels'].unique().tolist()
    unique_labels.sort()
    return collections.OrderedDict(
        (name, idx) for idx, name in enumerate(unique_labels)
    )
119 | 
120 | 
def f1_at_threshold(preds, y_true, labels_list, threshold):
    """Weighted F1 after replacing low-scoring predictions with ``OOS_CLASS``.

    Args:
        preds: 2-D array of per-row scores, indexed as ``preds[row, label_idx]``.
        y_true: Ground-truth labels, one per row of ``preds``.
        labels_list: List of label names aligned with the columns of ``preds``.
        threshold: Rows whose highest score is strictly below this value are
            predicted as ``OOS_CLASS``.

    Returns:
        The value of ``f1_score(..., average='weighted')``.
    """
    candidates = labels_list + [OOS_CLASS]
    top_idx = np.argmax(preds, axis=1)
    top_score = preds[np.arange(preds.shape[0]), top_idx]
    # Index -1 selects the OOS_CLASS entry appended at the end of `candidates`.
    chosen = np.where(top_score < threshold, -1, top_idx)
    y_pred = [candidates[i] for i in chosen]
    return f1_score(y_true=y_true, y_pred=y_pred, average='weighted')
128 | 
129 | 
def run_experiment(cmd_args):
    """Train a classifier, reload the best checkpoint, and score it on the test set.

    Steps:
      1. Configure logging, seed all RNGs, and read the train/test CSVs.
      2. If ``cmd_args.eval_frac > 0``, carve a stratified eval split out of
         the training data; otherwise the full training file is used as the
         eval set.
      3. Train with class weights computed from the training split.
      4. Reload the model from ``best_model_dir`` and predict on the test set.
      5. Write ``predictions.csv`` and ``predictions.jsonl`` to
         ``cmd_args.output_dir`` and log weighted F1 at thresholds
         0.00, 0.05, ..., 1.00.

    Args:
        cmd_args: Parsed command-line namespace produced by ``main()``.
    """
    setup_logging(cmd_args.output_dir)
    log('Run args', vars(cmd_args))
    torch.cuda.empty_cache()
    random_seed(cmd_args.seed, True)
    train_df = read_data(cmd_args.train_file)
    # Default eval set is the training file itself; replaced below when a
    # held-out fraction is requested.
    eval_df = read_data(cmd_args.train_file)
    # Label ids are derived from the full training file, before any split.
    label2id = get_labels_map(train_df)
    test_df = read_data(cmd_args.test_file)
    if cmd_args.eval_frac > 0:
        train_df, eval_df = train_test_split(
            train_df, test_size=cmd_args.eval_frac,
            random_state=cmd_args.seed,
            shuffle=True,
            stratify=train_df['labels']
        )

    # Log the shape of each split (previously all three lines logged train_df).
    log('Train Shape', train_df.shape)
    log('Eval Shape', eval_df.shape)
    log('Test Shape', test_df.shape)

    # class_weight is passed by keyword: accepted by old and new scikit-learn
    # releases alike (newer releases make these arguments keyword-only).
    weights = compute_class_weight(
        class_weight='balanced',
        classes=list(label2id.keys()),
        y=train_df['labels'],
    ).tolist()
    log('Class weights', weights)

    args = make_st_args(cmd_args)
    args['labels_list'] = list(label2id.keys())
    args['labels_map'] = label2id

    log('Labels map', label2id)
    log('ST args', args)

    m = ClassificationModel(
        model_type=cmd_args.model_type,
        model_name=cmd_args.model_name,
        num_labels=len(label2id),
        weight=weights,
        args=args)
    m.train_model(train_df=train_df, eval_df=eval_df)
    # Reload from the best checkpoint saved during training.
    m = ClassificationModel(
        cmd_args.model_type,
        args['best_model_dir'],
        args=args,
    )
    _, logits = m.predict(test_df['text'])
    preds = softmax(logits, axis=1)
    top_predicted = np.argmax(preds, axis=1)

    # Per-row top prediction and its score, written with the original column names.
    out_df = test_df.rename(columns={'text': 'sentence', 'labels': 'label'})
    out_df['predicted_node'] = [m.args.labels_list[top_predicted[i]] for i in range(len(test_df))]
    out_df['predicted_node_score'] = [preds[i][top_predicted[i]] for i in range(len(test_df))]
    out_df.to_csv(f'{cmd_args.output_dir}/predictions.csv', columns=['sentence', 'label', 'predicted_node', 'predicted_node_score'], index=False)

    # Full label -> score mapping per row, one JSON object per line.
    test_df['predictions'] = [dict(zip(m.args.labels_list, preds[i])) for i in range(len(test_df))]
    test_df.to_json(f'{cmd_args.output_dir}/predictions.jsonl', orient='records', lines=True)
    for t in range(0, 101, 5):
        t = t / 100.0
        f1 = f1_at_threshold(preds, test_df['labels'], m.args.labels_list, t)
        log(f'F1 @ t={t}', f1)
188 |     
189 | 
def main():
    """Parse command-line options, prepare the output directory, and run the experiment."""
    parser = argparse.ArgumentParser()

    # Mandatory path arguments.
    for flag in ('--train_file', '--test_file', '--output_dir'):
        parser.add_argument(flag, required=True, type=str)

    # Optional arguments as (flag, default, type) rows, kept in declaration order.
    optional_flags = (
        ('--model_type', 'bert', str),
        ('--model_name', 'bert-base-uncased', str),
        ('--do_lower_case', True, str2bool),
        ('--seed', 42, int),
        ('--learning_rate', 0.00004, float),
        ('--batch_size', 16, int),
        ('--epochs', 10, int),
        ('--eval_frac', 0.1, float),
        ('--eval_every_n_steps', 100, int),
        ('--use_early_stopping', True, str2bool),
        ('--early_stopping_patience', 5, int),
        ('--early_stopping_delta', 0.00005, float),
    )
    for flag, default_value, caster in optional_flags:
        parser.add_argument(flag, required=False, default=default_value, type=caster)

    cmd_args = parser.parse_args()
    # Drop trailing slashes so later path joins do not produce '//'.
    cmd_args.output_dir = cmd_args.output_dir.rstrip("/")  # /{now_ts}
    os.makedirs(cmd_args.output_dir, exist_ok=True)
    random_seed(cmd_args.seed, True)
    run_experiment(cmd_args)


if __name__ == '__main__':
    main()
216 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "BertForSequenceClassification"
 4 |   ],
 5 |   "attention_probs_dropout_prob": 0.1,
 6 |   "gradient_checkpointing": false,
 7 |   "hidden_act": "gelu",
 8 |   "hidden_dropout_prob": 0.1,
 9 |   "hidden_size": 768,
10 |   "id2label": {
11 |     "0": "LABEL_0",
12 |     "1": "LABEL_1",
13 |     "2": "LABEL_2",
14 |     "3": "LABEL_3",
15 |     "4": "LABEL_4",
16 |     "5": "LABEL_5",
17 |     "6": "LABEL_6",
18 |     "7": "LABEL_7",
19 |     "8": "LABEL_8",
20 |     "9": "LABEL_9",
21 |     "10": "LABEL_10",
22 |     "11": "LABEL_11",
23 |     "12": "LABEL_12",
24 |     "13": "LABEL_13",
25 |     "14": "LABEL_14",
26 |     "15": "LABEL_15",
27 |     "16": "LABEL_16",
28 |     "17": "LABEL_17",
29 |     "18": "LABEL_18",
30 |     "19": "LABEL_19",
31 |     "20": "LABEL_20",
32 |     "21": "LABEL_21",
33 |     "22": "LABEL_22",
34 |     "23": "LABEL_23",
35 |     "24": "LABEL_24",
36 |     "25": "LABEL_25",
37 |     "26": "LABEL_26",
38 |     "27": "LABEL_27"
39 |   },
40 |   "initializer_range": 0.02,
41 |   "intermediate_size": 3072,
42 |   "label2id": {
43 |     "LABEL_0": 0,
44 |     "LABEL_1": 1,
45 |     "LABEL_10": 10,
46 |     "LABEL_11": 11,
47 |     "LABEL_12": 12,
48 |     "LABEL_13": 13,
49 |     "LABEL_14": 14,
50 |     "LABEL_15": 15,
51 |     "LABEL_16": 16,
52 |     "LABEL_17": 17,
53 |     "LABEL_18": 18,
54 |     "LABEL_19": 19,
55 |     "LABEL_2": 2,
56 |     "LABEL_20": 20,
57 |     "LABEL_21": 21,
58 |     "LABEL_22": 22,
59 |     "LABEL_23": 23,
60 |     "LABEL_24": 24,
61 |     "LABEL_25": 25,
62 |     "LABEL_26": 26,
63 |     "LABEL_27": 27,
64 |     "LABEL_3": 3,
65 |     "LABEL_4": 4,
66 |     "LABEL_5": 5,
67 |     "LABEL_6": 6,
68 |     "LABEL_7": 7,
69 |     "LABEL_8": 8,
70 |     "LABEL_9": 9
71 |   },
72 |   "layer_norm_eps": 1e-12,
73 |   "max_position_embeddings": 512,
74 |   "model_type": "bert",
75 |   "num_attention_heads": 12,
76 |   "num_hidden_layers": 12,
77 |   "pad_token_id": 0,
78 |   "type_vocab_size": 2,
79 |   "vocab_size": 30522
80 | }
81 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart/eval_results.txt:
--------------------------------------------------------------------------------
1 | eval_loss = 0.0016611038629586498
2 | mcc = 1.0
3 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart/model_args.json:
--------------------------------------------------------------------------------
1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/curekart/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/curekart/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/curekart/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 225, "weight_decay": 0, "labels_list": ["CALL_CENTER", "CANCEL_ORDER", "CHAT_WITH_AGENT", "CHECK_PINCODE", "CONSULT_START", "DELAY_IN_PARCEL", "EXPIRY_DATE", "FRANCHISE", "IMMUNITY", "INTERNATIONAL_SHIPPING", "MODES_OF_PAYMENTS", "MODIFY_ADDRESS", "ORDER_QUERY", "ORDER_STATUS", "ORDER_TAKING", "ORIGINAL_PRODUCT", "PAYMENT_AND_BILL", "PORTAL_ISSUE", "RECOMMEND_PRODUCT", "REFER_EARN", "REFUNDS_RETURNS_REPLACEMENTS", "RESUME_DELIVERY", "SIDE_EFFECT", "SIGN_UP", "START_OVER", "STORE_INFORMATION", "USER_GOAL_FORM", 
"WORK_FROM_HOME"], "labels_map": {"CALL_CENTER": 0, "CANCEL_ORDER": 1, "CHAT_WITH_AGENT": 2, "CHECK_PINCODE": 3, "CONSULT_START": 4, "DELAY_IN_PARCEL": 5, "EXPIRY_DATE": 6, "FRANCHISE": 7, "IMMUNITY": 8, "INTERNATIONAL_SHIPPING": 9, "MODES_OF_PAYMENTS": 10, "MODIFY_ADDRESS": 11, "ORDER_QUERY": 12, "ORDER_STATUS": 13, "ORDER_TAKING": 14, "ORIGINAL_PRODUCT": 15, "PAYMENT_AND_BILL": 16, "PORTAL_ISSUE": 17, "RECOMMEND_PRODUCT": 18, "REFER_EARN": 19, "REFUNDS_RETURNS_REPLACEMENTS": 20, "RESUME_DELIVERY": 21, "SIDE_EFFECT": 22, "SIGN_UP": 23, "START_OVER": 24, "STORE_INFORMATION": 25, "USER_GOAL_FORM": 26, "WORK_FROM_HOME": 27}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart/tblogs/events.out.tfevents.1597350081.haptik-ai-research-mum-ml-2-vm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/curekart/tblogs/events.out.tfevents.1597350081.haptik-ai-research-mum-ml-2-vm


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {"do_lower_case": true, "model_max_length": 512}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart/training_args.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/curekart/training_args.bin


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart/training_progress_scores.csv:
--------------------------------------------------------------------------------
 1 | global_step,mcc,train_loss,eval_loss
 2 | 75,0.0489226281429655,3.3800320625305176,3.268240276972453
 3 | 100,0.14227801509842952,3.2683238983154297,3.097734079360962
 4 | 150,0.318445814589155,3.260500431060791,2.6981120856602985
 5 | 200,0.7360823978724693,2.241304636001587,2.1334492603937787
 6 | 225,0.850652764009118,1.5696055889129639,1.7193865990638733
 7 | 300,0.9732721399504127,0.7272082567214966,0.7657513042291005
 8 | 300,0.9732721399504127,0.7272082567214966,0.7657513042291005
 9 | 375,0.9839939029676897,0.19786734879016876,0.262628804842631
10 | 400,0.9964266032875477,0.1580124795436859,0.17506494929393132
11 | 450,0.9982145156768018,0.061699073761701584,0.08133237525820732
12 | 500,1.0,0.044864166527986526,0.04304662769039472
13 | 525,0.9982146902794541,0.1949339509010315,0.03420027763893207
14 | 600,0.9982146902794541,0.01979409158229828,0.0174630964299043
15 | 600,0.9982146902794541,0.01979409158229828,0.0174630964299043
16 | 675,1.0,0.010599198751151562,0.012711792197078466
17 | 700,1.0,0.009350229986011982,0.00925802268087864
18 | 750,1.0,0.006756887771189213,0.007775597075621287
19 | 800,1.0,0.007059372961521149,0.006613130954404672
20 | 825,1.0,0.006302967667579651,0.006184613835066557
21 | 900,1.0,0.005933687090873718,0.005213701774676641
22 | 900,1.0,0.005933687090873718,0.005213701774676641
23 | 975,1.0,0.0048155635595321655,0.004500839247678717
24 | 1000,1.0,0.003983728121966124,0.004296117999280493
25 | 1050,1.0,0.005057642702013254,0.003936607257152597
26 | 1100,1.0,0.003730300348252058,0.003631598042945067
27 | 1125,1.0,0.005177562590688467,0.003497548938418428
28 | 1200,1.0,0.004394039046019316,0.0031360636465251448
29 | 1200,1.0,0.004394039046019316,0.0031360636465251448
30 | 1275,1.0,0.003173314966261387,0.0028356259533514577
31 | 1300,1.0,0.00268826843239367,0.002747759005675713
32 | 1350,1.0,0.0037474841810762882,0.0025869984024514754
33 | 1400,1.0,0.002153025707229972,0.002440233255426089
34 | 1425,1.0,0.003527172142639756,0.0023733606096357105
35 | 1500,1.0,0.002447231439873576,0.0021881715798129637
36 | 1500,1.0,0.002447231439873576,0.0021881715798129637
37 | 1575,1.0,0.002039331942796707,0.0020307602997248373
38 | 1600,1.0,0.003677624510601163,0.001983012695175906
39 | 1650,1.0,0.0019816916901618242,0.0018912556565677126
40 | 1700,1.0,0.0018908806378021836,0.001807974735274911
41 | 1725,1.0,0.0018591739935800433,0.0017696570216988524
42 | 1800,1.0,0.0017346820095553994,0.0016611038629586498
43 | 1800,1.0,0.0017346820095553994,0.0016611038629586498
44 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart_subset/best_model/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "BertForSequenceClassification"
 4 |   ],
 5 |   "attention_probs_dropout_prob": 0.1,
 6 |   "gradient_checkpointing": false,
 7 |   "hidden_act": "gelu",
 8 |   "hidden_dropout_prob": 0.1,
 9 |   "hidden_size": 768,
10 |   "id2label": {
11 |     "0": "LABEL_0",
12 |     "1": "LABEL_1",
13 |     "2": "LABEL_2",
14 |     "3": "LABEL_3",
15 |     "4": "LABEL_4",
16 |     "5": "LABEL_5",
17 |     "6": "LABEL_6",
18 |     "7": "LABEL_7",
19 |     "8": "LABEL_8",
20 |     "9": "LABEL_9",
21 |     "10": "LABEL_10",
22 |     "11": "LABEL_11",
23 |     "12": "LABEL_12",
24 |     "13": "LABEL_13",
25 |     "14": "LABEL_14",
26 |     "15": "LABEL_15",
27 |     "16": "LABEL_16",
28 |     "17": "LABEL_17",
29 |     "18": "LABEL_18",
30 |     "19": "LABEL_19",
31 |     "20": "LABEL_20",
32 |     "21": "LABEL_21",
33 |     "22": "LABEL_22",
34 |     "23": "LABEL_23",
35 |     "24": "LABEL_24",
36 |     "25": "LABEL_25",
37 |     "26": "LABEL_26",
38 |     "27": "LABEL_27"
39 |   },
40 |   "initializer_range": 0.02,
41 |   "intermediate_size": 3072,
42 |   "label2id": {
43 |     "LABEL_0": 0,
44 |     "LABEL_1": 1,
45 |     "LABEL_10": 10,
46 |     "LABEL_11": 11,
47 |     "LABEL_12": 12,
48 |     "LABEL_13": 13,
49 |     "LABEL_14": 14,
50 |     "LABEL_15": 15,
51 |     "LABEL_16": 16,
52 |     "LABEL_17": 17,
53 |     "LABEL_18": 18,
54 |     "LABEL_19": 19,
55 |     "LABEL_2": 2,
56 |     "LABEL_20": 20,
57 |     "LABEL_21": 21,
58 |     "LABEL_22": 22,
59 |     "LABEL_23": 23,
60 |     "LABEL_24": 24,
61 |     "LABEL_25": 25,
62 |     "LABEL_26": 26,
63 |     "LABEL_27": 27,
64 |     "LABEL_3": 3,
65 |     "LABEL_4": 4,
66 |     "LABEL_5": 5,
67 |     "LABEL_6": 6,
68 |     "LABEL_7": 7,
69 |     "LABEL_8": 8,
70 |     "LABEL_9": 9
71 |   },
72 |   "layer_norm_eps": 1e-12,
73 |   "max_position_embeddings": 512,
74 |   "model_type": "bert",
75 |   "num_attention_heads": 12,
76 |   "num_hidden_layers": 12,
77 |   "pad_token_id": 0,
78 |   "type_vocab_size": 2,
79 |   "vocab_size": 30522
80 | }
81 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart_subset/best_model/eval_results.txt:
--------------------------------------------------------------------------------
1 | eval_loss = 0.002526673533094044
2 | mcc = 1.0
3 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart_subset/best_model/model_args.json:
--------------------------------------------------------------------------------
1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/curekart_subset/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/curekart_subset/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/curekart_subset/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 156, "weight_decay": 0, "labels_list": ["CALL_CENTER", "CANCEL_ORDER", "CHAT_WITH_AGENT", "CHECK_PINCODE", "CONSULT_START", "DELAY_IN_PARCEL", "EXPIRY_DATE", "FRANCHISE", "IMMUNITY", "INTERNATIONAL_SHIPPING", "MODES_OF_PAYMENTS", "MODIFY_ADDRESS", "ORDER_QUERY", "ORDER_STATUS", "ORDER_TAKING", "ORIGINAL_PRODUCT", "PAYMENT_AND_BILL", "PORTAL_ISSUE", "RECOMMEND_PRODUCT", "REFER_EARN", "REFUNDS_RETURNS_REPLACEMENTS", "RESUME_DELIVERY", "SIDE_EFFECT", "SIGN_UP", "START_OVER", "STORE_INFORMATION", 
"USER_GOAL_FORM", "WORK_FROM_HOME"], "labels_map": {"CALL_CENTER": 0, "CANCEL_ORDER": 1, "CHAT_WITH_AGENT": 2, "CHECK_PINCODE": 3, "CONSULT_START": 4, "DELAY_IN_PARCEL": 5, "EXPIRY_DATE": 6, "FRANCHISE": 7, "IMMUNITY": 8, "INTERNATIONAL_SHIPPING": 9, "MODES_OF_PAYMENTS": 10, "MODIFY_ADDRESS": 11, "ORDER_QUERY": 12, "ORDER_STATUS": 13, "ORDER_TAKING": 14, "ORIGINAL_PRODUCT": 15, "PAYMENT_AND_BILL": 16, "PORTAL_ISSUE": 17, "RECOMMEND_PRODUCT": 18, "REFER_EARN": 19, "REFUNDS_RETURNS_REPLACEMENTS": 20, "RESUME_DELIVERY": 21, "SIDE_EFFECT": 22, "SIGN_UP": 23, "START_OVER": 24, "STORE_INFORMATION": 25, "USER_GOAL_FORM": 26, "WORK_FROM_HOME": 27}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart_subset/best_model/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart_subset/best_model/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {"do_lower_case": true, "model_max_length": 512}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart_subset/best_model/training_args.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/curekart_subset/best_model/training_args.bin


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart_subset/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "BertForSequenceClassification"
 4 |   ],
 5 |   "attention_probs_dropout_prob": 0.1,
 6 |   "gradient_checkpointing": false,
 7 |   "hidden_act": "gelu",
 8 |   "hidden_dropout_prob": 0.1,
 9 |   "hidden_size": 768,
10 |   "id2label": {
11 |     "0": "LABEL_0",
12 |     "1": "LABEL_1",
13 |     "2": "LABEL_2",
14 |     "3": "LABEL_3",
15 |     "4": "LABEL_4",
16 |     "5": "LABEL_5",
17 |     "6": "LABEL_6",
18 |     "7": "LABEL_7",
19 |     "8": "LABEL_8",
20 |     "9": "LABEL_9",
21 |     "10": "LABEL_10",
22 |     "11": "LABEL_11",
23 |     "12": "LABEL_12",
24 |     "13": "LABEL_13",
25 |     "14": "LABEL_14",
26 |     "15": "LABEL_15",
27 |     "16": "LABEL_16",
28 |     "17": "LABEL_17",
29 |     "18": "LABEL_18",
30 |     "19": "LABEL_19",
31 |     "20": "LABEL_20",
32 |     "21": "LABEL_21",
33 |     "22": "LABEL_22",
34 |     "23": "LABEL_23",
35 |     "24": "LABEL_24",
36 |     "25": "LABEL_25",
37 |     "26": "LABEL_26",
38 |     "27": "LABEL_27"
39 |   },
40 |   "initializer_range": 0.02,
41 |   "intermediate_size": 3072,
42 |   "label2id": {
43 |     "LABEL_0": 0,
44 |     "LABEL_1": 1,
45 |     "LABEL_10": 10,
46 |     "LABEL_11": 11,
47 |     "LABEL_12": 12,
48 |     "LABEL_13": 13,
49 |     "LABEL_14": 14,
50 |     "LABEL_15": 15,
51 |     "LABEL_16": 16,
52 |     "LABEL_17": 17,
53 |     "LABEL_18": 18,
54 |     "LABEL_19": 19,
55 |     "LABEL_2": 2,
56 |     "LABEL_20": 20,
57 |     "LABEL_21": 21,
58 |     "LABEL_22": 22,
59 |     "LABEL_23": 23,
60 |     "LABEL_24": 24,
61 |     "LABEL_25": 25,
62 |     "LABEL_26": 26,
63 |     "LABEL_27": 27,
64 |     "LABEL_3": 3,
65 |     "LABEL_4": 4,
66 |     "LABEL_5": 5,
67 |     "LABEL_6": 6,
68 |     "LABEL_7": 7,
69 |     "LABEL_8": 8,
70 |     "LABEL_9": 9
71 |   },
72 |   "layer_norm_eps": 1e-12,
73 |   "max_position_embeddings": 512,
74 |   "model_type": "bert",
75 |   "num_attention_heads": 12,
76 |   "num_hidden_layers": 12,
77 |   "pad_token_id": 0,
78 |   "type_vocab_size": 2,
79 |   "vocab_size": 30522
80 | }
81 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart_subset/eval_results.txt:
--------------------------------------------------------------------------------
1 | eval_loss = 0.0021807193876996348
2 | mcc = 1.0
3 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart_subset/model_args.json:
--------------------------------------------------------------------------------
1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/curekart_subset/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/curekart_subset/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/curekart_subset/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 156, "weight_decay": 0, "labels_list": ["CALL_CENTER", "CANCEL_ORDER", "CHAT_WITH_AGENT", "CHECK_PINCODE", "CONSULT_START", "DELAY_IN_PARCEL", "EXPIRY_DATE", "FRANCHISE", "IMMUNITY", "INTERNATIONAL_SHIPPING", "MODES_OF_PAYMENTS", "MODIFY_ADDRESS", "ORDER_QUERY", "ORDER_STATUS", "ORDER_TAKING", "ORIGINAL_PRODUCT", "PAYMENT_AND_BILL", "PORTAL_ISSUE", "RECOMMEND_PRODUCT", "REFER_EARN", "REFUNDS_RETURNS_REPLACEMENTS", "RESUME_DELIVERY", "SIDE_EFFECT", "SIGN_UP", "START_OVER", "STORE_INFORMATION", 
"USER_GOAL_FORM", "WORK_FROM_HOME"], "labels_map": {"CALL_CENTER": 0, "CANCEL_ORDER": 1, "CHAT_WITH_AGENT": 2, "CHECK_PINCODE": 3, "CONSULT_START": 4, "DELAY_IN_PARCEL": 5, "EXPIRY_DATE": 6, "FRANCHISE": 7, "IMMUNITY": 8, "INTERNATIONAL_SHIPPING": 9, "MODES_OF_PAYMENTS": 10, "MODIFY_ADDRESS": 11, "ORDER_QUERY": 12, "ORDER_STATUS": 13, "ORDER_TAKING": 14, "ORIGINAL_PRODUCT": 15, "PAYMENT_AND_BILL": 16, "PORTAL_ISSUE": 17, "RECOMMEND_PRODUCT": 18, "REFER_EARN": 19, "REFUNDS_RETURNS_REPLACEMENTS": 20, "RESUME_DELIVERY": 21, "SIDE_EFFECT": 22, "SIGN_UP": 23, "START_OVER": 24, "STORE_INFORMATION": 25, "USER_GOAL_FORM": 26, "WORK_FROM_HOME": 27}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart_subset/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart_subset/tblogs/events.out.tfevents.1597351105.haptik-ai-research-mum-ml-2-vm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/curekart_subset/tblogs/events.out.tfevents.1597351105.haptik-ai-research-mum-ml-2-vm


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart_subset/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {"do_lower_case": true, "model_max_length": 512}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart_subset/training_args.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/curekart_subset/training_args.bin


--------------------------------------------------------------------------------
/platforms/bert/bert_models/curekart_subset/training_progress_scores.csv:
--------------------------------------------------------------------------------
 1 | global_step,mcc,train_loss,eval_loss
 2 | 52,0.015618999829071147,3.3157989978790283,3.295060153190906
 3 | 100,0.15194244760341802,3.0897583961486816,2.9831165144076714
 4 | 104,0.1341155552042914,3.1407852172851562,2.9660061368575463
 5 | 156,0.6815403439984585,2.89314603805542,2.3054693043231964
 6 | 200,0.8222967981450889,1.9050589799880981,1.7316755652427673
 7 | 208,0.9141834131268949,1.519261360168457,1.533938548885859
 8 | 260,0.9660890899536336,1.3009628057479858,0.8027959448786882
 9 | 300,0.9870062869068875,0.8840746879577637,0.44703088586147016
10 | 312,0.9947941467363587,0.6657969355583191,0.38270918279886246
11 | 364,0.9817915970321776,0.1275816559791565,0.214721856208948
12 | 400,0.9922147550894792,0.25742307305336,0.12980968003662732
13 | 416,1.0,0.13560621440410614,0.09332964519182077
14 | 468,1.0,0.03233667090535164,0.043612775536110766
15 | 500,1.0,0.0301946010440588,0.030464474362536118
16 | 520,1.0,0.02194424904882908,0.0229594346243315
17 | 572,1.0,0.014761185273528099,0.01612605733008912
18 | 600,1.0,0.015239364467561245,0.01403906225011899
19 | 624,1.0,0.011298744939267635,0.012511847576556297
20 | 676,1.0,0.009137298911809921,0.010069010976272134
21 | 700,1.0,0.009275787509977818,0.009287668401017211
22 | 728,1.0,0.010665534995496273,0.00839059384396443
23 | 780,1.0,0.01529514417052269,0.007350107448963592
24 | 800,1.0,0.009811767376959324,0.007007549937742834
25 | 832,1.0,0.008346081711351871,0.006515947215330715
26 | 884,1.0,0.006207848433405161,0.005841879720369784
27 | 900,1.0,0.007257106713950634,0.005661116948781105
28 | 936,1.0,0.0076172430999577045,0.00529962131090892
29 | 988,1.0,0.005975247826427221,0.004852061746462893
30 | 1000,1.0,0.0038416371680796146,0.0047596723306924105
31 | 1040,1.0,0.005082871299237013,0.004475821105118554
32 | 1092,1.0,0.003949825186282396,0.004150250917658783
33 | 1100,1.0,0.004515018314123154,0.004103761085738929
34 | 1144,1.0,0.00412968173623085,0.003867794764049065
35 | 1196,1.0,0.0031884999480098486,0.0036216092412360013
36 | 1200,1.0,0.004237225279211998,0.003604552045894357
37 | 1248,1.0,0.004021225031465292,0.0034063787136871656
38 | 1300,1.0,0.0037145880050957203,0.003216816734773322
39 | 1300,1.0,0.0037145880050957203,0.003216816734773322
40 | 1352,1.0,0.0035830303095281124,0.0030451807408378674
41 | 1400,1.0,0.002926103537902236,0.002906436136422249
42 | 1404,1.0,0.004418825265020132,0.0028954993781203833
43 | 1456,1.0,0.003054356202483177,0.0027609795327477446
44 | 1500,1.0,0.002867345931008458,0.002655705150503379
45 | 1508,1.0,0.0023572815116494894,0.0026374717507188995
46 | 1560,1.0,0.0022416089195758104,0.002526673533094044
47 | 1600,1.0,0.002718471921980381,0.0024489216000522273
48 | 1612,1.0,0.002809862606227398,0.002426815902044137
49 | 1664,1.0,0.0025117190089076757,0.0023362424690276384
50 | 1700,1.0,0.0023699230514466763,0.002280304313619406
51 | 1716,1.0,0.003509317059069872,0.0022554639391063783
52 | 1768,1.0,0.002868139650672674,0.0021807193876996348
53 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11/best_model/config.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "architectures": [
  3 |     "BertForSequenceClassification"
  4 |   ],
  5 |   "attention_probs_dropout_prob": 0.1,
  6 |   "gradient_checkpointing": false,
  7 |   "hidden_act": "gelu",
  8 |   "hidden_dropout_prob": 0.1,
  9 |   "hidden_size": 768,
 10 |   "id2label": {
 11 |     "0": "LABEL_0",
 12 |     "1": "LABEL_1",
 13 |     "2": "LABEL_2",
 14 |     "3": "LABEL_3",
 15 |     "4": "LABEL_4",
 16 |     "5": "LABEL_5",
 17 |     "6": "LABEL_6",
 18 |     "7": "LABEL_7",
 19 |     "8": "LABEL_8",
 20 |     "9": "LABEL_9",
 21 |     "10": "LABEL_10",
 22 |     "11": "LABEL_11",
 23 |     "12": "LABEL_12",
 24 |     "13": "LABEL_13",
 25 |     "14": "LABEL_14",
 26 |     "15": "LABEL_15",
 27 |     "16": "LABEL_16",
 28 |     "17": "LABEL_17",
 29 |     "18": "LABEL_18",
 30 |     "19": "LABEL_19",
 31 |     "20": "LABEL_20",
 32 |     "21": "LABEL_21",
 33 |     "22": "LABEL_22",
 34 |     "23": "LABEL_23",
 35 |     "24": "LABEL_24",
 36 |     "25": "LABEL_25",
 37 |     "26": "LABEL_26",
 38 |     "27": "LABEL_27",
 39 |     "28": "LABEL_28",
 40 |     "29": "LABEL_29",
 41 |     "30": "LABEL_30",
 42 |     "31": "LABEL_31",
 43 |     "32": "LABEL_32",
 44 |     "33": "LABEL_33",
 45 |     "34": "LABEL_34",
 46 |     "35": "LABEL_35",
 47 |     "36": "LABEL_36",
 48 |     "37": "LABEL_37",
 49 |     "38": "LABEL_38",
 50 |     "39": "LABEL_39",
 51 |     "40": "LABEL_40",
 52 |     "41": "LABEL_41",
 53 |     "42": "LABEL_42",
 54 |     "43": "LABEL_43",
 55 |     "44": "LABEL_44",
 56 |     "45": "LABEL_45",
 57 |     "46": "LABEL_46",
 58 |     "47": "LABEL_47",
 59 |     "48": "LABEL_48",
 60 |     "49": "LABEL_49",
 61 |     "50": "LABEL_50",
 62 |     "51": "LABEL_51",
 63 |     "52": "LABEL_52",
 64 |     "53": "LABEL_53",
 65 |     "54": "LABEL_54",
 66 |     "55": "LABEL_55",
 67 |     "56": "LABEL_56",
 68 |     "57": "LABEL_57",
 69 |     "58": "LABEL_58"
 70 |   },
 71 |   "initializer_range": 0.02,
 72 |   "intermediate_size": 3072,
 73 |   "label2id": {
 74 |     "LABEL_0": 0,
 75 |     "LABEL_1": 1,
 76 |     "LABEL_10": 10,
 77 |     "LABEL_11": 11,
 78 |     "LABEL_12": 12,
 79 |     "LABEL_13": 13,
 80 |     "LABEL_14": 14,
 81 |     "LABEL_15": 15,
 82 |     "LABEL_16": 16,
 83 |     "LABEL_17": 17,
 84 |     "LABEL_18": 18,
 85 |     "LABEL_19": 19,
 86 |     "LABEL_2": 2,
 87 |     "LABEL_20": 20,
 88 |     "LABEL_21": 21,
 89 |     "LABEL_22": 22,
 90 |     "LABEL_23": 23,
 91 |     "LABEL_24": 24,
 92 |     "LABEL_25": 25,
 93 |     "LABEL_26": 26,
 94 |     "LABEL_27": 27,
 95 |     "LABEL_28": 28,
 96 |     "LABEL_29": 29,
 97 |     "LABEL_3": 3,
 98 |     "LABEL_30": 30,
 99 |     "LABEL_31": 31,
100 |     "LABEL_32": 32,
101 |     "LABEL_33": 33,
102 |     "LABEL_34": 34,
103 |     "LABEL_35": 35,
104 |     "LABEL_36": 36,
105 |     "LABEL_37": 37,
106 |     "LABEL_38": 38,
107 |     "LABEL_39": 39,
108 |     "LABEL_4": 4,
109 |     "LABEL_40": 40,
110 |     "LABEL_41": 41,
111 |     "LABEL_42": 42,
112 |     "LABEL_43": 43,
113 |     "LABEL_44": 44,
114 |     "LABEL_45": 45,
115 |     "LABEL_46": 46,
116 |     "LABEL_47": 47,
117 |     "LABEL_48": 48,
118 |     "LABEL_49": 49,
119 |     "LABEL_5": 5,
120 |     "LABEL_50": 50,
121 |     "LABEL_51": 51,
122 |     "LABEL_52": 52,
123 |     "LABEL_53": 53,
124 |     "LABEL_54": 54,
125 |     "LABEL_55": 55,
126 |     "LABEL_56": 56,
127 |     "LABEL_57": 57,
128 |     "LABEL_58": 58,
129 |     "LABEL_6": 6,
130 |     "LABEL_7": 7,
131 |     "LABEL_8": 8,
132 |     "LABEL_9": 9
133 |   },
134 |   "layer_norm_eps": 1e-12,
135 |   "max_position_embeddings": 512,
136 |   "model_type": "bert",
137 |   "num_attention_heads": 12,
138 |   "num_hidden_layers": 12,
139 |   "pad_token_id": 0,
140 |   "type_vocab_size": 2,
141 |   "vocab_size": 30522
142 | }
143 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11/best_model/eval_results.txt:
--------------------------------------------------------------------------------
1 | eval_loss = 0.004777703106687483
2 | mcc = 1.0
3 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11/best_model/model_args.json:
--------------------------------------------------------------------------------
1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/powerplay11/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/powerplay11/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/powerplay11/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 177, "weight_decay": 0, "labels_list": ["ACCOUNT_BALANCE_DEDUCTED", "ACCOUNT_NOT_VERIFIED", "ACCOUNT_RESET", "APPRECIATION", "BANK_VERIFICATION_DETAILS", "CANNOT_SEE_JOINED_CONTESTS", "CAPABILITIES", "CASH_BONUS", "CASH_BONUS_EXPIRY", "CHANGE_BANK_ACCOUNT", "CHANGE_MOBILE_NUMBER", "CHANGE_PROFILE_TEAM_DETAILS", "CHAT_WITH_AN_AGENT", "CHECK_DEPOSIT_STATUS", "CHECK_WALLET_BALANCE", "CONTACT_NUMBER", "CRITICISM", "DEDUCTED_AMOUNT_NOT_RECEIVED", "DELETE_PAN_CARD", "DOWNLOAD_POWERPLAY11", "FAIRPLAY_VIOLATIONS", 
"FAKE_TEAMS", "FEEDBACK", "GREETINGS_DAY", "HOW_POINTS_CALCULATED", "HOW_TO_PLAY", "INSTANT_WITHDRAWAL", "JOIN_CONTEST", "LESS_WINNINGS_AMOUNT", "MATCH_ABANDONED", "NEW_TEAM_PATTERN", "NO_EMAIL_CONFIRMATION", "OFFERS_AND_REFERRALS", "PAN_VERIFICATION_FAILED", "POINTS_NOT_UPDATED", "PRESENCE", "REFUND_OF_ADDED_CASH", "REFUND_OF_WRONG_AMOUNT", "SIGNUP_BONUS", "TAXES_ON_WINNINGS", "TEAM_DEADLINE", "THANKS", "TYPES_BONUS", "TYPES_CONTESTS", "UNUTILIZED_MONEY", "UPDATE_APP", "VERIFY_EMAIL", "VERIFY_MOBILE", "VERIFY_PAN", "WHAT_IF_THERES_A_TIE", "WHEN_SCORES_UPDATED", "WHEN_WINNINGS_DISTRIBUTED", "WHY_VERIFY", "WINNINGS", "WITHDRAWAL_INTRO", "WITHDRAWAL_STATUS", "WITHDRAWAL_TIME", "WITHDRAW_CASH_BONUS", "WRONG_SCORES"], "labels_map": {"ACCOUNT_BALANCE_DEDUCTED": 0, "ACCOUNT_NOT_VERIFIED": 1, "ACCOUNT_RESET": 2, "APPRECIATION": 3, "BANK_VERIFICATION_DETAILS": 4, "CANNOT_SEE_JOINED_CONTESTS": 5, "CAPABILITIES": 6, "CASH_BONUS": 7, "CASH_BONUS_EXPIRY": 8, "CHANGE_BANK_ACCOUNT": 9, "CHANGE_MOBILE_NUMBER": 10, "CHANGE_PROFILE_TEAM_DETAILS": 11, "CHAT_WITH_AN_AGENT": 12, "CHECK_DEPOSIT_STATUS": 13, "CHECK_WALLET_BALANCE": 14, "CONTACT_NUMBER": 15, "CRITICISM": 16, "DEDUCTED_AMOUNT_NOT_RECEIVED": 17, "DELETE_PAN_CARD": 18, "DOWNLOAD_POWERPLAY11": 19, "FAIRPLAY_VIOLATIONS": 20, "FAKE_TEAMS": 21, "FEEDBACK": 22, "GREETINGS_DAY": 23, "HOW_POINTS_CALCULATED": 24, "HOW_TO_PLAY": 25, "INSTANT_WITHDRAWAL": 26, "JOIN_CONTEST": 27, "LESS_WINNINGS_AMOUNT": 28, "MATCH_ABANDONED": 29, "NEW_TEAM_PATTERN": 30, "NO_EMAIL_CONFIRMATION": 31, "OFFERS_AND_REFERRALS": 32, "PAN_VERIFICATION_FAILED": 33, "POINTS_NOT_UPDATED": 34, "PRESENCE": 35, "REFUND_OF_ADDED_CASH": 36, "REFUND_OF_WRONG_AMOUNT": 37, "SIGNUP_BONUS": 38, "TAXES_ON_WINNINGS": 39, "TEAM_DEADLINE": 40, "THANKS": 41, "TYPES_BONUS": 42, "TYPES_CONTESTS": 43, "UNUTILIZED_MONEY": 44, "UPDATE_APP": 45, "VERIFY_EMAIL": 46, "VERIFY_MOBILE": 47, "VERIFY_PAN": 48, "WHAT_IF_THERES_A_TIE": 49, "WHEN_SCORES_UPDATED": 50, 
"WHEN_WINNINGS_DISTRIBUTED": 51, "WHY_VERIFY": 52, "WINNINGS": 53, "WITHDRAWAL_INTRO": 54, "WITHDRAWAL_STATUS": 55, "WITHDRAWAL_TIME": 56, "WITHDRAW_CASH_BONUS": 57, "WRONG_SCORES": 58}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11/best_model/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11/best_model/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {"do_lower_case": true, "model_max_length": 512}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11/best_model/training_args.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/powerplay11/best_model/training_args.bin


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11/config.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "architectures": [
  3 |     "BertForSequenceClassification"
  4 |   ],
  5 |   "attention_probs_dropout_prob": 0.1,
  6 |   "gradient_checkpointing": false,
  7 |   "hidden_act": "gelu",
  8 |   "hidden_dropout_prob": 0.1,
  9 |   "hidden_size": 768,
 10 |   "id2label": {
 11 |     "0": "LABEL_0",
 12 |     "1": "LABEL_1",
 13 |     "2": "LABEL_2",
 14 |     "3": "LABEL_3",
 15 |     "4": "LABEL_4",
 16 |     "5": "LABEL_5",
 17 |     "6": "LABEL_6",
 18 |     "7": "LABEL_7",
 19 |     "8": "LABEL_8",
 20 |     "9": "LABEL_9",
 21 |     "10": "LABEL_10",
 22 |     "11": "LABEL_11",
 23 |     "12": "LABEL_12",
 24 |     "13": "LABEL_13",
 25 |     "14": "LABEL_14",
 26 |     "15": "LABEL_15",
 27 |     "16": "LABEL_16",
 28 |     "17": "LABEL_17",
 29 |     "18": "LABEL_18",
 30 |     "19": "LABEL_19",
 31 |     "20": "LABEL_20",
 32 |     "21": "LABEL_21",
 33 |     "22": "LABEL_22",
 34 |     "23": "LABEL_23",
 35 |     "24": "LABEL_24",
 36 |     "25": "LABEL_25",
 37 |     "26": "LABEL_26",
 38 |     "27": "LABEL_27",
 39 |     "28": "LABEL_28",
 40 |     "29": "LABEL_29",
 41 |     "30": "LABEL_30",
 42 |     "31": "LABEL_31",
 43 |     "32": "LABEL_32",
 44 |     "33": "LABEL_33",
 45 |     "34": "LABEL_34",
 46 |     "35": "LABEL_35",
 47 |     "36": "LABEL_36",
 48 |     "37": "LABEL_37",
 49 |     "38": "LABEL_38",
 50 |     "39": "LABEL_39",
 51 |     "40": "LABEL_40",
 52 |     "41": "LABEL_41",
 53 |     "42": "LABEL_42",
 54 |     "43": "LABEL_43",
 55 |     "44": "LABEL_44",
 56 |     "45": "LABEL_45",
 57 |     "46": "LABEL_46",
 58 |     "47": "LABEL_47",
 59 |     "48": "LABEL_48",
 60 |     "49": "LABEL_49",
 61 |     "50": "LABEL_50",
 62 |     "51": "LABEL_51",
 63 |     "52": "LABEL_52",
 64 |     "53": "LABEL_53",
 65 |     "54": "LABEL_54",
 66 |     "55": "LABEL_55",
 67 |     "56": "LABEL_56",
 68 |     "57": "LABEL_57",
 69 |     "58": "LABEL_58"
 70 |   },
 71 |   "initializer_range": 0.02,
 72 |   "intermediate_size": 3072,
 73 |   "label2id": {
 74 |     "LABEL_0": 0,
 75 |     "LABEL_1": 1,
 76 |     "LABEL_10": 10,
 77 |     "LABEL_11": 11,
 78 |     "LABEL_12": 12,
 79 |     "LABEL_13": 13,
 80 |     "LABEL_14": 14,
 81 |     "LABEL_15": 15,
 82 |     "LABEL_16": 16,
 83 |     "LABEL_17": 17,
 84 |     "LABEL_18": 18,
 85 |     "LABEL_19": 19,
 86 |     "LABEL_2": 2,
 87 |     "LABEL_20": 20,
 88 |     "LABEL_21": 21,
 89 |     "LABEL_22": 22,
 90 |     "LABEL_23": 23,
 91 |     "LABEL_24": 24,
 92 |     "LABEL_25": 25,
 93 |     "LABEL_26": 26,
 94 |     "LABEL_27": 27,
 95 |     "LABEL_28": 28,
 96 |     "LABEL_29": 29,
 97 |     "LABEL_3": 3,
 98 |     "LABEL_30": 30,
 99 |     "LABEL_31": 31,
100 |     "LABEL_32": 32,
101 |     "LABEL_33": 33,
102 |     "LABEL_34": 34,
103 |     "LABEL_35": 35,
104 |     "LABEL_36": 36,
105 |     "LABEL_37": 37,
106 |     "LABEL_38": 38,
107 |     "LABEL_39": 39,
108 |     "LABEL_4": 4,
109 |     "LABEL_40": 40,
110 |     "LABEL_41": 41,
111 |     "LABEL_42": 42,
112 |     "LABEL_43": 43,
113 |     "LABEL_44": 44,
114 |     "LABEL_45": 45,
115 |     "LABEL_46": 46,
116 |     "LABEL_47": 47,
117 |     "LABEL_48": 48,
118 |     "LABEL_49": 49,
119 |     "LABEL_5": 5,
120 |     "LABEL_50": 50,
121 |     "LABEL_51": 51,
122 |     "LABEL_52": 52,
123 |     "LABEL_53": 53,
124 |     "LABEL_54": 54,
125 |     "LABEL_55": 55,
126 |     "LABEL_56": 56,
127 |     "LABEL_57": 57,
128 |     "LABEL_58": 58,
129 |     "LABEL_6": 6,
130 |     "LABEL_7": 7,
131 |     "LABEL_8": 8,
132 |     "LABEL_9": 9
133 |   },
134 |   "layer_norm_eps": 1e-12,
135 |   "max_position_embeddings": 512,
136 |   "model_type": "bert",
137 |   "num_attention_heads": 12,
138 |   "num_hidden_layers": 12,
139 |   "pad_token_id": 0,
140 |   "type_vocab_size": 2,
141 |   "vocab_size": 30522
142 | }
143 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11/eval_results.txt:
--------------------------------------------------------------------------------
1 | eval_loss = 0.004436580420834786
2 | mcc = 1.0
3 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11/model_args.json:
--------------------------------------------------------------------------------
1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/powerplay11/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/powerplay11/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/powerplay11/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 177, "weight_decay": 0, "labels_list": ["ACCOUNT_BALANCE_DEDUCTED", "ACCOUNT_NOT_VERIFIED", "ACCOUNT_RESET", "APPRECIATION", "BANK_VERIFICATION_DETAILS", "CANNOT_SEE_JOINED_CONTESTS", "CAPABILITIES", "CASH_BONUS", "CASH_BONUS_EXPIRY", "CHANGE_BANK_ACCOUNT", "CHANGE_MOBILE_NUMBER", "CHANGE_PROFILE_TEAM_DETAILS", "CHAT_WITH_AN_AGENT", "CHECK_DEPOSIT_STATUS", "CHECK_WALLET_BALANCE", "CONTACT_NUMBER", "CRITICISM", "DEDUCTED_AMOUNT_NOT_RECEIVED", "DELETE_PAN_CARD", "DOWNLOAD_POWERPLAY11", "FAIRPLAY_VIOLATIONS", 
"FAKE_TEAMS", "FEEDBACK", "GREETINGS_DAY", "HOW_POINTS_CALCULATED", "HOW_TO_PLAY", "INSTANT_WITHDRAWAL", "JOIN_CONTEST", "LESS_WINNINGS_AMOUNT", "MATCH_ABANDONED", "NEW_TEAM_PATTERN", "NO_EMAIL_CONFIRMATION", "OFFERS_AND_REFERRALS", "PAN_VERIFICATION_FAILED", "POINTS_NOT_UPDATED", "PRESENCE", "REFUND_OF_ADDED_CASH", "REFUND_OF_WRONG_AMOUNT", "SIGNUP_BONUS", "TAXES_ON_WINNINGS", "TEAM_DEADLINE", "THANKS", "TYPES_BONUS", "TYPES_CONTESTS", "UNUTILIZED_MONEY", "UPDATE_APP", "VERIFY_EMAIL", "VERIFY_MOBILE", "VERIFY_PAN", "WHAT_IF_THERES_A_TIE", "WHEN_SCORES_UPDATED", "WHEN_WINNINGS_DISTRIBUTED", "WHY_VERIFY", "WINNINGS", "WITHDRAWAL_INTRO", "WITHDRAWAL_STATUS", "WITHDRAWAL_TIME", "WITHDRAW_CASH_BONUS", "WRONG_SCORES"], "labels_map": {"ACCOUNT_BALANCE_DEDUCTED": 0, "ACCOUNT_NOT_VERIFIED": 1, "ACCOUNT_RESET": 2, "APPRECIATION": 3, "BANK_VERIFICATION_DETAILS": 4, "CANNOT_SEE_JOINED_CONTESTS": 5, "CAPABILITIES": 6, "CASH_BONUS": 7, "CASH_BONUS_EXPIRY": 8, "CHANGE_BANK_ACCOUNT": 9, "CHANGE_MOBILE_NUMBER": 10, "CHANGE_PROFILE_TEAM_DETAILS": 11, "CHAT_WITH_AN_AGENT": 12, "CHECK_DEPOSIT_STATUS": 13, "CHECK_WALLET_BALANCE": 14, "CONTACT_NUMBER": 15, "CRITICISM": 16, "DEDUCTED_AMOUNT_NOT_RECEIVED": 17, "DELETE_PAN_CARD": 18, "DOWNLOAD_POWERPLAY11": 19, "FAIRPLAY_VIOLATIONS": 20, "FAKE_TEAMS": 21, "FEEDBACK": 22, "GREETINGS_DAY": 23, "HOW_POINTS_CALCULATED": 24, "HOW_TO_PLAY": 25, "INSTANT_WITHDRAWAL": 26, "JOIN_CONTEST": 27, "LESS_WINNINGS_AMOUNT": 28, "MATCH_ABANDONED": 29, "NEW_TEAM_PATTERN": 30, "NO_EMAIL_CONFIRMATION": 31, "OFFERS_AND_REFERRALS": 32, "PAN_VERIFICATION_FAILED": 33, "POINTS_NOT_UPDATED": 34, "PRESENCE": 35, "REFUND_OF_ADDED_CASH": 36, "REFUND_OF_WRONG_AMOUNT": 37, "SIGNUP_BONUS": 38, "TAXES_ON_WINNINGS": 39, "TEAM_DEADLINE": 40, "THANKS": 41, "TYPES_BONUS": 42, "TYPES_CONTESTS": 43, "UNUTILIZED_MONEY": 44, "UPDATE_APP": 45, "VERIFY_EMAIL": 46, "VERIFY_MOBILE": 47, "VERIFY_PAN": 48, "WHAT_IF_THERES_A_TIE": 49, "WHEN_SCORES_UPDATED": 50, 
"WHEN_WINNINGS_DISTRIBUTED": 51, "WHY_VERIFY": 52, "WINNINGS": 53, "WITHDRAWAL_INTRO": 54, "WITHDRAWAL_STATUS": 55, "WITHDRAWAL_TIME": 56, "WITHDRAW_CASH_BONUS": 57, "WRONG_SCORES": 58}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11/tblogs/events.out.tfevents.1597160596.haptik-ai-research-mum-ml-2-vm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/powerplay11/tblogs/events.out.tfevents.1597160596.haptik-ai-research-mum-ml-2-vm


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {"do_lower_case": true, "model_max_length": 512}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11/training_args.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/powerplay11/training_args.bin


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11/training_progress_scores.csv:
--------------------------------------------------------------------------------
 1 | global_step,mcc,train_loss,eval_loss
 2 | 59,0.027858490914640408,4.28368616104126,4.027746014675851
 3 | 100,0.1388860375009059,4.070891857147217,3.8102310673665194
 4 | 118,0.24644621058859223,3.6875147819519043,3.638863547373626
 5 | 177,0.631254500370442,3.256054639816284,3.030882439370883
 6 | 200,0.8061529798397276,2.834552049636841,2.6386114864026085
 7 | 236,0.8573551743063274,1.9863373041152954,2.184069906250905
 8 | 295,0.9221981792365874,1.7925243377685547,1.4907570501505318
 9 | 300,0.9200168612282212,2.2602338790893555,1.4633160819441586
10 | 354,0.9825407935361834,1.0775566101074219,0.9389180697626986
11 | 400,0.9912666800868809,0.5885568261146545,0.6571497010477518
12 | 413,0.9890793756226487,1.1089935302734375,0.5821901269383349
13 | 472,0.9956353411596269,0.22419719398021698,0.37195264686972407
14 | 500,0.9978154170392022,0.2659372389316559,0.3241557360200559
15 | 531,0.9978153375284966,0.14318816363811493,0.24539439506449942
16 | 590,0.9956353411596269,0.9525749683380127,0.17348836974824888
17 | 600,0.9956353411596269,0.07537608593702316,0.16977044946308864
18 | 649,1.0,0.08004013448953629,0.12424232431892622
19 | 700,1.0,0.11937856674194336,0.09923130501124819
20 | 708,1.0,0.8454639911651611,0.09593275220969975
21 | 767,1.0,0.04086548089981079,0.07504452547153175
22 | 800,1.0,0.048212651163339615,0.0641876907261499
23 | 826,1.0,0.023425769060850143,0.057786518834152464
24 | 885,1.0,0.019573189318180084,0.04410979456378747
25 | 900,1.0,0.03591879829764366,0.042635942493581165
26 | 944,1.0,0.01790086179971695,0.036145012320603354
27 | 1000,1.0,0.024176493287086487,0.031013001214270874
28 | 1003,1.0,0.016845354810357094,0.03092983246670436
29 | 1062,1.0,0.017478376626968384,0.025017562306533427
30 | 1100,1.0,0.012325120158493519,0.02451390260041265
31 | 1121,1.0,0.014549019746482372,0.0232171771763745
32 | 1180,1.0,0.014574043452739716,0.020135768657644926
33 | 1200,1.0,0.13982988893985748,0.017196936019838363
34 | 1239,1.0,0.010516177862882614,0.0157944549553854
35 | 1298,1.0,0.014294502325356007,0.012959890919974295
36 | 1300,1.0,0.00912060122936964,0.012967476392385819
37 | 1357,1.0,0.007764187641441822,0.011520646465153007
38 | 1400,1.0,0.008232791908085346,0.010727511103249203
39 | 1416,1.0,0.008479280397295952,0.010446412244148679
40 | 1475,1.0,0.011196212843060493,0.009631002890103954
41 | 1500,1.0,0.033641137182712555,0.009016437732251519
42 | 1534,1.0,0.008253618143498898,0.008638874067277726
43 | 1593,1.0,0.020688096061348915,0.00793147428055941
44 | 1600,1.0,0.00887473113834858,0.00787849471730701
45 | 1652,1.0,0.025912323966622353,0.007468934810199475
46 | 1700,1.0,0.006430561188608408,0.007107750378485958
47 | 1711,1.0,0.007730090990662575,0.007032733473737361
48 | 1770,1.0,0.006430315785109997,0.006667172366563799
49 | 1800,1.0,0.00811198353767395,0.006497785089916344
50 | 1829,1.0,0.005642751231789589,0.006349041450263585
51 | 1888,1.0,0.006205701734870672,0.006077040227572039
52 | 1900,1.0,0.012514472007751465,0.006018065050307472
53 | 1947,1.0,0.005345325917005539,0.005813967006407299
54 | 2000,1.0,0.004667398054152727,0.005611742819043792
55 | 2006,1.0,0.005360324867069721,0.005591547732257237
56 | 2065,1.0,0.0050165653228759766,0.005392878351859369
57 | 2100,1.0,0.005425313953310251,0.005290802931255203
58 | 2124,1.0,0.014841246418654919,0.005215194995783396
59 | 2183,1.0,0.00473443791270256,0.005049212650253864
60 | 2200,1.0,0.005167374853044748,0.005006618553109594
61 | 2242,1.0,0.005450689699500799,0.004903205370498916
62 | 2300,1.0,0.004484163597226143,0.004777703106687483
63 | 2301,1.0,0.004542009439319372,0.0047756676341138655
64 | 2360,1.0,0.003938835114240646,0.0046628776945615725
65 | 2400,1.0,0.004314529709517956,0.00459044983500015
66 | 2419,1.0,0.004678010940551758,0.004558072354391975
67 | 2478,1.0,0.00308023183606565,0.004468135906667527
68 | 2500,1.0,0.01107504591345787,0.004436580420834786
69 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11_subset/best_model/config.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "architectures": [
  3 |     "BertForSequenceClassification"
  4 |   ],
  5 |   "attention_probs_dropout_prob": 0.1,
  6 |   "gradient_checkpointing": false,
  7 |   "hidden_act": "gelu",
  8 |   "hidden_dropout_prob": 0.1,
  9 |   "hidden_size": 768,
 10 |   "id2label": {
 11 |     "0": "LABEL_0",
 12 |     "1": "LABEL_1",
 13 |     "2": "LABEL_2",
 14 |     "3": "LABEL_3",
 15 |     "4": "LABEL_4",
 16 |     "5": "LABEL_5",
 17 |     "6": "LABEL_6",
 18 |     "7": "LABEL_7",
 19 |     "8": "LABEL_8",
 20 |     "9": "LABEL_9",
 21 |     "10": "LABEL_10",
 22 |     "11": "LABEL_11",
 23 |     "12": "LABEL_12",
 24 |     "13": "LABEL_13",
 25 |     "14": "LABEL_14",
 26 |     "15": "LABEL_15",
 27 |     "16": "LABEL_16",
 28 |     "17": "LABEL_17",
 29 |     "18": "LABEL_18",
 30 |     "19": "LABEL_19",
 31 |     "20": "LABEL_20",
 32 |     "21": "LABEL_21",
 33 |     "22": "LABEL_22",
 34 |     "23": "LABEL_23",
 35 |     "24": "LABEL_24",
 36 |     "25": "LABEL_25",
 37 |     "26": "LABEL_26",
 38 |     "27": "LABEL_27",
 39 |     "28": "LABEL_28",
 40 |     "29": "LABEL_29",
 41 |     "30": "LABEL_30",
 42 |     "31": "LABEL_31",
 43 |     "32": "LABEL_32",
 44 |     "33": "LABEL_33",
 45 |     "34": "LABEL_34",
 46 |     "35": "LABEL_35",
 47 |     "36": "LABEL_36",
 48 |     "37": "LABEL_37",
 49 |     "38": "LABEL_38",
 50 |     "39": "LABEL_39",
 51 |     "40": "LABEL_40",
 52 |     "41": "LABEL_41",
 53 |     "42": "LABEL_42",
 54 |     "43": "LABEL_43",
 55 |     "44": "LABEL_44",
 56 |     "45": "LABEL_45",
 57 |     "46": "LABEL_46",
 58 |     "47": "LABEL_47",
 59 |     "48": "LABEL_48",
 60 |     "49": "LABEL_49",
 61 |     "50": "LABEL_50",
 62 |     "51": "LABEL_51",
 63 |     "52": "LABEL_52",
 64 |     "53": "LABEL_53",
 65 |     "54": "LABEL_54",
 66 |     "55": "LABEL_55",
 67 |     "56": "LABEL_56",
 68 |     "57": "LABEL_57",
 69 |     "58": "LABEL_58"
 70 |   },
 71 |   "initializer_range": 0.02,
 72 |   "intermediate_size": 3072,
 73 |   "label2id": {
 74 |     "LABEL_0": 0,
 75 |     "LABEL_1": 1,
 76 |     "LABEL_10": 10,
 77 |     "LABEL_11": 11,
 78 |     "LABEL_12": 12,
 79 |     "LABEL_13": 13,
 80 |     "LABEL_14": 14,
 81 |     "LABEL_15": 15,
 82 |     "LABEL_16": 16,
 83 |     "LABEL_17": 17,
 84 |     "LABEL_18": 18,
 85 |     "LABEL_19": 19,
 86 |     "LABEL_2": 2,
 87 |     "LABEL_20": 20,
 88 |     "LABEL_21": 21,
 89 |     "LABEL_22": 22,
 90 |     "LABEL_23": 23,
 91 |     "LABEL_24": 24,
 92 |     "LABEL_25": 25,
 93 |     "LABEL_26": 26,
 94 |     "LABEL_27": 27,
 95 |     "LABEL_28": 28,
 96 |     "LABEL_29": 29,
 97 |     "LABEL_3": 3,
 98 |     "LABEL_30": 30,
 99 |     "LABEL_31": 31,
100 |     "LABEL_32": 32,
101 |     "LABEL_33": 33,
102 |     "LABEL_34": 34,
103 |     "LABEL_35": 35,
104 |     "LABEL_36": 36,
105 |     "LABEL_37": 37,
106 |     "LABEL_38": 38,
107 |     "LABEL_39": 39,
108 |     "LABEL_4": 4,
109 |     "LABEL_40": 40,
110 |     "LABEL_41": 41,
111 |     "LABEL_42": 42,
112 |     "LABEL_43": 43,
113 |     "LABEL_44": 44,
114 |     "LABEL_45": 45,
115 |     "LABEL_46": 46,
116 |     "LABEL_47": 47,
117 |     "LABEL_48": 48,
118 |     "LABEL_49": 49,
119 |     "LABEL_5": 5,
120 |     "LABEL_50": 50,
121 |     "LABEL_51": 51,
122 |     "LABEL_52": 52,
123 |     "LABEL_53": 53,
124 |     "LABEL_54": 54,
125 |     "LABEL_55": 55,
126 |     "LABEL_56": 56,
127 |     "LABEL_57": 57,
128 |     "LABEL_58": 58,
129 |     "LABEL_6": 6,
130 |     "LABEL_7": 7,
131 |     "LABEL_8": 8,
132 |     "LABEL_9": 9
133 |   },
134 |   "layer_norm_eps": 1e-12,
135 |   "max_position_embeddings": 512,
136 |   "model_type": "bert",
137 |   "num_attention_heads": 12,
138 |   "num_hidden_layers": 12,
139 |   "pad_token_id": 0,
140 |   "type_vocab_size": 2,
141 |   "vocab_size": 30522
142 | }
143 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11_subset/best_model/eval_results.txt:
--------------------------------------------------------------------------------
1 | eval_loss = 0.015619493484722845
2 | mcc = 1.0
3 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11_subset/best_model/model_args.json:
--------------------------------------------------------------------------------
1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/powerplay11_subset/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/powerplay11_subset/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/powerplay11_subset/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 99, "weight_decay": 0, "labels_list": ["ACCOUNT_BALANCE_DEDUCTED", "ACCOUNT_NOT_VERIFIED", "ACCOUNT_RESET", "APPRECIATION", "BANK_VERIFICATION_DETAILS", "CANNOT_SEE_JOINED_CONTESTS", "CAPABILITIES", "CASH_BONUS", "CASH_BONUS_EXPIRY", "CHANGE_BANK_ACCOUNT", "CHANGE_MOBILE_NUMBER", "CHANGE_PROFILE_TEAM_DETAILS", "CHAT_WITH_AN_AGENT", "CHECK_DEPOSIT_STATUS", "CHECK_WALLET_BALANCE", "CONTACT_NUMBER", "CRITICISM", "DEDUCTED_AMOUNT_NOT_RECEIVED", "DELETE_PAN_CARD", "DOWNLOAD_POWERPLAY11", 
"FAIRPLAY_VIOLATIONS", "FAKE_TEAMS", "FEEDBACK", "GREETINGS_DAY", "HOW_POINTS_CALCULATED", "HOW_TO_PLAY", "INSTANT_WITHDRAWAL", "JOIN_CONTEST", "LESS_WINNINGS_AMOUNT", "MATCH_ABANDONED", "NEW_TEAM_PATTERN", "NO_EMAIL_CONFIRMATION", "OFFERS_AND_REFERRALS", "PAN_VERIFICATION_FAILED", "POINTS_NOT_UPDATED", "PRESENCE", "REFUND_OF_ADDED_CASH", "REFUND_OF_WRONG_AMOUNT", "SIGNUP_BONUS", "TAXES_ON_WINNINGS", "TEAM_DEADLINE", "THANKS", "TYPES_BONUS", "TYPES_CONTESTS", "UNUTILIZED_MONEY", "UPDATE_APP", "VERIFY_EMAIL", "VERIFY_MOBILE", "VERIFY_PAN", "WHAT_IF_THERES_A_TIE", "WHEN_SCORES_UPDATED", "WHEN_WINNINGS_DISTRIBUTED", "WHY_VERIFY", "WINNINGS", "WITHDRAWAL_INTRO", "WITHDRAWAL_STATUS", "WITHDRAWAL_TIME", "WITHDRAW_CASH_BONUS", "WRONG_SCORES"], "labels_map": {"ACCOUNT_BALANCE_DEDUCTED": 0, "ACCOUNT_NOT_VERIFIED": 1, "ACCOUNT_RESET": 2, "APPRECIATION": 3, "BANK_VERIFICATION_DETAILS": 4, "CANNOT_SEE_JOINED_CONTESTS": 5, "CAPABILITIES": 6, "CASH_BONUS": 7, "CASH_BONUS_EXPIRY": 8, "CHANGE_BANK_ACCOUNT": 9, "CHANGE_MOBILE_NUMBER": 10, "CHANGE_PROFILE_TEAM_DETAILS": 11, "CHAT_WITH_AN_AGENT": 12, "CHECK_DEPOSIT_STATUS": 13, "CHECK_WALLET_BALANCE": 14, "CONTACT_NUMBER": 15, "CRITICISM": 16, "DEDUCTED_AMOUNT_NOT_RECEIVED": 17, "DELETE_PAN_CARD": 18, "DOWNLOAD_POWERPLAY11": 19, "FAIRPLAY_VIOLATIONS": 20, "FAKE_TEAMS": 21, "FEEDBACK": 22, "GREETINGS_DAY": 23, "HOW_POINTS_CALCULATED": 24, "HOW_TO_PLAY": 25, "INSTANT_WITHDRAWAL": 26, "JOIN_CONTEST": 27, "LESS_WINNINGS_AMOUNT": 28, "MATCH_ABANDONED": 29, "NEW_TEAM_PATTERN": 30, "NO_EMAIL_CONFIRMATION": 31, "OFFERS_AND_REFERRALS": 32, "PAN_VERIFICATION_FAILED": 33, "POINTS_NOT_UPDATED": 34, "PRESENCE": 35, "REFUND_OF_ADDED_CASH": 36, "REFUND_OF_WRONG_AMOUNT": 37, "SIGNUP_BONUS": 38, "TAXES_ON_WINNINGS": 39, "TEAM_DEADLINE": 40, "THANKS": 41, "TYPES_BONUS": 42, "TYPES_CONTESTS": 43, "UNUTILIZED_MONEY": 44, "UPDATE_APP": 45, "VERIFY_EMAIL": 46, "VERIFY_MOBILE": 47, "VERIFY_PAN": 48, "WHAT_IF_THERES_A_TIE": 49, "WHEN_SCORES_UPDATED": 50, 
"WHEN_WINNINGS_DISTRIBUTED": 51, "WHY_VERIFY": 52, "WINNINGS": 53, "WITHDRAWAL_INTRO": 54, "WITHDRAWAL_STATUS": 55, "WITHDRAWAL_TIME": 56, "WITHDRAW_CASH_BONUS": 57, "WRONG_SCORES": 58}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11_subset/best_model/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11_subset/best_model/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {"do_lower_case": true, "model_max_length": 512}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11_subset/best_model/training_args.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/powerplay11_subset/best_model/training_args.bin


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11_subset/config.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "architectures": [
  3 |     "BertForSequenceClassification"
  4 |   ],
  5 |   "attention_probs_dropout_prob": 0.1,
  6 |   "gradient_checkpointing": false,
  7 |   "hidden_act": "gelu",
  8 |   "hidden_dropout_prob": 0.1,
  9 |   "hidden_size": 768,
 10 |   "id2label": {
 11 |     "0": "LABEL_0",
 12 |     "1": "LABEL_1",
 13 |     "2": "LABEL_2",
 14 |     "3": "LABEL_3",
 15 |     "4": "LABEL_4",
 16 |     "5": "LABEL_5",
 17 |     "6": "LABEL_6",
 18 |     "7": "LABEL_7",
 19 |     "8": "LABEL_8",
 20 |     "9": "LABEL_9",
 21 |     "10": "LABEL_10",
 22 |     "11": "LABEL_11",
 23 |     "12": "LABEL_12",
 24 |     "13": "LABEL_13",
 25 |     "14": "LABEL_14",
 26 |     "15": "LABEL_15",
 27 |     "16": "LABEL_16",
 28 |     "17": "LABEL_17",
 29 |     "18": "LABEL_18",
 30 |     "19": "LABEL_19",
 31 |     "20": "LABEL_20",
 32 |     "21": "LABEL_21",
 33 |     "22": "LABEL_22",
 34 |     "23": "LABEL_23",
 35 |     "24": "LABEL_24",
 36 |     "25": "LABEL_25",
 37 |     "26": "LABEL_26",
 38 |     "27": "LABEL_27",
 39 |     "28": "LABEL_28",
 40 |     "29": "LABEL_29",
 41 |     "30": "LABEL_30",
 42 |     "31": "LABEL_31",
 43 |     "32": "LABEL_32",
 44 |     "33": "LABEL_33",
 45 |     "34": "LABEL_34",
 46 |     "35": "LABEL_35",
 47 |     "36": "LABEL_36",
 48 |     "37": "LABEL_37",
 49 |     "38": "LABEL_38",
 50 |     "39": "LABEL_39",
 51 |     "40": "LABEL_40",
 52 |     "41": "LABEL_41",
 53 |     "42": "LABEL_42",
 54 |     "43": "LABEL_43",
 55 |     "44": "LABEL_44",
 56 |     "45": "LABEL_45",
 57 |     "46": "LABEL_46",
 58 |     "47": "LABEL_47",
 59 |     "48": "LABEL_48",
 60 |     "49": "LABEL_49",
 61 |     "50": "LABEL_50",
 62 |     "51": "LABEL_51",
 63 |     "52": "LABEL_52",
 64 |     "53": "LABEL_53",
 65 |     "54": "LABEL_54",
 66 |     "55": "LABEL_55",
 67 |     "56": "LABEL_56",
 68 |     "57": "LABEL_57",
 69 |     "58": "LABEL_58"
 70 |   },
 71 |   "initializer_range": 0.02,
 72 |   "intermediate_size": 3072,
 73 |   "label2id": {
 74 |     "LABEL_0": 0,
 75 |     "LABEL_1": 1,
 76 |     "LABEL_10": 10,
 77 |     "LABEL_11": 11,
 78 |     "LABEL_12": 12,
 79 |     "LABEL_13": 13,
 80 |     "LABEL_14": 14,
 81 |     "LABEL_15": 15,
 82 |     "LABEL_16": 16,
 83 |     "LABEL_17": 17,
 84 |     "LABEL_18": 18,
 85 |     "LABEL_19": 19,
 86 |     "LABEL_2": 2,
 87 |     "LABEL_20": 20,
 88 |     "LABEL_21": 21,
 89 |     "LABEL_22": 22,
 90 |     "LABEL_23": 23,
 91 |     "LABEL_24": 24,
 92 |     "LABEL_25": 25,
 93 |     "LABEL_26": 26,
 94 |     "LABEL_27": 27,
 95 |     "LABEL_28": 28,
 96 |     "LABEL_29": 29,
 97 |     "LABEL_3": 3,
 98 |     "LABEL_30": 30,
 99 |     "LABEL_31": 31,
100 |     "LABEL_32": 32,
101 |     "LABEL_33": 33,
102 |     "LABEL_34": 34,
103 |     "LABEL_35": 35,
104 |     "LABEL_36": 36,
105 |     "LABEL_37": 37,
106 |     "LABEL_38": 38,
107 |     "LABEL_39": 39,
108 |     "LABEL_4": 4,
109 |     "LABEL_40": 40,
110 |     "LABEL_41": 41,
111 |     "LABEL_42": 42,
112 |     "LABEL_43": 43,
113 |     "LABEL_44": 44,
114 |     "LABEL_45": 45,
115 |     "LABEL_46": 46,
116 |     "LABEL_47": 47,
117 |     "LABEL_48": 48,
118 |     "LABEL_49": 49,
119 |     "LABEL_5": 5,
120 |     "LABEL_50": 50,
121 |     "LABEL_51": 51,
122 |     "LABEL_52": 52,
123 |     "LABEL_53": 53,
124 |     "LABEL_54": 54,
125 |     "LABEL_55": 55,
126 |     "LABEL_56": 56,
127 |     "LABEL_57": 57,
128 |     "LABEL_58": 58,
129 |     "LABEL_6": 6,
130 |     "LABEL_7": 7,
131 |     "LABEL_8": 8,
132 |     "LABEL_9": 9
133 |   },
134 |   "layer_norm_eps": 1e-12,
135 |   "max_position_embeddings": 512,
136 |   "model_type": "bert",
137 |   "num_attention_heads": 12,
138 |   "num_hidden_layers": 12,
139 |   "pad_token_id": 0,
140 |   "type_vocab_size": 2,
141 |   "vocab_size": 30522
142 | }
143 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11_subset/eval_results.txt:
--------------------------------------------------------------------------------
1 | eval_loss = 0.015237725763158365
2 | mcc = 1.0
3 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11_subset/model_args.json:
--------------------------------------------------------------------------------
1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/powerplay11_subset/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/powerplay11_subset/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/powerplay11_subset/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 99, "weight_decay": 0, "labels_list": ["ACCOUNT_BALANCE_DEDUCTED", "ACCOUNT_NOT_VERIFIED", "ACCOUNT_RESET", "APPRECIATION", "BANK_VERIFICATION_DETAILS", "CANNOT_SEE_JOINED_CONTESTS", "CAPABILITIES", "CASH_BONUS", "CASH_BONUS_EXPIRY", "CHANGE_BANK_ACCOUNT", "CHANGE_MOBILE_NUMBER", "CHANGE_PROFILE_TEAM_DETAILS", "CHAT_WITH_AN_AGENT", "CHECK_DEPOSIT_STATUS", "CHECK_WALLET_BALANCE", "CONTACT_NUMBER", "CRITICISM", "DEDUCTED_AMOUNT_NOT_RECEIVED", "DELETE_PAN_CARD", "DOWNLOAD_POWERPLAY11", 
"FAIRPLAY_VIOLATIONS", "FAKE_TEAMS", "FEEDBACK", "GREETINGS_DAY", "HOW_POINTS_CALCULATED", "HOW_TO_PLAY", "INSTANT_WITHDRAWAL", "JOIN_CONTEST", "LESS_WINNINGS_AMOUNT", "MATCH_ABANDONED", "NEW_TEAM_PATTERN", "NO_EMAIL_CONFIRMATION", "OFFERS_AND_REFERRALS", "PAN_VERIFICATION_FAILED", "POINTS_NOT_UPDATED", "PRESENCE", "REFUND_OF_ADDED_CASH", "REFUND_OF_WRONG_AMOUNT", "SIGNUP_BONUS", "TAXES_ON_WINNINGS", "TEAM_DEADLINE", "THANKS", "TYPES_BONUS", "TYPES_CONTESTS", "UNUTILIZED_MONEY", "UPDATE_APP", "VERIFY_EMAIL", "VERIFY_MOBILE", "VERIFY_PAN", "WHAT_IF_THERES_A_TIE", "WHEN_SCORES_UPDATED", "WHEN_WINNINGS_DISTRIBUTED", "WHY_VERIFY", "WINNINGS", "WITHDRAWAL_INTRO", "WITHDRAWAL_STATUS", "WITHDRAWAL_TIME", "WITHDRAW_CASH_BONUS", "WRONG_SCORES"], "labels_map": {"ACCOUNT_BALANCE_DEDUCTED": 0, "ACCOUNT_NOT_VERIFIED": 1, "ACCOUNT_RESET": 2, "APPRECIATION": 3, "BANK_VERIFICATION_DETAILS": 4, "CANNOT_SEE_JOINED_CONTESTS": 5, "CAPABILITIES": 6, "CASH_BONUS": 7, "CASH_BONUS_EXPIRY": 8, "CHANGE_BANK_ACCOUNT": 9, "CHANGE_MOBILE_NUMBER": 10, "CHANGE_PROFILE_TEAM_DETAILS": 11, "CHAT_WITH_AN_AGENT": 12, "CHECK_DEPOSIT_STATUS": 13, "CHECK_WALLET_BALANCE": 14, "CONTACT_NUMBER": 15, "CRITICISM": 16, "DEDUCTED_AMOUNT_NOT_RECEIVED": 17, "DELETE_PAN_CARD": 18, "DOWNLOAD_POWERPLAY11": 19, "FAIRPLAY_VIOLATIONS": 20, "FAKE_TEAMS": 21, "FEEDBACK": 22, "GREETINGS_DAY": 23, "HOW_POINTS_CALCULATED": 24, "HOW_TO_PLAY": 25, "INSTANT_WITHDRAWAL": 26, "JOIN_CONTEST": 27, "LESS_WINNINGS_AMOUNT": 28, "MATCH_ABANDONED": 29, "NEW_TEAM_PATTERN": 30, "NO_EMAIL_CONFIRMATION": 31, "OFFERS_AND_REFERRALS": 32, "PAN_VERIFICATION_FAILED": 33, "POINTS_NOT_UPDATED": 34, "PRESENCE": 35, "REFUND_OF_ADDED_CASH": 36, "REFUND_OF_WRONG_AMOUNT": 37, "SIGNUP_BONUS": 38, "TAXES_ON_WINNINGS": 39, "TEAM_DEADLINE": 40, "THANKS": 41, "TYPES_BONUS": 42, "TYPES_CONTESTS": 43, "UNUTILIZED_MONEY": 44, "UPDATE_APP": 45, "VERIFY_EMAIL": 46, "VERIFY_MOBILE": 47, "VERIFY_PAN": 48, "WHAT_IF_THERES_A_TIE": 49, "WHEN_SCORES_UPDATED": 50, 
"WHEN_WINNINGS_DISTRIBUTED": 51, "WHY_VERIFY": 52, "WINNINGS": 53, "WITHDRAWAL_INTRO": 54, "WITHDRAWAL_STATUS": 55, "WITHDRAWAL_TIME": 56, "WITHDRAW_CASH_BONUS": 57, "WRONG_SCORES": 58}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11_subset/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11_subset/tblogs/events.out.tfevents.1597163469.haptik-ai-research-mum-ml-2-vm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/powerplay11_subset/tblogs/events.out.tfevents.1597163469.haptik-ai-research-mum-ml-2-vm


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11_subset/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {"do_lower_case": true, "model_max_length": 512}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11_subset/training_args.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/powerplay11_subset/training_args.bin


--------------------------------------------------------------------------------
/platforms/bert/bert_models/powerplay11_subset/training_progress_scores.csv:
--------------------------------------------------------------------------------
 1 | global_step,mcc,train_loss,eval_loss
 2 | 33,0.006966065033674241,4.275940418243408,4.088082140142268
 3 | 66,0.10181784338639555,4.076957702636719,3.853632081638683
 4 | 99,0.31666196985103084,3.524799346923828,3.481291640888561
 5 | 100,0.33622205757382423,3.2819881439208984,3.4670489845853862
 6 | 132,0.5491825774659016,3.228721857070923,3.1800931294759116
 7 | 165,0.7988562456319384,2.9554340839385986,2.7183942072319263
 8 | 198,0.9029989574195603,2.1860103607177734,2.3607854120659106
 9 | 200,0.8998233054849961,2.3953323364257812,2.3395220908251675
10 | 231,0.9608116153033132,2.4642422199249268,1.8749224055897107
11 | 264,0.976456540029086,1.8713085651397705,1.491812203869675
12 | 297,1.0,1.0459480285644531,1.167946174289241
13 | 300,1.0,1.2138229608535767,1.1448865298068884
14 | 330,1.0,1.2414703369140625,0.8946083594452251
15 | 363,0.9960677596969424,1.0370596647262573,0.6936323516296617
16 | 396,1.0,0.7679606676101685,0.5725420885013811
17 | 400,1.0,0.7788105010986328,0.5357545216878256
18 | 429,1.0,0.4268004894256592,0.4331010647795417
19 | 462,1.0,0.48410671949386597,0.3253726607019251
20 | 495,1.0,0.2329229712486267,0.27336488235177414
21 | 500,1.0,0.1546279340982437,0.2654947123744271
22 | 528,1.0,0.12326618283987045,0.20769070269483508
23 | 561,1.0,0.07649935036897659,0.16234044392000546
24 | 594,1.0,0.11826501041650772,0.1328082587005514
25 | 600,1.0,0.1337045580148697,0.1261398276370583
26 | 627,1.0,0.055853527039289474,0.10562320934100584
27 | 660,1.0,0.26937466859817505,0.08653705059127374
28 | 693,1.0,0.06533235311508179,0.07060574209599783
29 | 700,1.0,0.04325779899954796,0.06917516362260688
30 | 726,1.0,0.04639727249741554,0.06082955251137415
31 | 759,1.0,0.0582999512553215,0.05410802725589636
32 | 792,1.0,0.0368194580078125,0.049195334428187576
33 | 800,1.0,0.03648754954338074,0.04870836216617714
34 | 825,1.0,0.02754053846001625,0.04354375335528995
35 | 858,1.0,0.03395608440041542,0.03737479882935683
36 | 891,1.0,0.09397966414690018,0.033415756950324234
37 | 900,1.0,0.037745073437690735,0.032758059092994896
38 | 924,1.0,0.02583269588649273,0.03013399328020486
39 | 957,1.0,0.0232711024582386,0.027921919456937096
40 | 990,1.0,0.023077527061104774,0.025824453455932213
41 | 1000,1.0,0.023229582235217094,0.025291424921967766
42 | 1023,1.0,0.03731586039066315,0.024271921311138256
43 | 1056,1.0,0.02345268800854683,0.022990159295273548
44 | 1089,1.0,0.02155761979520321,0.0219306516827959
45 | 1100,1.0,0.017719632014632225,0.021582975898954002
46 | 1122,1.0,0.013472222723066807,0.020954335305952664
47 | 1155,1.0,0.04192318022251129,0.02015054000146461
48 | 1188,1.0,0.01810283586382866,0.019379823920175884
49 | 1200,1.0,0.028326159343123436,0.019140559446179504
50 | 1221,1.0,0.013884490355849266,0.018714478070085697
51 | 1254,1.0,0.01331179216504097,0.01814545854700334
52 | 1287,1.0,0.015835518017411232,0.01765377311543985
53 | 1300,1.0,0.013886284083127975,0.017462069253352554
54 | 1320,1.0,0.014652382582426071,0.01720011445947669
55 | 1353,1.0,0.02778574638068676,0.016815894622017036
56 | 1386,1.0,0.012468253262341022,0.016461734010866196
57 | 1400,1.0,0.02463061362504959,0.016339947497754387
58 | 1419,1.0,0.018762772902846336,0.0161741218162757
59 | 1452,1.0,0.0297338105738163,0.01592277100479061
60 | 1485,1.0,0.010572281666100025,0.015703567698823685
61 | 1500,1.0,0.014699919149279594,0.015619493484722845
62 | 1518,1.0,0.011565894819796085,0.01553100926067793
63 | 1551,1.0,0.01407864410430193,0.015407157356314587
64 | 1584,1.0,0.015383089892566204,0.015314364721151915
65 | 1600,1.0,0.022277144715189934,0.015281464396552607
66 | 1617,1.0,0.017058053985238075,0.015256680124862627
67 | 1650,1.0,0.013741384260356426,0.015237725763158365
68 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress/best_model/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "BertForSequenceClassification"
 4 |   ],
 5 |   "attention_probs_dropout_prob": 0.1,
 6 |   "gradient_checkpointing": false,
 7 |   "hidden_act": "gelu",
 8 |   "hidden_dropout_prob": 0.1,
 9 |   "hidden_size": 768,
10 |   "id2label": {
11 |     "0": "LABEL_0",
12 |     "1": "LABEL_1",
13 |     "2": "LABEL_2",
14 |     "3": "LABEL_3",
15 |     "4": "LABEL_4",
16 |     "5": "LABEL_5",
17 |     "6": "LABEL_6",
18 |     "7": "LABEL_7",
19 |     "8": "LABEL_8",
20 |     "9": "LABEL_9",
21 |     "10": "LABEL_10",
22 |     "11": "LABEL_11",
23 |     "12": "LABEL_12",
24 |     "13": "LABEL_13",
25 |     "14": "LABEL_14",
26 |     "15": "LABEL_15",
27 |     "16": "LABEL_16",
28 |     "17": "LABEL_17",
29 |     "18": "LABEL_18",
30 |     "19": "LABEL_19",
31 |     "20": "LABEL_20"
32 |   },
33 |   "initializer_range": 0.02,
34 |   "intermediate_size": 3072,
35 |   "label2id": {
36 |     "LABEL_0": 0,
37 |     "LABEL_1": 1,
38 |     "LABEL_10": 10,
39 |     "LABEL_11": 11,
40 |     "LABEL_12": 12,
41 |     "LABEL_13": 13,
42 |     "LABEL_14": 14,
43 |     "LABEL_15": 15,
44 |     "LABEL_16": 16,
45 |     "LABEL_17": 17,
46 |     "LABEL_18": 18,
47 |     "LABEL_19": 19,
48 |     "LABEL_2": 2,
49 |     "LABEL_20": 20,
50 |     "LABEL_3": 3,
51 |     "LABEL_4": 4,
52 |     "LABEL_5": 5,
53 |     "LABEL_6": 6,
54 |     "LABEL_7": 7,
55 |     "LABEL_8": 8,
56 |     "LABEL_9": 9
57 |   },
58 |   "layer_norm_eps": 1e-12,
59 |   "max_position_embeddings": 512,
60 |   "model_type": "bert",
61 |   "num_attention_heads": 12,
62 |   "num_hidden_layers": 12,
63 |   "pad_token_id": 0,
64 |   "type_vocab_size": 2,
65 |   "vocab_size": 30522
66 | }
67 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress/best_model/eval_results.txt:
--------------------------------------------------------------------------------
1 | eval_loss = 0.0024695384270716006
2 | mcc = 1.0
3 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress/best_model/model_args.json:
--------------------------------------------------------------------------------
1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/sofmattress/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/sofmattress/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/sofmattress/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 123, "weight_decay": 0, "labels_list": ["100_NIGHT_TRIAL_OFFER", "ABOUT_SOF_MATTRESS", "CANCEL_ORDER", "CHECK_PINCODE", "COD", "COMPARISON", "DELAY_IN_DELIVERY", "DISTRIBUTORS", "EMI", "ERGO_FEATURES", "LEAD_GEN", "MATTRESS_COST", "OFFERS", "ORDER_STATUS", "ORTHO_FEATURES", "PILLOWS", "PRODUCT_VARIANTS", "RETURN_EXCHANGE", "SIZE_CUSTOMIZATION", "WARRANTY", "WHAT_SIZE_TO_ORDER"], "labels_map": {"100_NIGHT_TRIAL_OFFER": 0, "ABOUT_SOF_MATTRESS": 1, "CANCEL_ORDER": 2, "CHECK_PINCODE": 3, "COD": 4, "COMPARISON": 5, 
"DELAY_IN_DELIVERY": 6, "DISTRIBUTORS": 7, "EMI": 8, "ERGO_FEATURES": 9, "LEAD_GEN": 10, "MATTRESS_COST": 11, "OFFERS": 12, "ORDER_STATUS": 13, "ORTHO_FEATURES": 14, "PILLOWS": 15, "PRODUCT_VARIANTS": 16, "RETURN_EXCHANGE": 17, "SIZE_CUSTOMIZATION": 18, "WARRANTY": 19, "WHAT_SIZE_TO_ORDER": 20}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress/best_model/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress/best_model/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {"do_lower_case": true, "model_max_length": 512}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress/best_model/training_args.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/sofmattress/best_model/training_args.bin


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "BertForSequenceClassification"
 4 |   ],
 5 |   "attention_probs_dropout_prob": 0.1,
 6 |   "gradient_checkpointing": false,
 7 |   "hidden_act": "gelu",
 8 |   "hidden_dropout_prob": 0.1,
 9 |   "hidden_size": 768,
10 |   "id2label": {
11 |     "0": "LABEL_0",
12 |     "1": "LABEL_1",
13 |     "2": "LABEL_2",
14 |     "3": "LABEL_3",
15 |     "4": "LABEL_4",
16 |     "5": "LABEL_5",
17 |     "6": "LABEL_6",
18 |     "7": "LABEL_7",
19 |     "8": "LABEL_8",
20 |     "9": "LABEL_9",
21 |     "10": "LABEL_10",
22 |     "11": "LABEL_11",
23 |     "12": "LABEL_12",
24 |     "13": "LABEL_13",
25 |     "14": "LABEL_14",
26 |     "15": "LABEL_15",
27 |     "16": "LABEL_16",
28 |     "17": "LABEL_17",
29 |     "18": "LABEL_18",
30 |     "19": "LABEL_19",
31 |     "20": "LABEL_20"
32 |   },
33 |   "initializer_range": 0.02,
34 |   "intermediate_size": 3072,
35 |   "label2id": {
36 |     "LABEL_0": 0,
37 |     "LABEL_1": 1,
38 |     "LABEL_10": 10,
39 |     "LABEL_11": 11,
40 |     "LABEL_12": 12,
41 |     "LABEL_13": 13,
42 |     "LABEL_14": 14,
43 |     "LABEL_15": 15,
44 |     "LABEL_16": 16,
45 |     "LABEL_17": 17,
46 |     "LABEL_18": 18,
47 |     "LABEL_19": 19,
48 |     "LABEL_2": 2,
49 |     "LABEL_20": 20,
50 |     "LABEL_3": 3,
51 |     "LABEL_4": 4,
52 |     "LABEL_5": 5,
53 |     "LABEL_6": 6,
54 |     "LABEL_7": 7,
55 |     "LABEL_8": 8,
56 |     "LABEL_9": 9
57 |   },
58 |   "layer_norm_eps": 1e-12,
59 |   "max_position_embeddings": 512,
60 |   "model_type": "bert",
61 |   "num_attention_heads": 12,
62 |   "num_hidden_layers": 12,
63 |   "pad_token_id": 0,
64 |   "type_vocab_size": 2,
65 |   "vocab_size": 30522
66 | }
67 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress/eval_results.txt:
--------------------------------------------------------------------------------
1 | eval_loss = 0.002091184871770987
2 | mcc = 1.0
3 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress/model_args.json:
--------------------------------------------------------------------------------
1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/sofmattress/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/sofmattress/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/sofmattress/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 123, "weight_decay": 0, "labels_list": ["100_NIGHT_TRIAL_OFFER", "ABOUT_SOF_MATTRESS", "CANCEL_ORDER", "CHECK_PINCODE", "COD", "COMPARISON", "DELAY_IN_DELIVERY", "DISTRIBUTORS", "EMI", "ERGO_FEATURES", "LEAD_GEN", "MATTRESS_COST", "OFFERS", "ORDER_STATUS", "ORTHO_FEATURES", "PILLOWS", "PRODUCT_VARIANTS", "RETURN_EXCHANGE", "SIZE_CUSTOMIZATION", "WARRANTY", "WHAT_SIZE_TO_ORDER"], "labels_map": {"100_NIGHT_TRIAL_OFFER": 0, "ABOUT_SOF_MATTRESS": 1, "CANCEL_ORDER": 2, "CHECK_PINCODE": 3, "COD": 4, "COMPARISON": 5, 
"DELAY_IN_DELIVERY": 6, "DISTRIBUTORS": 7, "EMI": 8, "ERGO_FEATURES": 9, "LEAD_GEN": 10, "MATTRESS_COST": 11, "OFFERS": 12, "ORDER_STATUS": 13, "ORTHO_FEATURES": 14, "PILLOWS": 15, "PRODUCT_VARIANTS": 16, "RETURN_EXCHANGE": 17, "SIZE_CUSTOMIZATION": 18, "WARRANTY": 19, "WHAT_SIZE_TO_ORDER": 20}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress/tblogs/events.out.tfevents.1597159859.haptik-ai-research-mum-ml-2-vm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/sofmattress/tblogs/events.out.tfevents.1597159859.haptik-ai-research-mum-ml-2-vm


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {"do_lower_case": true, "model_max_length": 512}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress/training_args.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/sofmattress/training_args.bin


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress/training_progress_scores.csv:
--------------------------------------------------------------------------------
 1 | global_step,mcc,train_loss,eval_loss
 2 | 41,0.03741377683874904,2.9892494678497314,2.970834377335339
 3 | 82,0.27124933812063723,2.810255527496338,2.6317273523749374
 4 | 100,0.4951481145617849,2.4308252334594727,2.384494185447693
 5 | 123,0.7154417194916144,2.1131794452667236,1.9524706195040447
 6 | 164,0.9177045047865772,1.0891566276550293,1.0203317098501252
 7 | 200,0.9806938791151254,0.6156212091445923,0.4422170934153766
 8 | 205,0.9903337975187425,0.6984859108924866,0.38467208968430033
 9 | 246,0.9967804561889998,0.16406548023223877,0.1386817933582678
10 | 287,1.0,0.04927260801196098,0.05331289704616477
11 | 300,1.0,0.040390364825725555,0.04004393454368522
12 | 328,0.9967807673479875,0.02060823328793049,0.02845955994434473
13 | 369,1.0,0.015270467847585678,0.0164372295868106
14 | 400,1.0,0.013940568082034588,0.013048355899206021
15 | 410,1.0,0.014762028120458126,0.012293782971072488
16 | 451,1.0,0.010723605751991272,0.009894206552062093
17 | 492,1.0,0.00876891054213047,0.008308980886529132
18 | 500,1.0,0.007853677496314049,0.008068628541034897
19 | 533,1.0,0.007977331057190895,0.0071861760622662745
20 | 574,1.0,0.007647466380149126,0.006318186761856806
21 | 600,1.0,0.005637817084789276,0.005862325509419528
22 | 615,1.0,0.0068319146521389484,0.005629960061391679
23 | 656,1.0,0.005148341413587332,0.005074366859001357
24 | 697,1.0,0.004916307516396046,0.004619956442450241
25 | 700,1.0,0.004129624925553799,0.004589953357580959
26 | 738,1.0,0.004497524816542864,0.0042399843166605
27 | 779,1.0,0.004239839501678944,0.003916610889818247
28 | 800,1.0,0.0041899955831468105,0.003770596035415443
29 | 820,1.0,0.003951283171772957,0.003637692340218076
30 | 861,1.0,0.003503492334857583,0.003397048292000119
31 | 900,1.0,0.0032421466894447803,0.0031967297332679352
32 | 902,1.0,0.0031739480327814817,0.003187044398722852
33 | 943,1.0,0.0034325651358813047,0.003001999433674827
34 | 984,1.0,0.0032226387411355972,0.0028411815306398926
35 | 1000,1.0,0.0030870833434164524,0.0027810372468992702
36 | 1025,1.0,0.002899862127378583,0.002694926326867284
37 | 1066,1.0,0.002674049697816372,0.0025664146978226377
38 | 1100,1.0,0.002695617265999317,0.0024695384270716006
39 | 1107,1.0,0.0022791139781475067,0.002450472715015455
40 | 1148,1.0,0.002618222963064909,0.002346959058763232
41 | 1189,1.0,0.0022062344942241907,0.002252724277219031
42 | 1200,1.0,0.0024347787257283926,0.0022290816911064632
43 | 1230,1.0,0.002199501032009721,0.002167849923574888
44 | 1271,1.0,0.002031237818300724,0.002091184871770987
45 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress_subset/best_model/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "BertForSequenceClassification"
 4 |   ],
 5 |   "attention_probs_dropout_prob": 0.1,
 6 |   "gradient_checkpointing": false,
 7 |   "hidden_act": "gelu",
 8 |   "hidden_dropout_prob": 0.1,
 9 |   "hidden_size": 768,
10 |   "id2label": {
11 |     "0": "LABEL_0",
12 |     "1": "LABEL_1",
13 |     "2": "LABEL_2",
14 |     "3": "LABEL_3",
15 |     "4": "LABEL_4",
16 |     "5": "LABEL_5",
17 |     "6": "LABEL_6",
18 |     "7": "LABEL_7",
19 |     "8": "LABEL_8",
20 |     "9": "LABEL_9",
21 |     "10": "LABEL_10",
22 |     "11": "LABEL_11",
23 |     "12": "LABEL_12",
24 |     "13": "LABEL_13",
25 |     "14": "LABEL_14",
26 |     "15": "LABEL_15",
27 |     "16": "LABEL_16",
28 |     "17": "LABEL_17",
29 |     "18": "LABEL_18",
30 |     "19": "LABEL_19",
31 |     "20": "LABEL_20"
32 |   },
33 |   "initializer_range": 0.02,
34 |   "intermediate_size": 3072,
35 |   "label2id": {
36 |     "LABEL_0": 0,
37 |     "LABEL_1": 1,
38 |     "LABEL_10": 10,
39 |     "LABEL_11": 11,
40 |     "LABEL_12": 12,
41 |     "LABEL_13": 13,
42 |     "LABEL_14": 14,
43 |     "LABEL_15": 15,
44 |     "LABEL_16": 16,
45 |     "LABEL_17": 17,
46 |     "LABEL_18": 18,
47 |     "LABEL_19": 19,
48 |     "LABEL_2": 2,
49 |     "LABEL_20": 20,
50 |     "LABEL_3": 3,
51 |     "LABEL_4": 4,
52 |     "LABEL_5": 5,
53 |     "LABEL_6": 6,
54 |     "LABEL_7": 7,
55 |     "LABEL_8": 8,
56 |     "LABEL_9": 9
57 |   },
58 |   "layer_norm_eps": 1e-12,
59 |   "max_position_embeddings": 512,
60 |   "model_type": "bert",
61 |   "num_attention_heads": 12,
62 |   "num_hidden_layers": 12,
63 |   "pad_token_id": 0,
64 |   "type_vocab_size": 2,
65 |   "vocab_size": 30522
66 | }
67 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress_subset/best_model/eval_results.txt:
--------------------------------------------------------------------------------
1 | eval_loss = 0.004921248577454168
2 | mcc = 1.0
3 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress_subset/best_model/model_args.json:
--------------------------------------------------------------------------------
1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/sofmattress_subset/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/sofmattress_subset/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/sofmattress_subset/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 69, "weight_decay": 0, "labels_list": ["100_NIGHT_TRIAL_OFFER", "ABOUT_SOF_MATTRESS", "CANCEL_ORDER", "CHECK_PINCODE", "COD", "COMPARISON", "DELAY_IN_DELIVERY", "DISTRIBUTORS", "EMI", "ERGO_FEATURES", "LEAD_GEN", "MATTRESS_COST", "OFFERS", "ORDER_STATUS", "ORTHO_FEATURES", "PILLOWS", "PRODUCT_VARIANTS", "RETURN_EXCHANGE", "SIZE_CUSTOMIZATION", "WARRANTY", "WHAT_SIZE_TO_ORDER"], "labels_map": {"100_NIGHT_TRIAL_OFFER": 0, "ABOUT_SOF_MATTRESS": 1, "CANCEL_ORDER": 2, "CHECK_PINCODE": 3, "COD": 4, 
"COMPARISON": 5, "DELAY_IN_DELIVERY": 6, "DISTRIBUTORS": 7, "EMI": 8, "ERGO_FEATURES": 9, "LEAD_GEN": 10, "MATTRESS_COST": 11, "OFFERS": 12, "ORDER_STATUS": 13, "ORTHO_FEATURES": 14, "PILLOWS": 15, "PRODUCT_VARIANTS": 16, "RETURN_EXCHANGE": 17, "SIZE_CUSTOMIZATION": 18, "WARRANTY": 19, "WHAT_SIZE_TO_ORDER": 20}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress_subset/best_model/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress_subset/best_model/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {"do_lower_case": true, "model_max_length": 512}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress_subset/best_model/training_args.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/sofmattress_subset/best_model/training_args.bin


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress_subset/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "BertForSequenceClassification"
 4 |   ],
 5 |   "attention_probs_dropout_prob": 0.1,
 6 |   "gradient_checkpointing": false,
 7 |   "hidden_act": "gelu",
 8 |   "hidden_dropout_prob": 0.1,
 9 |   "hidden_size": 768,
10 |   "id2label": {
11 |     "0": "LABEL_0",
12 |     "1": "LABEL_1",
13 |     "2": "LABEL_2",
14 |     "3": "LABEL_3",
15 |     "4": "LABEL_4",
16 |     "5": "LABEL_5",
17 |     "6": "LABEL_6",
18 |     "7": "LABEL_7",
19 |     "8": "LABEL_8",
20 |     "9": "LABEL_9",
21 |     "10": "LABEL_10",
22 |     "11": "LABEL_11",
23 |     "12": "LABEL_12",
24 |     "13": "LABEL_13",
25 |     "14": "LABEL_14",
26 |     "15": "LABEL_15",
27 |     "16": "LABEL_16",
28 |     "17": "LABEL_17",
29 |     "18": "LABEL_18",
30 |     "19": "LABEL_19",
31 |     "20": "LABEL_20"
32 |   },
33 |   "initializer_range": 0.02,
34 |   "intermediate_size": 3072,
35 |   "label2id": {
36 |     "LABEL_0": 0,
37 |     "LABEL_1": 1,
38 |     "LABEL_10": 10,
39 |     "LABEL_11": 11,
40 |     "LABEL_12": 12,
41 |     "LABEL_13": 13,
42 |     "LABEL_14": 14,
43 |     "LABEL_15": 15,
44 |     "LABEL_16": 16,
45 |     "LABEL_17": 17,
46 |     "LABEL_18": 18,
47 |     "LABEL_19": 19,
48 |     "LABEL_2": 2,
49 |     "LABEL_20": 20,
50 |     "LABEL_3": 3,
51 |     "LABEL_4": 4,
52 |     "LABEL_5": 5,
53 |     "LABEL_6": 6,
54 |     "LABEL_7": 7,
55 |     "LABEL_8": 8,
56 |     "LABEL_9": 9
57 |   },
58 |   "layer_norm_eps": 1e-12,
59 |   "max_position_embeddings": 512,
60 |   "model_type": "bert",
61 |   "num_attention_heads": 12,
62 |   "num_hidden_layers": 12,
63 |   "pad_token_id": 0,
64 |   "type_vocab_size": 2,
65 |   "vocab_size": 30522
66 | }
67 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress_subset/eval_results.txt:
--------------------------------------------------------------------------------
1 | eval_loss = 0.004485103038504072
2 | mcc = 1.0
3 | 


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress_subset/model_args.json:
--------------------------------------------------------------------------------
1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/sofmattress_subset/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/sofmattress_subset/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/sofmattress_subset/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 69, "weight_decay": 0, "labels_list": ["100_NIGHT_TRIAL_OFFER", "ABOUT_SOF_MATTRESS", "CANCEL_ORDER", "CHECK_PINCODE", "COD", "COMPARISON", "DELAY_IN_DELIVERY", "DISTRIBUTORS", "EMI", "ERGO_FEATURES", "LEAD_GEN", "MATTRESS_COST", "OFFERS", "ORDER_STATUS", "ORTHO_FEATURES", "PILLOWS", "PRODUCT_VARIANTS", "RETURN_EXCHANGE", "SIZE_CUSTOMIZATION", "WARRANTY", "WHAT_SIZE_TO_ORDER"], "labels_map": {"100_NIGHT_TRIAL_OFFER": 0, "ABOUT_SOF_MATTRESS": 1, "CANCEL_ORDER": 2, "CHECK_PINCODE": 3, "COD": 4, 
"COMPARISON": 5, "DELAY_IN_DELIVERY": 6, "DISTRIBUTORS": 7, "EMI": 8, "ERGO_FEATURES": 9, "LEAD_GEN": 10, "MATTRESS_COST": 11, "OFFERS": 12, "ORDER_STATUS": 13, "ORTHO_FEATURES": 14, "PILLOWS": 15, "PRODUCT_VARIANTS": 16, "RETURN_EXCHANGE": 17, "SIZE_CUSTOMIZATION": 18, "WARRANTY": 19, "WHAT_SIZE_TO_ORDER": 20}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress_subset/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress_subset/tblogs/events.out.tfevents.1597162813.haptik-ai-research-mum-ml-2-vm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/sofmattress_subset/tblogs/events.out.tfevents.1597162813.haptik-ai-research-mum-ml-2-vm


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress_subset/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {"do_lower_case": true, "model_max_length": 512}


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress_subset/training_args.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/sofmattress_subset/training_args.bin


--------------------------------------------------------------------------------
/platforms/bert/bert_models/sofmattress_subset/training_progress_scores.csv:
--------------------------------------------------------------------------------
 1 | global_step,mcc,train_loss,eval_loss
 2 | 23,0.06857313816302607,3.296887159347534,2.970322909562484
 3 | 46,0.11255032170974624,3.0493152141571045,2.7839398798735244
 4 | 69,0.38557671771111646,2.382086753845215,2.502764193908028
 5 | 92,0.6421384771966505,2.477137565612793,2.1489793321360713
 6 | 100,0.7418468326305145,2.250485897064209,1.967790370402129
 7 | 115,0.9012127750551232,1.7204360961914062,1.6867278451504915
 8 | 138,0.9649152017652197,1.0926810503005981,1.1395558429800945
 9 | 161,0.9824078059539685,0.8549859523773193,0.7397814315298329
10 | 184,1.0,0.6483610272407532,0.41853353251581604
11 | 200,1.0,0.2512955963611603,0.29198158629562543
12 | 207,1.0,0.25165069103240967,0.25284040978421335
13 | 230,1.0,0.13244327902793884,0.12950243800878525
14 | 253,1.0,0.06572035700082779,0.07247054738842923
15 | 276,1.0,0.03972963988780975,0.043389985013915146
16 | 299,1.0,0.023962117731571198,0.02914676556120748
17 | 300,1.0,0.03062400594353676,0.028741382791296295
18 | 322,1.0,0.0201791450381279,0.022470875197778576
19 | 345,1.0,0.01955774612724781,0.018866707449373993
20 | 368,1.0,0.01244509220123291,0.01629435388452333
21 | 391,1.0,0.015682876110076904,0.014368740598792616
22 | 400,1.0,0.014525731094181538,0.01375624415991099
23 | 414,1.0,0.014349344186484814,0.012883992379774218
24 | 437,1.0,0.013241786509752274,0.011689582839608192
25 | 460,1.0,0.00939303170889616,0.010714562569299469
26 | 483,1.0,0.009124535135924816,0.009903177801195694
27 | 500,1.0,0.010284009389579296,0.009391567993747152
28 | 506,1.0,0.010540238581597805,0.00921912825382922
29 | 529,1.0,0.00783354602754116,0.008615645600239868
30 | 552,1.0,0.010056305676698685,0.008105209866619629
31 | 575,1.0,0.007870323024690151,0.007658644133935804
32 | 598,1.0,0.007335765287280083,0.007270081859567891
33 | 600,1.0,0.006628462113440037,0.007236725832943035
34 | 621,1.0,0.006512134801596403,0.006916760689700427
35 | 644,1.0,0.005578524433076382,0.006608464593148749
36 | 667,1.0,0.005031981505453587,0.006333170053751573
37 | 690,1.0,0.007127638440579176,0.006084576006168904
38 | 700,1.0,0.005631319712847471,0.005984096929592931
39 | 713,1.0,0.005153062753379345,0.005861621997926546
40 | 736,1.0,0.005420178174972534,0.005661310709041098
41 | 759,1.0,0.00673981849104166,0.00548125121175595
42 | 782,1.0,0.005228007677942514,0.0053189582515345965
43 | 800,1.0,0.005543965380638838,0.005201474702714578
44 | 805,1.0,0.00513844657689333,0.005171149321224379
45 | 828,1.0,0.0055143460631370544,0.005038455999253884
46 | 851,1.0,0.005841855891048908,0.004921248577454168
47 | 874,1.0,0.004199676681309938,0.00481208910882149
48 | 897,1.0,0.004328454844653606,0.004715297877302636
49 | 900,1.0,0.0057920184917747974,0.004703508386307437
50 | 920,1.0,0.004222389310598373,0.004629216561822787
51 | 943,1.0,0.003910826984792948,0.004553011365477805
52 | 966,1.0,0.003987176809459925,0.004485103038504072
53 | 


--------------------------------------------------------------------------------
/platforms/bert/down-requirements.txt:
--------------------------------------------------------------------------------
1 | transformers==2.11.0
2 | 


--------------------------------------------------------------------------------
/platforms/bert/run_bert_experiments.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pip install -U -r up-requirements.txt
 4 | #declare -a datasets=("sofmattress" "powerplay11")
 5 | declare -a datasets=("curekart")
 6 | 
 7 | for dataset in "${datasets[@]}"
 8 | do
 9 | 
10 |     python bert-bot-only-data-es.py \
11 | 	--train_file  "../train/${dataset}_train.csv" \
12 | 	--test_file "../test/${dataset}_test.csv" \
13 | 	--output_dir "../bert_models/${dataset}/" \
14 | 	--model_type "bert" \
15 | 	--model_name  "bert-base-uncased" \
16 | 	--do_lower_case true \
17 | 	--seed 42 \
18 | 	--learning_rate 0.00004 \
19 | 	--batch_size 8 \
20 | 	--epochs 50 \
21 | 	--eval_frac 0.0 \
22 | 	--eval_every_n_steps 100 \
23 | 	--use_early_stopping true \
24 | 	--early_stopping_patience 5 \
25 | 	--early_stopping_delta 0.0005 \
26 | 
27 |     cp "../bert_models/${dataset}/predictions.csv" "../preds/bert_${dataset}.csv"
28 | done
29 | 
30 | 
31 | for dataset in "${datasets[@]}"
32 | do
33 | 
34 |     python bert-bot-only-data-es.py \
35 | 	--train_file  "../train/${dataset}_subset_train.csv" \
36 | 	--test_file "../test/${dataset}_test.csv" \
37 | 	--output_dir "../bert_models/${dataset}_subset/" \
38 | 	--model_type "bert" \
39 | 	--model_name  "bert-base-uncased" \
40 | 	--do_lower_case true \
41 | 	--seed 42 \
42 | 	--learning_rate 0.00004 \
43 | 	--batch_size 8 \
44 | 	--epochs 50 \
45 | 	--eval_frac 0.0 \
46 | 	--eval_every_n_steps 100 \
47 | 	--use_early_stopping true \
48 | 	--early_stopping_patience 5 \
49 | 	--early_stopping_delta 0.0005 \
50 | 
51 |     cp "../bert_models/${dataset}_subset/predictions.csv" "../preds/bert_${dataset}_subset.csv"
52 | done
53 | 
54 | pip install -U -r down-requirements.txt
55 | 


--------------------------------------------------------------------------------
/platforms/bert/up-requirements.txt:
--------------------------------------------------------------------------------
1 | # We need transformers 3.0.2 for simpletransformers 0.43.6
2 | transformers==3.0.2
3 | simpletransformers==0.43.6
4 | 


--------------------------------------------------------------------------------
/platforms/dialogflow/agent_template/agent.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "description": "",
 3 |   "language": "en",
 4 |   "shortDescription": "",
 5 |   "examples": "",
 6 |   "linkToDocs": "",
 7 |   "displayName": "agent_template",
 8 |   "disableInteractionLogs": false,
 9 |   "disableStackdriverLogs": true,
10 |   "googleAssistant": {
11 |     "googleAssistantCompatible": false,
12 |     "project": "",
13 |     "welcomeIntentSignInRequired": false,
14 |     "startIntents": [],
15 |     "systemIntents": [],
16 |     "endIntentIds": [],
17 |     "oAuthLinking": {
18 |       "required": false,
19 |       "providerId": "",
20 |       "authorizationUrl": "",
21 |       "tokenUrl": "",
22 |       "scopes": "",
23 |       "privacyPolicyUrl": "",
24 |       "grantType": "AUTH_CODE_GRANT"
25 |     },
26 |     "voiceType": "MALE_1",
27 |     "capabilities": [],
28 |     "env": "",
29 |     "protocolVersion": "V2",
30 |     "autoPreviewEnabled": false,
31 |     "isDeviceAgent": false
32 |   },
33 |   "defaultTimezone": "Asia/Almaty",
34 |   "webhook": {
35 |     "url": "",
36 |     "username": "",
37 |     "headers": {},
38 |     "available": false,
39 |     "useForDomains": false,
40 |     "cloudFunctionsEnabled": false,
41 |     "cloudFunctionsInitialized": false
42 |   },
43 |   "isPrivate": true,
44 |   "mlMinConfidence": 0.3,
45 |   "supportedLanguages": [],
46 |   "enableOnePlatformApi": true,
47 |   "onePlatformApiVersion": "v2",
48 |   "secondaryKey": "9d94370027814a69ad2fdd82a9532288",
49 |   "analyzeQueryTextSentiment": false,
50 |   "enabledKnowledgeBaseNames": [],
51 |   "knowledgeServiceConfidenceAdjustment": 0.0,
52 |   "dialogBuilderMode": false,
53 |   "baseActionPackagesUrl": ""
54 | }
55 | 


--------------------------------------------------------------------------------
/platforms/dialogflow/agent_template/package.json:
--------------------------------------------------------------------------------
1 | 
2 | {
3 |   "version": "1.0.0"
4 | }
5 | 


--------------------------------------------------------------------------------
/platforms/haptik/convert_data.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import pathlib
 3 | import pandas as pd
 4 | 
 5 | in_path = sys.argv[1]
 6 | p = pathlib.Path(in_path)
 7 | print(p)
 8 | 
 9 | df = pd.read_csv(str(p))
10 | data = []
11 | for k, g_df in df.groupby('label'):
12 |     data.append({'node_name': k, 'question': '|'.join(g_df['sentence']), 'answer': f'Answer for {k}'})
13 | out_df = pd.DataFrame(data)
14 | out_df.to_csv(p.name, index=False)
15 | 


--------------------------------------------------------------------------------
/platforms/luis/training_data_conversion.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import json\n",
 10 |     "import pandas as pd"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 2,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "bot_name = 'curekart_subset'"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 3,
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "luis_dict = {\n",
 29 |     "    \"luis_schema_version\": \"6.0.0\",\n",
 30 |     "    \"intents\": None,\n",
 31 |     "    \"entities\": [],\n",
 32 |     "    \"hierarchicals\": [],\n",
 33 |     "    \"composites\": [],\n",
 34 |     "    \"closedLists\": [],\n",
 35 |     "    \"prebuiltEntities\": [],\n",
 36 |     "    \"utterances\": None,\n",
 37 |     "    \"versionId\": \"0.1\",\n",
 38 |     "    \"name\": bot_name,\n",
 39 |     "    \"desc\": \"\",\n",
 40 |     "    \"culture\": \"en-us\",\n",
 41 |     "    \"tokenizerVersion\": \"1.0.0\",\n",
 42 |     "    \"patternAnyEntities\": [],\n",
 43 |     "    \"regex_entities\": [],\n",
 44 |     "    \"phraselists\": [],\n",
 45 |     "    \"regex_features\": [],\n",
 46 |     "    \"patterns\": [],\n",
 47 |     "    \"settings\": [],\n",
 48 |     "}"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 4,
 54 |    "metadata": {},
 55 |    "outputs": [
 56 |     {
 57 |      "data": {
 58 |       "text/html": [
 59 |        "<div>\n",
 60 |        "<style scoped>\n",
 61 |        "    .dataframe tbody tr th:only-of-type {\n",
 62 |        "        vertical-align: middle;\n",
 63 |        "    }\n",
 64 |        "\n",
 65 |        "    .dataframe tbody tr th {\n",
 66 |        "        vertical-align: top;\n",
 67 |        "    }\n",
 68 |        "\n",
 69 |        "    .dataframe thead th {\n",
 70 |        "        text-align: right;\n",
 71 |        "    }\n",
 72 |        "</style>\n",
 73 |        "<table border=\"1\" class=\"dataframe\">\n",
 74 |        "  <thead>\n",
 75 |        "    <tr style=\"text-align: right;\">\n",
 76 |        "      <th></th>\n",
 77 |        "      <th>label</th>\n",
 78 |        "      <th>sentence</th>\n",
 79 |        "    </tr>\n",
 80 |        "  </thead>\n",
 81 |        "  <tbody>\n",
 82 |        "    <tr>\n",
 83 |        "      <th>0</th>\n",
 84 |        "      <td>RECOMMEND_PRODUCT</td>\n",
 85 |        "      <td>I am confused about what to buy since there ar...</td>\n",
 86 |        "    </tr>\n",
 87 |        "    <tr>\n",
 88 |        "      <th>1</th>\n",
 89 |        "      <td>RECOMMEND_PRODUCT</td>\n",
 90 |        "      <td>I have been trying to maintain a healthy lifes...</td>\n",
 91 |        "    </tr>\n",
 92 |        "    <tr>\n",
 93 |        "      <th>2</th>\n",
 94 |        "      <td>RECOMMEND_PRODUCT</td>\n",
 95 |        "      <td>Can you help me with building an athletic body...</td>\n",
 96 |        "    </tr>\n",
 97 |        "    <tr>\n",
 98 |        "      <th>3</th>\n",
 99 |        "      <td>RECOMMEND_PRODUCT</td>\n",
100 |        "      <td>I need some hair care products since I have be...</td>\n",
101 |        "    </tr>\n",
102 |        "    <tr>\n",
103 |        "      <th>4</th>\n",
104 |        "      <td>RECOMMEND_PRODUCT</td>\n",
105 |        "      <td>I'm here to browse some products because my fr...</td>\n",
106 |        "    </tr>\n",
107 |        "  </tbody>\n",
108 |        "</table>\n",
109 |        "</div>"
110 |       ],
111 |       "text/plain": [
112 |        "               label                                           sentence\n",
113 |        "0  RECOMMEND_PRODUCT  I am confused about what to buy since there ar...\n",
114 |        "1  RECOMMEND_PRODUCT  I have been trying to maintain a healthy lifes...\n",
115 |        "2  RECOMMEND_PRODUCT  Can you help me with building an athletic body...\n",
116 |        "3  RECOMMEND_PRODUCT  I need some hair care products since I have be...\n",
117 |        "4  RECOMMEND_PRODUCT  I'm here to browse some products because my fr..."
118 |       ]
119 |      },
120 |      "execution_count": 4,
121 |      "metadata": {},
122 |      "output_type": "execute_result"
123 |     }
124 |    ],
125 |    "source": [
126 |     "data = pd.read_csv(f'../../train/{bot_name}_train.csv')\n",
127 |     "data.head()"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 5,
133 |    "metadata": {},
134 |    "outputs": [
135 |     {
136 |      "data": {
137 |       "text/plain": [
138 |        "(413, 2)"
139 |       ]
140 |      },
141 |      "execution_count": 5,
142 |      "metadata": {},
143 |      "output_type": "execute_result"
144 |     }
145 |    ],
146 |    "source": [
147 |     "data.shape"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": 6,
153 |    "metadata": {},
154 |    "outputs": [],
155 |    "source": [
156 |     "intents = [{\"name\": label, \"features\" :[]} for label in list(set(data['label'].to_list()))]\n",
157 |     "utterances = []\n",
158 |     "for index, row in data.iterrows():\n",
159 |     "    utterances.append({ \"text\": row['sentence'], \"intent\": row['label'], \"entities\": []})\n",
160 |     "luis_dict['intents'] = intents\n",
161 |     "luis_dict['utterances'] = utterances"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "code",
166 |    "execution_count": 7,
167 |    "metadata": {},
168 |    "outputs": [],
169 |    "source": [
170 |     "with open(f'data/{bot_name}.json', 'w') as fp:\n",
171 |     "    json.dump(luis_dict, fp)"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": null,
177 |    "metadata": {},
178 |    "outputs": [],
179 |    "source": []
180 |   }
181 |  ],
182 |  "metadata": {
183 |   "kernelspec": {
184 |    "display_name": "Python (py36)",
185 |    "language": "python",
186 |    "name": "py36"
187 |   },
188 |   "language_info": {
189 |    "codemirror_mode": {
190 |     "name": "ipython",
191 |     "version": 3
192 |    },
193 |    "file_extension": ".py",
194 |    "mimetype": "text/x-python",
195 |    "name": "python",
196 |    "nbconvert_exporter": "python",
197 |    "pygments_lexer": "ipython3",
198 |    "version": "3.6.8"
199 |   }
200 |  },
201 |  "nbformat": 4,
202 |  "nbformat_minor": 4
203 | }
204 | 


--------------------------------------------------------------------------------
/platforms/rasa/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/rasa/__init__.py


--------------------------------------------------------------------------------
/platforms/rasa/actions.py:
--------------------------------------------------------------------------------
 1 | # This file contains your custom actions which can be used to run
 2 | # custom Python code.
 3 | #
 4 | # See this guide on how to implement these actions:
 5 | # https://rasa.com/docs/rasa/core/actions/#custom-actions/
 6 | 
 7 | 
 8 | # This is a simple example for a custom action which utters "Hello World!"
 9 | 
10 | # from typing import Any, Text, Dict, List
11 | #
12 | # from rasa_sdk import Action, Tracker
13 | # from rasa_sdk.executor import CollectingDispatcher
14 | #
15 | #
16 | # class ActionHelloWorld(Action):
17 | #
18 | #     def name(self) -> Text:
19 | #         return "action_hello_world"
20 | #
21 | #     def run(self, dispatcher: CollectingDispatcher,
22 | #             tracker: Tracker,
23 | #             domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
24 | #
25 | #         dispatcher.utter_message(text="Hello World!")
26 | #
27 | #         return []
28 | 


--------------------------------------------------------------------------------
/platforms/rasa/config.yml:
--------------------------------------------------------------------------------
 1 | # Configuration for Rasa NLU.
 2 | # https://rasa.com/docs/rasa/nlu/components/
 3 | language: "en"
 4 | 
 5 | pipeline:
 6 |   - name: ConveRTTokenizer
 7 |   - name: ConveRTFeaturizer
 8 |   - name: RegexFeaturizer
 9 |   - name: LexicalSyntacticFeaturizer
10 |   - name: CountVectorsFeaturizer
11 |   - name: CountVectorsFeaturizer
12 |     analyzer: "char_wb"
13 |     min_ngram: 1
14 |     max_ngram: 4
15 |   - name: DIETClassifier
16 |     epochs: 100
17 |   - name: EntitySynonymMapper
18 |   - name: ResponseSelector
19 |     epochs: 100
20 | 
21 | # Configuration for Rasa Core.
22 | # https://rasa.com/docs/rasa/core/policies/
23 | policies:
24 |   - name: MemoizationPolicy
25 |   - name: TEDPolicy
26 |     max_history: 5
27 |     epochs: 100
28 |   - name: MappingPolicy
29 | 


--------------------------------------------------------------------------------
/platforms/rasa/credentials.yml:
--------------------------------------------------------------------------------
 1 | # This file contains the credentials for the voice & chat platforms
 2 | # which your bot is using.
 3 | # https://rasa.com/docs/rasa/user-guide/messaging-and-voice-channels/
 4 | 
 5 | rest:
 6 | #  # you don't need to provide anything here - this channel doesn't
 7 | #  # require any credentials
 8 | 
 9 | 
10 | #facebook:
11 | #  verify: "<verify>"
12 | #  secret: "<your secret>"
13 | #  page-access-token: "<your page access token>"
14 | 
15 | #slack:
16 | #  slack_token: "<your slack token>"
17 | #  slack_channel: "<the slack channel>"
18 | 
19 | #socketio:
20 | #  user_message_evt: <event name for user message>
21 | #  bot_message_evt: <event name for bot messages>
22 | #  session_persistence: <true/false>
23 | 
24 | #mattermost:
25 | #  url: "https://<mattermost instance>/api/v4"
26 | #  token: "<bot token>"
27 | #  webhook_url: "<callback URL>"
28 | 
29 | # This entry is needed if you are using Rasa X. The entry represents credentials 
30 | # for the Rasa X "channel", i.e. Talk to your bot and Share with guest testers.
31 | rasa:
32 |   url: "http://localhost:5002/api"
33 | 


--------------------------------------------------------------------------------
/platforms/rasa/domain.yml:
--------------------------------------------------------------------------------
 1 | intents:
 2 |   - greet
 3 |   - goodbye
 4 |   - affirm
 5 |   - deny
 6 |   - mood_great
 7 |   - mood_unhappy
 8 |   - bot_challenge
 9 | 
10 | responses:
11 |   utter_greet:
12 |   - text: "Hey! How are you?"
13 | 
14 |   utter_cheer_up:
15 |   - text: "Here is something to cheer you up:"
16 |     image: "https://i.imgur.com/nGF1K8f.jpg"
17 | 
18 |   utter_did_that_help:
19 |   - text: "Did that help you?"
20 | 
21 |   utter_happy:
22 |   - text: "Great, carry on!"
23 | 
24 |   utter_goodbye:
25 |   - text: "Bye"
26 | 
27 |   utter_iamabot:
28 |   - text: "I am a bot, powered by Rasa."
29 | 
30 | session_config:
31 |   session_expiration_time: 60
32 |   carry_over_slots_to_new_session: true
33 | 


--------------------------------------------------------------------------------
/platforms/rasa/endpoints.yml:
--------------------------------------------------------------------------------
 1 | # This file contains the different endpoints your bot can use.
 2 | 
 3 | # Server where the models are pulled from.
 4 | # https://rasa.com/docs/rasa/user-guide/configuring-http-api/#fetching-models-from-a-server/
 5 | 
 6 | #models:
 7 | #  url: http://my-server.com/models/default_core@latest
 8 | #  wait_time_between_pulls:  10   # [optional](default: 100)
 9 | 
10 | # Server which runs your custom actions.
11 | # https://rasa.com/docs/rasa/core/actions/#custom-actions/
12 | 
13 | #action_endpoint:
14 | #  url: "http://localhost:5055/webhook"
15 | 
16 | # Tracker store which is used to store the conversations.
17 | # By default the conversations are stored in memory.
18 | # https://rasa.com/docs/rasa/api/tracker-stores/
19 | 
20 | #tracker_store:
21 | #    type: redis
22 | #    url: <host of the redis instance, e.g. localhost>
23 | #    port: <port of your redis instance, usually 6379>
24 | #    db: <number of your database within redis, e.g. 0>
25 | #    password: <password used for authentication>
26 | #    use_ssl: <whether or not the communication is encrypted, default false>
27 | 
28 | #tracker_store:
29 | #    type: mongod
30 | #    url: <url to your mongo instance, e.g. mongodb://localhost:27017>
31 | #    db: <name of the db within your mongo instance, e.g. rasa>
32 | #    username: <username used for authentication>
33 | #    password: <password used for authentication>
34 | 
35 | # Event broker which all conversation events should be streamed to.
36 | # https://rasa.com/docs/rasa/api/event-brokers/
37 | 
38 | #event_broker:
39 | #  url: localhost
40 | #  username: username
41 | #  password: password
42 | #  queue: queue
43 | 


--------------------------------------------------------------------------------
/platforms/rasa/generate_preds.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import pandas as pd\n",
 10 |     "import json\n",
 11 |     "import requests"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "bot_name = 'curekart'"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 3,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "# MAKE SURE RASA SERVER OF {bot_name} IS UP ON  http://localhost:5005/"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 4,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "def predict_node_name(text):\n",
 39 |     "    data = {\"text\": text}\n",
 40 |     "    url = 'http://localhost:5005/model/parse'\n",
 41 |     "    response = requests.post(url, data=json.dumps(data))\n",
 42 |     "    predicted_node = response.json()['intent']['name']\n",
 43 |     "    predicted_node_score = response.json()['intent_ranking'][0]['confidence']\n",
 44 |     "    return predicted_node, predicted_node_score"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 5,
 50 |    "metadata": {},
 51 |    "outputs": [
 52 |     {
 53 |      "data": {
 54 |       "text/html": [
 55 |        "<div>\n",
 56 |        "<style scoped>\n",
 57 |        "    .dataframe tbody tr th:only-of-type {\n",
 58 |        "        vertical-align: middle;\n",
 59 |        "    }\n",
 60 |        "\n",
 61 |        "    .dataframe tbody tr th {\n",
 62 |        "        vertical-align: top;\n",
 63 |        "    }\n",
 64 |        "\n",
 65 |        "    .dataframe thead th {\n",
 66 |        "        text-align: right;\n",
 67 |        "    }\n",
 68 |        "</style>\n",
 69 |        "<table border=\"1\" class=\"dataframe\">\n",
 70 |        "  <thead>\n",
 71 |        "    <tr style=\"text-align: right;\">\n",
 72 |        "      <th></th>\n",
 73 |        "      <th>sentence</th>\n",
 74 |        "      <th>label</th>\n",
 75 |        "    </tr>\n",
 76 |        "  </thead>\n",
 77 |        "  <tbody>\n",
 78 |        "    <tr>\n",
 79 |        "      <th>0</th>\n",
 80 |        "      <td>Order my product</td>\n",
 81 |        "      <td>ORDER_TAKING</td>\n",
 82 |        "    </tr>\n",
 83 |        "    <tr>\n",
 84 |        "      <th>1</th>\n",
 85 |        "      <td>Hy</td>\n",
 86 |        "      <td>NO_NODES_DETECTED</td>\n",
 87 |        "    </tr>\n",
 88 |        "    <tr>\n",
 89 |        "      <th>2</th>\n",
 90 |        "      <td>I want to order Wottle sensitive handwash refill</td>\n",
 91 |        "      <td>RECOMMEND_PRODUCT</td>\n",
 92 |        "    </tr>\n",
 93 |        "    <tr>\n",
 94 |        "      <th>3</th>\n",
 95 |        "      <td>have u started C D provision for pin code 702164</td>\n",
 96 |        "      <td>CHECK_PINCODE</td>\n",
 97 |        "    </tr>\n",
 98 |        "    <tr>\n",
 99 |        "      <th>4</th>\n",
100 |        "      <td>How do I know it will deliver in my place</td>\n",
101 |        "      <td>NO_NODES_DETECTED</td>\n",
102 |        "    </tr>\n",
103 |        "  </tbody>\n",
104 |        "</table>\n",
105 |        "</div>"
106 |       ],
107 |       "text/plain": [
108 |        "                                           sentence              label\n",
109 |        "0                                  Order my product       ORDER_TAKING\n",
110 |        "1                                                Hy  NO_NODES_DETECTED\n",
111 |        "2  I want to order Wottle sensitive handwash refill  RECOMMEND_PRODUCT\n",
112 |        "3  have u started C D provision for pin code 702164      CHECK_PINCODE\n",
113 |        "4         How do I know it will deliver in my place  NO_NODES_DETECTED"
114 |       ]
115 |      },
116 |      "execution_count": 5,
117 |      "metadata": {},
118 |      "output_type": "execute_result"
119 |     }
120 |    ],
121 |    "source": [
122 |     "test_file_name = bot_name.replace('_subset', '')  # '<bot>_subset' reads the same test file as '<bot>'\n",
123 |     "df_test = pd.read_csv(f'../../test/{test_file_name}_test.csv')\n",
124 |     "df_test.head()"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 6,
130 |    "metadata": {},
131 |    "outputs": [
132 |     {
133 |      "data": {
134 |       "text/plain": [
135 |        "(991, 2)"
136 |       ]
137 |      },
138 |      "execution_count": 6,
139 |      "metadata": {},
140 |      "output_type": "execute_result"
141 |     }
142 |    ],
143 |    "source": [
144 |     "df_test.shape"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 7,
150 |    "metadata": {},
151 |    "outputs": [
152 |     {
153 |      "data": {
154 |       "text/plain": [
155 |        "('ORDER_QUERY', 0.31236547231674194)"
156 |       ]
157 |      },
158 |      "execution_count": 7,
159 |      "metadata": {},
160 |      "output_type": "execute_result"
161 |     }
162 |    ],
163 |    "source": [
164 |     "predict_node_name('Order my product')"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": 8,
170 |    "metadata": {},
171 |    "outputs": [
172 |     {
173 |      "data": {
174 |       "text/html": [
175 |        "<div>\n",
176 |        "<style scoped>\n",
177 |        "    .dataframe tbody tr th:only-of-type {\n",
178 |        "        vertical-align: middle;\n",
179 |        "    }\n",
180 |        "\n",
181 |        "    .dataframe tbody tr th {\n",
182 |        "        vertical-align: top;\n",
183 |        "    }\n",
184 |        "\n",
185 |        "    .dataframe thead th {\n",
186 |        "        text-align: right;\n",
187 |        "    }\n",
188 |        "</style>\n",
189 |        "<table border=\"1\" class=\"dataframe\">\n",
190 |        "  <thead>\n",
191 |        "    <tr style=\"text-align: right;\">\n",
192 |        "      <th></th>\n",
193 |        "      <th>sentence</th>\n",
194 |        "      <th>label</th>\n",
195 |        "      <th>predicted_node</th>\n",
196 |        "      <th>predicted_node_score</th>\n",
197 |        "    </tr>\n",
198 |        "  </thead>\n",
199 |        "  <tbody>\n",
200 |        "    <tr>\n",
201 |        "      <th>0</th>\n",
202 |        "      <td>Order my product</td>\n",
203 |        "      <td>ORDER_TAKING</td>\n",
204 |        "      <td>ORDER_QUERY</td>\n",
205 |        "      <td>0.312366</td>\n",
206 |        "    </tr>\n",
207 |        "    <tr>\n",
208 |        "      <th>1</th>\n",
209 |        "      <td>Hy</td>\n",
210 |        "      <td>NO_NODES_DETECTED</td>\n",
211 |        "      <td>RECOMMEND_PRODUCT</td>\n",
212 |        "      <td>0.866968</td>\n",
213 |        "    </tr>\n",
214 |        "    <tr>\n",
215 |        "      <th>2</th>\n",
216 |        "      <td>I want to order Wottle sensitive handwash refill</td>\n",
217 |        "      <td>RECOMMEND_PRODUCT</td>\n",
218 |        "      <td>RECOMMEND_PRODUCT</td>\n",
219 |        "      <td>0.452898</td>\n",
220 |        "    </tr>\n",
221 |        "    <tr>\n",
222 |        "      <th>3</th>\n",
223 |        "      <td>have u started C D provision for pin code 702164</td>\n",
224 |        "      <td>CHECK_PINCODE</td>\n",
225 |        "      <td>ORDER_STATUS</td>\n",
226 |        "      <td>0.885070</td>\n",
227 |        "    </tr>\n",
228 |        "    <tr>\n",
229 |        "      <th>4</th>\n",
230 |        "      <td>How do I know it will deliver in my place</td>\n",
231 |        "      <td>NO_NODES_DETECTED</td>\n",
232 |        "      <td>RESUME_DELIVERY</td>\n",
233 |        "      <td>0.942246</td>\n",
234 |        "    </tr>\n",
235 |        "  </tbody>\n",
236 |        "</table>\n",
237 |        "</div>"
238 |       ],
239 |       "text/plain": [
240 |        "                                           sentence              label  \\\n",
241 |        "0                                  Order my product       ORDER_TAKING   \n",
242 |        "1                                                Hy  NO_NODES_DETECTED   \n",
243 |        "2  I want to order Wottle sensitive handwash refill  RECOMMEND_PRODUCT   \n",
244 |        "3  have u started C D provision for pin code 702164      CHECK_PINCODE   \n",
245 |        "4         How do I know it will deliver in my place  NO_NODES_DETECTED   \n",
246 |        "\n",
247 |        "      predicted_node  predicted_node_score  \n",
248 |        "0        ORDER_QUERY              0.312366  \n",
249 |        "1  RECOMMEND_PRODUCT              0.866968  \n",
250 |        "2  RECOMMEND_PRODUCT              0.452898  \n",
251 |        "3       ORDER_STATUS              0.885070  \n",
252 |        "4    RESUME_DELIVERY              0.942246  "
253 |       ]
254 |      },
255 |      "execution_count": 8,
256 |      "metadata": {},
257 |      "output_type": "execute_result"
258 |     }
259 |    ],
260 |    "source": [
261 |     "# Send every test sentence to predict_node_name, one call per row, in row order.\n",
262 |     "parsed = [predict_node_name(sentence) for sentence in df_test['sentence']]\n",
263 |     "# Split the (node, score) pairs into two parallel lists.\n",
264 |     "pred_nodes = [node for node, _ in parsed]\n",
265 |     "pred_scores = [score for _, score in parsed]\n",
266 |     "# Store the predictions as new columns alongside the existing ones.\n",
267 |     "df_test['predicted_node'] = pred_nodes\n",
268 |     "df_test['predicted_node_score'] = pred_scores\n",
269 |     "df_test.head()"
270 |    ]
271 |   },
272 |   {
273 |    "cell_type": "code",
274 |    "execution_count": 9,
275 |    "metadata": {},
276 |    "outputs": [],
277 |    "source": [
278 |     "df_test.to_csv(f'../../preds/rasa_{bot_name}.csv', index=False)"
279 |    ]
280 |   },
281 |   {
282 |    "cell_type": "code",
283 |    "execution_count": null,
284 |    "metadata": {},
285 |    "outputs": [],
286 |    "source": []
287 |   }
288 |  ],
289 |  "metadata": {
290 |   "kernelspec": {
291 |    "display_name": "Python (py36)",
292 |    "language": "python",
293 |    "name": "py36"
294 |   },
295 |   "language_info": {
296 |    "codemirror_mode": {
297 |     "name": "ipython",
298 |     "version": 3
299 |    },
300 |    "file_extension": ".py",
301 |    "mimetype": "text/x-python",
302 |    "name": "python",
303 |    "nbconvert_exporter": "python",
304 |    "pygments_lexer": "ipython3",
305 |    "version": "3.6.8"
306 |   }
307 |  },
308 |  "nbformat": 4,
309 |  "nbformat_minor": 4
310 | }
311 | 


--------------------------------------------------------------------------------
/platforms/rasa/tests/conversation_tests.md:
--------------------------------------------------------------------------------
 1 | #### This file contains tests to evaluate that your bot behaves as expected.
 2 | #### If you want to learn more, please see the docs: https://rasa.com/docs/rasa/user-guide/testing-your-assistant/
 3 | 
 4 | ## happy path 1
 5 | * greet: hello there!
 6 |   - utter_greet
 7 | * mood_great: amazing
 8 |   - utter_happy
 9 | 
10 | ## happy path 2
11 | * greet: hello there!
12 |   - utter_greet
13 | * mood_great: amazing
14 |   - utter_happy
15 | * goodbye: bye-bye!
16 |   - utter_goodbye
17 | 
18 | ## sad path 1
19 | * greet: hello
20 |   - utter_greet
21 | * mood_unhappy: not good
22 |   - utter_cheer_up
23 |   - utter_did_that_help
24 | * affirm: yes
25 |   - utter_happy
26 | 
27 | ## sad path 2
28 | * greet: hello
29 |   - utter_greet
30 | * mood_unhappy: not good
31 |   - utter_cheer_up
32 |   - utter_did_that_help
33 | * deny: not really
34 |   - utter_goodbye
35 | 
36 | ## sad path 3
37 | * greet: hi
38 |   - utter_greet
39 | * mood_unhappy: very terrible
40 |   - utter_cheer_up
41 |   - utter_did_that_help
42 | * deny: no
43 |   - utter_goodbye
44 | 
45 | ## say goodbye
46 | * goodbye: bye-bye!
47 |   - utter_goodbye
48 | 
49 | ## bot challenge
50 | * bot_challenge: are you a bot?
51 |   - utter_iamabot
52 | 


--------------------------------------------------------------------------------
/platforms/rasa/training_data_conversion.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import json\n",
 10 |     "import pandas as pd"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 2,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "bot_name = 'curekart'"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 3,
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "rasa_data_template = {\n",
 29 |     "  \"rasa_nlu_data\": {\n",
 30 |     "    \"common_examples\": None,\n",
 31 |     "    \"regex_features\": [],\n",
 32 |     "    \"lookup_tables\": [],\n",
 33 |     "    \"entity_synonyms\": []\n",
 34 |     "  }\n",
 35 |     "}"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 4,
 41 |    "metadata": {},
 42 |    "outputs": [
 43 |     {
 44 |      "data": {
 45 |       "text/html": [
 46 |        "<div>\n",
 47 |        "<style scoped>\n",
 48 |        "    .dataframe tbody tr th:only-of-type {\n",
 49 |        "        vertical-align: middle;\n",
 50 |        "    }\n",
 51 |        "\n",
 52 |        "    .dataframe tbody tr th {\n",
 53 |        "        vertical-align: top;\n",
 54 |        "    }\n",
 55 |        "\n",
 56 |        "    .dataframe thead th {\n",
 57 |        "        text-align: right;\n",
 58 |        "    }\n",
 59 |        "</style>\n",
 60 |        "<table border=\"1\" class=\"dataframe\">\n",
 61 |        "  <thead>\n",
 62 |        "    <tr style=\"text-align: right;\">\n",
 63 |        "      <th></th>\n",
 64 |        "      <th>label</th>\n",
 65 |        "      <th>sentence</th>\n",
 66 |        "    </tr>\n",
 67 |        "  </thead>\n",
 68 |        "  <tbody>\n",
 69 |        "    <tr>\n",
 70 |        "      <th>0</th>\n",
 71 |        "      <td>CALL_CENTER</td>\n",
 72 |        "      <td>What time is your call centre operational duri...</td>\n",
 73 |        "    </tr>\n",
 74 |        "    <tr>\n",
 75 |        "      <th>1</th>\n",
 76 |        "      <td>CALL_CENTER</td>\n",
 77 |        "      <td>is the call center still functioning during lo...</td>\n",
 78 |        "    </tr>\n",
 79 |        "    <tr>\n",
 80 |        "      <th>2</th>\n",
 81 |        "      <td>CALL_CENTER</td>\n",
 82 |        "      <td>what are the working hours of your call center...</td>\n",
 83 |        "    </tr>\n",
 84 |        "    <tr>\n",
 85 |        "      <th>3</th>\n",
 86 |        "      <td>CALL_CENTER</td>\n",
 87 |        "      <td>does covid affext your call center time</td>\n",
 88 |        "    </tr>\n",
 89 |        "    <tr>\n",
 90 |        "      <th>4</th>\n",
 91 |        "      <td>CALL_CENTER</td>\n",
 92 |        "      <td>is your call center working during covid?</td>\n",
 93 |        "    </tr>\n",
 94 |        "  </tbody>\n",
 95 |        "</table>\n",
 96 |        "</div>"
 97 |       ],
 98 |       "text/plain": [
 99 |        "         label                                           sentence\n",
100 |        "0  CALL_CENTER  What time is your call centre operational duri...\n",
101 |        "1  CALL_CENTER  is the call center still functioning during lo...\n",
102 |        "2  CALL_CENTER  what are the working hours of your call center...\n",
103 |        "3  CALL_CENTER            does covid affext your call center time\n",
104 |        "4  CALL_CENTER          is your call center working during covid?"
105 |       ]
106 |      },
107 |      "execution_count": 4,
108 |      "metadata": {},
109 |      "output_type": "execute_result"
110 |     }
111 |    ],
112 |    "source": [
113 |     "data = pd.read_csv(f'../../train/{bot_name}_train.csv')\n",
114 |     "data.head()"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 5,
120 |    "metadata": {},
121 |    "outputs": [
122 |     {
123 |      "data": {
124 |       "text/plain": [
125 |        "(600, 2)"
126 |       ]
127 |      },
128 |      "execution_count": 5,
129 |      "metadata": {},
130 |      "output_type": "execute_result"
131 |     }
132 |    ],
133 |    "source": [
134 |     "data.shape"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": 6,
140 |    "metadata": {},
141 |    "outputs": [],
142 |    "source": [
143 |     "examples = []"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 7,
149 |    "metadata": {},
150 |    "outputs": [],
151 |    "source": [
152 |     "# One example dict per training row: 'label' becomes the intent, 'sentence' becomes the text.\n",
153 |     "examples.extend(\n",
154 |     "    {'intent': label, 'text': sentence}\n",
155 |     "    for label, sentence in zip(data['label'], data['sentence'])\n",
156 |     ")"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": 8,
162 |    "metadata": {},
163 |    "outputs": [],
164 |    "source": [
165 |     "rasa_data_template['rasa_nlu_data']['common_examples'] = examples"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "code",
170 |    "execution_count": 9,
171 |    "metadata": {},
172 |    "outputs": [],
173 |    "source": [
174 |     "with open(f'data/{bot_name}.json', 'w') as fp:\n",
175 |     "    json.dump(rasa_data_template, fp)"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": null,
181 |    "metadata": {},
182 |    "outputs": [],
183 |    "source": []
184 |   }
185 |  ],
186 |  "metadata": {
187 |   "kernelspec": {
188 |    "display_name": "Python (py36)",
189 |    "language": "python",
190 |    "name": "py36"
191 |   },
192 |   "language_info": {
193 |    "codemirror_mode": {
194 |     "name": "ipython",
195 |     "version": 3
196 |    },
197 |    "file_extension": ".py",
198 |    "mimetype": "text/x-python",
199 |    "name": "python",
200 |    "nbconvert_exporter": "python",
201 |    "pygments_lexer": "ipython3",
202 |    "version": "3.6.8"
203 |   }
204 |  },
205 |  "nbformat": 4,
206 |  "nbformat_minor": 4
207 | }
208 | 


--------------------------------------------------------------------------------
/results/bert_curekart.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.38143289606458125,0.2777203394658263,0.8362831858407079,0.0,0.3914278743423574
 3 | 0.2,0.39253279515640765,0.30127499197851487,0.8362831858407079,0.02040816326530612,0.397844106254895
 4 | 0.3,0.40968718466195764,0.336381914722132,0.834070796460177,0.05380333951762523,0.4069243322470297
 5 | 0.4,0.48335015136226034,0.4643749179345553,0.834070796460177,0.18923933209647495,0.4579319049416409
 6 | 0.5,0.5216952573158425,0.5176570270763616,0.834070796460177,0.2597402597402597,0.4803538135930759
 7 | 0.6,0.5539858728557013,0.5589509843467156,0.831858407079646,0.3209647495361781,0.49917800035688836
 8 | 0.7,0.574167507568113,0.5819867165161616,0.827433628318584,0.36178107606679033,0.5073404790666131
 9 | 0.8,0.6094853683148335,0.6203545801533257,0.8185840707964602,0.43413729128014844,0.5275041116659444
10 | 0.9,0.6508577194752775,0.6630666755960122,0.8030973451327433,0.5231910946196661,0.5532011200360406
11 | 


--------------------------------------------------------------------------------
/results/bert_curekart_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.3753784056508577,0.26873428028394586,0.8230088495575221,0.0,0.3827216986955868
 3 | 0.2,0.38244197780020184,0.28385856420222894,0.8230088495575221,0.012987012987012988,0.3867371892968888
 4 | 0.3,0.4117053481331988,0.3421306793975774,0.8230088495575221,0.06679035250463822,0.4038261321081724
 5 | 0.4,0.4934409687184662,0.4799107169648507,0.8207964601769911,0.2189239332096475,0.4601885575577133
 6 | 0.5,0.5307769929364279,0.5315308808700412,0.8119469026548672,0.2949907235621521,0.47797497265635175
 7 | 0.6,0.5640766902119072,0.5734870968713671,0.8053097345132744,0.36178107606679033,0.49681857852806144
 8 | 0.7,0.5943491422805247,0.6069184518856748,0.8030973451327433,0.41929499072356213,0.5133875786446674
 9 | 0.8,0.6397578203834511,0.6526137924505135,0.7920353982300885,0.5120593692022264,0.5453064578171033
10 | 0.9,0.6770938446014128,0.6864213066902995,0.7809734513274337,0.5899814471243042,0.5695074884225043
11 | 


--------------------------------------------------------------------------------
/results/bert_powerplay11.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.16581892166836215,0.09458310530768466,0.5854545454545454,0.002824858757062147,0.22662854232741195
 3 | 0.2,0.21770091556459817,0.19463095423585083,0.5818181818181818,0.07627118644067797,0.2390862177147459
 4 | 0.3,0.31129196337741605,0.3411393950048982,0.5781818181818181,0.2076271186440678,0.2668898498371918
 5 | 0.4,0.37843336724313326,0.42794405017038767,0.5636363636363636,0.3064971751412429,0.28144422084283677
 6 | 0.5,0.4516785350966429,0.5062601790074037,0.5454545454545454,0.4152542372881356,0.2983788806071609
 7 | 0.6,0.5188199389623601,0.568649768741218,0.5272727272727272,0.5155367231638418,0.3165516468104266
 8 | 0.7,0.5676500508646999,0.6080192664664281,0.5018181818181818,0.5932203389830508,0.32863737204394544
 9 | 0.8,0.6063072227873856,0.6331582177286087,0.48,0.655367231638418,0.32758929661892505
10 | 0.9,0.6653102746693794,0.6712780046426039,0.43636363636363634,0.7542372881355932,0.347322182251494
11 | 


--------------------------------------------------------------------------------
/results/bert_powerplay11_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.1617497456765005,0.11337011172449207,0.5309090909090909,0.018361581920903956,0.2063998849903869
 3 | 0.2,0.2970498474059003,0.33631881380821965,0.5236363636363637,0.20903954802259886,0.24544955242839175
 4 | 0.3,0.4252288911495422,0.48976321608883183,0.48727272727272725,0.4011299435028249,0.2677829569621612
 5 | 0.4,0.5178026449643948,0.5740094067056727,0.4509090909090909,0.5437853107344632,0.28113965587425205
 6 | 0.5,0.5859613428280773,0.6223629071205065,0.41818181818181815,0.6511299435028248,0.286288916282711
 7 | 0.6,0.624618514750763,0.6433052440106094,0.3890909090909091,0.7161016949152542,0.2887958272661481
 8 | 0.7,0.6602238046795524,0.6637153181331166,0.36,0.7768361581920904,0.29851914737268237
 9 | 0.8,0.698880976602238,0.6786901502595527,0.3018181818181818,0.8531073446327684,0.30169293594539803
10 | 0.9,0.728382502543235,0.6765203325400962,0.21454545454545454,0.9279661016949152,0.2854049912230014
11 | 


--------------------------------------------------------------------------------
/results/bert_sofmattress.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.4282115869017632,0.3327023465634267,0.7359307359307359,0.0,0.457123093925187
 3 | 0.2,0.43828715365239296,0.3540585844388577,0.7359307359307359,0.024096385542168676,0.4626917219806727
 4 | 0.3,0.45843828715365237,0.3989092079935,0.7316017316017316,0.0783132530120482,0.47205584437489384
 5 | 0.4,0.5088161209068011,0.48236500416255096,0.7316017316017316,0.19879518072289157,0.5062789592792611
 6 | 0.5,0.5541561712846348,0.5480458219448876,0.7316017316017316,0.3072289156626506,0.5379162557311129
 7 | 0.6,0.6146095717884131,0.6232869733224982,0.7229437229437229,0.463855421686747,0.5787346390816308
 8 | 0.7,0.6372795969773299,0.6450564416398155,0.7056277056277056,0.5421686746987951,0.5897178388469266
 9 | 0.8,0.672544080604534,0.6798829189224005,0.6926406926406926,0.6445783132530121,0.6147775061582121
10 | 0.9,0.690176322418136,0.690025982806961,0.6363636363636364,0.7650602409638554,0.6167910208651333
11 | 


--------------------------------------------------------------------------------
/results/bert_sofmattress_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.33249370277078083,0.2629308626158718,0.5714285714285714,0.0,0.3500891574145308
 3 | 0.2,0.3425692695214106,0.2832268690766706,0.5714285714285714,0.024096385542168676,0.35685082873850854
 4 | 0.3,0.40302267002518893,0.39136641443902487,0.5714285714285714,0.1686746987951807,0.39459601861278215
 5 | 0.4,0.44080604534005036,0.44314397484121,0.5627705627705628,0.2710843373493976,0.4060595816100176
 6 | 0.5,0.5012594458438288,0.5080367036869831,0.5541125541125541,0.42771084337349397,0.43449046213243375
 7 | 0.6,0.5667506297229219,0.5694958962020813,0.5497835497835498,0.5903614457831325,0.4838865511383501
 8 | 0.7,0.5869017632241813,0.5821474403837933,0.5151515151515151,0.6867469879518072,0.48828547382614906
 9 | 0.8,0.5994962216624685,0.5825208162114784,0.48484848484848486,0.7590361445783133,0.4908155382461614
10 | 0.9,0.6120906801007556,0.5757913259833879,0.43722943722943725,0.8554216867469879,0.4918195897349411
11 | 


--------------------------------------------------------------------------------
/results/dialogflow_curekart.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.6417759838546923,0.6693030807681715,0.75,0.5510204081632653,0.5475730047883745
 3 | 0.2,0.6417759838546923,0.6693030807681715,0.75,0.5510204081632653,0.5475730047883745
 4 | 0.3,0.6508577194752775,0.6783643590281482,0.745575221238938,0.5714285714285714,0.5540730516224459
 5 | 0.4,0.6821392532795156,0.7048752250496552,0.7234513274336283,0.647495361781076,0.5689670963713872
 6 | 0.5,0.7114026236125126,0.7249213686064777,0.668141592920354,0.7476808905380334,0.5779492515439503
 7 | 0.6,0.7315842583249244,0.7277228793178449,0.5730088495575221,0.8645640074211502,0.5782589504099579
 8 | 0.7,0.722502522704339,0.6912920440176313,0.45353982300884954,0.948051948051948,0.5533669228890408
 9 | 0.8,0.6528758829465187,0.5718238662329262,0.24778761061946902,0.9925788497217068,0.4356136748918091
10 | 0.9,0.5893037336024218,0.46757782989434266,0.10176991150442478,0.9981447124304267,0.2805046000382007
11 | 


--------------------------------------------------------------------------------
/results/dialogflow_curekart_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.6357214934409687,0.6644852572049998,0.7123893805309734,0.5714285714285714,0.5308419346603385
 3 | 0.2,0.6357214934409687,0.6644852572049998,0.7123893805309734,0.5714285714285714,0.5308419346603385
 4 | 0.3,0.644803229061554,0.6730954682965349,0.7035398230088495,0.5955473098330241,0.5356197263402213
 5 | 0.4,0.686175580221998,0.7065621324418394,0.6747787610619469,0.6957328385899815,0.5586030288583995
 6 | 0.5,0.7093844601412714,0.7178992921423805,0.6106194690265486,0.7922077922077922,0.5620501471721884
 7 | 0.6,0.7184661957618567,0.7036159200150304,0.5110619469026548,0.8923933209647495,0.5516289447691117
 8 | 0.7,0.6841574167507568,0.6317791384139099,0.35176991150442477,0.9628942486085343,0.4849493608742825
 9 | 0.8,0.6226034308779012,0.5199548440744413,0.17920353982300885,0.9944341372912802,0.36706442812074147
10 | 0.9,0.5671039354187689,0.42794627592006745,0.05088495575221239,1.0,0.20027480364663677
11 | 


--------------------------------------------------------------------------------
/results/dialogflow_powerplay11.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.34994913530010174,0.39190881618283185,0.5963636363636363,0.2542372881355932,0.3026644216431138
 3 | 0.2,0.34994913530010174,0.39190881618283185,0.5963636363636363,0.2542372881355932,0.3026644216431138
 4 | 0.3,0.35910478128179046,0.4039036213694102,0.5963636363636363,0.2669491525423729,0.30716042960113416
 5 | 0.4,0.40895218718209564,0.46636724373089034,0.5818181818181818,0.3418079096045198,0.31849843696846203
 6 | 0.5,0.5167853509664293,0.577415171615548,0.5527272727272727,0.5028248587570622,0.35232998422313205
 7 | 0.6,0.6205493387589013,0.6511586832758449,0.4763636363636364,0.6765536723163842,0.35263103368370324
 8 | 0.7,0.6917599186164801,0.6774930217127464,0.33454545454545453,0.8305084745762712,0.3126850195378528
 9 | 0.8,0.7232960325534079,0.6553717504378747,0.14909090909090908,0.9463276836158192,0.22745806536718088
10 | 0.9,0.728382502543235,0.6229727120687398,0.04,0.9957627118644068,0.1568255894769402
11 | 


--------------------------------------------------------------------------------
/results/dialogflow_powerplay11_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.34486266531027465,0.3955649131856894,0.5563636363636364,0.2627118644067797,0.28612194336184515
 3 | 0.2,0.34486266531027465,0.3955649131856894,0.5563636363636364,0.2627118644067797,0.28612194336184515
 4 | 0.3,0.3570701932858596,0.41171748588212914,0.5527272727272727,0.2810734463276836,0.2900596423980539
 5 | 0.4,0.41810783316378436,0.48421464263544084,0.5345454545454545,0.3728813559322034,0.305992657613556
 6 | 0.5,0.5330620549338759,0.586584111496249,0.4909090909090909,0.5494350282485876,0.32979968484851874
 7 | 0.6,0.6429298067141404,0.6548585998711222,0.3927272727272727,0.7401129943502824,0.32008491782007265
 8 | 0.7,0.7029501525940997,0.6591335575393154,0.2290909090909091,0.8870056497175142,0.2628573782646314
 9 | 0.8,0.7263479145473042,0.6362517620665049,0.08363636363636363,0.9759887005649718,0.18572784895334576
10 | 0.9,0.7222787385554426,0.6087200715529609,0.01090909090909091,0.998587570621469,0.07938321993813488
11 | 


--------------------------------------------------------------------------------
/results/dialogflow_sofmattress.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.6498740554156172,0.6596368491168915,0.7316017316017316,0.536144578313253,0.6089435891175473
 3 | 0.2,0.6498740554156172,0.6596368491168915,0.7316017316017316,0.536144578313253,0.6089435891175473
 4 | 0.3,0.6599496221662469,0.6706643152884479,0.7272727272727273,0.5662650602409639,0.6161452947200737
 5 | 0.4,0.6876574307304786,0.6991714568570798,0.7186147186147186,0.6445783132530121,0.6375316099746389
 6 | 0.5,0.6952141057934509,0.6993933172704418,0.670995670995671,0.7289156626506024,0.6253234385397347
 7 | 0.6,0.7128463476070529,0.7036431692361467,0.6060606060606061,0.8614457831325302,0.6325405291419348
 8 | 0.7,0.6574307304785895,0.6198759132209378,0.4588744588744589,0.9337349397590361,0.5537662881862647
 9 | 0.8,0.5340050377833753,0.44520234648343804,0.2077922077922078,0.9879518072289156,0.3746576501866642
10 | 0.9,0.44836272040302266,0.3064359332591206,0.05194805194805195,1.0,0.1912720697455713
11 | 


--------------------------------------------------------------------------------
/results/dialogflow_sofmattress_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.5869017632241813,0.6043002974102839,0.6536796536796536,0.4939759036144578,0.5377594861327528
 3 | 0.2,0.5869017632241813,0.6043002974102839,0.6536796536796536,0.4939759036144578,0.5377594861327528
 4 | 0.3,0.5994962216624685,0.6166759030825154,0.6536796536796536,0.5240963855421686,0.5475288277977552
 5 | 0.4,0.6196473551637279,0.6341347392091125,0.6363636363636364,0.5963855421686747,0.5551008257904813
 6 | 0.5,0.6448362720403022,0.6436995136282042,0.5627705627705628,0.7590361445783133,0.5501185870880984
 7 | 0.6,0.6272040302267002,0.5980370724455657,0.42857142857142855,0.9036144578313253,0.5084391869901971
 8 | 0.7,0.5591939546599496,0.4797003360393145,0.2683982683982684,0.963855421686747,0.4052237963084955
 9 | 0.8,0.47858942065491183,0.36335513496681104,0.11688311688311688,0.9819277108433735,0.25583983648496217
10 | 0.9,0.4282115869017632,0.2668556988455543,0.017316017316017316,1.0,0.10959614750180845
11 | 


--------------------------------------------------------------------------------
/results/haptik_curekart.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.4308779011099899,0.38596188662657294,0.8030973451327433,0.11873840445269017,0.4129311873500288
 3 | 0.2,0.5893037336024218,0.6100177353773819,0.7787610619469026,0.43042671614100186,0.5065195660657139
 4 | 0.3,0.7073662966700303,0.7274093358615785,0.7278761061946902,0.6901669758812616,0.5886149912987301
 5 | 0.4,0.7255297679112008,0.7354370714668711,0.672566371681416,0.7699443413729128,0.5900770415779899
 6 | 0.5,0.7356205852674067,0.738516382285541,0.6393805309734514,0.8163265306122449,0.5929143365670386
 7 | 0.6,0.7477295660948536,0.7454891722124777,0.6039823008849557,0.8682745825602969,0.6026655022329167
 8 | 0.7,0.7416750756811302,0.7256424545915114,0.5398230088495575,0.9109461966604824,0.5847882508776726
 9 | 0.8,0.693239152371342,0.6498473099828318,0.37831858407079644,0.9573283858998145,0.49917255429706
10 | 0.9,0.6256306760847629,0.5286265770933399,0.1902654867256637,0.9907235621521335,0.3699512520116685
11 | 


--------------------------------------------------------------------------------
/results/haptik_curekart_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.4127144298688194,0.3483712897587261,0.7986725663716814,0.08905380333951762,0.39672624230371795
 3 | 0.2,0.5469223007063572,0.5527488336780364,0.7809734513274337,0.35064935064935066,0.47737375406876575
 4 | 0.3,0.615539858728557,0.633122730084551,0.7411504424778761,0.5102040816326531,0.5054029220052958
 5 | 0.4,0.7083753784056509,0.720889395945921,0.7013274336283186,0.7142857142857143,0.5782016648924359
 6 | 0.5,0.739656912209889,0.7434613173842762,0.6703539823008849,0.7977736549165121,0.6039077415751544
 7 | 0.6,0.7356205852674067,0.7317150984950263,0.6017699115044248,0.8478664192949907,0.5825590315967132
 8 | 0.7,0.7447023208879919,0.7283624743769378,0.5398230088495575,0.9165120593692022,0.5898117449297346
 9 | 0.8,0.6831483350151363,0.6262606315395998,0.334070796460177,0.9758812615955473,0.4866391552274101
10 | 0.9,0.6004036326942482,0.48556878323957986,0.13274336283185842,0.9925788497217068,0.3091295392937593
11 | 


--------------------------------------------------------------------------------
/results/haptik_powerplay11.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.2634791454730417,0.23947977500824366,0.6654545454545454,0.10734463276836158,0.2861451954799664
 3 | 0.2,0.47914547304170907,0.5303094707781417,0.6545454545454545,0.4110169491525424,0.3717481358080229
 4 | 0.3,0.6164801627670397,0.657869511578882,0.6145454545454545,0.617231638418079,0.4246606743440914
 5 | 0.4,0.6724313326551373,0.6924174546699668,0.52,0.731638418079096,0.4162990412384738
 6 | 0.5,0.6968463886063072,0.6927014973969432,0.41818181818181815,0.8050847457627118,0.38275866167865685
 7 | 0.6,0.7090539165818922,0.6931356450002476,0.36727272727272725,0.8418079096045198,0.36171310214427965
 8 | 0.7,0.7263479145473042,0.6903458667142265,0.2872727272727273,0.8968926553672316,0.33052160648007983
 9 | 0.8,0.7416073245167853,0.6845081151776063,0.21454545454545454,0.9463276836158192,0.313490577230533
10 | 0.9,0.7273652085452695,0.6389699648264028,0.08,0.9788135593220338,0.18147366140292198
11 | 


--------------------------------------------------------------------------------
/results/haptik_powerplay11_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.25839267548321465,0.2539041201537312,0.5927272727272728,0.1285310734463277,0.2589451989613422
 3 | 0.2,0.5249237029501526,0.5797717163275461,0.5381818181818182,0.519774011299435,0.3499411970869179
 4 | 0.3,0.646998982706002,0.6652129639491713,0.4290909090909091,0.731638418079096,0.3523908681179462
 5 | 0.4,0.6978636826042727,0.6897866217628258,0.37454545454545457,0.8234463276836158,0.353035561598512
 6 | 0.5,0.7171922685656155,0.6874629475785807,0.2909090909090909,0.882768361581921,0.323382782619308
 7 | 0.6,0.7243133265513734,0.6759599697716699,0.21454545454545454,0.922316384180791,0.2834750823593769
 8 | 0.7,0.7344862665310274,0.6683039740136187,0.1709090909090909,0.9533898305084746,0.27227421438979416
 9 | 0.8,0.7365208545269583,0.6495595899194136,0.10545454545454545,0.981638418079096,0.23935251490404322
10 | 0.9,0.7243133265513734,0.6169229203805074,0.02909090909090909,0.9943502824858758,0.1205748726742888
11 | 


--------------------------------------------------------------------------------
/results/haptik_sofmattress.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.5088161209068011,0.4852612066932231,0.7229437229437229,0.21084337349397592,0.5086235739335777
 3 | 0.2,0.6171284634760705,0.6277105221754836,0.70995670995671,0.4879518072289157,0.577301101585455
 4 | 0.3,0.6599496221662469,0.6631911532048385,0.6536796536796536,0.6686746987951807,0.5876489726418147
 5 | 0.4,0.6599496221662469,0.6546533905227607,0.5714285714285714,0.7831325301204819,0.5662714661729004
 6 | 0.5,0.6523929471032746,0.6423603196563007,0.5324675324675324,0.8192771084337349,0.5496451534253737
 7 | 0.6,0.6473551637279596,0.6235443190103139,0.48484848484848486,0.8734939759036144,0.5380382274804033
 8 | 0.7,0.6146095717884131,0.576832466271812,0.4155844155844156,0.891566265060241,0.48896120356942585
 9 | 0.8,0.5869017632241813,0.5266436611764808,0.3246753246753247,0.9518072289156626,0.45081352366335453
10 | 0.9,0.48866498740554154,0.376810974037845,0.12554112554112554,0.9939759036144579,0.28868545245640687
11 | 


--------------------------------------------------------------------------------
/results/haptik_sofmattress_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.4609571788413098,0.45096457375594345,0.6406926406926406,0.21084337349397592,0.45335512570977937
 3 | 0.2,0.5642317380352645,0.5828012955855193,0.5887445887445888,0.5301204819277109,0.49591325222825966
 4 | 0.3,0.5793450881612091,0.5780365207906163,0.48484848484848486,0.7108433734939759,0.46860604999628325
 5 | 0.4,0.5919395465994962,0.5745311704147583,0.4458874458874459,0.7951807228915663,0.46786678404969667
 6 | 0.5,0.6020151133501259,0.5638019847351878,0.3939393939393939,0.891566265060241,0.4719265671294265
 7 | 0.6,0.5919395465994962,0.543623046678172,0.354978354978355,0.9216867469879518,0.4548641132965435
 8 | 0.7,0.5768261964735516,0.5171188331426371,0.2987012987012987,0.963855421686747,0.4338106577877485
 9 | 0.8,0.5239294710327456,0.4429667417958547,0.19913419913419914,0.9759036144578314,0.34820134456192214
10 | 0.9,0.4609571788413098,0.32549065235972047,0.0735930735930736,1.0,0.2269661352081343
11 | 


--------------------------------------------------------------------------------
/results/luis_curekart.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.4520686175580222,0.46146738088457,0.7256637168141593,0.22263450834879406,0.4005380109377977
 3 | 0.2,0.5802219979818365,0.6170061697077579,0.6991150442477876,0.4805194805194805,0.47653031805332025
 4 | 0.3,0.6337033299697276,0.6681462015554164,0.6415929203539823,0.62708719851577,0.4978934972990696
 5 | 0.4,0.6599394550958627,0.6807671697256525,0.577433628318584,0.7291280148423006,0.4929963911775212
 6 | 0.5,0.7023208879919274,0.7131409661083393,0.5376106194690266,0.8404452690166976,0.5359301731023312
 7 | 0.6,0.7103935418768921,0.7101188683186717,0.5088495575221239,0.8794063079777366,0.5385934445324991
 8 | 0.7,0.7093844601412714,0.6961165287568397,0.4668141592920354,0.9128014842300557,0.5287337256051646
 9 | 0.8,0.6972754793138244,0.6680534106767997,0.39823008849557523,0.948051948051948,0.5047539542079575
10 | 0.9,0.6518668012108981,0.5932475283103315,0.26548672566371684,0.9758812615955473,0.41997687434554737
11 | 


--------------------------------------------------------------------------------
/results/luis_curekart_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.5035317860746721,0.5201836972792051,0.7168141592920354,0.3246753246753247,0.4190697944500741
 3 | 0.2,0.6226034308779012,0.644386986815755,0.6526548672566371,0.5974025974025974,0.4746277223794737
 4 | 0.3,0.6589303733602422,0.6686864474137902,0.5907079646017699,0.7161410018552876,0.4842478941806614
 5 | 0.4,0.686175580221998,0.6830179988979004,0.5265486725663717,0.8200371057513914,0.4996450508983592
 6 | 0.5,0.7033299697275479,0.6893408154450895,0.47123893805309736,0.8979591836734694,0.5169967885853658
 7 | 0.6,0.7053481331987891,0.6826588122328711,0.4336283185840708,0.9332096474953617,0.5168028658065381
 8 | 0.7,0.6841574167507568,0.6475173117892167,0.36283185840707965,0.9536178107606679,0.4770886651801918
 9 | 0.8,0.6781029263370333,0.6274089842065953,0.32079646017699115,0.9777365491651205,0.46955337503955225
10 | 0.9,0.6387487386478304,0.5581202630086133,0.21902654867256638,0.9907235621521335,0.3941968849304019
11 | 


--------------------------------------------------------------------------------
/results/luis_powerplay11.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.5330620549338759,0.5992526506682163,0.48,0.5536723163841808,0.3353203174087024
 3 | 0.2,0.6408952187182095,0.6733466777252709,0.41454545454545455,0.7288135593220338,0.35886833679968055
 4 | 0.3,0.6795523906408952,0.6867843231081363,0.36363636363636365,0.8022598870056498,0.3538670872899427
 5 | 0.4,0.7009155645981688,0.6895231702778716,0.31272727272727274,0.8516949152542372,0.3410825635819119
 6 | 0.5,0.7171922685656155,0.6876970376602419,0.27636363636363637,0.8884180790960452,0.3264174681218831
 7 | 0.6,0.7151576805696847,0.6742611836665858,0.2290909090909091,0.903954802259887,0.2821349541214711
 8 | 0.7,0.7192268565615463,0.6695140228765639,0.2,0.9209039548022598,0.26638372270148986
 9 | 0.8,0.7243133265513734,0.6577880820529207,0.14545454545454545,0.9491525423728814,0.23403584637750863
10 | 0.9,0.7314343845371313,0.6457877199926221,0.09454545454545454,0.9788135593220338,0.2118993983537775
11 | 


--------------------------------------------------------------------------------
/results/luis_powerplay11_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.602238046795524,0.6489652908924319,0.44,0.6652542372881356,0.3493129242510959
 3 | 0.2,0.6785350966429298,0.6884322152181961,0.38545454545454544,0.7923728813559322,0.36443959593678765
 4 | 0.3,0.7121057985757884,0.6988097741885357,0.3381818181818182,0.8573446327683616,0.3641509922646042
 5 | 0.4,0.7253306205493387,0.6965591610819797,0.2872727272727273,0.8954802259887006,0.3479732789168995
 6 | 0.5,0.7202441505595117,0.6737287833585557,0.22181818181818183,0.9138418079096046,0.291862017703851
 7 | 0.6,0.7304170905391658,0.6715937089389293,0.19636363636363635,0.9378531073446328,0.28507886991542475
 8 | 0.7,0.7334689725330621,0.6585884333705816,0.14545454545454545,0.961864406779661,0.2559865327520182
 9 | 0.8,0.7334689725330621,0.6425099069619017,0.09090909090909091,0.9830508474576272,0.2192384862783402
10 | 0.9,0.7263479145473042,0.6239689485734526,0.04363636363636364,0.9915254237288136,0.14980967211811785
11 | 


--------------------------------------------------------------------------------
/results/luis_sofmattress.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.6599496221662469,0.6583511406238304,0.5930735930735931,0.7530120481927711,0.580661387850616
 3 | 0.2,0.6649874055415617,0.6481165167919319,0.5238095238095238,0.8614457831325302,0.5694742710422257
 4 | 0.3,0.6599496221662469,0.6309783853401376,0.47186147186147187,0.9216867469879518,0.5568475822263478
 5 | 0.4,0.6523929471032746,0.6162481775882876,0.4329004329004329,0.9578313253012049,0.5466956315650783
 6 | 0.5,0.6196473551637279,0.5737145815429269,0.37662337662337664,0.9578313253012049,0.49930551246201366
 7 | 0.6,0.5894206549118388,0.5269758841858341,0.31601731601731603,0.9698795180722891,0.45563924496546165
 8 | 0.7,0.5667506297229219,0.49174587864161706,0.2727272727272727,0.9759036144578314,0.4201578655531844
 9 | 0.8,0.5440806045340051,0.45627702776114243,0.22510822510822512,0.9879518072289156,0.38564698008973847
10 | 0.9,0.5088161209068011,0.399913951597676,0.15584415584415584,1.0,0.32910030494957465
11 | 


--------------------------------------------------------------------------------
/results/luis_sofmattress_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.5894206549118388,0.5868721604972246,0.4935064935064935,0.7228915662650602,0.49200567052325594
 3 | 0.2,0.5894206549118388,0.5624126892818334,0.39826839826839827,0.8554216867469879,0.4583002646125471
 4 | 0.3,0.5994962216624685,0.5530659814351441,0.35064935064935066,0.9457831325301205,0.467134202661144
 5 | 0.4,0.5617128463476071,0.4980472241466623,0.2683982683982684,0.9698795180722891,0.4087570327249752
 6 | 0.5,0.5440806045340051,0.46525596370760114,0.22943722943722944,0.9819277108433735,0.38164536737032145
 7 | 0.6,0.5365239294710328,0.4455633242012838,0.2077922077922078,0.9939759036144579,0.3715625578876327
 8 | 0.7,0.5163727959697733,0.41249751281068137,0.16883116883116883,1.0,0.34033496707440136
 9 | 0.8,0.4811083123425693,0.3547342029900023,0.10822510822510822,1.0,0.26789715303727896
10 | 0.9,0.4659949622166247,0.32592434738214054,0.08225108225108226,1.0,0.2311697054790726
11 | 


--------------------------------------------------------------------------------
/results/rasa_curekart.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.3834510595358224,0.28125242085213886,0.8407079646017699,0.0,0.39651377278996913
 3 | 0.2,0.38647830474268413,0.2879650521374422,0.8407079646017699,0.0055658627087198514,0.39849991095205295
 4 | 0.3,0.4177598385469223,0.34951458898695087,0.8407079646017699,0.06307977736549165,0.41835926466428414
 5 | 0.4,0.4591321897073663,0.4241729718132308,0.8362831858407079,0.14285714285714285,0.43921564816195047
 6 | 0.5,0.5146316851664985,0.5139404655873048,0.8185840707964602,0.2597402597402597,0.4650631495322916
 7 | 0.6,0.557013118062563,0.570384538783362,0.8075221238938053,0.3469387755102041,0.48891931036364183
 8 | 0.7,0.5893037336024218,0.6073736858221093,0.7898230088495575,0.42115027829313545,0.4992209609417083
 9 | 0.8,0.615539858728557,0.6316430358173197,0.7632743362831859,0.49165120593692024,0.5039480710117246
10 | 0.9,0.6407669021190716,0.6537639235616978,0.7389380530973452,0.5584415584415584,0.5108416672903147
11 | 


--------------------------------------------------------------------------------
/results/rasa_curekart_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.36730575176589303,0.28070361362834495,0.8053097345132744,0.0,0.37949396040632505
 3 | 0.2,0.3763874873864783,0.2996587611811544,0.8053097345132744,0.016697588126159554,0.38527320917267344
 4 | 0.3,0.42885973763874874,0.39669039127807143,0.8053097345132744,0.11317254174397032,0.41623445272563037
 5 | 0.4,0.4873864783047427,0.488306040548246,0.7986725663716814,0.22634508348794063,0.4457023194109826
 6 | 0.5,0.5479313824419778,0.5648611471668102,0.7809734513274337,0.3525046382189239,0.4714792055404184
 7 | 0.6,0.5973763874873865,0.6187987334530253,0.7588495575221239,0.4619666048237477,0.4941972946101975
 8 | 0.7,0.6337033299697276,0.6539156700960813,0.7367256637168141,0.5473098330241187,0.5114806681921403
 9 | 0.8,0.6770938446014128,0.6942037327893981,0.7190265486725663,0.6419294990723562,0.5443926493175281
10 | 0.9,0.7043390514631686,0.7122579441592488,0.6592920353982301,0.7421150278293135,0.5509541661974698
11 | 


--------------------------------------------------------------------------------
/results/rasa_powerplay11.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.1373346897253306,0.0740760502880126,0.4909090909090909,0.0,0.18889472272624616
 3 | 0.2,0.17395727365208546,0.14517846073743704,0.4909090909090909,0.05084745762711865,0.20209392565355375
 4 | 0.3,0.31739572736520855,0.3716455438220059,0.4618181818181818,0.2612994350282486,0.23384958998978336
 5 | 0.4,0.46083418107833163,0.5241212101471256,0.44363636363636366,0.4675141242937853,0.2688555152920154
 6 | 0.5,0.5645981688708036,0.6078808921124113,0.4254545454545455,0.6186440677966102,0.30147733123460646
 7 | 0.6,0.6388606307222787,0.6580630758375814,0.3927272727272727,0.7344632768361582,0.3224489737817428
 8 | 0.7,0.676500508646999,0.673736480201581,0.3381818181818182,0.807909604519774,0.31166032692222884
 9 | 0.8,0.7171922685656155,0.6961317099627315,0.31636363636363635,0.8728813559322034,0.3399775187743717
10 | 0.9,0.7314343845371313,0.68167001253479,0.21454545454545454,0.9322033898305084,0.2949058644577997
11 | 


--------------------------------------------------------------------------------
/results/rasa_powerplay11_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.10783316378433368,0.0563726671323755,0.38545454545454544,0.0,0.14538813844067086
 3 | 0.2,0.22380467955239064,0.25981415203931457,0.38181818181818183,0.16242937853107345,0.17654599691688472
 4 | 0.3,0.4303153611393693,0.49508352249020116,0.3563636363636364,0.4590395480225989,0.21778856977821506
 5 | 0.4,0.5401831129196337,0.5755376510078896,0.32,0.6257062146892656,0.22912569156377594
 6 | 0.5,0.6195320447609359,0.6209068207485843,0.2909090909090909,0.7471751412429378,0.24082020235515628
 7 | 0.6,0.6744659206510681,0.6498908482830816,0.24363636363636362,0.8418079096045198,0.24926272155574747
 8 | 0.7,0.7019328585961343,0.6566132900273823,0.2,0.8968926553672316,0.2474993544193783
 9 | 0.8,0.7232960325534079,0.6561342706688587,0.14909090909090908,0.9463276836158192,0.2361606564545725
10 | 0.9,0.7355035605289929,0.6527306716834309,0.11636363636363636,0.9759887005649718,0.2406068525264252
11 | 


--------------------------------------------------------------------------------
/results/rasa_sofmattress.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.40302267002518893,0.3173417229356829,0.6926406926406926,0.0,0.42952049480375937
 3 | 0.2,0.40554156171284633,0.3237280250807377,0.6926406926406926,0.006024096385542169,0.43019399717323903
 4 | 0.3,0.44080604534005036,0.40017562032308784,0.683982683982684,0.10240963855421686,0.44554324727480543
 5 | 0.4,0.5037783375314862,0.5025993126150237,0.670995670995671,0.2710843373493976,0.4797158451836411
 6 | 0.5,0.5390428211586902,0.5488575093593298,0.6406926406926406,0.39759036144578314,0.49298645472110264
 7 | 0.6,0.5818639798488665,0.5945734802728632,0.6103896103896104,0.5421686746987951,0.510679733490003
 8 | 0.7,0.6120906801007556,0.6164923202933457,0.5757575757575758,0.6626506024096386,0.5230432925400154
 9 | 0.8,0.6272040302267002,0.620617079478249,0.5367965367965368,0.7530120481927711,0.5241637786271162
10 | 0.9,0.6372795969773299,0.6090461164829147,0.4805194805194805,0.8554216867469879,0.5258050832127276
11 | 


--------------------------------------------------------------------------------
/results/rasa_sofmattress_subset.csv:
--------------------------------------------------------------------------------
 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
 2 | 0.1,0.327455919395466,0.24487420952343436,0.5627705627705628,0.0,0.34321170982570903
 3 | 0.2,0.3425692695214106,0.2771987206468406,0.5627705627705628,0.03614457831325301,0.35244337808363696
 4 | 0.3,0.4156171284634761,0.39895490341311163,0.5411255411255411,0.24096385542168675,0.37732642825938606
 5 | 0.4,0.4811083123425693,0.47511485998374675,0.5238095238095238,0.42168674698795183,0.407076025250971
 6 | 0.5,0.5289672544080605,0.5155202574924295,0.48917748917748916,0.5843373493975904,0.420109406500981
 7 | 0.6,0.5617128463476071,0.5385047638512739,0.4458874458874459,0.7228915662650602,0.4331244327993763
 8 | 0.7,0.5667506297229219,0.5290891893997581,0.3939393939393939,0.8072289156626506,0.42492922305773534
 9 | 0.8,0.5617128463476071,0.5099903608529387,0.33766233766233766,0.8734939759036144,0.40819956754895265
10 | 0.9,0.5415617128463476,0.4674266966864961,0.2510822510822511,0.9457831325301205,0.3740752005668206
11 | 


--------------------------------------------------------------------------------
/run_evaluation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import pandas as pd\n",
 10 |     "from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 2,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "platform = 'haptik'\n",
 20 |     "bot_name = 'curekart_subset'"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 3,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "def get_inscope_accuracy(actual_node, pred_node):\n",
 30 |     "    \"\"\"Fraction of in-scope rows (label != 'NO_NODES_DETECTED') predicted exactly.\n",
 31 |     "\n",
 32 |     "    Returns 0.0 instead of raising ZeroDivisionError when there are no in-scope rows.\n",
 33 |     "    \"\"\"\n",
 34 |     "    # Keep only pairs whose true label is a real node; OOS rows are scored separately.\n",
 35 |     "    hits = [act == pred for act, pred in zip(actual_node, pred_node)\n",
 36 |     "            if act != 'NO_NODES_DETECTED']\n",
 37 |     "    # An empty in-scope slice would otherwise divide by zero.\n",
 38 |     "    return sum(hits) / len(hits) if hits else 0.0\n",
 39 |     "\n",
 40 |     "def get_oos_recall(actual_node, pred_node):\n",
 41 |     "    \"\"\"Fraction of out-of-scope rows (label == 'NO_NODES_DETECTED') predicted as such.\n",
 42 |     "\n",
 43 |     "    Returns 0.0 instead of raising ZeroDivisionError when there are no out-of-scope rows.\n",
 44 |     "    \"\"\"\n",
 45 |     "    # Only rows whose true label is the OOS marker count towards the denominator.\n",
 46 |     "    hits = [act == pred for act, pred in zip(actual_node, pred_node)\n",
 47 |     "            if act == 'NO_NODES_DETECTED']\n",
 48 |     "    # Guard the empty case, e.g. a split that contains no OOS rows.\n",
 49 |     "    return sum(hits) / len(hits) if hits else 0.0\n",
 50 |     "\n",
 51 |     "def get_metrics(filepath, thresh):\n",
 52 |     "    \"\"\"Score one predictions CSV at the confidence cutoff `thresh`.\n",
 53 |     "\n",
 54 |     "    Reads columns 'label', 'predicted_node' and 'predicted_node_score'; returns the tuple\n",
 55 |     "    (accuracy, weighted F1, in-scope accuracy, OOS recall, MCC).\n",
 56 |     "    \"\"\"\n",
 57 |     "    df = pd.read_csv(filepath)\n",
 58 |     "    # Predictions scoring below the cutoff become 'NO_NODES_DETECTED' (vectorised, no iterrows).\n",
 59 |     "    pred_node = list(df['predicted_node'].mask(df['predicted_node_score'] < thresh, 'NO_NODES_DETECTED'))\n",
 60 |     "    actual_node = list(df['label'])\n",
 61 |     "    accuracy = accuracy_score(actual_node, pred_node)\n",
 62 |     "    # F1 is averaged over the labels present in the ground truth only.\n",
 63 |     "    overall_f1 = f1_score(actual_node, pred_node, labels=list(set(actual_node)), average='weighted')\n",
 64 |     "    mcc = matthews_corrcoef(actual_node, pred_node)\n",
 65 |     "    return accuracy, overall_f1, get_inscope_accuracy(actual_node, pred_node), get_oos_recall(actual_node, pred_node), mcc"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": 4,
 71 |    "metadata": {},
 72 |    "outputs": [
 73 |     {
 74 |      "data": {
 75 |       "text/html": [
 76 |        "<div>\n",
 77 |        "<style scoped>\n",
 78 |        "    .dataframe tbody tr th:only-of-type {\n",
 79 |        "        vertical-align: middle;\n",
 80 |        "    }\n",
 81 |        "\n",
 82 |        "    .dataframe tbody tr th {\n",
 83 |        "        vertical-align: top;\n",
 84 |        "    }\n",
 85 |        "\n",
 86 |        "    .dataframe thead th {\n",
 87 |        "        text-align: right;\n",
 88 |        "    }\n",
 89 |        "</style>\n",
 90 |        "<table border=\"1\" class=\"dataframe\">\n",
 91 |        "  <thead>\n",
 92 |        "    <tr style=\"text-align: right;\">\n",
 93 |        "      <th></th>\n",
 94 |        "      <th>Threshold</th>\n",
 95 |        "      <th>Accuracy</th>\n",
 96 |        "      <th>Weighted F1</th>\n",
 97 |        "      <th>Inscope Accuracy</th>\n",
 98 |        "      <th>OOS Recall</th>\n",
 99 |        "      <th>MCC</th>\n",
100 |        "    </tr>\n",
101 |        "  </thead>\n",
102 |        "  <tbody>\n",
103 |        "    <tr>\n",
104 |        "      <th>0</th>\n",
105 |        "      <td>0.1</td>\n",
106 |        "      <td>0.412714</td>\n",
107 |        "      <td>0.348371</td>\n",
108 |        "      <td>0.798673</td>\n",
109 |        "      <td>0.089054</td>\n",
110 |        "      <td>0.396726</td>\n",
111 |        "    </tr>\n",
112 |        "    <tr>\n",
113 |        "      <th>1</th>\n",
114 |        "      <td>0.2</td>\n",
115 |        "      <td>0.546922</td>\n",
116 |        "      <td>0.552749</td>\n",
117 |        "      <td>0.780973</td>\n",
118 |        "      <td>0.350649</td>\n",
119 |        "      <td>0.477374</td>\n",
120 |        "    </tr>\n",
121 |        "    <tr>\n",
122 |        "      <th>2</th>\n",
123 |        "      <td>0.3</td>\n",
124 |        "      <td>0.615540</td>\n",
125 |        "      <td>0.633123</td>\n",
126 |        "      <td>0.741150</td>\n",
127 |        "      <td>0.510204</td>\n",
128 |        "      <td>0.505403</td>\n",
129 |        "    </tr>\n",
130 |        "    <tr>\n",
131 |        "      <th>3</th>\n",
132 |        "      <td>0.4</td>\n",
133 |        "      <td>0.708375</td>\n",
134 |        "      <td>0.720889</td>\n",
135 |        "      <td>0.701327</td>\n",
136 |        "      <td>0.714286</td>\n",
137 |        "      <td>0.578202</td>\n",
138 |        "    </tr>\n",
139 |        "    <tr>\n",
140 |        "      <th>4</th>\n",
141 |        "      <td>0.5</td>\n",
142 |        "      <td>0.739657</td>\n",
143 |        "      <td>0.743461</td>\n",
144 |        "      <td>0.670354</td>\n",
145 |        "      <td>0.797774</td>\n",
146 |        "      <td>0.603908</td>\n",
147 |        "    </tr>\n",
148 |        "    <tr>\n",
149 |        "      <th>5</th>\n",
150 |        "      <td>0.6</td>\n",
151 |        "      <td>0.735621</td>\n",
152 |        "      <td>0.731715</td>\n",
153 |        "      <td>0.601770</td>\n",
154 |        "      <td>0.847866</td>\n",
155 |        "      <td>0.582559</td>\n",
156 |        "    </tr>\n",
157 |        "    <tr>\n",
158 |        "      <th>6</th>\n",
159 |        "      <td>0.7</td>\n",
160 |        "      <td>0.744702</td>\n",
161 |        "      <td>0.728362</td>\n",
162 |        "      <td>0.539823</td>\n",
163 |        "      <td>0.916512</td>\n",
164 |        "      <td>0.589812</td>\n",
165 |        "    </tr>\n",
166 |        "    <tr>\n",
167 |        "      <th>7</th>\n",
168 |        "      <td>0.8</td>\n",
169 |        "      <td>0.683148</td>\n",
170 |        "      <td>0.626261</td>\n",
171 |        "      <td>0.334071</td>\n",
172 |        "      <td>0.975881</td>\n",
173 |        "      <td>0.486639</td>\n",
174 |        "    </tr>\n",
175 |        "    <tr>\n",
176 |        "      <th>8</th>\n",
177 |        "      <td>0.9</td>\n",
178 |        "      <td>0.600404</td>\n",
179 |        "      <td>0.485569</td>\n",
180 |        "      <td>0.132743</td>\n",
181 |        "      <td>0.992579</td>\n",
182 |        "      <td>0.309130</td>\n",
183 |        "    </tr>\n",
184 |        "  </tbody>\n",
185 |        "</table>\n",
186 |        "</div>"
187 |       ],
188 |       "text/plain": [
189 |        "   Threshold  Accuracy  Weighted F1  Inscope Accuracy  OOS Recall       MCC\n",
190 |        "0        0.1  0.412714     0.348371          0.798673    0.089054  0.396726\n",
191 |        "1        0.2  0.546922     0.552749          0.780973    0.350649  0.477374\n",
192 |        "2        0.3  0.615540     0.633123          0.741150    0.510204  0.505403\n",
193 |        "3        0.4  0.708375     0.720889          0.701327    0.714286  0.578202\n",
194 |        "4        0.5  0.739657     0.743461          0.670354    0.797774  0.603908\n",
195 |        "5        0.6  0.735621     0.731715          0.601770    0.847866  0.582559\n",
196 |        "6        0.7  0.744702     0.728362          0.539823    0.916512  0.589812\n",
197 |        "7        0.8  0.683148     0.626261          0.334071    0.975881  0.486639\n",
198 |        "8        0.9  0.600404     0.485569          0.132743    0.992579  0.309130"
199 |       ]
200 |      },
201 |      "execution_count": 4,
202 |      "metadata": {},
203 |      "output_type": "execute_result"
204 |     }
205 |    ],
206 |    "source": [
207 |     "# Sweep the score cutoff and collect every metric returned by get_metrics per threshold.\n",
208 |     "thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]\n",
209 |     "preds_path = f'preds/{platform}_{bot_name}.csv'\n",
210 |     "accuracy_over_thresh = []\n",
211 |     "overall_f1_over_thresh = []\n",
212 |     "inscope_accuracy_over_thresh = []  # third value of get_metrics is in-scope accuracy\n",
213 |     "oos_recall_over_thresh = []\n",
214 |     "mcc_over_thresh = []\n",
215 |     "for thresh in thresholds:\n",
216 |     "    accuracy, overall_f1, inscope_accuracy, oos_recall, mcc = get_metrics(preds_path, thresh)\n",
217 |     "    accuracy_over_thresh.append(accuracy)\n",
218 |     "    overall_f1_over_thresh.append(overall_f1)\n",
219 |     "    inscope_accuracy_over_thresh.append(inscope_accuracy)\n",
220 |     "    oos_recall_over_thresh.append(oos_recall)\n",
221 |     "    mcc_over_thresh.append(mcc)\n",
222 |     "df_metrics = pd.DataFrame({'Threshold': thresholds, 'Accuracy': accuracy_over_thresh,\n",
223 |     "                           'Weighted F1': overall_f1_over_thresh,\n",
224 |     "                           'Inscope Accuracy': inscope_accuracy_over_thresh,\n",
225 |     "                           'OOS Recall': oos_recall_over_thresh, 'MCC': mcc_over_thresh})\n",
226 |     "df_metrics.head(10)"
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "code",
231 |    "execution_count": 5,
232 |    "metadata": {},
233 |    "outputs": [],
234 |    "source": [
235 |     "df_metrics.to_csv(f'results/{platform}_{bot_name}.csv', index=False)"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": 6,
241 |    "metadata": {},
242 |    "outputs": [
243 |     {
244 |      "data": {
245 |       "text/plain": [
246 |        "{'IMMUNITY',\n",
247 |        " 'INTERNATIONAL_SHIPPING',\n",
248 |        " 'MODES_OF_PAYMENTS',\n",
249 |        " 'PORTAL_ISSUE',\n",
250 |        " 'REFER_EARN',\n",
251 |        " 'START_OVER',\n",
252 |        " 'WORK_FROM_HOME'}"
253 |       ]
254 |      },
255 |      "execution_count": 6,
256 |      "metadata": {},
257 |      "output_type": "execute_result"
258 |     }
259 |    ],
260 |    "source": [
261 |     "df_res = pd.read_csv(f'preds/{platform}_{bot_name}.csv')\n",
262 |     "set(df_res['predicted_node']) - set(df_res['label'])"
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "code",
267 |    "execution_count": null,
268 |    "metadata": {},
269 |    "outputs": [],
270 |    "source": []
271 |   },
272 |   {
273 |    "cell_type": "code",
274 |    "execution_count": null,
275 |    "metadata": {},
276 |    "outputs": [],
277 |    "source": []
278 |   }
279 |  ],
280 |  "metadata": {
281 |   "kernelspec": {
282 |    "display_name": "Python (py36)",
283 |    "language": "python",
284 |    "name": "py36"
285 |   },
286 |   "language_info": {
287 |    "codemirror_mode": {
288 |     "name": "ipython",
289 |     "version": 3
290 |    },
291 |    "file_extension": ".py",
292 |    "mimetype": "text/x-python",
293 |    "name": "python",
294 |    "nbconvert_exporter": "python",
295 |    "pygments_lexer": "ipython3",
296 |    "version": "3.6.8"
297 |   }
298 |  },
299 |  "nbformat": 4,
300 |  "nbformat_minor": 4
301 | }
302 | 


--------------------------------------------------------------------------------