├── .gitattributes ├── .github └── mergeable.yml ├── .gitignore ├── LICENSE.md ├── README.md ├── analysis └── plot_metrics_graph.ipynb ├── data_exploration ├── eda_curekart.ipynb ├── eda_powerplay11.ipynb ├── eda_sofmattress.ipynb └── stats.ipynb ├── dataset ├── v1 │ ├── test │ │ ├── curekart_test.csv │ │ ├── powerplay11_test.csv │ │ └── sofmattress_test.csv │ └── train │ │ ├── curekart_subset_train.csv │ │ ├── curekart_train.csv │ │ ├── powerplay11_subset_train.csv │ │ ├── powerplay11_train.csv │ │ ├── sofmattress_subset_train.csv │ │ └── sofmattress_train.csv └── v2 │ ├── test │ ├── curekart_test.csv │ ├── powerplay11_test.csv │ └── sofmattress_test.csv │ └── train │ ├── curekart_train.csv │ ├── powerplay11_train.csv │ └── sofmattress_train.csv ├── env-requirements.txt ├── platforms ├── bert │ ├── bert-bot-only-data-es.py │ ├── bert_models │ │ ├── curekart │ │ │ ├── config.json │ │ │ ├── eval_results.txt │ │ │ ├── model_args.json │ │ │ ├── predictions.csv │ │ │ ├── predictions.jsonl │ │ │ ├── special_tokens_map.json │ │ │ ├── tblogs │ │ │ │ └── events.out.tfevents.1597350081.haptik-ai-research-mum-ml-2-vm │ │ │ ├── tokenizer_config.json │ │ │ ├── training_args.bin │ │ │ ├── training_progress_scores.csv │ │ │ └── vocab.txt │ │ ├── curekart_subset │ │ │ ├── best_model │ │ │ │ ├── config.json │ │ │ │ ├── eval_results.txt │ │ │ │ ├── model_args.json │ │ │ │ ├── special_tokens_map.json │ │ │ │ ├── tokenizer_config.json │ │ │ │ ├── training_args.bin │ │ │ │ └── vocab.txt │ │ │ ├── config.json │ │ │ ├── eval_results.txt │ │ │ ├── model_args.json │ │ │ ├── predictions.csv │ │ │ ├── predictions.jsonl │ │ │ ├── special_tokens_map.json │ │ │ ├── tblogs │ │ │ │ └── events.out.tfevents.1597351105.haptik-ai-research-mum-ml-2-vm │ │ │ ├── tokenizer_config.json │ │ │ ├── training_args.bin │ │ │ ├── training_progress_scores.csv │ │ │ └── vocab.txt │ │ ├── powerplay11 │ │ │ ├── best_model │ │ │ │ ├── config.json │ │ │ │ ├── eval_results.txt │ │ │ │ ├── model_args.json │ │ │ │ ├── 
special_tokens_map.json │ │ │ │ ├── tokenizer_config.json │ │ │ │ ├── training_args.bin │ │ │ │ └── vocab.txt │ │ │ ├── config.json │ │ │ ├── eval_results.txt │ │ │ ├── model_args.json │ │ │ ├── predictions.csv │ │ │ ├── predictions.jsonl │ │ │ ├── special_tokens_map.json │ │ │ ├── tblogs │ │ │ │ └── events.out.tfevents.1597160596.haptik-ai-research-mum-ml-2-vm │ │ │ ├── tokenizer_config.json │ │ │ ├── training_args.bin │ │ │ ├── training_progress_scores.csv │ │ │ └── vocab.txt │ │ ├── powerplay11_subset │ │ │ ├── best_model │ │ │ │ ├── config.json │ │ │ │ ├── eval_results.txt │ │ │ │ ├── model_args.json │ │ │ │ ├── special_tokens_map.json │ │ │ │ ├── tokenizer_config.json │ │ │ │ ├── training_args.bin │ │ │ │ └── vocab.txt │ │ │ ├── config.json │ │ │ ├── eval_results.txt │ │ │ ├── model_args.json │ │ │ ├── predictions.csv │ │ │ ├── predictions.jsonl │ │ │ ├── special_tokens_map.json │ │ │ ├── tblogs │ │ │ │ └── events.out.tfevents.1597163469.haptik-ai-research-mum-ml-2-vm │ │ │ ├── tokenizer_config.json │ │ │ ├── training_args.bin │ │ │ ├── training_progress_scores.csv │ │ │ └── vocab.txt │ │ ├── sofmattress │ │ │ ├── best_model │ │ │ │ ├── config.json │ │ │ │ ├── eval_results.txt │ │ │ │ ├── model_args.json │ │ │ │ ├── special_tokens_map.json │ │ │ │ ├── tokenizer_config.json │ │ │ │ ├── training_args.bin │ │ │ │ └── vocab.txt │ │ │ ├── config.json │ │ │ ├── eval_results.txt │ │ │ ├── model_args.json │ │ │ ├── predictions.csv │ │ │ ├── predictions.jsonl │ │ │ ├── special_tokens_map.json │ │ │ ├── tblogs │ │ │ │ └── events.out.tfevents.1597159859.haptik-ai-research-mum-ml-2-vm │ │ │ ├── tokenizer_config.json │ │ │ ├── training_args.bin │ │ │ ├── training_progress_scores.csv │ │ │ └── vocab.txt │ │ └── sofmattress_subset │ │ │ ├── best_model │ │ │ ├── config.json │ │ │ ├── eval_results.txt │ │ │ ├── model_args.json │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer_config.json │ │ │ ├── training_args.bin │ │ │ └── vocab.txt │ │ │ ├── config.json │ │ │ ├── 
eval_results.txt │ │ │ ├── model_args.json │ │ │ ├── predictions.csv │ │ │ ├── predictions.jsonl │ │ │ ├── special_tokens_map.json │ │ │ ├── tblogs │ │ │ └── events.out.tfevents.1597162813.haptik-ai-research-mum-ml-2-vm │ │ │ ├── tokenizer_config.json │ │ │ ├── training_args.bin │ │ │ ├── training_progress_scores.csv │ │ │ └── vocab.txt │ ├── down-requirements.txt │ ├── run_bert_experiments.sh │ └── up-requirements.txt ├── dialogflow │ ├── agent_template │ │ ├── agent.json │ │ └── package.json │ ├── generate_preds.ipynb │ └── training_data_conversion.ipynb ├── haptik │ └── convert_data.py ├── luis │ ├── generate_preds.ipynb │ └── training_data_conversion.ipynb └── rasa │ ├── __init__.py │ ├── actions.py │ ├── config.yml │ ├── credentials.yml │ ├── data │ └── curekart.json │ ├── domain.yml │ ├── endpoints.yml │ ├── generate_preds.ipynb │ ├── tests │ └── conversation_tests.md │ └── training_data_conversion.ipynb ├── preds ├── bert_curekart.csv ├── bert_curekart_subset.csv ├── bert_powerplay11.csv ├── bert_powerplay11_subset.csv ├── bert_sofmattress.csv ├── bert_sofmattress_subset.csv ├── dialogflow_curekart.csv ├── dialogflow_curekart_subset.csv ├── dialogflow_powerplay11.csv ├── dialogflow_powerplay11_subset.csv ├── dialogflow_sofmattress.csv ├── dialogflow_sofmattress_subset.csv ├── haptik_curekart.csv ├── haptik_curekart_subset.csv ├── haptik_powerplay11.csv ├── haptik_powerplay11_subset.csv ├── haptik_sofmattress.csv ├── haptik_sofmattress_subset.csv ├── luis_curekart.csv ├── luis_curekart_subset.csv ├── luis_powerplay11.csv ├── luis_powerplay11_subset.csv ├── luis_sofmattress.csv ├── luis_sofmattress_subset.csv ├── rasa_curekart.csv ├── rasa_curekart_subset.csv ├── rasa_powerplay11.csv ├── rasa_powerplay11_subset.csv ├── rasa_sofmattress.csv └── rasa_sofmattress_subset.csv ├── prepare_subset_of_data.ipynb ├── results ├── bert_curekart.csv ├── bert_curekart_subset.csv ├── bert_powerplay11.csv ├── bert_powerplay11_subset.csv ├── bert_sofmattress.csv ├── 
bert_sofmattress_subset.csv ├── dialogflow_curekart.csv ├── dialogflow_curekart_subset.csv ├── dialogflow_powerplay11.csv ├── dialogflow_powerplay11_subset.csv ├── dialogflow_sofmattress.csv ├── dialogflow_sofmattress_subset.csv ├── haptik_curekart.csv ├── haptik_curekart_subset.csv ├── haptik_powerplay11.csv ├── haptik_powerplay11_subset.csv ├── haptik_sofmattress.csv ├── haptik_sofmattress_subset.csv ├── luis_curekart.csv ├── luis_curekart_subset.csv ├── luis_powerplay11.csv ├── luis_powerplay11_subset.csv ├── luis_sofmattress.csv ├── luis_sofmattress_subset.csv ├── rasa_curekart.csv ├── rasa_curekart_subset.csv ├── rasa_powerplay11.csv ├── rasa_powerplay11_subset.csv ├── rasa_sofmattress.csv └── rasa_sofmattress_subset.csv └── run_evaluation.ipynb /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/.gitattributes -------------------------------------------------------------------------------- /.github/mergeable.yml: -------------------------------------------------------------------------------- 1 | mergeable: 2 | pull_requests: 3 | stale: 4 | days: 14 5 | message: 'This PR is stale. Please follow up!' 6 | 7 | label: 8 | must_include: 9 | regex: '(new-feature)|(documentation)|(bug-fixes)|(enhancement)|(needs-migration)|(packages-updated)|(miscellaneous)|(superman)' 10 | message: 'Can you please add a valid label! [One of (new-feature) / (documentation) / (bug-fixes) / (enhancement) / (needs-migration) / (packages-updated) / (miscellaneous)]' 11 | must_exclude: 12 | regex: '(do-not-merge)' 13 | message: 'This PR is work in progress. Cannot be merged yet.' 14 | 15 | description: 16 | no_empty: 17 | enabled: true 18 | message: 'Can you please add a description!' 19 | must_exclude: 20 | regex: 'do not merge' 21 | message: 'This PR is work in progress. Cannot be merged yet.' 
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .ipynb_checkpoints 132 | .idea 133 | 134 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | LICENSE 2 | ============== 3 | HINT3 dataset is made available under the Open Database License: http://opendatacommons.org/licenses/odbl/1.0/. 4 | Any rights in individual contents of the database are licensed under the Database Contents License: http://opendatacommons.org/licenses/dbcl/1.0/ 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## HINT3: Raising the bar for Intent Detection in the Wild 2 | 3 | This repository contains datasets and code for the paper 4 | "HINT3: Raising the bar for Intent Detection in the Wild" 5 | accepted at EMNLP-2020's 6 | [Insights workshop](https://insights-workshop.github.io/) 7 | 8 | Published paper is available [here](https://www.aclweb.org/anthology/2020.insights-1.16/) 9 | 10 | **Update Feb 2021: We noticed in our analysis of the results that 11 | there are few ground truth labels which are incorrect. Hence, we're releasing 12 | a new version, v2 of the dataset, present inside dataset/v2 folder. 
All the 13 | results in the paper were obtained on the earlier version of the dataset 14 | present inside dataset/v1, which should be used to exactly reproduce 15 | the results presented in the paper.** 16 | 17 | 18 | ### Dataset 19 | 20 | - Train and Test sets for SOFMattress, Curekart and Powerplay11 21 | are available in `dataset` folder for both Full and Subset variations. 22 | - You can also use `prepare_subset_of_data.ipynb` notebook to generate 23 | subset variations of full datasets. All the entailment assets 24 | generated can be downloaded from [here](https://drive.google.com/drive/folders/1Un97REmtSbxmcNlDgg5qX0awxFoGz0n4?usp=sharing). 25 | 26 | 27 | ### EDA 28 | 29 | We have done EDA analysis on the datasets which is accessible 30 | from the `data_exploration` folder. 31 | 32 | 33 | ### Test set predictions 34 | 35 | Predictions from BERT and 4 NLU platforms on test sets used for 36 | analysis in the paper are present in `preds` folder. Feel free to 37 | do further analysis on these predictions if you want. 38 | 39 | ### Test set metrics 40 | 41 | All the metrics from BERT and 4 NLU platforms on test sets 42 | are present in `results` folder for further analysis. Graphs plotted in 43 | the paper can be reproduced using `analysis/plot_metrics_graph.ipynb` 44 | notebook 45 | 46 | 47 | ### Reproducibility Instructions 48 | 49 | The scripts to generate training data and predicting intents 50 | based on the testing data for all the 4 platforms and BERT 51 | based classifier are inside `platforms` folder within 52 | their named directories. 53 | 54 | 55 | #### Rasa 56 | 57 | - The `training_data_conversion.ipynb` notebook is used to 58 | convert the training set into a JSON format that Rasa 59 | mandates in order to train its model. 
The generated JSON 60 | file is created inside the `data` directory 61 | 62 | - In order to train a model for one particular bot, keep only 63 | that bot's JSON file inside the `data` directory 64 | 65 | - Train the model using this command: `rasa train nlu` 66 | 67 | - Once the model is trained, its tar.gz file will be stored 68 | inside the `models` directory based on the current timestamp 69 | 70 | - In order to start the NLU server, run the following command: 71 | `rasa run --enable-api -m models/nlu-<timestamp>.tar.gz` where 72 | `nlu-<timestamp>.tar.gz` is the name of the model's file 73 | created in the previous step 74 | 75 | - In order to generate a report against a testing set file, 76 | run the `generate_preds.ipynb` notebook after specifying the 77 | name of the bot. Generated predictions will be stored inside 78 | `preds` folder 79 | 80 | 81 | 82 | #### Dialogflow 83 | - The `training_data_conversion.ipynb` file is used to convert 84 | the training set into a bunch of JSON files that Dialogflow 85 | mandates in order to train its model. The generated JSON files 86 | are stored inside the `intents` directory 87 | 88 | - Log in to the Dialogflow dashboard using a Gmail account 89 | and visit `https://dialogflow.cloud.google.com` 90 | 91 | - Dialogflow allows bulk upload of the training set by 92 | importing a zip file. The compressed folder has a predefined 93 | structure. In order to create this folder, create a copy of 94 | the `agent_template` directory and rename the folder as 95 | per your bot name. Then, copy all the JSON files created 96 | in step 1 and paste them inside the `intents` folder of your 97 | agent directory. Then, open the `agent.json` file and edit 98 | the `displayName` property to specify the name of the 99 | agent of your bot. An agent is analogous to an app or 100 | a bot. 
Once these changes are done, compress the agent 101 | directory into a zip file 102 | 103 | - Create a new agent on the Dialogflow dashboard 104 | here: `https://dialogflow.cloud.google.com/?authuser=1#/newAgent` 105 | 106 | - Delete `Default Fallback Intent` from the intents dashboard 107 | 108 | - Edit the agent: `https://dialogflow.cloud.google.com/?authuser=1#/editAgent/mt11-agent-ugmx/` -> Export & Import -> Import from zip -> upload the agent zip file. This will allow us to bulk upload all intents along with their respective utterances 109 | 110 | - Go to Edit agent -> ML settings. The default threshold value 111 | is 0.3. Change it to 0.05 and Train the model 112 | 113 | - Copy the CURL request from the API playground. We can get 114 | the authentication token and the model's API endpoint from 115 | this CURL request 116 | 117 | - The `generate_preds.ipynb` file will help generate predictions 118 | for the bot. 119 | 120 | 121 | #### LUIS 122 | - The `training_data_conversion.ipynb` file will generate 123 | a JSON file based on the training set's CSV file 124 | 125 | - Login to `luis.ai`, go to `https://www.luis.ai/applications` 126 | and click on `New app for conversation` -> `Import as JSON`. 127 | Upload the JSON file generated in the first step 128 | 129 | - Once all the intents are uploaded, click on the `Train` button 130 | to train the model. Once the model is trained, click on 131 | `Publish` followed by selecting `Production slot` 132 | 133 | - Now, go to the `Manage` section of the app and copy the 134 | App ID. 
We will be using this App ID in the `generate_preds.ipynb` file to generate our prediction reports 135 | 136 | - Go to the settings page of your account in order to get 137 | the `PREDICTION_KEY` and `PREDICTION_ENDPOINT` used in 138 | `generate_preds.ipynb` file 139 | 140 | #### Haptik 141 | 142 | - Access requests for signup on Haptik are processed via contact 143 | form at https://haptik.ai/contact-us/ 144 | 145 | - Once you get the access, you'll be able to create bots 146 | and run predictions using the scripts provided in 147 | `platforms/haptik` 148 | 149 | 150 | #### BERT 151 | 152 | - Results on BERT can be reproduced using scripts in the 153 | folder `platforms/bert` 154 | 155 | - The folder also contains config for each of the models 156 | trained on Full and Subset variations of datasets 157 | 158 | 159 | ### Citation 160 | 161 | If you use this in your research, please consider citing: 162 | 163 | ```latex 164 | @inproceedings{arora-etal-2020-hint3, 165 | title = "{HINT}3: Raising the bar for Intent Detection in the Wild", 166 | author = "Arora, Gaurav and 167 | Jain, Chirag and 168 | Chaturvedi, Manas and 169 | Modi, Krupal", 170 | booktitle = "Proceedings of the First Workshop on Insights from Negative Results in NLP", 171 | month = nov, 172 | year = "2020", 173 | address = "Online", 174 | publisher = "Association for Computational Linguistics", 175 | url = "https://www.aclweb.org/anthology/2020.insights-1.16", 176 | doi = "10.18653/v1/2020.insights-1.16", 177 | pages = "100--105", 178 | abstract = "Intent Detection systems in the real world are exposed to complexities of imbalanced datasets containing varying perception of intent, unintended correlations and domain-specific aberrations. To facilitate benchmarking which can reflect near real-world scenarios, we introduce 3 new datasets created from live chatbots in diverse domains. 
Unlike most existing datasets that are crowdsourced, our datasets contain real user queries received by the chatbots and facilitates penalising unwanted correlations grasped during the training process. We evaluate 4 NLU platforms and a BERT based classifier and find that performance saturates at inadequate levels on test sets because all systems latch on to unintended patterns in training data.", 179 | } 180 | ``` 181 | 182 | 183 | -------------------------------------------------------------------------------- /data_exploration/stats.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import nltk\n", 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "import spacy\n", 13 | "import pickle\n", 14 | "from pathlib import Path\n", 15 | "from typing import List\n", 16 | "from fastai.text import SpacyTokenizer" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "OOS_CLASS = 'NO_NODES_DETECTED'" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "train_dir = Path('../train/')\n", 35 | "test_dir = Path('../test/')" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 4, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "nlp = spacy.load('en_core_web_lg', disable=['ner', 'parser'])" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 5, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "60000\n" 57 | ] 58 | } 59 | ], 60 | "source": [ 61 | "with open(\"/home/ubuntu/gaurav/.fastai/models/wt103-fwd/itos_wt103.pkl\", \"rb\") as input_file:\n", 62 | " wiki_vocab = set(pickle.load(input_file))\n", 63 | 
"print(len(wiki_vocab))\n", 64 | "tokenizer = SpacyTokenizer('en')" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 6, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "def dist(a: List[str], b: List[str], n: int = 3) -> float:\n", 74 | " answer = 1.0\n", 75 | " for i in range(1, n + 1):\n", 76 | " a_i = set(nltk.ngrams(a, i))\n", 77 | " b_i = set(nltk.ngrams(b, i))\n", 78 | " intersection = len(a_i & b_i)\n", 79 | " union = len(a_i | b_i)\n", 80 | " if union:\n", 81 | " answer -= (intersection / union)\n", 82 | " return answer\n", 83 | "# return max(0, answer)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 7, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "def diversity(train_df: pd.DataFrame) -> float:\n", 93 | " div, labelsc = 0, 0\n", 94 | " for label, group_df in train_df.groupby('label'):\n", 95 | " acc = 0\n", 96 | " labelsc += 1\n", 97 | " for text_a in group_df['sentence_tokens']:\n", 98 | " for text_b in group_df['sentence_tokens']:\n", 99 | " d = dist(text_a, text_b)\n", 100 | " acc += d\n", 101 | " div += (acc / (len(group_df) * len(group_df)))\n", 102 | " return div / labelsc" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 8, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "def coverage(train_df: pd.DataFrame, test_df: pd.DataFrame) -> float:\n", 112 | " cov, labelsc = 0, 0\n", 113 | " for label, group_df in test_df.groupby('label'):\n", 114 | " if label == OOS_CLASS:\n", 115 | " continue\n", 116 | " labelsc += 1\n", 117 | " train_group = train_df[train_df['label'] == label]\n", 118 | " acc = 0\n", 119 | " for text_b in group_df['sentence_tokens']:\n", 120 | " acc += max(1.0 - dist(text_a, text_b) for text_a in train_group['sentence_tokens'])\n", 121 | " cov += (acc / len(group_df))\n", 122 | " return cov / labelsc" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 9, 128 | "metadata": {}, 129 | 
"outputs": [], 130 | "source": [ 131 | "def read_file(path: Path) -> pd.DataFrame:\n", 132 | " print(f'Reading file {path}')\n", 133 | " df = pd.read_csv(str(path))\n", 134 | " df['sentence_tokens'] = df['sentence'].apply(lambda sent: [tok.text for tok in nlp(sent.lower().strip())])\n", 135 | " return df" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 10, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "def df_stats(df):\n", 145 | " all_toks = set()\n", 146 | " oov_toks = set()\n", 147 | " tok_lens = []\n", 148 | " for sentence in df['sentence']:\n", 149 | " doc = tokenizer.tokenizer(sentence.lower().strip())\n", 150 | " tok_lens.append(len(doc))\n", 151 | " for tok in doc:\n", 152 | " all_toks.add(tok)\n", 153 | " if tok not in wiki_vocab:\n", 154 | " oov_toks.add(tok)\n", 155 | " return {\n", 156 | " 'len': len(df),\n", 157 | " 'in-scope': len(df[df['label'] != OOS_CLASS]),\n", 158 | " 'oos': len(df[df['label'] == OOS_CLASS]),\n", 159 | " 'labels': len(df[df['label'] != OOS_CLASS]['label'].unique()),\n", 160 | " 'tok_min': min(tok_lens),\n", 161 | " 'tok_max': max(tok_lens),\n", 162 | " 'tok_mean': np.mean(tok_lens),\n", 163 | " 'tok_std': np.std(tok_lens),\n", 164 | " 'oov_percentage': len(oov_toks) / len(all_toks),\n", 165 | " }" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 11, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "Reading file ../train/sofmattress_train.csv\n", 178 | "Reading file ../test/sofmattress_test.csv\n", 179 | "train stats: {'len': 328, 'in-scope': 328, 'oos': 0, 'labels': 21, 'tok_min': 1, 'tok_max': 28, 'tok_mean': 4.414634146341464, 'tok_std': 2.542090648688811, 'oov_percentage': 0.05084745762711865}\n", 180 | "test stats: {'len': 397, 'in-scope': 231, 'oos': 166, 'labels': 20, 'tok_min': 1, 'tok_max': 53, 'tok_mean': 6.607052896725441, 'tok_std': 5.770746222932882, 
'oov_percentage': 0.20937042459736457}\n", 181 | "Diversity: 0.6168521547770577\n", 182 | "Coverage: 0.4411860589631133\n", 183 | "Reading file ../train/sofmattress_subset_train.csv\n", 184 | "Reading file ../test/sofmattress_test.csv\n", 185 | "train stats: {'len': 180, 'in-scope': 180, 'oos': 0, 'labels': 21, 'tok_min': 1, 'tok_max': 28, 'tok_mean': 5.338888888888889, 'tok_std': 2.828749014609518, 'oov_percentage': 0.049429657794676805}\n", 186 | "test stats: {'len': 397, 'in-scope': 231, 'oos': 166, 'labels': 20, 'tok_min': 1, 'tok_max': 53, 'tok_mean': 6.607052896725441, 'tok_std': 5.770746222932882, 'oov_percentage': 0.20937042459736457}\n", 187 | "Diversity: 0.44642491013275915\n", 188 | "Coverage: 0.3599558172984878\n", 189 | "Reading file ../train/powerplay11_train.csv\n", 190 | "Reading file ../test/powerplay11_test.csv\n", 191 | "train stats: {'len': 471, 'in-scope': 471, 'oos': 0, 'labels': 59, 'tok_min': 1, 'tok_max': 31, 'tok_mean': 5.021231422505308, 'tok_std': 3.5458141003642187, 'oov_percentage': 0.05102040816326531}\n", 192 | "test stats: {'len': 983, 'in-scope': 275, 'oos': 708, 'labels': 58, 'tok_min': 1, 'tok_max': 73, 'tok_mean': 7.2868769074262465, 'tok_std': 6.8174198446968575, 'oov_percentage': 0.3639822447685479}\n", 193 | "Diversity: -0.014979018179958047\n", 194 | "Coverage: 0.507159939637793\n", 195 | "Reading file ../train/powerplay11_subset_train.csv\n", 196 | "Reading file ../test/powerplay11_test.csv\n", 197 | "train stats: {'len': 261, 'in-scope': 261, 'oos': 0, 'labels': 59, 'tok_min': 1, 'tok_max': 31, 'tok_mean': 6.0344827586206895, 'tok_std': 4.166409028895613, 'oov_percentage': 0.05070422535211268}\n", 198 | "test stats: {'len': 983, 'in-scope': 275, 'oos': 708, 'labels': 58, 'tok_min': 1, 'tok_max': 73, 'tok_mean': 7.2868769074262465, 'tok_std': 6.8174198446968575, 'oov_percentage': 0.3639822447685479}\n", 199 | "Diversity: -0.3480264198264641\n", 200 | "Coverage: 0.4201842473694509\n", 201 | "Reading file 
../train/curekart_train.csv\n", 202 | "Reading file ../test/curekart_test.csv\n", 203 | "train stats: {'len': 600, 'in-scope': 600, 'oos': 0, 'labels': 28, 'tok_min': 1, 'tok_max': 27, 'tok_mean': 6.29, 'tok_std': 4.032273965560045, 'oov_percentage': 0.10204081632653061}\n", 204 | "test stats: {'len': 991, 'in-scope': 452, 'oos': 539, 'labels': 21, 'tok_min': 1, 'tok_max': 44, 'tok_mean': 6.424823410696266, 'tok_std': 5.02407416696985, 'oov_percentage': 0.30474934036939316}\n", 205 | "Diversity: 0.5431161766997563\n", 206 | "Coverage: 0.7164532041212871\n", 207 | "Reading file ../train/curekart_subset_train.csv\n", 208 | "Reading file ../test/curekart_test.csv\n", 209 | "train stats: {'len': 413, 'in-scope': 413, 'oos': 0, 'labels': 28, 'tok_min': 1, 'tok_max': 27, 'tok_mean': 7.196125907990314, 'tok_std': 4.236676538359367, 'oov_percentage': 0.09433962264150944}\n", 210 | "test stats: {'len': 991, 'in-scope': 452, 'oos': 539, 'labels': 21, 'tok_min': 1, 'tok_max': 44, 'tok_mean': 6.424823410696266, 'tok_std': 5.02407416696985, 'oov_percentage': 0.30474934036939316}\n", 211 | "Diversity: 0.4406516266397046\n", 212 | "Coverage: 0.5727839744833852\n" 213 | ] 214 | } 215 | ], 216 | "source": [ 217 | "datasets = ['sofmattress', 'powerplay11', 'curekart']\n", 218 | "for dataset in datasets:\n", 219 | " for suf in ['', '_subset']:\n", 220 | " train_df = read_file(train_dir / f'{dataset}{suf}_train.csv')\n", 221 | " test_df = read_file(test_dir / f'{dataset}_test.csv')\n", 222 | " print('train stats:', df_stats(train_df))\n", 223 | " print('test stats:', df_stats(test_df))\n", 224 | " print('Diversity:', diversity(train_df))\n", 225 | " print('Coverage:', coverage(train_df, test_df))" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [] 234 | } 235 | ], 236 | "metadata": { 237 | "kernelspec": { 238 | "display_name": "Python (py36)", 239 | "language": "python", 240 | "name": "py36" 
241 | }, 242 | "language_info": { 243 | "codemirror_mode": { 244 | "name": "ipython", 245 | "version": 3 246 | }, 247 | "file_extension": ".py", 248 | "mimetype": "text/x-python", 249 | "name": "python", 250 | "nbconvert_exporter": "python", 251 | "pygments_lexer": "ipython3", 252 | "version": "3.6.8" 253 | } 254 | }, 255 | "nbformat": 4, 256 | "nbformat_minor": 4 257 | } 258 | -------------------------------------------------------------------------------- /dataset/v1/train/sofmattress_subset_train.csv: -------------------------------------------------------------------------------- 1 | sentence,label 2 | How do I know the size of my bed?,WHAT_SIZE_TO_ORDER 3 | How do I know what size to order?,WHAT_SIZE_TO_ORDER 4 | Can I please have the size chart?,WHAT_SIZE_TO_ORDER 5 | Want to know the custom size chart,WHAT_SIZE_TO_ORDER 6 | Can you help with the size?,WHAT_SIZE_TO_ORDER 7 | Help me with the size chart,WHAT_SIZE_TO_ORDER 8 | What are the available mattress sizes,WHAT_SIZE_TO_ORDER 9 | What are the available sizes?,WHAT_SIZE_TO_ORDER 10 | What are the sizes available?,WHAT_SIZE_TO_ORDER 11 | Show me all available sizes,WHAT_SIZE_TO_ORDER 12 | Share the size structure,WHAT_SIZE_TO_ORDER 13 | What size to order?,WHAT_SIZE_TO_ORDER 14 | What are the sizes,WHAT_SIZE_TO_ORDER 15 | Mattress size,WHAT_SIZE_TO_ORDER 16 | King Size,WHAT_SIZE_TO_ORDER 17 | Inches,WHAT_SIZE_TO_ORDER 18 | Length,WHAT_SIZE_TO_ORDER 19 | 6*3,WHAT_SIZE_TO_ORDER 20 | How much does a SOF mattress cost,MATTRESS_COST 21 | Want to know the price,MATTRESS_COST 22 | What will be the price,MATTRESS_COST 23 | What is the cost,MATTRESS_COST 24 | Price of mattress,MATTRESS_COST 25 | I need price,MATTRESS_COST 26 | Cost of Bed,MATTRESS_COST 27 | Price Range,MATTRESS_COST 28 | Low price,MATTRESS_COST 29 | MRP,MATTRESS_COST 30 | Not happy with the product please help me to return,RETURN_EXCHANGE 31 | I want to return my mattress,RETURN_EXCHANGE 32 | How can I replace the mattress.,RETURN_EXCHANGE 33 | 
Help me with exchange process,RETURN_EXCHANGE 34 | How do I return it,RETURN_EXCHANGE 35 | Need my money back,RETURN_EXCHANGE 36 | I want refund,RETURN_EXCHANGE 37 | Replacement policy,RETURN_EXCHANGE 38 | What is the price for size (x ft x y ft)? What is the price for size (x inches x y inches)?,DISTRIBUTORS 39 | Where can I see the product before I buy,DISTRIBUTORS 40 | Do you have any showrooms in Delhi state,DISTRIBUTORS 41 | Do you have any distributors in Mumbai city,DISTRIBUTORS 42 | Do you have any retailers in Pune city,DISTRIBUTORS 43 | Can I visit SOF mattress showroom,DISTRIBUTORS 44 | Any shop that I can visit,DISTRIBUTORS 45 | You have any branch,DISTRIBUTORS 46 | We want dealer ship,DISTRIBUTORS 47 | Nearby Show room,DISTRIBUTORS 48 | Shop near by,DISTRIBUTORS 49 | Demo store,DISTRIBUTORS 50 | Need dealership,DISTRIBUTORS 51 | Distributors/Retailers/Showrooms,DISTRIBUTORS 52 | What is the difference between the Ergo & Ortho variants,COMPARISON 53 | Is the mattress good for my back,COMPARISON 54 | I wanna know the difference,COMPARISON 55 | Difference between the products,COMPARISON 56 | Compare the 2 mattresses,COMPARISON 57 | Which mattress to buy?,COMPARISON 58 | It's been 30 days my product haven't received,DELAY_IN_DELIVERY 59 | I did not receive my order yet,DELAY_IN_DELIVERY 60 | It's too late to get delivered,DELAY_IN_DELIVERY 61 | It's been so many days,DELAY_IN_DELIVERY 62 | Why so many days,DELAY_IN_DELIVERY 63 | Almost 1 month over,DELAY_IN_DELIVERY 64 | Why so long?,DELAY_IN_DELIVERY 65 | Delivery is delayed,DELAY_IN_DELIVERY 66 | It's delayed,DELAY_IN_DELIVERY 67 | Do you offer Zero Percent EMI payment options?,EMI 68 | I want to buy this in installments,EMI 69 | I want it on 0% interest,EMI 70 | I want to buy on EMI,EMI 71 | Do you accept Paisa EMI card,EMI 72 | Can we buy through Paisa finance,EMI 73 | You guys provide EMI option?,EMI 74 | How to get in EMI,EMI 75 | What is minimum down payment,EMI 76 | No cost EMI is available?,EMI 77 
| I want in installment,EMI 78 | what about emi options,EMI 79 | Paisa finance service available,EMI 80 | 0% EMI.,EMI 81 | Down payments,EMI 82 | Installments,EMI 83 | Can I buy pillows from here,PILLOWS 84 | I want to buy pillows,PILLOWS 85 | Can I also have pillows,PILLOWS 86 | Can I get pillows?,PILLOWS 87 | May I please know about the offers,OFFERS 88 | Tell me about the latest offers,OFFERS 89 | What are the available offers,OFFERS 90 | Want to know the discount ,OFFERS 91 | Give me some discount,OFFERS 92 | Discount,OFFERS 93 | Can you make delivery on this pin code?,CHECK_PINCODE 94 | Will you be able to deliver here,CHECK_PINCODE 95 | Can you please deliver on my pincode,CHECK_PINCODE 96 | Check pincode,CHECK_PINCODE 97 | How long is the warranty you offer on your mattresses and what does it cover,WARRANTY 98 | Does mattress cover is included in warranty,WARRANTY 99 | Tell me about the product warranty,WARRANTY 100 | What is the warranty period?,WARRANTY 101 | Want to know about warranty,WARRANTY 102 | would interested in warranty details,WARRANTY 103 | Do you offer warranty ,WARRANTY 104 | Is the 100 night return trial applicable for custom size as well,100_NIGHT_TRIAL_OFFER 105 | How does the 100 night trial work,100_NIGHT_TRIAL_OFFER 106 | Can you give me 100 night trial ,100_NIGHT_TRIAL_OFFER 107 | Can I try a mattress first,100_NIGHT_TRIAL_OFFER 108 | What is 100 Night trial offer,100_NIGHT_TRIAL_OFFER 109 | What is the 100-night offer,100_NIGHT_TRIAL_OFFER 110 | How to enroll for trial,100_NIGHT_TRIAL_OFFER 111 | I want to check offers,100_NIGHT_TRIAL_OFFER 112 | Can I get free trial,100_NIGHT_TRIAL_OFFER 113 | Trial offer on customisation,100_NIGHT_TRIAL_OFFER 114 | do you provide exchange,100_NIGHT_TRIAL_OFFER 115 | Need 100 days trial,100_NIGHT_TRIAL_OFFER 116 | 100 Nights trial version,100_NIGHT_TRIAL_OFFER 117 | 100 night trial,100_NIGHT_TRIAL_OFFER 118 | 100 days trial,100_NIGHT_TRIAL_OFFER 119 | 100 free Nights,100_NIGHT_TRIAL_OFFER 120 | 100 
night,100_NIGHT_TRIAL_OFFER 121 | What are the key features of the SOF Ortho mattress,ORTHO_FEATURES 122 | I m looking mattress for slip disc problem,ORTHO_FEATURES 123 | I am cervical and Lombard section problem,ORTHO_FEATURES 124 | Tell me about SOF Ortho mattress,ORTHO_FEATURES 125 | Do we have anything for backache,ORTHO_FEATURES 126 | I have back pain issue,ORTHO_FEATURES 127 | Is there orthopedic mattress available,ORTHO_FEATURES 128 | Have a back problem,ORTHO_FEATURES 129 | ortho,ORTHO_FEATURES 130 | How is SOF different from other mattress brands,ABOUT_SOF_MATTRESS 131 | How is SOF mattress different from,ABOUT_SOF_MATTRESS 132 | Tell me about SOF mattresses,ABOUT_SOF_MATTRESS 133 | Who are SOF mattress,ABOUT_SOF_MATTRESS 134 | Tell me about company,ABOUT_SOF_MATTRESS 135 | What is SOF,ABOUT_SOF_MATTRESS 136 | What are the key features of the SOF Ergo mattress,ERGO_FEATURES 137 | Tell me about SOF Ergo mattress,ERGO_FEATURES 138 | Does this have ergonomic support?,ERGO_FEATURES 139 | Features of Ergo mattress,ERGO_FEATURES 140 | What about ergo,ERGO_FEATURES 141 | Want to talk to an live agent,LEAD_GEN 142 | Need a call from your representative,LEAD_GEN 143 | Want to get in touch,LEAD_GEN 144 | I want to buy this,LEAD_GEN 145 | I want to order,LEAD_GEN 146 | Connect to an agent,LEAD_GEN 147 | Get in Touch,LEAD_GEN 148 | Schedule a callback ,LEAD_GEN 149 | Interested in buying,LEAD_GEN 150 | I want to cancel my order,CANCEL_ORDER 151 | How can I cancel my order,CANCEL_ORDER 152 | Cancellation status,CANCEL_ORDER 153 | Do you offer COD to my pincode?,COD 154 | Is it possible to COD,COD 155 | Cash on delivery is acceptable?,COD 156 | Can pay later on delivery ,COD 157 | I want to change the size of the mattress.,SIZE_CUSTOMIZATION 158 | Need some help in changing size of the mattress,SIZE_CUSTOMIZATION 159 | Will I get an option to Customise the size,SIZE_CUSTOMIZATION 160 | How can I order a custom sized mattress,SIZE_CUSTOMIZATION 161 | Can mattress size be 
customised?,SIZE_CUSTOMIZATION 162 | Customisation is possible?,SIZE_CUSTOMIZATION 163 | Custom size,SIZE_CUSTOMIZATION 164 | What are the SOF mattress products,PRODUCT_VARIANTS 165 | I want to buy a mattress,PRODUCT_VARIANTS 166 | Tell me about SOF mattress features,PRODUCT_VARIANTS 167 | What are the product variants,PRODUCT_VARIANTS 168 | Help me with different products,PRODUCT_VARIANTS 169 | I want to check products,PRODUCT_VARIANTS 170 | I am looking the mattress,PRODUCT_VARIANTS 171 | Which product is best,PRODUCT_VARIANTS 172 | Which mattress is best,PRODUCT_VARIANTS 173 | Type of foam used,PRODUCT_VARIANTS 174 | Show me products,PRODUCT_VARIANTS 175 | Show more mattress,PRODUCT_VARIANTS 176 | When will the order be delivered to me?,ORDER_STATUS 177 | I want updates of my order,ORDER_STATUS 178 | What is my order status?,ORDER_STATUS 179 | Where is my product,ORDER_STATUS 180 | When can we expect,ORDER_STATUS 181 | Order related,ORDER_STATUS 182 | -------------------------------------------------------------------------------- /env-requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | _libgcc_mutex=0.1=main 5 | absl-py=0.9.0=pypi_0 6 | adal=1.2.2=pypi_0 7 | aiofiles=0.5.0=pypi_0 8 | aiohttp=3.6.2=pypi_0 9 | alibi=0.3.2=pypi_0 10 | allennlp=1.0.0=pypi_0 11 | allennlp-models=1.0.0=pypi_0 12 | ansiwrap=0.8.4=pypi_0 13 | apex=0.1=pypi_0 14 | applicationinsights=0.11.9=pypi_0 15 | apscheduler=3.6.3=pypi_0 16 | asn1crypto=0.24.0=pypi_0 17 | astor=0.8.0=pypi_0 18 | astunparse=1.6.3=pypi_0 19 | async-generator=1.10=pypi_0 20 | async-timeout=3.0.1=pypi_0 21 | attrs=19.3.0=py_0 22 | awscli=1.16.255=pypi_0 23 | azure-common=1.1.23=pypi_0 24 | azure-graphrbac=0.61.1=pypi_0 25 | azure-mgmt-authorization=0.60.0=pypi_0 26 | azure-mgmt-containerregistry=2.8.0=pypi_0 27 | azure-mgmt-keyvault=2.0.0=pypi_0 28 | 
azure-mgmt-resource=3.0.0=pypi_0 29 | azure-mgmt-storage=4.0.0=pypi_0 30 | azure-storage-blob=2.0.1=pypi_0 31 | azure-storage-common=2.0.0=pypi_0 32 | azureml-automl-core=1.0.45.1=pypi_0 33 | azureml-contrib-explain-model=1.0.45=pypi_0 34 | azureml-contrib-notebook=1.0.45=pypi_0 35 | azureml-contrib-opendatasets=1.0.45=pypi_0 36 | azureml-contrib-server=1.0.45=pypi_0 37 | azureml-contrib-services=1.0.45=pypi_0 38 | azureml-core=1.0.45=pypi_0 39 | azureml-dataprep=1.1.7=pypi_0 40 | azureml-dataprep-native=13.0.0=pypi_0 41 | azureml-explain-model=1.0.45=pypi_0 42 | azureml-pipeline=1.0.45=pypi_0 43 | azureml-pipeline-core=1.0.45=pypi_0 44 | azureml-pipeline-steps=1.0.45=pypi_0 45 | azureml-sdk=1.0.45=pypi_0 46 | azureml-telemetry=1.0.45=pypi_0 47 | azureml-train=1.0.45=pypi_0 48 | azureml-train-automl=1.0.45.1=pypi_0 49 | azureml-train-core=1.0.45=pypi_0 50 | azureml-train-restclients-hyperdrive=1.0.45=pypi_0 51 | azureml-widgets=1.0.45.1=pypi_0 52 | backcall=0.2.0=py_0 53 | backports-csv=1.0.7=pypi_0 54 | backports-tempfile=1.0=pypi_0 55 | backports-weakref=1.0.post1=pypi_0 56 | beautifulsoup4=4.9.1=py36_0 57 | blas=1.0=mkl 58 | bleach=3.1.5=py_0 59 | blessings=1.7=pypi_0 60 | boto=2.49.0=pypi_0 61 | boto3=1.14.39=pypi_0 62 | botocore=1.17.39=pypi_0 63 | bottleneck=1.3.2=py36heb32a55_1 64 | brotlipy=0.7.0=py36h7b6447c_1000 65 | bs4=0.0.1=pypi_0 66 | ca-certificates=2020.6.24=0 67 | cachetools=4.1.1=pypi_0 68 | captum=0.2.0=pypi_0 69 | certifi=2020.6.20=py36_0 70 | cffi=1.14.0=py36h2e261b9_0 71 | chainer=6.2.0=pypi_0 72 | chardet=3.0.4=py36_1003 73 | cheroot=8.2.1=pypi_0 74 | cherrypy=18.5.0=pypi_0 75 | click=7.0=pypi_0 76 | cloudpickle=1.2.1=pypi_0 77 | colorama=0.4.1=pypi_0 78 | colorclass=2.2.0=pypi_0 79 | coloredlogs=10.0=pypi_0 80 | colorhash=1.0.2=pypi_0 81 | conllu=3.0=pypi_0 82 | contextlib2=0.5.5=pypi_0 83 | contextvars=2.4=pypi_0 84 | cryptography=2.9.2=py36h1ba5d50_0 85 | cudatoolkit=10.1.243=h6bb024c_0 86 | cycler=0.10.0=py36_0 87 | 
cymem=2.0.2=py36he1b5a44_0 88 | cython=0.29.11=py36he6710b0_0 89 | cython-blis=0.2.4=py36h516909a_1 90 | dataclasses=0.7=pypi_0 91 | dbus=1.13.16=hb2f20db_0 92 | decorator=4.4.2=py_0 93 | defusedxml=0.6.0=py_0 94 | distro=1.4.0=pypi_0 95 | dnspython=1.16.0=pypi_0 96 | docker=4.0.2=pypi_0 97 | docopt=0.6.2=pypi_0 98 | docutils=0.14=pypi_0 99 | docx=0.2.4=pypi_0 100 | dotnetcore2=2.1.8=pypi_0 101 | en-core-web-lg=2.1.0=pypi_0 102 | en-core-web-md=2.1.0=pypi_0 103 | en-core-web-sm=2.1.0=pypi_0 104 | entrypoints=0.3=py36_0 105 | expat=2.2.9=he6710b0_2 106 | fastai=1.0.61=pypi_0 107 | fastprogress=0.2.2=py_0 108 | fbmessenger=6.0.0=pypi_0 109 | feedparser=5.2.1=pypi_0 110 | filelock=3.0.12=pypi_0 111 | flask=1.1.0=pypi_0 112 | fontconfig=2.13.0=h9420a91_0 113 | freetype=2.10.2=h5ab3b9f_0 114 | future=0.17.1=pypi_0 115 | gast=0.3.3=pypi_0 116 | gdown=3.8.3=pypi_0 117 | gensim=3.7.3=pypi_0 118 | gevent=1.5.0=pypi_0 119 | glib=2.63.1=h5a9c865_0 120 | google-auth=1.20.1=pypi_0 121 | google-auth-oauthlib=0.4.1=pypi_0 122 | google-pasta=0.2.0=pypi_0 123 | gpustat=1.0.0.dev0=pypi_0 124 | greenlet=0.4.16=pypi_0 125 | grpcio=1.22.0=pypi_0 126 | gst-plugins-base=1.14.0=hbbd80ab_1 127 | gstreamer=1.14.0=hb453b48_1 128 | h11=0.8.1=pypi_0 129 | h2=3.2.0=pypi_0 130 | h5py=2.10.0=pypi_0 131 | horovod=0.16.4=pypi_0 132 | hpack=3.0.0=pypi_0 133 | hstspreload=2020.8.11=pypi_0 134 | httplib2=0.18.1=pypi_0 135 | httptools=0.1.1=pypi_0 136 | httpx=0.9.3=pypi_0 137 | humanfriendly=8.2=pypi_0 138 | hyperframe=5.2.0=pypi_0 139 | icu=58.2=he6710b0_3 140 | idna=2.8=pypi_0 141 | idna-ssl=1.1.0=pypi_0 142 | imageio=2.5.0=pypi_0 143 | immutables=0.14=pypi_0 144 | importlib-metadata=1.1.0=pypi_0 145 | importlib-resources=1.0.2=pypi_0 146 | importlib_metadata=1.7.0=0 147 | inltk=0.8.1=pypi_0 148 | intel-openmp=2020.1=217 149 | ipdb=0.12.2=pypi_0 150 | ipykernel=5.1.1=py36h39e3cac_0 151 | ipython=7.16.1=py36h5ca1d4c_0 152 | ipython_genutils=0.2.0=py36_0 153 | ipywidgets=7.5.0=pypi_0 154 | 
isodate=0.6.0=pypi_0 155 | itsdangerous=1.1.0=pypi_0 156 | jamspell=0.0.11=pypi_0 157 | jaraco-classes=2.0=pypi_0 158 | jaraco-collections=2.1=pypi_0 159 | jaraco-functools=2.0=pypi_0 160 | jaraco-text=3.2.0=pypi_0 161 | jedi=0.17.1=py36_0 162 | jeepney=0.4=pypi_0 163 | jinja2=2.11.2=py_0 164 | jmespath=0.9.4=pypi_0 165 | joblib=0.13.2=py36_0 166 | jpeg=9b=h024ee3a_2 167 | jsonform=0.0.2=pypi_0 168 | jsonnet=0.16.0=pypi_0 169 | jsonpickle=1.4.1=pypi_0 170 | jsonschema=3.2.0=py36_0 171 | jsonsir=0.0.2=pypi_0 172 | jupyter=1.0.0=pypi_0 173 | jupyter-console=6.0.0=pypi_0 174 | jupyter_client=6.1.5=py_0 175 | jupyter_core=4.6.3=py36_0 176 | jupyterlab=1.2.2=pypi_0 177 | kafka-python=1.4.7=pypi_0 178 | keras=2.2.4=pypi_0 179 | keras-applications=1.0.8=pypi_0 180 | keras-preprocessing=1.1.0=pypi_0 181 | keras2onnx=1.5.0=pypi_0 182 | kiwisolver=1.2.0=py36hfd86e86_0 183 | lab=4.2=pypi_0 184 | lcms2=2.11=h396b838_0 185 | libedit=3.1.20191231=h14c3975_1 186 | libffi=3.2.1=hd88cf55_4 187 | libgcc-ng=9.1.0=hdf63c60_0 188 | libgfortran-ng=7.3.0=hdf63c60_0 189 | libpng=1.6.37=hbc83047_0 190 | libsodium=1.0.18=h7b6447c_0 191 | libstdcxx-ng=9.1.0=hdf63c60_0 192 | libtiff=4.1.0=h2733197_1 193 | libuuid=1.0.3=h1bed415_2 194 | libxcb=1.14=h7b6447c_0 195 | libxml2=2.9.10=he19cac6_1 196 | lightgbm=2.2.1=pypi_0 197 | lime=0.2.0.1=pypi_0 198 | lxml=4.4.2=pypi_0 199 | lz4-c=1.9.2=he6710b0_0 200 | markdown=3.1.1=pypi_0 201 | markupsafe=1.1.1=py36h7b6447c_0 202 | matplotlib=3.1.0=py36h5429711_0 203 | mattermostwrapper=2.2=pypi_0 204 | mistune=0.8.4=py36h7b6447c_0 205 | mkl=2020.1=217 206 | mkl-service=2.3.0=py36he904b0f_0 207 | mkl_fft=1.1.0=py36h23d657b_0 208 | mkl_random=1.1.1=py36h0573a6f_0 209 | more-itertools=8.0.0=pypi_0 210 | msgpack=0.5.6=pypi_0 211 | msrest=0.6.8=pypi_0 212 | msrestazure=0.6.1=pypi_0 213 | multidict=4.7.6=pypi_0 214 | multimethods=1.0.0=pypi_0 215 | murmurhash=1.0.2=py36he6710b0_0 216 | mysqlclient=1.4.6=pypi_0 217 | nb_conda=2.2.1=py36_0 218 | 
nb_conda_kernels=2.2.3=py36_0 219 | nbconvert=5.6.1=py36_0 220 | nbformat=5.0.7=py_0 221 | ncurses=6.2=he6710b0_1 222 | ndg-httpsclient=0.5.1=pypi_0 223 | networkx=2.4=pypi_0 224 | newrelic=5.2.3.131=pypi_0 225 | nimbusml=0.6.5=pypi_0 226 | ninja=1.9.0=py36hfd86e86_0 227 | nlpaug=0.0.9=pypi_0 228 | nltk=3.4.5=pypi_0 229 | nodejs=12.4.0=he1b5a44_0 230 | notebook=6.0.3=py36_0 231 | numexpr=2.7.1=py36h423224d_0 232 | numpy=1.16.2=pypi_0 233 | numpy-base=1.16.4=py36hde5b4d6_0 234 | nvidia-ml-py3=7.352.0=pypi_0 235 | oauth2client=4.1.3=pypi_0 236 | oauthlib=3.0.2=pypi_0 237 | olefile=0.46=py36_0 238 | onnx=1.5.0=pypi_0 239 | onnx-chainer=1.5.0=pypi_0 240 | onnxconverter-common=1.5.0=pypi_0 241 | onnxmltools=1.4.1=pypi_0 242 | onnxruntime=0.5.0=pypi_0 243 | openssl=1.1.1g=h7b6447c_0 244 | opt-einsum=3.1.0=pypi_0 245 | overrides=3.0.0=pypi_0 246 | packaging=20.4=py_0 247 | pandas=1.0.5=py36h0573a6f_0 248 | pandoc=2.9.2.1=0 249 | pandocfilters=1.4.2=py36_1 250 | papermill=1.0.1=pypi_0 251 | parso=0.7.0=py_0 252 | pathspec=0.5.9=pypi_0 253 | patsy=0.5.1=pypi_0 254 | pattern=3.6.1=pypi_0 255 | pcre=8.44=he6710b0_0 256 | pdfminer-six=20191110=pypi_0 257 | pdfminer3k=1.3.1=pypi_0 258 | pexpect=4.8.0=py36_0 259 | pickleshare=0.7.5=py36_0 260 | pika=1.1.0=pypi_0 261 | pillow=7.2.0=py36hb39fc2d_0 262 | pip=19.2.3=py36_0 263 | pip-autoremove=0.9.1=pypi_0 264 | plac=0.9.6=py36_1 265 | pluggy=0.13.1=pypi_0 266 | ply=3.11=pypi_0 267 | portend=2.6=pypi_0 268 | preshed=2.0.1=py36he6710b0_0 269 | prometheus_client=0.8.0=py_0 270 | prompt-toolkit=2.0.10=pypi_0 271 | protobuf=3.12.4=pypi_0 272 | psutil=5.6.3=pypi_0 273 | psycopg2-binary=2.8.5=pypi_0 274 | ptyprocess=0.6.0=py36_0 275 | py=1.8.0=pypi_0 276 | py-rouge=1.1=pypi_0 277 | py4j=0.10.7=pypi_0 278 | pyarrow=0.14.0=pypi_0 279 | pyasn1=0.4.8=pypi_0 280 | pyasn1-modules=0.2.8=pypi_0 281 | pycparser=2.20=py_2 282 | pycryptodome=3.9.4=pypi_0 283 | pydot=1.4.1=pypi_0 284 | pygments=2.6.1=py_0 285 | pyjwt=1.7.1=pypi_0 286 | 
pykwalify=1.7.0=pypi_0 287 | pymongo=3.8.0=pypi_0 288 | pyopenssl=19.0.0=pypi_0 289 | pyparsing=2.4.7=py_0 290 | pyqt=5.9.2=py36h05f1152_2 291 | pyrsistent=0.16.0=py36h7b6447c_0 292 | pysocks=1.7.1=py36_0 293 | pyspark=2.4.3=pypi_0 294 | pyspellchecker=0.5.1=pypi_0 295 | pytest=5.3.1=pypi_0 296 | python=3.6.8=h0371630_0 297 | python-crfsuite=0.9.7=pypi_0 298 | python-dateutil=2.8.1=py_0 299 | python-docx=0.8.10=pypi_0 300 | python-dotenv=0.10.3=pypi_0 301 | python-easyconfig=0.1.7=pypi_0 302 | python-engineio=3.12.1=pypi_0 303 | python-socketio=4.5.1=pypi_0 304 | python-telegram-bot=12.8=pypi_0 305 | pytorch=1.5.1=py3.6_cuda10.1.243_cudnn7.6.3_0 306 | pytorch-lightning=0.7.5=pypi_0 307 | pytorch-pretrained-bert=0.6.2=pypi_0 308 | pytorch-transformers=1.2.0=pypi_0 309 | pytz=2019.3=pypi_0 310 | pywavelets=1.0.3=pypi_0 311 | pyyaml=5.1.1=pypi_0 312 | pyzmq=19.0.1=py36he6710b0_1 313 | qt=5.9.7=h5867ecd_1 314 | qtconsole=4.6.0=pypi_0 315 | questionary=1.5.2=pypi_0 316 | rasa=1.10.10=pypi_0 317 | rasa-sdk=1.10.2=pypi_0 318 | readline=7.0=h7b6447c_5 319 | redis=3.5.3=pypi_0 320 | regex=2020.6.8=pypi_0 321 | requests=2.24.0=py_0 322 | requests-oauthlib=1.2.0=pypi_0 323 | requests-toolbelt=0.9.1=pypi_0 324 | resource=0.2.1=pypi_0 325 | rfc3986=1.4.0=pypi_0 326 | rocketchat-api=1.3.1=pypi_0 327 | rsa=3.4.2=pypi_0 328 | ruamel-yaml=0.16.10=pypi_0 329 | ruamel-yaml-clib=0.2.0=pypi_0 330 | s3transfer=0.3.3=pypi_0 331 | sacremoses=0.0.33=pypi_0 332 | sanic=19.12.2=pypi_0 333 | sanic-cors=0.10.0.post3=pypi_0 334 | sanic-jwt=1.4.1=pypi_0 335 | sanic-plugins-framework=0.9.3=pypi_0 336 | scikit-image=0.15.0=pypi_0 337 | scikit-learn=0.22.2.post1=pypi_0 338 | scipy=1.1.0=pypi_0 339 | secretstorage=3.1.1=pypi_0 340 | send2trash=1.5.0=py36_0 341 | sentencepiece=0.1.83=pypi_0 342 | seqeval=0.0.12=pypi_0 343 | setuptools=47.3.1=py36_0 344 | shap=0.29.1=pypi_0 345 | simplejson=3.17.0=pypi_0 346 | simpletransformers=0.43.6=pypi_0 347 | sip=4.19.8=py36hf484d3e_0 348 | six=1.15.0=py_0 349 | 
skl2onnx=1.4.9=pypi_0 350 | sklearn-crfsuite=0.3.6=pypi_0 351 | sklearn-pandas=1.7.0=pypi_0 352 | slackclient=2.8.0=pypi_0 353 | smart-open=1.8.4=pypi_0 354 | sniffio=1.1.0=pypi_0 355 | sortedcontainers=2.1.0=pypi_0 356 | soupsieve=2.0.1=py_0 357 | spacy=2.1.8=py36hc9558a2_0 358 | sqlalchemy=1.3.18=pypi_0 359 | sqlite=3.32.3=h62c20be_0 360 | srsly=0.1.0=py36he1b5a44_0 361 | statsmodels=0.10.0=pypi_0 362 | tabulate=0.8.7=pypi_0 363 | tempora=1.14.1=pypi_0 364 | tenacity=5.0.4=pypi_0 365 | tensorboard=2.1.1=pypi_0 366 | tensorboard-plugin-wit=1.7.0=pypi_0 367 | tensorboardx=2.1=pypi_0 368 | tensorflow=2.1.1=pypi_0 369 | tensorflow-addons=0.7.1=pypi_0 370 | tensorflow-estimator=2.1.0=pypi_0 371 | tensorflow-gpu=1.14.0=pypi_0 372 | tensorflow-hub=0.8.0=pypi_0 373 | tensorflow-probability=0.9.0=pypi_0 374 | tensorflow-text=2.1.0rc0=pypi_0 375 | termcolor=1.1.0=pypi_0 376 | terminado=0.8.3=py36_0 377 | terminaltables=3.1.0=pypi_0 378 | testpath=0.4.4=py_0 379 | textwrap3=0.9.2=pypi_0 380 | tf2onnx=1.5.3=pypi_0 381 | thinc=7.0.8=py36hc9558a2_0 382 | tk=8.6.10=hbc83047_0 383 | tokenizers=0.7.0=pypi_0 384 | torch=1.4.0=pypi_0 385 | torchvision=0.6.1=py36_cu101 386 | tornado=6.0.4=py36h7b6447c_1 387 | tqdm=4.45.0=pypi_0 388 | traitlets=4.3.3=py36_0 389 | transformers=2.11.0=pypi_0 390 | twilio=6.26.3=pypi_0 391 | typing=3.7.4=pypi_0 392 | typing-extensions=3.7.4=pypi_0 393 | tzlocal=2.1=pypi_0 394 | ujson=2.0.3=pypi_0 395 | urllib3=1.25.3=pypi_0 396 | uvloop=0.14.0=pypi_0 397 | wasabi=0.2.2=py_0 398 | wcwidth=0.2.5=py_0 399 | webencodings=0.5.1=py36_1 400 | webexteamssdk=1.3=pypi_0 401 | websocket-client=0.56.0=pypi_0 402 | websockets=8.1=pypi_0 403 | werkzeug=0.15.4=pypi_0 404 | wheel=0.30.0=pypi_0 405 | widgetsnbextension=3.5.0=pypi_0 406 | word2number=1.1=pypi_0 407 | wrapt=1.12.1=pypi_0 408 | xlrd=1.2.0=pypi_0 409 | xz=5.2.5=h7b6447c_0 410 | yaml=0.2.5=h7b6447c_0 411 | yarl=1.4.2=pypi_0 412 | zc-lockfile=2.0=pypi_0 413 | zeromq=4.3.2=he6710b0_2 414 | zipp=0.6.0=pypi_0 
"""Train and evaluate a simpletransformers classifier on an intent dataset.

Script path in the repository: platforms/bert/bert-bot-only-data-es.py

The script reads a train CSV and a test CSV (columns ``sentence`` and
``label``), optionally holds out a stratified fraction of the training data
for evaluation, fine-tunes a ``ClassificationModel`` with balanced class
weights, reloads the best checkpoint, writes predictions for the test set
and logs the weighted F1 score at a sweep of confidence thresholds.
"""
import argparse
import collections
import datetime
import logging
import os
import random
import sys
from logging.handlers import WatchedFileHandler
from typing import Any

import numpy as np
import pandas as pd
from scipy.special import softmax
from simpletransformers.classification import ClassificationModel
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import torch

# The script refuses to start without a CUDA device.
# NOTE(review): `assert` statements are skipped when Python runs with -O.
assert(torch.cuda.is_available())

logger = logging.getLogger(__name__)
now_ts = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
# Label assigned to a prediction whose top score falls below the threshold
# (see f1_at_threshold).
OOS_CLASS = 'NO_NODES_DETECTED'


def str2bool(v: Any):
    """Parse a command-line value into a bool (for use as an argparse ``type``).

    Booleans are returned unchanged. Otherwise ``v`` is lower-cased and matched
    against the accepted spellings of true and false.

    Raises:
        argparse.ArgumentTypeError: if the value is not a recognised spelling.
    """
    # https://stackoverflow.com/q/15008758/3697191
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')


def random_seed(seed_value: int, use_cuda: bool):
    """Seed numpy, torch and Python's ``random``; optionally seed CUDA too.

    When ``use_cuda`` is true the CUDA generators are seeded as well and cuDNN
    is switched to deterministic mode with benchmarking disabled.
    """
    np.random.seed(seed_value)  # cpu vars
    torch.manual_seed(seed_value)  # cpu vars
    random.seed(seed_value)  # Python
    if use_cuda:
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)  # gpu vars
        torch.backends.cudnn.deterministic = True  # needed
        torch.backends.cudnn.benchmark = False


def setup_logging(output_dir: str):
    """Reset the module logger and attach a file handler and a stdout handler.

    Both handlers and the logger itself are set to DEBUG. The log file is
    ``<output_dir>/run_logs.log``.
    """
    # Iterate over a copy: removeHandler() mutates logger.handlers, and removing
    # while iterating the live list skips every other handler.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)
    logger.setLevel(logging.DEBUG)

    file_handler = WatchedFileHandler(f'{output_dir}/run_logs.log')
    file_handler.setLevel(logging.DEBUG)
    logger.addHandler(file_handler)

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(logging.DEBUG)
    logger.addHandler(stdout_handler)


def log(*parts):
    """Log all ``parts`` space-joined on one INFO line, then a separator line."""
    logger.info(' '.join([str(part) for part in parts]))
    logger.info("=" * 80)


def make_st_args(cmd_args):
    """Build the ``args`` dict passed to ``ClassificationModel``.

    Fixed settings are combined with values taken from the parsed command-line
    arguments. The early-stopping keys are only populated when
    ``cmd_args.use_early_stopping`` is true.
    """
    args = [
        ('fp16', False),
        ('output_dir', f'{cmd_args.output_dir}/'),
        ('best_model_dir', f'{cmd_args.output_dir}/best_model/'),
        ('tensorboard_dir', f'{cmd_args.output_dir}/tblogs/'),
        ('manual_seed', cmd_args.seed),
        ('do_lower_case', cmd_args.do_lower_case),
        ('learning_rate', cmd_args.learning_rate),
        ('train_batch_size', cmd_args.batch_size),
        ('eval_batch_size', cmd_args.batch_size),
        ('num_train_epochs', cmd_args.epochs),
        ('gradient_accumulation_steps', 1),
        ('max_seq_length', 512),
        ('overwrite_output_dir', True),
        ('reprocess_input_data', True),
        ('save_best_model', True),
        ('save_eval_checkpoints', False),
        ('save_model_every_epoch', False),
        ('save_optimizer_and_scheduler', True),
        ('save_steps', -1),
        ('evaluate_during_training', True),
        ('evaluate_during_training_silent', False),
        ('evaluate_during_training_steps', cmd_args.eval_every_n_steps),
        ('evaluate_during_training_verbose', True),
    ]

    if cmd_args.use_early_stopping:
        args.append(('use_early_stopping', True))
        args.append(('early_stopping_consider_epochs', True))
        args.append(('early_stopping_metric', 'eval_loss'))
        args.append(('early_stopping_metric_minimize', True))
        args.append(('early_stopping_delta', cmd_args.early_stopping_delta))
        args.append(('early_stopping_patience', cmd_args.early_stopping_patience))
    else:
        args.append(('use_early_stopping', False))

    return dict(args)


def read_data(path: str) -> pd.DataFrame:
    """Read a dataset CSV and return it with columns ``text`` and ``labels``.

    The input columns ``sentence`` and ``label`` are renamed; any other
    columns are dropped.
    """
    df = pd.read_csv(path)
    df = df.rename(columns={'sentence': 'text', 'label': 'labels'})
    df = df[['text', 'labels']]
    return df


def get_labels_map(df):
    """Map each distinct value of ``df['labels']`` to its index in sorted order."""
    labels = sorted(df['labels'].unique().tolist())
    label2id = collections.OrderedDict(zip(labels, range(len(labels))))
    return label2id


def f1_at_threshold(preds, y_true, labels_list, threshold):
    """Weighted F1 when predictions scoring below ``threshold`` become OOS_CLASS.

    Args:
        preds: 2-D array of per-class scores, one row per example.
        y_true: true label names, one per row of ``preds``.
        labels_list: list of label names indexed by class id.
        threshold: minimum top score for a prediction to keep its label.
    """
    # OOS_CLASS is appended last so that index -1 resolves to it.
    labels_list = labels_list + [OOS_CLASS]
    idxs = np.argmax(preds, axis=1)
    scores = preds[np.arange(preds.shape[0]), idxs]
    idxs[scores < threshold] = -1
    y_pred = [labels_list[i] for i in idxs]
    return f1_score(y_true=y_true, y_pred=y_pred, average='weighted')


def run_experiment(cmd_args):
    """Train, reload the best model, predict on the test set and report F1.

    Outputs written under ``cmd_args.output_dir``: ``run_logs.log``,
    ``predictions.csv`` (top label and score per test sentence) and
    ``predictions.jsonl`` (full per-label score dict per test sentence).
    The weighted F1 is logged for thresholds 0.0 to 1.0 in steps of 0.05.
    """
    setup_logging(cmd_args.output_dir)
    log('Run args', vars(cmd_args))
    torch.cuda.empty_cache()
    random_seed(cmd_args.seed, True)

    train_df = read_data(cmd_args.train_file)
    # Default evaluation set is the full training file; it is replaced by a
    # stratified hold-out below when eval_frac > 0.
    eval_df = read_data(cmd_args.train_file)
    # The label map is built from the full training file, before any split.
    label2id = get_labels_map(train_df)
    test_df = read_data(cmd_args.test_file)
    if cmd_args.eval_frac > 0:
        train_df, eval_df = train_test_split(
            train_df, test_size=cmd_args.eval_frac,
            random_state=cmd_args.seed,
            shuffle=True,
            stratify=train_df['labels']
        )

    # Each message reports the shape of the frame it names.
    log('Train Shape', train_df.shape)
    log('Eval Shape', eval_df.shape)
    log('Test Shape', test_df.shape)

    weights = compute_class_weight('balanced', classes=list(label2id.keys()), y=train_df['labels']).tolist()
    log('Class weights', weights)

    args = make_st_args(cmd_args)
    args['labels_list'] = list(label2id.keys())
    args['labels_map'] = label2id

    log('Labels map', label2id)
    log('ST args', args)

    m = ClassificationModel(
        model_type=cmd_args.model_type,
        model_name=cmd_args.model_name,
        num_labels=len(label2id),
        weight=weights,
        args=args)
    m.train_model(train_df=train_df, eval_df=eval_df)

    # Reload from the best-model directory so predictions use that checkpoint.
    m = ClassificationModel(
        cmd_args.model_type,
        args['best_model_dir'],
        args=args,
    )
    _, logits = m.predict(test_df['text'])
    preds = softmax(logits, axis=1)
    top_predicted = np.argmax(preds, axis=1)

    out_df = test_df.rename(columns={'text': 'sentence', 'labels': 'label'})
    out_df['predicted_node'] = [m.args.labels_list[top_predicted[i]] for i in range(len(test_df))]
    out_df['predicted_node_score'] = [preds[i][top_predicted[i]] for i in range(len(test_df))]
    out_df.to_csv(f'{cmd_args.output_dir}/predictions.csv', columns=['sentence', 'label', 'predicted_node', 'predicted_node_score'], index=False)

    test_df['predictions'] = [dict(zip(m.args.labels_list, preds[i])) for i in range(len(test_df))]
    test_df.to_json(f'{cmd_args.output_dir}/predictions.jsonl', orient='records', lines=True)
    for t in range(0, 101, 5):
        t = t / 100.0
        f1 = f1_at_threshold(preds, test_df['labels'], m.args.labels_list, t)
        log(f'F1 @ t={t}', f1)


def main():
    """Parse command-line arguments, create the output directory and run."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_file', required=True, type=str)
    parser.add_argument('--test_file', required=True, type=str)
    parser.add_argument('--output_dir', required=True, type=str)
    parser.add_argument('--model_type', required=False, default='bert', type=str)
    parser.add_argument('--model_name', required=False, default='bert-base-uncased', type=str)
    parser.add_argument('--do_lower_case', required=False, default=True, type=str2bool)
    parser.add_argument('--seed', required=False, default=42, type=int)
    parser.add_argument('--learning_rate', required=False, default=0.00004, type=float)
    parser.add_argument('--batch_size', required=False, default=16, type=int)
    parser.add_argument('--epochs', required=False, default=10, type=int)
    parser.add_argument('--eval_frac', required=False, default=0.1, type=float)
    parser.add_argument('--eval_every_n_steps', required=False, default=100, type=int)
    parser.add_argument('--use_early_stopping', required=False, default=True, type=str2bool)
    parser.add_argument('--early_stopping_patience', required=False, default=5, type=int)
    parser.add_argument('--early_stopping_delta', required=False, default=0.00005, type=float)
    cmd_args = parser.parse_args()
    cmd_args.output_dir = f'{cmd_args.output_dir.rstrip("/")}'  # /{now_ts}'
    os.makedirs(cmd_args.output_dir, exist_ok=True)
    random_seed(cmd_args.seed, True)
    run_experiment(cmd_args)


if __name__ == '__main__':
    main()
| "LABEL_15": 15, 51 | "LABEL_16": 16, 52 | "LABEL_17": 17, 53 | "LABEL_18": 18, 54 | "LABEL_19": 19, 55 | "LABEL_2": 2, 56 | "LABEL_20": 20, 57 | "LABEL_21": 21, 58 | "LABEL_22": 22, 59 | "LABEL_23": 23, 60 | "LABEL_24": 24, 61 | "LABEL_25": 25, 62 | "LABEL_26": 26, 63 | "LABEL_27": 27, 64 | "LABEL_3": 3, 65 | "LABEL_4": 4, 66 | "LABEL_5": 5, 67 | "LABEL_6": 6, 68 | "LABEL_7": 7, 69 | "LABEL_8": 8, 70 | "LABEL_9": 9 71 | }, 72 | "layer_norm_eps": 1e-12, 73 | "max_position_embeddings": 512, 74 | "model_type": "bert", 75 | "num_attention_heads": 12, 76 | "num_hidden_layers": 12, 77 | "pad_token_id": 0, 78 | "type_vocab_size": 2, 79 | "vocab_size": 30522 80 | } 81 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart/eval_results.txt: -------------------------------------------------------------------------------- 1 | eval_loss = 0.0016611038629586498 2 | mcc = 1.0 3 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart/model_args.json: -------------------------------------------------------------------------------- 1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/curekart/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, 
"multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/curekart/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/curekart/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 225, "weight_decay": 0, "labels_list": ["CALL_CENTER", "CANCEL_ORDER", "CHAT_WITH_AGENT", "CHECK_PINCODE", "CONSULT_START", "DELAY_IN_PARCEL", "EXPIRY_DATE", "FRANCHISE", "IMMUNITY", "INTERNATIONAL_SHIPPING", "MODES_OF_PAYMENTS", "MODIFY_ADDRESS", "ORDER_QUERY", "ORDER_STATUS", "ORDER_TAKING", "ORIGINAL_PRODUCT", "PAYMENT_AND_BILL", "PORTAL_ISSUE", "RECOMMEND_PRODUCT", "REFER_EARN", "REFUNDS_RETURNS_REPLACEMENTS", "RESUME_DELIVERY", "SIDE_EFFECT", "SIGN_UP", "START_OVER", "STORE_INFORMATION", "USER_GOAL_FORM", "WORK_FROM_HOME"], "labels_map": {"CALL_CENTER": 0, "CANCEL_ORDER": 1, "CHAT_WITH_AGENT": 2, "CHECK_PINCODE": 3, "CONSULT_START": 4, "DELAY_IN_PARCEL": 5, "EXPIRY_DATE": 6, "FRANCHISE": 7, "IMMUNITY": 8, "INTERNATIONAL_SHIPPING": 9, "MODES_OF_PAYMENTS": 10, "MODIFY_ADDRESS": 11, "ORDER_QUERY": 12, "ORDER_STATUS": 13, "ORDER_TAKING": 14, "ORIGINAL_PRODUCT": 15, "PAYMENT_AND_BILL": 16, "PORTAL_ISSUE": 17, "RECOMMEND_PRODUCT": 18, "REFER_EARN": 19, "REFUNDS_RETURNS_REPLACEMENTS": 20, "RESUME_DELIVERY": 21, "SIDE_EFFECT": 22, "SIGN_UP": 23, "START_OVER": 24, "STORE_INFORMATION": 25, "USER_GOAL_FORM": 26, "WORK_FROM_HOME": 27}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, 
"sliding_window": false, "stride": 0.8, "tie_value": 1} -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart/tblogs/events.out.tfevents.1597350081.haptik-ai-research-mum-ml-2-vm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/curekart/tblogs/events.out.tfevents.1597350081.haptik-ai-research-mum-ml-2-vm -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": true, "model_max_length": 512} -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/curekart/training_args.bin -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart/training_progress_scores.csv: -------------------------------------------------------------------------------- 1 | global_step,mcc,train_loss,eval_loss 2 | 75,0.0489226281429655,3.3800320625305176,3.268240276972453 3 | 100,0.14227801509842952,3.2683238983154297,3.097734079360962 4 | 150,0.318445814589155,3.260500431060791,2.6981120856602985 5 | 
200,0.7360823978724693,2.241304636001587,2.1334492603937787 6 | 225,0.850652764009118,1.5696055889129639,1.7193865990638733 7 | 300,0.9732721399504127,0.7272082567214966,0.7657513042291005 8 | 300,0.9732721399504127,0.7272082567214966,0.7657513042291005 9 | 375,0.9839939029676897,0.19786734879016876,0.262628804842631 10 | 400,0.9964266032875477,0.1580124795436859,0.17506494929393132 11 | 450,0.9982145156768018,0.061699073761701584,0.08133237525820732 12 | 500,1.0,0.044864166527986526,0.04304662769039472 13 | 525,0.9982146902794541,0.1949339509010315,0.03420027763893207 14 | 600,0.9982146902794541,0.01979409158229828,0.0174630964299043 15 | 600,0.9982146902794541,0.01979409158229828,0.0174630964299043 16 | 675,1.0,0.010599198751151562,0.012711792197078466 17 | 700,1.0,0.009350229986011982,0.00925802268087864 18 | 750,1.0,0.006756887771189213,0.007775597075621287 19 | 800,1.0,0.007059372961521149,0.006613130954404672 20 | 825,1.0,0.006302967667579651,0.006184613835066557 21 | 900,1.0,0.005933687090873718,0.005213701774676641 22 | 900,1.0,0.005933687090873718,0.005213701774676641 23 | 975,1.0,0.0048155635595321655,0.004500839247678717 24 | 1000,1.0,0.003983728121966124,0.004296117999280493 25 | 1050,1.0,0.005057642702013254,0.003936607257152597 26 | 1100,1.0,0.003730300348252058,0.003631598042945067 27 | 1125,1.0,0.005177562590688467,0.003497548938418428 28 | 1200,1.0,0.004394039046019316,0.0031360636465251448 29 | 1200,1.0,0.004394039046019316,0.0031360636465251448 30 | 1275,1.0,0.003173314966261387,0.0028356259533514577 31 | 1300,1.0,0.00268826843239367,0.002747759005675713 32 | 1350,1.0,0.0037474841810762882,0.0025869984024514754 33 | 1400,1.0,0.002153025707229972,0.002440233255426089 34 | 1425,1.0,0.003527172142639756,0.0023733606096357105 35 | 1500,1.0,0.002447231439873576,0.0021881715798129637 36 | 1500,1.0,0.002447231439873576,0.0021881715798129637 37 | 1575,1.0,0.002039331942796707,0.0020307602997248373 38 | 1600,1.0,0.003677624510601163,0.001983012695175906 
39 | 1650,1.0,0.0019816916901618242,0.0018912556565677126 40 | 1700,1.0,0.0018908806378021836,0.001807974735274911 41 | 1725,1.0,0.0018591739935800433,0.0017696570216988524 42 | 1800,1.0,0.0017346820095553994,0.0016611038629586498 43 | 1800,1.0,0.0017346820095553994,0.0016611038629586498 44 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart_subset/best_model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForSequenceClassification" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "id2label": { 11 | "0": "LABEL_0", 12 | "1": "LABEL_1", 13 | "2": "LABEL_2", 14 | "3": "LABEL_3", 15 | "4": "LABEL_4", 16 | "5": "LABEL_5", 17 | "6": "LABEL_6", 18 | "7": "LABEL_7", 19 | "8": "LABEL_8", 20 | "9": "LABEL_9", 21 | "10": "LABEL_10", 22 | "11": "LABEL_11", 23 | "12": "LABEL_12", 24 | "13": "LABEL_13", 25 | "14": "LABEL_14", 26 | "15": "LABEL_15", 27 | "16": "LABEL_16", 28 | "17": "LABEL_17", 29 | "18": "LABEL_18", 30 | "19": "LABEL_19", 31 | "20": "LABEL_20", 32 | "21": "LABEL_21", 33 | "22": "LABEL_22", 34 | "23": "LABEL_23", 35 | "24": "LABEL_24", 36 | "25": "LABEL_25", 37 | "26": "LABEL_26", 38 | "27": "LABEL_27" 39 | }, 40 | "initializer_range": 0.02, 41 | "intermediate_size": 3072, 42 | "label2id": { 43 | "LABEL_0": 0, 44 | "LABEL_1": 1, 45 | "LABEL_10": 10, 46 | "LABEL_11": 11, 47 | "LABEL_12": 12, 48 | "LABEL_13": 13, 49 | "LABEL_14": 14, 50 | "LABEL_15": 15, 51 | "LABEL_16": 16, 52 | "LABEL_17": 17, 53 | "LABEL_18": 18, 54 | "LABEL_19": 19, 55 | "LABEL_2": 2, 56 | "LABEL_20": 20, 57 | "LABEL_21": 21, 58 | "LABEL_22": 22, 59 | "LABEL_23": 23, 60 | "LABEL_24": 24, 61 | "LABEL_25": 25, 62 | "LABEL_26": 26, 63 | "LABEL_27": 27, 64 | "LABEL_3": 3, 65 | "LABEL_4": 4, 66 | "LABEL_5": 5, 67 | "LABEL_6": 6, 
68 | "LABEL_7": 7, 69 | "LABEL_8": 8, 70 | "LABEL_9": 9 71 | }, 72 | "layer_norm_eps": 1e-12, 73 | "max_position_embeddings": 512, 74 | "model_type": "bert", 75 | "num_attention_heads": 12, 76 | "num_hidden_layers": 12, 77 | "pad_token_id": 0, 78 | "type_vocab_size": 2, 79 | "vocab_size": 30522 80 | } 81 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart_subset/best_model/eval_results.txt: -------------------------------------------------------------------------------- 1 | eval_loss = 0.002526673533094044 2 | mcc = 1.0 3 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart_subset/best_model/model_args.json: -------------------------------------------------------------------------------- 1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/curekart_subset/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/curekart_subset/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, 
"save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/curekart_subset/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 156, "weight_decay": 0, "labels_list": ["CALL_CENTER", "CANCEL_ORDER", "CHAT_WITH_AGENT", "CHECK_PINCODE", "CONSULT_START", "DELAY_IN_PARCEL", "EXPIRY_DATE", "FRANCHISE", "IMMUNITY", "INTERNATIONAL_SHIPPING", "MODES_OF_PAYMENTS", "MODIFY_ADDRESS", "ORDER_QUERY", "ORDER_STATUS", "ORDER_TAKING", "ORIGINAL_PRODUCT", "PAYMENT_AND_BILL", "PORTAL_ISSUE", "RECOMMEND_PRODUCT", "REFER_EARN", "REFUNDS_RETURNS_REPLACEMENTS", "RESUME_DELIVERY", "SIDE_EFFECT", "SIGN_UP", "START_OVER", "STORE_INFORMATION", "USER_GOAL_FORM", "WORK_FROM_HOME"], "labels_map": {"CALL_CENTER": 0, "CANCEL_ORDER": 1, "CHAT_WITH_AGENT": 2, "CHECK_PINCODE": 3, "CONSULT_START": 4, "DELAY_IN_PARCEL": 5, "EXPIRY_DATE": 6, "FRANCHISE": 7, "IMMUNITY": 8, "INTERNATIONAL_SHIPPING": 9, "MODES_OF_PAYMENTS": 10, "MODIFY_ADDRESS": 11, "ORDER_QUERY": 12, "ORDER_STATUS": 13, "ORDER_TAKING": 14, "ORIGINAL_PRODUCT": 15, "PAYMENT_AND_BILL": 16, "PORTAL_ISSUE": 17, "RECOMMEND_PRODUCT": 18, "REFER_EARN": 19, "REFUNDS_RETURNS_REPLACEMENTS": 20, "RESUME_DELIVERY": 21, "SIDE_EFFECT": 22, "SIGN_UP": 23, "START_OVER": 24, "STORE_INFORMATION": 25, "USER_GOAL_FORM": 26, "WORK_FROM_HOME": 27}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1} -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart_subset/best_model/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": 
"[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart_subset/best_model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": true, "model_max_length": 512} -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart_subset/best_model/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/curekart_subset/best_model/training_args.bin -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart_subset/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForSequenceClassification" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "id2label": { 11 | "0": "LABEL_0", 12 | "1": "LABEL_1", 13 | "2": "LABEL_2", 14 | "3": "LABEL_3", 15 | "4": "LABEL_4", 16 | "5": "LABEL_5", 17 | "6": "LABEL_6", 18 | "7": "LABEL_7", 19 | "8": "LABEL_8", 20 | "9": "LABEL_9", 21 | "10": "LABEL_10", 22 | "11": "LABEL_11", 23 | "12": "LABEL_12", 24 | "13": "LABEL_13", 25 | "14": "LABEL_14", 26 | "15": "LABEL_15", 27 | "16": "LABEL_16", 28 | "17": "LABEL_17", 29 | "18": "LABEL_18", 30 | "19": "LABEL_19", 31 | "20": "LABEL_20", 32 | "21": "LABEL_21", 33 | "22": "LABEL_22", 34 | "23": "LABEL_23", 35 | "24": "LABEL_24", 36 | "25": "LABEL_25", 37 | "26": "LABEL_26", 38 | "27": "LABEL_27" 39 | }, 40 | "initializer_range": 0.02, 41 | "intermediate_size": 3072, 42 | "label2id": { 43 | "LABEL_0": 0, 44 
| "LABEL_1": 1, 45 | "LABEL_10": 10, 46 | "LABEL_11": 11, 47 | "LABEL_12": 12, 48 | "LABEL_13": 13, 49 | "LABEL_14": 14, 50 | "LABEL_15": 15, 51 | "LABEL_16": 16, 52 | "LABEL_17": 17, 53 | "LABEL_18": 18, 54 | "LABEL_19": 19, 55 | "LABEL_2": 2, 56 | "LABEL_20": 20, 57 | "LABEL_21": 21, 58 | "LABEL_22": 22, 59 | "LABEL_23": 23, 60 | "LABEL_24": 24, 61 | "LABEL_25": 25, 62 | "LABEL_26": 26, 63 | "LABEL_27": 27, 64 | "LABEL_3": 3, 65 | "LABEL_4": 4, 66 | "LABEL_5": 5, 67 | "LABEL_6": 6, 68 | "LABEL_7": 7, 69 | "LABEL_8": 8, 70 | "LABEL_9": 9 71 | }, 72 | "layer_norm_eps": 1e-12, 73 | "max_position_embeddings": 512, 74 | "model_type": "bert", 75 | "num_attention_heads": 12, 76 | "num_hidden_layers": 12, 77 | "pad_token_id": 0, 78 | "type_vocab_size": 2, 79 | "vocab_size": 30522 80 | } 81 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart_subset/eval_results.txt: -------------------------------------------------------------------------------- 1 | eval_loss = 0.0021807193876996348 2 | mcc = 1.0 3 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart_subset/model_args.json: -------------------------------------------------------------------------------- 1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/curekart_subset/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", 
"gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/curekart_subset/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/curekart_subset/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 156, "weight_decay": 0, "labels_list": ["CALL_CENTER", "CANCEL_ORDER", "CHAT_WITH_AGENT", "CHECK_PINCODE", "CONSULT_START", "DELAY_IN_PARCEL", "EXPIRY_DATE", "FRANCHISE", "IMMUNITY", "INTERNATIONAL_SHIPPING", "MODES_OF_PAYMENTS", "MODIFY_ADDRESS", "ORDER_QUERY", "ORDER_STATUS", "ORDER_TAKING", "ORIGINAL_PRODUCT", "PAYMENT_AND_BILL", "PORTAL_ISSUE", "RECOMMEND_PRODUCT", "REFER_EARN", "REFUNDS_RETURNS_REPLACEMENTS", "RESUME_DELIVERY", "SIDE_EFFECT", "SIGN_UP", "START_OVER", "STORE_INFORMATION", "USER_GOAL_FORM", "WORK_FROM_HOME"], "labels_map": {"CALL_CENTER": 0, "CANCEL_ORDER": 1, "CHAT_WITH_AGENT": 2, "CHECK_PINCODE": 3, "CONSULT_START": 4, "DELAY_IN_PARCEL": 5, "EXPIRY_DATE": 6, "FRANCHISE": 7, "IMMUNITY": 8, "INTERNATIONAL_SHIPPING": 9, "MODES_OF_PAYMENTS": 10, "MODIFY_ADDRESS": 11, "ORDER_QUERY": 12, "ORDER_STATUS": 13, "ORDER_TAKING": 14, "ORIGINAL_PRODUCT": 15, "PAYMENT_AND_BILL": 16, "PORTAL_ISSUE": 17, "RECOMMEND_PRODUCT": 18, "REFER_EARN": 19, "REFUNDS_RETURNS_REPLACEMENTS": 20, "RESUME_DELIVERY": 21, "SIDE_EFFECT": 22, "SIGN_UP": 23, "START_OVER": 24, "STORE_INFORMATION": 25, "USER_GOAL_FORM": 26, "WORK_FROM_HOME": 27}, "lazy_delimiter": "\t", 
"lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1} -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart_subset/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart_subset/tblogs/events.out.tfevents.1597351105.haptik-ai-research-mum-ml-2-vm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/curekart_subset/tblogs/events.out.tfevents.1597351105.haptik-ai-research-mum-ml-2-vm -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart_subset/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": true, "model_max_length": 512} -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart_subset/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/curekart_subset/training_args.bin -------------------------------------------------------------------------------- /platforms/bert/bert_models/curekart_subset/training_progress_scores.csv: -------------------------------------------------------------------------------- 1 | 
global_step,mcc,train_loss,eval_loss 2 | 52,0.015618999829071147,3.3157989978790283,3.295060153190906 3 | 100,0.15194244760341802,3.0897583961486816,2.9831165144076714 4 | 104,0.1341155552042914,3.1407852172851562,2.9660061368575463 5 | 156,0.6815403439984585,2.89314603805542,2.3054693043231964 6 | 200,0.8222967981450889,1.9050589799880981,1.7316755652427673 7 | 208,0.9141834131268949,1.519261360168457,1.533938548885859 8 | 260,0.9660890899536336,1.3009628057479858,0.8027959448786882 9 | 300,0.9870062869068875,0.8840746879577637,0.44703088586147016 10 | 312,0.9947941467363587,0.6657969355583191,0.38270918279886246 11 | 364,0.9817915970321776,0.1275816559791565,0.214721856208948 12 | 400,0.9922147550894792,0.25742307305336,0.12980968003662732 13 | 416,1.0,0.13560621440410614,0.09332964519182077 14 | 468,1.0,0.03233667090535164,0.043612775536110766 15 | 500,1.0,0.0301946010440588,0.030464474362536118 16 | 520,1.0,0.02194424904882908,0.0229594346243315 17 | 572,1.0,0.014761185273528099,0.01612605733008912 18 | 600,1.0,0.015239364467561245,0.01403906225011899 19 | 624,1.0,0.011298744939267635,0.012511847576556297 20 | 676,1.0,0.009137298911809921,0.010069010976272134 21 | 700,1.0,0.009275787509977818,0.009287668401017211 22 | 728,1.0,0.010665534995496273,0.00839059384396443 23 | 780,1.0,0.01529514417052269,0.007350107448963592 24 | 800,1.0,0.009811767376959324,0.007007549937742834 25 | 832,1.0,0.008346081711351871,0.006515947215330715 26 | 884,1.0,0.006207848433405161,0.005841879720369784 27 | 900,1.0,0.007257106713950634,0.005661116948781105 28 | 936,1.0,0.0076172430999577045,0.00529962131090892 29 | 988,1.0,0.005975247826427221,0.004852061746462893 30 | 1000,1.0,0.0038416371680796146,0.0047596723306924105 31 | 1040,1.0,0.005082871299237013,0.004475821105118554 32 | 1092,1.0,0.003949825186282396,0.004150250917658783 33 | 1100,1.0,0.004515018314123154,0.004103761085738929 34 | 1144,1.0,0.00412968173623085,0.003867794764049065 35 | 
1196,1.0,0.0031884999480098486,0.0036216092412360013 36 | 1200,1.0,0.004237225279211998,0.003604552045894357 37 | 1248,1.0,0.004021225031465292,0.0034063787136871656 38 | 1300,1.0,0.0037145880050957203,0.003216816734773322 39 | 1300,1.0,0.0037145880050957203,0.003216816734773322 40 | 1352,1.0,0.0035830303095281124,0.0030451807408378674 41 | 1400,1.0,0.002926103537902236,0.002906436136422249 42 | 1404,1.0,0.004418825265020132,0.0028954993781203833 43 | 1456,1.0,0.003054356202483177,0.0027609795327477446 44 | 1500,1.0,0.002867345931008458,0.002655705150503379 45 | 1508,1.0,0.0023572815116494894,0.0026374717507188995 46 | 1560,1.0,0.0022416089195758104,0.002526673533094044 47 | 1600,1.0,0.002718471921980381,0.0024489216000522273 48 | 1612,1.0,0.002809862606227398,0.002426815902044137 49 | 1664,1.0,0.0025117190089076757,0.0023362424690276384 50 | 1700,1.0,0.0023699230514466763,0.002280304313619406 51 | 1716,1.0,0.003509317059069872,0.0022554639391063783 52 | 1768,1.0,0.002868139650672674,0.0021807193876996348 53 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11/best_model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForSequenceClassification" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "id2label": { 11 | "0": "LABEL_0", 12 | "1": "LABEL_1", 13 | "2": "LABEL_2", 14 | "3": "LABEL_3", 15 | "4": "LABEL_4", 16 | "5": "LABEL_5", 17 | "6": "LABEL_6", 18 | "7": "LABEL_7", 19 | "8": "LABEL_8", 20 | "9": "LABEL_9", 21 | "10": "LABEL_10", 22 | "11": "LABEL_11", 23 | "12": "LABEL_12", 24 | "13": "LABEL_13", 25 | "14": "LABEL_14", 26 | "15": "LABEL_15", 27 | "16": "LABEL_16", 28 | "17": "LABEL_17", 29 | "18": "LABEL_18", 30 | "19": "LABEL_19", 31 | "20": "LABEL_20", 32 | "21": "LABEL_21", 33 | "22": 
"LABEL_22", 34 | "23": "LABEL_23", 35 | "24": "LABEL_24", 36 | "25": "LABEL_25", 37 | "26": "LABEL_26", 38 | "27": "LABEL_27", 39 | "28": "LABEL_28", 40 | "29": "LABEL_29", 41 | "30": "LABEL_30", 42 | "31": "LABEL_31", 43 | "32": "LABEL_32", 44 | "33": "LABEL_33", 45 | "34": "LABEL_34", 46 | "35": "LABEL_35", 47 | "36": "LABEL_36", 48 | "37": "LABEL_37", 49 | "38": "LABEL_38", 50 | "39": "LABEL_39", 51 | "40": "LABEL_40", 52 | "41": "LABEL_41", 53 | "42": "LABEL_42", 54 | "43": "LABEL_43", 55 | "44": "LABEL_44", 56 | "45": "LABEL_45", 57 | "46": "LABEL_46", 58 | "47": "LABEL_47", 59 | "48": "LABEL_48", 60 | "49": "LABEL_49", 61 | "50": "LABEL_50", 62 | "51": "LABEL_51", 63 | "52": "LABEL_52", 64 | "53": "LABEL_53", 65 | "54": "LABEL_54", 66 | "55": "LABEL_55", 67 | "56": "LABEL_56", 68 | "57": "LABEL_57", 69 | "58": "LABEL_58" 70 | }, 71 | "initializer_range": 0.02, 72 | "intermediate_size": 3072, 73 | "label2id": { 74 | "LABEL_0": 0, 75 | "LABEL_1": 1, 76 | "LABEL_10": 10, 77 | "LABEL_11": 11, 78 | "LABEL_12": 12, 79 | "LABEL_13": 13, 80 | "LABEL_14": 14, 81 | "LABEL_15": 15, 82 | "LABEL_16": 16, 83 | "LABEL_17": 17, 84 | "LABEL_18": 18, 85 | "LABEL_19": 19, 86 | "LABEL_2": 2, 87 | "LABEL_20": 20, 88 | "LABEL_21": 21, 89 | "LABEL_22": 22, 90 | "LABEL_23": 23, 91 | "LABEL_24": 24, 92 | "LABEL_25": 25, 93 | "LABEL_26": 26, 94 | "LABEL_27": 27, 95 | "LABEL_28": 28, 96 | "LABEL_29": 29, 97 | "LABEL_3": 3, 98 | "LABEL_30": 30, 99 | "LABEL_31": 31, 100 | "LABEL_32": 32, 101 | "LABEL_33": 33, 102 | "LABEL_34": 34, 103 | "LABEL_35": 35, 104 | "LABEL_36": 36, 105 | "LABEL_37": 37, 106 | "LABEL_38": 38, 107 | "LABEL_39": 39, 108 | "LABEL_4": 4, 109 | "LABEL_40": 40, 110 | "LABEL_41": 41, 111 | "LABEL_42": 42, 112 | "LABEL_43": 43, 113 | "LABEL_44": 44, 114 | "LABEL_45": 45, 115 | "LABEL_46": 46, 116 | "LABEL_47": 47, 117 | "LABEL_48": 48, 118 | "LABEL_49": 49, 119 | "LABEL_5": 5, 120 | "LABEL_50": 50, 121 | "LABEL_51": 51, 122 | "LABEL_52": 52, 123 | "LABEL_53": 53, 124 | 
"LABEL_54": 54, 125 | "LABEL_55": 55, 126 | "LABEL_56": 56, 127 | "LABEL_57": 57, 128 | "LABEL_58": 58, 129 | "LABEL_6": 6, 130 | "LABEL_7": 7, 131 | "LABEL_8": 8, 132 | "LABEL_9": 9 133 | }, 134 | "layer_norm_eps": 1e-12, 135 | "max_position_embeddings": 512, 136 | "model_type": "bert", 137 | "num_attention_heads": 12, 138 | "num_hidden_layers": 12, 139 | "pad_token_id": 0, 140 | "type_vocab_size": 2, 141 | "vocab_size": 30522 142 | } 143 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11/best_model/eval_results.txt: -------------------------------------------------------------------------------- 1 | eval_loss = 0.004777703106687483 2 | mcc = 1.0 3 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11/best_model/model_args.json: -------------------------------------------------------------------------------- 1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/powerplay11/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/powerplay11/", "overwrite_output_dir": true, "process_count": 4, 
"reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/powerplay11/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 177, "weight_decay": 0, "labels_list": ["ACCOUNT_BALANCE_DEDUCTED", "ACCOUNT_NOT_VERIFIED", "ACCOUNT_RESET", "APPRECIATION", "BANK_VERIFICATION_DETAILS", "CANNOT_SEE_JOINED_CONTESTS", "CAPABILITIES", "CASH_BONUS", "CASH_BONUS_EXPIRY", "CHANGE_BANK_ACCOUNT", "CHANGE_MOBILE_NUMBER", "CHANGE_PROFILE_TEAM_DETAILS", "CHAT_WITH_AN_AGENT", "CHECK_DEPOSIT_STATUS", "CHECK_WALLET_BALANCE", "CONTACT_NUMBER", "CRITICISM", "DEDUCTED_AMOUNT_NOT_RECEIVED", "DELETE_PAN_CARD", "DOWNLOAD_POWERPLAY11", "FAIRPLAY_VIOLATIONS", "FAKE_TEAMS", "FEEDBACK", "GREETINGS_DAY", "HOW_POINTS_CALCULATED", "HOW_TO_PLAY", "INSTANT_WITHDRAWAL", "JOIN_CONTEST", "LESS_WINNINGS_AMOUNT", "MATCH_ABANDONED", "NEW_TEAM_PATTERN", "NO_EMAIL_CONFIRMATION", "OFFERS_AND_REFERRALS", "PAN_VERIFICATION_FAILED", "POINTS_NOT_UPDATED", "PRESENCE", "REFUND_OF_ADDED_CASH", "REFUND_OF_WRONG_AMOUNT", "SIGNUP_BONUS", "TAXES_ON_WINNINGS", "TEAM_DEADLINE", "THANKS", "TYPES_BONUS", "TYPES_CONTESTS", "UNUTILIZED_MONEY", "UPDATE_APP", "VERIFY_EMAIL", "VERIFY_MOBILE", "VERIFY_PAN", "WHAT_IF_THERES_A_TIE", "WHEN_SCORES_UPDATED", "WHEN_WINNINGS_DISTRIBUTED", "WHY_VERIFY", "WINNINGS", "WITHDRAWAL_INTRO", "WITHDRAWAL_STATUS", "WITHDRAWAL_TIME", "WITHDRAW_CASH_BONUS", "WRONG_SCORES"], "labels_map": {"ACCOUNT_BALANCE_DEDUCTED": 0, "ACCOUNT_NOT_VERIFIED": 1, "ACCOUNT_RESET": 2, "APPRECIATION": 3, "BANK_VERIFICATION_DETAILS": 4, "CANNOT_SEE_JOINED_CONTESTS": 5, "CAPABILITIES": 6, "CASH_BONUS": 7, "CASH_BONUS_EXPIRY": 8, "CHANGE_BANK_ACCOUNT": 9, "CHANGE_MOBILE_NUMBER": 10, 
"CHANGE_PROFILE_TEAM_DETAILS": 11, "CHAT_WITH_AN_AGENT": 12, "CHECK_DEPOSIT_STATUS": 13, "CHECK_WALLET_BALANCE": 14, "CONTACT_NUMBER": 15, "CRITICISM": 16, "DEDUCTED_AMOUNT_NOT_RECEIVED": 17, "DELETE_PAN_CARD": 18, "DOWNLOAD_POWERPLAY11": 19, "FAIRPLAY_VIOLATIONS": 20, "FAKE_TEAMS": 21, "FEEDBACK": 22, "GREETINGS_DAY": 23, "HOW_POINTS_CALCULATED": 24, "HOW_TO_PLAY": 25, "INSTANT_WITHDRAWAL": 26, "JOIN_CONTEST": 27, "LESS_WINNINGS_AMOUNT": 28, "MATCH_ABANDONED": 29, "NEW_TEAM_PATTERN": 30, "NO_EMAIL_CONFIRMATION": 31, "OFFERS_AND_REFERRALS": 32, "PAN_VERIFICATION_FAILED": 33, "POINTS_NOT_UPDATED": 34, "PRESENCE": 35, "REFUND_OF_ADDED_CASH": 36, "REFUND_OF_WRONG_AMOUNT": 37, "SIGNUP_BONUS": 38, "TAXES_ON_WINNINGS": 39, "TEAM_DEADLINE": 40, "THANKS": 41, "TYPES_BONUS": 42, "TYPES_CONTESTS": 43, "UNUTILIZED_MONEY": 44, "UPDATE_APP": 45, "VERIFY_EMAIL": 46, "VERIFY_MOBILE": 47, "VERIFY_PAN": 48, "WHAT_IF_THERES_A_TIE": 49, "WHEN_SCORES_UPDATED": 50, "WHEN_WINNINGS_DISTRIBUTED": 51, "WHY_VERIFY": 52, "WINNINGS": 53, "WITHDRAWAL_INTRO": 54, "WITHDRAWAL_STATUS": 55, "WITHDRAWAL_TIME": 56, "WITHDRAW_CASH_BONUS": 57, "WRONG_SCORES": 58}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1} -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11/best_model/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11/best_model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | 
{"do_lower_case": true, "model_max_length": 512} -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11/best_model/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/powerplay11/best_model/training_args.bin -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForSequenceClassification" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "id2label": { 11 | "0": "LABEL_0", 12 | "1": "LABEL_1", 13 | "2": "LABEL_2", 14 | "3": "LABEL_3", 15 | "4": "LABEL_4", 16 | "5": "LABEL_5", 17 | "6": "LABEL_6", 18 | "7": "LABEL_7", 19 | "8": "LABEL_8", 20 | "9": "LABEL_9", 21 | "10": "LABEL_10", 22 | "11": "LABEL_11", 23 | "12": "LABEL_12", 24 | "13": "LABEL_13", 25 | "14": "LABEL_14", 26 | "15": "LABEL_15", 27 | "16": "LABEL_16", 28 | "17": "LABEL_17", 29 | "18": "LABEL_18", 30 | "19": "LABEL_19", 31 | "20": "LABEL_20", 32 | "21": "LABEL_21", 33 | "22": "LABEL_22", 34 | "23": "LABEL_23", 35 | "24": "LABEL_24", 36 | "25": "LABEL_25", 37 | "26": "LABEL_26", 38 | "27": "LABEL_27", 39 | "28": "LABEL_28", 40 | "29": "LABEL_29", 41 | "30": "LABEL_30", 42 | "31": "LABEL_31", 43 | "32": "LABEL_32", 44 | "33": "LABEL_33", 45 | "34": "LABEL_34", 46 | "35": "LABEL_35", 47 | "36": "LABEL_36", 48 | "37": "LABEL_37", 49 | "38": "LABEL_38", 50 | "39": "LABEL_39", 51 | "40": "LABEL_40", 52 | "41": "LABEL_41", 53 | "42": "LABEL_42", 54 | "43": "LABEL_43", 55 | "44": "LABEL_44", 56 | "45": "LABEL_45", 57 | "46": "LABEL_46", 58 | "47": "LABEL_47", 59 | 
"48": "LABEL_48", 60 | "49": "LABEL_49", 61 | "50": "LABEL_50", 62 | "51": "LABEL_51", 63 | "52": "LABEL_52", 64 | "53": "LABEL_53", 65 | "54": "LABEL_54", 66 | "55": "LABEL_55", 67 | "56": "LABEL_56", 68 | "57": "LABEL_57", 69 | "58": "LABEL_58" 70 | }, 71 | "initializer_range": 0.02, 72 | "intermediate_size": 3072, 73 | "label2id": { 74 | "LABEL_0": 0, 75 | "LABEL_1": 1, 76 | "LABEL_10": 10, 77 | "LABEL_11": 11, 78 | "LABEL_12": 12, 79 | "LABEL_13": 13, 80 | "LABEL_14": 14, 81 | "LABEL_15": 15, 82 | "LABEL_16": 16, 83 | "LABEL_17": 17, 84 | "LABEL_18": 18, 85 | "LABEL_19": 19, 86 | "LABEL_2": 2, 87 | "LABEL_20": 20, 88 | "LABEL_21": 21, 89 | "LABEL_22": 22, 90 | "LABEL_23": 23, 91 | "LABEL_24": 24, 92 | "LABEL_25": 25, 93 | "LABEL_26": 26, 94 | "LABEL_27": 27, 95 | "LABEL_28": 28, 96 | "LABEL_29": 29, 97 | "LABEL_3": 3, 98 | "LABEL_30": 30, 99 | "LABEL_31": 31, 100 | "LABEL_32": 32, 101 | "LABEL_33": 33, 102 | "LABEL_34": 34, 103 | "LABEL_35": 35, 104 | "LABEL_36": 36, 105 | "LABEL_37": 37, 106 | "LABEL_38": 38, 107 | "LABEL_39": 39, 108 | "LABEL_4": 4, 109 | "LABEL_40": 40, 110 | "LABEL_41": 41, 111 | "LABEL_42": 42, 112 | "LABEL_43": 43, 113 | "LABEL_44": 44, 114 | "LABEL_45": 45, 115 | "LABEL_46": 46, 116 | "LABEL_47": 47, 117 | "LABEL_48": 48, 118 | "LABEL_49": 49, 119 | "LABEL_5": 5, 120 | "LABEL_50": 50, 121 | "LABEL_51": 51, 122 | "LABEL_52": 52, 123 | "LABEL_53": 53, 124 | "LABEL_54": 54, 125 | "LABEL_55": 55, 126 | "LABEL_56": 56, 127 | "LABEL_57": 57, 128 | "LABEL_58": 58, 129 | "LABEL_6": 6, 130 | "LABEL_7": 7, 131 | "LABEL_8": 8, 132 | "LABEL_9": 9 133 | }, 134 | "layer_norm_eps": 1e-12, 135 | "max_position_embeddings": 512, 136 | "model_type": "bert", 137 | "num_attention_heads": 12, 138 | "num_hidden_layers": 12, 139 | "pad_token_id": 0, 140 | "type_vocab_size": 2, 141 | "vocab_size": 30522 142 | } 143 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11/eval_results.txt: 
-------------------------------------------------------------------------------- 1 | eval_loss = 0.004436580420834786 2 | mcc = 1.0 3 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11/model_args.json: -------------------------------------------------------------------------------- 1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/powerplay11/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/powerplay11/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/powerplay11/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 177, "weight_decay": 0, "labels_list": ["ACCOUNT_BALANCE_DEDUCTED", "ACCOUNT_NOT_VERIFIED", "ACCOUNT_RESET", "APPRECIATION", "BANK_VERIFICATION_DETAILS", 
"CANNOT_SEE_JOINED_CONTESTS", "CAPABILITIES", "CASH_BONUS", "CASH_BONUS_EXPIRY", "CHANGE_BANK_ACCOUNT", "CHANGE_MOBILE_NUMBER", "CHANGE_PROFILE_TEAM_DETAILS", "CHAT_WITH_AN_AGENT", "CHECK_DEPOSIT_STATUS", "CHECK_WALLET_BALANCE", "CONTACT_NUMBER", "CRITICISM", "DEDUCTED_AMOUNT_NOT_RECEIVED", "DELETE_PAN_CARD", "DOWNLOAD_POWERPLAY11", "FAIRPLAY_VIOLATIONS", "FAKE_TEAMS", "FEEDBACK", "GREETINGS_DAY", "HOW_POINTS_CALCULATED", "HOW_TO_PLAY", "INSTANT_WITHDRAWAL", "JOIN_CONTEST", "LESS_WINNINGS_AMOUNT", "MATCH_ABANDONED", "NEW_TEAM_PATTERN", "NO_EMAIL_CONFIRMATION", "OFFERS_AND_REFERRALS", "PAN_VERIFICATION_FAILED", "POINTS_NOT_UPDATED", "PRESENCE", "REFUND_OF_ADDED_CASH", "REFUND_OF_WRONG_AMOUNT", "SIGNUP_BONUS", "TAXES_ON_WINNINGS", "TEAM_DEADLINE", "THANKS", "TYPES_BONUS", "TYPES_CONTESTS", "UNUTILIZED_MONEY", "UPDATE_APP", "VERIFY_EMAIL", "VERIFY_MOBILE", "VERIFY_PAN", "WHAT_IF_THERES_A_TIE", "WHEN_SCORES_UPDATED", "WHEN_WINNINGS_DISTRIBUTED", "WHY_VERIFY", "WINNINGS", "WITHDRAWAL_INTRO", "WITHDRAWAL_STATUS", "WITHDRAWAL_TIME", "WITHDRAW_CASH_BONUS", "WRONG_SCORES"], "labels_map": {"ACCOUNT_BALANCE_DEDUCTED": 0, "ACCOUNT_NOT_VERIFIED": 1, "ACCOUNT_RESET": 2, "APPRECIATION": 3, "BANK_VERIFICATION_DETAILS": 4, "CANNOT_SEE_JOINED_CONTESTS": 5, "CAPABILITIES": 6, "CASH_BONUS": 7, "CASH_BONUS_EXPIRY": 8, "CHANGE_BANK_ACCOUNT": 9, "CHANGE_MOBILE_NUMBER": 10, "CHANGE_PROFILE_TEAM_DETAILS": 11, "CHAT_WITH_AN_AGENT": 12, "CHECK_DEPOSIT_STATUS": 13, "CHECK_WALLET_BALANCE": 14, "CONTACT_NUMBER": 15, "CRITICISM": 16, "DEDUCTED_AMOUNT_NOT_RECEIVED": 17, "DELETE_PAN_CARD": 18, "DOWNLOAD_POWERPLAY11": 19, "FAIRPLAY_VIOLATIONS": 20, "FAKE_TEAMS": 21, "FEEDBACK": 22, "GREETINGS_DAY": 23, "HOW_POINTS_CALCULATED": 24, "HOW_TO_PLAY": 25, "INSTANT_WITHDRAWAL": 26, "JOIN_CONTEST": 27, "LESS_WINNINGS_AMOUNT": 28, "MATCH_ABANDONED": 29, "NEW_TEAM_PATTERN": 30, "NO_EMAIL_CONFIRMATION": 31, "OFFERS_AND_REFERRALS": 32, "PAN_VERIFICATION_FAILED": 33, "POINTS_NOT_UPDATED": 34, "PRESENCE": 35, 
"REFUND_OF_ADDED_CASH": 36, "REFUND_OF_WRONG_AMOUNT": 37, "SIGNUP_BONUS": 38, "TAXES_ON_WINNINGS": 39, "TEAM_DEADLINE": 40, "THANKS": 41, "TYPES_BONUS": 42, "TYPES_CONTESTS": 43, "UNUTILIZED_MONEY": 44, "UPDATE_APP": 45, "VERIFY_EMAIL": 46, "VERIFY_MOBILE": 47, "VERIFY_PAN": 48, "WHAT_IF_THERES_A_TIE": 49, "WHEN_SCORES_UPDATED": 50, "WHEN_WINNINGS_DISTRIBUTED": 51, "WHY_VERIFY": 52, "WINNINGS": 53, "WITHDRAWAL_INTRO": 54, "WITHDRAWAL_STATUS": 55, "WITHDRAWAL_TIME": 56, "WITHDRAW_CASH_BONUS": 57, "WRONG_SCORES": 58}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1} -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11/tblogs/events.out.tfevents.1597160596.haptik-ai-research-mum-ml-2-vm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/powerplay11/tblogs/events.out.tfevents.1597160596.haptik-ai-research-mum-ml-2-vm -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": true, "model_max_length": 512} -------------------------------------------------------------------------------- 
/platforms/bert/bert_models/powerplay11/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/powerplay11/training_args.bin -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11/training_progress_scores.csv: -------------------------------------------------------------------------------- 1 | global_step,mcc,train_loss,eval_loss 2 | 59,0.027858490914640408,4.28368616104126,4.027746014675851 3 | 100,0.1388860375009059,4.070891857147217,3.8102310673665194 4 | 118,0.24644621058859223,3.6875147819519043,3.638863547373626 5 | 177,0.631254500370442,3.256054639816284,3.030882439370883 6 | 200,0.8061529798397276,2.834552049636841,2.6386114864026085 7 | 236,0.8573551743063274,1.9863373041152954,2.184069906250905 8 | 295,0.9221981792365874,1.7925243377685547,1.4907570501505318 9 | 300,0.9200168612282212,2.2602338790893555,1.4633160819441586 10 | 354,0.9825407935361834,1.0775566101074219,0.9389180697626986 11 | 400,0.9912666800868809,0.5885568261146545,0.6571497010477518 12 | 413,0.9890793756226487,1.1089935302734375,0.5821901269383349 13 | 472,0.9956353411596269,0.22419719398021698,0.37195264686972407 14 | 500,0.9978154170392022,0.2659372389316559,0.3241557360200559 15 | 531,0.9978153375284966,0.14318816363811493,0.24539439506449942 16 | 590,0.9956353411596269,0.9525749683380127,0.17348836974824888 17 | 600,0.9956353411596269,0.07537608593702316,0.16977044946308864 18 | 649,1.0,0.08004013448953629,0.12424232431892622 19 | 700,1.0,0.11937856674194336,0.09923130501124819 20 | 708,1.0,0.8454639911651611,0.09593275220969975 21 | 767,1.0,0.04086548089981079,0.07504452547153175 22 | 800,1.0,0.048212651163339615,0.0641876907261499 23 | 826,1.0,0.023425769060850143,0.057786518834152464 24 | 885,1.0,0.019573189318180084,0.04410979456378747 25 | 
900,1.0,0.03591879829764366,0.042635942493581165 26 | 944,1.0,0.01790086179971695,0.036145012320603354 27 | 1000,1.0,0.024176493287086487,0.031013001214270874 28 | 1003,1.0,0.016845354810357094,0.03092983246670436 29 | 1062,1.0,0.017478376626968384,0.025017562306533427 30 | 1100,1.0,0.012325120158493519,0.02451390260041265 31 | 1121,1.0,0.014549019746482372,0.0232171771763745 32 | 1180,1.0,0.014574043452739716,0.020135768657644926 33 | 1200,1.0,0.13982988893985748,0.017196936019838363 34 | 1239,1.0,0.010516177862882614,0.0157944549553854 35 | 1298,1.0,0.014294502325356007,0.012959890919974295 36 | 1300,1.0,0.00912060122936964,0.012967476392385819 37 | 1357,1.0,0.007764187641441822,0.011520646465153007 38 | 1400,1.0,0.008232791908085346,0.010727511103249203 39 | 1416,1.0,0.008479280397295952,0.010446412244148679 40 | 1475,1.0,0.011196212843060493,0.009631002890103954 41 | 1500,1.0,0.033641137182712555,0.009016437732251519 42 | 1534,1.0,0.008253618143498898,0.008638874067277726 43 | 1593,1.0,0.020688096061348915,0.00793147428055941 44 | 1600,1.0,0.00887473113834858,0.00787849471730701 45 | 1652,1.0,0.025912323966622353,0.007468934810199475 46 | 1700,1.0,0.006430561188608408,0.007107750378485958 47 | 1711,1.0,0.007730090990662575,0.007032733473737361 48 | 1770,1.0,0.006430315785109997,0.006667172366563799 49 | 1800,1.0,0.00811198353767395,0.006497785089916344 50 | 1829,1.0,0.005642751231789589,0.006349041450263585 51 | 1888,1.0,0.006205701734870672,0.006077040227572039 52 | 1900,1.0,0.012514472007751465,0.006018065050307472 53 | 1947,1.0,0.005345325917005539,0.005813967006407299 54 | 2000,1.0,0.004667398054152727,0.005611742819043792 55 | 2006,1.0,0.005360324867069721,0.005591547732257237 56 | 2065,1.0,0.0050165653228759766,0.005392878351859369 57 | 2100,1.0,0.005425313953310251,0.005290802931255203 58 | 2124,1.0,0.014841246418654919,0.005215194995783396 59 | 2183,1.0,0.00473443791270256,0.005049212650253864 60 | 2200,1.0,0.005167374853044748,0.005006618553109594 61 | 
2242,1.0,0.005450689699500799,0.004903205370498916 62 | 2300,1.0,0.004484163597226143,0.004777703106687483 63 | 2301,1.0,0.004542009439319372,0.0047756676341138655 64 | 2360,1.0,0.003938835114240646,0.0046628776945615725 65 | 2400,1.0,0.004314529709517956,0.00459044983500015 66 | 2419,1.0,0.004678010940551758,0.004558072354391975 67 | 2478,1.0,0.00308023183606565,0.004468135906667527 68 | 2500,1.0,0.01107504591345787,0.004436580420834786 69 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11_subset/best_model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForSequenceClassification" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "id2label": { 11 | "0": "LABEL_0", 12 | "1": "LABEL_1", 13 | "2": "LABEL_2", 14 | "3": "LABEL_3", 15 | "4": "LABEL_4", 16 | "5": "LABEL_5", 17 | "6": "LABEL_6", 18 | "7": "LABEL_7", 19 | "8": "LABEL_8", 20 | "9": "LABEL_9", 21 | "10": "LABEL_10", 22 | "11": "LABEL_11", 23 | "12": "LABEL_12", 24 | "13": "LABEL_13", 25 | "14": "LABEL_14", 26 | "15": "LABEL_15", 27 | "16": "LABEL_16", 28 | "17": "LABEL_17", 29 | "18": "LABEL_18", 30 | "19": "LABEL_19", 31 | "20": "LABEL_20", 32 | "21": "LABEL_21", 33 | "22": "LABEL_22", 34 | "23": "LABEL_23", 35 | "24": "LABEL_24", 36 | "25": "LABEL_25", 37 | "26": "LABEL_26", 38 | "27": "LABEL_27", 39 | "28": "LABEL_28", 40 | "29": "LABEL_29", 41 | "30": "LABEL_30", 42 | "31": "LABEL_31", 43 | "32": "LABEL_32", 44 | "33": "LABEL_33", 45 | "34": "LABEL_34", 46 | "35": "LABEL_35", 47 | "36": "LABEL_36", 48 | "37": "LABEL_37", 49 | "38": "LABEL_38", 50 | "39": "LABEL_39", 51 | "40": "LABEL_40", 52 | "41": "LABEL_41", 53 | "42": "LABEL_42", 54 | "43": "LABEL_43", 55 | "44": "LABEL_44", 56 | "45": "LABEL_45", 57 | "46": "LABEL_46", 58 | 
"47": "LABEL_47", 59 | "48": "LABEL_48", 60 | "49": "LABEL_49", 61 | "50": "LABEL_50", 62 | "51": "LABEL_51", 63 | "52": "LABEL_52", 64 | "53": "LABEL_53", 65 | "54": "LABEL_54", 66 | "55": "LABEL_55", 67 | "56": "LABEL_56", 68 | "57": "LABEL_57", 69 | "58": "LABEL_58" 70 | }, 71 | "initializer_range": 0.02, 72 | "intermediate_size": 3072, 73 | "label2id": { 74 | "LABEL_0": 0, 75 | "LABEL_1": 1, 76 | "LABEL_10": 10, 77 | "LABEL_11": 11, 78 | "LABEL_12": 12, 79 | "LABEL_13": 13, 80 | "LABEL_14": 14, 81 | "LABEL_15": 15, 82 | "LABEL_16": 16, 83 | "LABEL_17": 17, 84 | "LABEL_18": 18, 85 | "LABEL_19": 19, 86 | "LABEL_2": 2, 87 | "LABEL_20": 20, 88 | "LABEL_21": 21, 89 | "LABEL_22": 22, 90 | "LABEL_23": 23, 91 | "LABEL_24": 24, 92 | "LABEL_25": 25, 93 | "LABEL_26": 26, 94 | "LABEL_27": 27, 95 | "LABEL_28": 28, 96 | "LABEL_29": 29, 97 | "LABEL_3": 3, 98 | "LABEL_30": 30, 99 | "LABEL_31": 31, 100 | "LABEL_32": 32, 101 | "LABEL_33": 33, 102 | "LABEL_34": 34, 103 | "LABEL_35": 35, 104 | "LABEL_36": 36, 105 | "LABEL_37": 37, 106 | "LABEL_38": 38, 107 | "LABEL_39": 39, 108 | "LABEL_4": 4, 109 | "LABEL_40": 40, 110 | "LABEL_41": 41, 111 | "LABEL_42": 42, 112 | "LABEL_43": 43, 113 | "LABEL_44": 44, 114 | "LABEL_45": 45, 115 | "LABEL_46": 46, 116 | "LABEL_47": 47, 117 | "LABEL_48": 48, 118 | "LABEL_49": 49, 119 | "LABEL_5": 5, 120 | "LABEL_50": 50, 121 | "LABEL_51": 51, 122 | "LABEL_52": 52, 123 | "LABEL_53": 53, 124 | "LABEL_54": 54, 125 | "LABEL_55": 55, 126 | "LABEL_56": 56, 127 | "LABEL_57": 57, 128 | "LABEL_58": 58, 129 | "LABEL_6": 6, 130 | "LABEL_7": 7, 131 | "LABEL_8": 8, 132 | "LABEL_9": 9 133 | }, 134 | "layer_norm_eps": 1e-12, 135 | "max_position_embeddings": 512, 136 | "model_type": "bert", 137 | "num_attention_heads": 12, 138 | "num_hidden_layers": 12, 139 | "pad_token_id": 0, 140 | "type_vocab_size": 2, 141 | "vocab_size": 30522 142 | } 143 | -------------------------------------------------------------------------------- 
/platforms/bert/bert_models/powerplay11_subset/best_model/eval_results.txt: -------------------------------------------------------------------------------- 1 | eval_loss = 0.015619493484722845 2 | mcc = 1.0 3 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11_subset/best_model/model_args.json: -------------------------------------------------------------------------------- 1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/powerplay11_subset/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/powerplay11_subset/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/powerplay11_subset/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 99, "weight_decay": 0, "labels_list": 
["ACCOUNT_BALANCE_DEDUCTED", "ACCOUNT_NOT_VERIFIED", "ACCOUNT_RESET", "APPRECIATION", "BANK_VERIFICATION_DETAILS", "CANNOT_SEE_JOINED_CONTESTS", "CAPABILITIES", "CASH_BONUS", "CASH_BONUS_EXPIRY", "CHANGE_BANK_ACCOUNT", "CHANGE_MOBILE_NUMBER", "CHANGE_PROFILE_TEAM_DETAILS", "CHAT_WITH_AN_AGENT", "CHECK_DEPOSIT_STATUS", "CHECK_WALLET_BALANCE", "CONTACT_NUMBER", "CRITICISM", "DEDUCTED_AMOUNT_NOT_RECEIVED", "DELETE_PAN_CARD", "DOWNLOAD_POWERPLAY11", "FAIRPLAY_VIOLATIONS", "FAKE_TEAMS", "FEEDBACK", "GREETINGS_DAY", "HOW_POINTS_CALCULATED", "HOW_TO_PLAY", "INSTANT_WITHDRAWAL", "JOIN_CONTEST", "LESS_WINNINGS_AMOUNT", "MATCH_ABANDONED", "NEW_TEAM_PATTERN", "NO_EMAIL_CONFIRMATION", "OFFERS_AND_REFERRALS", "PAN_VERIFICATION_FAILED", "POINTS_NOT_UPDATED", "PRESENCE", "REFUND_OF_ADDED_CASH", "REFUND_OF_WRONG_AMOUNT", "SIGNUP_BONUS", "TAXES_ON_WINNINGS", "TEAM_DEADLINE", "THANKS", "TYPES_BONUS", "TYPES_CONTESTS", "UNUTILIZED_MONEY", "UPDATE_APP", "VERIFY_EMAIL", "VERIFY_MOBILE", "VERIFY_PAN", "WHAT_IF_THERES_A_TIE", "WHEN_SCORES_UPDATED", "WHEN_WINNINGS_DISTRIBUTED", "WHY_VERIFY", "WINNINGS", "WITHDRAWAL_INTRO", "WITHDRAWAL_STATUS", "WITHDRAWAL_TIME", "WITHDRAW_CASH_BONUS", "WRONG_SCORES"], "labels_map": {"ACCOUNT_BALANCE_DEDUCTED": 0, "ACCOUNT_NOT_VERIFIED": 1, "ACCOUNT_RESET": 2, "APPRECIATION": 3, "BANK_VERIFICATION_DETAILS": 4, "CANNOT_SEE_JOINED_CONTESTS": 5, "CAPABILITIES": 6, "CASH_BONUS": 7, "CASH_BONUS_EXPIRY": 8, "CHANGE_BANK_ACCOUNT": 9, "CHANGE_MOBILE_NUMBER": 10, "CHANGE_PROFILE_TEAM_DETAILS": 11, "CHAT_WITH_AN_AGENT": 12, "CHECK_DEPOSIT_STATUS": 13, "CHECK_WALLET_BALANCE": 14, "CONTACT_NUMBER": 15, "CRITICISM": 16, "DEDUCTED_AMOUNT_NOT_RECEIVED": 17, "DELETE_PAN_CARD": 18, "DOWNLOAD_POWERPLAY11": 19, "FAIRPLAY_VIOLATIONS": 20, "FAKE_TEAMS": 21, "FEEDBACK": 22, "GREETINGS_DAY": 23, "HOW_POINTS_CALCULATED": 24, "HOW_TO_PLAY": 25, "INSTANT_WITHDRAWAL": 26, "JOIN_CONTEST": 27, "LESS_WINNINGS_AMOUNT": 28, "MATCH_ABANDONED": 29, "NEW_TEAM_PATTERN": 30, 
"NO_EMAIL_CONFIRMATION": 31, "OFFERS_AND_REFERRALS": 32, "PAN_VERIFICATION_FAILED": 33, "POINTS_NOT_UPDATED": 34, "PRESENCE": 35, "REFUND_OF_ADDED_CASH": 36, "REFUND_OF_WRONG_AMOUNT": 37, "SIGNUP_BONUS": 38, "TAXES_ON_WINNINGS": 39, "TEAM_DEADLINE": 40, "THANKS": 41, "TYPES_BONUS": 42, "TYPES_CONTESTS": 43, "UNUTILIZED_MONEY": 44, "UPDATE_APP": 45, "VERIFY_EMAIL": 46, "VERIFY_MOBILE": 47, "VERIFY_PAN": 48, "WHAT_IF_THERES_A_TIE": 49, "WHEN_SCORES_UPDATED": 50, "WHEN_WINNINGS_DISTRIBUTED": 51, "WHY_VERIFY": 52, "WINNINGS": 53, "WITHDRAWAL_INTRO": 54, "WITHDRAWAL_STATUS": 55, "WITHDRAWAL_TIME": 56, "WITHDRAW_CASH_BONUS": 57, "WRONG_SCORES": 58}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1} -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11_subset/best_model/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11_subset/best_model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": true, "model_max_length": 512} -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11_subset/best_model/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/powerplay11_subset/best_model/training_args.bin 
-------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11_subset/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForSequenceClassification" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "id2label": { 11 | "0": "LABEL_0", 12 | "1": "LABEL_1", 13 | "2": "LABEL_2", 14 | "3": "LABEL_3", 15 | "4": "LABEL_4", 16 | "5": "LABEL_5", 17 | "6": "LABEL_6", 18 | "7": "LABEL_7", 19 | "8": "LABEL_8", 20 | "9": "LABEL_9", 21 | "10": "LABEL_10", 22 | "11": "LABEL_11", 23 | "12": "LABEL_12", 24 | "13": "LABEL_13", 25 | "14": "LABEL_14", 26 | "15": "LABEL_15", 27 | "16": "LABEL_16", 28 | "17": "LABEL_17", 29 | "18": "LABEL_18", 30 | "19": "LABEL_19", 31 | "20": "LABEL_20", 32 | "21": "LABEL_21", 33 | "22": "LABEL_22", 34 | "23": "LABEL_23", 35 | "24": "LABEL_24", 36 | "25": "LABEL_25", 37 | "26": "LABEL_26", 38 | "27": "LABEL_27", 39 | "28": "LABEL_28", 40 | "29": "LABEL_29", 41 | "30": "LABEL_30", 42 | "31": "LABEL_31", 43 | "32": "LABEL_32", 44 | "33": "LABEL_33", 45 | "34": "LABEL_34", 46 | "35": "LABEL_35", 47 | "36": "LABEL_36", 48 | "37": "LABEL_37", 49 | "38": "LABEL_38", 50 | "39": "LABEL_39", 51 | "40": "LABEL_40", 52 | "41": "LABEL_41", 53 | "42": "LABEL_42", 54 | "43": "LABEL_43", 55 | "44": "LABEL_44", 56 | "45": "LABEL_45", 57 | "46": "LABEL_46", 58 | "47": "LABEL_47", 59 | "48": "LABEL_48", 60 | "49": "LABEL_49", 61 | "50": "LABEL_50", 62 | "51": "LABEL_51", 63 | "52": "LABEL_52", 64 | "53": "LABEL_53", 65 | "54": "LABEL_54", 66 | "55": "LABEL_55", 67 | "56": "LABEL_56", 68 | "57": "LABEL_57", 69 | "58": "LABEL_58" 70 | }, 71 | "initializer_range": 0.02, 72 | "intermediate_size": 3072, 73 | "label2id": { 74 | "LABEL_0": 0, 75 | "LABEL_1": 1, 76 | "LABEL_10": 10, 77 | "LABEL_11": 11, 78 | "LABEL_12": 
12, 79 | "LABEL_13": 13, 80 | "LABEL_14": 14, 81 | "LABEL_15": 15, 82 | "LABEL_16": 16, 83 | "LABEL_17": 17, 84 | "LABEL_18": 18, 85 | "LABEL_19": 19, 86 | "LABEL_2": 2, 87 | "LABEL_20": 20, 88 | "LABEL_21": 21, 89 | "LABEL_22": 22, 90 | "LABEL_23": 23, 91 | "LABEL_24": 24, 92 | "LABEL_25": 25, 93 | "LABEL_26": 26, 94 | "LABEL_27": 27, 95 | "LABEL_28": 28, 96 | "LABEL_29": 29, 97 | "LABEL_3": 3, 98 | "LABEL_30": 30, 99 | "LABEL_31": 31, 100 | "LABEL_32": 32, 101 | "LABEL_33": 33, 102 | "LABEL_34": 34, 103 | "LABEL_35": 35, 104 | "LABEL_36": 36, 105 | "LABEL_37": 37, 106 | "LABEL_38": 38, 107 | "LABEL_39": 39, 108 | "LABEL_4": 4, 109 | "LABEL_40": 40, 110 | "LABEL_41": 41, 111 | "LABEL_42": 42, 112 | "LABEL_43": 43, 113 | "LABEL_44": 44, 114 | "LABEL_45": 45, 115 | "LABEL_46": 46, 116 | "LABEL_47": 47, 117 | "LABEL_48": 48, 118 | "LABEL_49": 49, 119 | "LABEL_5": 5, 120 | "LABEL_50": 50, 121 | "LABEL_51": 51, 122 | "LABEL_52": 52, 123 | "LABEL_53": 53, 124 | "LABEL_54": 54, 125 | "LABEL_55": 55, 126 | "LABEL_56": 56, 127 | "LABEL_57": 57, 128 | "LABEL_58": 58, 129 | "LABEL_6": 6, 130 | "LABEL_7": 7, 131 | "LABEL_8": 8, 132 | "LABEL_9": 9 133 | }, 134 | "layer_norm_eps": 1e-12, 135 | "max_position_embeddings": 512, 136 | "model_type": "bert", 137 | "num_attention_heads": 12, 138 | "num_hidden_layers": 12, 139 | "pad_token_id": 0, 140 | "type_vocab_size": 2, 141 | "vocab_size": 30522 142 | } 143 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11_subset/eval_results.txt: -------------------------------------------------------------------------------- 1 | eval_loss = 0.015237725763158365 2 | mcc = 1.0 3 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11_subset/model_args.json: -------------------------------------------------------------------------------- 1 | {"adam_epsilon": 1e-08, "best_model_dir": 
"../bert_models/powerplay11_subset/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/powerplay11_subset/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/powerplay11_subset/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 99, "weight_decay": 0, "labels_list": ["ACCOUNT_BALANCE_DEDUCTED", "ACCOUNT_NOT_VERIFIED", "ACCOUNT_RESET", "APPRECIATION", "BANK_VERIFICATION_DETAILS", "CANNOT_SEE_JOINED_CONTESTS", "CAPABILITIES", "CASH_BONUS", "CASH_BONUS_EXPIRY", "CHANGE_BANK_ACCOUNT", "CHANGE_MOBILE_NUMBER", "CHANGE_PROFILE_TEAM_DETAILS", "CHAT_WITH_AN_AGENT", "CHECK_DEPOSIT_STATUS", "CHECK_WALLET_BALANCE", "CONTACT_NUMBER", "CRITICISM", "DEDUCTED_AMOUNT_NOT_RECEIVED", "DELETE_PAN_CARD", "DOWNLOAD_POWERPLAY11", "FAIRPLAY_VIOLATIONS", "FAKE_TEAMS", "FEEDBACK", 
"GREETINGS_DAY", "HOW_POINTS_CALCULATED", "HOW_TO_PLAY", "INSTANT_WITHDRAWAL", "JOIN_CONTEST", "LESS_WINNINGS_AMOUNT", "MATCH_ABANDONED", "NEW_TEAM_PATTERN", "NO_EMAIL_CONFIRMATION", "OFFERS_AND_REFERRALS", "PAN_VERIFICATION_FAILED", "POINTS_NOT_UPDATED", "PRESENCE", "REFUND_OF_ADDED_CASH", "REFUND_OF_WRONG_AMOUNT", "SIGNUP_BONUS", "TAXES_ON_WINNINGS", "TEAM_DEADLINE", "THANKS", "TYPES_BONUS", "TYPES_CONTESTS", "UNUTILIZED_MONEY", "UPDATE_APP", "VERIFY_EMAIL", "VERIFY_MOBILE", "VERIFY_PAN", "WHAT_IF_THERES_A_TIE", "WHEN_SCORES_UPDATED", "WHEN_WINNINGS_DISTRIBUTED", "WHY_VERIFY", "WINNINGS", "WITHDRAWAL_INTRO", "WITHDRAWAL_STATUS", "WITHDRAWAL_TIME", "WITHDRAW_CASH_BONUS", "WRONG_SCORES"], "labels_map": {"ACCOUNT_BALANCE_DEDUCTED": 0, "ACCOUNT_NOT_VERIFIED": 1, "ACCOUNT_RESET": 2, "APPRECIATION": 3, "BANK_VERIFICATION_DETAILS": 4, "CANNOT_SEE_JOINED_CONTESTS": 5, "CAPABILITIES": 6, "CASH_BONUS": 7, "CASH_BONUS_EXPIRY": 8, "CHANGE_BANK_ACCOUNT": 9, "CHANGE_MOBILE_NUMBER": 10, "CHANGE_PROFILE_TEAM_DETAILS": 11, "CHAT_WITH_AN_AGENT": 12, "CHECK_DEPOSIT_STATUS": 13, "CHECK_WALLET_BALANCE": 14, "CONTACT_NUMBER": 15, "CRITICISM": 16, "DEDUCTED_AMOUNT_NOT_RECEIVED": 17, "DELETE_PAN_CARD": 18, "DOWNLOAD_POWERPLAY11": 19, "FAIRPLAY_VIOLATIONS": 20, "FAKE_TEAMS": 21, "FEEDBACK": 22, "GREETINGS_DAY": 23, "HOW_POINTS_CALCULATED": 24, "HOW_TO_PLAY": 25, "INSTANT_WITHDRAWAL": 26, "JOIN_CONTEST": 27, "LESS_WINNINGS_AMOUNT": 28, "MATCH_ABANDONED": 29, "NEW_TEAM_PATTERN": 30, "NO_EMAIL_CONFIRMATION": 31, "OFFERS_AND_REFERRALS": 32, "PAN_VERIFICATION_FAILED": 33, "POINTS_NOT_UPDATED": 34, "PRESENCE": 35, "REFUND_OF_ADDED_CASH": 36, "REFUND_OF_WRONG_AMOUNT": 37, "SIGNUP_BONUS": 38, "TAXES_ON_WINNINGS": 39, "TEAM_DEADLINE": 40, "THANKS": 41, "TYPES_BONUS": 42, "TYPES_CONTESTS": 43, "UNUTILIZED_MONEY": 44, "UPDATE_APP": 45, "VERIFY_EMAIL": 46, "VERIFY_MOBILE": 47, "VERIFY_PAN": 48, "WHAT_IF_THERES_A_TIE": 49, "WHEN_SCORES_UPDATED": 50, "WHEN_WINNINGS_DISTRIBUTED": 51, "WHY_VERIFY": 52, 
"WINNINGS": 53, "WITHDRAWAL_INTRO": 54, "WITHDRAWAL_STATUS": 55, "WITHDRAWAL_TIME": 56, "WITHDRAW_CASH_BONUS": 57, "WRONG_SCORES": 58}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1} -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11_subset/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11_subset/tblogs/events.out.tfevents.1597163469.haptik-ai-research-mum-ml-2-vm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/powerplay11_subset/tblogs/events.out.tfevents.1597163469.haptik-ai-research-mum-ml-2-vm -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11_subset/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": true, "model_max_length": 512} -------------------------------------------------------------------------------- /platforms/bert/bert_models/powerplay11_subset/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/powerplay11_subset/training_args.bin -------------------------------------------------------------------------------- 
/platforms/bert/bert_models/powerplay11_subset/training_progress_scores.csv: -------------------------------------------------------------------------------- 1 | global_step,mcc,train_loss,eval_loss 2 | 33,0.006966065033674241,4.275940418243408,4.088082140142268 3 | 66,0.10181784338639555,4.076957702636719,3.853632081638683 4 | 99,0.31666196985103084,3.524799346923828,3.481291640888561 5 | 100,0.33622205757382423,3.2819881439208984,3.4670489845853862 6 | 132,0.5491825774659016,3.228721857070923,3.1800931294759116 7 | 165,0.7988562456319384,2.9554340839385986,2.7183942072319263 8 | 198,0.9029989574195603,2.1860103607177734,2.3607854120659106 9 | 200,0.8998233054849961,2.3953323364257812,2.3395220908251675 10 | 231,0.9608116153033132,2.4642422199249268,1.8749224055897107 11 | 264,0.976456540029086,1.8713085651397705,1.491812203869675 12 | 297,1.0,1.0459480285644531,1.167946174289241 13 | 300,1.0,1.2138229608535767,1.1448865298068884 14 | 330,1.0,1.2414703369140625,0.8946083594452251 15 | 363,0.9960677596969424,1.0370596647262573,0.6936323516296617 16 | 396,1.0,0.7679606676101685,0.5725420885013811 17 | 400,1.0,0.7788105010986328,0.5357545216878256 18 | 429,1.0,0.4268004894256592,0.4331010647795417 19 | 462,1.0,0.48410671949386597,0.3253726607019251 20 | 495,1.0,0.2329229712486267,0.27336488235177414 21 | 500,1.0,0.1546279340982437,0.2654947123744271 22 | 528,1.0,0.12326618283987045,0.20769070269483508 23 | 561,1.0,0.07649935036897659,0.16234044392000546 24 | 594,1.0,0.11826501041650772,0.1328082587005514 25 | 600,1.0,0.1337045580148697,0.1261398276370583 26 | 627,1.0,0.055853527039289474,0.10562320934100584 27 | 660,1.0,0.26937466859817505,0.08653705059127374 28 | 693,1.0,0.06533235311508179,0.07060574209599783 29 | 700,1.0,0.04325779899954796,0.06917516362260688 30 | 726,1.0,0.04639727249741554,0.06082955251137415 31 | 759,1.0,0.0582999512553215,0.05410802725589636 32 | 792,1.0,0.0368194580078125,0.049195334428187576 33 | 
800,1.0,0.03648754954338074,0.04870836216617714 34 | 825,1.0,0.02754053846001625,0.04354375335528995 35 | 858,1.0,0.03395608440041542,0.03737479882935683 36 | 891,1.0,0.09397966414690018,0.033415756950324234 37 | 900,1.0,0.037745073437690735,0.032758059092994896 38 | 924,1.0,0.02583269588649273,0.03013399328020486 39 | 957,1.0,0.0232711024582386,0.027921919456937096 40 | 990,1.0,0.023077527061104774,0.025824453455932213 41 | 1000,1.0,0.023229582235217094,0.025291424921967766 42 | 1023,1.0,0.03731586039066315,0.024271921311138256 43 | 1056,1.0,0.02345268800854683,0.022990159295273548 44 | 1089,1.0,0.02155761979520321,0.0219306516827959 45 | 1100,1.0,0.017719632014632225,0.021582975898954002 46 | 1122,1.0,0.013472222723066807,0.020954335305952664 47 | 1155,1.0,0.04192318022251129,0.02015054000146461 48 | 1188,1.0,0.01810283586382866,0.019379823920175884 49 | 1200,1.0,0.028326159343123436,0.019140559446179504 50 | 1221,1.0,0.013884490355849266,0.018714478070085697 51 | 1254,1.0,0.01331179216504097,0.01814545854700334 52 | 1287,1.0,0.015835518017411232,0.01765377311543985 53 | 1300,1.0,0.013886284083127975,0.017462069253352554 54 | 1320,1.0,0.014652382582426071,0.01720011445947669 55 | 1353,1.0,0.02778574638068676,0.016815894622017036 56 | 1386,1.0,0.012468253262341022,0.016461734010866196 57 | 1400,1.0,0.02463061362504959,0.016339947497754387 58 | 1419,1.0,0.018762772902846336,0.0161741218162757 59 | 1452,1.0,0.0297338105738163,0.01592277100479061 60 | 1485,1.0,0.010572281666100025,0.015703567698823685 61 | 1500,1.0,0.014699919149279594,0.015619493484722845 62 | 1518,1.0,0.011565894819796085,0.01553100926067793 63 | 1551,1.0,0.01407864410430193,0.015407157356314587 64 | 1584,1.0,0.015383089892566204,0.015314364721151915 65 | 1600,1.0,0.022277144715189934,0.015281464396552607 66 | 1617,1.0,0.017058053985238075,0.015256680124862627 67 | 1650,1.0,0.013741384260356426,0.015237725763158365 68 | 
-------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress/best_model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForSequenceClassification" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "id2label": { 11 | "0": "LABEL_0", 12 | "1": "LABEL_1", 13 | "2": "LABEL_2", 14 | "3": "LABEL_3", 15 | "4": "LABEL_4", 16 | "5": "LABEL_5", 17 | "6": "LABEL_6", 18 | "7": "LABEL_7", 19 | "8": "LABEL_8", 20 | "9": "LABEL_9", 21 | "10": "LABEL_10", 22 | "11": "LABEL_11", 23 | "12": "LABEL_12", 24 | "13": "LABEL_13", 25 | "14": "LABEL_14", 26 | "15": "LABEL_15", 27 | "16": "LABEL_16", 28 | "17": "LABEL_17", 29 | "18": "LABEL_18", 30 | "19": "LABEL_19", 31 | "20": "LABEL_20" 32 | }, 33 | "initializer_range": 0.02, 34 | "intermediate_size": 3072, 35 | "label2id": { 36 | "LABEL_0": 0, 37 | "LABEL_1": 1, 38 | "LABEL_10": 10, 39 | "LABEL_11": 11, 40 | "LABEL_12": 12, 41 | "LABEL_13": 13, 42 | "LABEL_14": 14, 43 | "LABEL_15": 15, 44 | "LABEL_16": 16, 45 | "LABEL_17": 17, 46 | "LABEL_18": 18, 47 | "LABEL_19": 19, 48 | "LABEL_2": 2, 49 | "LABEL_20": 20, 50 | "LABEL_3": 3, 51 | "LABEL_4": 4, 52 | "LABEL_5": 5, 53 | "LABEL_6": 6, 54 | "LABEL_7": 7, 55 | "LABEL_8": 8, 56 | "LABEL_9": 9 57 | }, 58 | "layer_norm_eps": 1e-12, 59 | "max_position_embeddings": 512, 60 | "model_type": "bert", 61 | "num_attention_heads": 12, 62 | "num_hidden_layers": 12, 63 | "pad_token_id": 0, 64 | "type_vocab_size": 2, 65 | "vocab_size": 30522 66 | } 67 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress/best_model/eval_results.txt: -------------------------------------------------------------------------------- 1 | eval_loss = 0.0024695384270716006 2 | mcc = 1.0 3 | 
-------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress/best_model/model_args.json: -------------------------------------------------------------------------------- 1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/sofmattress/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/sofmattress/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/sofmattress/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 123, "weight_decay": 0, "labels_list": ["100_NIGHT_TRIAL_OFFER", "ABOUT_SOF_MATTRESS", "CANCEL_ORDER", "CHECK_PINCODE", "COD", "COMPARISON", "DELAY_IN_DELIVERY", "DISTRIBUTORS", "EMI", "ERGO_FEATURES", "LEAD_GEN", "MATTRESS_COST", "OFFERS", "ORDER_STATUS", "ORTHO_FEATURES", "PILLOWS", 
"PRODUCT_VARIANTS", "RETURN_EXCHANGE", "SIZE_CUSTOMIZATION", "WARRANTY", "WHAT_SIZE_TO_ORDER"], "labels_map": {"100_NIGHT_TRIAL_OFFER": 0, "ABOUT_SOF_MATTRESS": 1, "CANCEL_ORDER": 2, "CHECK_PINCODE": 3, "COD": 4, "COMPARISON": 5, "DELAY_IN_DELIVERY": 6, "DISTRIBUTORS": 7, "EMI": 8, "ERGO_FEATURES": 9, "LEAD_GEN": 10, "MATTRESS_COST": 11, "OFFERS": 12, "ORDER_STATUS": 13, "ORTHO_FEATURES": 14, "PILLOWS": 15, "PRODUCT_VARIANTS": 16, "RETURN_EXCHANGE": 17, "SIZE_CUSTOMIZATION": 18, "WARRANTY": 19, "WHAT_SIZE_TO_ORDER": 20}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1} -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress/best_model/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress/best_model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": true, "model_max_length": 512} -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress/best_model/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/sofmattress/best_model/training_args.bin -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress/config.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForSequenceClassification" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "id2label": { 11 | "0": "LABEL_0", 12 | "1": "LABEL_1", 13 | "2": "LABEL_2", 14 | "3": "LABEL_3", 15 | "4": "LABEL_4", 16 | "5": "LABEL_5", 17 | "6": "LABEL_6", 18 | "7": "LABEL_7", 19 | "8": "LABEL_8", 20 | "9": "LABEL_9", 21 | "10": "LABEL_10", 22 | "11": "LABEL_11", 23 | "12": "LABEL_12", 24 | "13": "LABEL_13", 25 | "14": "LABEL_14", 26 | "15": "LABEL_15", 27 | "16": "LABEL_16", 28 | "17": "LABEL_17", 29 | "18": "LABEL_18", 30 | "19": "LABEL_19", 31 | "20": "LABEL_20" 32 | }, 33 | "initializer_range": 0.02, 34 | "intermediate_size": 3072, 35 | "label2id": { 36 | "LABEL_0": 0, 37 | "LABEL_1": 1, 38 | "LABEL_10": 10, 39 | "LABEL_11": 11, 40 | "LABEL_12": 12, 41 | "LABEL_13": 13, 42 | "LABEL_14": 14, 43 | "LABEL_15": 15, 44 | "LABEL_16": 16, 45 | "LABEL_17": 17, 46 | "LABEL_18": 18, 47 | "LABEL_19": 19, 48 | "LABEL_2": 2, 49 | "LABEL_20": 20, 50 | "LABEL_3": 3, 51 | "LABEL_4": 4, 52 | "LABEL_5": 5, 53 | "LABEL_6": 6, 54 | "LABEL_7": 7, 55 | "LABEL_8": 8, 56 | "LABEL_9": 9 57 | }, 58 | "layer_norm_eps": 1e-12, 59 | "max_position_embeddings": 512, 60 | "model_type": "bert", 61 | "num_attention_heads": 12, 62 | "num_hidden_layers": 12, 63 | "pad_token_id": 0, 64 | "type_vocab_size": 2, 65 | "vocab_size": 30522 66 | } 67 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress/eval_results.txt: -------------------------------------------------------------------------------- 1 | eval_loss = 0.002091184871770987 2 | mcc = 1.0 3 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress/model_args.json: 
-------------------------------------------------------------------------------- 1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/sofmattress/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/sofmattress/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/sofmattress/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 123, "weight_decay": 0, "labels_list": ["100_NIGHT_TRIAL_OFFER", "ABOUT_SOF_MATTRESS", "CANCEL_ORDER", "CHECK_PINCODE", "COD", "COMPARISON", "DELAY_IN_DELIVERY", "DISTRIBUTORS", "EMI", "ERGO_FEATURES", "LEAD_GEN", "MATTRESS_COST", "OFFERS", "ORDER_STATUS", "ORTHO_FEATURES", "PILLOWS", "PRODUCT_VARIANTS", "RETURN_EXCHANGE", "SIZE_CUSTOMIZATION", "WARRANTY", "WHAT_SIZE_TO_ORDER"], "labels_map": {"100_NIGHT_TRIAL_OFFER": 0, 
"ABOUT_SOF_MATTRESS": 1, "CANCEL_ORDER": 2, "CHECK_PINCODE": 3, "COD": 4, "COMPARISON": 5, "DELAY_IN_DELIVERY": 6, "DISTRIBUTORS": 7, "EMI": 8, "ERGO_FEATURES": 9, "LEAD_GEN": 10, "MATTRESS_COST": 11, "OFFERS": 12, "ORDER_STATUS": 13, "ORTHO_FEATURES": 14, "PILLOWS": 15, "PRODUCT_VARIANTS": 16, "RETURN_EXCHANGE": 17, "SIZE_CUSTOMIZATION": 18, "WARRANTY": 19, "WHAT_SIZE_TO_ORDER": 20}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1} -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress/tblogs/events.out.tfevents.1597159859.haptik-ai-research-mum-ml-2-vm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/sofmattress/tblogs/events.out.tfevents.1597159859.haptik-ai-research-mum-ml-2-vm -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": true, "model_max_length": 512} -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress/training_args.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/sofmattress/training_args.bin -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress/training_progress_scores.csv: -------------------------------------------------------------------------------- 1 | global_step,mcc,train_loss,eval_loss 2 | 41,0.03741377683874904,2.9892494678497314,2.970834377335339 3 | 82,0.27124933812063723,2.810255527496338,2.6317273523749374 4 | 100,0.4951481145617849,2.4308252334594727,2.384494185447693 5 | 123,0.7154417194916144,2.1131794452667236,1.9524706195040447 6 | 164,0.9177045047865772,1.0891566276550293,1.0203317098501252 7 | 200,0.9806938791151254,0.6156212091445923,0.4422170934153766 8 | 205,0.9903337975187425,0.6984859108924866,0.38467208968430033 9 | 246,0.9967804561889998,0.16406548023223877,0.1386817933582678 10 | 287,1.0,0.04927260801196098,0.05331289704616477 11 | 300,1.0,0.040390364825725555,0.04004393454368522 12 | 328,0.9967807673479875,0.02060823328793049,0.02845955994434473 13 | 369,1.0,0.015270467847585678,0.0164372295868106 14 | 400,1.0,0.013940568082034588,0.013048355899206021 15 | 410,1.0,0.014762028120458126,0.012293782971072488 16 | 451,1.0,0.010723605751991272,0.009894206552062093 17 | 492,1.0,0.00876891054213047,0.008308980886529132 18 | 500,1.0,0.007853677496314049,0.008068628541034897 19 | 533,1.0,0.007977331057190895,0.0071861760622662745 20 | 574,1.0,0.007647466380149126,0.006318186761856806 21 | 600,1.0,0.005637817084789276,0.005862325509419528 22 | 615,1.0,0.0068319146521389484,0.005629960061391679 23 | 656,1.0,0.005148341413587332,0.005074366859001357 24 | 697,1.0,0.004916307516396046,0.004619956442450241 25 | 700,1.0,0.004129624925553799,0.004589953357580959 26 | 738,1.0,0.004497524816542864,0.0042399843166605 27 | 779,1.0,0.004239839501678944,0.003916610889818247 28 | 800,1.0,0.0041899955831468105,0.003770596035415443 29 | 
820,1.0,0.003951283171772957,0.003637692340218076 30 | 861,1.0,0.003503492334857583,0.003397048292000119 31 | 900,1.0,0.0032421466894447803,0.0031967297332679352 32 | 902,1.0,0.0031739480327814817,0.003187044398722852 33 | 943,1.0,0.0034325651358813047,0.003001999433674827 34 | 984,1.0,0.0032226387411355972,0.0028411815306398926 35 | 1000,1.0,0.0030870833434164524,0.0027810372468992702 36 | 1025,1.0,0.002899862127378583,0.002694926326867284 37 | 1066,1.0,0.002674049697816372,0.0025664146978226377 38 | 1100,1.0,0.002695617265999317,0.0024695384270716006 39 | 1107,1.0,0.0022791139781475067,0.002450472715015455 40 | 1148,1.0,0.002618222963064909,0.002346959058763232 41 | 1189,1.0,0.0022062344942241907,0.002252724277219031 42 | 1200,1.0,0.0024347787257283926,0.0022290816911064632 43 | 1230,1.0,0.002199501032009721,0.002167849923574888 44 | 1271,1.0,0.002031237818300724,0.002091184871770987 45 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress_subset/best_model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForSequenceClassification" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "id2label": { 11 | "0": "LABEL_0", 12 | "1": "LABEL_1", 13 | "2": "LABEL_2", 14 | "3": "LABEL_3", 15 | "4": "LABEL_4", 16 | "5": "LABEL_5", 17 | "6": "LABEL_6", 18 | "7": "LABEL_7", 19 | "8": "LABEL_8", 20 | "9": "LABEL_9", 21 | "10": "LABEL_10", 22 | "11": "LABEL_11", 23 | "12": "LABEL_12", 24 | "13": "LABEL_13", 25 | "14": "LABEL_14", 26 | "15": "LABEL_15", 27 | "16": "LABEL_16", 28 | "17": "LABEL_17", 29 | "18": "LABEL_18", 30 | "19": "LABEL_19", 31 | "20": "LABEL_20" 32 | }, 33 | "initializer_range": 0.02, 34 | "intermediate_size": 3072, 35 | "label2id": { 36 | "LABEL_0": 0, 37 | "LABEL_1": 1, 38 | "LABEL_10": 10, 
39 | "LABEL_11": 11, 40 | "LABEL_12": 12, 41 | "LABEL_13": 13, 42 | "LABEL_14": 14, 43 | "LABEL_15": 15, 44 | "LABEL_16": 16, 45 | "LABEL_17": 17, 46 | "LABEL_18": 18, 47 | "LABEL_19": 19, 48 | "LABEL_2": 2, 49 | "LABEL_20": 20, 50 | "LABEL_3": 3, 51 | "LABEL_4": 4, 52 | "LABEL_5": 5, 53 | "LABEL_6": 6, 54 | "LABEL_7": 7, 55 | "LABEL_8": 8, 56 | "LABEL_9": 9 57 | }, 58 | "layer_norm_eps": 1e-12, 59 | "max_position_embeddings": 512, 60 | "model_type": "bert", 61 | "num_attention_heads": 12, 62 | "num_hidden_layers": 12, 63 | "pad_token_id": 0, 64 | "type_vocab_size": 2, 65 | "vocab_size": 30522 66 | } 67 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress_subset/best_model/eval_results.txt: -------------------------------------------------------------------------------- 1 | eval_loss = 0.004921248577454168 2 | mcc = 1.0 3 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress_subset/best_model/model_args.json: -------------------------------------------------------------------------------- 1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/sofmattress_subset/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, 
"multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": "../bert_models/sofmattress_subset/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/sofmattress_subset/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 69, "weight_decay": 0, "labels_list": ["100_NIGHT_TRIAL_OFFER", "ABOUT_SOF_MATTRESS", "CANCEL_ORDER", "CHECK_PINCODE", "COD", "COMPARISON", "DELAY_IN_DELIVERY", "DISTRIBUTORS", "EMI", "ERGO_FEATURES", "LEAD_GEN", "MATTRESS_COST", "OFFERS", "ORDER_STATUS", "ORTHO_FEATURES", "PILLOWS", "PRODUCT_VARIANTS", "RETURN_EXCHANGE", "SIZE_CUSTOMIZATION", "WARRANTY", "WHAT_SIZE_TO_ORDER"], "labels_map": {"100_NIGHT_TRIAL_OFFER": 0, "ABOUT_SOF_MATTRESS": 1, "CANCEL_ORDER": 2, "CHECK_PINCODE": 3, "COD": 4, "COMPARISON": 5, "DELAY_IN_DELIVERY": 6, "DISTRIBUTORS": 7, "EMI": 8, "ERGO_FEATURES": 9, "LEAD_GEN": 10, "MATTRESS_COST": 11, "OFFERS": 12, "ORDER_STATUS": 13, "ORTHO_FEATURES": 14, "PILLOWS": 15, "PRODUCT_VARIANTS": 16, "RETURN_EXCHANGE": 17, "SIZE_CUSTOMIZATION": 18, "WARRANTY": 19, "WHAT_SIZE_TO_ORDER": 20}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1} -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress_subset/best_model/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": 
"[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress_subset/best_model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": true, "model_max_length": 512} -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress_subset/best_model/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/sofmattress_subset/best_model/training_args.bin -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress_subset/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForSequenceClassification" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "id2label": { 11 | "0": "LABEL_0", 12 | "1": "LABEL_1", 13 | "2": "LABEL_2", 14 | "3": "LABEL_3", 15 | "4": "LABEL_4", 16 | "5": "LABEL_5", 17 | "6": "LABEL_6", 18 | "7": "LABEL_7", 19 | "8": "LABEL_8", 20 | "9": "LABEL_9", 21 | "10": "LABEL_10", 22 | "11": "LABEL_11", 23 | "12": "LABEL_12", 24 | "13": "LABEL_13", 25 | "14": "LABEL_14", 26 | "15": "LABEL_15", 27 | "16": "LABEL_16", 28 | "17": "LABEL_17", 29 | "18": "LABEL_18", 30 | "19": "LABEL_19", 31 | "20": "LABEL_20" 32 | }, 33 | "initializer_range": 0.02, 34 | "intermediate_size": 3072, 35 | "label2id": { 36 | "LABEL_0": 0, 37 | "LABEL_1": 1, 38 | "LABEL_10": 10, 39 | "LABEL_11": 11, 40 | "LABEL_12": 12, 41 | "LABEL_13": 13, 42 | "LABEL_14": 14, 43 | "LABEL_15": 15, 44 | 
"LABEL_16": 16, 45 | "LABEL_17": 17, 46 | "LABEL_18": 18, 47 | "LABEL_19": 19, 48 | "LABEL_2": 2, 49 | "LABEL_20": 20, 50 | "LABEL_3": 3, 51 | "LABEL_4": 4, 52 | "LABEL_5": 5, 53 | "LABEL_6": 6, 54 | "LABEL_7": 7, 55 | "LABEL_8": 8, 56 | "LABEL_9": 9 57 | }, 58 | "layer_norm_eps": 1e-12, 59 | "max_position_embeddings": 512, 60 | "model_type": "bert", 61 | "num_attention_heads": 12, 62 | "num_hidden_layers": 12, 63 | "pad_token_id": 0, 64 | "type_vocab_size": 2, 65 | "vocab_size": 30522 66 | } 67 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress_subset/eval_results.txt: -------------------------------------------------------------------------------- 1 | eval_loss = 0.004485103038504072 2 | mcc = 1.0 3 | -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress_subset/model_args.json: -------------------------------------------------------------------------------- 1 | {"adam_epsilon": 1e-08, "best_model_dir": "../bert_models/sofmattress_subset/best_model/", "cache_dir": "cache_dir/", "custom_layer_parameters": [], "custom_parameter_groups": [], "train_custom_parameters_only": false, "config": {}, "do_lower_case": true, "early_stopping_consider_epochs": true, "early_stopping_delta": 0.0005, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 5, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": true, "evaluate_during_training_silent": false, "evaluate_during_training_steps": 100, "evaluate_during_training_verbose": true, "fp16": false, "fp16_opt_level": "O1", "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": 42, "max_grad_norm": 1.0, "max_seq_length": 512, "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "num_train_epochs": 50, "output_dir": 
"../bert_models/sofmattress_subset/", "overwrite_output_dir": true, "process_count": 4, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_steps": -1, "save_optimizer_and_scheduler": true, "silent": false, "tensorboard_dir": "../bert_models/sofmattress_subset/tblogs/", "train_batch_size": 8, "use_cached_eval_features": false, "use_early_stopping": true, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 69, "weight_decay": 0, "labels_list": ["100_NIGHT_TRIAL_OFFER", "ABOUT_SOF_MATTRESS", "CANCEL_ORDER", "CHECK_PINCODE", "COD", "COMPARISON", "DELAY_IN_DELIVERY", "DISTRIBUTORS", "EMI", "ERGO_FEATURES", "LEAD_GEN", "MATTRESS_COST", "OFFERS", "ORDER_STATUS", "ORTHO_FEATURES", "PILLOWS", "PRODUCT_VARIANTS", "RETURN_EXCHANGE", "SIZE_CUSTOMIZATION", "WARRANTY", "WHAT_SIZE_TO_ORDER"], "labels_map": {"100_NIGHT_TRIAL_OFFER": 0, "ABOUT_SOF_MATTRESS": 1, "CANCEL_ORDER": 2, "CHECK_PINCODE": 3, "COD": 4, "COMPARISON": 5, "DELAY_IN_DELIVERY": 6, "DISTRIBUTORS": 7, "EMI": 8, "ERGO_FEATURES": 9, "LEAD_GEN": 10, "MATTRESS_COST": 11, "OFFERS": 12, "ORDER_STATUS": 13, "ORTHO_FEATURES": 14, "PILLOWS": 15, "PRODUCT_VARIANTS": 16, "RETURN_EXCHANGE": 17, "SIZE_CUSTOMIZATION": 18, "WARRANTY": 19, "WHAT_SIZE_TO_ORDER": 20}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "regression": false, "sliding_window": false, "stride": 0.8, "tie_value": 1} -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress_subset/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} 
-------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress_subset/tblogs/events.out.tfevents.1597162813.haptik-ai-research-mum-ml-2-vm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/sofmattress_subset/tblogs/events.out.tfevents.1597162813.haptik-ai-research-mum-ml-2-vm -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress_subset/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": true, "model_max_length": 512} -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress_subset/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/bert/bert_models/sofmattress_subset/training_args.bin -------------------------------------------------------------------------------- /platforms/bert/bert_models/sofmattress_subset/training_progress_scores.csv: -------------------------------------------------------------------------------- 1 | global_step,mcc,train_loss,eval_loss 2 | 23,0.06857313816302607,3.296887159347534,2.970322909562484 3 | 46,0.11255032170974624,3.0493152141571045,2.7839398798735244 4 | 69,0.38557671771111646,2.382086753845215,2.502764193908028 5 | 92,0.6421384771966505,2.477137565612793,2.1489793321360713 6 | 100,0.7418468326305145,2.250485897064209,1.967790370402129 7 | 115,0.9012127750551232,1.7204360961914062,1.6867278451504915 8 | 138,0.9649152017652197,1.0926810503005981,1.1395558429800945 9 | 161,0.9824078059539685,0.8549859523773193,0.7397814315298329 10 | 
184,1.0,0.6483610272407532,0.41853353251581604 11 | 200,1.0,0.2512955963611603,0.29198158629562543 12 | 207,1.0,0.25165069103240967,0.25284040978421335 13 | 230,1.0,0.13244327902793884,0.12950243800878525 14 | 253,1.0,0.06572035700082779,0.07247054738842923 15 | 276,1.0,0.03972963988780975,0.043389985013915146 16 | 299,1.0,0.023962117731571198,0.02914676556120748 17 | 300,1.0,0.03062400594353676,0.028741382791296295 18 | 322,1.0,0.0201791450381279,0.022470875197778576 19 | 345,1.0,0.01955774612724781,0.018866707449373993 20 | 368,1.0,0.01244509220123291,0.01629435388452333 21 | 391,1.0,0.015682876110076904,0.014368740598792616 22 | 400,1.0,0.014525731094181538,0.01375624415991099 23 | 414,1.0,0.014349344186484814,0.012883992379774218 24 | 437,1.0,0.013241786509752274,0.011689582839608192 25 | 460,1.0,0.00939303170889616,0.010714562569299469 26 | 483,1.0,0.009124535135924816,0.009903177801195694 27 | 500,1.0,0.010284009389579296,0.009391567993747152 28 | 506,1.0,0.010540238581597805,0.00921912825382922 29 | 529,1.0,0.00783354602754116,0.008615645600239868 30 | 552,1.0,0.010056305676698685,0.008105209866619629 31 | 575,1.0,0.007870323024690151,0.007658644133935804 32 | 598,1.0,0.007335765287280083,0.007270081859567891 33 | 600,1.0,0.006628462113440037,0.007236725832943035 34 | 621,1.0,0.006512134801596403,0.006916760689700427 35 | 644,1.0,0.005578524433076382,0.006608464593148749 36 | 667,1.0,0.005031981505453587,0.006333170053751573 37 | 690,1.0,0.007127638440579176,0.006084576006168904 38 | 700,1.0,0.005631319712847471,0.005984096929592931 39 | 713,1.0,0.005153062753379345,0.005861621997926546 40 | 736,1.0,0.005420178174972534,0.005661310709041098 41 | 759,1.0,0.00673981849104166,0.00548125121175595 42 | 782,1.0,0.005228007677942514,0.0053189582515345965 43 | 800,1.0,0.005543965380638838,0.005201474702714578 44 | 805,1.0,0.00513844657689333,0.005171149321224379 45 | 828,1.0,0.0055143460631370544,0.005038455999253884 46 | 
#!/bin/bash
#
# Train and evaluate BERT on every dataset in `datasets`, first on the full
# training files and then on the "_subset" training files, and copy each run's
# predictions.csv into ../preds/.
#
# Every path below is relative, so run this from the directory that contains
# bert-bot-only-data-es.py, up-requirements.txt and down-requirements.txt.
#
# up-requirements.txt is installed before the runs and down-requirements.txt is
# installed again when the script exits. The second install is registered as an
# EXIT trap so it is not skipped when the script stops early.

#declare -a datasets=("sofmattress" "powerplay11")
declare -a datasets=("curekart")

# Re-install the packages pinned in down-requirements.txt.
restore_requirements() {
    pip install -U -r down-requirements.txt
}

# run_experiment DATASET [SUFFIX]
#   DATASET  base name used for the train/test CSV files.
#   SUFFIX   optional string appended to DATASET for the training file, the
#            output directory and the copied predictions file ("" or "_subset").
#            The test file never carries the suffix.
# Returns non-zero, without copying anything, when the training script fails,
# so predictions left over from an earlier run are never published.
run_experiment() {
    local dataset="$1"
    local suffix="${2:-}"
    local run_name="${dataset}${suffix}"
    local output_dir="../bert_models/${run_name}/"

    # No trailing backslash after the last argument: the command must end here
    # regardless of what the following line contains.
    if ! python bert-bot-only-data-es.py \
        --train_file "../train/${run_name}_train.csv" \
        --test_file "../test/${dataset}_test.csv" \
        --output_dir "${output_dir}" \
        --model_type "bert" \
        --model_name "bert-base-uncased" \
        --do_lower_case true \
        --seed 42 \
        --learning_rate 0.00004 \
        --batch_size 8 \
        --epochs 50 \
        --eval_frac 0.0 \
        --eval_every_n_steps 100 \
        --use_early_stopping true \
        --early_stopping_patience 5 \
        --early_stopping_delta 0.0005
    then
        echo "run_bert_experiments: training failed for ${run_name}; predictions not copied" >&2
        return 1
    fi

    cp "${output_dir}predictions.csv" "../preds/bert_${run_name}.csv"
}

# Register the restore step before touching the environment so that a failed or
# partial install of up-requirements.txt is rolled back as well.
trap restore_requirements EXIT

if ! pip install -U -r up-requirements.txt; then
    echo "run_bert_experiments: could not install up-requirements.txt" >&2
    exit 1
fi

# cp does not create its destination directory.
mkdir -p ../preds

# Same order as before: every dataset on the full training data, then every
# dataset on the subset training data. A failed run does not stop later runs,
# but it is reflected in the exit status.
status=0
for suffix in "" "_subset"
do
    for dataset in "${datasets[@]}"
    do
        run_experiment "${dataset}" "${suffix}" || status=1
    done
done

exit "${status}"
"""Collapse a (label, sentence) CSV into one row per label.

Usage:
    python convert_data.py <input.csv> [<output.csv>]

The input CSV must have ``label`` and ``sentence`` columns. Every distinct
label produces one output row with the columns ``node_name`` (the label),
``question`` (all sentences of that label joined with ``|``) and ``answer``
(the placeholder text ``Answer for <label>``).
NOTE(review): presumably this is the layout the Haptik import expects, since
the script lives under platforms/haptik -- confirm.

Without an explicit output path the result is written to the current working
directory under the input's file name, as before. The script refuses to run
when that would overwrite the input file itself.
"""
import sys
import pathlib

import pandas as pd

# Column order of the generated CSV.
OUTPUT_COLUMNS = ['node_name', 'question', 'answer']


def convert_frame(df):
    """Build the per-label frame from a frame with ``label``/``sentence`` columns.

    Args:
        df: DataFrame containing at least the columns ``label`` and ``sentence``.

    Returns:
        DataFrame with the columns in ``OUTPUT_COLUMNS``, one row per distinct
        label, in the order ``groupby`` yields the groups. The columns are
        present even when ``df`` has no rows.

    Raises:
        KeyError: if ``label`` or ``sentence`` is missing from ``df``.
    """
    rows = []
    for label, group in df.groupby('label'):
        # Empty CSV cells are read as NaN (a float) and str.join raises
        # TypeError on anything that is not a str, so drop missing sentences
        # and stringify the rest before joining.
        sentences = group['sentence'].dropna().astype(str)
        rows.append({
            'node_name': label,
            'question': '|'.join(sentences),
            'answer': f'Answer for {label}',
        })
    return pd.DataFrame(rows, columns=OUTPUT_COLUMNS)


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``. ``argv[0]`` is the input CSV, the optional
            ``argv[1]`` is the output CSV.

    Raises:
        SystemExit: on a wrong number of arguments, or when the output path
            resolves to the input path.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not 1 <= len(args) <= 2:
        sys.exit('usage: convert_data.py <input.csv> [<output.csv>]')

    in_path = pathlib.Path(args[0])
    print(in_path)

    # Historical default: same file name, but in the current directory.
    out_path = pathlib.Path(args[1]) if len(args) == 2 else pathlib.Path(in_path.name)

    # Running the script from the directory that holds the input would
    # otherwise replace the source data with the converted file.
    if out_path.resolve() == in_path.resolve():
        sys.exit(f'refusing to overwrite input file {in_path}; '
                 'pass an explicit output path or run from another directory')

    out_df = convert_frame(pd.read_csv(str(in_path)))
    out_df.to_csv(str(out_path), index=False)


if __name__ == '__main__':
    main()
"source": [ 28 | "luis_dict = {\n", 29 | " \"luis_schema_version\": \"6.0.0\",\n", 30 | " \"intents\": None,\n", 31 | " \"entities\": [],\n", 32 | " \"hierarchicals\": [],\n", 33 | " \"composites\": [],\n", 34 | " \"closedLists\": [],\n", 35 | " \"prebuiltEntities\": [],\n", 36 | " \"utterances\": None,\n", 37 | " \"versionId\": \"0.1\",\n", 38 | " \"name\": bot_name,\n", 39 | " \"desc\": \"\",\n", 40 | " \"culture\": \"en-us\",\n", 41 | " \"tokenizerVersion\": \"1.0.0\",\n", 42 | " \"patternAnyEntities\": [],\n", 43 | " \"regex_entities\": [],\n", 44 | " \"phraselists\": [],\n", 45 | " \"regex_features\": [],\n", 46 | " \"patterns\": [],\n", 47 | " \"settings\": [],\n", 48 | "}" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/html": [ 59 | "
\n", 60 | "\n", 73 | "\n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | "
labelsentence
0RECOMMEND_PRODUCTI am confused about what to buy since there ar...
1RECOMMEND_PRODUCTI have been trying to maintain a healthy lifes...
2RECOMMEND_PRODUCTCan you help me with building an athletic body...
3RECOMMEND_PRODUCTI need some hair care products since I have be...
4RECOMMEND_PRODUCTI'm here to browse some products because my fr...
\n", 109 | "
" 110 | ], 111 | "text/plain": [ 112 | " label sentence\n", 113 | "0 RECOMMEND_PRODUCT I am confused about what to buy since there ar...\n", 114 | "1 RECOMMEND_PRODUCT I have been trying to maintain a healthy lifes...\n", 115 | "2 RECOMMEND_PRODUCT Can you help me with building an athletic body...\n", 116 | "3 RECOMMEND_PRODUCT I need some hair care products since I have be...\n", 117 | "4 RECOMMEND_PRODUCT I'm here to browse some products because my fr..." 118 | ] 119 | }, 120 | "execution_count": 4, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "data = pd.read_csv(f'../../train/{bot_name}_train.csv')\n", 127 | "data.head()" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 5, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "(413, 2)" 139 | ] 140 | }, 141 | "execution_count": 5, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "data.shape" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 6, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "intents = [{\"name\": label, \"features\" :[]} for label in list(set(data['label'].to_list()))]\n", 157 | "utterances = []\n", 158 | "for index, row in data.iterrows():\n", 159 | " utterances.append({ \"text\": row['sentence'], \"intent\": row['label'], \"entities\": []})\n", 160 | "luis_dict['intents'] = intents\n", 161 | "luis_dict['utterances'] = utterances" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 7, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "with open(f'data/{bot_name}.json', 'w') as fp:\n", 171 | " json.dump(luis_dict, fp)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | 
"display_name": "Python (py36)", 185 | "language": "python", 186 | "name": "py36" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 3 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython3", 198 | "version": "3.6.8" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 4 203 | } 204 | -------------------------------------------------------------------------------- /platforms/rasa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellohaptik/HINT3/99965c892ea9a6801a87083d3861b0b04c91a4e7/platforms/rasa/__init__.py -------------------------------------------------------------------------------- /platforms/rasa/actions.py: -------------------------------------------------------------------------------- 1 | # This file contains your custom actions which can be used to run 2 | # custom Python code. 3 | # 4 | # See this guide on how to implement these actions: 5 | # https://rasa.com/docs/rasa/core/actions/#custom-actions/ 6 | 7 | 8 | # This is a simple example for a custom action which utters "Hello World!" 
9 | 10 | # from typing import Any, Text, Dict, List 11 | # 12 | # from rasa_sdk import Action, Tracker 13 | # from rasa_sdk.executor import CollectingDispatcher 14 | # 15 | # 16 | # class ActionHelloWorld(Action): 17 | # 18 | # def name(self) -> Text: 19 | # return "action_hello_world" 20 | # 21 | # def run(self, dispatcher: CollectingDispatcher, 22 | # tracker: Tracker, 23 | # domain: Dict[Text, Any]) -> List[Dict[Text, Any]]: 24 | # 25 | # dispatcher.utter_message(text="Hello World!") 26 | # 27 | # return [] 28 | -------------------------------------------------------------------------------- /platforms/rasa/config.yml: -------------------------------------------------------------------------------- 1 | # Configuration for Rasa NLU. 2 | # https://rasa.com/docs/rasa/nlu/components/ 3 | language: "en" 4 | 5 | pipeline: 6 | - name: ConveRTTokenizer 7 | - name: ConveRTFeaturizer 8 | - name: RegexFeaturizer 9 | - name: LexicalSyntacticFeaturizer 10 | - name: CountVectorsFeaturizer 11 | - name: CountVectorsFeaturizer 12 | analyzer: "char_wb" 13 | min_ngram: 1 14 | max_ngram: 4 15 | - name: DIETClassifier 16 | epochs: 100 17 | - name: EntitySynonymMapper 18 | - name: ResponseSelector 19 | epochs: 100 20 | 21 | # Configuration for Rasa Core. 22 | # https://rasa.com/docs/rasa/core/policies/ 23 | policies: 24 | - name: MemoizationPolicy 25 | - name: TEDPolicy 26 | max_history: 5 27 | epochs: 100 28 | - name: MappingPolicy 29 | -------------------------------------------------------------------------------- /platforms/rasa/credentials.yml: -------------------------------------------------------------------------------- 1 | # This file contains the credentials for the voice & chat platforms 2 | # which your bot is using. 
3 | # https://rasa.com/docs/rasa/user-guide/messaging-and-voice-channels/ 4 | 5 | rest: 6 | # # you don't need to provide anything here - this channel doesn't 7 | # # require any credentials 8 | 9 | 10 | #facebook: 11 | # verify: "" 12 | # secret: "" 13 | # page-access-token: "" 14 | 15 | #slack: 16 | # slack_token: "" 17 | # slack_channel: "" 18 | 19 | #socketio: 20 | # user_message_evt: 21 | # bot_message_evt: 22 | # session_persistence: 23 | 24 | #mattermost: 25 | # url: "https:///api/v4" 26 | # token: "" 27 | # webhook_url: "" 28 | 29 | # This entry is needed if you are using Rasa X. The entry represents credentials 30 | # for the Rasa X "channel", i.e. Talk to your bot and Share with guest testers. 31 | rasa: 32 | url: "http://localhost:5002/api" 33 | -------------------------------------------------------------------------------- /platforms/rasa/domain.yml: -------------------------------------------------------------------------------- 1 | intents: 2 | - greet 3 | - goodbye 4 | - affirm 5 | - deny 6 | - mood_great 7 | - mood_unhappy 8 | - bot_challenge 9 | 10 | responses: 11 | utter_greet: 12 | - text: "Hey! How are you?" 13 | 14 | utter_cheer_up: 15 | - text: "Here is something to cheer you up:" 16 | image: "https://i.imgur.com/nGF1K8f.jpg" 17 | 18 | utter_did_that_help: 19 | - text: "Did that help you?" 20 | 21 | utter_happy: 22 | - text: "Great, carry on!" 23 | 24 | utter_goodbye: 25 | - text: "Bye" 26 | 27 | utter_iamabot: 28 | - text: "I am a bot, powered by Rasa." 29 | 30 | session_config: 31 | session_expiration_time: 60 32 | carry_over_slots_to_new_session: true 33 | -------------------------------------------------------------------------------- /platforms/rasa/endpoints.yml: -------------------------------------------------------------------------------- 1 | # This file contains the different endpoints your bot can use. 2 | 3 | # Server where the models are pulled from. 
4 | # https://rasa.com/docs/rasa/user-guide/configuring-http-api/#fetching-models-from-a-server/ 5 | 6 | #models: 7 | # url: http://my-server.com/models/default_core@latest 8 | # wait_time_between_pulls: 10 # [optional](default: 100) 9 | 10 | # Server which runs your custom actions. 11 | # https://rasa.com/docs/rasa/core/actions/#custom-actions/ 12 | 13 | #action_endpoint: 14 | # url: "http://localhost:5055/webhook" 15 | 16 | # Tracker store which is used to store the conversations. 17 | # By default the conversations are stored in memory. 18 | # https://rasa.com/docs/rasa/api/tracker-stores/ 19 | 20 | #tracker_store: 21 | # type: redis 22 | # url: 23 | # port: 24 | # db: 25 | # password: 26 | # use_ssl: 27 | 28 | #tracker_store: 29 | # type: mongod 30 | # url: 31 | # db: 32 | # username: 33 | # password: 34 | 35 | # Event broker which all conversation events should be streamed to. 36 | # https://rasa.com/docs/rasa/api/event-brokers/ 37 | 38 | #event_broker: 39 | # url: localhost 40 | # username: username 41 | # password: password 42 | # queue: queue 43 | -------------------------------------------------------------------------------- /platforms/rasa/generate_preds.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import json\n", 11 | "import requests" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "bot_name = 'curekart'" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "# MAKE SURE RASA SERVER OF {bot_name} IS UP ON http://localhost:5005/" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 4, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "def 
predict_node_name(text):\n", 39 | " data = {\"text\": text}\n", 40 | " url = 'http://localhost:5005/model/parse'\n", 41 | " response = requests.post(url, data=json.dumps(data))\n", 42 | " predicted_node = response.json()['intent']['name']\n", 43 | " predicted_node_score = response.json()['intent_ranking'][0]['confidence']\n", 44 | " return predicted_node, predicted_node_score" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 5, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/html": [ 55 | "
\n", 56 | "\n", 69 | "\n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | "
sentencelabel
0Order my productORDER_TAKING
1HyNO_NODES_DETECTED
2I want to order Wottle sensitive handwash refillRECOMMEND_PRODUCT
3have u started C D provision for pin code 702164CHECK_PINCODE
4How do I know it will deliver in my placeNO_NODES_DETECTED
\n", 105 | "
" 106 | ], 107 | "text/plain": [ 108 | " sentence label\n", 109 | "0 Order my product ORDER_TAKING\n", 110 | "1 Hy NO_NODES_DETECTED\n", 111 | "2 I want to order Wottle sensitive handwash refill RECOMMEND_PRODUCT\n", 112 | "3 have u started C D provision for pin code 702164 CHECK_PINCODE\n", 113 | "4 How do I know it will deliver in my place NO_NODES_DETECTED" 114 | ] 115 | }, 116 | "execution_count": 5, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "test_file_name = bot_name if '_subset' not in bot_name else bot_name.replace('_subset', '')\n", 123 | "df_test = pd.read_csv(f'../../test/{test_file_name}_test.csv')\n", 124 | "df_test.head()" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 6, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "(991, 2)" 136 | ] 137 | }, 138 | "execution_count": 6, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "df_test.shape" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 7, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/plain": [ 155 | "('ORDER_QUERY', 0.31236547231674194)" 156 | ] 157 | }, 158 | "execution_count": 7, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "predict_node_name('Order my product')" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 8, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "data": { 174 | "text/html": [ 175 | "
\n", 176 | "\n", 189 | "\n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | "
sentencelabelpredicted_nodepredicted_node_score
0Order my productORDER_TAKINGORDER_QUERY0.312366
1HyNO_NODES_DETECTEDRECOMMEND_PRODUCT0.866968
2I want to order Wottle sensitive handwash refillRECOMMEND_PRODUCTRECOMMEND_PRODUCT0.452898
3have u started C D provision for pin code 702164CHECK_PINCODEORDER_STATUS0.885070
4How do I know it will deliver in my placeNO_NODES_DETECTEDRESUME_DELIVERY0.942246
\n", 237 | "
" 238 | ], 239 | "text/plain": [ 240 | " sentence label \\\n", 241 | "0 Order my product ORDER_TAKING \n", 242 | "1 Hy NO_NODES_DETECTED \n", 243 | "2 I want to order Wottle sensitive handwash refill RECOMMEND_PRODUCT \n", 244 | "3 have u started C D provision for pin code 702164 CHECK_PINCODE \n", 245 | "4 How do I know it will deliver in my place NO_NODES_DETECTED \n", 246 | "\n", 247 | " predicted_node predicted_node_score \n", 248 | "0 ORDER_QUERY 0.312366 \n", 249 | "1 RECOMMEND_PRODUCT 0.866968 \n", 250 | "2 RECOMMEND_PRODUCT 0.452898 \n", 251 | "3 ORDER_STATUS 0.885070 \n", 252 | "4 RESUME_DELIVERY 0.942246 " 253 | ] 254 | }, 255 | "execution_count": 8, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [ 261 | "pred_nodes = []\n", 262 | "pred_scores = []\n", 263 | "for index, row in df_test.iterrows():\n", 264 | " node_name, node_score = predict_node_name(row['sentence'])\n", 265 | " pred_nodes.append(node_name)\n", 266 | " pred_scores.append(node_score)\n", 267 | "df_test['predicted_node'] = pred_nodes\n", 268 | "df_test['predicted_node_score'] = pred_scores\n", 269 | "df_test.head()" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 9, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "df_test.to_csv(f'../../preds/rasa_{bot_name}.csv', index=False)" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [] 287 | } 288 | ], 289 | "metadata": { 290 | "kernelspec": { 291 | "display_name": "Python (py36)", 292 | "language": "python", 293 | "name": "py36" 294 | }, 295 | "language_info": { 296 | "codemirror_mode": { 297 | "name": "ipython", 298 | "version": 3 299 | }, 300 | "file_extension": ".py", 301 | "mimetype": "text/x-python", 302 | "name": "python", 303 | "nbconvert_exporter": "python", 304 | "pygments_lexer": "ipython3", 305 | "version": "3.6.8" 306 | } 307 | }, 308 | "nbformat": 4, 
309 | "nbformat_minor": 4 310 | } 311 | -------------------------------------------------------------------------------- /platforms/rasa/tests/conversation_tests.md: -------------------------------------------------------------------------------- 1 | #### This file contains tests to evaluate that your bot behaves as expected. 2 | #### If you want to learn more, please see the docs: https://rasa.com/docs/rasa/user-guide/testing-your-assistant/ 3 | 4 | ## happy path 1 5 | * greet: hello there! 6 | - utter_greet 7 | * mood_great: amazing 8 | - utter_happy 9 | 10 | ## happy path 2 11 | * greet: hello there! 12 | - utter_greet 13 | * mood_great: amazing 14 | - utter_happy 15 | * goodbye: bye-bye! 16 | - utter_goodbye 17 | 18 | ## sad path 1 19 | * greet: hello 20 | - utter_greet 21 | * mood_unhappy: not good 22 | - utter_cheer_up 23 | - utter_did_that_help 24 | * affirm: yes 25 | - utter_happy 26 | 27 | ## sad path 2 28 | * greet: hello 29 | - utter_greet 30 | * mood_unhappy: not good 31 | - utter_cheer_up 32 | - utter_did_that_help 33 | * deny: not really 34 | - utter_goodbye 35 | 36 | ## sad path 3 37 | * greet: hi 38 | - utter_greet 39 | * mood_unhappy: very terrible 40 | - utter_cheer_up 41 | - utter_did_that_help 42 | * deny: no 43 | - utter_goodbye 44 | 45 | ## say goodbye 46 | * goodbye: bye-bye! 47 | - utter_goodbye 48 | 49 | ## bot challenge 50 | * bot_challenge: are you a bot? 
51 | - utter_iamabot 52 | -------------------------------------------------------------------------------- /platforms/rasa/training_data_conversion.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "bot_name = 'curekart'" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "rasa_data_template = {\n", 29 | " \"rasa_nlu_data\": {\n", 30 | " \"common_examples\": None,\n", 31 | " \"regex_features\": [],\n", 32 | " \"lookup_tables\": [],\n", 33 | " \"entity_synonyms\": []\n", 34 | " }\n", 35 | "}" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 4, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/html": [ 46 | "
\n", 47 | "\n", 60 | "\n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | "
labelsentence
0CALL_CENTERWhat time is your call centre operational duri...
1CALL_CENTERis the call center still functioning during lo...
2CALL_CENTERwhat are the working hours of your call center...
3CALL_CENTERdoes covid affext your call center time
4CALL_CENTERis your call center working during covid?
\n", 96 | "
" 97 | ], 98 | "text/plain": [ 99 | " label sentence\n", 100 | "0 CALL_CENTER What time is your call centre operational duri...\n", 101 | "1 CALL_CENTER is the call center still functioning during lo...\n", 102 | "2 CALL_CENTER what are the working hours of your call center...\n", 103 | "3 CALL_CENTER does covid affext your call center time\n", 104 | "4 CALL_CENTER is your call center working during covid?" 105 | ] 106 | }, 107 | "execution_count": 4, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "data = pd.read_csv(f'../../train/{bot_name}_train.csv')\n", 114 | "data.head()" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 5, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "(600, 2)" 126 | ] 127 | }, 128 | "execution_count": 5, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "data.shape" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 6, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "examples = []" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 7, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "for index, row in data.iterrows():\n", 153 | " examples.append({\n", 154 | " 'intent': row['label'],\n", 155 | " 'text': row['sentence']\n", 156 | " })" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 8, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "rasa_data_template['rasa_nlu_data']['common_examples'] = examples" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 9, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "with open(f'data/{bot_name}.json', 'w') as fp:\n", 175 | " json.dump(rasa_data_template, fp)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | 
"metadata": {}, 182 | "outputs": [], 183 | "source": [] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Python (py36)", 189 | "language": "python", 190 | "name": "py36" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.6.8" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 4 207 | } 208 | -------------------------------------------------------------------------------- /results/bert_curekart.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.38143289606458125,0.2777203394658263,0.8362831858407079,0.0,0.3914278743423574 3 | 0.2,0.39253279515640765,0.30127499197851487,0.8362831858407079,0.02040816326530612,0.397844106254895 4 | 0.3,0.40968718466195764,0.336381914722132,0.834070796460177,0.05380333951762523,0.4069243322470297 5 | 0.4,0.48335015136226034,0.4643749179345553,0.834070796460177,0.18923933209647495,0.4579319049416409 6 | 0.5,0.5216952573158425,0.5176570270763616,0.834070796460177,0.2597402597402597,0.4803538135930759 7 | 0.6,0.5539858728557013,0.5589509843467156,0.831858407079646,0.3209647495361781,0.49917800035688836 8 | 0.7,0.574167507568113,0.5819867165161616,0.827433628318584,0.36178107606679033,0.5073404790666131 9 | 0.8,0.6094853683148335,0.6203545801533257,0.8185840707964602,0.43413729128014844,0.5275041116659444 10 | 0.9,0.6508577194752775,0.6630666755960122,0.8030973451327433,0.5231910946196661,0.5532011200360406 11 | -------------------------------------------------------------------------------- /results/bert_curekart_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted 
F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.3753784056508577,0.26873428028394586,0.8230088495575221,0.0,0.3827216986955868 3 | 0.2,0.38244197780020184,0.28385856420222894,0.8230088495575221,0.012987012987012988,0.3867371892968888 4 | 0.3,0.4117053481331988,0.3421306793975774,0.8230088495575221,0.06679035250463822,0.4038261321081724 5 | 0.4,0.4934409687184662,0.4799107169648507,0.8207964601769911,0.2189239332096475,0.4601885575577133 6 | 0.5,0.5307769929364279,0.5315308808700412,0.8119469026548672,0.2949907235621521,0.47797497265635175 7 | 0.6,0.5640766902119072,0.5734870968713671,0.8053097345132744,0.36178107606679033,0.49681857852806144 8 | 0.7,0.5943491422805247,0.6069184518856748,0.8030973451327433,0.41929499072356213,0.5133875786446674 9 | 0.8,0.6397578203834511,0.6526137924505135,0.7920353982300885,0.5120593692022264,0.5453064578171033 10 | 0.9,0.6770938446014128,0.6864213066902995,0.7809734513274337,0.5899814471243042,0.5695074884225043 11 | -------------------------------------------------------------------------------- /results/bert_powerplay11.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.16581892166836215,0.09458310530768466,0.5854545454545454,0.002824858757062147,0.22662854232741195 3 | 0.2,0.21770091556459817,0.19463095423585083,0.5818181818181818,0.07627118644067797,0.2390862177147459 4 | 0.3,0.31129196337741605,0.3411393950048982,0.5781818181818181,0.2076271186440678,0.2668898498371918 5 | 0.4,0.37843336724313326,0.42794405017038767,0.5636363636363636,0.3064971751412429,0.28144422084283677 6 | 0.5,0.4516785350966429,0.5062601790074037,0.5454545454545454,0.4152542372881356,0.2983788806071609 7 | 0.6,0.5188199389623601,0.568649768741218,0.5272727272727272,0.5155367231638418,0.3165516468104266 8 | 0.7,0.5676500508646999,0.6080192664664281,0.5018181818181818,0.5932203389830508,0.32863737204394544 9 | 
0.8,0.6063072227873856,0.6331582177286087,0.48,0.655367231638418,0.32758929661892505 10 | 0.9,0.6653102746693794,0.6712780046426039,0.43636363636363634,0.7542372881355932,0.347322182251494 11 | -------------------------------------------------------------------------------- /results/bert_powerplay11_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.1617497456765005,0.11337011172449207,0.5309090909090909,0.018361581920903956,0.2063998849903869 3 | 0.2,0.2970498474059003,0.33631881380821965,0.5236363636363637,0.20903954802259886,0.24544955242839175 4 | 0.3,0.4252288911495422,0.48976321608883183,0.48727272727272725,0.4011299435028249,0.2677829569621612 5 | 0.4,0.5178026449643948,0.5740094067056727,0.4509090909090909,0.5437853107344632,0.28113965587425205 6 | 0.5,0.5859613428280773,0.6223629071205065,0.41818181818181815,0.6511299435028248,0.286288916282711 7 | 0.6,0.624618514750763,0.6433052440106094,0.3890909090909091,0.7161016949152542,0.2887958272661481 8 | 0.7,0.6602238046795524,0.6637153181331166,0.36,0.7768361581920904,0.29851914737268237 9 | 0.8,0.698880976602238,0.6786901502595527,0.3018181818181818,0.8531073446327684,0.30169293594539803 10 | 0.9,0.728382502543235,0.6765203325400962,0.21454545454545454,0.9279661016949152,0.2854049912230014 11 | -------------------------------------------------------------------------------- /results/bert_sofmattress.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.4282115869017632,0.3327023465634267,0.7359307359307359,0.0,0.457123093925187 3 | 0.2,0.43828715365239296,0.3540585844388577,0.7359307359307359,0.024096385542168676,0.4626917219806727 4 | 0.3,0.45843828715365237,0.3989092079935,0.7316017316017316,0.0783132530120482,0.47205584437489384 5 | 
0.4,0.5088161209068011,0.48236500416255096,0.7316017316017316,0.19879518072289157,0.5062789592792611 6 | 0.5,0.5541561712846348,0.5480458219448876,0.7316017316017316,0.3072289156626506,0.5379162557311129 7 | 0.6,0.6146095717884131,0.6232869733224982,0.7229437229437229,0.463855421686747,0.5787346390816308 8 | 0.7,0.6372795969773299,0.6450564416398155,0.7056277056277056,0.5421686746987951,0.5897178388469266 9 | 0.8,0.672544080604534,0.6798829189224005,0.6926406926406926,0.6445783132530121,0.6147775061582121 10 | 0.9,0.690176322418136,0.690025982806961,0.6363636363636364,0.7650602409638554,0.6167910208651333 11 | -------------------------------------------------------------------------------- /results/bert_sofmattress_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.33249370277078083,0.2629308626158718,0.5714285714285714,0.0,0.3500891574145308 3 | 0.2,0.3425692695214106,0.2832268690766706,0.5714285714285714,0.024096385542168676,0.35685082873850854 4 | 0.3,0.40302267002518893,0.39136641443902487,0.5714285714285714,0.1686746987951807,0.39459601861278215 5 | 0.4,0.44080604534005036,0.44314397484121,0.5627705627705628,0.2710843373493976,0.4060595816100176 6 | 0.5,0.5012594458438288,0.5080367036869831,0.5541125541125541,0.42771084337349397,0.43449046213243375 7 | 0.6,0.5667506297229219,0.5694958962020813,0.5497835497835498,0.5903614457831325,0.4838865511383501 8 | 0.7,0.5869017632241813,0.5821474403837933,0.5151515151515151,0.6867469879518072,0.48828547382614906 9 | 0.8,0.5994962216624685,0.5825208162114784,0.48484848484848486,0.7590361445783133,0.4908155382461614 10 | 0.9,0.6120906801007556,0.5757913259833879,0.43722943722943725,0.8554216867469879,0.4918195897349411 11 | -------------------------------------------------------------------------------- /results/dialogflow_curekart.csv: 
-------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.6417759838546923,0.6693030807681715,0.75,0.5510204081632653,0.5475730047883745 3 | 0.2,0.6417759838546923,0.6693030807681715,0.75,0.5510204081632653,0.5475730047883745 4 | 0.3,0.6508577194752775,0.6783643590281482,0.745575221238938,0.5714285714285714,0.5540730516224459 5 | 0.4,0.6821392532795156,0.7048752250496552,0.7234513274336283,0.647495361781076,0.5689670963713872 6 | 0.5,0.7114026236125126,0.7249213686064777,0.668141592920354,0.7476808905380334,0.5779492515439503 7 | 0.6,0.7315842583249244,0.7277228793178449,0.5730088495575221,0.8645640074211502,0.5782589504099579 8 | 0.7,0.722502522704339,0.6912920440176313,0.45353982300884954,0.948051948051948,0.5533669228890408 9 | 0.8,0.6528758829465187,0.5718238662329262,0.24778761061946902,0.9925788497217068,0.4356136748918091 10 | 0.9,0.5893037336024218,0.46757782989434266,0.10176991150442478,0.9981447124304267,0.2805046000382007 11 | -------------------------------------------------------------------------------- /results/dialogflow_curekart_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.6357214934409687,0.6644852572049998,0.7123893805309734,0.5714285714285714,0.5308419346603385 3 | 0.2,0.6357214934409687,0.6644852572049998,0.7123893805309734,0.5714285714285714,0.5308419346603385 4 | 0.3,0.644803229061554,0.6730954682965349,0.7035398230088495,0.5955473098330241,0.5356197263402213 5 | 0.4,0.686175580221998,0.7065621324418394,0.6747787610619469,0.6957328385899815,0.5586030288583995 6 | 0.5,0.7093844601412714,0.7178992921423805,0.6106194690265486,0.7922077922077922,0.5620501471721884 7 | 0.6,0.7184661957618567,0.7036159200150304,0.5110619469026548,0.8923933209647495,0.5516289447691117 8 | 
0.7,0.6841574167507568,0.6317791384139099,0.35176991150442477,0.9628942486085343,0.4849493608742825 9 | 0.8,0.6226034308779012,0.5199548440744413,0.17920353982300885,0.9944341372912802,0.36706442812074147 10 | 0.9,0.5671039354187689,0.42794627592006745,0.05088495575221239,1.0,0.20027480364663677 11 | -------------------------------------------------------------------------------- /results/dialogflow_powerplay11.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.34994913530010174,0.39190881618283185,0.5963636363636363,0.2542372881355932,0.3026644216431138 3 | 0.2,0.34994913530010174,0.39190881618283185,0.5963636363636363,0.2542372881355932,0.3026644216431138 4 | 0.3,0.35910478128179046,0.4039036213694102,0.5963636363636363,0.2669491525423729,0.30716042960113416 5 | 0.4,0.40895218718209564,0.46636724373089034,0.5818181818181818,0.3418079096045198,0.31849843696846203 6 | 0.5,0.5167853509664293,0.577415171615548,0.5527272727272727,0.5028248587570622,0.35232998422313205 7 | 0.6,0.6205493387589013,0.6511586832758449,0.4763636363636364,0.6765536723163842,0.35263103368370324 8 | 0.7,0.6917599186164801,0.6774930217127464,0.33454545454545453,0.8305084745762712,0.3126850195378528 9 | 0.8,0.7232960325534079,0.6553717504378747,0.14909090909090908,0.9463276836158192,0.22745806536718088 10 | 0.9,0.728382502543235,0.6229727120687398,0.04,0.9957627118644068,0.1568255894769402 11 | -------------------------------------------------------------------------------- /results/dialogflow_powerplay11_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.34486266531027465,0.3955649131856894,0.5563636363636364,0.2627118644067797,0.28612194336184515 3 | 0.2,0.34486266531027465,0.3955649131856894,0.5563636363636364,0.2627118644067797,0.28612194336184515 4 | 
0.3,0.3570701932858596,0.41171748588212914,0.5527272727272727,0.2810734463276836,0.2900596423980539 5 | 0.4,0.41810783316378436,0.48421464263544084,0.5345454545454545,0.3728813559322034,0.305992657613556 6 | 0.5,0.5330620549338759,0.586584111496249,0.4909090909090909,0.5494350282485876,0.32979968484851874 7 | 0.6,0.6429298067141404,0.6548585998711222,0.3927272727272727,0.7401129943502824,0.32008491782007265 8 | 0.7,0.7029501525940997,0.6591335575393154,0.2290909090909091,0.8870056497175142,0.2628573782646314 9 | 0.8,0.7263479145473042,0.6362517620665049,0.08363636363636363,0.9759887005649718,0.18572784895334576 10 | 0.9,0.7222787385554426,0.6087200715529609,0.01090909090909091,0.998587570621469,0.07938321993813488 11 | -------------------------------------------------------------------------------- /results/dialogflow_sofmattress.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.6498740554156172,0.6596368491168915,0.7316017316017316,0.536144578313253,0.6089435891175473 3 | 0.2,0.6498740554156172,0.6596368491168915,0.7316017316017316,0.536144578313253,0.6089435891175473 4 | 0.3,0.6599496221662469,0.6706643152884479,0.7272727272727273,0.5662650602409639,0.6161452947200737 5 | 0.4,0.6876574307304786,0.6991714568570798,0.7186147186147186,0.6445783132530121,0.6375316099746389 6 | 0.5,0.6952141057934509,0.6993933172704418,0.670995670995671,0.7289156626506024,0.6253234385397347 7 | 0.6,0.7128463476070529,0.7036431692361467,0.6060606060606061,0.8614457831325302,0.6325405291419348 8 | 0.7,0.6574307304785895,0.6198759132209378,0.4588744588744589,0.9337349397590361,0.5537662881862647 9 | 0.8,0.5340050377833753,0.44520234648343804,0.2077922077922078,0.9879518072289156,0.3746576501866642 10 | 0.9,0.44836272040302266,0.3064359332591206,0.05194805194805195,1.0,0.1912720697455713 11 | -------------------------------------------------------------------------------- 
/results/dialogflow_sofmattress_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.5869017632241813,0.6043002974102839,0.6536796536796536,0.4939759036144578,0.5377594861327528 3 | 0.2,0.5869017632241813,0.6043002974102839,0.6536796536796536,0.4939759036144578,0.5377594861327528 4 | 0.3,0.5994962216624685,0.6166759030825154,0.6536796536796536,0.5240963855421686,0.5475288277977552 5 | 0.4,0.6196473551637279,0.6341347392091125,0.6363636363636364,0.5963855421686747,0.5551008257904813 6 | 0.5,0.6448362720403022,0.6436995136282042,0.5627705627705628,0.7590361445783133,0.5501185870880984 7 | 0.6,0.6272040302267002,0.5980370724455657,0.42857142857142855,0.9036144578313253,0.5084391869901971 8 | 0.7,0.5591939546599496,0.4797003360393145,0.2683982683982684,0.963855421686747,0.4052237963084955 9 | 0.8,0.47858942065491183,0.36335513496681104,0.11688311688311688,0.9819277108433735,0.25583983648496217 10 | 0.9,0.4282115869017632,0.2668556988455543,0.017316017316017316,1.0,0.10959614750180845 11 | -------------------------------------------------------------------------------- /results/haptik_curekart.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.4308779011099899,0.38596188662657294,0.8030973451327433,0.11873840445269017,0.4129311873500288 3 | 0.2,0.5893037336024218,0.6100177353773819,0.7787610619469026,0.43042671614100186,0.5065195660657139 4 | 0.3,0.7073662966700303,0.7274093358615785,0.7278761061946902,0.6901669758812616,0.5886149912987301 5 | 0.4,0.7255297679112008,0.7354370714668711,0.672566371681416,0.7699443413729128,0.5900770415779899 6 | 0.5,0.7356205852674067,0.738516382285541,0.6393805309734514,0.8163265306122449,0.5929143365670386 7 | 0.6,0.7477295660948536,0.7454891722124777,0.6039823008849557,0.8682745825602969,0.6026655022329167 8 | 
0.7,0.7416750756811302,0.7256424545915114,0.5398230088495575,0.9109461966604824,0.5847882508776726 9 | 0.8,0.693239152371342,0.6498473099828318,0.37831858407079644,0.9573283858998145,0.49917255429706 10 | 0.9,0.6256306760847629,0.5286265770933399,0.1902654867256637,0.9907235621521335,0.3699512520116685 11 | -------------------------------------------------------------------------------- /results/haptik_curekart_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.4127144298688194,0.3483712897587261,0.7986725663716814,0.08905380333951762,0.39672624230371795 3 | 0.2,0.5469223007063572,0.5527488336780364,0.7809734513274337,0.35064935064935066,0.47737375406876575 4 | 0.3,0.615539858728557,0.633122730084551,0.7411504424778761,0.5102040816326531,0.5054029220052958 5 | 0.4,0.7083753784056509,0.720889395945921,0.7013274336283186,0.7142857142857143,0.5782016648924359 6 | 0.5,0.739656912209889,0.7434613173842762,0.6703539823008849,0.7977736549165121,0.6039077415751544 7 | 0.6,0.7356205852674067,0.7317150984950263,0.6017699115044248,0.8478664192949907,0.5825590315967132 8 | 0.7,0.7447023208879919,0.7283624743769378,0.5398230088495575,0.9165120593692022,0.5898117449297346 9 | 0.8,0.6831483350151363,0.6262606315395998,0.334070796460177,0.9758812615955473,0.4866391552274101 10 | 0.9,0.6004036326942482,0.48556878323957986,0.13274336283185842,0.9925788497217068,0.3091295392937593 11 | -------------------------------------------------------------------------------- /results/haptik_powerplay11.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.2634791454730417,0.23947977500824366,0.6654545454545454,0.10734463276836158,0.2861451954799664 3 | 0.2,0.47914547304170907,0.5303094707781417,0.6545454545454545,0.4110169491525424,0.3717481358080229 4 | 
0.3,0.6164801627670397,0.657869511578882,0.6145454545454545,0.617231638418079,0.4246606743440914 5 | 0.4,0.6724313326551373,0.6924174546699668,0.52,0.731638418079096,0.4162990412384738 6 | 0.5,0.6968463886063072,0.6927014973969432,0.41818181818181815,0.8050847457627118,0.38275866167865685 7 | 0.6,0.7090539165818922,0.6931356450002476,0.36727272727272725,0.8418079096045198,0.36171310214427965 8 | 0.7,0.7263479145473042,0.6903458667142265,0.2872727272727273,0.8968926553672316,0.33052160648007983 9 | 0.8,0.7416073245167853,0.6845081151776063,0.21454545454545454,0.9463276836158192,0.313490577230533 10 | 0.9,0.7273652085452695,0.6389699648264028,0.08,0.9788135593220338,0.18147366140292198 11 | -------------------------------------------------------------------------------- /results/haptik_powerplay11_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.25839267548321465,0.2539041201537312,0.5927272727272728,0.1285310734463277,0.2589451989613422 3 | 0.2,0.5249237029501526,0.5797717163275461,0.5381818181818182,0.519774011299435,0.3499411970869179 4 | 0.3,0.646998982706002,0.6652129639491713,0.4290909090909091,0.731638418079096,0.3523908681179462 5 | 0.4,0.6978636826042727,0.6897866217628258,0.37454545454545457,0.8234463276836158,0.353035561598512 6 | 0.5,0.7171922685656155,0.6874629475785807,0.2909090909090909,0.882768361581921,0.323382782619308 7 | 0.6,0.7243133265513734,0.6759599697716699,0.21454545454545454,0.922316384180791,0.2834750823593769 8 | 0.7,0.7344862665310274,0.6683039740136187,0.1709090909090909,0.9533898305084746,0.27227421438979416 9 | 0.8,0.7365208545269583,0.6495595899194136,0.10545454545454545,0.981638418079096,0.23935251490404322 10 | 0.9,0.7243133265513734,0.6169229203805074,0.02909090909090909,0.9943502824858758,0.1205748726742888 11 | -------------------------------------------------------------------------------- 
/results/haptik_sofmattress.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.5088161209068011,0.4852612066932231,0.7229437229437229,0.21084337349397592,0.5086235739335777 3 | 0.2,0.6171284634760705,0.6277105221754836,0.70995670995671,0.4879518072289157,0.577301101585455 4 | 0.3,0.6599496221662469,0.6631911532048385,0.6536796536796536,0.6686746987951807,0.5876489726418147 5 | 0.4,0.6599496221662469,0.6546533905227607,0.5714285714285714,0.7831325301204819,0.5662714661729004 6 | 0.5,0.6523929471032746,0.6423603196563007,0.5324675324675324,0.8192771084337349,0.5496451534253737 7 | 0.6,0.6473551637279596,0.6235443190103139,0.48484848484848486,0.8734939759036144,0.5380382274804033 8 | 0.7,0.6146095717884131,0.576832466271812,0.4155844155844156,0.891566265060241,0.48896120356942585 9 | 0.8,0.5869017632241813,0.5266436611764808,0.3246753246753247,0.9518072289156626,0.45081352366335453 10 | 0.9,0.48866498740554154,0.376810974037845,0.12554112554112554,0.9939759036144579,0.28868545245640687 11 | -------------------------------------------------------------------------------- /results/haptik_sofmattress_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.4609571788413098,0.45096457375594345,0.6406926406926406,0.21084337349397592,0.45335512570977937 3 | 0.2,0.5642317380352645,0.5828012955855193,0.5887445887445888,0.5301204819277109,0.49591325222825966 4 | 0.3,0.5793450881612091,0.5780365207906163,0.48484848484848486,0.7108433734939759,0.46860604999628325 5 | 0.4,0.5919395465994962,0.5745311704147583,0.4458874458874459,0.7951807228915663,0.46786678404969667 6 | 0.5,0.6020151133501259,0.5638019847351878,0.3939393939393939,0.891566265060241,0.4719265671294265 7 | 
0.6,0.5919395465994962,0.543623046678172,0.354978354978355,0.9216867469879518,0.4548641132965435 8 | 0.7,0.5768261964735516,0.5171188331426371,0.2987012987012987,0.963855421686747,0.4338106577877485 9 | 0.8,0.5239294710327456,0.4429667417958547,0.19913419913419914,0.9759036144578314,0.34820134456192214 10 | 0.9,0.4609571788413098,0.32549065235972047,0.0735930735930736,1.0,0.2269661352081343 11 | -------------------------------------------------------------------------------- /results/luis_curekart.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.4520686175580222,0.46146738088457,0.7256637168141593,0.22263450834879406,0.4005380109377977 3 | 0.2,0.5802219979818365,0.6170061697077579,0.6991150442477876,0.4805194805194805,0.47653031805332025 4 | 0.3,0.6337033299697276,0.6681462015554164,0.6415929203539823,0.62708719851577,0.4978934972990696 5 | 0.4,0.6599394550958627,0.6807671697256525,0.577433628318584,0.7291280148423006,0.4929963911775212 6 | 0.5,0.7023208879919274,0.7131409661083393,0.5376106194690266,0.8404452690166976,0.5359301731023312 7 | 0.6,0.7103935418768921,0.7101188683186717,0.5088495575221239,0.8794063079777366,0.5385934445324991 8 | 0.7,0.7093844601412714,0.6961165287568397,0.4668141592920354,0.9128014842300557,0.5287337256051646 9 | 0.8,0.6972754793138244,0.6680534106767997,0.39823008849557523,0.948051948051948,0.5047539542079575 10 | 0.9,0.6518668012108981,0.5932475283103315,0.26548672566371684,0.9758812615955473,0.41997687434554737 11 | -------------------------------------------------------------------------------- /results/luis_curekart_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.5035317860746721,0.5201836972792051,0.7168141592920354,0.3246753246753247,0.4190697944500741 3 | 
0.2,0.6226034308779012,0.644386986815755,0.6526548672566371,0.5974025974025974,0.4746277223794737 4 | 0.3,0.6589303733602422,0.6686864474137902,0.5907079646017699,0.7161410018552876,0.4842478941806614 5 | 0.4,0.686175580221998,0.6830179988979004,0.5265486725663717,0.8200371057513914,0.4996450508983592 6 | 0.5,0.7033299697275479,0.6893408154450895,0.47123893805309736,0.8979591836734694,0.5169967885853658 7 | 0.6,0.7053481331987891,0.6826588122328711,0.4336283185840708,0.9332096474953617,0.5168028658065381 8 | 0.7,0.6841574167507568,0.6475173117892167,0.36283185840707965,0.9536178107606679,0.4770886651801918 9 | 0.8,0.6781029263370333,0.6274089842065953,0.32079646017699115,0.9777365491651205,0.46955337503955225 10 | 0.9,0.6387487386478304,0.5581202630086133,0.21902654867256638,0.9907235621521335,0.3941968849304019 11 | -------------------------------------------------------------------------------- /results/luis_powerplay11.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.5330620549338759,0.5992526506682163,0.48,0.5536723163841808,0.3353203174087024 3 | 0.2,0.6408952187182095,0.6733466777252709,0.41454545454545455,0.7288135593220338,0.35886833679968055 4 | 0.3,0.6795523906408952,0.6867843231081363,0.36363636363636365,0.8022598870056498,0.3538670872899427 5 | 0.4,0.7009155645981688,0.6895231702778716,0.31272727272727274,0.8516949152542372,0.3410825635819119 6 | 0.5,0.7171922685656155,0.6876970376602419,0.27636363636363637,0.8884180790960452,0.3264174681218831 7 | 0.6,0.7151576805696847,0.6742611836665858,0.2290909090909091,0.903954802259887,0.2821349541214711 8 | 0.7,0.7192268565615463,0.6695140228765639,0.2,0.9209039548022598,0.26638372270148986 9 | 0.8,0.7243133265513734,0.6577880820529207,0.14545454545454545,0.9491525423728814,0.23403584637750863 10 | 0.9,0.7314343845371313,0.6457877199926221,0.09454545454545454,0.9788135593220338,0.2118993983537775 11 
| -------------------------------------------------------------------------------- /results/luis_powerplay11_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.602238046795524,0.6489652908924319,0.44,0.6652542372881356,0.3493129242510959 3 | 0.2,0.6785350966429298,0.6884322152181961,0.38545454545454544,0.7923728813559322,0.36443959593678765 4 | 0.3,0.7121057985757884,0.6988097741885357,0.3381818181818182,0.8573446327683616,0.3641509922646042 5 | 0.4,0.7253306205493387,0.6965591610819797,0.2872727272727273,0.8954802259887006,0.3479732789168995 6 | 0.5,0.7202441505595117,0.6737287833585557,0.22181818181818183,0.9138418079096046,0.291862017703851 7 | 0.6,0.7304170905391658,0.6715937089389293,0.19636363636363635,0.9378531073446328,0.28507886991542475 8 | 0.7,0.7334689725330621,0.6585884333705816,0.14545454545454545,0.961864406779661,0.2559865327520182 9 | 0.8,0.7334689725330621,0.6425099069619017,0.09090909090909091,0.9830508474576272,0.2192384862783402 10 | 0.9,0.7263479145473042,0.6239689485734526,0.04363636363636364,0.9915254237288136,0.14980967211811785 11 | -------------------------------------------------------------------------------- /results/luis_sofmattress.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.6599496221662469,0.6583511406238304,0.5930735930735931,0.7530120481927711,0.580661387850616 3 | 0.2,0.6649874055415617,0.6481165167919319,0.5238095238095238,0.8614457831325302,0.5694742710422257 4 | 0.3,0.6599496221662469,0.6309783853401376,0.47186147186147187,0.9216867469879518,0.5568475822263478 5 | 0.4,0.6523929471032746,0.6162481775882876,0.4329004329004329,0.9578313253012049,0.5466956315650783 6 | 0.5,0.6196473551637279,0.5737145815429269,0.37662337662337664,0.9578313253012049,0.49930551246201366 7 | 
0.6,0.5894206549118388,0.5269758841858341,0.31601731601731603,0.9698795180722891,0.45563924496546165 8 | 0.7,0.5667506297229219,0.49174587864161706,0.2727272727272727,0.9759036144578314,0.4201578655531844 9 | 0.8,0.5440806045340051,0.45627702776114243,0.22510822510822512,0.9879518072289156,0.38564698008973847 10 | 0.9,0.5088161209068011,0.399913951597676,0.15584415584415584,1.0,0.32910030494957465 11 | -------------------------------------------------------------------------------- /results/luis_sofmattress_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.5894206549118388,0.5868721604972246,0.4935064935064935,0.7228915662650602,0.49200567052325594 3 | 0.2,0.5894206549118388,0.5624126892818334,0.39826839826839827,0.8554216867469879,0.4583002646125471 4 | 0.3,0.5994962216624685,0.5530659814351441,0.35064935064935066,0.9457831325301205,0.467134202661144 5 | 0.4,0.5617128463476071,0.4980472241466623,0.2683982683982684,0.9698795180722891,0.4087570327249752 6 | 0.5,0.5440806045340051,0.46525596370760114,0.22943722943722944,0.9819277108433735,0.38164536737032145 7 | 0.6,0.5365239294710328,0.4455633242012838,0.2077922077922078,0.9939759036144579,0.3715625578876327 8 | 0.7,0.5163727959697733,0.41249751281068137,0.16883116883116883,1.0,0.34033496707440136 9 | 0.8,0.4811083123425693,0.3547342029900023,0.10822510822510822,1.0,0.26789715303727896 10 | 0.9,0.4659949622166247,0.32592434738214054,0.08225108225108226,1.0,0.2311697054790726 11 | -------------------------------------------------------------------------------- /results/rasa_curekart.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.3834510595358224,0.28125242085213886,0.8407079646017699,0.0,0.39651377278996913 3 | 
0.2,0.38647830474268413,0.2879650521374422,0.8407079646017699,0.0055658627087198514,0.39849991095205295 4 | 0.3,0.4177598385469223,0.34951458898695087,0.8407079646017699,0.06307977736549165,0.41835926466428414 5 | 0.4,0.4591321897073663,0.4241729718132308,0.8362831858407079,0.14285714285714285,0.43921564816195047 6 | 0.5,0.5146316851664985,0.5139404655873048,0.8185840707964602,0.2597402597402597,0.4650631495322916 7 | 0.6,0.557013118062563,0.570384538783362,0.8075221238938053,0.3469387755102041,0.48891931036364183 8 | 0.7,0.5893037336024218,0.6073736858221093,0.7898230088495575,0.42115027829313545,0.4992209609417083 9 | 0.8,0.615539858728557,0.6316430358173197,0.7632743362831859,0.49165120593692024,0.5039480710117246 10 | 0.9,0.6407669021190716,0.6537639235616978,0.7389380530973452,0.5584415584415584,0.5108416672903147 11 | -------------------------------------------------------------------------------- /results/rasa_curekart_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.36730575176589303,0.28070361362834495,0.8053097345132744,0.0,0.37949396040632505 3 | 0.2,0.3763874873864783,0.2996587611811544,0.8053097345132744,0.016697588126159554,0.38527320917267344 4 | 0.3,0.42885973763874874,0.39669039127807143,0.8053097345132744,0.11317254174397032,0.41623445272563037 5 | 0.4,0.4873864783047427,0.488306040548246,0.7986725663716814,0.22634508348794063,0.4457023194109826 6 | 0.5,0.5479313824419778,0.5648611471668102,0.7809734513274337,0.3525046382189239,0.4714792055404184 7 | 0.6,0.5973763874873865,0.6187987334530253,0.7588495575221239,0.4619666048237477,0.4941972946101975 8 | 0.7,0.6337033299697276,0.6539156700960813,0.7367256637168141,0.5473098330241187,0.5114806681921403 9 | 0.8,0.6770938446014128,0.6942037327893981,0.7190265486725663,0.6419294990723562,0.5443926493175281 10 | 
0.9,0.7043390514631686,0.7122579441592488,0.6592920353982301,0.7421150278293135,0.5509541661974698 11 | -------------------------------------------------------------------------------- /results/rasa_powerplay11.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.1373346897253306,0.0740760502880126,0.4909090909090909,0.0,0.18889472272624616 3 | 0.2,0.17395727365208546,0.14517846073743704,0.4909090909090909,0.05084745762711865,0.20209392565355375 4 | 0.3,0.31739572736520855,0.3716455438220059,0.4618181818181818,0.2612994350282486,0.23384958998978336 5 | 0.4,0.46083418107833163,0.5241212101471256,0.44363636363636366,0.4675141242937853,0.2688555152920154 6 | 0.5,0.5645981688708036,0.6078808921124113,0.4254545454545455,0.6186440677966102,0.30147733123460646 7 | 0.6,0.6388606307222787,0.6580630758375814,0.3927272727272727,0.7344632768361582,0.3224489737817428 8 | 0.7,0.676500508646999,0.673736480201581,0.3381818181818182,0.807909604519774,0.31166032692222884 9 | 0.8,0.7171922685656155,0.6961317099627315,0.31636363636363635,0.8728813559322034,0.3399775187743717 10 | 0.9,0.7314343845371313,0.68167001253479,0.21454545454545454,0.9322033898305084,0.2949058644577997 11 | -------------------------------------------------------------------------------- /results/rasa_powerplay11_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.10783316378433368,0.0563726671323755,0.38545454545454544,0.0,0.14538813844067086 3 | 0.2,0.22380467955239064,0.25981415203931457,0.38181818181818183,0.16242937853107345,0.17654599691688472 4 | 0.3,0.4303153611393693,0.49508352249020116,0.3563636363636364,0.4590395480225989,0.21778856977821506 5 | 0.4,0.5401831129196337,0.5755376510078896,0.32,0.6257062146892656,0.22912569156377594 6 | 
0.5,0.6195320447609359,0.6209068207485843,0.2909090909090909,0.7471751412429378,0.24082020235515628 7 | 0.6,0.6744659206510681,0.6498908482830816,0.24363636363636362,0.8418079096045198,0.24926272155574747 8 | 0.7,0.7019328585961343,0.6566132900273823,0.2,0.8968926553672316,0.2474993544193783 9 | 0.8,0.7232960325534079,0.6561342706688587,0.14909090909090908,0.9463276836158192,0.2361606564545725 10 | 0.9,0.7355035605289929,0.6527306716834309,0.11636363636363636,0.9759887005649718,0.2406068525264252 11 | -------------------------------------------------------------------------------- /results/rasa_sofmattress.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 0.1,0.40302267002518893,0.3173417229356829,0.6926406926406926,0.0,0.42952049480375937 3 | 0.2,0.40554156171284633,0.3237280250807377,0.6926406926406926,0.006024096385542169,0.43019399717323903 4 | 0.3,0.44080604534005036,0.40017562032308784,0.683982683982684,0.10240963855421686,0.44554324727480543 5 | 0.4,0.5037783375314862,0.5025993126150237,0.670995670995671,0.2710843373493976,0.4797158451836411 6 | 0.5,0.5390428211586902,0.5488575093593298,0.6406926406926406,0.39759036144578314,0.49298645472110264 7 | 0.6,0.5818639798488665,0.5945734802728632,0.6103896103896104,0.5421686746987951,0.510679733490003 8 | 0.7,0.6120906801007556,0.6164923202933457,0.5757575757575758,0.6626506024096386,0.5230432925400154 9 | 0.8,0.6272040302267002,0.620617079478249,0.5367965367965368,0.7530120481927711,0.5241637786271162 10 | 0.9,0.6372795969773299,0.6090461164829147,0.4805194805194805,0.8554216867469879,0.5258050832127276 11 | -------------------------------------------------------------------------------- /results/rasa_sofmattress_subset.csv: -------------------------------------------------------------------------------- 1 | Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC 2 | 
0.1,0.327455919395466,0.24487420952343436,0.5627705627705628,0.0,0.34321170982570903 3 | 0.2,0.3425692695214106,0.2771987206468406,0.5627705627705628,0.03614457831325301,0.35244337808363696 4 | 0.3,0.4156171284634761,0.39895490341311163,0.5411255411255411,0.24096385542168675,0.37732642825938606 5 | 0.4,0.4811083123425693,0.47511485998374675,0.5238095238095238,0.42168674698795183,0.407076025250971 6 | 0.5,0.5289672544080605,0.5155202574924295,0.48917748917748916,0.5843373493975904,0.420109406500981 7 | 0.6,0.5617128463476071,0.5385047638512739,0.4458874458874459,0.7228915662650602,0.4331244327993763 8 | 0.7,0.5667506297229219,0.5290891893997581,0.3939393939393939,0.8072289156626506,0.42492922305773534 9 | 0.8,0.5617128463476071,0.5099903608529387,0.33766233766233766,0.8734939759036144,0.40819956754895265 10 | 0.9,0.5415617128463476,0.4674266966864961,0.2510822510822511,0.9457831325301205,0.3740752005668206 11 | -------------------------------------------------------------------------------- /run_evaluation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "platform = 'haptik'\n", 20 | "bot_name = 'curekart_subset'" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "def get_inscope_accuracy(actual_node, pred_node):\n", 30 | " total = 0\n", 31 | " correct = 0\n", 32 | " for act, pred in zip(actual_node, pred_node):\n", 33 | " if act == 'NO_NODES_DETECTED':\n", 34 | " continue\n", 35 | " total += 1\n", 36 | " if act == pred:\n", 37 | " correct += 1\n", 38 | " return 
correct/total\n", 39 | "\n", 40 | "def get_oos_recall(actual_node, pred_node):\n", 41 | " total = 0\n", 42 | " correct = 0\n", 43 | " for act, pred in zip(actual_node, pred_node):\n", 44 | " if act != 'NO_NODES_DETECTED':\n", 45 | " continue\n", 46 | " total += 1\n", 47 | " if act == pred:\n", 48 | " correct += 1\n", 49 | " return correct/total\n", 50 | "\n", 51 | "def get_metrics(filepath, thresh):\n", 52 | " df = pd.read_csv(filepath)\n", 53 | " pred_node = []\n", 54 | " for index, row in df.iterrows(): \n", 55 | " if row['predicted_node_score'] < thresh:\n", 56 | " pred_node.append('NO_NODES_DETECTED')\n", 57 | " else:\n", 58 | " pred_node.append(row['predicted_node'])\n", 59 | " actual_node = list(df['label'])\n", 60 | " accuracy = accuracy_score(actual_node, pred_node)\n", 61 | " overall_f1 = f1_score(actual_node, pred_node, labels=list(set(actual_node)), average='weighted')\n", 62 | " inscope_accuracy = get_inscope_accuracy(actual_node, pred_node)\n", 63 | " oos_recall = get_oos_recall(actual_node, pred_node)\n", 64 | " mcc = matthews_corrcoef(actual_node, pred_node)\n", 65 | " return accuracy, overall_f1, inscope_accuracy, oos_recall, mcc" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 4, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/html": [ 76 | "
\n", 77 | "\n", 90 | "\n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | "
ThresholdAccuracyWeighted F1Inscope AccuracyOOS RecallMCC
00.10.4127140.3483710.7986730.0890540.396726
10.20.5469220.5527490.7809730.3506490.477374
20.30.6155400.6331230.7411500.5102040.505403
30.40.7083750.7208890.7013270.7142860.578202
40.50.7396570.7434610.6703540.7977740.603908
50.60.7356210.7317150.6017700.8478660.582559
60.70.7447020.7283620.5398230.9165120.589812
70.80.6831480.6262610.3340710.9758810.486639
80.90.6004040.4855690.1327430.9925790.309130
\n", 186 | "
" 187 | ], 188 | "text/plain": [ 189 | " Threshold Accuracy Weighted F1 Inscope Accuracy OOS Recall MCC\n", 190 | "0 0.1 0.412714 0.348371 0.798673 0.089054 0.396726\n", 191 | "1 0.2 0.546922 0.552749 0.780973 0.350649 0.477374\n", 192 | "2 0.3 0.615540 0.633123 0.741150 0.510204 0.505403\n", 193 | "3 0.4 0.708375 0.720889 0.701327 0.714286 0.578202\n", 194 | "4 0.5 0.739657 0.743461 0.670354 0.797774 0.603908\n", 195 | "5 0.6 0.735621 0.731715 0.601770 0.847866 0.582559\n", 196 | "6 0.7 0.744702 0.728362 0.539823 0.916512 0.589812\n", 197 | "7 0.8 0.683148 0.626261 0.334071 0.975881 0.486639\n", 198 | "8 0.9 0.600404 0.485569 0.132743 0.992579 0.309130" 199 | ] 200 | }, 201 | "execution_count": 4, 202 | "metadata": {}, 203 | "output_type": "execute_result" 204 | } 205 | ], 206 | "source": [ 207 | "thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]\n", 208 | "accuracy_over_thresh = []\n", 209 | "overall_f1_over_thresh = []\n", 210 | "inscope_recall_over_thresh = []\n", 211 | "oos_recall_over_thresh = []\n", 212 | "mcc_over_thresh = []\n", 213 | "for thresh in thresholds:\n", 214 | " accuracy, overall_f1, inscope_recall, oos_recall, mcc = get_metrics(f'preds/{platform}_{bot_name}.csv', thresh)\n", 215 | " accuracy_over_thresh.append(accuracy)\n", 216 | " overall_f1_over_thresh.append(overall_f1)\n", 217 | " inscope_recall_over_thresh.append(inscope_recall)\n", 218 | " oos_recall_over_thresh.append(oos_recall)\n", 219 | " mcc_over_thresh.append(mcc)\n", 220 | "df_metrics = pd.DataFrame({'Threshold': thresholds,\n", 221 | " 'Accuracy': accuracy_over_thresh,\n", 222 | " 'Weighted F1': overall_f1_over_thresh,\n", 223 | " 'Inscope Accuracy': inscope_recall_over_thresh,\n", 224 | " 'OOS Recall': oos_recall_over_thresh,\n", 225 | " 'MCC': mcc_over_thresh})\n", 226 | "df_metrics.head(10)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 5, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | 
"df_metrics.to_csv(f'results/{platform}_{bot_name}.csv', index=False)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 6, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/plain": [ 246 | "{'IMMUNITY',\n", 247 | " 'INTERNATIONAL_SHIPPING',\n", 248 | " 'MODES_OF_PAYMENTS',\n", 249 | " 'PORTAL_ISSUE',\n", 250 | " 'REFER_EARN',\n", 251 | " 'START_OVER',\n", 252 | " 'WORK_FROM_HOME'}" 253 | ] 254 | }, 255 | "execution_count": 6, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [ 261 | "df_res = pd.read_csv(f'preds/{platform}_{bot_name}.csv')\n", 262 | "set(df_res['predicted_node']) - set(df_res['label'])" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [] 278 | } 279 | ], 280 | "metadata": { 281 | "kernelspec": { 282 | "display_name": "Python (py36)", 283 | "language": "python", 284 | "name": "py36" 285 | }, 286 | "language_info": { 287 | "codemirror_mode": { 288 | "name": "ipython", 289 | "version": 3 290 | }, 291 | "file_extension": ".py", 292 | "mimetype": "text/x-python", 293 | "name": "python", 294 | "nbconvert_exporter": "python", 295 | "pygments_lexer": "ipython3", 296 | "version": "3.6.8" 297 | } 298 | }, 299 | "nbformat": 4, 300 | "nbformat_minor": 4 301 | } 302 | --------------------------------------------------------------------------------