├── tests ├── utils │ ├── __init__.py │ ├── test_workspace_credentials.py │ ├── test_skills_util.py │ └── test_lang_utils.py ├── end2end │ ├── __init__.py │ └── end2end_test.py ├── data_analysis │ ├── __init__.py │ ├── test_summary_generator.py │ ├── test_similarity_analyzer.py │ └── test_divergence_analyzer.py ├── experimentation │ ├── __init__.py │ └── test_data_manipulator.py ├── highlighting │ ├── __init__.py │ └── test_highlighting.py ├── term_analysis │ ├── __init__.py │ ├── test_entity_analyzer.py │ ├── test_keyword_analyzer.py │ └── test_chi2_analyzer.py ├── confidence_analysis │ ├── __init__.py │ └── test_confidence_analysis.py ├── __init__.py └── resources │ └── test_workspaces │ ├── test_set_action.tsv │ └── customer_care_skill_test.tsv ├── assistant_skill_analysis ├── utils │ ├── __init__.py │ ├── parse_secret.py │ ├── lang_utils.py │ └── skills_util.py ├── inferencing │ ├── __init__.py │ └── inferencer.py ├── data_analysis │ ├── __init__.py │ ├── similarity_analyzer.py │ ├── summary_generator.py │ └── divergence_analyzer.py ├── experimentation │ ├── __init__.py │ └── data_manipulator.py ├── highlighting │ ├── __init__.py │ └── highlighter.py ├── term_analysis │ ├── __init__.py │ ├── entity_analyzer.py │ ├── keyword_analyzer.py │ └── chi2_analyzer.py ├── confidence_analysis │ ├── __init__.py │ └── confidence_analyzer.py ├── __init__.py └── resources │ ├── en │ └── stopwords │ ├── de │ └── stopwords │ ├── fr │ └── stopwords │ ├── it │ └── stopwords │ ├── es │ └── stopwords │ ├── pt │ └── stopwords │ ├── zh-tw │ └── stopwords │ ├── nl │ ├── LICENSE.stopwords-nl │ └── stopwords │ ├── zh-cn │ └── stopwords │ └── cs │ └── stopwords ├── MANIFEST.in ├── _version.py ├── setup.cfg ├── requirements.txt ├── CONTRIBUTING.md ├── setup.py ├── test_set.tsv ├── test_set.csv ├── scripts └── train_test_split.py ├── README.md └── LICENSE /tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /tests/end2end/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/data_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/experimentation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/highlighting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/term_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/confidence_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assistant_skill_analysis/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assistant_skill_analysis/inferencing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assistant_skill_analysis/data_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assistant_skill_analysis/experimentation/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assistant_skill_analysis/highlighting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assistant_skill_analysis/term_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include assistant_skill_analysis/resources/*/* -------------------------------------------------------------------------------- /assistant_skill_analysis/confidence_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assistant_skill_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "2.0.1" 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | 3 | nltk.download("punkt") 4 | -------------------------------------------------------------------------------- /_version.py: -------------------------------------------------------------------------------- 1 | # Semantic versioning 2 | # MAJOR.MINOR.PATCH 3 | 4 | __version__ = '2.0.1' 5 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | [easy_install] 5 | index-url=pypi.python.org 
-------------------------------------------------------------------------------- /assistant_skill_analysis/resources/en/stopwords: -------------------------------------------------------------------------------- 1 | an 2 | a 3 | in 4 | on 5 | be 6 | or 7 | of 8 | and 9 | can 10 | is 11 | to 12 | the 13 | i -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/de/stopwords: -------------------------------------------------------------------------------- 1 | der 2 | die 3 | und 4 | in 5 | von 6 | das 7 | den 8 | für 9 | im 10 | mit 11 | ist 12 | sie 13 | auf 14 | zu 15 | des 16 | so -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/fr/stopwords: -------------------------------------------------------------------------------- 1 | de 2 | la 3 | l' 4 | le 5 | et 6 | les 7 | des 8 | a 9 | d' 10 | du 11 | en 12 | un 13 | a 14 | pour 15 | dan 16 | une 17 | que 18 | est 19 | au -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/it/stopwords: -------------------------------------------------------------------------------- 1 | di 2 | e 3 | il 4 | la 5 | che 6 | a 7 | in 8 | per 9 | un 10 | del 11 | l' 12 | è 13 | non 14 | i 15 | le 16 | della 17 | una 18 | da 19 | con 20 | si -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/es/stopwords: -------------------------------------------------------------------------------- 1 | de 2 | la 3 | el 4 | en 5 | que 6 | a 7 | y 8 | los 9 | del 10 | por 11 | un 12 | las 13 | con 14 | se 15 | una 16 | para 17 | al 18 | su 19 | no 20 | este -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/pt/stopwords: -------------------------------------------------------------------------------- 1 | de 2 | e 3 | a 4 | o 5 | do 6 | 
import os

# Environment variables that hold comma-separated Watson Assistant
# credential blobs (populated by CI secrets).
WA_SECRETS = ["WA_CONFIG", "WA_CONFIG_ACTION"]

if __name__ == "__main__":
    # Dump each secret into ./<name>.txt, one comma-separated value per line,
    # so downstream tests can read credentials from plain files.
    for secret in WA_SECRETS:
        values = os.environ[secret].split(",")
        out_path = "./" + secret.lower() + ".txt"
        with open(out_path, "w", encoding="utf-8") as handle:
            handle.write("".join(value + "\n" for value in values))
start by searching through the [issues](https://github.com/watson-developer-cloud/assistant-dialog-skill-analysis/issues) and [pull requests](https://github.com/watson-developer-cloud/assistant-dialog-skill-analysis/pulls) to see if anyone else has raised a similar issue. 4 | 5 | If you don't see an issue listed, and you think it fits into the intended use of this repository, do this: 6 | * **If your contribution is minor,** such as a bug fix, open a pull request. 7 | * **If your contribution is major,** such as a new feature, start by opening an issue first. Others can then weigh in before you commence any work. -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/nl/LICENSE.stopwords-nl: -------------------------------------------------------------------------------- 1 | 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2016 Gene Diaz 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
def find_version(version_file='_version.py'):
    """Return the package version string parsed from *version_file*.

    Looks for a top-level ``__version__ = '...'`` (or ``"..."``) assignment.
    The original implementation only matched single quotes and hard-coded
    the path; both are now generalized while keeping the old default.

    :param version_file: path to the module defining ``__version__``
    :return: the version string, e.g. ``'2.0.1'``
    :raises Exception: if no ``__version__`` assignment is found
    """
    with open(version_file, encoding='utf8') as f:
        contents = f.read()
    # Accept either quote style; non-greedy group captures the version body.
    match_result = re.search(
        r'^__version__ *= *[\'"](.*?)[\'"]', contents, re.MULTILINE
    )
    if match_result:
        return match_result.group(1)
    raise Exception('Unable to find version string')
class TestSummaryGenerator(unittest.TestCase):
    """Unit tests for the summary_generator module."""

    # Sample dialog skill shipped with the test resources.
    _SKILL_PATH = "tests/resources/test_workspaces/skill-Customer-Care-Sample.json"

    @classmethod
    def setUpClass(cls):
        # Parse the sample skill once and share the resulting frame across tests.
        with open(cls._SKILL_PATH, "r") as skill_file:
            skill_json = json.load(skill_file)
        utility = lang_utils.LanguageUtility("en")
        extracted = skills_util.extract_workspace_data(skill_json, utility)
        # Only the first element (the utterance/intent records) is needed here.
        cls.workspace_df = pd.DataFrame(extracted[0])

    def test_class_imbalance(self):
        # The sample workspace is known to be imbalanced across intents.
        self.assertEqual(
            summary_generator.class_imbalance_analysis(self.workspace_df),
            True,
            "Test class imbalance detection failed",
        )

    def tearDown(self):
        unittest.TestCase.tearDown(self)
11 | '' 12 | ' 13 | " 14 | ( 15 | ) 16 | [ 17 | ] 18 | { 19 | } 20 | * 21 | % 22 | + 23 | 。 24 | 25 | 一 26 | 一会儿 27 | 一边 28 | 一面 29 | 上 30 | 下 31 | 不 32 | 不但 33 | 不光 34 | 不可 35 | 不如 36 | 不是 37 | 不管 38 | 不论 39 | 与 40 | 与其 41 | 个 42 | 中 43 | 为 44 | 之 45 | 之所以 46 | 也 47 | 也不 48 | 也许 49 | 也许是 50 | 了 51 | 于 52 | 从 53 | 他 54 | 他们 55 | 以 56 | 会 57 | 但 58 | 你们 59 | 便 60 | 倘若 61 | 先 62 | 全 63 | 其 64 | 再 65 | 到 66 | 前 67 | 十 68 | 即使 69 | 却 70 | 又 71 | 及 72 | 只 73 | 只有 74 | 只要 75 | 可 76 | 可以 77 | 可是 78 | 可能 79 | 各 80 | 后 81 | 向 82 | 和 83 | 哪怕 84 | 因为 85 | 因此 86 | 在 87 | 地 88 | 多 89 | 她 90 | 她们 91 | 如果 92 | 宁可 93 | 它 94 | 它们 95 | 对 96 | 将 97 | 小 98 | 就 99 | 尽管 100 | 已 101 | 已经 102 | 并 103 | 并且 104 | 很 105 | 我 106 | 我们 107 | 或 108 | 所 109 | 所以 110 | 才 111 | 把 112 | 据 113 | 无论 114 | 既 115 | 既然 116 | 时 117 | 是 118 | 是因为 119 | 更 120 | 最 121 | 有 122 | 未 123 | 来 124 | 此 125 | 每 126 | 没有 127 | 然后 128 | 然而 129 | 用 130 | 由 131 | 由于 132 | 的 133 | 看 134 | 着 135 | 种 136 | 而 137 | 而且 138 | 而是 139 | 能 140 | 自己 141 | 至 142 | 虽然 143 | 被 144 | 要 145 | 认为 146 | 让 147 | 该 148 | 还 149 | 还是 150 | 这 151 | 通过 152 | 那么 153 | 都 154 | 非 155 | 、 156 | -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/cs/stopwords: -------------------------------------------------------------------------------- 1 | ačkoli 2 | ale 3 | asi 4 | aspoň 5 | během 6 | bude 7 | budeme 8 | budeš 9 | budete 10 | budou 11 | budu 12 | byl 13 | byla 14 | byli 15 | bylo 16 | byly 17 | bys 18 | je 19 | jsi 20 | jsme 21 | jsou 22 | jste 23 | má 24 | mají 25 | málo 26 | mám 27 | máme 28 | máš 29 | máte 30 | mé 31 | mě 32 | mít 33 | mně 34 | mnou 35 | na 36 | nad 37 | nade 38 | nám 39 | námi 40 | nás 41 | náš 42 | naše 43 | naši 44 | ně 45 | nebo 46 | nedělá 47 | nedělají 48 | nedělám 49 | neděláme 50 | neděláš 51 | neděláte 52 | nějak 53 | nejsi 54 | někde 55 | někdo 56 | nemají 57 | nemáme 58 | nemáte 59 | neměl 60 | němu 61 | není 62 | nestačí 63 | než 64 | nic 65 | 
nich 66 | ním 67 | nimi 68 | od 69 | ode 70 | on 71 | ona 72 | oni 73 | ono 74 | ony 75 | pak 76 | po 77 | před 78 | přes 79 | přese 80 | pro 81 | proč 82 | proti 83 | protože 84 | se 85 | skoro 86 | smějí 87 | smí 88 | snad 89 | ta 90 | tak 91 | takhle 92 | taky 93 | tam 94 | tamhle 95 | tamhleto 96 | tamto 97 | tě 98 | tebe 99 | tebou 100 | ted' 101 | tedy 102 | ten 103 | ti 104 | to 105 | tobě 106 | tohle 107 | toto 108 | třeba 109 | tvá 110 | tvé 111 | tvoje 112 | tvůj 113 | ty 114 | už 115 | vám 116 | vámi 117 | vás 118 | váš 119 | vaše 120 | vaši 121 | ve 122 | vedle 123 | vlastně 124 | vy 125 | za -------------------------------------------------------------------------------- /tests/resources/test_workspaces/test_set_action.tsv: -------------------------------------------------------------------------------- 1 | hiii watson assistant chat bot, would you pleased let me known what are them hours? What are your hours? 2 | Hi I wanted to go to the store now to buy something I really need, is the store open or closed at the moment? What are your hours? 3 | hey bot, what are the hours What are your hours? 4 | What time do you close on jan 26th? What are your hours? 5 | Does the store in the city square opens till 8:30pm on weekends? What are your hours? 6 | how early do you open on Saturdyas What are your hours? 7 | Are you closed on Sunday What are your hours? 8 | What time are you closing tmr? What are your hours? 9 | what can i do to talk to someone Schedule An Appointment 10 | tell me the address Where are you located? 11 | how to get there from times square Where are you located? 12 | thanks again. Thank you 13 | What time is your store open on christmas? What are your hours? 14 | Ok, that is it, I really have to go, goodbye Goodbye 15 | i have an appointment in one hour at times square, i have to go. 
@unittest.skip("skip")
class TestWorkspaceCredential(unittest.TestCase):
    """Smoke tests that exercise real Watson Assistant credentials.

    Skipped unconditionally via the decorator; to run them live, remove the
    skip and provide wa_config.txt / wa_config_action.txt in the working
    directory (written by assistant_skill_analysis/utils/parse_secret.py).
    """

    @classmethod
    def setUpClass(cls):
        """Read the api key, workspace id and assistant id from config files."""
        # wa_config.txt: line 1 = IAM api key, line 2 = workspace (skill) id
        with open(CONFIG_FILE) as fi:
            cls.apikey = fi.readline().strip()
            cls.wksp_id = fi.readline().strip()

        # wa_config_action.txt: line 1 is unused here, line 2 = assistant id
        with open(CONFIG_FILE_ACTION) as fi:
            _ = fi.readline().strip()
            cls.assistant_id = fi.readline().strip()

    def test_workspace_credentials(self):
        """V1 path: retrieve the workspace and sanity-check its intent count."""
        conversation = retrieve_conversation(
            iam_apikey=self.apikey,
            url=DEV_DATACENTER[0],
            authenticator_url=DEV_DATACENTER[1],
        )
        ws_json = retrieve_workspace(skill_id=self.wksp_id, conversation=conversation)
        # The reference workspace is expected to define exactly 9 intents.
        self.assertTrue(len(ws_json["intents"]) == 9)

    def test_action_credentials(self):
        """V2 path: send a stateless message and check the top intent confidence."""
        conversation = retrieve_conversation(
            iam_apikey=self.apikey,
            url=DEV_DATACENTER[0],
            authenticator_url=DEV_DATACENTER[1],
            sdk_version="V2",
        )
        result = conversation.message_stateless(
            input={
                "message_type": "text",
                "text": "thank you",
                "options": {"alternate_intents": True},
            },
            context={"metadata": {"user_id": "123"}},
            assistant_id=self.assistant_id,
        ).get_result()

        # "thank you" should be classified with near-certain confidence.
        self.assertAlmostEqual(
            1, result["output"]["intents"][0]["confidence"], delta=1e-6
        )
def under_sampling(workspace, workspace_pd, quantile=None):
    """
    Under-sample the workspace so that no intent keeps more examples than a
    frequency threshold derived from the label distribution.

    :param workspace: json format outputted by the assistant api
    :param workspace_pd: workspace dataframe with an "intent" column
    :param quantile: quantile in [0, 1] of the label-frequency distribution
                     used as the sampling threshold; defaults to 0.75
    :return train_workspace_data: list of intent json dicts, each with
                                  "intent", "description" and "examples"
    """
    label_frequency_dict = dict(Counter(workspace_pd["intent"]).most_common())

    # Explicit None check: the old `if not quantile` silently replaced a
    # caller-supplied quantile of 0 with the default.
    if quantile is None:
        quantile = 0.75
    sampling_threshold = int(
        np.quantile(a=list(label_frequency_dict.values()), q=[quantile])[0]
    )

    train_workspace_data = []
    for intent in workspace["intents"]:
        examples = intent["examples"]
        # Intents with no examples are dropped entirely.
        if not examples:
            continue

        if label_frequency_dict[intent["intent"]] > sampling_threshold:
            # random.sample == shuffle-then-slice: uniform sample w/o replacement.
            examples = random.sample(examples, sampling_threshold)

        train_workspace_data.append(
            {
                "intent": intent["intent"],
                "description": "string",
                "examples": list(examples),
            }
        )

    return train_workspace_data
-------------------------------------------------------------------------------- /test_set.tsv: -------------------------------------------------------------------------------- 1 | many thanks Thanks 2 | thank you Thanks 3 | ciao Goodbye 4 | see ya Goodbye 5 | Are you open on sundays, and if so what are the hours? Customer_Care_Store_Hours 6 | What are the hours of operation? Customer_Care_Store_Hours 7 | what are your hours Customer_Care_Store_Hours 8 | What time do you close today Customer_Care_Store_Hours 9 | Does the store in the city center opens till 8pm on weekends? Customer_Care_Store_Hours 10 | how early do you open on Saturdays Customer_Care_Store_Hours 11 | Are you open on Sunday Customer_Care_Store_Hours 12 | What time are you closing today? Customer_Care_Store_Hours 13 | When can I meet with one of your employees at your store? Customer_Care_Appointments 14 | I prefer a face to face visit Customer_Care_Appointments 15 | can you make an appointment for me Customer_Care_Appointments 16 | Set up an appt Customer_Care_Appointments 17 | what is the address Customer_Care_Store_Location 18 | I want to know about a store Customer_Care_Store_Location 19 | Find store Customer_Care_Store_Location 20 | give me directions Customer_Care_Store_Location 21 | where are you Customer_Care_Store_Location 22 | i changed my mind Cancel 23 | cancel the request Cancel 24 | can you help Help 25 | help Help 26 | Hey there General_Greetings 27 | Who is this? General_Greetings 28 | Looking good eve General_Greetings 29 | How is it going? General_Greetings 30 | Hi advisor General_Greetings 31 | Hey twin General_Greetings 32 | Can I talk to someone? General_Connect_to_Agent 33 | Please connect me to a live agent General_Connect_to_Agent 34 | Where is the closest agent? General_Connect_to_Agent 35 | Please assist me to get to an agent General_Connect_to_Agent 36 | Can I speak with somebody? General_Connect_to_Agent 37 | Do not want a robot? 
class TestNotebook(unittest.TestCase):
    """End-to-end tests: provision a throwaway workspace, then execute the
    analysis notebooks top-to-bottom and assert they finish without errors.
    Requires live credentials in ./wa_config.txt and ./wa_config_action.txt.
    """

    @classmethod
    def setUpClass(cls):
        # points to dev010_Haode-Qi
        CONFIG_FILE = "./wa_config.txt"
        # Line 1 of the config file is the IAM api key.
        with open(CONFIG_FILE) as fi:
            cls.apikey = fi.readline().strip()

        # Sample dialog skill used to create the temporary test workspace.
        with open(
            "tests/resources/test_workspaces/skill-Customer-Care-Sample.json",
            "r",
            encoding="utf-8",
        ) as f:
            data = json.load(f)

        URL, authenticator_url = skills_util.DEV_DATACENTER
        cls.conversation = skills_util.retrieve_conversation(
            iam_apikey=cls.apikey,
            url=URL,
            authenticator_url=authenticator_url,
            api_version=skills_util.DEFAULT_V1_API_VERSION,
        )
        # Workspace created here is deleted again in tearDownClass.
        cls.wksp_id = skills_util.get_test_workspace(
            conversation=cls.conversation, workspace_json=data
        )

        # points to dev010_Haode-Qi
        CONFIG_FILE = "./wa_config_action.txt"
        # Line 1 is unused here; line 2 is the assistant id for action tests.
        with open(CONFIG_FILE) as fi:
            _ = fi.readline().strip()
            cls.assistant_id = fi.readline().strip()

    def test_notebook(self):
        """Run the classic dialog-skill analysis notebook end to end."""
        test_file = "tests/resources/test_workspaces/customer_care_skill_test.tsv"
        nb, errors = skills_util.run_notebook(
            notebook_path="classic_dialog_skill_analysis.ipynb",
            iam_apikey=self.apikey,
            wksp_id=self.wksp_id,
            test_file=test_file,
            output_path="notebook_output",
        )
        self.assertEqual(errors, [])

    def test_action_notebook(self):
        """Run the new-experience (actions) analysis notebook end to end."""
        test_file = "tests/resources/test_workspaces/test_set_action.tsv"
        wksp_json = (
            "tests/resources/test_workspaces/customer_care_sample_action_skill.json"
        )
        nb, errors = skills_util.run_notebook(
            notebook_path="new_experience_skill_analysis.ipynb",
            iam_apikey=self.apikey,
            test_file=test_file,
            output_path="notebook_output",
            assistant_id=self.assistant_id,
            action_wksp_json_path=wksp_json,
        )
        self.assertEqual(errors, [])

    @classmethod
    def tearDownClass(cls):
        """Remove the workspace created in setUpClass."""
        unittest.TestCase.tearDown(cls)
        cls.conversation.delete_workspace(workspace_id=cls.wksp_id)
skill_file: 21 | ( 22 | workspace_data, 23 | workspace_vocabulary, 24 | _, 25 | _, 26 | ) = skills_util.extract_workspace_data(json.load(skill_file), cls.lang_util) 27 | cls.workspace_df = pd.DataFrame(workspace_data) 28 | 29 | def test_calculate_cosine_similarity(self): 30 | feature_matrix1 = np.array([[1, 2, 0], [0, 0, 1], [1, 2, 0]]) 31 | cos_sim_score1 = similarity_analyzer._calculate_cosine_similarity( 32 | feature_matrix1 33 | ) 34 | self.assertEqual( 35 | np.abs(np.sum(np.diag(cos_sim_score1) - np.array([1, 1, 1]))) < TOLERANCE, 36 | True, 37 | "Similarity Analyzer Test fail", 38 | ) 39 | 40 | self.assertEqual( 41 | np.abs(cos_sim_score1[0, 1]) < TOLERANCE, 42 | True, 43 | "Similarity Analyzer Test fail", 44 | ) 45 | 46 | self.assertEqual( 47 | np.abs(cos_sim_score1[0, 2] - 1) < TOLERANCE, 48 | True, 49 | "Similarity Analyzer Test fail", 50 | ) 51 | 52 | def test_ambiguous_examples_analysis(self): 53 | ambiguous_dataframe = similarity_analyzer.ambiguous_examples_analysis( 54 | self.workspace_df, threshold=0.85, lang_util=self.lang_util 55 | ) 56 | self.assertEqual( 57 | len(ambiguous_dataframe[ambiguous_dataframe["similarity score"] < 0.85]), 58 | 0, 59 | "Similarity Analyzer Test fail", 60 | ) 61 | 62 | self.assertEqual( 63 | len( 64 | np.intersect1d( 65 | ambiguous_dataframe["Intent1"], ambiguous_dataframe["Intent2"] 66 | ) 67 | ), 68 | 0, 69 | "Similarity Analyzer Test fail", 70 | ) 71 | 72 | def tearDown(self): 73 | unittest.TestCase.tearDown(self) 74 | 75 | 76 | if __name__ == "__main__": 77 | unittest.main() 78 | -------------------------------------------------------------------------------- /tests/term_analysis/test_entity_analyzer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import json 3 | import pandas as pd 4 | 5 | from assistant_skill_analysis.utils import skills_util, lang_utils 6 | from assistant_skill_analysis.term_analysis import entity_analyzer 7 | 8 | 9 | class 
import unittest
import json
import pandas as pd

from assistant_skill_analysis.utils import skills_util, lang_utils
from assistant_skill_analysis.term_analysis import entity_analyzer


class TestChi2Analyzer(unittest.TestCase):
    """
    Test for Entity Analyzer module

    NOTE(review): the docstring previously said "Chi2 Analyzer" and the class
    name still does -- both appear copy-pasted from the chi2 tests.  The name
    is kept unchanged so test selection by name keeps working, but everything
    below exercises entity_analyzer.
    """

    @classmethod
    def setUpClass(cls):
        # Real workspace data, mirroring the input shape of inference output.
        test_skill_file = (
            "tests/resources/test_workspaces/skill-Customer-Care-Sample.json"
        )
        with open(test_skill_file, "r") as skill_file:
            (
                workspace_data,
                workspace_vocabulary,
                _,
                _,
            ) = skills_util.extract_workspace_data(
                json.load(skill_file), lang_utils.LanguageUtility("en")
            )
        cls.workspace_df = pd.DataFrame(workspace_data)
        # Hand-built inference result: entity1 appears twice with confidences
        # 1 and 0.5 (average 0.75); entity2 appears only in the first example.
        cls.mock_test_result = pd.DataFrame(
            {
                "correct_intent": ["intent1", "intent2"],
                "entities": [
                    [
                        {"entity": "entity1", "confidence": 1},
                        {"entity": "entity2", "confidence": 1},
                    ],
                    [{"entity": "entity1", "confidence": 0.5}],
                ],
            }
        )

    def test_derive_entity_label_matrix(self):
        (
            entity_feat_mat,
            labels,
            entity_avg_conf,
        ) = entity_analyzer._derive_entity_label_matrix(
            self.mock_test_result, ["entity1", "entity2"]
        )
        # second example contains no entity2 mention
        self.assertEqual(entity_feat_mat[1][1], 0, "test for entity analyzer fail")
        self.assertEqual(
            entity_avg_conf["entity1"], 0.75, "test for entity analyzer fail"
        )

    def test_entity_label_correlation_analysis(self):
        entity = {"entities": [{"entity": "entity1"}, {"entity": "entity2"}]}
        entities_list = [item["entity"] for item in entity["entities"]]
        # p_value=1 keeps every entity regardless of significance
        entity_label_df = entity_analyzer.entity_label_correlation_analysis(
            self.mock_test_result, entities_list, p_value=1
        )
        self.assertEqual(
            entity_label_df.iloc[0]["Correlated Entities"],
            "entity2",
            "test for entity analyzer fail",
        )

    def tearDown(self):
        unittest.TestCase.tearDown(self)


if __name__ == "__main__":
    unittest.main()
import unittest
import json

from assistant_skill_analysis.utils import skills_util, lang_utils


class TestSkillsUtil(unittest.TestCase):
    """Test for skills utils module"""

    @classmethod
    def setUpClass(cls):
        # Parse both workspaces up front and close the files immediately.
        # The previous version kept the handles open for the whole class
        # lifetime, and json.load() on a handle only works once, so a test
        # could never re-read its input.
        with open(
            "tests/resources/test_workspaces/skill-Customer-Care-Sample.json", "r"
        ) as skill_file:
            cls.skill_json = json.load(skill_file)
        with open(
            "tests/resources/test_workspaces/customer_care_sample_action_skill.json",
            "r",
        ) as action_skill_file:
            cls.action_skill_json = json.load(action_skill_file)
        cls.lang_util = lang_utils.LanguageUtility("en")

    def test_extract_action_workspace_data(self):
        (
            workspace_pd,
            workspace_vocabulary,
            entities,
            intent_action_map,
        ) = skills_util.extract_workspace_data(self.action_skill_json, self.lang_util)

        self.assertTrue(workspace_pd is not None, "Extract workspace failed")
        self.assertEqual(
            len(workspace_pd["intent"].unique()), 7, "Extract workspace failed"
        )

        # check correct number of entities parsed
        self.assertEqual(7, len(entities))

        # check intent to action mapping working expectedly
        self.assertEqual(
            "Where are you located?", intent_action_map["action_11419_intent_44259"]
        )
        self.assertEqual("Thank you", intent_action_map["action_12038_intent_13364"])
        self.assertEqual("Goodbye", intent_action_map["action_22890_intent_48257"])
        self.assertEqual(
            "Schedule An Appointment", intent_action_map["action_27164_intent_22860"]
        )
        self.assertEqual(
            "What are your hours?", intent_action_map["action_33190_intent_33203"]
        )
        self.assertEqual(
            "What can I do?", intent_action_map["action_5042_intent_38841"]
        )
        self.assertEqual("Fallback", intent_action_map["fallback_connect_to_agent"])

    def test_extract_workspace_data(self):
        workspace_pd, workspace_vocabulary, _, _ = skills_util.extract_workspace_data(
            self.skill_json, self.lang_util
        )
        self.assertTrue(workspace_pd is not None, "Extract workspace failed")
        self.assertEqual(
            len(workspace_pd["intent"].unique()), 9, "Extract workspace failed"
        )


if __name__ == "__main__":
    unittest.main()
import unittest
from assistant_skill_analysis.utils.lang_utils import LanguageUtility


class TestLangUtils(unittest.TestCase):
    """Test for lang utils module

    Each test builds a LanguageUtility for one language and checks that
    preprocessing (lowercasing / accent folding) and tokenization (including
    stemming where supported) behave as expected.
    """

    # NOTE(review): the previous version opened
    # skill-Customer-Care-Sample.json into cls.skill_file in setUpClass, but
    # no test ever used it, and the per-test tearDown closed that
    # class-level handle (closed after the first test, re-closed after every
    # later one).  The unused fixture and the mis-scoped cleanup have been
    # removed; test behavior is unchanged.

    def test_en(self):
        util = LanguageUtility("en")
        sent = util.preprocess("boston resided on the east coast of us!")
        self.assertEqual(sent, "boston resided on the east coast of us ")
        sent = util.tokenize(sent)
        self.assertEqual(
            sent, ["boston", "resid", "on", "the", "east", "coast", "of", "us"]
        )

    def test_fr(self):
        util = LanguageUtility("fr")
        sent = util.preprocess("ils se donnèrent")
        self.assertEqual(sent, "ils se donnerent")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ["il", "se", "donnerent"])

    def test_cs(self):
        util = LanguageUtility("cs")
        sent = util.preprocess("ils se donnèrent")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ["ils", "se", "donnerent"])

    def test_pt(self):
        util = LanguageUtility("pt")
        sent = util.preprocess("ils se donnèrent")
        self.assertEqual(sent, "ils se donnerent")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ["ils", "se", "donnerent"])

    def test_it(self):
        util = LanguageUtility("it")
        sent = util.preprocess("pronuncerà")
        self.assertEqual(sent, "pronuncerà")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ["pronunc"])

    def test_es(self):
        util = LanguageUtility("es")
        sent = util.preprocess("toreándolo")
        self.assertEqual(sent, "toreandolo")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ["tor"])

    def test_de(self):
        util = LanguageUtility("de")
        sent = util.preprocess("Autobahnen")
        self.assertEqual(sent, "autobahnen")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ["autobahn"])

    def test_zh_cn(self):
        util = LanguageUtility("zh-cn")
        sent = util.preprocess("不想当兼职")
        self.assertEqual(sent, "不想当兼职")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ['不想', '当', '兼职'])

    def test_zh_tw(self):
        util = LanguageUtility("zh-tw")
        sent = util.preprocess("畀到機會我嘗試")
        self.assertEqual(sent, "畀到機會我嘗試")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ['畀', '到', '機會', '我', '嘗試'])


if __name__ == "__main__":
    unittest.main()
import unittest
import json
import numpy as np
import pandas as pd

from assistant_skill_analysis.utils import skills_util, lang_utils
from assistant_skill_analysis.term_analysis import keyword_analyzer


class TestKeywordAnalyzer(unittest.TestCase):
    """Tests for the keyword_analyzer module."""

    @classmethod
    def setUpClass(cls):
        # Shared fixtures: a real workspace plus a tiny handcrafted corpus.
        cls.lang_util = lang_utils.LanguageUtility("en")
        workspace_path = (
            "tests/resources/test_workspaces/skill-Customer-Care-Sample.json"
        )
        with open(workspace_path, "r") as workspace_file:
            skill_json = json.load(workspace_file)
        workspace_data, _, _, _ = skills_util.extract_workspace_data(
            skill_json, cls.lang_util
        )
        cls.workspace_df = pd.DataFrame(workspace_data)
        cls.test_data = pd.DataFrame(
            {
                "utterance": [
                    "Boston is the capital city of massachusetts ",
                    "Boston Celtics is a famous NBA team",
                    "new york is a big city in the east coast",
                ],
                "intent": ["boston", "boston", "nyc"],
            }
        )

    def test_get_counts_per_label(self):
        """Content words are counted per intent; stopwords are dropped."""
        counts = keyword_analyzer._get_counts_per_label(self.test_data, self.lang_util)
        index_pairs = counts.index.tolist()
        self.assertEqual(
            ("boston", "Celtics") in index_pairs,
            True,
            "Key word analyzer test fails",
        )
        self.assertEqual(
            ("nyc", "coast") in index_pairs,
            True,
            "Key word analyzer test fails",
        )
        self.assertEqual(
            ("boston", "is") in index_pairs,
            False,
            "Key word analyzer test fails",
        )

    def test_get_top_n(self):
        """Top-n selection keeps n rows for each label."""
        counts = keyword_analyzer._get_counts_per_label(self.test_data, self.lang_util)
        top_n = keyword_analyzer._get_top_n(counts["n_w"], top_n=4)
        boston_rows = [
            label for (label, _) in top_n.index.tolist() if label == "boston"
        ]
        self.assertEqual(len(boston_rows), 4, "Key word analyzer test fails")

    def test_preprocess_for_heat_map(self):
        """Displayed row count is a whole multiple of the labels shown."""
        counts, top_counts = keyword_analyzer._preprocess_for_heat_map(
            self.workspace_df,
            label_for_display=30,
            max_token_display=30,
            class_list=None,
            lang_util=self.lang_util,
        )
        n_labels = len(counts.index.get_level_values(0).unique())
        expected_rows = np.int_(np.ceil(30 / n_labels)) * n_labels
        self.assertEqual(
            len(top_counts) == expected_rows, True, "Key word analyzer test fails"
        )

    def tearDown(self):
        unittest.TestCase.tearDown(self)


if __name__ == "__main__":
    unittest.main()
Customer_Care_Store_Hours 10 | how early do you open on Saturdays Customer_Care_Store_Hours 11 | Are you open on Sunday Customer_Care_Store_Hours 12 | What time are you closing today? Customer_Care_Store_Hours 13 | When can I meet with one of your employees at your store? Customer_Care_Appointments 14 | I prefer a face to face visit Customer_Care_Appointments 15 | can you make an appointment for me Customer_Care_Appointments 16 | Set up an appt Customer_Care_Appointments 17 | what is the address Customer_Care_Store_Location 18 | I want to know about a store Customer_Care_Store_Location 19 | Find store Customer_Care_Store_Location 20 | give me directions Customer_Care_Store_Location 21 | where are you Customer_Care_Store_Location 22 | i changed my mind Cancel 23 | cancel the request Cancel 24 | can you help Help 25 | help Help 26 | Hey there General_Greetings 27 | Who is this? General_Greetings 28 | Looking good eve General_Greetings 29 | How is it going? General_Greetings 30 | Hi advisor General_Greetings 31 | Hey twin General_Greetings 32 | Can I talk to someone? General_Connect_to_Agent 33 | Please connect me to a live agent General_Connect_to_Agent 34 | Where is the closest agent? General_Connect_to_Agent 35 | Please assist me to get to an agent General_Connect_to_Agent 36 | Can I speak with somebody? General_Connect_to_Agent 37 | Do not want a robot? General_Connect_to_Agent 38 | I want to speak to a human General_Connect_to_Agent 39 | representative General_Connect_to_Agent 40 | Can I connect to an agent? 
General_Connect_to_Agent 41 | Send me to an agent General_Connect_to_Agent 42 | there are now four references including britannica giving the correct date which is getting silly you complain in your edit summary that you can t find them so i ve added quotes to two to help 43 | i am simply relating that the research and professional organizations are essentially unanimous in indicating that these laws are not having the intended effects and that they may even be making the 44 | um taking a shot here um lets say three separate people whos wills are to each other 45 | must be said you are by far the most disrespectful malicious destructive user in all of wikipedia it s people like you who abuse their power that make this site defunct or obsolete 46 | august utc a simple you re wrong would have sufficed no need to try and make me look like an idiot 47 | that is ridiculous unless there s a good and non disingenuous response i would absolutely agree with you blocking indef outright falsifying sources should simply never be tolerated 48 | i moved this from quebec diaspora to this title because disapora is a description applicable only to jew s 49 | knock it off you bloody cwi trot 50 | no he is an arrogant self serving immature idiot get it right 51 | ices could hardly be labelled as a very obviously pro seal hunting organization the scientific base of seal management is the responsibility of ices covered out by the most prominent seal research... 
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import CountVectorizer
from IPython.display import display, Markdown, HTML


def ambiguous_examples_analysis(workspace_pd, lang_util, threshold=0.7):
    """
    Analyze the workspace and find similar utterances that belong to
    different intents.
    :param workspace_pd: pandas dataframe in format of [utterance, label]
    :param lang_util: language utility whose tokenize() is used by the vectorizer
    :param threshold: cut off for similarity score
    :return: pandas dataframe in format of ['Intent1', 'Utterance1', 'Intent2',
             'Utterance2', 'similarity score']
    """
    # first create the feature matrix (unigram + bigram counts)
    vectorizer = CountVectorizer(ngram_range=(1, 2), tokenizer=lang_util.tokenize)
    # .toarray() instead of .todense(): np.matrix is deprecated in numpy
    workspace_bow = vectorizer.fit_transform(workspace_pd["utterance"]).toarray()
    cos_sim_score_matrix = _calculate_cosine_similarity(workspace_bow)

    # remove the lower triangle of the matrix and apply threshold
    similar_utterance_index = np.argwhere(
        (cos_sim_score_matrix - np.tril(cos_sim_score_matrix)) > threshold
    )

    # Collect rows in a list and build the dataframe once:
    # DataFrame.append was removed in pandas 2.0 and was O(n^2) anyway.
    rows = []
    for index in similar_utterance_index:
        if (
            workspace_pd["intent"].iloc[index[0]]
            != workspace_pd["intent"].iloc[index[1]]
        ):
            rows.append(
                {
                    "Intent1": workspace_pd["intent"].iloc[index[0]],
                    "Utterance1": workspace_pd["utterance"].iloc[index[0]],
                    "Intent2": workspace_pd["intent"].iloc[index[1]],
                    "Utterance2": workspace_pd["utterance"].iloc[index[1]],
                    "similarity score": cos_sim_score_matrix[index[0], index[1]],
                }
            )
    similar_utterance_pd = pd.DataFrame(
        rows,
        columns=["Intent1", "Utterance1", "Intent2", "Utterance2", "similarity score"],
    )

    if not similar_utterance_pd.empty:
        # full option name for clarity ("max_colwidth" resolved to the same key)
        with pd.option_context("display.max_colwidth", 250):
            display(
                HTML(
                    similar_utterance_pd.sort_values(
                        by=["similarity score"], ascending=False
                    ).to_html(index=False)
                )
            )
    else:
        display(Markdown("### There are no similar utterances within different Intent"))

    return similar_utterance_pd


def _calculate_cosine_similarity(workspace_bow):
    """
    Given bow representation of the workspace utterances, calculate pairwise
    cosine similarity scores.
    :param workspace_bow: dense representation of BOW of workspace utterances
    :return: cosine_similarity_matrix
    """
    # normalize rows to unit length, then a dot product gives cosine similarity
    workspace_bow = workspace_bow / np.linalg.norm(workspace_bow, axis=1, keepdims=True)
    return workspace_bow.dot(np.transpose(workspace_bow))
import numpy as np
import pandas as pd
from sklearn.feature_selection import chi2

# number of top correlated entities reported per intent
N = 5


def _derive_entity_label_matrix(train_full_results, entities):
    """
    Derive entity feature matrix for chi2 analysis using entity annotations
    from the message api
    :param train_full_results: pandas data frame output by inference
    :param entities: list of entities that is defined in the workspace
    :return entity_feature_matrix: numpy matrix of examples with entities x number of entities
    :return labels: numpy array: number of labels correspond to number of examples
    :return entity_average_confidence_dict: dict entity --> average confidence for entity
    """
    entity_feature_matrix = list()
    labels = list()
    entity_conf_dict = dict()  # entity -> summed confidence over all mentions
    entity_count_dict = dict()  # entity -> number of mentions
    entity_average_confidence_dict = dict()
    for i in range(len(train_full_results)):
        current_result = train_full_results.iloc[i]
        # only examples with at least one detected entity contribute a row
        if current_result["entities"]:
            # create empty feature vector
            current_feature = [0] * len(entities)
            for entity_reference in current_result["entities"]:
                e_ref = entity_reference["entity"]
                e_conf = entity_reference["confidence"]

                entity_idx = entities.index(e_ref)
                current_feature[entity_idx] += 1
                entity_conf_dict[e_ref] = entity_conf_dict.get(e_ref, 0) + e_conf
                entity_count_dict[e_ref] = entity_count_dict.get(e_ref, 0) + 1

            entity_feature_matrix.append(current_feature)
            labels.append(current_result["correct_intent"])

    entity_feature_matrix = np.array(entity_feature_matrix)
    labels = np.array(labels)
    for key in entity_conf_dict:
        entity_average_confidence_dict[key] = (
            entity_conf_dict[key] / entity_count_dict[key]
        )

    return entity_feature_matrix, labels, entity_average_confidence_dict


def entity_label_correlation_analysis(train_full_results, entities_list, p_value=0.05):
    """
    Apply chi2 analysis on entities of the training set
    :param train_full_results: pandas data frame output by inference
    :param entities_list: the list of entities that is defined in the workspace
    :param p_value: threshold for chi2 analysis
    :return entity_label_df: pandas df with col 1 being intents and col 2 entities
    """
    (
        entity_feature_matrix,
        labels,
        entity_average_confidence_dict,
    ) = _derive_entity_label_matrix(train_full_results, entities_list)
    entities_list = np.array(entities_list)
    unique_labels = list(set(labels))
    final_labels = list()
    final_entities = list()

    for label in unique_labels:
        # one-vs-rest chi2 test for this intent
        chi2_statistics, pval = chi2(entity_feature_matrix, labels == label)
        temp_entities_list = entities_list[pval < p_value]
        chi2_statistics = chi2_statistics[pval < p_value]
        # sort significant entities ascending by chi2; the strongest N are
        # taken from the tail below
        ordered_entities = temp_entities_list[np.argsort(chi2_statistics)]
        if len(ordered_entities) == 0:
            continue

        final_labels.append(label)
        final_entities.append(", ".join(ordered_entities[-N:]))

    entity_label_df = pd.DataFrame(
        {"Intent": final_labels, "Correlated Entities": final_entities}
    )

    return entity_label_df
import os
import random
import copy
import argparse
import json
import numpy as np

# label assigned to counterexamples (off-topic utterances) in the test split
OFFTOPIC_LABEL = 'SYSTEM_OUT_OF_DOMAIN'


def stratified_sampling(workspace, sampling_percentage=.8):
    """
    Perform stratified sampling on the workspace json
    :param workspace: json acceptable by watson assistant
    :param sampling_percentage: percentage of total to use for train
    :return: (train_workspace_data, test_workspace_data) where the first is a
             workspace json for training and the second is a list of
             tab-separated "utterance<TAB>intent" test lines
    """
    # copy everything except the intent list
    train_workspace_data = copy.deepcopy(workspace)
    train_workspace_data['name'] = workspace['name'] + '_train'
    train_workspace_data.pop('intents')

    train_workspace_intent_list = list()
    test_workspace_data = list()

    for i in range(len(workspace['intents'])):
        intent = workspace['intents'][i]

        cutoff, sampling_index = find_split_cut_off(
            intent['examples'], sampling_percentage)

        # train set
        train_examples = [
            intent['examples'][index] for index in sampling_index[:cutoff]]
        train_workspace_intent_list.append({'intent': workspace['intents'][i]['intent']})
        train_workspace_intent_list[i].update({"description": "string"})
        train_workspace_intent_list[i].update({"examples": train_examples})

        # test set
        test_examples = [
            intent['examples'][index] for index in sampling_index[cutoff:]]
        test_workspace_data.extend(
            [utterances['text'] + '\t' +
             workspace['intents'][i]['intent'] for utterances in test_examples])
    train_workspace_data['intents'] = train_workspace_intent_list

    # counter examples: tolerate workspaces without a 'counterexamples' key
    counterexamples = workspace.get('counterexamples', [])
    if len(counterexamples) > 0:
        train_workspace_data.pop('counterexamples', None)
        # train
        cutoff, sampling_index = find_split_cut_off(
            counterexamples, sampling_percentage)
        train_workspace_data['counterexamples'] = [
            counterexamples[index] for index in sampling_index[:cutoff]]
        # test
        test_workspace_data.extend(
            [counterexamples[index]['text'] + '\t' +
             OFFTOPIC_LABEL for index in sampling_index[cutoff:]])

    return train_workspace_data, test_workspace_data


def find_split_cut_off(enumerable, sampling_percentage):
    """
    Find split cutoff point for a random train/test split
    :param enumerable: sequence being split
    :param sampling_percentage: fraction of items to keep in train
    :return: (cutoff, sampling_index) -- shuffled indices; indices before
             cutoff go to train, the rest to test.  cutoff is -1 when the
             test share would be smaller than one item, so exactly one item
             is held out for test.
    """
    sampling_index = list(np.arange(len(enumerable)))
    random.shuffle(sampling_index)

    if len(enumerable) * (1 - sampling_percentage) < 1:
        cutoff = -1
    else:
        cutoff = int(np.ceil(sampling_percentage * len(sampling_index)))

    return cutoff, sampling_index


def main(args):
    """Split the input workspace json into <name>_train.json and <name>_test.tsv."""
    # close the input file promptly instead of leaking the handle
    with open(args.input_data, 'r') as input_file:
        workspace_data = json.load(input_file)
    train_workspace_data, test_workspace_data = stratified_sampling(workspace_data, args.percentage)
    output_name = os.path.basename(args.input_data).replace('.json', '')
    with open(os.path.join(args.output_folder, output_name + '_train.json'), 'w', encoding='utf-8') as file:
        json.dump(train_workspace_data, file)
    with open(os.path.join(args.output_folder, output_name + '_test.tsv'), 'w', encoding='utf-8') as file:
        file.writelines([line + '\n' for line in test_workspace_data])


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='This script will split workspace json ')
    parser.add_argument('-p', '--percentage', help='how much percentage of the data to keep in train', default=.8, type=float)
    parser.add_argument('-input', '--input_data', help='the location of the workspace json', required=True)
    parser.add_argument('-output', '--output_folder', help='the location of the train.json and test.tsv to be saved', required=True)
    args = parser.parse_args()
    main(args)
== '__main__': 86 | parser = argparse.ArgumentParser(description='This script will split workspace json ') 87 | parser.add_argument('-p','--percentage', help='how much percentage of the data to keep in train', default=.8, type=float) 88 | parser.add_argument('-input', '--input_data', help='the location of the workspace json',required=True) 89 | parser.add_argument('-output','--output_folder', help='the location of the train.json and test.tsv to be saved',required=True) 90 | args = parser.parse_args() 91 | main(args) 92 | 93 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![test env](https://github.com/watson-developer-cloud/assistant-dialog-skill-analysis/actions/workflows/ci.yaml/badge.svg?branch=master)](https://github.com/watson-developer-cloud/assistant-dialog-skill-analysis/actions/workflows/ci.yaml) 2 | 3 | # Dialog/Action Skill Analysis for Watson Assistant 4 | 5 | ## Introduction 6 | Dialog/Action Skill Analysis is a Python framework along with an easy-to-use Python notebook to help you quickly and effectively build AI assistants using Watson Assistant 7 | 8 | Whether you are new to the process and are building your first AI assistant or you’re a veteran and have an assistant working well in production, this framework is intended to help everyone with questions like... 9 | - How do I know my assistant is doing a good job? 10 | - How do I test and measure my assistant’s performance? 11 | - Why is the assistant responding incorrectly to this question? 12 | - How do I improve my assistant’s ability to understand questions? 13 | 14 | Currently Supported Languages: en, fr, cs, de, es, it, pt, nl, zh-cn, zh-tw 15 | 16 | ## Usage 17 | If you clone the notebook from this repository locally, please use the steps below. 
For usage in Watson studio, please refer to the 18 | section below on [Hosted Python Notebook](#hosted-python-notebook) 19 | 20 | 21 | 1. Assumes familiarity using a Python Jupyter notebook 22 | 2. Assumes a Python 3.9 or greater environment (We recommend using Anaconda to setup a python environment) 23 | 3. Install dependencies with `pip install -r requirements.txt` 24 | 4. Start jupyter server with `jupyter notebook` 25 | 5. Run `classic_dialog_skill_analysis.ipynb` or `new_experience_skill_analysis.ipynb` to start session 26 | 27 | ## Update: March 15th 2023 28 | `new_experience_skill_analysis.ipynb` for dialog or action analysis under new Watson Assistant experience is available. 29 | 30 | 31 | ## Guides 32 | An introduction to Dialog Skill Analysis on Medium: [Announcing Dialog Skill Analysis](https://medium.com/ibm-watson/announcing-dialog-skill-analysis-for-watson-assistant-83cdfb968178?) 33 | 34 | ## Hosted Python Notebook 35 | For those of you familiar with IBM Watson Studio, we have also made a hosted version of the notebook available on the IBM Gallery: [Hosted Dialog Skill Analysis Notebook](https://dataplatform.cloud.ibm.com/exchange/public/entry/view/4d77701840fcb2f21587e39fdb887049) 36 | 37 | ## Current Contributor List 38 | 39 | | | | | 40 | :-------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------:| 41 | Haode Qi Avatar
[Haode Qi](https://github.com/haodeqi) | Cheng Qian Avatar
[Cheng Qian](https://github.com/tsinggggg) | Ladislav Kunc Avatar
[Ladislav Kunc](https://github.com/lada-kunc) | 42 | 43 | 44 | ## Previous Contributor List 45 | 46 | | | | | 47 | :-------------------------:|:-------------------------:|:-------------------------:| 48 | Panos Karagiannis Avatar
[Panos Karagiannis](https://github.com/apropos13) | Ming Tan Avatar
[Ming Tan](https://github.com/mingtan888) | 49 | Navneet Rao Avatar
[Navneet Rao](https://github.com/navneetrao)| Yang Yu Avatar
[Yang Yu](https://github.com/yangyuphd) | 50 | -------------------------------------------------------------------------------- /assistant_skill_analysis/term_analysis/keyword_analyzer.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import pandas as pd 3 | import numpy as np 4 | from IPython.display import display, Markdown 5 | import matplotlib.pyplot as plt 6 | import seaborn as sns 7 | import nltk 8 | from ..utils import skills_util 9 | 10 | 11 | def _preprocess_for_heat_map( 12 | workspace_df, lang_util, label_for_display=30, max_token_display=30, class_list=None 13 | ): 14 | """ 15 | Preprocess dataframe for heat map visualization 16 | :param workspace_df: 17 | :param label_for_display: 18 | :param max_token_display: 19 | :param class_list: 20 | """ 21 | label_frequency_dict = dict(Counter(workspace_df["intent"]).most_common()) 22 | if class_list: 23 | workspace_subsampled = workspace_df[workspace_df["intent"].isin(class_list)] 24 | counts = _get_counts_per_label( 25 | workspace_subsampled, lang_util=lang_util, unigrams_col_name="unigrams" 26 | ) 27 | else: 28 | if len(label_frequency_dict) > label_for_display: 29 | top_30_labels = list(label_frequency_dict.keys())[:label_for_display] 30 | workspace_subsampled = workspace_df[ 31 | workspace_df["intent"].isin(top_30_labels) 32 | ] 33 | counts = _get_counts_per_label( 34 | workspace_subsampled, lang_util=lang_util, unigrams_col_name="unigrams" 35 | ) 36 | else: 37 | counts = _get_counts_per_label( 38 | workspace_df, lang_util=lang_util, unigrams_col_name="unigrams" 39 | ) 40 | 41 | max_n = int( 42 | np.ceil(max_token_display / len(counts.index.get_level_values(0).unique())) 43 | ) 44 | top_counts = _get_top_n(counts["n_w"], top_n=max_n) 45 | return counts, top_counts 46 | 47 | 48 | def _get_counts_per_label(training_data, lang_util, unigrams_col_name="unigrams"): 49 | """ 50 | Create a new dataframe to store unigram counts for each 
label 51 | :param training_data: pandas df 52 | :param unigrams_col_name: name of unigrams column name 53 | :return counts: dataframe that contains the counts for all unigrams per label 54 | """ 55 | training_data[unigrams_col_name] = training_data["utterance"].apply( 56 | nltk.word_tokenize 57 | ) 58 | rows = list() 59 | for row in training_data[["intent", unigrams_col_name]].iterrows(): 60 | r = row[1] 61 | for word in r.unigrams: 62 | rows.append((r.intent, word)) 63 | 64 | words = pd.DataFrame(rows, columns=["intent", "word"]) 65 | # delete all empty words and chars 66 | words = words[words.word.str.len() > 1] 67 | # delete stopwords 68 | words = words.loc[~words["word"].isin(lang_util.stop_words)] 69 | # get counts per word 70 | counts = ( 71 | words.groupby("intent") 72 | .word.value_counts() 73 | .to_frame() 74 | .rename(columns={"word": "n_w"}) 75 | ) 76 | return counts 77 | 78 | 79 | def _get_top_n(series, top_n=5, index_level=0): 80 | """ 81 | Get most frequent words per label 82 | :param series: product of a call to get_counts_per_label 83 | :param top_n: integer signifying the number of most frequent tokens per class 84 | :param index_level: index to group by 85 | :return df: dataframe that contains the top_n unigrams per label 86 | """ 87 | return ( 88 | series.groupby(level=index_level) 89 | .nlargest(top_n) 90 | .reset_index(level=index_level, drop=True) 91 | ) 92 | 93 | 94 | def seaborn_heatmap( 95 | workspace_df, lang_util, label_for_display=30, max_token_display=30, class_list=None 96 | ): 97 | """ 98 | Create heat map of word frequencies per intent 99 | :param workspace_df: 100 | :param label_for_display: 101 | :param max_token_display: 102 | :param class_list: 103 | """ 104 | counts, top_counts = _preprocess_for_heat_map( 105 | workspace_df, lang_util, label_for_display, max_token_display, class_list 106 | ) 107 | reset_groupby = counts.reset_index() 108 | most_frequent_words = top_counts.reset_index()["word"].unique() 109 | table_format = 
reset_groupby.pivot(index="word", columns="intent", values="n_w") 110 | table_format = ( 111 | table_format[table_format.index.isin(most_frequent_words)] 112 | .fillna(0) 113 | .astype("int32") 114 | ) 115 | display( 116 | Markdown('##

Token Frequency per Intent

def generate_summary_statistics(data, entities_list=None):
    """
    Display summary statistics for a workspace: example/intent counts,
    spread of examples per intent, and number of entities.
    :param data: workspace dictionary with "utterance" and "intent" sequences
    :param entities_list: optional list of entities defined in the workspace
    :return: None (renders a Markdown table in the notebook)
    """
    label_frequency = Counter(data["intent"]).most_common()
    per_intent_counts = list(dict(label_frequency).values())

    characteristics = [
        ["Total User Examples", len(data["utterance"])],
        ["Unique Intents", len(label_frequency)],
        [
            "Average User Examples per Intent",
            int(np.around(np.average(per_intent_counts))),
        ],
        [
            "Standard Deviation from Average",
            int(np.around(np.std(per_intent_counts))),
        ],
        # A missing/empty entity list is reported as zero entities.
        ["Total Number of Entities", len(entities_list) if entities_list else 0],
    ]

    df = pd.DataFrame(data=characteristics, columns=["Data Characteristic", "Value"])
    # 1-based index reads better in the rendered table.
    df.index = np.arange(1, len(df) + 1)
    display(Markdown("### Summary Statistics"))
    display(df)

Sorted Distribution of User Examples \ 79 | per Intent

def class_imbalance_analysis(workspace_pd):
    """
    Perform class imbalance analysis on the training workspace.
    Flags imbalance when the most frequent intent has at least twice as many
    user examples as the least frequent intent.
    :param workspace_pd: dataframe with an "intent" column
    :return: True if significant imbalance was detected, else False
    """
    label_frequency = Counter(workspace_pd["intent"]).most_common()
    # most_common() is sorted descending, so the last entry is the rarest intent.
    max_class, max_class_len = label_frequency[0]
    min_class, min_class_len = label_frequency[-1]

    # Fix: the original built these messages with backslash line continuations
    # inside string literals, which embedded runs of stray indentation into the
    # rendered Markdown text. Use clean single-line strings instead.
    if max_class_len >= 2 * min_class_len:
        display(Markdown("### Class Imbalance Detected"))
        display(
            Markdown(
                "- Data could be potentially biased towards intents with more "
                "user examples"
            )
        )
        display(
            Markdown(
                "- E.g. Intent < {} > has < {} > user examples while intent "
                "< {} > has just < {} > user examples ".format(
                    max_class, max_class_len, min_class, min_class_len
                )
            )
        )
        flag = True
    else:
        display(Markdown("### No Significant Class Imbalance Detected"))
        display(
            Markdown(
                "- Lower chances of inherent bias in classification towards "
                "intents with more user examples"
            )
        )
        flag = False

    return flag
"es", "it", "pt", "nl", "zh-cn", "zh-tw"] 13 | PUNCTUATION = [ 14 | "\\" + chr(i) 15 | for i in range(sys.maxunicode) 16 | if unicodedata.category(chr(i)).startswith("P") 17 | ] 18 | 19 | 20 | class _JiebaTokenizerWrapper: 21 | """for zh-cn and zh-tw""" 22 | 23 | def __call__(self, *args, **kwargs): 24 | text = args[0] 25 | for token in jieba.tokenize(text): 26 | yield SimpleNamespace(text=token[0]) 27 | 28 | 29 | class LanguageUtility: 30 | def __init__(self, language_code): 31 | if language_code not in SUPPORTED_LANGUAGE: 32 | raise Exception( 33 | "language_code %s not in supported languages %s", 34 | language_code, 35 | SUPPORTED_LANGUAGE, 36 | ) 37 | self.language_code = language_code 38 | self.init_resources() 39 | 40 | def tokenize(self, sentences): 41 | tokens = list(map(lambda x: x.text, self.tokenizer(sentences))) 42 | if self.stemmer: 43 | tokens = [self.stemmer.stem(str(token)).strip() for token in tokens] 44 | tokens = [token for token in tokens if len(token) > 0] 45 | return tokens 46 | 47 | def init_resources(self): 48 | self.punctuation_pattern = re.compile("|".join(PUNCTUATION)) 49 | self.stemmer = None 50 | stopwords_path = os.path.join( 51 | os.path.dirname(assistant_skill_analysis.__file__), 52 | "resources", 53 | self.language_code, 54 | "stopwords", 55 | ) 56 | if self.language_code == "en": 57 | from spacy.lang.en import English 58 | 59 | self.tokenizer = Tokenizer(English().vocab) 60 | self.stemmer = SnowballStemmer(language="english") 61 | self.stop_words = self.load_stop_words(stopwords_path) 62 | 63 | elif self.language_code == "fr": 64 | from spacy.lang.fr import French 65 | 66 | self.tokenizer = Tokenizer(French().vocab) 67 | self.stemmer = SnowballStemmer(language="french") 68 | self.stop_words = self.load_stop_words(stopwords_path) 69 | 70 | elif self.language_code == "de": 71 | from spacy.lang.de import German 72 | 73 | self.tokenizer = Tokenizer(German().vocab) 74 | self.stemmer = SnowballStemmer(language="german") 75 | 
self.stop_words = self.load_stop_words(stopwords_path) 76 | 77 | elif self.language_code == "it": 78 | from spacy.lang.it import Italian 79 | 80 | self.tokenizer = Tokenizer(Italian().vocab) 81 | self.stemmer = SnowballStemmer(language="italian") 82 | self.stop_words = self.load_stop_words(stopwords_path) 83 | 84 | elif self.language_code == "cs": 85 | from spacy.lang.cs import Czech 86 | 87 | self.tokenizer = Tokenizer(Czech().vocab) 88 | self.stop_words = self.load_stop_words(stopwords_path) 89 | 90 | elif self.language_code == "pt": 91 | from spacy.lang.pt import Portuguese 92 | 93 | self.tokenizer = Tokenizer(Portuguese().vocab) 94 | self.stemmer = SnowballStemmer(language="portuguese") 95 | self.stop_words = self.load_stop_words(stopwords_path) 96 | 97 | elif self.language_code == "es": 98 | from spacy.lang.es import Spanish 99 | 100 | self.tokenizer = Tokenizer(Spanish().vocab) 101 | self.stemmer = SnowballStemmer(language="spanish") 102 | self.stop_words = self.load_stop_words(stopwords_path) 103 | 104 | elif self.language_code == "nl": 105 | from spacy.lang.nl import Dutch 106 | 107 | self.tokenizer = Tokenizer(Dutch().vocab) 108 | self.stemmer = SnowballStemmer(language="dutch") 109 | self.stop_words = self.load_stop_words(stopwords_path) 110 | 111 | elif self.language_code in ["zh-cn", "zh-tw"]: 112 | self.tokenizer = _JiebaTokenizerWrapper() 113 | self.stop_words = self.load_stop_words(stopwords_path) 114 | 115 | else: 116 | raise Exception("language code %s is not supported", self.language_code) 117 | 118 | def preprocess(self, sentence): 119 | sentence = sentence.lower() 120 | sentence = self.contraction_normalization(sentence) 121 | sentence = self.strip_punctuations(sentence) 122 | if self.language_code in ["fr", "es", "cs", "es", "pt"]: 123 | sentence = self.accent_removal(sentence) 124 | return sentence 125 | 126 | def load_stop_words(self, path): 127 | stopwords = [] 128 | with open(path, "r", encoding="utf-8") as file: 129 | for line in file: 130 
| if line.startswith("#"): 131 | continue 132 | line = line.strip() 133 | stopwords.append(line) 134 | return stopwords 135 | 136 | def strip_punctuations(self, sentence): 137 | """ 138 | function to strip punctuations from the utternace 139 | :param utterance: 140 | :return: 141 | """ 142 | sentence = re.sub(self.punctuation_pattern, " ", sentence) 143 | return sentence 144 | 145 | def contraction_normalization(self, sentence): 146 | """ 147 | common contraction normalization for english 148 | :param sentence: 149 | :return: 150 | """ 151 | sentence = sentence.replace("'s", " is ") 152 | sentence = sentence.replace("n't", " not ") 153 | sentence = sentence.replace("'ll", " will ") 154 | sentence = sentence.replace("'m", " am ") 155 | return sentence 156 | 157 | def accent_removal(self, sentence): 158 | """origin from facebook research xlm preprocessing 159 | https://github.com/facebookresearch/XLM""" 160 | 161 | return "".join( 162 | [ 163 | ch 164 | for ch in unicodedata.normalize("NFD", sentence) 165 | if unicodedata.category(ch) != "Mn" 166 | ] 167 | ) 168 | -------------------------------------------------------------------------------- /tests/term_analysis/test_chi2_analyzer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import json 3 | import pandas as pd 4 | 5 | from assistant_skill_analysis.utils import skills_util, lang_utils 6 | from assistant_skill_analysis.term_analysis import chi2_analyzer 7 | 8 | 9 | class TestChi2Analyzer(unittest.TestCase): 10 | """Test for Chi2 Analyzer module""" 11 | 12 | @classmethod 13 | def setUpClass(cls): 14 | cls.lang_util = lang_utils.LanguageUtility("en") 15 | test_skill_file = ( 16 | "tests/resources/test_workspaces/skill-Customer-Care-Sample.json" 17 | ) 18 | with open(test_skill_file, "r") as skill_file: 19 | ( 20 | workspace_data, 21 | workspace_vocabulary, 22 | _, 23 | _, 24 | ) = skills_util.extract_workspace_data(json.load(skill_file), cls.lang_util) 25 
| cls.workspace_df = pd.DataFrame(workspace_data) 26 | 27 | def test_preprocess_chi2(self): 28 | test_data = pd.DataFrame({"utterance": ["This is boston"], "intent": "label1"}) 29 | labels, convec, features = chi2_analyzer._preprocess_chi2( 30 | test_data, self.lang_util 31 | ) 32 | self.assertEqual( 33 | set(convec.get_feature_names_out()), 34 | set(["this", "boston", "this boston"]), 35 | "Test for chi2 analyzer fail", 36 | ) 37 | 38 | labels, convec, features = chi2_analyzer._preprocess_chi2( 39 | self.workspace_df, self.lang_util 40 | ) 41 | max_len = 0 42 | for ngram in convec.get_feature_names_out(): 43 | if len(ngram.split(" ")) > max_len: 44 | max_len = len(ngram.split(" ")) 45 | assert max_len <= 2 46 | 47 | def test_compute_chi2_top_feature(self): 48 | # test case 1, mini dataset 49 | test_data = pd.DataFrame( 50 | { 51 | "utterance": [ 52 | "Boston is the capital city of massachusetts ", 53 | "Boston Celtics is a famous NBA team", 54 | "new york is a big city in the east coast", 55 | ], 56 | "intent": ["boston", "boston", "nyc"], 57 | } 58 | ) 59 | 60 | labels, con_vec, features = chi2_analyzer._preprocess_chi2( 61 | test_data, self.lang_util 62 | ) 63 | unigrams, bigrams = chi2_analyzer._compute_chi2_top_feature( 64 | features, labels, con_vec, "boston", 0.05 65 | ) 66 | self.assertEqual(len(unigrams), 0, "chi2 analyzer fail") 67 | self.assertEqual(len(bigrams), 0, "chi2 analyzer fail") 68 | 69 | # test case 2 with punctuation 70 | test_data = pd.DataFrame( 71 | { 72 | "utterance": [ 73 | "Boston is the capital city of massachusetts! ", 74 | "Boston Celtics is a famous NBA team!", 75 | "new york is a big city in the east coast", 76 | ], 77 | "intent": ["boston", "boston", "nyc"], 78 | } 79 | ) 80 | labels, con_vec, features = chi2_analyzer._preprocess_chi2( 81 | test_data, self.lang_util 82 | ) 83 | unigrams, bigrams = chi2_analyzer._compute_chi2_top_feature( 84 | features, labels, con_vec, "boston", 1 85 | ) 86 | self.assertEqual("!" 
not in unigrams, True, "chi2 analyzer fail") 87 | 88 | # test case 3 , medium size dataset 89 | labels, con_vec, features = chi2_analyzer._preprocess_chi2( 90 | self.workspace_df, self.lang_util 91 | ) 92 | unigrams, bigrams = chi2_analyzer._compute_chi2_top_feature( 93 | features, labels, con_vec, "Help" 94 | ) 95 | self.assertEqual( 96 | unigrams, ["need", "me", "assist", "decid", "help"], "chi2 analyzer fail" 97 | ) 98 | test_bigrams = [ 99 | "assist me", 100 | "you assist", 101 | "me decid", 102 | "need assist", 103 | "you help", 104 | "help me", 105 | ] 106 | self.assertEqual(bigrams, test_bigrams, "chi2 analyzer fail") 107 | 108 | def test_get_chi2_analysis(self): 109 | test_data = pd.DataFrame( 110 | { 111 | "utterance": [ 112 | "Boston is the capital city of massachusetts ", 113 | "Boston Celtics is a famous NBA team", 114 | "new york is a big city in the east coast", 115 | ], 116 | "intent": ["boston", "boston", "nyc"], 117 | } 118 | ) 119 | unigram_intent_dict, bigram_intent_dict = chi2_analyzer.get_chi2_analysis( 120 | test_data, significance_level=0.05, lang_util=self.lang_util 121 | ) 122 | self.assertEqual(len(unigram_intent_dict), 0, "chi2 analyzer fail") 123 | 124 | unigram_intent_dict, bigram_intent_dict = chi2_analyzer.get_chi2_analysis( 125 | self.workspace_df, significance_level=0.05, lang_util=self.lang_util 126 | ) 127 | list_of_intent_list = list(unigram_intent_dict.values()) 128 | one_bigram_set = list(bigram_intent_dict.keys())[0] 129 | self.assertEqual( 130 | all(len(intents) >= 1 for intents in list_of_intent_list), 131 | True, 132 | "chi2 analyzer fail", 133 | ) 134 | self.assertEqual( 135 | all(len(item.split(" ")) == 2 for item in one_bigram_set), 136 | True, 137 | "chi2 analyzer fail", 138 | ) 139 | 140 | def test_get_confusing_keyterms(self): 141 | unigram_intent_dict = { 142 | frozenset(["a", "b", "c"]): ["intent1"], 143 | frozenset(["a", "b"]): ["intent2"], 144 | } 145 | ambiguous_data_frame = chi2_analyzer.get_confusing_key_terms( 
146 | unigram_intent_dict 147 | ) 148 | self.assertTrue( 149 | str(ambiguous_data_frame.iloc[0, 0]) == "" 150 | or str(ambiguous_data_frame.iloc[0, 0]) == "", 151 | "chi2 analyzer fail", 152 | ) 153 | self.assertEqual( 154 | "a" in list(ambiguous_data_frame["Terms"]), True, "chi2 analyzer fail" 155 | ) 156 | 157 | def tearDown(self): 158 | unittest.TestCase.tearDown(self) 159 | 160 | 161 | if __name__ == "__main__": 162 | unittest.main() 163 | -------------------------------------------------------------------------------- /tests/data_analysis/test_divergence_analyzer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import json 3 | import pandas as pd 4 | import numpy as np 5 | from assistant_skill_analysis.utils import skills_util, lang_utils 6 | from assistant_skill_analysis.data_analysis import divergence_analyzer 7 | 8 | 9 | class TestDivergenceAnalyzer(unittest.TestCase): 10 | """Test for Divergence Analyzer module""" 11 | 12 | @classmethod 13 | def setUpClass(cls): 14 | cls.lang_util = lang_utils.LanguageUtility("en") 15 | with open( 16 | "tests/resources/test_workspaces/skill-Customer-Care-Sample.json", "r" 17 | ) as skill_file: 18 | ( 19 | workspace_data, 20 | workspace_vocabulary, 21 | _, 22 | _, 23 | ) = skills_util.extract_workspace_data(json.load(skill_file), cls.lang_util) 24 | 25 | cls.workspace_df = pd.DataFrame(workspace_data) 26 | cls.train_set_pd = pd.DataFrame( 27 | { 28 | "utterance": ["boston is close to new york"], 29 | "intent": ["Boston_New_York"], 30 | "tokens": [["boston", "is", "close", "to", "new", "york"]], 31 | } 32 | ) 33 | cls.test_set_pd = pd.DataFrame( 34 | { 35 | "utterance": [ 36 | "both boston and new york are on east coast", 37 | "boston is close to new york", 38 | ], 39 | "intent": ["Boston_New_York", "Boston_New_York"], 40 | "tokens": [ 41 | [ 42 | "both", 43 | "boston", 44 | "and", 45 | "new", 46 | "york", 47 | "are", 48 | "on", 49 | "east", 50 | "coast", 51 | ], 52 
| ["boston", "is", "close", "to", "new", "york"], 53 | ], 54 | } 55 | ) 56 | 57 | def test_label_percentage(self): 58 | label_percentage_dict = divergence_analyzer._label_percentage(self.workspace_df) 59 | label_percentage_vec = np.array(list(label_percentage_dict.values())) 60 | self.assertEqual( 61 | np.all(label_percentage_vec > 0), True, "label percentage test fail" 62 | ) 63 | self.assertAlmostEqual(np.sum(label_percentage_vec), 1, delta=1e-6) 64 | 65 | def test_train_test_vocab_difference(self): 66 | train_vocab, test_vocab = divergence_analyzer._train_test_vocab_difference( 67 | self.train_set_pd, self.test_set_pd 68 | ) 69 | 70 | self.assertEqual( 71 | train_vocab, 72 | set(["boston", "is", "close", "to", "new", "york"]), 73 | "train test vocab difference test fail", 74 | ) 75 | 76 | def test_train_test_uttterance_length_difference(self): 77 | temp_df = divergence_analyzer._train_test_utterance_length_difference( 78 | self.train_set_pd, self.test_set_pd 79 | ) 80 | 81 | self.assertEqual( 82 | temp_df.iloc[0]["Absolute Difference"], 83 | 1.5, 84 | "train test utterance length differene test fail", 85 | ) 86 | 87 | def test_train_test_label_difference(self): 88 | # Test 1 89 | percentage_dict1 = {"Intent1": 0.5, "Intent2": 0.5} 90 | percentage_dict2 = {"Intent1": 0.5, "Intent2": 0.5} 91 | 92 | ( 93 | missing_labels, 94 | difference_dict, 95 | js_distance, 96 | ) = divergence_analyzer._train_test_label_difference( 97 | percentage_dict1, percentage_dict2 98 | ) 99 | self.assertEqual(js_distance, 0, "train test difference test fail") 100 | self.assertEqual(missing_labels, [], "train test difference test fail") 101 | self.assertEqual( 102 | difference_dict["Intent1"], [50, 50, 0], "train test difference test fail" 103 | ) 104 | 105 | # Test 2 106 | percentage_dict1 = {"Intent1": 1, "Intent2": 0} 107 | percentage_dict2 = {"Intent1": 1} 108 | 109 | ( 110 | missing_labels, 111 | difference_dict, 112 | js_distance, 113 | ) = 
divergence_analyzer._train_test_label_difference( 114 | percentage_dict1, percentage_dict2 115 | ) 116 | self.assertEqual(js_distance, 0, "train test difference test fail") 117 | self.assertEqual(missing_labels, ["Intent2"], "train test difference test fail") 118 | self.assertEqual( 119 | difference_dict["Intent1"], [100, 100, 0], "train test difference test fail" 120 | ) 121 | 122 | # Test 3 123 | percentage_dict1 = {"Intent1": 1, "Intent2": 0} 124 | percentage_dict2 = {"Intent1": 0, "Intent2": 1} 125 | ( 126 | missing_labels, 127 | difference_dict, 128 | js_distance, 129 | ) = divergence_analyzer._train_test_label_difference( 130 | percentage_dict1, percentage_dict2 131 | ) 132 | self.assertEqual(js_distance, 1, "train test difference test fail") 133 | self.assertEqual( 134 | difference_dict["Intent1"], [100, 0, 100], "train test difference test fail" 135 | ) 136 | self.assertEqual( 137 | difference_dict["Intent2"], [0, 100, 100], "train test difference test fail" 138 | ) 139 | self.assertEqual(len(missing_labels), 0, "train test difference test fail") 140 | 141 | # Test 4 142 | percentage_dict1 = {"Intent1": 1} 143 | percentage_dict2 = {"Intent2": 1} 144 | ( 145 | missing_labels, 146 | difference_dict, 147 | js_distance, 148 | ) = divergence_analyzer._train_test_label_difference( 149 | percentage_dict1, percentage_dict2 150 | ) 151 | self.assertEqual(str(js_distance), "nan", "train test difference test fail") 152 | self.assertEqual(missing_labels, ["Intent1"], "train test difference test fail") 153 | self.assertEqual(len(difference_dict), 0, "train test difference test fail") 154 | 155 | def tearDown(self): 156 | unittest.TestCase.tearDown(self) 157 | 158 | 159 | if __name__ == "__main__": 160 | unittest.main() 161 | -------------------------------------------------------------------------------- /tests/confidence_analysis/test_confidence_analysis.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import 
math 3 | import pandas as pd 4 | from assistant_skill_analysis.confidence_analysis.confidence_analyzer import ( 5 | _get_far_list, 6 | _get_ontopic_accuracy_list, 7 | _find_threshold, 8 | abnormal_conf, 9 | generate_unique_thresholds, 10 | _get_bot_coverage_list, 11 | analysis, 12 | extract_table_analysis, 13 | _convert_data_format, 14 | analysis_pipeline, 15 | extract_by_topic, 16 | create_display_table, 17 | ) 18 | from assistant_skill_analysis.utils.skills_util import OFFTOPIC_LABEL 19 | 20 | 21 | class TestThresholdAnalysis(unittest.TestCase): 22 | """Test for summary generator module""" 23 | 24 | def setUp(self): 25 | 26 | self.sorted_list = [ 27 | (OFFTOPIC_LABEL, "B", 0.1), 28 | ("A", "A", 0.2), 29 | (OFFTOPIC_LABEL, "A", 0.2), 30 | ("A", "A", 0.2), 31 | ("A", "B", 0.3), 32 | ("A", "A", 0.4), 33 | (OFFTOPIC_LABEL, "A", 0.8), 34 | ("A", "A", 0.8), 35 | ("B", "B", 1.0), 36 | ] 37 | 38 | self.results = pd.DataFrame( 39 | self.sorted_list, columns=["correct_intent", "top_intent", "top_confidence"] 40 | ) 41 | 42 | self.thresholds = [0.15, 0.25, 0.35, 0.6, 0.9] 43 | 44 | def test_abnormal_conf(self): 45 | test_dataframe = pd.DataFrame( 46 | { 47 | "correct_intent": ["A", "A", "A"], 48 | "top_intent": ["A", "B", "B"], 49 | "top_confidence": [0.2, 0.9, 0.9], 50 | "utterance": ["a", "a", "a"], 51 | "top_predicts": [ 52 | [], 53 | [], 54 | [ 55 | {"intent": "B", "confidence": 0.9}, 56 | {"intent": "C", "confidence": 0.1}, 57 | ], 58 | ], 59 | } 60 | ) 61 | 62 | correct_low_conf, incorrect_high_conf = abnormal_conf(test_dataframe, 0.3, 0.7) 63 | self.assertEqual(len(correct_low_conf), 1, "test_find_threshold failed: first") 64 | self.assertEqual( 65 | len(incorrect_high_conf), 2, "test_find_threshold failed: first" 66 | ) 67 | self.assertEqual( 68 | incorrect_high_conf.iloc[0, :]["top2_prediction"], 69 | "NA", 70 | "test_find_threshold failed: first", 71 | ) 72 | 73 | def test_get_ontopic_accuracy_list(self): 74 | 75 | res, _ = 
_get_ontopic_accuracy_list(self.sorted_list, self.thresholds) 76 | gt = [5 / 6, 0.75, 1.0, 1.0, 1.0] 77 | for r, g in zip(res, gt): 78 | self.assertEqual(math.fabs(r - g) < 0.0001, True, "FAR values changed") 79 | 80 | def test_find_threshold(self): 81 | a = 0.0 82 | b = [0, 0.1, 0.2, 0.3, 0.5] 83 | pos = _find_threshold(a, b) 84 | self.assertEqual(pos, 0, "test_find_threshold failed: first") 85 | a = 0.1 86 | b = [0, 0.1, 0.2, 0.3, 0.5] 87 | pos = _find_threshold(a, b) 88 | self.assertEqual(pos, 1, "test_find_threshold failed: second") 89 | a = 0.5 90 | b = [0, 0.1, 0.2, 0.3, 0.5] 91 | pos = _find_threshold(a, b) 92 | self.assertEqual(pos, 4, "test_find_threshold failed: third") 93 | 94 | def test_get_far_list(self): 95 | 96 | res, _ = _get_far_list(self.sorted_list, self.thresholds) 97 | gt = [2 / 3, 1 / 3, 1 / 3, 1 / 3, 0.0] 98 | for r, g in zip(res, gt): 99 | self.assertEqual(math.fabs(r - g) < 0.0001, True, "FAR values changed") 100 | 101 | def test_get_bot_coverage_list(self): 102 | 103 | res, _ = _get_bot_coverage_list(self.sorted_list, self.thresholds) 104 | gt = [0.888, 0.555, 0.444, 0.333, 0.111] 105 | for r, g in zip(res, gt): 106 | self.assertEqual( 107 | math.isclose(r, g, abs_tol=0.01, rel_tol=0.0001), 108 | True, 109 | "bot coverage value change", 110 | ) 111 | 112 | def test_analysis(self): 113 | analysis_df1 = analysis(self.results) 114 | self.assertEqual( 115 | analysis_df1["Bot Coverage Counts"].iloc[9], "1 / 9", "analysis fail" 116 | ) 117 | analysis_df_list = analysis(self.results, ["A"]) 118 | self.assertEqual( 119 | analysis_df_list[0]["Bot Coverage Counts"].iloc[9], "2 / 7", "analysis fail" 120 | ) 121 | 122 | def test_convert_data_format(self): 123 | 124 | test1 = _convert_data_format(self.results) 125 | for element1, element2 in zip(test1, self.sorted_list): 126 | for ele1, ele2 in zip(element1, element2): 127 | self.assertEqual(ele1, ele2, "test for covert data format fail") 128 | 129 | def test_analysis_pipeline(self): 130 | analysis_df 
= analysis_pipeline(self.results) 131 | self.assertEqual( 132 | analysis_df["Bot Coverage Counts"].iloc[9], "1 / 9", "analysis fail" 133 | ) 134 | 135 | def test_extract_table_analysis(self): 136 | sorted_results = _convert_data_format(self.results) 137 | ontopic_infos, offtopics_infos = extract_by_topic(sorted_results) 138 | ( 139 | analysis_df, 140 | toa_list, 141 | bot_coverage_list, 142 | far_list, 143 | thresholds, 144 | ) = extract_table_analysis(sorted_results, ontopic_infos, offtopics_infos) 145 | self.assertEqual( 146 | math.isclose(toa_list[2], 0.75, abs_tol=0.01, rel_tol=0.0001), 147 | True, 148 | "extract table analysis fail", 149 | ) 150 | 151 | def test_create_display_table(self): 152 | sorted_results = _convert_data_format(self.results) 153 | thresholds, sort_uniq_confs = generate_unique_thresholds(sorted_results) 154 | toa_list, toa_count = _get_ontopic_accuracy_list(sorted_results, thresholds) 155 | bot_coverage_list, bot_coverage_count = _get_bot_coverage_list( 156 | sorted_results, thresholds 157 | ) 158 | ontopic_infos, offtopic_infos = extract_by_topic(sorted_results) 159 | far_list, _ = _get_far_list(sorted_results, thresholds) 160 | analysis_df = create_display_table( 161 | toa_list, 162 | bot_coverage_list, 163 | bot_coverage_count, 164 | sorted_results, 165 | thresholds, 166 | offtopic_infos, 167 | far_list, 168 | ) 169 | 170 | self.assertEqual( 171 | analysis_df["Bot Coverage Counts"].iloc[-1], 172 | "1 / 9", 173 | "create display \ 174 | table test fail", 175 | ) 176 | 177 | def test_generate_unique_thresholds(self): 178 | thresholds, unique_confidence = generate_unique_thresholds(self.sorted_list) 179 | self.assertEqual( 180 | math.isclose(thresholds[1], 0.15, abs_tol=0.01, rel_tol=0.0001), 181 | True, 182 | "test \ 183 | generate unique threshold fail", 184 | ) 185 | 186 | self.assertEqual( 187 | math.isclose(unique_confidence[5], 1, abs_tol=0.01, rel_tol=0.0001), 188 | True, 189 | "test generate unique threshold fail", 190 | ) 191 | 
def compare(a, b):
    """Return True when a and b have equal length and agree element-wise
    within an absolute tolerance of 0.01."""
    if len(a) != len(b):
        return False
    return all(math.fabs(x - y) <= 0.01 for x, y in zip(a, b))
inferencer.inference( 56 | cls.conversation, 57 | test_df, 58 | max_thread=THREAD_NUM, 59 | skill_id=cls.wksp_id, 60 | ) 61 | 62 | def test_filter_results(self): 63 | wrong_examples_sorted = highlighter._filter_results( 64 | self.results, 0.4, self.lang_util 65 | ) 66 | ground_truth = ( 67 | 47, 68 | "no he is an arrogant self serving immature idiot get it right", 69 | None, 70 | "General_Connect_to_Agent", 71 | 0.6697888851165772, 72 | 0.09834358692169187, 73 | 9, 74 | ) 75 | 76 | self.assertEqual( 77 | wrong_examples_sorted[0][2], None, "Test for filter results fail" 78 | ) 79 | self.assertEqual( 80 | math.isclose( 81 | wrong_examples_sorted[0][4], ground_truth[4], rel_tol=0.01, abs_tol=0.1 82 | ), 83 | True, 84 | "Test for filter results fail", 85 | ) 86 | 87 | def test_generate_adversarial_examples(self): 88 | test_utterance = "winter is coming" 89 | ( 90 | adversarial_examples, 91 | adversarial_span, 92 | ) = highlighter._generate_adversarial_examples(test_utterance, 1) 93 | self.assertEqual( 94 | "winter coming" in adversarial_examples, 95 | True, 96 | "Test for generate adversarial example fail", 97 | ) 98 | self.assertEqual( 99 | adversarial_span["winter coming_1"], 100 | (1, 2), 101 | "Test for generate adversarial example fail", 102 | ) 103 | 104 | def test_adversarial_examples_multi_thread_inference(self): 105 | long_example1 = ( 106 | "um taking a shot here um lets say three " 107 | + "separate people whos wills are to each other" 108 | ) 109 | wrong_examples_sorted = [ 110 | ( 111 | 1, 112 | "see ya", 113 | "Goodbye", 114 | "General_Greetings", 115 | 0.5005551099777221, 116 | 0.5005551099777221, 117 | 1, 118 | ), 119 | ( 120 | 42, 121 | long_example1, 122 | None, 123 | "General_Connect_to_Agent", 124 | 0.6537539958953857, 125 | 0.2537539958953857, 126 | 9, 127 | ), 128 | ] 129 | 130 | ( 131 | adv_results, 132 | adv_dict_span, 133 | ) = highlighter._adversarial_examples_multi_thread_inference( 134 | wrong_examples_sorted, self.conversation, self.wksp_id 
135 | ) 136 | result = adv_results[adv_results["utterance"].str.match("see")] 137 | 138 | self.assertEqual( 139 | np.abs(result["top_confidence"].values[0] - 0.478708) < TOLERANCE, 140 | True, 141 | "Test for adversarial examples inference fail", 142 | ) 143 | 144 | self.assertEqual( 145 | adv_dict_span["see_1"], 146 | (1, 2), 147 | "test for adversarial example inference: adversarial span dict mismatch", 148 | ) 149 | 150 | def test_scoring_function(self): 151 | highlight = np.zeros(3, dtype="float32") 152 | highlight = highlighter._scoring_function( 153 | highlight=highlight, 154 | original_position=0, 155 | adversarial_position=1, 156 | original_confidence=0.7, 157 | adversarial_confidence=0.5, 158 | start_idx=2, 159 | end_idx=3, 160 | ) 161 | self.assertEqual( 162 | math.isclose(highlight[2], 0.44, rel_tol=0.0001, abs_tol=0.01), 163 | True, 164 | "Test for adversarial scoring fail", 165 | ) 166 | 167 | def test_highlight_scoring(self): 168 | original_example = ( 169 | 1, 170 | "see ya", 171 | "Goodbye", 172 | "General_Greetings", 173 | 0.5005551099777221, 174 | 0.5005551099777221, 175 | 1, 176 | ) 177 | subset_adversarial_result = pd.DataFrame( 178 | data={ 179 | "utterance": ["see"], 180 | "top_predicts": [ 181 | [ 182 | {"intent": "General_Greetings", "confidence": 0.6}, 183 | {"intent": "Goodbye", "confidence": 0.5}, 184 | ] 185 | ], 186 | } 187 | ) 188 | adversarial_span_dict = {"see_1": (1, 2)} 189 | 190 | highlight = highlighter._highlight_scoring( 191 | original_example, 192 | subset_adversarial_result, 193 | adversarial_span_dict, 194 | self.lang_util, 195 | ) 196 | self.assertEqual( 197 | compare(highlight, [0, -0.41988897]), 198 | True, 199 | "Test highlight scoring function fail", 200 | ) 201 | 202 | def test_get_highlights_in_batch_multi_thread(self): 203 | highlighter.get_highlights_in_batch_multi_thread( 204 | conversation=self.conversation, 205 | full_results=self.results, 206 | output_folder=self.tmpbatchfolder, 207 | confidence_threshold=0.4, 
208 | show_worst_k=3, 209 | lang_util=self.lang_util, 210 | skill_id=self.wksp_id, 211 | ) 212 | self.assertEqual( 213 | len(os.listdir(self.tmpbatchfolder)), 214 | 3, 215 | "# of batch highlighting files is mismatched.", 216 | ) 217 | 218 | @classmethod 219 | def tearDownClass(cls): 220 | shutil.rmtree(cls.tmpfolder) 221 | unittest.TestCase.tearDown(cls) 222 | 223 | 224 | if __name__ == "__main__": 225 | unittest.main() 226 | -------------------------------------------------------------------------------- /assistant_skill_analysis/term_analysis/chi2_analyzer.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import pandas as pd 3 | import numpy as np 4 | from IPython.display import display, Markdown, HTML 5 | from sklearn.feature_selection import chi2 6 | from sklearn.feature_extraction.text import CountVectorizer 7 | from nltk import word_tokenize 8 | 9 | 10 | def _preprocess_chi2(workspace_pd, lang_util): 11 | """ 12 | Preprocess dataframe for chi2 analysis 13 | :param workspace_pd: Preprocess dataframe for chi2 14 | :return labels: intents processed 15 | :return count_vectorizer: vectorizer instance 16 | :return features: features from transform 17 | """ 18 | 19 | count_vectorizer = CountVectorizer( 20 | min_df=1, 21 | encoding="utf-8", 22 | ngram_range=(1, 2), 23 | stop_words=lang_util.stop_words, 24 | tokenizer=lang_util.tokenize, 25 | token_pattern="(?u)\b\w+\b", 26 | ) 27 | features = count_vectorizer.fit_transform(workspace_pd["utterance"]).toarray() 28 | labels = workspace_pd["intent"] 29 | return labels, count_vectorizer, features 30 | 31 | 32 | def _compute_chi2_top_feature( 33 | features, labels, vectorizer, cls, significance_level=0.05 34 | ): 35 | """ 36 | Perform chi2 analysis, punctuation filtering and deduplication 37 | :param features: count vectorizer features 38 | :param labels: intents processed 39 | :param vectorizer: count vectorizer instances 40 | :param cls: classes 
for chi square 41 | :param significance_level: specify an alpha 42 | :return deduplicated_unigram: 43 | :return deduplicated_bigram: 44 | """ 45 | features_chi2, pval = chi2(features, labels == cls) 46 | 47 | feature_names = np.array(vectorizer.get_feature_names_out()) 48 | 49 | features_chi2 = features_chi2[pval < significance_level] 50 | feature_names = feature_names[pval < significance_level] 51 | 52 | indices = np.argsort(features_chi2) 53 | feature_names = feature_names[indices] 54 | 55 | unigrams = [v.strip() for v in feature_names if len(v.strip().split()) == 1] 56 | deduplicated_unigram = list() 57 | 58 | for unigram in unigrams: 59 | if unigram not in deduplicated_unigram: 60 | deduplicated_unigram.append(unigram) 61 | 62 | bigrams = [v.strip() for v in feature_names if len(v.strip().split()) == 2] 63 | 64 | deduplicated_bigram = list() 65 | for bigram in bigrams: 66 | if bigram not in deduplicated_bigram: 67 | deduplicated_bigram.append(bigram) 68 | 69 | return deduplicated_unigram, deduplicated_bigram 70 | 71 | 72 | def get_chi2_analysis( 73 | workspace_pd, 74 | lang_util, 75 | significance_level=0.05, 76 | ): 77 | """ 78 | find correlated unigram and bigram of each intent with Chi2 analysis 79 | :param workspace_pd: dataframe, workspace data 80 | :param signficance_level: float, significance value to reject the null hypothesis 81 | :return unigram_intent_dict: 82 | :return bigram_intent_dict: 83 | """ 84 | labels, vectorizer, features = _preprocess_chi2(workspace_pd, lang_util) 85 | 86 | label_frequency_dict = dict(Counter(workspace_pd["intent"]).most_common()) 87 | N = 5 88 | 89 | # keys are the set of unigrams/bigrams and value will be the intent 90 | # maps one-to-many relationship between unigram and intent, 91 | unigram_intent_dict = dict() 92 | # maps one-to-many relationship between bigram and intent 93 | bigram_intent_dict = dict() 94 | 95 | classes = list() 96 | chi_unigrams = list() 97 | chi_bigrams = list() 98 | for cls in 
label_frequency_dict.keys(): 99 | 100 | unigrams, bigrams = _compute_chi2_top_feature( 101 | features, labels, vectorizer, cls, significance_level 102 | ) 103 | classes.append(cls) 104 | 105 | if unigrams: 106 | chi_unigrams.append(", ".join(unigrams[-N:])) 107 | else: 108 | chi_unigrams.append("None") 109 | 110 | if bigrams: 111 | chi_bigrams.append(", ".join(bigrams[-N:])) 112 | else: 113 | chi_bigrams.append("None") 114 | 115 | if unigrams: 116 | if frozenset(unigrams[-N:]) in unigram_intent_dict: 117 | unigram_intent_dict[frozenset(unigrams[-N:])].append(cls) 118 | else: 119 | unigram_intent_dict[frozenset(unigrams[-N:])] = list() 120 | unigram_intent_dict[frozenset(unigrams[-N:])].append(cls) 121 | 122 | if bigrams: 123 | if frozenset(bigrams[-N:]) in bigram_intent_dict: 124 | bigram_intent_dict[frozenset(bigrams[-N:])].append(cls) 125 | else: 126 | bigram_intent_dict[frozenset(bigrams[-N:])] = list() 127 | bigram_intent_dict[frozenset(bigrams[-N:])].append(cls) 128 | 129 | chi_df = pd.DataFrame(data={"Intent": classes}) 130 | chi_df["Correlated Unigrams"] = chi_unigrams 131 | chi_df["Correlated Bigrams"] = chi_bigrams 132 | 133 | display(Markdown(("## Chi-squared Analysis"))) 134 | with pd.option_context( 135 | "display.max_rows", 136 | None, 137 | "display.max_columns", 138 | None, 139 | "display.max_colwidth", 140 | 100, 141 | ): 142 | chi_df.index = np.arange(1, len(chi_df) + 1) 143 | display(chi_df) 144 | return unigram_intent_dict, bigram_intent_dict 145 | 146 | 147 | def get_confusing_key_terms(keyterm_intent_map): 148 | """ 149 | Greedy search for overlapping intents 150 | :param keyterm_intent_map: correlated terms 151 | :return df: ambiguous terms data frame 152 | """ 153 | ambiguous_intents = list() 154 | ambiguous_keywords = list() 155 | intents_seen = list() 156 | 157 | for i in range(len(keyterm_intent_map)): 158 | correlated_unigrams = list(keyterm_intent_map.keys())[i] 159 | current_label = keyterm_intent_map[correlated_unigrams] 160 | 
intents_seen.append(current_label) 161 | 162 | if len(keyterm_intent_map[correlated_unigrams]) > 1: 163 | print(keyterm_intent_map[correlated_unigrams]) 164 | print(correlated_unigrams) 165 | 166 | for other_correlated_unigrams in keyterm_intent_map.keys(): 167 | if keyterm_intent_map[other_correlated_unigrams] in intents_seen: 168 | continue 169 | overlap = correlated_unigrams.intersection(other_correlated_unigrams) 170 | if overlap: 171 | for keyword in overlap: 172 | ambiguous_intents.append( 173 | "<" 174 | + current_label[0] 175 | + ", " 176 | + keyterm_intent_map[other_correlated_unigrams][0] 177 | + ">" 178 | ) 179 | ambiguous_keywords.append(keyword) 180 | 181 | df = pd.DataFrame( 182 | data={"Intent Pairs": ambiguous_intents, "Terms": ambiguous_keywords} 183 | ) 184 | 185 | if not ambiguous_intents: 186 | display( 187 | Markdown("There is no ambiguity based on top 5 key terms in chi2 analysis") 188 | ) 189 | else: 190 | display_size = 10 191 | if not df.empty: 192 | if len(df) < display_size: 193 | display_size = len(df) 194 | display(HTML(df.sample(n=display_size).to_html(index=False))) 195 | 196 | return df 197 | 198 | 199 | def chi2_overlap_check(ambiguous_unigram_df, ambiguous_bigram_df, intent1, intent2): 200 | """ 201 | looks for intent overlap for specific intent or intent pairs 202 | :param ambiguous_unigram_df: 203 | :param ambiguous_bigram_df: 204 | :param intent1: 205 | :param intent2: 206 | """ 207 | intent = intent1 + ", " + intent2 + "|" + intent2 + ", " + intent1 208 | part1 = None 209 | part2 = None 210 | if not ambiguous_unigram_df.empty: 211 | part1 = ambiguous_unigram_df[ 212 | ambiguous_unigram_df["Intent Pairs"].str.contains(intent) 213 | ] 214 | 215 | if not ambiguous_bigram_df.empty: 216 | part2 = ambiguous_bigram_df[ 217 | ambiguous_bigram_df["Intent Pairs"].str.contains(intent) 218 | ] 219 | 220 | if part1 is not None and part2 is not None: 221 | display(HTML(pd.concat([part1, part2]).to_html(index=False))) 222 | elif part1 is not 
None: 223 | display(HTML(part1.to_html(index=False))) 224 | elif part2 is not None: 225 | display(HTML(part2.to_html(index=False))) 226 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 
176 | 177 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /assistant_skill_analysis/inferencing/inferencer.py: -------------------------------------------------------------------------------- 1 | import time 2 | import pandas as pd 3 | import numpy as np 4 | import ibm_watson 5 | from ..utils import skills_util 6 | from concurrent.futures import ThreadPoolExecutor 7 | from tqdm import tqdm 8 | 9 | MAX_RETRY = 5 10 | 11 | 12 | def inference( 13 | conversation, 14 | test_data, 15 | max_thread=5, 16 | user_id="256", 17 | assistant_id=None, 18 | skill_id=None, 19 | intent_to_action_mapping=None, 20 | timeout=1, 21 | environment_id=None, 22 | ): 23 | """ 24 | query the message api to generate results on the test data 25 | :parameter: conversation: the conversation object produced by AssistantV1 api 26 | :parameter: skill_id: the workspace id of the 27 | :parameter: test_data: the data that will be sent to the classifier 28 | :parameter: max_thread: the max number of threads to use for multi-threaded inference 29 | :parameter: verbose: flag indicates verbosity of outputs during mutli-threaded inference 30 | :parameter: assistant_id: 31 | :parameter: intent_to_action_mapping: 32 | :parameter: timeout: integer or float that specifies number of seconds each thread should wait for inference result 33 | :parameter: environment_id: environment id 34 | :return result_df: results dataframe 35 | """ 36 | skd_version = "V1" 37 | if isinstance(conversation, ibm_watson.AssistantV1): 38 | assert skill_id is not None 39 | else: 40 | assert assistant_id is not None 41 | skd_version = "V2" 42 | 43 | if max_thread == 1: 44 | reach_max_retry = False 45 | responses = [] 46 | for test_example, ground_truth in zip( 47 | test_data["utterance"], test_data["intent"] 48 | ): 49 | attempt = 1 50 | while attempt <= MAX_RETRY: 51 | try: 52 | prediction_json = skills_util.retrieve_classifier_response( 53 | conversation=conversation, 54 | 
text_input=test_example, 55 | alternate_intents=True, 56 | user_id=user_id, 57 | assistant_id=assistant_id, 58 | skill_id=skill_id, 59 | environment_id=environment_id, 60 | ) 61 | time.sleep(0.3) 62 | 63 | success_flag = True 64 | except Exception: 65 | continue 66 | if success_flag: 67 | break 68 | attempt += 1 69 | 70 | if attempt > MAX_RETRY: 71 | reach_max_retry = True 72 | 73 | if reach_max_retry: 74 | raise Exception("Maximum attempt of {} has reached".format(MAX_RETRY)) 75 | 76 | if skd_version == "V2": 77 | prediction_json = prediction_json["output"] 78 | if len(prediction_json["intents"]) > 0: 79 | # v2 api returns all intent predictions 80 | if ( 81 | prediction_json["intents"][0]["confidence"] 82 | < skills_util.OFFTOPIC_CONF_THRESHOLD 83 | ): 84 | prediction_json["intents"] = [] 85 | if intent_to_action_mapping is not None: 86 | for intents_prediction in prediction_json["intents"]: 87 | intents_prediction["intent"] = intent_to_action_mapping[ 88 | intents_prediction["intent"] 89 | ] 90 | 91 | if not prediction_json["intents"]: 92 | responses.append( 93 | { 94 | "top_intent": skills_util.OFFTOPIC_LABEL, 95 | "top_confidence": 0.0, 96 | "correct_intent": ground_truth, 97 | "utterance": test_example, 98 | "top_predicts": [], 99 | "entities": [], 100 | } 101 | ) 102 | else: 103 | responses.append( 104 | { 105 | "top_intent": prediction_json["intents"][0]["intent"], 106 | "top_confidence": prediction_json["intents"][0]["confidence"], 107 | "correct_intent": ground_truth, 108 | "utterance": test_example, 109 | "top_predicts": prediction_json["intents"], 110 | "entities": prediction_json["entities"], 111 | } 112 | ) 113 | result_df = pd.DataFrame(data=responses) 114 | else: 115 | result_df = thread_inference( 116 | conversation=conversation, 117 | test_data=test_data, 118 | max_thread=max_thread, 119 | user_id=user_id, 120 | skill_id=skill_id, 121 | assistant_id=assistant_id, 122 | intent_to_action_mapping=intent_to_action_mapping, 123 | timeout=timeout, 124 | 
environment_id=environment_id, 125 | ) 126 | return result_df 127 | 128 | 129 | def thread_inference( 130 | conversation, 131 | test_data, 132 | max_thread=5, 133 | user_id="256", 134 | assistant_id=None, 135 | skill_id=None, 136 | intent_to_action_mapping=None, 137 | timeout=1, 138 | environment_id=None, 139 | ): 140 | """ 141 | Perform multi thread inference for faster inference time 142 | :param conversation: 143 | :param skill_id: Assistant workspace id 144 | :param test_data: data to test on 145 | :param max_thread: max threads to use 146 | :param verbose: verbosity of output 147 | :param user_id: user_id for billing purpose 148 | :param assistant_id: 149 | :parameter: intent_to_action_mapping: 150 | :parameter: timeout: integer or float that specifies number of seconds each thread should wait for inference result 151 | :parameter: environment_id: environment id 152 | :return result_df: results dataframe 153 | """ 154 | if isinstance(conversation, ibm_watson.AssistantV1): 155 | assert skill_id is not None 156 | sdk_version = "V1" 157 | else: 158 | assert assistant_id is not None 159 | sdk_version = "V2" 160 | count = 0 161 | response = None 162 | while count < MAX_RETRY and not response: 163 | try: 164 | response = skills_util.retrieve_classifier_response( 165 | conversation=conversation, 166 | text_input="ping", 167 | alternate_intents=True, 168 | user_id=user_id, 169 | assistant_id=assistant_id, 170 | skill_id=skill_id, 171 | environment_id=environment_id, 172 | ) 173 | except Exception: 174 | count += 1 175 | time.sleep(0.5) 176 | 177 | executor = ThreadPoolExecutor(max_workers=max_thread) 178 | futures = {} 179 | result = [] 180 | for test_example, ground_truth in zip(test_data["utterance"], test_data["intent"]): 181 | future = executor.submit( 182 | get_intent_confidence_retry, 183 | conversation=conversation, 184 | text_input=test_example, 185 | alternative_intents=True, 186 | user_id=user_id, 187 | assistant_id=assistant_id, 188 | skill_id=skill_id, 189 
| environment_id=environment_id, 190 | retry=0, 191 | ) 192 | futures[future] = (test_example, ground_truth) 193 | 194 | for future in tqdm(futures): 195 | res = future.result(timeout=timeout) 196 | test_example, ground_truth = futures[future] 197 | result.append( 198 | process_result( 199 | test_example, 200 | ground_truth, 201 | res, 202 | intent_to_action_mapping, 203 | sdk_version=sdk_version, 204 | ) 205 | ) 206 | 207 | result_df = pd.DataFrame(data=result) 208 | return result_df 209 | 210 | 211 | def process_result( 212 | utterance, ground_truth, response, intent_to_action_mapping, sdk_version 213 | ): 214 | if sdk_version == "V2": 215 | response = response["output"] 216 | if ( 217 | not response["intents"] 218 | or response["intents"][0]["confidence"] 219 | < skills_util.OFFTOPIC_CONF_THRESHOLD 220 | ): 221 | response["intents"] = [] 222 | if intent_to_action_mapping is not None: 223 | for intents_prediction in response["intents"]: 224 | intents_prediction["intent"] = intent_to_action_mapping[ 225 | intents_prediction["intent"] 226 | ] 227 | if response["intents"]: 228 | top_predicts = response["intents"] 229 | top_intent = response["intents"][0]["intent"] 230 | top_confidence = response["intents"][0]["confidence"] 231 | else: 232 | top_predicts = [] 233 | top_intent = skills_util.OFFTOPIC_LABEL 234 | top_confidence = 0 235 | 236 | if response["entities"]: 237 | entities = response["entities"] 238 | else: 239 | entities = [] 240 | 241 | new_dict = { 242 | "utterance": utterance, 243 | "correct_intent": ground_truth, 244 | "top_intent": top_intent, 245 | "top_confidence": top_confidence, 246 | "top_predicts": top_predicts, 247 | "entities": entities, 248 | } 249 | return new_dict 250 | 251 | 252 | def get_intent_confidence_retry( 253 | conversation, 254 | text_input, 255 | alternative_intents, 256 | user_id, 257 | assistant_id, 258 | skill_id, 259 | environment_id, 260 | retry=0, 261 | ): 262 | try: 263 | return skills_util.retrieve_classifier_response( 264 | 
conversation=conversation, 265 | text_input=text_input, 266 | alternate_intents=True, 267 | user_id=user_id, 268 | assistant_id=assistant_id, 269 | skill_id=skill_id, 270 | environment_id=environment_id, 271 | ) 272 | except Exception as e: 273 | if retry < MAX_RETRY: 274 | return get_intent_confidence_retry( 275 | conversation, 276 | text_input, 277 | alternative_intents, 278 | user_id, 279 | assistant_id, 280 | skill_id, 281 | environment_id=environment_id, 282 | retry=retry + 1, 283 | ) 284 | else: 285 | raise e 286 | 287 | 288 | def calculate_mistakes(results): 289 | """ 290 | retrieve the data frame of miss-classified examples 291 | :param results: results after tersting 292 | :return wrongs_df: data frame of mistakes 293 | """ 294 | wrongs = list() 295 | for idx, row in results.iterrows(): 296 | if row["correct_intent"] != row["top_intent"]: 297 | wrongs.append(row) 298 | wrongs_df = pd.DataFrame(data=wrongs) 299 | wrongs_df.index.name = "Test Example Index" 300 | return wrongs_df 301 | 302 | 303 | def calculate_accuracy(results): 304 | """ 305 | calculate the accuracy on the test set 306 | :param results: the results of testing 307 | :return accuracy: get accuracy on test set 308 | """ 309 | correct = 0 310 | for i in range(0, len(results["correct_intent"])): 311 | correct += 1 if results["top_intent"][i] == results["correct_intent"][i] else 0 312 | accuracy = np.around((correct / len(results["correct_intent"])) * 100, 2) 313 | return accuracy 314 | -------------------------------------------------------------------------------- /assistant_skill_analysis/data_analysis/divergence_analyzer.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | from IPython.display import Markdown, display 3 | import numpy as np 4 | import pandas as pd 5 | from scipy.spatial import distance 6 | 7 | 8 | def _label_percentage(data_frame): 9 | """ 10 | Calculate the percentage of each labels in the data frame 11 
| :param data_frame: dataframe for train or test 12 | :return: label_percentage_dict: dictionary maps label : % of labels 13 | """ 14 | total_examples = len(data_frame) 15 | label_frequency_dict = dict(Counter(data_frame["intent"]).most_common()) 16 | percentage_list = np.array(list(label_frequency_dict.values())) / total_examples 17 | label_percentage_dict = dict( 18 | zip(list(label_frequency_dict.keys()), percentage_list) 19 | ) 20 | return label_percentage_dict 21 | 22 | 23 | def _train_test_coloring(val): 24 | """ 25 | color scheme for train test difference statistics 26 | :param val: 27 | :return: 28 | """ 29 | if val > 25: 30 | color = "red" 31 | elif val > 10: 32 | color = "DarkBlue" 33 | else: 34 | color = "green" 35 | return "color: %s" % color 36 | 37 | 38 | def _train_test_label_difference( 39 | workspace_label_percentage_dict, test_label_percentage_dict 40 | ): 41 | """ 42 | analyze the difference between training set and test set 43 | :param workspace_label_percentage_dict: 44 | :param test_label_percentage_dict: 45 | :return: 46 | missing_label: list of labels that are missing in the test set 47 | difference_dict: dictionary that maps intent:percentage difference 48 | js_distance: jensen-shannon distance between train and test label percentages 49 | """ 50 | difference_dict = dict() 51 | missing_label = list() 52 | distribution1 = list() 53 | distribution2 = list() 54 | 55 | for key in workspace_label_percentage_dict: 56 | workspace_percentage = workspace_label_percentage_dict[key] 57 | distribution1.append(workspace_percentage) 58 | if key in test_label_percentage_dict: 59 | 60 | test_percentage = test_label_percentage_dict[key] 61 | 62 | distribution2.append(test_percentage) 63 | else: 64 | missing_label.append(key) 65 | test_percentage = 0 66 | distribution2.append(test_percentage) 67 | 68 | # L1 dist 69 | current_difference = np.abs(test_percentage - workspace_percentage) 70 | 71 | if key in test_label_percentage_dict: 72 | difference_dict[key] = 
[ 73 | workspace_percentage * 100, 74 | test_percentage * 100, 75 | current_difference * 100, 76 | ] 77 | 78 | js_distance = distance.jensenshannon(distribution1, distribution2, 2.0) 79 | 80 | return missing_label, difference_dict, js_distance 81 | 82 | 83 | def _train_test_vocab_difference(train_set_pd, test_set_pd): 84 | """ 85 | Analyze the training set and test set and retrieve the vocabulary of each set 86 | :param train_set_pd: 87 | :param test_set_pd: 88 | :return: 89 | train vocab: the set that contains the vocabulary of training set 90 | test vocab: the set that contains the vocabulary of test set 91 | """ 92 | train_vocab = set() 93 | test_vocab = set() 94 | 95 | for tokens in train_set_pd["tokens"].tolist(): 96 | train_vocab.update(tokens) 97 | 98 | for tokens in test_set_pd["tokens"].tolist(): 99 | test_vocab.update(tokens) 100 | 101 | return train_vocab, test_vocab 102 | 103 | 104 | def _train_test_utterance_length_difference(train_set_pd, test_set_pd): 105 | """ 106 | Analyze difference in length of utterance of training set and test set per label 107 | :param train_set_pd: 108 | :param test_set_pd: 109 | :return: 110 | train_test_legnth_comparison: pandas dataframe [Intent, Absolute Difference] 111 | """ 112 | train_pd_temp = train_set_pd.copy() 113 | train_pd_temp["Train"] = train_pd_temp["tokens"].apply(len) 114 | train_avg_len_by_label = train_pd_temp[["intent", "Train"]].groupby("intent").mean() 115 | 116 | test_pd_temp = test_set_pd.copy() 117 | test_pd_temp["Test"] = test_pd_temp["tokens"].apply(len) 118 | test_avg_len_by_label = test_pd_temp[["intent", "Test"]].groupby("intent").mean() 119 | 120 | train_test_length_comparison = pd.merge( 121 | train_avg_len_by_label, test_avg_len_by_label, on="intent" 122 | ) 123 | train_test_length_comparison["Absolute Difference"] = np.abs( 124 | train_test_length_comparison["Train"] - train_test_length_comparison["Test"] 125 | ) 126 | train_test_length_comparison = train_test_length_comparison.sort_values( 
127 | by=["Absolute Difference"], ascending=False 128 | ) 129 | train_test_length_comparison = train_test_length_comparison.reset_index() 130 | train_test_length_comparison.rename(columns={"intent": "Intent"}, inplace=True) 131 | return train_test_length_comparison 132 | 133 | 134 | def _get_metrics(results): 135 | """ 136 | compute the metrics of precision, recall and f1 per label 137 | :param results: inference results of the test set 138 | :return: 139 | precision_dict: maps the {intent: precision} 140 | recall_dict: maps the {intent: recall} 141 | f1_dict: maps the {intent:f1} 142 | """ 143 | groundtruth = results["correct_intent"].values.tolist() 144 | top_intent = results["top_intent"].values.tolist() 145 | gt_cnt_dict = dict() 146 | pred_cnt_dict = dict() 147 | true_positive_dict = dict() 148 | for gt, pred in zip(groundtruth, top_intent): 149 | gt_cnt_dict[gt] = gt_cnt_dict.get(gt, 0) + 1 150 | pred_cnt_dict[pred] = pred_cnt_dict.get(pred, 0) + 1 151 | if gt == pred: 152 | true_positive_dict[pred] = true_positive_dict.get(pred, 0) + 1 153 | precision_dict = dict() 154 | recall_dict = dict() 155 | f1_dict = dict() 156 | for lb in true_positive_dict: 157 | 158 | recall_dict[lb] = ( 159 | true_positive_dict[lb] / gt_cnt_dict[lb] if lb in gt_cnt_dict else 0 160 | ) 161 | 162 | precision_dict[lb] = ( 163 | true_positive_dict[lb] / pred_cnt_dict[lb] if lb in pred_cnt_dict else 0 164 | ) 165 | 166 | f1_dict[lb] = ( 167 | 0.0 168 | if recall_dict[lb] == 0 and precision_dict[lb] == 0 169 | else 2.0 170 | * recall_dict[lb] 171 | * precision_dict[lb] 172 | / (recall_dict[lb] + precision_dict[lb]) 173 | ) 174 | return precision_dict, recall_dict, f1_dict 175 | 176 | 177 | def analyze_train_test_diff(train_set_pd, test_set_pd, results): 178 | """ 179 | analyze the difference between training set and test set and generate visualizations 180 | :param train_set_pd: 181 | :param test_set_pd: 182 | :param results: 183 | """ 184 | workspace_label_percentage_dict = 
_label_percentage(train_set_pd) 185 | test_label_percentage_dict = _label_percentage(test_set_pd) 186 | 187 | missing_label, difference_dict, js = _train_test_label_difference( 188 | workspace_label_percentage_dict, test_label_percentage_dict 189 | ) 190 | train_test_length_comparison_pd = _train_test_utterance_length_difference( 191 | train_set_pd, test_set_pd 192 | ) 193 | 194 | train_vocab, test_vocab = _train_test_vocab_difference(train_set_pd, test_set_pd) 195 | 196 | display(Markdown("## Test Data Evaluation")) 197 | 198 | if difference_dict: 199 | 200 | label = list(difference_dict.keys()) 201 | diff = np.round(list(difference_dict.values()), 2) 202 | precision_dict, recall_dict, f1_dict = _get_metrics(results) 203 | precision = np.round( 204 | [precision_dict[l] * 100.0 if l in precision_dict else 0.0 for l in label], 205 | 2, 206 | ) 207 | 208 | recall = np.round( 209 | [recall_dict[l] * 100.0 if l in recall_dict else 0.0 for l in label], 2 210 | ) 211 | 212 | f1 = np.round([f1_dict[l] * 100.0 if l in f1_dict else 0.0 for l in label], 2) 213 | 214 | train_count_dict = dict(Counter(train_set_pd["intent"])) 215 | test_count_dict = dict(Counter(test_set_pd["intent"])) 216 | tr_cnt = [train_count_dict[l] if l in train_count_dict else 0.0 for l in label] 217 | te_cnt = [test_count_dict[l] if l in test_count_dict else 0.0 for l in label] 218 | 219 | difference_pd = pd.DataFrame( 220 | { 221 | "Intent": label, 222 | "% of Train": diff[:, 0], 223 | "% of Test": diff[:, 1], 224 | "Absolute Difference %": diff[:, 2], 225 | "Train Examples": tr_cnt, 226 | "Test Examples": te_cnt, 227 | "Test Precision %": precision, 228 | "Test Recall %": recall, 229 | "Test F1 %": f1, 230 | } 231 | ) 232 | 233 | if not difference_pd[difference_pd["Absolute Difference %"] > 0.001].empty: 234 | table_for_display = difference_pd[ 235 | difference_pd["Absolute Difference %"] > 0.001 236 | ].sort_values(by=["Absolute Difference %"], ascending=False) 237 | table_for_display = 
table_for_display.style.applymap( 238 | _train_test_coloring, subset=pd.IndexSlice[:, ["Absolute Difference %"]] 239 | ) 240 | display(table_for_display) 241 | display(Markdown("\n")) 242 | display(Markdown("Distribution Mismatch Color Code")) 243 | display(Markdown(" Red - Severe ")) 244 | display(Markdown(" Blue - Caution ")) 245 | display(Markdown(" Green - Good ")) 246 | 247 | if js >= 0: 248 | js = np.round(js, 2) * 100 249 | display( 250 | Markdown( 251 | "### Data Distribution Divergence Test vs Train \ 252 | {}%".format( 253 | js 254 | ) 255 | ) 256 | ) 257 | display(Markdown("**Note** Metric used is Jensen Shannon Distance")) 258 | 259 | if missing_label: 260 | display(Markdown("### Missing Intents in Test Data")) 261 | missing_label_pd = pd.DataFrame( 262 | missing_label, columns=["Missing Intents in Test Set "] 263 | ) 264 | missing_label_pd.index = np.arange(1, len(missing_label_pd) + 1) 265 | display(missing_label_pd) 266 | 267 | display(Markdown("### Test Data Example Length")) 268 | condition1 = ( 269 | train_test_length_comparison_pd["Absolute Difference"] 270 | / train_test_length_comparison_pd["Train"] 271 | > 0.3 272 | ) 273 | condition2 = train_test_length_comparison_pd["Absolute Difference"] > 3 274 | 275 | length_comparison_pd = train_test_length_comparison_pd[condition1 & condition2] 276 | 277 | if not length_comparison_pd.empty: 278 | display( 279 | Markdown( 280 | "Divergence found in average length of user examples in test vs training data" 281 | ) 282 | ) 283 | length_comparison_pd.index = np.arange(1, len(length_comparison_pd) + 1) 284 | display(length_comparison_pd.round(2)) 285 | else: 286 | display(Markdown("Average length of user examples is comparable")) 287 | 288 | if train_vocab and test_vocab: 289 | display(Markdown("### Vocabulary Size Test vs Train")) 290 | oov_vocab_percentage = ( 291 | (len(test_vocab) - len(train_vocab.intersection(test_vocab))) 292 | / len(test_vocab) 293 | * 100 294 | ) 295 | 296 | vocab_df = pd.DataFrame( 
297 | data={ 298 | "Train Vocabulary Size": [len(train_vocab)], 299 | "Test Vocabulary Size": [len(test_vocab)], 300 | "% Test Set Vocabulary not found in Train": [oov_vocab_percentage], 301 | } 302 | ) 303 | vocab_df.index = np.arange(1, len(vocab_df) + 1) 304 | display(vocab_df.round(2)) 305 | 306 | display(Markdown(" ")) 307 | -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/nl/stopwords: -------------------------------------------------------------------------------- 1 | # this is adapted from https://github.com/stopwords-iso/stopwords-nl 2 | aan 3 | aangaande 4 | aangezien 5 | achter 6 | achterna 7 | afgelopen 8 | al 9 | aldaar 10 | aldus 11 | alhoewel 12 | alias 13 | alle 14 | allebei 15 | alleen 16 | alsnog 17 | altijd 18 | altoos 19 | ander 20 | andere 21 | anders 22 | anderszins 23 | behalve 24 | behoudens 25 | beide 26 | beiden 27 | ben 28 | beneden 29 | bent 30 | bepaald 31 | betreffende 32 | bij 33 | binnen 34 | binnenin 35 | boven 36 | bovenal 37 | bovendien 38 | bovengenoemd 39 | bovenstaand 40 | bovenvermeld 41 | buiten 42 | daar 43 | daarheen 44 | daarin 45 | daarna 46 | daarnet 47 | daarom 48 | daarop 49 | daarvanlangs 50 | dan 51 | dat 52 | de 53 | die 54 | dikwijls 55 | dit 56 | door 57 | doorgaand 58 | dus 59 | echter 60 | eer 61 | eerdat 62 | eerder 63 | eerlang 64 | eerst 65 | elk 66 | elke 67 | en 68 | enig 69 | enigszins 70 | enkel 71 | er 72 | erdoor 73 | even 74 | eveneens 75 | evenwel 76 | gauw 77 | gedurende 78 | geen 79 | gehad 80 | gekund 81 | geleden 82 | gelijk 83 | gemoeten 84 | gemogen 85 | geweest 86 | gewoon 87 | gewoonweg 88 | haar 89 | had 90 | hadden 91 | hare 92 | heb 93 | hebben 94 | hebt 95 | heeft 96 | hem 97 | hen 98 | het 99 | hierbeneden 100 | hierboven 101 | hij 102 | hoe 103 | hoewel 104 | hun 105 | hunne 106 | ik 107 | ikzelf 108 | in 109 | inmiddels 110 | inzake 111 | is 112 | jezelf 113 | jij 114 | jijzelf 115 | jou 116 | jouw 117 | jouwe 118 | 
juist 119 | jullie 120 | kan 121 | klaar 122 | kon 123 | konden 124 | krachtens 125 | kunnen 126 | kunt 127 | later 128 | liever 129 | maar 130 | mag 131 | meer 132 | met 133 | mezelf 134 | mij 135 | mijn 136 | mijnent 137 | mijner 138 | mijzelf 139 | misschien 140 | mocht 141 | mochten 142 | moest 143 | moesten 144 | moet 145 | moeten 146 | mogen 147 | na 148 | naar 149 | nadat 150 | net 151 | niet 152 | noch 153 | nog 154 | nogal 155 | nu 156 | of 157 | ofschoon 158 | om 159 | omdat 160 | omhoog 161 | omlaag 162 | omstreeks 163 | omtrent 164 | omver 165 | onder 166 | ondertussen 167 | ongeveer 168 | ons 169 | onszelf 170 | onze 171 | ook 172 | op 173 | opnieuw 174 | opzij 175 | over 176 | overeind 177 | overigens 178 | pas 179 | precies 180 | reeds 181 | rond 182 | rondom 183 | sedert 184 | sinds 185 | sindsdien 186 | slechts 187 | sommige 188 | spoedig 189 | steeds 190 | tamelijk 191 | tenzij 192 | terwijl 193 | thans 194 | tijdens 195 | toch 196 | toen 197 | toenmaals 198 | toenmalig 199 | tot 200 | totdat 201 | tussen 202 | uit 203 | uitgezonderd 204 | vaakwat 205 | van 206 | vandaan 207 | vanuit 208 | vanwege 209 | veeleer 210 | verder 211 | vervolgens 212 | vol 213 | volgens 214 | voor 215 | vooraf 216 | vooral 217 | vooralsnog 218 | voorbij 219 | voordat 220 | voordezen 221 | voordien 222 | voorheen 223 | voorop 224 | vooruit 225 | vrij 226 | vroeg 227 | waar 228 | waarom 229 | wanneer 230 | want 231 | waren 232 | was 233 | weer 234 | weg 235 | wegens 236 | wel 237 | weldra 238 | welk 239 | welke 240 | wie 241 | wiens 242 | wier 243 | wij 244 | wijzelf 245 | zal 246 | ze 247 | zelfs 248 | zichzelf 249 | zij 250 | zijn 251 | zijne 252 | zo 253 | zodra 254 | zonder 255 | zou 256 | zouden 257 | zowat 258 | zulke 259 | zullen 260 | zult 261 | aan 262 | af 263 | al 264 | alles 265 | als 266 | altijd 267 | andere 268 | ben 269 | bij 270 | daar 271 | dan 272 | dat 273 | de 274 | der 275 | deze 276 | die 277 | dit 278 | doch 279 | doen 280 | door 281 | dus 282 | 
een 283 | eens 284 | en 285 | er 286 | ge 287 | geen 288 | geweest 289 | haar 290 | had 291 | heb 292 | hebben 293 | heeft 294 | hem 295 | het 296 | hier 297 | hij 298 | hoe 299 | hun 300 | iemand 301 | iets 302 | ik 303 | in 304 | is 305 | ja 306 | je 307 | kan 308 | kon 309 | kunnen 310 | maar 311 | me 312 | meer 313 | men 314 | met 315 | mij 316 | mijn 317 | moet 318 | na 319 | naar 320 | niet 321 | niets 322 | nog 323 | nu 324 | of 325 | om 326 | omdat 327 | ons 328 | ook 329 | op 330 | over 331 | reeds 332 | te 333 | tegen 334 | toch 335 | toen 336 | tot 337 | u 338 | uit 339 | uw 340 | van 341 | veel 342 | voor 343 | want 344 | waren 345 | was 346 | wat 347 | we 348 | wel 349 | werd 350 | wezen 351 | wie 352 | wij 353 | wil 354 | worden 355 | zal 356 | ze 357 | zei 358 | zelf 359 | zich 360 | zij 361 | zijn 362 | zo 363 | zonder 364 | zou 365 | Footer 366 | de 367 | en 368 | van 369 | ik 370 | te 371 | dat 372 | die 373 | in 374 | een 375 | hij 376 | het 377 | niet 378 | zijn 379 | is 380 | was 381 | op 382 | aan 383 | met 384 | als 385 | voor 386 | had 387 | er 388 | maar 389 | om 390 | hem 391 | dan 392 | zou 393 | of 394 | wat 395 | mijn 396 | men 397 | dit 398 | zo 399 | door 400 | over 401 | ze 402 | zich 403 | bij 404 | ook 405 | tot 406 | je 407 | mij 408 | uit 409 | der 410 | daar 411 | haar 412 | naar 413 | heb 414 | hoe 415 | heeft 416 | hebben 417 | deze 418 | u 419 | want 420 | nog 421 | zal 422 | me 423 | zij 424 | nu 425 | ge 426 | geen 427 | omdat 428 | iets 429 | worden 430 | toch 431 | al 432 | waren 433 | veel 434 | meer 435 | doen 436 | toen 437 | moet 438 | ben 439 | zonder 440 | kan 441 | hun 442 | dus 443 | alles 444 | onder 445 | ja 446 | eens 447 | hier 448 | wie 449 | werd 450 | altijd 451 | doch 452 | wordt 453 | wezen 454 | kunnen 455 | ons 456 | zelf 457 | tegen 458 | na 459 | reeds 460 | wil 461 | kon 462 | niets 463 | uw 464 | iemand 465 | geweest 466 | andere 467 | aan 468 | achte 469 | achter 470 | af 471 | al 472 | alle 473 | 
alleen 474 | alles 475 | als 476 | ander 477 | anders 478 | beetje 479 | behalve 480 | beide 481 | beiden 482 | ben 483 | beneden 484 | bent 485 | bij 486 | bijna 487 | bijv 488 | blijkbaar 489 | blijken 490 | boven 491 | bv 492 | daar 493 | daardoor 494 | daarin 495 | daarna 496 | daarom 497 | daaruit 498 | dan 499 | dat 500 | de 501 | deden 502 | deed 503 | derde 504 | derhalve 505 | dertig 506 | deze 507 | dhr 508 | die 509 | dit 510 | doe 511 | doen 512 | doet 513 | door 514 | drie 515 | duizend 516 | echter 517 | een 518 | eens 519 | eerst 520 | eerste 521 | eigen 522 | eigenlijk 523 | elk 524 | elke 525 | en 526 | enige 527 | er 528 | erg 529 | ergens 530 | etc 531 | etcetera 532 | even 533 | geen 534 | genoeg 535 | geweest 536 | haar 537 | haarzelf 538 | had 539 | hadden 540 | heb 541 | hebben 542 | hebt 543 | hedden 544 | heeft 545 | heel 546 | hem 547 | hemzelf 548 | hen 549 | het 550 | hetzelfde 551 | hier 552 | hierin 553 | hierna 554 | hierom 555 | hij 556 | hijzelf 557 | hoe 558 | honderd 559 | hun 560 | ieder 561 | iedere 562 | iedereen 563 | iemand 564 | iets 565 | ik 566 | in 567 | inderdaad 568 | intussen 569 | is 570 | ja 571 | je 572 | jij 573 | jijzelf 574 | jou 575 | jouw 576 | jullie 577 | kan 578 | kon 579 | konden 580 | kun 581 | kunnen 582 | kunt 583 | laatst 584 | later 585 | lijken 586 | lijkt 587 | maak 588 | maakt 589 | maakte 590 | maakten 591 | maar 592 | mag 593 | maken 594 | me 595 | meer 596 | meest 597 | meestal 598 | men 599 | met 600 | mevr 601 | mij 602 | mijn 603 | minder 604 | miss 605 | misschien 606 | missen 607 | mits 608 | mocht 609 | mochten 610 | moest 611 | moesten 612 | moet 613 | moeten 614 | mogen 615 | mr 616 | mrs 617 | mw 618 | na 619 | naar 620 | nam 621 | namelijk 622 | nee 623 | neem 624 | negen 625 | nemen 626 | nergens 627 | niemand 628 | niet 629 | niets 630 | niks 631 | noch 632 | nochtans 633 | nog 634 | nooit 635 | nu 636 | nv 637 | of 638 | om 639 | omdat 640 | ondanks 641 | onder 642 | ondertussen 643 
| ons 644 | onze 645 | onzeker 646 | ooit 647 | ook 648 | op 649 | over 650 | overal 651 | overige 652 | paar 653 | per 654 | recent 655 | redelijk 656 | samen 657 | sinds 658 | steeds 659 | te 660 | tegen 661 | tegenover 662 | thans 663 | tien 664 | tiende 665 | tijdens 666 | tja 667 | toch 668 | toe 669 | tot 670 | totdat 671 | tussen 672 | twee 673 | tweede 674 | u 675 | uit 676 | uw 677 | vaak 678 | van 679 | vanaf 680 | veel 681 | veertig 682 | verder 683 | verscheidene 684 | verschillende 685 | via 686 | vier 687 | vierde 688 | vijf 689 | vijfde 690 | vijftig 691 | volgend 692 | volgens 693 | voor 694 | voordat 695 | voorts 696 | waar 697 | waarom 698 | waarschijnlijk 699 | wanneer 700 | waren 701 | was 702 | wat 703 | we 704 | wederom 705 | weer 706 | weinig 707 | wel 708 | welk 709 | welke 710 | werd 711 | werden 712 | werder 713 | whatever 714 | wie 715 | wij 716 | wijzelf 717 | wil 718 | wilden 719 | willen 720 | word 721 | worden 722 | wordt 723 | zal 724 | ze 725 | zei 726 | zeker 727 | zelf 728 | zelfde 729 | zes 730 | zeven 731 | zich 732 | zij 733 | zijn 734 | zijzelf 735 | zo 736 | zoals 737 | zodat 738 | zou 739 | zouden 740 | zulk 741 | zullen 742 | aan 743 | af 744 | al 745 | als 746 | bij 747 | dan 748 | dat 749 | die 750 | dit 751 | een 752 | en 753 | er 754 | had 755 | heb 756 | hem 757 | het 758 | hij 759 | hoe 760 | hun 761 | ik 762 | in 763 | is 764 | je 765 | kan 766 | me 767 | men 768 | met 769 | mij 770 | nog 771 | nu 772 | of 773 | ons 774 | ook 775 | te 776 | tot 777 | uit 778 | van 779 | was 780 | wat 781 | we 782 | wel 783 | wij 784 | zal 785 | ze 786 | zei 787 | zij 788 | zo 789 | zou 790 | de 791 | en 792 | van 793 | ik 794 | te 795 | dat 796 | die 797 | in 798 | een 799 | hij 800 | het 801 | niet 802 | zijn 803 | is 804 | was 805 | op 806 | aan 807 | met 808 | als 809 | voor 810 | had 811 | er 812 | maar 813 | om 814 | hem 815 | dan 816 | zou 817 | of 818 | wat 819 | mijn 820 | men 821 | dit 822 | zo 823 | door 824 | over 825 | ze 
826 | zich 827 | bij 828 | ook 829 | tot 830 | je 831 | mij 832 | uit 833 | der 834 | daar 835 | haar 836 | naar 837 | heb 838 | hoe 839 | heeft 840 | hebben 841 | deze 842 | u 843 | want 844 | nog 845 | zal 846 | me 847 | zij 848 | nu 849 | ge 850 | geen 851 | omdat 852 | iets 853 | worden 854 | toch 855 | al 856 | waren 857 | veel 858 | meer 859 | doen 860 | toen 861 | moet 862 | ben 863 | zonder 864 | kan 865 | hun 866 | dus 867 | alles 868 | onder 869 | ja 870 | eens 871 | hier 872 | wie 873 | werd 874 | altijd 875 | doch 876 | wordt 877 | wezen 878 | kunnen 879 | ons 880 | zelf 881 | tegen 882 | na 883 | reeds 884 | wil 885 | kon 886 | niets 887 | uw 888 | iemand 889 | geweest 890 | andere 891 | aan 892 | aangaande 893 | aangezien 894 | achte 895 | achter 896 | achterna 897 | af 898 | afgelopen 899 | al 900 | aldaar 901 | aldus 902 | alhoewel 903 | alias 904 | alle 905 | allebei 906 | alleen 907 | alles 908 | als 909 | alsnog 910 | altijd 911 | altoos 912 | ander 913 | andere 914 | anders 915 | anderszins 916 | beetje 917 | behalve 918 | behoudens 919 | beide 920 | beiden 921 | ben 922 | beneden 923 | bent 924 | bepaald 925 | betreffende 926 | bij 927 | bijna 928 | bijv 929 | binnen 930 | binnenin 931 | blijkbaar 932 | blijken 933 | boven 934 | bovenal 935 | bovendien 936 | bovengenoemd 937 | bovenstaand 938 | bovenvermeld 939 | buiten 940 | bv 941 | daar 942 | daardoor 943 | daarheen 944 | daarin 945 | daarna 946 | daarnet 947 | daarom 948 | daarop 949 | daaruit 950 | daarvanlangs 951 | dan 952 | dat 953 | de 954 | deden 955 | deed 956 | der 957 | derde 958 | derhalve 959 | dertig 960 | deze 961 | dhr 962 | die 963 | dikwijls 964 | dit 965 | doch 966 | doe 967 | doen 968 | doet 969 | door 970 | doorgaand 971 | drie 972 | duizend 973 | dus 974 | echter 975 | een 976 | eens 977 | eer 978 | eerdat 979 | eerder 980 | eerlang 981 | eerst 982 | eerste 983 | eigen 984 | eigenlijk 985 | elk 986 | elke 987 | en 988 | enig 989 | enige 990 | enigszins 991 | enkel 992 | 
er 993 | erdoor 994 | erg 995 | ergens 996 | etc 997 | etcetera 998 | even 999 | eveneens 1000 | evenwel 1001 | gauw 1002 | ge 1003 | gedurende 1004 | geen 1005 | gehad 1006 | gekund 1007 | geleden 1008 | gelijk 1009 | gemoeten 1010 | gemogen 1011 | genoeg 1012 | geweest 1013 | gewoon 1014 | gewoonweg 1015 | haar 1016 | haarzelf 1017 | had 1018 | hadden 1019 | hare 1020 | heb 1021 | hebben 1022 | hebt 1023 | hedden 1024 | heeft 1025 | heel 1026 | hem 1027 | hemzelf 1028 | hen 1029 | het 1030 | hetzelfde 1031 | hier 1032 | hierbeneden 1033 | hierboven 1034 | hierin 1035 | hierna 1036 | hierom 1037 | hij 1038 | hijzelf 1039 | hoe 1040 | hoewel 1041 | honderd 1042 | hun 1043 | hunne 1044 | ieder 1045 | iedere 1046 | iedereen 1047 | iemand 1048 | iets 1049 | ik 1050 | ikzelf 1051 | in 1052 | inderdaad 1053 | inmiddels 1054 | intussen 1055 | inzake 1056 | is 1057 | ja 1058 | je 1059 | jezelf 1060 | jij 1061 | jijzelf 1062 | jou 1063 | jouw 1064 | jouwe 1065 | juist 1066 | jullie 1067 | kan 1068 | klaar 1069 | kon 1070 | konden 1071 | krachtens 1072 | kun 1073 | kunnen 1074 | kunt 1075 | laatst 1076 | later 1077 | liever 1078 | lijken 1079 | lijkt 1080 | maak 1081 | maakt 1082 | maakte 1083 | maakten 1084 | maar 1085 | mag 1086 | maken 1087 | me 1088 | meer 1089 | meest 1090 | meestal 1091 | men 1092 | met 1093 | mevr 1094 | mezelf 1095 | mij 1096 | mijn 1097 | mijnent 1098 | mijner 1099 | mijzelf 1100 | minder 1101 | miss 1102 | misschien 1103 | missen 1104 | mits 1105 | mocht 1106 | mochten 1107 | moest 1108 | moesten 1109 | moet 1110 | moeten 1111 | mogen 1112 | mr 1113 | mrs 1114 | mw 1115 | na 1116 | naar 1117 | nadat 1118 | nam 1119 | namelijk 1120 | nee 1121 | neem 1122 | negen 1123 | nemen 1124 | nergens 1125 | net 1126 | niemand 1127 | niet 1128 | niets 1129 | niks 1130 | noch 1131 | nochtans 1132 | nog 1133 | nogal 1134 | nooit 1135 | nu 1136 | nv 1137 | of 1138 | ofschoon 1139 | om 1140 | omdat 1141 | omhoog 1142 | omlaag 1143 | omstreeks 1144 | omtrent 1145 | 
omver 1146 | ondanks 1147 | onder 1148 | ondertussen 1149 | ongeveer 1150 | ons 1151 | onszelf 1152 | onze 1153 | onzeker 1154 | ooit 1155 | ook 1156 | op 1157 | opnieuw 1158 | opzij 1159 | over 1160 | overal 1161 | overeind 1162 | overige 1163 | overigens 1164 | paar 1165 | pas 1166 | per 1167 | precies 1168 | recent 1169 | redelijk 1170 | reeds 1171 | rond 1172 | rondom 1173 | samen 1174 | sedert 1175 | sinds 1176 | sindsdien 1177 | slechts 1178 | sommige 1179 | spoedig 1180 | steeds 1181 | tamelijk 1182 | te 1183 | tegen 1184 | tegenover 1185 | tenzij 1186 | terwijl 1187 | thans 1188 | tien 1189 | tiende 1190 | tijdens 1191 | tja 1192 | toch 1193 | toe 1194 | toen 1195 | toenmaals 1196 | toenmalig 1197 | tot 1198 | totdat 1199 | tussen 1200 | twee 1201 | tweede 1202 | u 1203 | uit 1204 | uitgezonderd 1205 | uw 1206 | vaak 1207 | vaakwat 1208 | van 1209 | vanaf 1210 | vandaan 1211 | vanuit 1212 | vanwege 1213 | veel 1214 | veeleer 1215 | veertig 1216 | verder 1217 | verscheidene 1218 | verschillende 1219 | vervolgens 1220 | via 1221 | vier 1222 | vierde 1223 | vijf 1224 | vijfde 1225 | vijftig 1226 | vol 1227 | volgend 1228 | volgens 1229 | voor 1230 | vooraf 1231 | vooral 1232 | vooralsnog 1233 | voorbij 1234 | voordat 1235 | voordezen 1236 | voordien 1237 | voorheen 1238 | voorop 1239 | voorts 1240 | vooruit 1241 | vrij 1242 | vroeg 1243 | waar 1244 | waarom 1245 | waarschijnlijk 1246 | wanneer 1247 | want 1248 | waren 1249 | was 1250 | wat 1251 | we 1252 | wederom 1253 | weer 1254 | weg 1255 | wegens 1256 | weinig 1257 | wel 1258 | weldra 1259 | welk 1260 | welke 1261 | werd 1262 | werden 1263 | werder 1264 | wezen 1265 | whatever 1266 | wie 1267 | wiens 1268 | wier 1269 | wij 1270 | wijzelf 1271 | wil 1272 | wilden 1273 | willen 1274 | word 1275 | worden 1276 | wordt 1277 | zal 1278 | ze 1279 | zei 1280 | zeker 1281 | zelf 1282 | zelfde 1283 | zelfs 1284 | zes 1285 | zeven 1286 | zich 1287 | zichzelf 1288 | zij 1289 | zijn 1290 | zijne 1291 | zijzelf 1292 | 
zo 1293 | zoals 1294 | zodat 1295 | zodra 1296 | zonder 1297 | zou 1298 | zouden 1299 | zowat 1300 | zulk 1301 | zulke 1302 | zullen 1303 | zult -------------------------------------------------------------------------------- /assistant_skill_analysis/highlighting/highlighter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import numpy as np 4 | import pandas as pd 5 | from IPython.display import display, Markdown 6 | 7 | import seaborn as sns 8 | import matplotlib.pyplot as plt 9 | 10 | import ibm_watson 11 | 12 | from ..utils import skills_util 13 | from ..inferencing import inferencer 14 | 15 | MAX_TOKEN_LENGTH = 20 16 | NGRAM_RANGE = [1] 17 | 18 | 19 | def get_highlights_in_batch_multi_thread( 20 | conversation, 21 | full_results, 22 | output_folder, 23 | confidence_threshold, 24 | show_worst_k, 25 | lang_util, 26 | skill_id=None, 27 | assistant_id=None, 28 | intent_to_action_mapping=None, 29 | environment_id=None, 30 | ): 31 | """ 32 | Given the prediction result, rank prediction results from worst to best 33 | & analyze the top k worst results. 
34 | Term level highlighting on the worst results shows the sensitivity of terms in utterance 35 | :param conversation: conversation object produced by watson api 36 | :param skill_id: skill id 37 | :param full_results: prediction result showing the ranked list of intents by confidence scores 38 | :param output_folder: the output folder where the highlighting images will be saved 39 | :param confidence_threshold: the confidence threshold for offtopic detection 40 | :param show_worst_k: the top worst k results based on heuristics 41 | :param assistant_id: 42 | :param intent_to_action_mapping: 43 | :param environment_id: the environment id 44 | :return: 45 | """ 46 | if isinstance(conversation, ibm_watson.AssistantV1): 47 | assert skill_id is not None 48 | else: 49 | assert assistant_id is not None 50 | wrong_examples_sorted = _filter_results( 51 | full_results, confidence_threshold, lang_util 52 | ) 53 | display( 54 | Markdown( 55 | "### Identified {} problematic utterances ".format( 56 | len(wrong_examples_sorted) 57 | ) 58 | ) 59 | ) 60 | display(Markdown(" ")) 61 | 62 | wrong_examples_sorted = wrong_examples_sorted[:show_worst_k] 63 | 64 | ( 65 | adversarial_results, 66 | adversarial_span_dict, 67 | ) = _adversarial_examples_multi_thread_inference( 68 | wrong_examples_sorted=wrong_examples_sorted, 69 | conversation=conversation, 70 | skill_id=skill_id, 71 | assistant_id=assistant_id, 72 | intent_to_action_mapping=intent_to_action_mapping, 73 | environment_id=environment_id, 74 | ) 75 | 76 | if not adversarial_results.empty: 77 | 78 | display(Markdown("{} examples are shown below:".format(show_worst_k))) 79 | for original_example in wrong_examples_sorted: 80 | if not original_example[2]: 81 | label = skills_util.OFFTOPIC_LABEL 82 | else: 83 | label = original_example[2] 84 | label_idx = label + "\t" + str(original_example[0]) 85 | adversarial_result_subset = adversarial_results[ 86 | adversarial_results["correct_intent"] == label_idx 87 | ] 88 | highlight = 
_highlight_scoring( 89 | original_example, 90 | adversarial_result_subset, 91 | adversarial_span_dict, 92 | lang_util, 93 | ) 94 | _plot_highlight(highlight, original_example, output_folder, lang_util) 95 | 96 | 97 | def _filter_results(full_results, confidence_threshold, lang_util): 98 | """ 99 | Given the full predicted results and confidence threshold, 100 | this function returns a ranked list of the mis-classified examples 101 | :param full_results: 102 | :param confidence_threshold: 103 | :return highlighting_candidates_sorted 104 | """ 105 | highlighting_candidates = list() 106 | for idx in range(len(full_results)): 107 | item = full_results.iloc[idx] 108 | results_intent_list = [predict["intent"] for predict in item["top_predicts"]] 109 | if not item["top_predicts"]: 110 | result_dict = {} 111 | elif ( 112 | isinstance(item["top_predicts"], list) 113 | and "intent" in item["top_predicts"][0] 114 | ): 115 | result_dict = { 116 | pred["intent"]: pred["confidence"] for pred in item["top_predicts"] 117 | } 118 | else: 119 | result_dict = dict(item["top_predicts"]) 120 | if item["correct_intent"] in results_intent_list: 121 | reference_position = results_intent_list.index(item["correct_intent"]) 122 | else: 123 | reference_position = len(results_intent_list) 124 | 125 | rank_score = 0 126 | # for off-topic examples, rank score = off-topic confidence score - confidence threshold 127 | if item["correct_intent"] == skills_util.OFFTOPIC_LABEL: 128 | if item["top_confidence"] > confidence_threshold: 129 | rank_score = item["top_confidence"] - confidence_threshold 130 | 131 | highlighting_candidates.append( 132 | ( 133 | idx, 134 | item["utterance"], 135 | None, 136 | item["top_intent"], 137 | item["top_confidence"], 138 | rank_score, 139 | reference_position, 140 | ) 141 | ) 142 | else: 143 | if (item["top_intent"] != item["correct_intent"]) or ( 144 | item["top_confidence"] <= confidence_threshold 145 | ): 146 | if item["top_intent"] != item["correct_intent"]: 147 | 
# for incorrectly predicted examples, if the correct intent is not in top 10 148 | # rank score = confidence of the predicted intent 149 | if item["correct_intent"] not in result_dict: 150 | rank_score = item["top_confidence"] 151 | else: 152 | # for incorrectly predicted examples, if the correct intent is in top 10, 153 | # rank score = confidence of predicted intent - confidence of correct intent 154 | rank_score = ( 155 | item["top_confidence"] - result_dict[item["correct_intent"]] 156 | ) 157 | elif item["top_confidence"] <= confidence_threshold: 158 | # for correctly predicted examples, if the predicted confidence is less than 159 | # confidence threshold, rank score = confidence threshold - predicted confidence 160 | rank_score = confidence_threshold - item["top_confidence"] 161 | highlighting_candidates.append( 162 | ( 163 | idx, 164 | item["utterance"], 165 | item["correct_intent"], 166 | item["top_intent"], 167 | item["top_confidence"], 168 | rank_score, 169 | reference_position, 170 | ) 171 | ) 172 | 173 | highlighting_candidates_sorted = sorted( 174 | highlighting_candidates, key=lambda x: x[5], reverse=True 175 | ) 176 | highlighting_candidates_sorted = [ 177 | candidate 178 | for candidate in highlighting_candidates_sorted 179 | if len(lang_util.tokenize(candidate[1])) < MAX_TOKEN_LENGTH 180 | ] 181 | 182 | return highlighting_candidates_sorted 183 | 184 | 185 | def _plot_highlight(highlight, original_example, output_folder, lang_util): 186 | """ 187 | Plot the highlighting score into a plot and store the plot in the output folder 188 | :param highlight: 189 | :param original_example: 190 | :param output_folder: 191 | """ 192 | if not original_example[2]: 193 | label = skills_util.OFFTOPIC_LABEL 194 | else: 195 | label = original_example[2] 196 | fig, ax = plt.subplots(figsize=(2, 5)) 197 | ax = sns.heatmap( 198 | [[i] for i in highlight.tolist()], 199 | yticklabels=lang_util.tokenize(original_example[1]), 200 | xticklabels=["Sensitivity to intent: " + 
'"' + label + '"'], 201 | cbar_kws={"orientation": "vertical"}, 202 | linewidths=0, 203 | square=False, 204 | cmap="Blues", 205 | ) 206 | 207 | if output_folder: 208 | conf_str = "%.3f" % (original_example[4]) 209 | if original_example[2]: 210 | filename = ( 211 | str(original_example[0]) 212 | + "_groundtruth_" 213 | + original_example[2] 214 | + "_prediction_" 215 | + original_example[3] 216 | + "_confidence_" 217 | + conf_str 218 | + ".png" 219 | ) 220 | else: 221 | filename = ( 222 | str(original_example[0]) 223 | + "_groundtruth_offtopic_prediction_" 224 | + original_example[3] 225 | + "_confidence_" 226 | + conf_str 227 | + ".png" 228 | ) 229 | 230 | save_path = os.path.join(output_folder, filename) 231 | plt.savefig(os.path.join(save_path), bbox_inches="tight") 232 | 233 | table = list() 234 | table.append(["Test Set Index", original_example[0]]) 235 | table.append(["Utterance", original_example[1]]) 236 | table.append( 237 | [ 238 | "Actual Intent", 239 | original_example[2] 240 | if (original_example[2]) 241 | else skills_util.OFFTOPIC_LABEL, 242 | ] 243 | ) 244 | table.append(["Predicted Intent", original_example[3]]) 245 | table.append(["Confidence", original_example[4]]) 246 | with pd.option_context("max_colwidth", 250): 247 | df = pd.DataFrame(data=table, columns=["Characteristic", "Value"]) 248 | df.index = np.arange(1, len(df) + 1) 249 | display(df) 250 | plt.show() 251 | 252 | 253 | def _adversarial_examples_multi_thread_inference( 254 | wrong_examples_sorted, 255 | conversation, 256 | skill_id=None, 257 | assistant_id=None, 258 | intent_to_action_mapping=None, 259 | environment_id=None, 260 | ): 261 | """ 262 | Perform multi threaded inference on all the adversarial examples 263 | :param wrong_examples_sorted: 264 | :param conversation: 265 | :param skill_id: 266 | :param assistant_id: 267 | :param intent_to_action_mapping: 268 | :param environment_id: 269 | """ 270 | if isinstance(conversation, ibm_watson.AssistantV1): 271 | assert skill_id is not 
None 272 | else: 273 | assert assistant_id is not None 274 | all_adversarial_examples = list() 275 | # the adversarial labels will be label\tidx for later regrouping purposes 276 | all_adversarial_label_idx = list() 277 | # map the adversarial example: span of adversarial 278 | adversarial_span_dict = dict() 279 | for original_example in wrong_examples_sorted: 280 | 281 | adversarial_examples, adversarial_span = _generate_adversarial_examples( 282 | original_example[1], original_example[0] 283 | ) 284 | 285 | if not original_example[2]: 286 | label = skills_util.OFFTOPIC_LABEL 287 | else: 288 | label = original_example[2] 289 | adversarial_label = label + "\t" + str(original_example[0]) 290 | 291 | all_adversarial_examples.extend(adversarial_examples) 292 | all_adversarial_label_idx.extend( 293 | [adversarial_label] * len(adversarial_examples) 294 | ) 295 | adversarial_span_dict.update(adversarial_span) 296 | 297 | adversarial_test_data_frame = pd.DataFrame( 298 | {"utterance": all_adversarial_examples, "intent": all_adversarial_label_idx} 299 | ) 300 | adversarial_results = inferencer.inference( 301 | conversation=conversation, 302 | test_data=adversarial_test_data_frame, 303 | max_thread=min(4, os.cpu_count() if os.cpu_count() else 1), 304 | skill_id=skill_id, 305 | assistant_id=assistant_id, 306 | intent_to_action_mapping=intent_to_action_mapping, 307 | environment_id=environment_id, 308 | ) 309 | display(Markdown(" ")) 310 | return adversarial_results, adversarial_span_dict 311 | 312 | 313 | def _generate_adversarial_examples(utt, original_idx): 314 | """ 315 | Generate adversarial examples by removing single tokens 316 | :param utt: string, utterance for generation of adversarial examples 317 | :param original_idx: the idx of the example in the original input data 318 | :returns 319 | adversarial_examples: list of strings, list of adversarial examples 320 | adversarial_span: dictionary of adversarial examples and the token span of the removed token 321 | """ 
    adversarial_examples = []
    adversarial_span = dict()
    tokens = utt.split()
    # drop every ngram-sized window of tokens to produce perturbed sentences
    for idx in range(len(tokens)):
        for ngram in NGRAM_RANGE:
            new_sent = " ".join(tokens[:idx] + tokens[idx + ngram :])
            adversarial_examples.append(new_sent)
            # key encodes both the perturbed sentence and the originating
            # example index, so identical sentences from different examples
            # do not collide
            adversarial_span[new_sent + "_" + str(original_idx)] = (idx, idx + ngram)
    return adversarial_examples, adversarial_span


def _highlight_scoring(
    original_example, subset_adversarial_result, adversarial_span_dict, lang_util
):
    """
    Calculate the highlighting score using classification results of adversarial examples
    :param original_example: tuple -- indices used: [0] test-set idx, [1] utterance,
        [3] predicted intent, [4] confidence, [6] ranking position of the intent
    :param subset_adversarial_result: inference results for this example's adversarial variants
    :param adversarial_span_dict: maps "<sentence>_<idx>" to the removed token span
    :param lang_util: language utility providing ``tokenize``
    :return: highlight, np.array of shape (n_tokens) with per-token scores
    """
    original_utterance = " ".join(lang_util.tokenize(original_example[1]))
    original_idx = original_example[0]
    original_intent = original_example[3]
    original_confidence = original_example[4]
    original_position = original_example[6]
    tokens = original_utterance.split(" ")
    highlight = np.zeros(len(tokens), dtype="float32")
    for idx in range(len(subset_adversarial_result)):
        adversarial_example = subset_adversarial_result.iloc[idx]
        # skip variants for which the service returned no predictions
        if not adversarial_example["top_predicts"]:
            continue

        predict_dict = dict()
        predict_intent_list = list()
        for prediction in adversarial_example["top_predicts"]:
            predict_dict[prediction["intent"]] = prediction["confidence"]
            predict_intent_list.append(prediction["intent"])

        if original_intent in predict_dict:
            adversarial_position = list(predict_dict.keys()).index(original_intent)
            adversarial_confidence = predict_dict[original_intent]
        else:
            # target intent dropped out of the top predictions entirely:
            # rank it just past the end with zero confidence
            adversarial_position = len(list(predict_dict.keys()))
            adversarial_confidence = 0

        start, end = adversarial_span_dict[
            adversarial_example["utterance"] + "_" + str(original_idx)
        ]

        highlight = _scoring_function(
            highlight,
            original_position,
            adversarial_position,
            original_confidence,
            adversarial_confidence,
            start,
            end,
        )

    return highlight


def _scoring_function(
    highlight,
    original_position,
    adversarial_position,
    original_confidence,
    adversarial_confidence,
    start_idx,
    end_idx,
):
    """
    scoring function for highlighting of the interval start_idx:end_idx
    :param highlight: np.array of shape (n_tokens)
    :param original_position: ranking position of the target intent for the original sentence
    :param adversarial_position: ranking position of the target intent for the adversarial sentence
    :param original_confidence: confidence of the target intent for the original sentence
    :param adversarial_confidence: confidence of the target intent for the adversarial sentence
    :param start_idx: starting index of the adversarial mask
    :param end_idx: ending index of the adversarial mask
    :return: highlight: np.array of shape (n_tokens)
    """
    # position difference accounts for the change in the position of the target intent among
    # the top 10 intents return by the message api
    position_difference = (1 / float(original_position + 1.0)) - (
        1 / float(adversarial_position + 1.0)
    )

    # confidence difference accounts for the change in the confidence
    confidence_difference = original_confidence - adversarial_confidence

    ngram_size = end_idx - start_idx
    # longer masks are quadratically down-weighted via 1/ngram_size^2
    weight = math.pow(1.0 / ngram_size, 2.0)

    # highlight score for the interval of start_idx:end_idx is a weighted average of
    # the position difference and confidence difference
    weighted_difference = (
        weight
        * ((0.2 * confidence_difference) + (0.8 * position_difference))
        / ngram_size
    )

    highlight[start_idx:end_idx] += weighted_difference
425 | 426 | return highlight 427 | -------------------------------------------------------------------------------- /assistant_skill_analysis/utils/skills_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import random 4 | import csv 5 | import re 6 | import getpass 7 | import nbformat 8 | import pandas as pd 9 | import numpy as np 10 | from nbconvert.preprocessors import ExecutePreprocessor 11 | import ibm_watson 12 | import codecs 13 | from ibm_cloud_sdk_core.authenticators import ( 14 | IAMAuthenticator, 15 | BasicAuthenticator, 16 | NoAuthAuthenticator, 17 | CloudPakForDataAuthenticator, 18 | BearerTokenAuthenticator, 19 | ) 20 | 21 | 22 | DEV_DATACENTER = ( 23 | "https://api.us-south.assistant.dev.watson.cloud.ibm.com", 24 | "https://iam.test.cloud.ibm.com/identity/token", 25 | ) 26 | DEFAULT_V1_API_VERSION = "2019-02-28" 27 | DEFAULT_V2_API_VERSION = "2021-11-27" 28 | DEFAULT_PROD_URL = "https://gateway.watsonplatform.net/assistant/api" 29 | DEFAULT_USERNAME = "apikey" 30 | STAGE_IAM_URL = "https://iam.stage1.bluemix.net/identity/token" 31 | DEFAULT_AUTHENTICATOR_URL = "https://iam.cloud.ibm.com/identity/token" 32 | 33 | OFFTOPIC_LABEL = "SYSTEM_OUT_OF_DOMAIN" 34 | 35 | OFFTOPIC_CONF_THRESHOLD = 0.2 36 | 37 | LABEL_FONT = {"family": "normal", "weight": "bold", "size": 17} 38 | 39 | TITLE_FONT = {"family": "normal", "weight": "bold", "size": 25} 40 | 41 | 42 | def stratified_sampling(workspace, sampling_percentage=0.8): 43 | """ 44 | Create a stratified sample of the workspace json 45 | & return a intent json acceptable in Assistant API 46 | 47 | :param workspace: json format output defined by Assistant API 48 | :param sampling_percentage: percentage of original to sample 49 | :return train_workspace_data: list of intents for train 50 | :return test_workspace_data: list of utterance,intent pairs for test 51 | """ 52 | train_workspace_data = list() 53 | test_workspace_data = list() 54 | 
    # per-intent stratified split: shuffle example indices, then cut at the
    # sampling percentage so each intent keeps the same train/test ratio
    for i in range(len(workspace["intents"])):
        intent = workspace["intents"][i]
        sampling_index = list(np.arange(len(intent["examples"])))
        random.shuffle(sampling_index)
        # training set
        train_test_split_cutoff = int(sampling_percentage * len(sampling_index))
        train_examples = [
            intent["examples"][index]
            for index in sampling_index[:train_test_split_cutoff]
        ]
        train_workspace_data.append({"intent": workspace["intents"][i]["intent"]})
        train_workspace_data[i].update({"description": "string"})
        train_workspace_data[i].update({"examples": train_examples})
        # test set
        test_examples = [
            intent["examples"][index]
            for index in sampling_index[train_test_split_cutoff:]
        ]
        # test rows are "utterance\tintent" strings
        test_workspace_data.extend(
            [
                utterances["text"] + "\t" + workspace["intents"][i]["intent"]
                for utterances in test_examples
            ]
        )

    return train_workspace_data, test_workspace_data


def create_workspace(conversation, intent_json=None):
    """
    Create a workspace for testing purpose
    :param conversation: conversation object created by Watson Assistant api
    :param intent_json: nested json of utterance and intent pairs
    :return response: the workspace id and other metadata related to the new workspace
    """
    response = conversation.create_workspace(
        name="test_workspace",
        description="",
        language="en",
        intents=intent_json,
        entities=[],
        counterexamples=[],
        metadata={},
    ).get_result()
    return response


def input_credentials(input_apikey=True, input_skill_id=True, input_assistant_id=False):
    """
    Prompt user to enter apikey and skill id (workspace_id)
    :param input_apikey: prompt for the api key
    :param input_skill_id: prompt for the skill (workspace) id
    :param input_assistant_id: prompt for the assistant id
    :return: (apikey, skill_id, assistant_id), each None when not prompted
    """
    apikey, skill_id, assistant_id = None, None, None
    if input_apikey:
        apikey = getpass.getpass("Please enter apikey: ")
    if input_skill_id:
        skill_id = getpass.getpass("Please enter skill-id (workspace_id): ")
    if input_assistant_id:
        assistant_id = getpass.getpass("Please enter assistant-id: ")
    return apikey, skill_id, assistant_id


def retrieve_conversation(
    iam_apikey=None,
    url=DEFAULT_PROD_URL,
    api_version=None,
    username=DEFAULT_USERNAME,
    password=None,
    authenticator_url=DEFAULT_AUTHENTICATOR_URL,
    sdk_version="V1",
    cp4d_auth=False,
    bearer_token=None,
):
    """
    Retrieve workspace from Assistant instance
    :param iam_apikey: IAM api key; takes precedence over all other credentials
    :param url: service url of the Assistant instance
    :param api_version: api version date; defaults per sdk_version when None
    :param username: basic-auth / CP4D username
    :param password: basic-auth / CP4D password
    :param authenticator_url: token endpoint used by the IAM / CP4D authenticators
    :param sdk_version: V2 is needed for action workspaces
    :param cp4d_auth: when True, authenticate against Cloud Pak for Data
    :param bearer_token: pre-obtained bearer token
    :return conversation: AssistantV1 or AssistantV2 client
    """
    assert sdk_version in ["V1", "V2"]
    if api_version is None:
        if sdk_version == "V1":
            api_version = DEFAULT_V1_API_VERSION
        else:
            api_version = DEFAULT_V2_API_VERSION
    # credential precedence: apikey > username/password > bearer token > no auth
    if iam_apikey:
        authenticator = IAMAuthenticator(apikey=iam_apikey, url=authenticator_url)
    elif username and password:
        if cp4d_auth:
            authenticator = CloudPakForDataAuthenticator(
                username=username, password=password, url=authenticator_url
            )
        else:
            authenticator = BasicAuthenticator(username=username, password=password)
    elif bearer_token is not None:
        authenticator = BearerTokenAuthenticator(bearer_token=bearer_token)
    else:
        authenticator = NoAuthAuthenticator()

    if sdk_version == "V1":
        conversation = ibm_watson.AssistantV1(
            authenticator=authenticator, version=api_version
        )
    else:
        conversation = ibm_watson.AssistantV2(
            authenticator=authenticator, version=api_version
        )

    conversation.set_service_url(url)

    return conversation


def retrieve_workspace(skill_id, conversation, export_flag=True):
    """
    retrieve the workspace based on the workspace id
    :param skill_id: workspace id of the skill
    :param conversation: AssistantV1 client
    :param export_flag: when True, include all workspace content in the response
    :return: workspace_dictionary
    """
    ws_json = conversation.get_workspace(skill_id, export=export_flag)
    return ws_json.get_result()


def parse_workspace_json(workspace_json):
    """
    Parse workspace json and returns list of utterances, list of intents, and list of entities, and intent to action title mapping
    """
    ws_type = workspace_json.get("type", "dialog")
    # a dialog-typed export with the actions dialog setting is really an action skill
    dialog_setting_action_flag = bool(
        workspace_json.get("dialog_settings", {}).get("actions", False)
    )
    if dialog_setting_action_flag:
        ws_type = "action"
    utterances, intents, entities = [], [], []
    raw_intent_name_to_action_title_mapping = None

    if ws_type == "dialog":

        for intent in workspace_json["intents"]:
            for example in intent["examples"]:
                utterances.append(example["text"])
                intents.append(intent["intent"])
        entities = workspace_json["entities"]

    else:
        # intent name to action title mapping for readability
        raw_intent_name_to_action_title_mapping = {
            action["condition"]["intent"]: action["title"]
            for action in workspace_json["workspace"]["actions"]
            if action.get("condition", {}).get("intent")
        }
        for intent in workspace_json["workspace"]["intents"]:
            # NOTE(review): raises KeyError if an intent has no matching action
            # condition -- confirm every intent is referenced by some action
            action_title = raw_intent_name_to_action_title_mapping[intent["intent"]]
            for example in intent["examples"]:
                utterances.append(example["text"])
                intents.append(action_title)
        entities = workspace_json["workspace"]["entities"]
    return utterances, intents, entities, raw_intent_name_to_action_title_mapping


def extract_workspace_data(workspace, language_util):
    """
    Extract relevant data and vocabulary
    :param workspace: workspace json, could be from an action skill or a dialog skill
    :param language_util: language utility used to preprocess and tokenize
    :return: workspace_pd, vocabulary, entities, intent name to
action title mapping 225 | """ 226 | relevant_data = {"utterance": list(), "intent": list(), "tokens": list()} 227 | vocabulary = set() 228 | ( 229 | utterances, 230 | intents, 231 | entities, 232 | raw_intent_name_to_action_title_mapping, 233 | ) = parse_workspace_json(workspace) 234 | 235 | for utterance, intent in zip(utterances, intents): 236 | # preprocess utterance 237 | utterance = language_util.preprocess(utterance) 238 | tokens = language_util.tokenize(utterance) 239 | 240 | relevant_data["utterance"].append(utterance) 241 | relevant_data["intent"].append(intent) 242 | relevant_data["tokens"].append(tokens) 243 | 244 | vocabulary.update(tokens) 245 | 246 | workspace_pd = pd.DataFrame(relevant_data) 247 | return workspace_pd, vocabulary, entities, raw_intent_name_to_action_title_mapping 248 | 249 | 250 | def get_test_workspace(conversation, workspace_json): 251 | 252 | response_json = conversation.create_workspace( 253 | name="test", 254 | language="en", 255 | intents=workspace_json["intents"], 256 | entities=workspace_json["entities"], 257 | ).get_result() 258 | workspace_id = response_json["workspace_id"] 259 | return workspace_id 260 | 261 | 262 | def process_test_set(test_set, lang_util, delim="\t", cos=False): 263 | """ 264 | Process test set given the path to the test fil 265 | :param test_set: path to the test set on the local computer or cos object body of test csv 266 | :param lang_util: language utility 267 | :param delim: delimiter, use "," for cos instance 268 | :param cos: cos flag to indicate whether this is a path from local system or stream body from cos 269 | :return: 270 | """ 271 | user_inputs = list() 272 | intents = list() 273 | tokens_list = list() 274 | file_handle = None 275 | if not cos: 276 | file_handle = open(test_set, "r", encoding="utf-8") 277 | reader = csv.reader(file_handle, delimiter=delim) 278 | else: 279 | reader = csv.reader(codecs.getreader("utf-8")(test_set), delimiter=delim) 280 | 281 | for row in reader: 282 | if 
len(row) == 0: 283 | continue 284 | cur_example = lang_util.preprocess(row[0]) 285 | tokens = lang_util.tokenize(cur_example) 286 | user_inputs.append(cur_example) 287 | tokens_list.append(tokens) 288 | if len(row) == 2: 289 | intents.append(row[1].strip()) 290 | elif len(row) == 1: 291 | intents.append(OFFTOPIC_LABEL) 292 | if file_handle: 293 | file_handle.close() 294 | 295 | test_df = pd.DataFrame( 296 | data={"utterance": user_inputs, "intent": intents, "tokens": tokens_list} 297 | ) 298 | return test_df 299 | 300 | 301 | def export_workspace(conversation, experiment_skill_id, export_path): 302 | """ 303 | Export the workspace to target path 304 | :param conversation: conversation object output by assistant api 305 | :param experiment_skill_id: id of the experimental workspace 306 | :param export_path: the path where the exported workspace will be saved 307 | """ 308 | response = conversation.get_workspace( 309 | skill_id=experiment_skill_id, export=True 310 | ).get_result() 311 | with open(export_path, "w+", encoding="utf-8") as outfile: 312 | json.dump(response, outfile) 313 | 314 | 315 | def run_notebook( 316 | notebook_path, 317 | iam_apikey, 318 | test_file, 319 | output_path, 320 | wksp_id=None, 321 | assistant_id=None, 322 | action_wksp_json_path=None, 323 | ): 324 | """ 325 | Run notebook for end to end test 326 | :param notebook_path: 327 | :param uname: 328 | :param pwd: 329 | :param wksp_id: 330 | :param assistant_id: 331 | :param test_file: 332 | :param action_wksp_json_path: 333 | :param output_path: 334 | """ 335 | notebook_name, _ = os.path.splitext(os.path.basename(notebook_path)) 336 | 337 | with open(notebook_path) as f: 338 | nb = nbformat.read(f, as_version=4) 339 | nb, old_cred_text = _replace_nb_input( 340 | nb, iam_apikey, test_file, wksp_id, assistant_id, action_wksp_json_path 341 | ) 342 | # nb = _remove_experimentation(nb) 343 | 344 | proc = ExecutePreprocessor(timeout=60 * 60, kernel_name="python3") 345 | proc.allow_errors = True 346 
    proc.preprocess(nb, {"metadata": {"path": os.getcwd()}})
    errors = []
    for cell in nb.cells:
        if "outputs" in cell:
            for output in cell["outputs"]:
                if output.output_type == "error":
                    errors.append(output)
        # restore the original credential cell so secrets are not persisted
        # in the executed notebook written below
        if "source" in cell and "iam_apikey = " in cell["source"]:
            cell["source"] = old_cred_text

    with open(output_path + ".ipynb", mode="wt") as f:
        nbformat.write(nb, f)
    return nb, errors


def _replace_nb_input(
    nb, apikey, test_file, wksp_id=None, assistant_id=None, action_wksp_json_path=None
):
    """
    Replace notebook interactive input for tests
    :param nb: parsed notebook object
    :param apikey: api key substituted into the credential cell
    :param test_file: test set path substituted into the notebook
    :param wksp_id: optional workspace id to substitute
    :param assistant_id: optional assistant id to substitute
    :param action_wksp_json_path: optional action workspace json path to substitute
    :return: (nb, old_cred_text) -- old_cred_text is the untouched credential
        cell source so the caller can restore it after execution
    """
    apikey_patt = "iam_apikey = "
    wksp_id_patt = "skill_id = "
    assistant_id_patt = "ASSISTANT_ID = "
    action_wksp_json_patt = "SKILL_FILENAME = "
    test_file_name_patt = "test_set_path = "
    old_cred_text = ""
    test_urls = '"' + DEV_DATACENTER[0] + '",' + '"' + DEV_DATACENTER[1] + '"'
    for cell in nb.cells:
        if "source" in cell and apikey_patt in cell["source"]:
            old_cred_text = cell["source"]
            text = re.sub(
                "(.*)\niam_apikey, (.*)", (r"\1\n#iam_apikey, \2"), cell["source"]
            )  # comment out input_credentials
            text = re.sub("datacenters\['dallas'\]", test_urls, text)

            text = re.sub(
                "(.*)#" + apikey_patt + "'###'(.*)",
                r"\1" + apikey_patt + "'" + apikey + "'" + r"\2",
                text,
            )  # replace pwd
            if wksp_id:
                text = re.sub(
                    "(.*)#" + wksp_id_patt + "'###'(.*)",
                    r"\1" + wksp_id_patt + "'" + wksp_id + "'" + r"\2",
                    text,
                )  # replace wksp_id
            if assistant_id:
                text = re.sub(
                    "(.*)#" + assistant_id_patt + "'###'(.*)",
                    r"\1" + assistant_id_patt + "'" + assistant_id + "'" + r"\2",
                    text,
                )  # replace assistant_id
            if action_wksp_json_path:
                text = re.sub(
                    "(.*)" + action_wksp_json_patt + "'###'(.*)",
                    r"\1"
                    + action_wksp_json_patt
                    + "'"
                    + action_wksp_json_path
                    + "'"
                    + r"\2",
                    text,
                )  # replace action workspace json path
            cell["source"] = text
        elif "source" in cell and test_file_name_patt in cell["source"]:
            text = re.sub(
                "(.*)\n" + test_file_name_patt + '"./test.tsv"(.*)',
                r"\1\n" + test_file_name_patt + "'" + test_file + "'" + r"\2",
                cell["source"],
            )  # replace test file
            cell["source"] = text
    return nb, old_cred_text


def _remove_experimentation(nb):
    """
    Remove the experimentation session from end-to-end test
    :param nb: parsed notebook object
    :return: nb with every cell from the experimentation header onward dropped
    """
    exp_patt = "Part 3: Experimentation"
    new_nb_cells = []
    for cell in nb.cells:
        # keep cells up to (excluding) the experimentation markdown header
        if (
            cell.cell_type == "markdown"
            and "source" in cell
            and exp_patt in cell["source"]
        ):
            break
        else:
            new_nb_cells.append(cell)
    nb.cells = new_nb_cells
    return nb


def retrieve_classifier_response(
    conversation,
    text_input,
    alternate_intents=False,
    user_id="256",
    assistant_id=None,
    skill_id=None,
    environment_id=None,
):
    """
    retrieve classifier response
    :param conversation: AssistantV1 or AssistantV2 instance
    :param text_input: the input utterance
    :param alternate_intents: when True, request alternate intent predictions
    :param user_id: user id attached to the message metadata
    :param assistant_id: required for AssistantV2
    :param skill_id: skill id, required for AssistantV1
    :param environment_id: environment id (V2 only)
    :return response:
    """
    if isinstance(conversation, ibm_watson.AssistantV1):
        assert skill_id is not None
        response = conversation.message(
            input={"message_type": "text", "text": text_input},
            context={"metadata": {"user_id": user_id}},
            workspace_id=skill_id,
            alternate_intents=alternate_intents,
        ).get_result()
else: 478 | assert assistant_id is not None 479 | response = conversation.message_stateless( 480 | input={ 481 | "message_type": "text", 482 | "text": text_input, 483 | "options": {"alternate_intents": alternate_intents}, 484 | }, 485 | context={"metadata": {"user_id": user_id}}, 486 | assistant_id=assistant_id, 487 | environment_id=environment_id, 488 | ).get_result() 489 | return response 490 | -------------------------------------------------------------------------------- /assistant_skill_analysis/confidence_analysis/confidence_analyzer.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | from IPython.display import display, Markdown 6 | from ..utils.skills_util import OFFTOPIC_LABEL 7 | 8 | OFFTOPIC_CNT_THRESHOLD_FOR_DISPLAY = 5 9 | 10 | 11 | def abnormal_conf(full_results, correct_thresh, incorrect_thresh): 12 | """ 13 | perform abnormal confidence analysis on prediction results on the test set 14 | :param full_results: 15 | :param correct_thresh: 16 | :param incorrect_thresh: 17 | :return: 18 | """ 19 | test_pd = pd.DataFrame(full_results) 20 | test_pd = test_pd.loc[~(test_pd["correct_intent"] == "SYSTEM_OUT_OF_DOMAIN")] 21 | correct = test_pd.loc[test_pd["correct_intent"] == test_pd["top_intent"]] 22 | 23 | correct_low_conf = correct.loc[correct["top_confidence"] < correct_thresh] 24 | correct_low_conf = correct_low_conf[ 25 | ["correct_intent", "utterance", "top_confidence", "top_intent"] 26 | ] 27 | 28 | incorrect = test_pd.loc[~(test_pd["correct_intent"] == test_pd["top_intent"])] 29 | incorrect_high_conf = incorrect.loc[incorrect["top_confidence"] > incorrect_thresh] 30 | 31 | top1 = list() 32 | top2 = list() 33 | top3 = list() 34 | 35 | for i in range(len(incorrect_high_conf)): 36 | possible_range = len(incorrect_high_conf.iloc[i, :]["top_predicts"]) 37 | 38 | for j in range(3): 39 | if j == 0: 40 | if 
possible_range >= 1: 41 | top1.append( 42 | incorrect_high_conf.iloc[i, :]["top_predicts"][j]["intent"] 43 | + " " 44 | + "(" 45 | + str( 46 | np.round( 47 | incorrect_high_conf.iloc[i, :]["top_predicts"][j][ 48 | "confidence" 49 | ], 50 | 3, 51 | ) 52 | ) 53 | + ")" 54 | ) 55 | else: 56 | top1.append("NA") 57 | if j == 1: 58 | if possible_range >= 2: 59 | top2.append( 60 | incorrect_high_conf.iloc[i, :]["top_predicts"][j]["intent"] 61 | + " " 62 | + "(" 63 | + str( 64 | np.round( 65 | incorrect_high_conf.iloc[i, :]["top_predicts"][j][ 66 | "confidence" 67 | ], 68 | 3, 69 | ) 70 | ) 71 | + ")" 72 | ) 73 | else: 74 | top2.append("NA") 75 | if j == 2: 76 | if possible_range >= 3: 77 | top3.append( 78 | incorrect_high_conf.iloc[i, :]["top_predicts"][j]["intent"] 79 | + " " 80 | + "(" 81 | + str( 82 | np.round( 83 | incorrect_high_conf.iloc[i, :]["top_predicts"][j][ 84 | "confidence" 85 | ], 86 | 3, 87 | ) 88 | ) 89 | + ")" 90 | ) 91 | else: 92 | top3.append("NA") 93 | 94 | incorrect_high_conf["top1_prediction"] = top1 95 | incorrect_high_conf["top2_prediction"] = top2 96 | incorrect_high_conf["top3_prediction"] = top3 97 | incorrect_high_conf = incorrect_high_conf[ 98 | [ 99 | "correct_intent", 100 | "utterance", 101 | "top1_prediction", 102 | "top2_prediction", 103 | "top3_prediction", 104 | ] 105 | ] 106 | 107 | return correct_low_conf, incorrect_high_conf 108 | 109 | 110 | def analysis(results, intent_list=None): 111 | """ 112 | perform confidence analysis at the overall level or per intent basis 113 | :param results: 114 | :param intent_list: 115 | :return: 116 | """ 117 | 118 | if not intent_list: 119 | _display_analysis_metrics(True) 120 | analysis_df = analysis_pipeline(results) 121 | return analysis_df 122 | 123 | if len(intent_list) == 1 and intent_list[0] == "ALL_INTENTS": 124 | intent_list = list(results["correct_intent"].unique()) 125 | if OFFTOPIC_LABEL in intent_list: 126 | intent_list.remove(OFFTOPIC_LABEL) 127 | analysis_df_list = list() 128 | for 
intent_name in intent_list:
        display(Markdown("### Threshold Analysis for Intent: {}".format(intent_name)))
        analysis_df = analysis_pipeline(results, intent_name)
        # NOTE(review): all(analysis_df) iterates the column labels, so this is
        # truthy for any frame without falsy column names -- confirm intent
        if all(analysis_df):
            analysis_df.index = np.arange(1, len(analysis_df) + 1)
            display(analysis_df)
        analysis_df_list.append(analysis_df)

    return analysis_df_list


def _display_analysis_metrics(display_far):
    """display the explanation for analysis metrics
    :param display_far: when True, also explain the FAR metric
    """
    display(Markdown("### Threshold Metrics"))
    display(
        Markdown(
            "We calculate metrics for responses where the top intent has a confidence above the \
threshold specified on the x-axis. "
        )
    )

    display(
        Markdown(
            "We consider examples which are within the scope of the chatbot's problem formulation as \
on topic or in domain and those examples which are outside the scope of the problem to be \
out of domain or irrelevant"
        )
    )

    display(Markdown("#### 1) Thresholded On Topic Accuracy (TOA)"))
    display(
        Markdown(
            "x-axis: Confidence threshold used || "
            + "y-axis: Intent Detection Accuracy for On Topic utterances"
        )
    )

    display(Markdown("#### 2) Bot Coverage %"))
    display(
        Markdown(
            "x-axis: Confidence threshold used || "
            + "y-axis: Fraction of All utterances above the threshold"
        )
    )

    if display_far:
        display(
            Markdown("#### 3) False Acceptance Rate for Out of Domain Examples (FAR)")
        )
        display(
            Markdown(
                "x-axis: Confidence threshold used || "
                + "y-axis: Fraction of Out of Domain utterances falsely considered on topic"
            )
        )

    display(
        Markdown(
            "#### Note: Default acceptance threshold for Watson Assistant is set at 0.2.\
 Utterances with top intent confidence < 0.2 will be considered irrelevant"
        )
    )


def
generate_unique_thresholds(sorted_results_tuples): 193 | """ 194 | generate list of unique thresholds based off changes in confidence 195 | and sorted list of unique confidences 196 | :return: unique_thresholds 197 | """ 198 | sort_uniq_confs = list(sorted(set([info[2] for info in sorted_results_tuples]))) 199 | thresholds = [0] 200 | thresholds.extend( 201 | [ 202 | (sort_uniq_confs[idx] + sort_uniq_confs[idx + 1]) / 2 203 | for idx in range(len(sort_uniq_confs) - 1) 204 | ] 205 | ) 206 | return thresholds, sort_uniq_confs 207 | 208 | 209 | def _find_threshold(t, thresholds): 210 | """ 211 | find the appropriate cut-off 212 | :param t: 213 | :param thresholds: 214 | :return: 215 | """ 216 | for index in range(len(thresholds) - 1): 217 | if thresholds[index] <= t < thresholds[index + 1]: 218 | return index 219 | 220 | return len(thresholds) - 1 221 | 222 | 223 | def _get_ontopic_accuracy_list(sorted_infos, thresholds): 224 | """ 225 | generate the list of on-topic accuracy and on-topic counts 226 | based on the list of thresholds 227 | :param sorted_infos: 228 | :param thresholds: 229 | :return: 230 | """ 231 | ontopic_infos = [info for info in sorted_infos if info[0] != OFFTOPIC_LABEL] 232 | cor = len([info for info in ontopic_infos if info[0] == info[1]]) 233 | tol = len(ontopic_infos) 234 | accuracy_list = list() 235 | count_list = list() 236 | current_step = 0 237 | for t in thresholds: 238 | while current_step < len(ontopic_infos): 239 | 240 | if ontopic_infos[current_step][2] < t: 241 | tol -= 1 242 | if ontopic_infos[current_step][0] == ontopic_infos[current_step][1]: 243 | cor -= 1 244 | else: 245 | break 246 | current_step += 1 247 | accuracy_list.append(cor / tol) 248 | count_list.append(cor) 249 | 250 | return accuracy_list, count_list 251 | 252 | 253 | def _get_bot_coverage_list(sorted_infos, thresholds): 254 | """ 255 | generate the list of bot coverage ratio and bot coverage counts 256 | based on the list of thresholds 257 | :param sorted_infos: 258 | 
:param thresholds: 259 | :return: 260 | """ 261 | tol = len(sorted_infos) 262 | cur_bot_coverage = tol 263 | bot_coverage_count_list = list() 264 | bot_coverage_list = list() 265 | current_step = 0 266 | for t in thresholds: 267 | while sorted_infos[current_step][2] < t: 268 | cur_bot_coverage -= 1 269 | current_step += 1 270 | bot_coverage_count_list.append(cur_bot_coverage) 271 | bot_coverage_list.append(cur_bot_coverage / tol) 272 | return bot_coverage_list, bot_coverage_count_list 273 | 274 | 275 | def _get_far_list(sorted_infos, thresholds): 276 | """ 277 | find the list of false acceptance rates and false acceptance counts 278 | :param sorted_infos: 279 | :param thresholds: 280 | :return: 281 | """ 282 | offtopic_infos = [info for info in sorted_infos if info[0] == OFFTOPIC_LABEL] 283 | cur_fa_count = len(offtopic_infos) 284 | tol = len(offtopic_infos) 285 | far_list = list() 286 | far_count = list() 287 | current_step = 0 288 | for t in thresholds: 289 | while current_step < len(offtopic_infos): 290 | if offtopic_infos[current_step][2] < t: 291 | cur_fa_count -= 1 292 | current_step += 1 293 | else: 294 | break 295 | far_list.append(cur_fa_count / tol) 296 | far_count.append(cur_fa_count) 297 | return far_list, far_count 298 | 299 | 300 | def _convert_data_format(results, intent_name=None): 301 | """ 302 | convert the dataframe format to tuples of (ground_truth, prediction, confidence) 303 | :param results: results dataframe 304 | :param intent_name: optional parameter to allow different definition of offtopic label in per 305 | intent cases 306 | :return: result_list: list of tuples of (ground_truth, prediction, confidence) sorted by conf 307 | """ 308 | if intent_name: 309 | results = results[ 310 | (results["correct_intent"] == intent_name) 311 | | (results["top_intent"] == intent_name) 312 | ].copy() 313 | 314 | results["correct_intent"] = np.where( 315 | (results["correct_intent"] != results["top_intent"]) 316 | & (results["top_intent"] == intent_name), 
            OFFTOPIC_LABEL,
            results["correct_intent"],
        )

        results_list = [
            (gt, pred, conf)
            for gt, pred, conf in zip(
                results["correct_intent"],
                results["top_intent"],
                results["top_confidence"],
            )
        ]

        results_list = sorted(results_list, key=lambda x: x[2])

    else:
        results_list = [
            (truth, prediction, confidence)
            for truth, prediction, confidence in zip(
                results["correct_intent"],
                results["top_intent"],
                results["top_confidence"],
            )
        ]
        results_list = sorted(results_list, key=lambda x: x[2])

    return results_list


def extract_by_topic(sorted_results):
    """
    extract information by topics
    :param sorted_results: (truth, prediction, confidence) tuples sorted by confidence
    :return:
        ontopic_infos: tuples whose ground truth is on topic
        offtopic_infos: tuples whose ground truth is off topic
    """
    offtopic_infos = [
        prediction for prediction in sorted_results if prediction[0] == OFFTOPIC_LABEL
    ]

    ontopic_infos = [
        prediction for prediction in sorted_results if prediction[0] != OFFTOPIC_LABEL
    ]

    return ontopic_infos, offtopic_infos


def analysis_pipeline(results, intent_name=None):
    """
    perform the operation of extraction of table analysis and produce threshold graph
    :param results: results dataframe of predictions
    :param intent_name: optional intent name for per-intent analysis
    :return: analysis_df
    """
    sorted_results = _convert_data_format(results, intent_name=intent_name)

    ontopic_infos, offtopic_infos = extract_by_topic(sorted_results)

    # if ontopic counts or sorted results are less than 3, the graph will show almost no variation
    # if all confidence of the predicted result are the same, there will be no variation
    if (
        len(ontopic_infos) < 3
        or len(sorted_results) < 3
        or all(ele[2] == sorted_results[0][2] for ele in sorted_results)
    ):
display(Markdown("**Inadequate Data Points**: No analysis will be conducted")) 383 | analysis_df = pd.DataFrame() 384 | return analysis_df 385 | 386 | ( 387 | analysis_df, 388 | toa_list, 389 | bot_coverage_list, 390 | far_list, 391 | thresholds, 392 | ) = extract_table_analysis(sorted_results, ontopic_infos, offtopic_infos) 393 | 394 | if not intent_name and not analysis_df.empty: 395 | line_graph_data = pd.DataFrame( 396 | data={ 397 | "Thresholded On Topic Accuracy": toa_list, 398 | "Bot Coverage %": bot_coverage_list, 399 | "False Acceptance Rate (FAR) for Out of Domain Examples": far_list, 400 | }, 401 | index=thresholds, 402 | ) 403 | 404 | create_threshold_graph(line_graph_data) 405 | 406 | return analysis_df 407 | 408 | 409 | def extract_table_analysis(sorted_results, ontopic_infos, offtopic_infos): 410 | """ 411 | extract informations for table analysis 412 | :param sorted_results: 413 | :return: 414 | analysis_df: pandas dataframe of the table for dispaly 415 | toa_list: list of sorted on-topic accuracy 416 | bot_coverage_list: list of sorted bot coverage ratio 417 | far_list: list of sorted false acceptance rate 418 | thresholds: list of sorted & unique thresholds 419 | """ 420 | thresholds, sort_uniq_confs = generate_unique_thresholds(sorted_results) 421 | 422 | toa_list, toa_count = _get_ontopic_accuracy_list(sorted_results, thresholds) 423 | bot_coverage_list, bot_coverage_count = _get_bot_coverage_list( 424 | sorted_results, thresholds 425 | ) 426 | 427 | if len(offtopic_infos) >= OFFTOPIC_CNT_THRESHOLD_FOR_DISPLAY: 428 | 429 | far_list, _ = _get_far_list(sorted_results, thresholds) 430 | else: 431 | display( 432 | Markdown( 433 | "Out of Domain examples fewer than **%d** thus \ 434 | no False Acceptance Rate (FAR) calculated" 435 | % OFFTOPIC_CNT_THRESHOLD_FOR_DISPLAY 436 | ) 437 | ) 438 | far_list = [-1] * len(thresholds) 439 | 440 | analysis_df = create_display_table( 441 | toa_list, 442 | bot_coverage_list, 443 | bot_coverage_count, 444 | 
sorted_results, 445 | thresholds, 446 | offtopic_infos, 447 | far_list, 448 | ) 449 | 450 | return analysis_df, toa_list, bot_coverage_list, far_list, thresholds 451 | 452 | 453 | def create_threshold_graph(data): 454 | """ 455 | display threshold analysis graph 456 | :param data: 457 | :return: None 458 | """ 459 | sns.set(rc={"figure.figsize": (20.7, 10.27)}) 460 | plt.ylim(0, 1.1) 461 | plt.axvline(0.2, 0, 1) 462 | plot = sns.lineplot(data=data, palette="tab10", linewidth=3.5) 463 | plt.setp(plot.legend().get_texts(), fontsize="22") 464 | plot.set_xlabel("Threshold T", fontsize=18) 465 | plot.set_ylabel("Metrics mentioned above", fontsize=18) 466 | 467 | 468 | def create_display_table( 469 | toa_list, 470 | bot_coverage_list, 471 | bot_coverage_count, 472 | sorted_results, 473 | thresholds, 474 | offtopic_infos, 475 | far_list, 476 | ): 477 | """ 478 | create table for display purpose 479 | :param toa_list: 480 | :param bot_coverage_list: 481 | :param bot_coverage_count: 482 | :param sorted_results: 483 | :param thresholds: 484 | :param offtopic_infos: 485 | :param far_list: 486 | :return: analysis_df, pandas dataframe containing metrics at intervals of 10% 487 | """ 488 | # produce the threhold quantiles for extraction of relevant information 489 | display_thresholds = [t / 100 for t in range(0, 100, 10)] 490 | display_indexes = [_find_threshold(t, thresholds) for t in display_thresholds] 491 | 492 | analysis_data = dict() 493 | analysis_data["Threshold (T)"] = display_thresholds 494 | analysis_data["Ontopic Accuracy (TOA)"] = [ 495 | toa_list[idx] * 100 for idx in display_indexes 496 | ] 497 | analysis_data["Bot Coverage %"] = [ 498 | bot_coverage_list[idx] * 100 for idx in display_indexes 499 | ] 500 | analysis_data["Bot Coverage Counts"] = [ 501 | str(np.round(bot_coverage_count[idx], decimals=0)) 502 | + " / " 503 | + str(len(sorted_results)) 504 | for idx in display_indexes 505 | ] 506 | 507 | if len(offtopic_infos) >= OFFTOPIC_CNT_THRESHOLD_FOR_DISPLAY: 
508 | analysis_data["False Acceptance Rate (FAR)"] = [ 509 | far_list[idx] * 100 for idx in display_indexes 510 | ] 511 | 512 | analysis_df = pd.DataFrame(data=analysis_data) 513 | return analysis_df 514 | --------------------------------------------------------------------------------