├── tests ├── utils │ ├── __init__.py │ ├── test_workspace_credentials.py │ ├── test_skills_util.py │ └── test_lang_utils.py ├── end2end │ ├── __init__.py │ └── end2end_test.py ├── data_analysis │ ├── __init__.py │ ├── test_summary_generator.py │ ├── test_similarity_analyzer.py │ └── test_divergence_analyzer.py ├── experimentation │ ├── __init__.py │ └── test_data_manipulator.py ├── highlighting │ ├── __init__.py │ └── test_highlighting.py ├── term_analysis │ ├── __init__.py │ ├── test_entity_analyzer.py │ ├── test_keyword_analyzer.py │ └── test_chi2_analyzer.py ├── confidence_analysis │ ├── __init__.py │ └── test_confidence_analysis.py ├── __init__.py └── resources │ └── test_workspaces │ ├── test_set_action.tsv │ └── customer_care_skill_test.tsv ├── assistant_skill_analysis ├── utils │ ├── __init__.py │ ├── parse_secret.py │ ├── lang_utils.py │ └── skills_util.py ├── inferencing │ ├── __init__.py │ └── inferencer.py ├── data_analysis │ ├── __init__.py │ ├── similarity_analyzer.py │ ├── summary_generator.py │ └── divergence_analyzer.py ├── experimentation │ ├── __init__.py │ └── data_manipulator.py ├── highlighting │ ├── __init__.py │ └── highlighter.py ├── term_analysis │ ├── __init__.py │ ├── entity_analyzer.py │ ├── keyword_analyzer.py │ └── chi2_analyzer.py ├── confidence_analysis │ ├── __init__.py │ └── confidence_analyzer.py ├── __init__.py └── resources │ ├── en │ └── stopwords │ ├── de │ └── stopwords │ ├── fr │ └── stopwords │ ├── it │ └── stopwords │ ├── es │ └── stopwords │ ├── pt │ └── stopwords │ ├── zh-tw │ └── stopwords │ ├── nl │ ├── LICENSE.stopwords-nl │ └── stopwords │ ├── zh-cn │ └── stopwords │ └── cs │ └── stopwords ├── MANIFEST.in ├── _version.py ├── setup.cfg ├── requirements.txt ├── CONTRIBUTING.md ├── setup.py ├── test_set.tsv ├── test_set.csv ├── scripts └── train_test_split.py ├── README.md └── LICENSE /tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /tests/end2end/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/data_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/experimentation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/highlighting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/term_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/confidence_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assistant_skill_analysis/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assistant_skill_analysis/inferencing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assistant_skill_analysis/data_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assistant_skill_analysis/experimentation/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assistant_skill_analysis/highlighting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assistant_skill_analysis/term_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include assistant_skill_analysis/resources/*/* -------------------------------------------------------------------------------- /assistant_skill_analysis/confidence_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assistant_skill_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "2.0.1" 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | 3 | nltk.download("punkt") 4 | -------------------------------------------------------------------------------- /_version.py: -------------------------------------------------------------------------------- 1 | # Semantic versioning 2 | # MAJOR.MINOR.PATCH 3 | 4 | __version__ = '2.0.1' 5 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | [easy_install] 5 | index-url=pypi.python.org 
-------------------------------------------------------------------------------- /assistant_skill_analysis/resources/en/stopwords: -------------------------------------------------------------------------------- 1 | an 2 | a 3 | in 4 | on 5 | be 6 | or 7 | of 8 | and 9 | can 10 | is 11 | to 12 | the 13 | i -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/de/stopwords: -------------------------------------------------------------------------------- 1 | der 2 | die 3 | und 4 | in 5 | von 6 | das 7 | den 8 | für 9 | im 10 | mit 11 | ist 12 | sie 13 | auf 14 | zu 15 | des 16 | so -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/fr/stopwords: -------------------------------------------------------------------------------- 1 | de 2 | la 3 | l' 4 | le 5 | et 6 | les 7 | des 8 | a 9 | d' 10 | du 11 | en 12 | un 13 | a 14 | pour 15 | dan 16 | une 17 | que 18 | est 19 | au -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/it/stopwords: -------------------------------------------------------------------------------- 1 | di 2 | e 3 | il 4 | la 5 | che 6 | a 7 | in 8 | per 9 | un 10 | del 11 | l' 12 | è 13 | non 14 | i 15 | le 16 | della 17 | una 18 | da 19 | con 20 | si -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/es/stopwords: -------------------------------------------------------------------------------- 1 | de 2 | la 3 | el 4 | en 5 | que 6 | a 7 | y 8 | los 9 | del 10 | por 11 | un 12 | las 13 | con 14 | se 15 | una 16 | para 17 | al 18 | su 19 | no 20 | este -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/pt/stopwords: -------------------------------------------------------------------------------- 1 | de 2 | e 3 | a 4 | o 5 | do 6 | 
import os

# Environment variables that hold comma-separated Watson Assistant
# credential blobs (populated by CI secrets).
WA_SECRETS = ["WA_CONFIG", "WA_CONFIG_ACTION"]

if __name__ == "__main__":
    # Dump each secret into ./<name>.txt, one comma-separated value per line,
    # so downstream tests can read credentials from plain files.
    for secret in WA_SECRETS:
        values = os.environ[secret].split(",")
        out_path = "./" + secret.lower() + ".txt"
        with open(out_path, "w", encoding="utf-8") as handle:
            handle.write("".join(value + "\n" for value in values))
start by searching through the [issues](https://github.com/watson-developer-cloud/assistant-dialog-skill-analysis/issues) and [pull requests](https://github.com/watson-developer-cloud/assistant-dialog-skill-analysis/pulls) to see if anyone else has raised a similar issue. 4 | 5 | If you don't see an issue listed, and you think it fits into the intended use of this repository, do this: 6 | * **If your contribution is minor,** such as a bug fix, open a pull request. 7 | * **If your contribution is major,** such as a new feature, start by opening an issue first. Others can then weigh in before you commence any work. -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/nl/LICENSE.stopwords-nl: -------------------------------------------------------------------------------- 1 | 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2016 Gene Diaz 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
def find_version(version_file='_version.py'):
    """Return the package version string parsed from *version_file*.

    Looks for a top-level ``__version__ = '...'`` (or ``"..."``) assignment.
    The original implementation only matched single quotes and hard-coded
    the path; both are now generalized while keeping the old default.

    :param version_file: path to the module defining ``__version__``
    :return: the version string, e.g. ``'2.0.1'``
    :raises Exception: if no ``__version__`` assignment is found
    """
    with open(version_file, encoding='utf8') as f:
        contents = f.read()
    # Accept either quote style; non-greedy group captures the version body.
    match_result = re.search(
        r'^__version__ *= *[\'"](.*?)[\'"]', contents, re.MULTILINE
    )
    if match_result:
        return match_result.group(1)
    raise Exception('Unable to find version string')
class TestSummaryGenerator(unittest.TestCase):
    """Unit tests for the summary_generator module."""

    # Sample dialog skill shipped with the test resources.
    _SKILL_PATH = "tests/resources/test_workspaces/skill-Customer-Care-Sample.json"

    @classmethod
    def setUpClass(cls):
        # Parse the sample skill once and share the resulting frame across tests.
        with open(cls._SKILL_PATH, "r") as skill_file:
            skill_json = json.load(skill_file)
        utility = lang_utils.LanguageUtility("en")
        extracted = skills_util.extract_workspace_data(skill_json, utility)
        # Only the first element (the utterance/intent records) is needed here.
        cls.workspace_df = pd.DataFrame(extracted[0])

    def test_class_imbalance(self):
        # The sample workspace is known to be imbalanced across intents.
        self.assertEqual(
            summary_generator.class_imbalance_analysis(self.workspace_df),
            True,
            "Test class imbalance detection failed",
        )

    def tearDown(self):
        unittest.TestCase.tearDown(self)
11 | '' 12 | ' 13 | " 14 | ( 15 | ) 16 | [ 17 | ] 18 | { 19 | } 20 | * 21 | % 22 | + 23 | 。 24 | 25 | 一 26 | 一会儿 27 | 一边 28 | 一面 29 | 上 30 | 下 31 | 不 32 | 不但 33 | 不光 34 | 不可 35 | 不如 36 | 不是 37 | 不管 38 | 不论 39 | 与 40 | 与其 41 | 个 42 | 中 43 | 为 44 | 之 45 | 之所以 46 | 也 47 | 也不 48 | 也许 49 | 也许是 50 | 了 51 | 于 52 | 从 53 | 他 54 | 他们 55 | 以 56 | 会 57 | 但 58 | 你们 59 | 便 60 | 倘若 61 | 先 62 | 全 63 | 其 64 | 再 65 | 到 66 | 前 67 | 十 68 | 即使 69 | 却 70 | 又 71 | 及 72 | 只 73 | 只有 74 | 只要 75 | 可 76 | 可以 77 | 可是 78 | 可能 79 | 各 80 | 后 81 | 向 82 | 和 83 | 哪怕 84 | 因为 85 | 因此 86 | 在 87 | 地 88 | 多 89 | 她 90 | 她们 91 | 如果 92 | 宁可 93 | 它 94 | 它们 95 | 对 96 | 将 97 | 小 98 | 就 99 | 尽管 100 | 已 101 | 已经 102 | 并 103 | 并且 104 | 很 105 | 我 106 | 我们 107 | 或 108 | 所 109 | 所以 110 | 才 111 | 把 112 | 据 113 | 无论 114 | 既 115 | 既然 116 | 时 117 | 是 118 | 是因为 119 | 更 120 | 最 121 | 有 122 | 未 123 | 来 124 | 此 125 | 每 126 | 没有 127 | 然后 128 | 然而 129 | 用 130 | 由 131 | 由于 132 | 的 133 | 看 134 | 着 135 | 种 136 | 而 137 | 而且 138 | 而是 139 | 能 140 | 自己 141 | 至 142 | 虽然 143 | 被 144 | 要 145 | 认为 146 | 让 147 | 该 148 | 还 149 | 还是 150 | 这 151 | 通过 152 | 那么 153 | 都 154 | 非 155 | 、 156 | -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/cs/stopwords: -------------------------------------------------------------------------------- 1 | ačkoli 2 | ale 3 | asi 4 | aspoň 5 | během 6 | bude 7 | budeme 8 | budeš 9 | budete 10 | budou 11 | budu 12 | byl 13 | byla 14 | byli 15 | bylo 16 | byly 17 | bys 18 | je 19 | jsi 20 | jsme 21 | jsou 22 | jste 23 | má 24 | mají 25 | málo 26 | mám 27 | máme 28 | máš 29 | máte 30 | mé 31 | mě 32 | mít 33 | mně 34 | mnou 35 | na 36 | nad 37 | nade 38 | nám 39 | námi 40 | nás 41 | náš 42 | naše 43 | naši 44 | ně 45 | nebo 46 | nedělá 47 | nedělají 48 | nedělám 49 | neděláme 50 | neděláš 51 | neděláte 52 | nějak 53 | nejsi 54 | někde 55 | někdo 56 | nemají 57 | nemáme 58 | nemáte 59 | neměl 60 | němu 61 | není 62 | nestačí 63 | než 64 | nic 65 | 
nich 66 | ním 67 | nimi 68 | od 69 | ode 70 | on 71 | ona 72 | oni 73 | ono 74 | ony 75 | pak 76 | po 77 | před 78 | přes 79 | přese 80 | pro 81 | proč 82 | proti 83 | protože 84 | se 85 | skoro 86 | smějí 87 | smí 88 | snad 89 | ta 90 | tak 91 | takhle 92 | taky 93 | tam 94 | tamhle 95 | tamhleto 96 | tamto 97 | tě 98 | tebe 99 | tebou 100 | ted' 101 | tedy 102 | ten 103 | ti 104 | to 105 | tobě 106 | tohle 107 | toto 108 | třeba 109 | tvá 110 | tvé 111 | tvoje 112 | tvůj 113 | ty 114 | už 115 | vám 116 | vámi 117 | vás 118 | váš 119 | vaše 120 | vaši 121 | ve 122 | vedle 123 | vlastně 124 | vy 125 | za -------------------------------------------------------------------------------- /tests/resources/test_workspaces/test_set_action.tsv: -------------------------------------------------------------------------------- 1 | hiii watson assistant chat bot, would you pleased let me known what are them hours? What are your hours? 2 | Hi I wanted to go to the store now to buy something I really need, is the store open or closed at the moment? What are your hours? 3 | hey bot, what are the hours What are your hours? 4 | What time do you close on jan 26th? What are your hours? 5 | Does the store in the city square opens till 8:30pm on weekends? What are your hours? 6 | how early do you open on Saturdyas What are your hours? 7 | Are you closed on Sunday What are your hours? 8 | What time are you closing tmr? What are your hours? 9 | what can i do to talk to someone Schedule An Appointment 10 | tell me the address Where are you located? 11 | how to get there from times square Where are you located? 12 | thanks again. Thank you 13 | What time is your store open on christmas? What are your hours? 14 | Ok, that is it, I really have to go, goodbye Goodbye 15 | i have an appointment in one hour at times square, i have to go. 
@unittest.skip("skip")
class TestWorkspaceCredential(unittest.TestCase):
    """Smoke tests that exercise real Watson Assistant credentials.

    Skipped unconditionally via the decorator; to run them live, remove the
    skip and provide wa_config.txt / wa_config_action.txt in the working
    directory (written by assistant_skill_analysis/utils/parse_secret.py).
    """

    @classmethod
    def setUpClass(cls):
        """Read the api key, workspace id and assistant id from config files."""
        # wa_config.txt: line 1 = IAM api key, line 2 = workspace (skill) id
        with open(CONFIG_FILE) as fi:
            cls.apikey = fi.readline().strip()
            cls.wksp_id = fi.readline().strip()

        # wa_config_action.txt: line 1 is unused here, line 2 = assistant id
        with open(CONFIG_FILE_ACTION) as fi:
            _ = fi.readline().strip()
            cls.assistant_id = fi.readline().strip()

    def test_workspace_credentials(self):
        """V1 path: retrieve the workspace and sanity-check its intent count."""
        conversation = retrieve_conversation(
            iam_apikey=self.apikey,
            url=DEV_DATACENTER[0],
            authenticator_url=DEV_DATACENTER[1],
        )
        ws_json = retrieve_workspace(skill_id=self.wksp_id, conversation=conversation)
        # The reference workspace is expected to define exactly 9 intents.
        self.assertTrue(len(ws_json["intents"]) == 9)

    def test_action_credentials(self):
        """V2 path: send a stateless message and check the top intent confidence."""
        conversation = retrieve_conversation(
            iam_apikey=self.apikey,
            url=DEV_DATACENTER[0],
            authenticator_url=DEV_DATACENTER[1],
            sdk_version="V2",
        )
        result = conversation.message_stateless(
            input={
                "message_type": "text",
                "text": "thank you",
                "options": {"alternate_intents": True},
            },
            context={"metadata": {"user_id": "123"}},
            assistant_id=self.assistant_id,
        ).get_result()

        # "thank you" should be classified with near-certain confidence.
        self.assertAlmostEqual(
            1, result["output"]["intents"][0]["confidence"], delta=1e-6
        )
def under_sampling(workspace, workspace_pd, quantile=None):
    """
    Under-sample the workspace so that no intent keeps more examples than a
    frequency threshold derived from the label distribution.

    :param workspace: json format outputted by the assistant api
    :param workspace_pd: workspace dataframe with an "intent" column
    :param quantile: quantile in [0, 1] of the label-frequency distribution
                     used as the sampling threshold; defaults to 0.75
    :return train_workspace_data: list of intent json dicts, each with
                                  "intent", "description" and "examples"
    """
    label_frequency_dict = dict(Counter(workspace_pd["intent"]).most_common())

    # Explicit None check: the old `if not quantile` silently replaced a
    # caller-supplied quantile of 0 with the default.
    if quantile is None:
        quantile = 0.75
    sampling_threshold = int(
        np.quantile(a=list(label_frequency_dict.values()), q=[quantile])[0]
    )

    train_workspace_data = []
    for intent in workspace["intents"]:
        examples = intent["examples"]
        # Intents with no examples are dropped entirely.
        if not examples:
            continue

        if label_frequency_dict[intent["intent"]] > sampling_threshold:
            # random.sample == shuffle-then-slice: uniform sample w/o replacement.
            examples = random.sample(examples, sampling_threshold)

        train_workspace_data.append(
            {
                "intent": intent["intent"],
                "description": "string",
                "examples": list(examples),
            }
        )

    return train_workspace_data
-------------------------------------------------------------------------------- /test_set.tsv: -------------------------------------------------------------------------------- 1 | many thanks Thanks 2 | thank you Thanks 3 | ciao Goodbye 4 | see ya Goodbye 5 | Are you open on sundays, and if so what are the hours? Customer_Care_Store_Hours 6 | What are the hours of operation? Customer_Care_Store_Hours 7 | what are your hours Customer_Care_Store_Hours 8 | What time do you close today Customer_Care_Store_Hours 9 | Does the store in the city center opens till 8pm on weekends? Customer_Care_Store_Hours 10 | how early do you open on Saturdays Customer_Care_Store_Hours 11 | Are you open on Sunday Customer_Care_Store_Hours 12 | What time are you closing today? Customer_Care_Store_Hours 13 | When can I meet with one of your employees at your store? Customer_Care_Appointments 14 | I prefer a face to face visit Customer_Care_Appointments 15 | can you make an appointment for me Customer_Care_Appointments 16 | Set up an appt Customer_Care_Appointments 17 | what is the address Customer_Care_Store_Location 18 | I want to know about a store Customer_Care_Store_Location 19 | Find store Customer_Care_Store_Location 20 | give me directions Customer_Care_Store_Location 21 | where are you Customer_Care_Store_Location 22 | i changed my mind Cancel 23 | cancel the request Cancel 24 | can you help Help 25 | help Help 26 | Hey there General_Greetings 27 | Who is this? General_Greetings 28 | Looking good eve General_Greetings 29 | How is it going? General_Greetings 30 | Hi advisor General_Greetings 31 | Hey twin General_Greetings 32 | Can I talk to someone? General_Connect_to_Agent 33 | Please connect me to a live agent General_Connect_to_Agent 34 | Where is the closest agent? General_Connect_to_Agent 35 | Please assist me to get to an agent General_Connect_to_Agent 36 | Can I speak with somebody? General_Connect_to_Agent 37 | Do not want a robot? 
class TestNotebook(unittest.TestCase):
    """End-to-end tests: provision a throwaway workspace, then execute the
    analysis notebooks top-to-bottom and assert they finish without errors.
    Requires live credentials in ./wa_config.txt and ./wa_config_action.txt.
    """

    @classmethod
    def setUpClass(cls):
        # points to dev010_Haode-Qi
        CONFIG_FILE = "./wa_config.txt"
        # Line 1 of the config file is the IAM api key.
        with open(CONFIG_FILE) as fi:
            cls.apikey = fi.readline().strip()

        # Sample dialog skill used to create the temporary test workspace.
        with open(
            "tests/resources/test_workspaces/skill-Customer-Care-Sample.json",
            "r",
            encoding="utf-8",
        ) as f:
            data = json.load(f)

        URL, authenticator_url = skills_util.DEV_DATACENTER
        cls.conversation = skills_util.retrieve_conversation(
            iam_apikey=cls.apikey,
            url=URL,
            authenticator_url=authenticator_url,
            api_version=skills_util.DEFAULT_V1_API_VERSION,
        )
        # Workspace created here is deleted again in tearDownClass.
        cls.wksp_id = skills_util.get_test_workspace(
            conversation=cls.conversation, workspace_json=data
        )

        # points to dev010_Haode-Qi
        CONFIG_FILE = "./wa_config_action.txt"
        # Line 1 is unused here; line 2 is the assistant id for action tests.
        with open(CONFIG_FILE) as fi:
            _ = fi.readline().strip()
            cls.assistant_id = fi.readline().strip()

    def test_notebook(self):
        """Run the classic dialog-skill analysis notebook end to end."""
        test_file = "tests/resources/test_workspaces/customer_care_skill_test.tsv"
        nb, errors = skills_util.run_notebook(
            notebook_path="classic_dialog_skill_analysis.ipynb",
            iam_apikey=self.apikey,
            wksp_id=self.wksp_id,
            test_file=test_file,
            output_path="notebook_output",
        )
        self.assertEqual(errors, [])

    def test_action_notebook(self):
        """Run the new-experience (actions) analysis notebook end to end."""
        test_file = "tests/resources/test_workspaces/test_set_action.tsv"
        wksp_json = (
            "tests/resources/test_workspaces/customer_care_sample_action_skill.json"
        )
        nb, errors = skills_util.run_notebook(
            notebook_path="new_experience_skill_analysis.ipynb",
            iam_apikey=self.apikey,
            test_file=test_file,
            output_path="notebook_output",
            assistant_id=self.assistant_id,
            action_wksp_json_path=wksp_json,
        )
        self.assertEqual(errors, [])

    @classmethod
    def tearDownClass(cls):
        """Remove the workspace created in setUpClass."""
        unittest.TestCase.tearDown(cls)
        cls.conversation.delete_workspace(workspace_id=cls.wksp_id)
skill_file: 21 | ( 22 | workspace_data, 23 | workspace_vocabulary, 24 | _, 25 | _, 26 | ) = skills_util.extract_workspace_data(json.load(skill_file), cls.lang_util) 27 | cls.workspace_df = pd.DataFrame(workspace_data) 28 | 29 | def test_calculate_cosine_similarity(self): 30 | feature_matrix1 = np.array([[1, 2, 0], [0, 0, 1], [1, 2, 0]]) 31 | cos_sim_score1 = similarity_analyzer._calculate_cosine_similarity( 32 | feature_matrix1 33 | ) 34 | self.assertEqual( 35 | np.abs(np.sum(np.diag(cos_sim_score1) - np.array([1, 1, 1]))) < TOLERANCE, 36 | True, 37 | "Similarity Analyzer Test fail", 38 | ) 39 | 40 | self.assertEqual( 41 | np.abs(cos_sim_score1[0, 1]) < TOLERANCE, 42 | True, 43 | "Similarity Analyzer Test fail", 44 | ) 45 | 46 | self.assertEqual( 47 | np.abs(cos_sim_score1[0, 2] - 1) < TOLERANCE, 48 | True, 49 | "Similarity Analyzer Test fail", 50 | ) 51 | 52 | def test_ambiguous_examples_analysis(self): 53 | ambiguous_dataframe = similarity_analyzer.ambiguous_examples_analysis( 54 | self.workspace_df, threshold=0.85, lang_util=self.lang_util 55 | ) 56 | self.assertEqual( 57 | len(ambiguous_dataframe[ambiguous_dataframe["similarity score"] < 0.85]), 58 | 0, 59 | "Similarity Analyzer Test fail", 60 | ) 61 | 62 | self.assertEqual( 63 | len( 64 | np.intersect1d( 65 | ambiguous_dataframe["Intent1"], ambiguous_dataframe["Intent2"] 66 | ) 67 | ), 68 | 0, 69 | "Similarity Analyzer Test fail", 70 | ) 71 | 72 | def tearDown(self): 73 | unittest.TestCase.tearDown(self) 74 | 75 | 76 | if __name__ == "__main__": 77 | unittest.main() 78 | -------------------------------------------------------------------------------- /tests/term_analysis/test_entity_analyzer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import json 3 | import pandas as pd 4 | 5 | from assistant_skill_analysis.utils import skills_util, lang_utils 6 | from assistant_skill_analysis.term_analysis import entity_analyzer 7 | 8 | 9 | class 
import unittest
import json
import pandas as pd

from assistant_skill_analysis.utils import skills_util, lang_utils
from assistant_skill_analysis.term_analysis import entity_analyzer


class TestChi2Analyzer(unittest.TestCase):
    """
    Test for Entity Analyzer module

    NOTE(review): the docstring previously said "Chi2 Analyzer" and the class
    name still does -- both appear copy-pasted from the chi2 tests.  The name
    is kept unchanged so test selection by name keeps working, but everything
    below exercises entity_analyzer.
    """

    @classmethod
    def setUpClass(cls):
        # Real workspace data, mirroring the input shape of inference output.
        test_skill_file = (
            "tests/resources/test_workspaces/skill-Customer-Care-Sample.json"
        )
        with open(test_skill_file, "r") as skill_file:
            (
                workspace_data,
                workspace_vocabulary,
                _,
                _,
            ) = skills_util.extract_workspace_data(
                json.load(skill_file), lang_utils.LanguageUtility("en")
            )
        cls.workspace_df = pd.DataFrame(workspace_data)
        # Hand-built inference result: entity1 appears twice with confidences
        # 1 and 0.5 (average 0.75); entity2 appears only in the first example.
        cls.mock_test_result = pd.DataFrame(
            {
                "correct_intent": ["intent1", "intent2"],
                "entities": [
                    [
                        {"entity": "entity1", "confidence": 1},
                        {"entity": "entity2", "confidence": 1},
                    ],
                    [{"entity": "entity1", "confidence": 0.5}],
                ],
            }
        )

    def test_derive_entity_label_matrix(self):
        (
            entity_feat_mat,
            labels,
            entity_avg_conf,
        ) = entity_analyzer._derive_entity_label_matrix(
            self.mock_test_result, ["entity1", "entity2"]
        )
        # second example contains no entity2 mention
        self.assertEqual(entity_feat_mat[1][1], 0, "test for entity analyzer fail")
        self.assertEqual(
            entity_avg_conf["entity1"], 0.75, "test for entity analyzer fail"
        )

    def test_entity_label_correlation_analysis(self):
        entity = {"entities": [{"entity": "entity1"}, {"entity": "entity2"}]}
        entities_list = [item["entity"] for item in entity["entities"]]
        # p_value=1 keeps every entity regardless of significance
        entity_label_df = entity_analyzer.entity_label_correlation_analysis(
            self.mock_test_result, entities_list, p_value=1
        )
        self.assertEqual(
            entity_label_df.iloc[0]["Correlated Entities"],
            "entity2",
            "test for entity analyzer fail",
        )

    def tearDown(self):
        unittest.TestCase.tearDown(self)


if __name__ == "__main__":
    unittest.main()
import unittest
import json

from assistant_skill_analysis.utils import skills_util, lang_utils


class TestSkillsUtil(unittest.TestCase):
    """Test for skills utils module"""

    @classmethod
    def setUpClass(cls):
        # Parse both workspaces up front and close the files immediately.
        # The previous version kept the handles open for the whole class
        # lifetime, and json.load() on a handle only works once, so a test
        # could never re-read its input.
        with open(
            "tests/resources/test_workspaces/skill-Customer-Care-Sample.json", "r"
        ) as skill_file:
            cls.skill_json = json.load(skill_file)
        with open(
            "tests/resources/test_workspaces/customer_care_sample_action_skill.json",
            "r",
        ) as action_skill_file:
            cls.action_skill_json = json.load(action_skill_file)
        cls.lang_util = lang_utils.LanguageUtility("en")

    def test_extract_action_workspace_data(self):
        (
            workspace_pd,
            workspace_vocabulary,
            entities,
            intent_action_map,
        ) = skills_util.extract_workspace_data(self.action_skill_json, self.lang_util)

        self.assertTrue(workspace_pd is not None, "Extract workspace failed")
        self.assertEqual(
            len(workspace_pd["intent"].unique()), 7, "Extract workspace failed"
        )

        # check correct number of entities parsed
        self.assertEqual(7, len(entities))

        # check intent to action mapping working expectedly
        self.assertEqual(
            "Where are you located?", intent_action_map["action_11419_intent_44259"]
        )
        self.assertEqual("Thank you", intent_action_map["action_12038_intent_13364"])
        self.assertEqual("Goodbye", intent_action_map["action_22890_intent_48257"])
        self.assertEqual(
            "Schedule An Appointment", intent_action_map["action_27164_intent_22860"]
        )
        self.assertEqual(
            "What are your hours?", intent_action_map["action_33190_intent_33203"]
        )
        self.assertEqual(
            "What can I do?", intent_action_map["action_5042_intent_38841"]
        )
        self.assertEqual("Fallback", intent_action_map["fallback_connect_to_agent"])

    def test_extract_workspace_data(self):
        workspace_pd, workspace_vocabulary, _, _ = skills_util.extract_workspace_data(
            self.skill_json, self.lang_util
        )
        self.assertTrue(workspace_pd is not None, "Extract workspace failed")
        self.assertEqual(
            len(workspace_pd["intent"].unique()), 9, "Extract workspace failed"
        )


if __name__ == "__main__":
    unittest.main()
import unittest
from assistant_skill_analysis.utils.lang_utils import LanguageUtility


class TestLangUtils(unittest.TestCase):
    """Test for lang utils module

    Each test builds a LanguageUtility for one language and checks that
    preprocessing (lowercasing / accent folding) and tokenization (including
    stemming where supported) behave as expected.
    """

    # NOTE(review): the previous version opened
    # skill-Customer-Care-Sample.json into cls.skill_file in setUpClass, but
    # no test ever used it, and the per-test tearDown closed that
    # class-level handle (closed after the first test, re-closed after every
    # later one).  The unused fixture and the mis-scoped cleanup have been
    # removed; test behavior is unchanged.

    def test_en(self):
        util = LanguageUtility("en")
        sent = util.preprocess("boston resided on the east coast of us!")
        self.assertEqual(sent, "boston resided on the east coast of us ")
        sent = util.tokenize(sent)
        self.assertEqual(
            sent, ["boston", "resid", "on", "the", "east", "coast", "of", "us"]
        )

    def test_fr(self):
        util = LanguageUtility("fr")
        sent = util.preprocess("ils se donnèrent")
        self.assertEqual(sent, "ils se donnerent")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ["il", "se", "donnerent"])

    def test_cs(self):
        util = LanguageUtility("cs")
        sent = util.preprocess("ils se donnèrent")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ["ils", "se", "donnerent"])

    def test_pt(self):
        util = LanguageUtility("pt")
        sent = util.preprocess("ils se donnèrent")
        self.assertEqual(sent, "ils se donnerent")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ["ils", "se", "donnerent"])

    def test_it(self):
        util = LanguageUtility("it")
        sent = util.preprocess("pronuncerà")
        self.assertEqual(sent, "pronuncerà")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ["pronunc"])

    def test_es(self):
        util = LanguageUtility("es")
        sent = util.preprocess("toreándolo")
        self.assertEqual(sent, "toreandolo")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ["tor"])

    def test_de(self):
        util = LanguageUtility("de")
        sent = util.preprocess("Autobahnen")
        self.assertEqual(sent, "autobahnen")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ["autobahn"])

    def test_zh_cn(self):
        util = LanguageUtility("zh-cn")
        sent = util.preprocess("不想当兼职")
        self.assertEqual(sent, "不想当兼职")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ['不想', '当', '兼职'])

    def test_zh_tw(self):
        util = LanguageUtility("zh-tw")
        sent = util.preprocess("畀到機會我嘗試")
        self.assertEqual(sent, "畀到機會我嘗試")
        sent = util.tokenize(sent)
        self.assertEqual(sent, ['畀', '到', '機會', '我', '嘗試'])


if __name__ == "__main__":
    unittest.main()
import unittest
import json
import numpy as np
import pandas as pd

from assistant_skill_analysis.utils import skills_util, lang_utils
from assistant_skill_analysis.term_analysis import keyword_analyzer


class TestKeywordAnalyzer(unittest.TestCase):
    """Tests for the keyword_analyzer module."""

    @classmethod
    def setUpClass(cls):
        # Shared fixtures: a real workspace plus a tiny handcrafted corpus.
        cls.lang_util = lang_utils.LanguageUtility("en")
        workspace_path = (
            "tests/resources/test_workspaces/skill-Customer-Care-Sample.json"
        )
        with open(workspace_path, "r") as workspace_file:
            skill_json = json.load(workspace_file)
        workspace_data, _, _, _ = skills_util.extract_workspace_data(
            skill_json, cls.lang_util
        )
        cls.workspace_df = pd.DataFrame(workspace_data)
        cls.test_data = pd.DataFrame(
            {
                "utterance": [
                    "Boston is the capital city of massachusetts ",
                    "Boston Celtics is a famous NBA team",
                    "new york is a big city in the east coast",
                ],
                "intent": ["boston", "boston", "nyc"],
            }
        )

    def test_get_counts_per_label(self):
        """Content words are counted per intent; stopwords are dropped."""
        counts = keyword_analyzer._get_counts_per_label(self.test_data, self.lang_util)
        index_pairs = counts.index.tolist()
        self.assertEqual(
            ("boston", "Celtics") in index_pairs,
            True,
            "Key word analyzer test fails",
        )
        self.assertEqual(
            ("nyc", "coast") in index_pairs,
            True,
            "Key word analyzer test fails",
        )
        self.assertEqual(
            ("boston", "is") in index_pairs,
            False,
            "Key word analyzer test fails",
        )

    def test_get_top_n(self):
        """Top-n selection keeps n rows for each label."""
        counts = keyword_analyzer._get_counts_per_label(self.test_data, self.lang_util)
        top_n = keyword_analyzer._get_top_n(counts["n_w"], top_n=4)
        boston_rows = [
            label for (label, _) in top_n.index.tolist() if label == "boston"
        ]
        self.assertEqual(len(boston_rows), 4, "Key word analyzer test fails")

    def test_preprocess_for_heat_map(self):
        """Displayed row count is a whole multiple of the labels shown."""
        counts, top_counts = keyword_analyzer._preprocess_for_heat_map(
            self.workspace_df,
            label_for_display=30,
            max_token_display=30,
            class_list=None,
            lang_util=self.lang_util,
        )
        n_labels = len(counts.index.get_level_values(0).unique())
        expected_rows = np.int_(np.ceil(30 / n_labels)) * n_labels
        self.assertEqual(
            len(top_counts) == expected_rows, True, "Key word analyzer test fails"
        )

    def tearDown(self):
        unittest.TestCase.tearDown(self)


if __name__ == "__main__":
    unittest.main()
Customer_Care_Store_Hours 10 | how early do you open on Saturdays Customer_Care_Store_Hours 11 | Are you open on Sunday Customer_Care_Store_Hours 12 | What time are you closing today? Customer_Care_Store_Hours 13 | When can I meet with one of your employees at your store? Customer_Care_Appointments 14 | I prefer a face to face visit Customer_Care_Appointments 15 | can you make an appointment for me Customer_Care_Appointments 16 | Set up an appt Customer_Care_Appointments 17 | what is the address Customer_Care_Store_Location 18 | I want to know about a store Customer_Care_Store_Location 19 | Find store Customer_Care_Store_Location 20 | give me directions Customer_Care_Store_Location 21 | where are you Customer_Care_Store_Location 22 | i changed my mind Cancel 23 | cancel the request Cancel 24 | can you help Help 25 | help Help 26 | Hey there General_Greetings 27 | Who is this? General_Greetings 28 | Looking good eve General_Greetings 29 | How is it going? General_Greetings 30 | Hi advisor General_Greetings 31 | Hey twin General_Greetings 32 | Can I talk to someone? General_Connect_to_Agent 33 | Please connect me to a live agent General_Connect_to_Agent 34 | Where is the closest agent? General_Connect_to_Agent 35 | Please assist me to get to an agent General_Connect_to_Agent 36 | Can I speak with somebody? General_Connect_to_Agent 37 | Do not want a robot? General_Connect_to_Agent 38 | I want to speak to a human General_Connect_to_Agent 39 | representative General_Connect_to_Agent 40 | Can I connect to an agent? 
General_Connect_to_Agent 41 | Send me to an agent General_Connect_to_Agent 42 | there are now four references including britannica giving the correct date which is getting silly you complain in your edit summary that you can t find them so i ve added quotes to two to help 43 | i am simply relating that the research and professional organizations are essentially unanimous in indicating that these laws are not having the intended effects and that they may even be making the 44 | um taking a shot here um lets say three separate people whos wills are to each other 45 | must be said you are by far the most disrespectful malicious destructive user in all of wikipedia it s people like you who abuse their power that make this site defunct or obsolete 46 | august utc a simple you re wrong would have sufficed no need to try and make me look like an idiot 47 | that is ridiculous unless there s a good and non disingenuous response i would absolutely agree with you blocking indef outright falsifying sources should simply never be tolerated 48 | i moved this from quebec diaspora to this title because disapora is a description applicable only to jew s 49 | knock it off you bloody cwi trot 50 | no he is an arrogant self serving immature idiot get it right 51 | ices could hardly be labelled as a very obviously pro seal hunting organization the scientific base of seal management is the responsibility of ices covered out by the most prominent seal research... 
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import CountVectorizer
from IPython.display import display, Markdown, HTML


def ambiguous_examples_analysis(workspace_pd, lang_util, threshold=0.7):
    """
    Analyze the workspace and find similar utterances that belong to
    different intents.
    :param workspace_pd: pandas dataframe in format of [utterance, label]
    :param lang_util: language utility whose tokenize() is used by the vectorizer
    :param threshold: cut off for similarity score
    :return: pandas dataframe in format of ['Intent1', 'Utterance1', 'Intent2',
             'Utterance2', 'similarity score']
    """
    # first create the feature matrix (unigram + bigram counts)
    vectorizer = CountVectorizer(ngram_range=(1, 2), tokenizer=lang_util.tokenize)
    # .toarray() instead of .todense(): np.matrix is deprecated in numpy
    workspace_bow = vectorizer.fit_transform(workspace_pd["utterance"]).toarray()
    cos_sim_score_matrix = _calculate_cosine_similarity(workspace_bow)

    # remove the lower triangle of the matrix and apply threshold
    similar_utterance_index = np.argwhere(
        (cos_sim_score_matrix - np.tril(cos_sim_score_matrix)) > threshold
    )

    # Collect rows in a list and build the dataframe once:
    # DataFrame.append was removed in pandas 2.0 and was O(n^2) anyway.
    rows = []
    for index in similar_utterance_index:
        if (
            workspace_pd["intent"].iloc[index[0]]
            != workspace_pd["intent"].iloc[index[1]]
        ):
            rows.append(
                {
                    "Intent1": workspace_pd["intent"].iloc[index[0]],
                    "Utterance1": workspace_pd["utterance"].iloc[index[0]],
                    "Intent2": workspace_pd["intent"].iloc[index[1]],
                    "Utterance2": workspace_pd["utterance"].iloc[index[1]],
                    "similarity score": cos_sim_score_matrix[index[0], index[1]],
                }
            )
    similar_utterance_pd = pd.DataFrame(
        rows,
        columns=["Intent1", "Utterance1", "Intent2", "Utterance2", "similarity score"],
    )

    if not similar_utterance_pd.empty:
        # full option name for clarity ("max_colwidth" resolved to the same key)
        with pd.option_context("display.max_colwidth", 250):
            display(
                HTML(
                    similar_utterance_pd.sort_values(
                        by=["similarity score"], ascending=False
                    ).to_html(index=False)
                )
            )
    else:
        display(Markdown("### There are no similar utterances within different Intent"))

    return similar_utterance_pd


def _calculate_cosine_similarity(workspace_bow):
    """
    Given bow representation of the workspace utterances, calculate pairwise
    cosine similarity scores.
    :param workspace_bow: dense representation of BOW of workspace utterances
    :return: cosine_similarity_matrix
    """
    # normalize rows to unit length, then a dot product gives cosine similarity
    workspace_bow = workspace_bow / np.linalg.norm(workspace_bow, axis=1, keepdims=True)
    return workspace_bow.dot(np.transpose(workspace_bow))
import numpy as np
import pandas as pd
from sklearn.feature_selection import chi2

# number of top correlated entities reported per intent
N = 5


def _derive_entity_label_matrix(train_full_results, entities):
    """
    Derive entity feature matrix for chi2 analysis using entity annotations
    from the message api
    :param train_full_results: pandas data frame output by inference
    :param entities: list of entities that is defined in the workspace
    :return entity_feature_matrix: numpy matrix of examples with entities x number of entities
    :return labels: numpy array: number of labels correspond to number of examples
    :return entity_average_confidence_dict: dict entity --> average confidence for entity
    """
    entity_feature_matrix = list()
    labels = list()
    entity_conf_dict = dict()  # entity -> summed confidence over all mentions
    entity_count_dict = dict()  # entity -> number of mentions
    entity_average_confidence_dict = dict()
    for i in range(len(train_full_results)):
        current_result = train_full_results.iloc[i]
        # only examples with at least one detected entity contribute a row
        if current_result["entities"]:
            # create empty feature vector
            current_feature = [0] * len(entities)
            for entity_reference in current_result["entities"]:
                e_ref = entity_reference["entity"]
                e_conf = entity_reference["confidence"]

                entity_idx = entities.index(e_ref)
                current_feature[entity_idx] += 1
                entity_conf_dict[e_ref] = entity_conf_dict.get(e_ref, 0) + e_conf
                entity_count_dict[e_ref] = entity_count_dict.get(e_ref, 0) + 1

            entity_feature_matrix.append(current_feature)
            labels.append(current_result["correct_intent"])

    entity_feature_matrix = np.array(entity_feature_matrix)
    labels = np.array(labels)
    for key in entity_conf_dict:
        entity_average_confidence_dict[key] = (
            entity_conf_dict[key] / entity_count_dict[key]
        )

    return entity_feature_matrix, labels, entity_average_confidence_dict


def entity_label_correlation_analysis(train_full_results, entities_list, p_value=0.05):
    """
    Apply chi2 analysis on entities of the training set
    :param train_full_results: pandas data frame output by inference
    :param entities_list: the list of entities that is defined in the workspace
    :param p_value: threshold for chi2 analysis
    :return entity_label_df: pandas df with col 1 being intents and col 2 entities
    """
    (
        entity_feature_matrix,
        labels,
        entity_average_confidence_dict,
    ) = _derive_entity_label_matrix(train_full_results, entities_list)
    entities_list = np.array(entities_list)
    unique_labels = list(set(labels))
    final_labels = list()
    final_entities = list()

    for label in unique_labels:
        # one-vs-rest chi2 test for this intent
        chi2_statistics, pval = chi2(entity_feature_matrix, labels == label)
        temp_entities_list = entities_list[pval < p_value]
        chi2_statistics = chi2_statistics[pval < p_value]
        # sort significant entities ascending by chi2; the strongest N are
        # taken from the tail below
        ordered_entities = temp_entities_list[np.argsort(chi2_statistics)]
        if len(ordered_entities) == 0:
            continue

        final_labels.append(label)
        final_entities.append(", ".join(ordered_entities[-N:]))

    entity_label_df = pd.DataFrame(
        {"Intent": final_labels, "Correlated Entities": final_entities}
    )

    return entity_label_df
import os
import random
import copy
import argparse
import json
import numpy as np

# label assigned to counterexamples (off-topic utterances) in the test split
OFFTOPIC_LABEL = 'SYSTEM_OUT_OF_DOMAIN'


def stratified_sampling(workspace, sampling_percentage=.8):
    """
    Perform stratified sampling on the workspace json
    :param workspace: json acceptable by watson assistant
    :param sampling_percentage: percentage of total to use for train
    :return: (train_workspace_data, test_workspace_data) where the first is a
             workspace json for training and the second is a list of
             tab-separated "utterance<TAB>intent" test lines
    """
    # copy everything except the intent list
    train_workspace_data = copy.deepcopy(workspace)
    train_workspace_data['name'] = workspace['name'] + '_train'
    train_workspace_data.pop('intents')

    train_workspace_intent_list = list()
    test_workspace_data = list()

    for i in range(len(workspace['intents'])):
        intent = workspace['intents'][i]

        cutoff, sampling_index = find_split_cut_off(
            intent['examples'], sampling_percentage)

        # train set
        train_examples = [
            intent['examples'][index] for index in sampling_index[:cutoff]]
        train_workspace_intent_list.append({'intent': workspace['intents'][i]['intent']})
        train_workspace_intent_list[i].update({"description": "string"})
        train_workspace_intent_list[i].update({"examples": train_examples})

        # test set
        test_examples = [
            intent['examples'][index] for index in sampling_index[cutoff:]]
        test_workspace_data.extend(
            [utterances['text'] + '\t' +
             workspace['intents'][i]['intent'] for utterances in test_examples])
    train_workspace_data['intents'] = train_workspace_intent_list

    # counter examples: tolerate workspaces without a 'counterexamples' key
    counterexamples = workspace.get('counterexamples', [])
    if len(counterexamples) > 0:
        train_workspace_data.pop('counterexamples', None)
        # train
        cutoff, sampling_index = find_split_cut_off(
            counterexamples, sampling_percentage)
        train_workspace_data['counterexamples'] = [
            counterexamples[index] for index in sampling_index[:cutoff]]
        # test
        test_workspace_data.extend(
            [counterexamples[index]['text'] + '\t' +
             OFFTOPIC_LABEL for index in sampling_index[cutoff:]])

    return train_workspace_data, test_workspace_data


def find_split_cut_off(enumerable, sampling_percentage):
    """
    Find split cutoff point for a random train/test split
    :param enumerable: sequence being split
    :param sampling_percentage: fraction of items to keep in train
    :return: (cutoff, sampling_index) -- shuffled indices; indices before
             cutoff go to train, the rest to test.  cutoff is -1 when the
             test share would be smaller than one item, so exactly one item
             is held out for test.
    """
    sampling_index = list(np.arange(len(enumerable)))
    random.shuffle(sampling_index)

    if len(enumerable) * (1 - sampling_percentage) < 1:
        cutoff = -1
    else:
        cutoff = int(np.ceil(sampling_percentage * len(sampling_index)))

    return cutoff, sampling_index


def main(args):
    """Split the input workspace json into <name>_train.json and <name>_test.tsv."""
    # close the input file promptly instead of leaking the handle
    with open(args.input_data, 'r') as input_file:
        workspace_data = json.load(input_file)
    train_workspace_data, test_workspace_data = stratified_sampling(workspace_data, args.percentage)
    output_name = os.path.basename(args.input_data).replace('.json', '')
    with open(os.path.join(args.output_folder, output_name + '_train.json'), 'w', encoding='utf-8') as file:
        json.dump(train_workspace_data, file)
    with open(os.path.join(args.output_folder, output_name + '_test.tsv'), 'w', encoding='utf-8') as file:
        file.writelines([line + '\n' for line in test_workspace_data])


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='This script will split workspace json ')
    parser.add_argument('-p', '--percentage', help='how much percentage of the data to keep in train', default=.8, type=float)
    parser.add_argument('-input', '--input_data', help='the location of the workspace json', required=True)
    parser.add_argument('-output', '--output_folder', help='the location of the train.json and test.tsv to be saved', required=True)
    args = parser.parse_args()
    main(args)
== '__main__': 86 | parser = argparse.ArgumentParser(description='This script will split workspace json ') 87 | parser.add_argument('-p','--percentage', help='how much percentage of the data to keep in train', default=.8, type=float) 88 | parser.add_argument('-input', '--input_data', help='the location of the workspace json',required=True) 89 | parser.add_argument('-output','--output_folder', help='the location of the train.json and test.tsv to be saved',required=True) 90 | args = parser.parse_args() 91 | main(args) 92 | 93 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![test env](https://github.com/watson-developer-cloud/assistant-dialog-skill-analysis/actions/workflows/ci.yaml/badge.svg?branch=master)](https://github.com/watson-developer-cloud/assistant-dialog-skill-analysis/actions/workflows/ci.yaml) 2 | 3 | # Dialog/Action Skill Analysis for Watson Assistant 4 | 5 | ## Introduction 6 | Dialog/Action Skill Analysis is a Python framework along with an easy-to-use Python notebook to help you quickly and effectively build AI assistants using Watson Assistant 7 | 8 | Whether you are new to the process and are building your first AI assistant or you’re a veteran and have an assistant working well in production, this framework is intended to help everyone with questions like... 9 | - How do I know my assistant is doing a good job? 10 | - How do I test and measure my assistant’s performance? 11 | - Why is the assistant responding incorrectly to this question? 12 | - How do I improve my assistant’s ability to understand questions? 13 | 14 | Currently Supported Languages: en, fr, cs, de, es, it, pt, nl, zh-cn, zh-tw 15 | 16 | ## Usage 17 | If you clone the notebook from this repository locally, please use the steps below. 
For usage in Watson studio, please refer to the 18 | section below on [Hosted Python Notebook](#hosted-python-notebook) 19 | 20 | 21 | 1. Assumes familiarity using a Python Jupyter notebook 22 | 2. Assumes a Python 3.9 or greater environment (We recommend using Anaconda to setup a python environment) 23 | 3. Install dependencies with `pip install -r requirements.txt` 24 | 4. Start jupyter server with `jupyter notebook` 25 | 5. Run `classic_dialog_skill_analysis.ipynb` or `new_experience_skill_analysis.ipynb` to start session 26 | 27 | ## Update: March 15th 2023 28 | `new_experience_skill_analysis.ipynb` for dialog or action analysis under new Watson Assistant experience is available. 29 | 30 | 31 | ## Guides 32 | An introduction to Dialog Skill Analysis on Medium: [Announcing Dialog Skill Analysis](https://medium.com/ibm-watson/announcing-dialog-skill-analysis-for-watson-assistant-83cdfb968178?) 33 | 34 | ## Hosted Python Notebook 35 | For those of you familiar with IBM Watson Studio, we have also made a hosted version of the notebook available on the IBM Gallery: [Hosted Dialog Skill Analysis Notebook](https://dataplatform.cloud.ibm.com/exchange/public/entry/view/4d77701840fcb2f21587e39fdb887049) 36 | 37 | ## Current Contributor List 38 | 39 | | | | | 40 | :-------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------:| 41 | Haode Qi Avatar
[Haode Qi](https://github.com/haodeqi) | Cheng Qian Avatar
[Cheng Qian](https://github.com/tsinggggg) | Ladislav Kunc Avatar
[Ladislav Kunc](https://github.com/lada-kunc) | 42 | 43 | 44 | ## Previous Contributor List 45 | 46 | | | | | 47 | :-------------------------:|:-------------------------:|:-------------------------:| 48 | Panos Karagiannis Avatar
[Panos Karagiannis](https://github.com/apropos13) | Ming Tan Avatar
[Ming Tan](https://github.com/mingtan888) | 49 | Navneet Rao Avatar
[Navneet Rao](https://github.com/navneetrao)| Yang Yu Avatar
[Yang Yu](https://github.com/yangyuphd) | 50 | -------------------------------------------------------------------------------- /assistant_skill_analysis/term_analysis/keyword_analyzer.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import pandas as pd 3 | import numpy as np 4 | from IPython.display import display, Markdown 5 | import matplotlib.pyplot as plt 6 | import seaborn as sns 7 | import nltk 8 | from ..utils import skills_util 9 | 10 | 11 | def _preprocess_for_heat_map( 12 | workspace_df, lang_util, label_for_display=30, max_token_display=30, class_list=None 13 | ): 14 | """ 15 | Preprocess dataframe for heat map visualization 16 | :param workspace_df: 17 | :param label_for_display: 18 | :param max_token_display: 19 | :param class_list: 20 | """ 21 | label_frequency_dict = dict(Counter(workspace_df["intent"]).most_common()) 22 | if class_list: 23 | workspace_subsampled = workspace_df[workspace_df["intent"].isin(class_list)] 24 | counts = _get_counts_per_label( 25 | workspace_subsampled, lang_util=lang_util, unigrams_col_name="unigrams" 26 | ) 27 | else: 28 | if len(label_frequency_dict) > label_for_display: 29 | top_30_labels = list(label_frequency_dict.keys())[:label_for_display] 30 | workspace_subsampled = workspace_df[ 31 | workspace_df["intent"].isin(top_30_labels) 32 | ] 33 | counts = _get_counts_per_label( 34 | workspace_subsampled, lang_util=lang_util, unigrams_col_name="unigrams" 35 | ) 36 | else: 37 | counts = _get_counts_per_label( 38 | workspace_df, lang_util=lang_util, unigrams_col_name="unigrams" 39 | ) 40 | 41 | max_n = int( 42 | np.ceil(max_token_display / len(counts.index.get_level_values(0).unique())) 43 | ) 44 | top_counts = _get_top_n(counts["n_w"], top_n=max_n) 45 | return counts, top_counts 46 | 47 | 48 | def _get_counts_per_label(training_data, lang_util, unigrams_col_name="unigrams"): 49 | """ 50 | Create a new dataframe to store unigram counts for each 
label 51 | :param training_data: pandas df 52 | :param unigrams_col_name: name of unigrams column name 53 | :return counts: dataframe that contains the counts for all unigrams per label 54 | """ 55 | training_data[unigrams_col_name] = training_data["utterance"].apply( 56 | nltk.word_tokenize 57 | ) 58 | rows = list() 59 | for row in training_data[["intent", unigrams_col_name]].iterrows(): 60 | r = row[1] 61 | for word in r.unigrams: 62 | rows.append((r.intent, word)) 63 | 64 | words = pd.DataFrame(rows, columns=["intent", "word"]) 65 | # delete all empty words and chars 66 | words = words[words.word.str.len() > 1] 67 | # delete stopwords 68 | words = words.loc[~words["word"].isin(lang_util.stop_words)] 69 | # get counts per word 70 | counts = ( 71 | words.groupby("intent") 72 | .word.value_counts() 73 | .to_frame() 74 | .rename(columns={"word": "n_w"}) 75 | ) 76 | return counts 77 | 78 | 79 | def _get_top_n(series, top_n=5, index_level=0): 80 | """ 81 | Get most frequent words per label 82 | :param series: product of a call to get_counts_per_label 83 | :param top_n: integer signifying the number of most frequent tokens per class 84 | :param index_level: index to group by 85 | :return df: dataframe that contains the top_n unigrams per label 86 | """ 87 | return ( 88 | series.groupby(level=index_level) 89 | .nlargest(top_n) 90 | .reset_index(level=index_level, drop=True) 91 | ) 92 | 93 | 94 | def seaborn_heatmap( 95 | workspace_df, lang_util, label_for_display=30, max_token_display=30, class_list=None 96 | ): 97 | """ 98 | Create heat map of word frequencies per intent 99 | :param workspace_df: 100 | :param label_for_display: 101 | :param max_token_display: 102 | :param class_list: 103 | """ 104 | counts, top_counts = _preprocess_for_heat_map( 105 | workspace_df, lang_util, label_for_display, max_token_display, class_list 106 | ) 107 | reset_groupby = counts.reset_index() 108 | most_frequent_words = top_counts.reset_index()["word"].unique() 109 | table_format = 
reset_groupby.pivot(index="word", columns="intent", values="n_w") 110 | table_format = ( 111 | table_format[table_format.index.isin(most_frequent_words)] 112 | .fillna(0) 113 | .astype("int32") 114 | ) 115 | display( 116 | Markdown('##

Token Frequency per Intent

def generate_summary_statistics(data, entities_list=None):
    """
    Display summary statistics for a workspace: example/intent counts,
    spread of examples per intent, and number of entities.
    :param data: workspace dictionary with "utterance" and "intent" sequences
    :param entities_list: optional list of entities defined in the workspace
    :return: None (renders a Markdown table in the notebook)
    """
    label_frequency = Counter(data["intent"]).most_common()
    per_intent_counts = list(dict(label_frequency).values())

    characteristics = [
        ["Total User Examples", len(data["utterance"])],
        ["Unique Intents", len(label_frequency)],
        [
            "Average User Examples per Intent",
            int(np.around(np.average(per_intent_counts))),
        ],
        [
            "Standard Deviation from Average",
            int(np.around(np.std(per_intent_counts))),
        ],
        # A missing/empty entity list is reported as zero entities.
        ["Total Number of Entities", len(entities_list) if entities_list else 0],
    ]

    df = pd.DataFrame(data=characteristics, columns=["Data Characteristic", "Value"])
    # 1-based index reads better in the rendered table.
    df.index = np.arange(1, len(df) + 1)
    display(Markdown("### Summary Statistics"))
    display(df)

Sorted Distribution of User Examples \ 79 | per Intent

def class_imbalance_analysis(workspace_pd):
    """
    Perform class imbalance analysis on the training workspace.
    Flags imbalance when the most frequent intent has at least twice as many
    user examples as the least frequent intent.
    :param workspace_pd: dataframe with an "intent" column
    :return: True if significant imbalance was detected, else False
    """
    label_frequency = Counter(workspace_pd["intent"]).most_common()
    # most_common() is sorted descending, so the last entry is the rarest intent.
    max_class, max_class_len = label_frequency[0]
    min_class, min_class_len = label_frequency[-1]

    # Fix: the original built these messages with backslash line continuations
    # inside string literals, which embedded runs of stray indentation into the
    # rendered Markdown text. Use clean single-line strings instead.
    if max_class_len >= 2 * min_class_len:
        display(Markdown("### Class Imbalance Detected"))
        display(
            Markdown(
                "- Data could be potentially biased towards intents with more "
                "user examples"
            )
        )
        display(
            Markdown(
                "- E.g. Intent < {} > has < {} > user examples while intent "
                "< {} > has just < {} > user examples ".format(
                    max_class, max_class_len, min_class, min_class_len
                )
            )
        )
        flag = True
    else:
        display(Markdown("### No Significant Class Imbalance Detected"))
        display(
            Markdown(
                "- Lower chances of inherent bias in classification towards "
                "intents with more user examples"
            )
        )
        flag = False

    return flag
"es", "it", "pt", "nl", "zh-cn", "zh-tw"] 13 | PUNCTUATION = [ 14 | "\\" + chr(i) 15 | for i in range(sys.maxunicode) 16 | if unicodedata.category(chr(i)).startswith("P") 17 | ] 18 | 19 | 20 | class _JiebaTokenizerWrapper: 21 | """for zh-cn and zh-tw""" 22 | 23 | def __call__(self, *args, **kwargs): 24 | text = args[0] 25 | for token in jieba.tokenize(text): 26 | yield SimpleNamespace(text=token[0]) 27 | 28 | 29 | class LanguageUtility: 30 | def __init__(self, language_code): 31 | if language_code not in SUPPORTED_LANGUAGE: 32 | raise Exception( 33 | "language_code %s not in supported languages %s", 34 | language_code, 35 | SUPPORTED_LANGUAGE, 36 | ) 37 | self.language_code = language_code 38 | self.init_resources() 39 | 40 | def tokenize(self, sentences): 41 | tokens = list(map(lambda x: x.text, self.tokenizer(sentences))) 42 | if self.stemmer: 43 | tokens = [self.stemmer.stem(str(token)).strip() for token in tokens] 44 | tokens = [token for token in tokens if len(token) > 0] 45 | return tokens 46 | 47 | def init_resources(self): 48 | self.punctuation_pattern = re.compile("|".join(PUNCTUATION)) 49 | self.stemmer = None 50 | stopwords_path = os.path.join( 51 | os.path.dirname(assistant_skill_analysis.__file__), 52 | "resources", 53 | self.language_code, 54 | "stopwords", 55 | ) 56 | if self.language_code == "en": 57 | from spacy.lang.en import English 58 | 59 | self.tokenizer = Tokenizer(English().vocab) 60 | self.stemmer = SnowballStemmer(language="english") 61 | self.stop_words = self.load_stop_words(stopwords_path) 62 | 63 | elif self.language_code == "fr": 64 | from spacy.lang.fr import French 65 | 66 | self.tokenizer = Tokenizer(French().vocab) 67 | self.stemmer = SnowballStemmer(language="french") 68 | self.stop_words = self.load_stop_words(stopwords_path) 69 | 70 | elif self.language_code == "de": 71 | from spacy.lang.de import German 72 | 73 | self.tokenizer = Tokenizer(German().vocab) 74 | self.stemmer = SnowballStemmer(language="german") 75 | 
self.stop_words = self.load_stop_words(stopwords_path) 76 | 77 | elif self.language_code == "it": 78 | from spacy.lang.it import Italian 79 | 80 | self.tokenizer = Tokenizer(Italian().vocab) 81 | self.stemmer = SnowballStemmer(language="italian") 82 | self.stop_words = self.load_stop_words(stopwords_path) 83 | 84 | elif self.language_code == "cs": 85 | from spacy.lang.cs import Czech 86 | 87 | self.tokenizer = Tokenizer(Czech().vocab) 88 | self.stop_words = self.load_stop_words(stopwords_path) 89 | 90 | elif self.language_code == "pt": 91 | from spacy.lang.pt import Portuguese 92 | 93 | self.tokenizer = Tokenizer(Portuguese().vocab) 94 | self.stemmer = SnowballStemmer(language="portuguese") 95 | self.stop_words = self.load_stop_words(stopwords_path) 96 | 97 | elif self.language_code == "es": 98 | from spacy.lang.es import Spanish 99 | 100 | self.tokenizer = Tokenizer(Spanish().vocab) 101 | self.stemmer = SnowballStemmer(language="spanish") 102 | self.stop_words = self.load_stop_words(stopwords_path) 103 | 104 | elif self.language_code == "nl": 105 | from spacy.lang.nl import Dutch 106 | 107 | self.tokenizer = Tokenizer(Dutch().vocab) 108 | self.stemmer = SnowballStemmer(language="dutch") 109 | self.stop_words = self.load_stop_words(stopwords_path) 110 | 111 | elif self.language_code in ["zh-cn", "zh-tw"]: 112 | self.tokenizer = _JiebaTokenizerWrapper() 113 | self.stop_words = self.load_stop_words(stopwords_path) 114 | 115 | else: 116 | raise Exception("language code %s is not supported", self.language_code) 117 | 118 | def preprocess(self, sentence): 119 | sentence = sentence.lower() 120 | sentence = self.contraction_normalization(sentence) 121 | sentence = self.strip_punctuations(sentence) 122 | if self.language_code in ["fr", "es", "cs", "es", "pt"]: 123 | sentence = self.accent_removal(sentence) 124 | return sentence 125 | 126 | def load_stop_words(self, path): 127 | stopwords = [] 128 | with open(path, "r", encoding="utf-8") as file: 129 | for line in file: 130 
| if line.startswith("#"): 131 | continue 132 | line = line.strip() 133 | stopwords.append(line) 134 | return stopwords 135 | 136 | def strip_punctuations(self, sentence): 137 | """ 138 | function to strip punctuations from the utternace 139 | :param utterance: 140 | :return: 141 | """ 142 | sentence = re.sub(self.punctuation_pattern, " ", sentence) 143 | return sentence 144 | 145 | def contraction_normalization(self, sentence): 146 | """ 147 | common contraction normalization for english 148 | :param sentence: 149 | :return: 150 | """ 151 | sentence = sentence.replace("'s", " is ") 152 | sentence = sentence.replace("n't", " not ") 153 | sentence = sentence.replace("'ll", " will ") 154 | sentence = sentence.replace("'m", " am ") 155 | return sentence 156 | 157 | def accent_removal(self, sentence): 158 | """origin from facebook research xlm preprocessing 159 | https://github.com/facebookresearch/XLM""" 160 | 161 | return "".join( 162 | [ 163 | ch 164 | for ch in unicodedata.normalize("NFD", sentence) 165 | if unicodedata.category(ch) != "Mn" 166 | ] 167 | ) 168 | -------------------------------------------------------------------------------- /tests/term_analysis/test_chi2_analyzer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import json 3 | import pandas as pd 4 | 5 | from assistant_skill_analysis.utils import skills_util, lang_utils 6 | from assistant_skill_analysis.term_analysis import chi2_analyzer 7 | 8 | 9 | class TestChi2Analyzer(unittest.TestCase): 10 | """Test for Chi2 Analyzer module""" 11 | 12 | @classmethod 13 | def setUpClass(cls): 14 | cls.lang_util = lang_utils.LanguageUtility("en") 15 | test_skill_file = ( 16 | "tests/resources/test_workspaces/skill-Customer-Care-Sample.json" 17 | ) 18 | with open(test_skill_file, "r") as skill_file: 19 | ( 20 | workspace_data, 21 | workspace_vocabulary, 22 | _, 23 | _, 24 | ) = skills_util.extract_workspace_data(json.load(skill_file), cls.lang_util) 25 
| cls.workspace_df = pd.DataFrame(workspace_data) 26 | 27 | def test_preprocess_chi2(self): 28 | test_data = pd.DataFrame({"utterance": ["This is boston"], "intent": "label1"}) 29 | labels, convec, features = chi2_analyzer._preprocess_chi2( 30 | test_data, self.lang_util 31 | ) 32 | self.assertEqual( 33 | set(convec.get_feature_names_out()), 34 | set(["this", "boston", "this boston"]), 35 | "Test for chi2 analyzer fail", 36 | ) 37 | 38 | labels, convec, features = chi2_analyzer._preprocess_chi2( 39 | self.workspace_df, self.lang_util 40 | ) 41 | max_len = 0 42 | for ngram in convec.get_feature_names_out(): 43 | if len(ngram.split(" ")) > max_len: 44 | max_len = len(ngram.split(" ")) 45 | assert max_len <= 2 46 | 47 | def test_compute_chi2_top_feature(self): 48 | # test case 1, mini dataset 49 | test_data = pd.DataFrame( 50 | { 51 | "utterance": [ 52 | "Boston is the capital city of massachusetts ", 53 | "Boston Celtics is a famous NBA team", 54 | "new york is a big city in the east coast", 55 | ], 56 | "intent": ["boston", "boston", "nyc"], 57 | } 58 | ) 59 | 60 | labels, con_vec, features = chi2_analyzer._preprocess_chi2( 61 | test_data, self.lang_util 62 | ) 63 | unigrams, bigrams = chi2_analyzer._compute_chi2_top_feature( 64 | features, labels, con_vec, "boston", 0.05 65 | ) 66 | self.assertEqual(len(unigrams), 0, "chi2 analyzer fail") 67 | self.assertEqual(len(bigrams), 0, "chi2 analyzer fail") 68 | 69 | # test case 2 with punctuation 70 | test_data = pd.DataFrame( 71 | { 72 | "utterance": [ 73 | "Boston is the capital city of massachusetts! ", 74 | "Boston Celtics is a famous NBA team!", 75 | "new york is a big city in the east coast", 76 | ], 77 | "intent": ["boston", "boston", "nyc"], 78 | } 79 | ) 80 | labels, con_vec, features = chi2_analyzer._preprocess_chi2( 81 | test_data, self.lang_util 82 | ) 83 | unigrams, bigrams = chi2_analyzer._compute_chi2_top_feature( 84 | features, labels, con_vec, "boston", 1 85 | ) 86 | self.assertEqual("!" 
not in unigrams, True, "chi2 analyzer fail") 87 | 88 | # test case 3 , medium size dataset 89 | labels, con_vec, features = chi2_analyzer._preprocess_chi2( 90 | self.workspace_df, self.lang_util 91 | ) 92 | unigrams, bigrams = chi2_analyzer._compute_chi2_top_feature( 93 | features, labels, con_vec, "Help" 94 | ) 95 | self.assertEqual( 96 | unigrams, ["need", "me", "assist", "decid", "help"], "chi2 analyzer fail" 97 | ) 98 | test_bigrams = [ 99 | "assist me", 100 | "you assist", 101 | "me decid", 102 | "need assist", 103 | "you help", 104 | "help me", 105 | ] 106 | self.assertEqual(bigrams, test_bigrams, "chi2 analyzer fail") 107 | 108 | def test_get_chi2_analysis(self): 109 | test_data = pd.DataFrame( 110 | { 111 | "utterance": [ 112 | "Boston is the capital city of massachusetts ", 113 | "Boston Celtics is a famous NBA team", 114 | "new york is a big city in the east coast", 115 | ], 116 | "intent": ["boston", "boston", "nyc"], 117 | } 118 | ) 119 | unigram_intent_dict, bigram_intent_dict = chi2_analyzer.get_chi2_analysis( 120 | test_data, significance_level=0.05, lang_util=self.lang_util 121 | ) 122 | self.assertEqual(len(unigram_intent_dict), 0, "chi2 analyzer fail") 123 | 124 | unigram_intent_dict, bigram_intent_dict = chi2_analyzer.get_chi2_analysis( 125 | self.workspace_df, significance_level=0.05, lang_util=self.lang_util 126 | ) 127 | list_of_intent_list = list(unigram_intent_dict.values()) 128 | one_bigram_set = list(bigram_intent_dict.keys())[0] 129 | self.assertEqual( 130 | all(len(intents) >= 1 for intents in list_of_intent_list), 131 | True, 132 | "chi2 analyzer fail", 133 | ) 134 | self.assertEqual( 135 | all(len(item.split(" ")) == 2 for item in one_bigram_set), 136 | True, 137 | "chi2 analyzer fail", 138 | ) 139 | 140 | def test_get_confusing_keyterms(self): 141 | unigram_intent_dict = { 142 | frozenset(["a", "b", "c"]): ["intent1"], 143 | frozenset(["a", "b"]): ["intent2"], 144 | } 145 | ambiguous_data_frame = chi2_analyzer.get_confusing_key_terms( 
146 | unigram_intent_dict 147 | ) 148 | self.assertTrue( 149 | str(ambiguous_data_frame.iloc[0, 0]) == "" 150 | or str(ambiguous_data_frame.iloc[0, 0]) == "", 151 | "chi2 analyzer fail", 152 | ) 153 | self.assertEqual( 154 | "a" in list(ambiguous_data_frame["Terms"]), True, "chi2 analyzer fail" 155 | ) 156 | 157 | def tearDown(self): 158 | unittest.TestCase.tearDown(self) 159 | 160 | 161 | if __name__ == "__main__": 162 | unittest.main() 163 | -------------------------------------------------------------------------------- /tests/data_analysis/test_divergence_analyzer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import json 3 | import pandas as pd 4 | import numpy as np 5 | from assistant_skill_analysis.utils import skills_util, lang_utils 6 | from assistant_skill_analysis.data_analysis import divergence_analyzer 7 | 8 | 9 | class TestDivergenceAnalyzer(unittest.TestCase): 10 | """Test for Divergence Analyzer module""" 11 | 12 | @classmethod 13 | def setUpClass(cls): 14 | cls.lang_util = lang_utils.LanguageUtility("en") 15 | with open( 16 | "tests/resources/test_workspaces/skill-Customer-Care-Sample.json", "r" 17 | ) as skill_file: 18 | ( 19 | workspace_data, 20 | workspace_vocabulary, 21 | _, 22 | _, 23 | ) = skills_util.extract_workspace_data(json.load(skill_file), cls.lang_util) 24 | 25 | cls.workspace_df = pd.DataFrame(workspace_data) 26 | cls.train_set_pd = pd.DataFrame( 27 | { 28 | "utterance": ["boston is close to new york"], 29 | "intent": ["Boston_New_York"], 30 | "tokens": [["boston", "is", "close", "to", "new", "york"]], 31 | } 32 | ) 33 | cls.test_set_pd = pd.DataFrame( 34 | { 35 | "utterance": [ 36 | "both boston and new york are on east coast", 37 | "boston is close to new york", 38 | ], 39 | "intent": ["Boston_New_York", "Boston_New_York"], 40 | "tokens": [ 41 | [ 42 | "both", 43 | "boston", 44 | "and", 45 | "new", 46 | "york", 47 | "are", 48 | "on", 49 | "east", 50 | "coast", 51 | ], 52 
| ["boston", "is", "close", "to", "new", "york"], 53 | ], 54 | } 55 | ) 56 | 57 | def test_label_percentage(self): 58 | label_percentage_dict = divergence_analyzer._label_percentage(self.workspace_df) 59 | label_percentage_vec = np.array(list(label_percentage_dict.values())) 60 | self.assertEqual( 61 | np.all(label_percentage_vec > 0), True, "label percentage test fail" 62 | ) 63 | self.assertAlmostEqual(np.sum(label_percentage_vec), 1, delta=1e-6) 64 | 65 | def test_train_test_vocab_difference(self): 66 | train_vocab, test_vocab = divergence_analyzer._train_test_vocab_difference( 67 | self.train_set_pd, self.test_set_pd 68 | ) 69 | 70 | self.assertEqual( 71 | train_vocab, 72 | set(["boston", "is", "close", "to", "new", "york"]), 73 | "train test vocab difference test fail", 74 | ) 75 | 76 | def test_train_test_uttterance_length_difference(self): 77 | temp_df = divergence_analyzer._train_test_utterance_length_difference( 78 | self.train_set_pd, self.test_set_pd 79 | ) 80 | 81 | self.assertEqual( 82 | temp_df.iloc[0]["Absolute Difference"], 83 | 1.5, 84 | "train test utterance length differene test fail", 85 | ) 86 | 87 | def test_train_test_label_difference(self): 88 | # Test 1 89 | percentage_dict1 = {"Intent1": 0.5, "Intent2": 0.5} 90 | percentage_dict2 = {"Intent1": 0.5, "Intent2": 0.5} 91 | 92 | ( 93 | missing_labels, 94 | difference_dict, 95 | js_distance, 96 | ) = divergence_analyzer._train_test_label_difference( 97 | percentage_dict1, percentage_dict2 98 | ) 99 | self.assertEqual(js_distance, 0, "train test difference test fail") 100 | self.assertEqual(missing_labels, [], "train test difference test fail") 101 | self.assertEqual( 102 | difference_dict["Intent1"], [50, 50, 0], "train test difference test fail" 103 | ) 104 | 105 | # Test 2 106 | percentage_dict1 = {"Intent1": 1, "Intent2": 0} 107 | percentage_dict2 = {"Intent1": 1} 108 | 109 | ( 110 | missing_labels, 111 | difference_dict, 112 | js_distance, 113 | ) = 
divergence_analyzer._train_test_label_difference( 114 | percentage_dict1, percentage_dict2 115 | ) 116 | self.assertEqual(js_distance, 0, "train test difference test fail") 117 | self.assertEqual(missing_labels, ["Intent2"], "train test difference test fail") 118 | self.assertEqual( 119 | difference_dict["Intent1"], [100, 100, 0], "train test difference test fail" 120 | ) 121 | 122 | # Test 3 123 | percentage_dict1 = {"Intent1": 1, "Intent2": 0} 124 | percentage_dict2 = {"Intent1": 0, "Intent2": 1} 125 | ( 126 | missing_labels, 127 | difference_dict, 128 | js_distance, 129 | ) = divergence_analyzer._train_test_label_difference( 130 | percentage_dict1, percentage_dict2 131 | ) 132 | self.assertEqual(js_distance, 1, "train test difference test fail") 133 | self.assertEqual( 134 | difference_dict["Intent1"], [100, 0, 100], "train test difference test fail" 135 | ) 136 | self.assertEqual( 137 | difference_dict["Intent2"], [0, 100, 100], "train test difference test fail" 138 | ) 139 | self.assertEqual(len(missing_labels), 0, "train test difference test fail") 140 | 141 | # Test 4 142 | percentage_dict1 = {"Intent1": 1} 143 | percentage_dict2 = {"Intent2": 1} 144 | ( 145 | missing_labels, 146 | difference_dict, 147 | js_distance, 148 | ) = divergence_analyzer._train_test_label_difference( 149 | percentage_dict1, percentage_dict2 150 | ) 151 | self.assertEqual(str(js_distance), "nan", "train test difference test fail") 152 | self.assertEqual(missing_labels, ["Intent1"], "train test difference test fail") 153 | self.assertEqual(len(difference_dict), 0, "train test difference test fail") 154 | 155 | def tearDown(self): 156 | unittest.TestCase.tearDown(self) 157 | 158 | 159 | if __name__ == "__main__": 160 | unittest.main() 161 | -------------------------------------------------------------------------------- /tests/confidence_analysis/test_confidence_analysis.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import 
math 3 | import pandas as pd 4 | from assistant_skill_analysis.confidence_analysis.confidence_analyzer import ( 5 | _get_far_list, 6 | _get_ontopic_accuracy_list, 7 | _find_threshold, 8 | abnormal_conf, 9 | generate_unique_thresholds, 10 | _get_bot_coverage_list, 11 | analysis, 12 | extract_table_analysis, 13 | _convert_data_format, 14 | analysis_pipeline, 15 | extract_by_topic, 16 | create_display_table, 17 | ) 18 | from assistant_skill_analysis.utils.skills_util import OFFTOPIC_LABEL 19 | 20 | 21 | class TestThresholdAnalysis(unittest.TestCase): 22 | """Test for summary generator module""" 23 | 24 | def setUp(self): 25 | 26 | self.sorted_list = [ 27 | (OFFTOPIC_LABEL, "B", 0.1), 28 | ("A", "A", 0.2), 29 | (OFFTOPIC_LABEL, "A", 0.2), 30 | ("A", "A", 0.2), 31 | ("A", "B", 0.3), 32 | ("A", "A", 0.4), 33 | (OFFTOPIC_LABEL, "A", 0.8), 34 | ("A", "A", 0.8), 35 | ("B", "B", 1.0), 36 | ] 37 | 38 | self.results = pd.DataFrame( 39 | self.sorted_list, columns=["correct_intent", "top_intent", "top_confidence"] 40 | ) 41 | 42 | self.thresholds = [0.15, 0.25, 0.35, 0.6, 0.9] 43 | 44 | def test_abnormal_conf(self): 45 | test_dataframe = pd.DataFrame( 46 | { 47 | "correct_intent": ["A", "A", "A"], 48 | "top_intent": ["A", "B", "B"], 49 | "top_confidence": [0.2, 0.9, 0.9], 50 | "utterance": ["a", "a", "a"], 51 | "top_predicts": [ 52 | [], 53 | [], 54 | [ 55 | {"intent": "B", "confidence": 0.9}, 56 | {"intent": "C", "confidence": 0.1}, 57 | ], 58 | ], 59 | } 60 | ) 61 | 62 | correct_low_conf, incorrect_high_conf = abnormal_conf(test_dataframe, 0.3, 0.7) 63 | self.assertEqual(len(correct_low_conf), 1, "test_find_threshold failed: first") 64 | self.assertEqual( 65 | len(incorrect_high_conf), 2, "test_find_threshold failed: first" 66 | ) 67 | self.assertEqual( 68 | incorrect_high_conf.iloc[0, :]["top2_prediction"], 69 | "NA", 70 | "test_find_threshold failed: first", 71 | ) 72 | 73 | def test_get_ontopic_accuracy_list(self): 74 | 75 | res, _ = 
_get_ontopic_accuracy_list(self.sorted_list, self.thresholds) 76 | gt = [5 / 6, 0.75, 1.0, 1.0, 1.0] 77 | for r, g in zip(res, gt): 78 | self.assertEqual(math.fabs(r - g) < 0.0001, True, "FAR values changed") 79 | 80 | def test_find_threshold(self): 81 | a = 0.0 82 | b = [0, 0.1, 0.2, 0.3, 0.5] 83 | pos = _find_threshold(a, b) 84 | self.assertEqual(pos, 0, "test_find_threshold failed: first") 85 | a = 0.1 86 | b = [0, 0.1, 0.2, 0.3, 0.5] 87 | pos = _find_threshold(a, b) 88 | self.assertEqual(pos, 1, "test_find_threshold failed: second") 89 | a = 0.5 90 | b = [0, 0.1, 0.2, 0.3, 0.5] 91 | pos = _find_threshold(a, b) 92 | self.assertEqual(pos, 4, "test_find_threshold failed: third") 93 | 94 | def test_get_far_list(self): 95 | 96 | res, _ = _get_far_list(self.sorted_list, self.thresholds) 97 | gt = [2 / 3, 1 / 3, 1 / 3, 1 / 3, 0.0] 98 | for r, g in zip(res, gt): 99 | self.assertEqual(math.fabs(r - g) < 0.0001, True, "FAR values changed") 100 | 101 | def test_get_bot_coverage_list(self): 102 | 103 | res, _ = _get_bot_coverage_list(self.sorted_list, self.thresholds) 104 | gt = [0.888, 0.555, 0.444, 0.333, 0.111] 105 | for r, g in zip(res, gt): 106 | self.assertEqual( 107 | math.isclose(r, g, abs_tol=0.01, rel_tol=0.0001), 108 | True, 109 | "bot coverage value change", 110 | ) 111 | 112 | def test_analysis(self): 113 | analysis_df1 = analysis(self.results) 114 | self.assertEqual( 115 | analysis_df1["Bot Coverage Counts"].iloc[9], "1 / 9", "analysis fail" 116 | ) 117 | analysis_df_list = analysis(self.results, ["A"]) 118 | self.assertEqual( 119 | analysis_df_list[0]["Bot Coverage Counts"].iloc[9], "2 / 7", "analysis fail" 120 | ) 121 | 122 | def test_convert_data_format(self): 123 | 124 | test1 = _convert_data_format(self.results) 125 | for element1, element2 in zip(test1, self.sorted_list): 126 | for ele1, ele2 in zip(element1, element2): 127 | self.assertEqual(ele1, ele2, "test for covert data format fail") 128 | 129 | def test_analysis_pipeline(self): 130 | analysis_df 
= analysis_pipeline(self.results) 131 | self.assertEqual( 132 | analysis_df["Bot Coverage Counts"].iloc[9], "1 / 9", "analysis fail" 133 | ) 134 | 135 | def test_extract_table_analysis(self): 136 | sorted_results = _convert_data_format(self.results) 137 | ontopic_infos, offtopics_infos = extract_by_topic(sorted_results) 138 | ( 139 | analysis_df, 140 | toa_list, 141 | bot_coverage_list, 142 | far_list, 143 | thresholds, 144 | ) = extract_table_analysis(sorted_results, ontopic_infos, offtopics_infos) 145 | self.assertEqual( 146 | math.isclose(toa_list[2], 0.75, abs_tol=0.01, rel_tol=0.0001), 147 | True, 148 | "extract table analysis fail", 149 | ) 150 | 151 | def test_create_display_table(self): 152 | sorted_results = _convert_data_format(self.results) 153 | thresholds, sort_uniq_confs = generate_unique_thresholds(sorted_results) 154 | toa_list, toa_count = _get_ontopic_accuracy_list(sorted_results, thresholds) 155 | bot_coverage_list, bot_coverage_count = _get_bot_coverage_list( 156 | sorted_results, thresholds 157 | ) 158 | ontopic_infos, offtopic_infos = extract_by_topic(sorted_results) 159 | far_list, _ = _get_far_list(sorted_results, thresholds) 160 | analysis_df = create_display_table( 161 | toa_list, 162 | bot_coverage_list, 163 | bot_coverage_count, 164 | sorted_results, 165 | thresholds, 166 | offtopic_infos, 167 | far_list, 168 | ) 169 | 170 | self.assertEqual( 171 | analysis_df["Bot Coverage Counts"].iloc[-1], 172 | "1 / 9", 173 | "create display \ 174 | table test fail", 175 | ) 176 | 177 | def test_generate_unique_thresholds(self): 178 | thresholds, unique_confidence = generate_unique_thresholds(self.sorted_list) 179 | self.assertEqual( 180 | math.isclose(thresholds[1], 0.15, abs_tol=0.01, rel_tol=0.0001), 181 | True, 182 | "test \ 183 | generate unique threshold fail", 184 | ) 185 | 186 | self.assertEqual( 187 | math.isclose(unique_confidence[5], 1, abs_tol=0.01, rel_tol=0.0001), 188 | True, 189 | "test generate unique threshold fail", 190 | ) 191 | 
def compare(a, b):
    """Return True when a and b have equal length and agree element-wise
    within an absolute tolerance of 0.01."""
    if len(a) != len(b):
        return False
    return all(math.fabs(x - y) <= 0.01 for x, y in zip(a, b))
inferencer.inference( 56 | cls.conversation, 57 | test_df, 58 | max_thread=THREAD_NUM, 59 | skill_id=cls.wksp_id, 60 | ) 61 | 62 | def test_filter_results(self): 63 | wrong_examples_sorted = highlighter._filter_results( 64 | self.results, 0.4, self.lang_util 65 | ) 66 | ground_truth = ( 67 | 47, 68 | "no he is an arrogant self serving immature idiot get it right", 69 | None, 70 | "General_Connect_to_Agent", 71 | 0.6697888851165772, 72 | 0.09834358692169187, 73 | 9, 74 | ) 75 | 76 | self.assertEqual( 77 | wrong_examples_sorted[0][2], None, "Test for filter results fail" 78 | ) 79 | self.assertEqual( 80 | math.isclose( 81 | wrong_examples_sorted[0][4], ground_truth[4], rel_tol=0.01, abs_tol=0.1 82 | ), 83 | True, 84 | "Test for filter results fail", 85 | ) 86 | 87 | def test_generate_adversarial_examples(self): 88 | test_utterance = "winter is coming" 89 | ( 90 | adversarial_examples, 91 | adversarial_span, 92 | ) = highlighter._generate_adversarial_examples(test_utterance, 1) 93 | self.assertEqual( 94 | "winter coming" in adversarial_examples, 95 | True, 96 | "Test for generate adversarial example fail", 97 | ) 98 | self.assertEqual( 99 | adversarial_span["winter coming_1"], 100 | (1, 2), 101 | "Test for generate adversarial example fail", 102 | ) 103 | 104 | def test_adversarial_examples_multi_thread_inference(self): 105 | long_example1 = ( 106 | "um taking a shot here um lets say three " 107 | + "separate people whos wills are to each other" 108 | ) 109 | wrong_examples_sorted = [ 110 | ( 111 | 1, 112 | "see ya", 113 | "Goodbye", 114 | "General_Greetings", 115 | 0.5005551099777221, 116 | 0.5005551099777221, 117 | 1, 118 | ), 119 | ( 120 | 42, 121 | long_example1, 122 | None, 123 | "General_Connect_to_Agent", 124 | 0.6537539958953857, 125 | 0.2537539958953857, 126 | 9, 127 | ), 128 | ] 129 | 130 | ( 131 | adv_results, 132 | adv_dict_span, 133 | ) = highlighter._adversarial_examples_multi_thread_inference( 134 | wrong_examples_sorted, self.conversation, self.wksp_id 
135 | ) 136 | result = adv_results[adv_results["utterance"].str.match("see")] 137 | 138 | self.assertEqual( 139 | np.abs(result["top_confidence"].values[0] - 0.478708) < TOLERANCE, 140 | True, 141 | "Test for adversarial examples inference fail", 142 | ) 143 | 144 | self.assertEqual( 145 | adv_dict_span["see_1"], 146 | (1, 2), 147 | "test for adversarial example inference: adversarial span dict mismatch", 148 | ) 149 | 150 | def test_scoring_function(self): 151 | highlight = np.zeros(3, dtype="float32") 152 | highlight = highlighter._scoring_function( 153 | highlight=highlight, 154 | original_position=0, 155 | adversarial_position=1, 156 | original_confidence=0.7, 157 | adversarial_confidence=0.5, 158 | start_idx=2, 159 | end_idx=3, 160 | ) 161 | self.assertEqual( 162 | math.isclose(highlight[2], 0.44, rel_tol=0.0001, abs_tol=0.01), 163 | True, 164 | "Test for adversarial scoring fail", 165 | ) 166 | 167 | def test_highlight_scoring(self): 168 | original_example = ( 169 | 1, 170 | "see ya", 171 | "Goodbye", 172 | "General_Greetings", 173 | 0.5005551099777221, 174 | 0.5005551099777221, 175 | 1, 176 | ) 177 | subset_adversarial_result = pd.DataFrame( 178 | data={ 179 | "utterance": ["see"], 180 | "top_predicts": [ 181 | [ 182 | {"intent": "General_Greetings", "confidence": 0.6}, 183 | {"intent": "Goodbye", "confidence": 0.5}, 184 | ] 185 | ], 186 | } 187 | ) 188 | adversarial_span_dict = {"see_1": (1, 2)} 189 | 190 | highlight = highlighter._highlight_scoring( 191 | original_example, 192 | subset_adversarial_result, 193 | adversarial_span_dict, 194 | self.lang_util, 195 | ) 196 | self.assertEqual( 197 | compare(highlight, [0, -0.41988897]), 198 | True, 199 | "Test highlight scoring function fail", 200 | ) 201 | 202 | def test_get_highlights_in_batch_multi_thread(self): 203 | highlighter.get_highlights_in_batch_multi_thread( 204 | conversation=self.conversation, 205 | full_results=self.results, 206 | output_folder=self.tmpbatchfolder, 207 | confidence_threshold=0.4, 
208 | show_worst_k=3, 209 | lang_util=self.lang_util, 210 | skill_id=self.wksp_id, 211 | ) 212 | self.assertEqual( 213 | len(os.listdir(self.tmpbatchfolder)), 214 | 3, 215 | "# of batch highlighting files is mismatched.", 216 | ) 217 | 218 | @classmethod 219 | def tearDownClass(cls): 220 | shutil.rmtree(cls.tmpfolder) 221 | unittest.TestCase.tearDown(cls) 222 | 223 | 224 | if __name__ == "__main__": 225 | unittest.main() 226 | -------------------------------------------------------------------------------- /assistant_skill_analysis/term_analysis/chi2_analyzer.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import pandas as pd 3 | import numpy as np 4 | from IPython.display import display, Markdown, HTML 5 | from sklearn.feature_selection import chi2 6 | from sklearn.feature_extraction.text import CountVectorizer 7 | from nltk import word_tokenize 8 | 9 | 10 | def _preprocess_chi2(workspace_pd, lang_util): 11 | """ 12 | Preprocess dataframe for chi2 analysis 13 | :param workspace_pd: Preprocess dataframe for chi2 14 | :return labels: intents processed 15 | :return count_vectorizer: vectorizer instance 16 | :return features: features from transform 17 | """ 18 | 19 | count_vectorizer = CountVectorizer( 20 | min_df=1, 21 | encoding="utf-8", 22 | ngram_range=(1, 2), 23 | stop_words=lang_util.stop_words, 24 | tokenizer=lang_util.tokenize, 25 | token_pattern="(?u)\b\w+\b", 26 | ) 27 | features = count_vectorizer.fit_transform(workspace_pd["utterance"]).toarray() 28 | labels = workspace_pd["intent"] 29 | return labels, count_vectorizer, features 30 | 31 | 32 | def _compute_chi2_top_feature( 33 | features, labels, vectorizer, cls, significance_level=0.05 34 | ): 35 | """ 36 | Perform chi2 analysis, punctuation filtering and deduplication 37 | :param features: count vectorizer features 38 | :param labels: intents processed 39 | :param vectorizer: count vectorizer instances 40 | :param cls: classes 
for chi square 41 | :param significance_level: specify an alpha 42 | :return deduplicated_unigram: 43 | :return deduplicated_bigram: 44 | """ 45 | features_chi2, pval = chi2(features, labels == cls) 46 | 47 | feature_names = np.array(vectorizer.get_feature_names_out()) 48 | 49 | features_chi2 = features_chi2[pval < significance_level] 50 | feature_names = feature_names[pval < significance_level] 51 | 52 | indices = np.argsort(features_chi2) 53 | feature_names = feature_names[indices] 54 | 55 | unigrams = [v.strip() for v in feature_names if len(v.strip().split()) == 1] 56 | deduplicated_unigram = list() 57 | 58 | for unigram in unigrams: 59 | if unigram not in deduplicated_unigram: 60 | deduplicated_unigram.append(unigram) 61 | 62 | bigrams = [v.strip() for v in feature_names if len(v.strip().split()) == 2] 63 | 64 | deduplicated_bigram = list() 65 | for bigram in bigrams: 66 | if bigram not in deduplicated_bigram: 67 | deduplicated_bigram.append(bigram) 68 | 69 | return deduplicated_unigram, deduplicated_bigram 70 | 71 | 72 | def get_chi2_analysis( 73 | workspace_pd, 74 | lang_util, 75 | significance_level=0.05, 76 | ): 77 | """ 78 | find correlated unigram and bigram of each intent with Chi2 analysis 79 | :param workspace_pd: dataframe, workspace data 80 | :param signficance_level: float, significance value to reject the null hypothesis 81 | :return unigram_intent_dict: 82 | :return bigram_intent_dict: 83 | """ 84 | labels, vectorizer, features = _preprocess_chi2(workspace_pd, lang_util) 85 | 86 | label_frequency_dict = dict(Counter(workspace_pd["intent"]).most_common()) 87 | N = 5 88 | 89 | # keys are the set of unigrams/bigrams and value will be the intent 90 | # maps one-to-many relationship between unigram and intent, 91 | unigram_intent_dict = dict() 92 | # maps one-to-many relationship between bigram and intent 93 | bigram_intent_dict = dict() 94 | 95 | classes = list() 96 | chi_unigrams = list() 97 | chi_bigrams = list() 98 | for cls in 
label_frequency_dict.keys(): 99 | 100 | unigrams, bigrams = _compute_chi2_top_feature( 101 | features, labels, vectorizer, cls, significance_level 102 | ) 103 | classes.append(cls) 104 | 105 | if unigrams: 106 | chi_unigrams.append(", ".join(unigrams[-N:])) 107 | else: 108 | chi_unigrams.append("None") 109 | 110 | if bigrams: 111 | chi_bigrams.append(", ".join(bigrams[-N:])) 112 | else: 113 | chi_bigrams.append("None") 114 | 115 | if unigrams: 116 | if frozenset(unigrams[-N:]) in unigram_intent_dict: 117 | unigram_intent_dict[frozenset(unigrams[-N:])].append(cls) 118 | else: 119 | unigram_intent_dict[frozenset(unigrams[-N:])] = list() 120 | unigram_intent_dict[frozenset(unigrams[-N:])].append(cls) 121 | 122 | if bigrams: 123 | if frozenset(bigrams[-N:]) in bigram_intent_dict: 124 | bigram_intent_dict[frozenset(bigrams[-N:])].append(cls) 125 | else: 126 | bigram_intent_dict[frozenset(bigrams[-N:])] = list() 127 | bigram_intent_dict[frozenset(bigrams[-N:])].append(cls) 128 | 129 | chi_df = pd.DataFrame(data={"Intent": classes}) 130 | chi_df["Correlated Unigrams"] = chi_unigrams 131 | chi_df["Correlated Bigrams"] = chi_bigrams 132 | 133 | display(Markdown(("## Chi-squared Analysis"))) 134 | with pd.option_context( 135 | "display.max_rows", 136 | None, 137 | "display.max_columns", 138 | None, 139 | "display.max_colwidth", 140 | 100, 141 | ): 142 | chi_df.index = np.arange(1, len(chi_df) + 1) 143 | display(chi_df) 144 | return unigram_intent_dict, bigram_intent_dict 145 | 146 | 147 | def get_confusing_key_terms(keyterm_intent_map): 148 | """ 149 | Greedy search for overlapping intents 150 | :param keyterm_intent_map: correlated terms 151 | :return df: ambiguous terms data frame 152 | """ 153 | ambiguous_intents = list() 154 | ambiguous_keywords = list() 155 | intents_seen = list() 156 | 157 | for i in range(len(keyterm_intent_map)): 158 | correlated_unigrams = list(keyterm_intent_map.keys())[i] 159 | current_label = keyterm_intent_map[correlated_unigrams] 160 | 
intents_seen.append(current_label) 161 | 162 | if len(keyterm_intent_map[correlated_unigrams]) > 1: 163 | print(keyterm_intent_map[correlated_unigrams]) 164 | print(correlated_unigrams) 165 | 166 | for other_correlated_unigrams in keyterm_intent_map.keys(): 167 | if keyterm_intent_map[other_correlated_unigrams] in intents_seen: 168 | continue 169 | overlap = correlated_unigrams.intersection(other_correlated_unigrams) 170 | if overlap: 171 | for keyword in overlap: 172 | ambiguous_intents.append( 173 | "<" 174 | + current_label[0] 175 | + ", " 176 | + keyterm_intent_map[other_correlated_unigrams][0] 177 | + ">" 178 | ) 179 | ambiguous_keywords.append(keyword) 180 | 181 | df = pd.DataFrame( 182 | data={"Intent Pairs": ambiguous_intents, "Terms": ambiguous_keywords} 183 | ) 184 | 185 | if not ambiguous_intents: 186 | display( 187 | Markdown("There is no ambiguity based on top 5 key terms in chi2 analysis") 188 | ) 189 | else: 190 | display_size = 10 191 | if not df.empty: 192 | if len(df) < display_size: 193 | display_size = len(df) 194 | display(HTML(df.sample(n=display_size).to_html(index=False))) 195 | 196 | return df 197 | 198 | 199 | def chi2_overlap_check(ambiguous_unigram_df, ambiguous_bigram_df, intent1, intent2): 200 | """ 201 | looks for intent overlap for specific intent or intent pairs 202 | :param ambiguous_unigram_df: 203 | :param ambiguous_bigram_df: 204 | :param intent1: 205 | :param intent2: 206 | """ 207 | intent = intent1 + ", " + intent2 + "|" + intent2 + ", " + intent1 208 | part1 = None 209 | part2 = None 210 | if not ambiguous_unigram_df.empty: 211 | part1 = ambiguous_unigram_df[ 212 | ambiguous_unigram_df["Intent Pairs"].str.contains(intent) 213 | ] 214 | 215 | if not ambiguous_bigram_df.empty: 216 | part2 = ambiguous_bigram_df[ 217 | ambiguous_bigram_df["Intent Pairs"].str.contains(intent) 218 | ] 219 | 220 | if part1 is not None and part2 is not None: 221 | display(HTML(pd.concat([part1, part2]).to_html(index=False))) 222 | elif part1 is not 
None: 223 | display(HTML(part1.to_html(index=False))) 224 | elif part2 is not None: 225 | display(HTML(part2.to_html(index=False))) 226 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 
176 | 177 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /assistant_skill_analysis/inferencing/inferencer.py: -------------------------------------------------------------------------------- 1 | import time 2 | import pandas as pd 3 | import numpy as np 4 | import ibm_watson 5 | from ..utils import skills_util 6 | from concurrent.futures import ThreadPoolExecutor 7 | from tqdm import tqdm 8 | 9 | MAX_RETRY = 5 10 | 11 | 12 | def inference( 13 | conversation, 14 | test_data, 15 | max_thread=5, 16 | user_id="256", 17 | assistant_id=None, 18 | skill_id=None, 19 | intent_to_action_mapping=None, 20 | timeout=1, 21 | environment_id=None, 22 | ): 23 | """ 24 | query the message api to generate results on the test data 25 | :parameter: conversation: the conversation object produced by AssistantV1 api 26 | :parameter: skill_id: the workspace id of the 27 | :parameter: test_data: the data that will be sent to the classifier 28 | :parameter: max_thread: the max number of threads to use for multi-threaded inference 29 | :parameter: verbose: flag indicates verbosity of outputs during mutli-threaded inference 30 | :parameter: assistant_id: 31 | :parameter: intent_to_action_mapping: 32 | :parameter: timeout: integer or float that specifies number of seconds each thread should wait for inference result 33 | :parameter: environment_id: environment id 34 | :return result_df: results dataframe 35 | """ 36 | skd_version = "V1" 37 | if isinstance(conversation, ibm_watson.AssistantV1): 38 | assert skill_id is not None 39 | else: 40 | assert assistant_id is not None 41 | skd_version = "V2" 42 | 43 | if max_thread == 1: 44 | reach_max_retry = False 45 | responses = [] 46 | for test_example, ground_truth in zip( 47 | test_data["utterance"], test_data["intent"] 48 | ): 49 | attempt = 1 50 | while attempt <= MAX_RETRY: 51 | try: 52 | prediction_json = skills_util.retrieve_classifier_response( 53 | conversation=conversation, 54 | 
text_input=test_example, 55 | alternate_intents=True, 56 | user_id=user_id, 57 | assistant_id=assistant_id, 58 | skill_id=skill_id, 59 | environment_id=environment_id, 60 | ) 61 | time.sleep(0.3) 62 | 63 | success_flag = True 64 | except Exception: 65 | continue 66 | if success_flag: 67 | break 68 | attempt += 1 69 | 70 | if attempt > MAX_RETRY: 71 | reach_max_retry = True 72 | 73 | if reach_max_retry: 74 | raise Exception("Maximum attempt of {} has reached".format(MAX_RETRY)) 75 | 76 | if skd_version == "V2": 77 | prediction_json = prediction_json["output"] 78 | if len(prediction_json["intents"]) > 0: 79 | # v2 api returns all intent predictions 80 | if ( 81 | prediction_json["intents"][0]["confidence"] 82 | < skills_util.OFFTOPIC_CONF_THRESHOLD 83 | ): 84 | prediction_json["intents"] = [] 85 | if intent_to_action_mapping is not None: 86 | for intents_prediction in prediction_json["intents"]: 87 | intents_prediction["intent"] = intent_to_action_mapping[ 88 | intents_prediction["intent"] 89 | ] 90 | 91 | if not prediction_json["intents"]: 92 | responses.append( 93 | { 94 | "top_intent": skills_util.OFFTOPIC_LABEL, 95 | "top_confidence": 0.0, 96 | "correct_intent": ground_truth, 97 | "utterance": test_example, 98 | "top_predicts": [], 99 | "entities": [], 100 | } 101 | ) 102 | else: 103 | responses.append( 104 | { 105 | "top_intent": prediction_json["intents"][0]["intent"], 106 | "top_confidence": prediction_json["intents"][0]["confidence"], 107 | "correct_intent": ground_truth, 108 | "utterance": test_example, 109 | "top_predicts": prediction_json["intents"], 110 | "entities": prediction_json["entities"], 111 | } 112 | ) 113 | result_df = pd.DataFrame(data=responses) 114 | else: 115 | result_df = thread_inference( 116 | conversation=conversation, 117 | test_data=test_data, 118 | max_thread=max_thread, 119 | user_id=user_id, 120 | skill_id=skill_id, 121 | assistant_id=assistant_id, 122 | intent_to_action_mapping=intent_to_action_mapping, 123 | timeout=timeout, 124 | 
environment_id=environment_id, 125 | ) 126 | return result_df 127 | 128 | 129 | def thread_inference( 130 | conversation, 131 | test_data, 132 | max_thread=5, 133 | user_id="256", 134 | assistant_id=None, 135 | skill_id=None, 136 | intent_to_action_mapping=None, 137 | timeout=1, 138 | environment_id=None, 139 | ): 140 | """ 141 | Perform multi thread inference for faster inference time 142 | :param conversation: 143 | :param skill_id: Assistant workspace id 144 | :param test_data: data to test on 145 | :param max_thread: max threads to use 146 | :param verbose: verbosity of output 147 | :param user_id: user_id for billing purpose 148 | :param assistant_id: 149 | :parameter: intent_to_action_mapping: 150 | :parameter: timeout: integer or float that specifies number of seconds each thread should wait for inference result 151 | :parameter: environment_id: environment id 152 | :return result_df: results dataframe 153 | """ 154 | if isinstance(conversation, ibm_watson.AssistantV1): 155 | assert skill_id is not None 156 | sdk_version = "V1" 157 | else: 158 | assert assistant_id is not None 159 | sdk_version = "V2" 160 | count = 0 161 | response = None 162 | while count < MAX_RETRY and not response: 163 | try: 164 | response = skills_util.retrieve_classifier_response( 165 | conversation=conversation, 166 | text_input="ping", 167 | alternate_intents=True, 168 | user_id=user_id, 169 | assistant_id=assistant_id, 170 | skill_id=skill_id, 171 | environment_id=environment_id, 172 | ) 173 | except Exception: 174 | count += 1 175 | time.sleep(0.5) 176 | 177 | executor = ThreadPoolExecutor(max_workers=max_thread) 178 | futures = {} 179 | result = [] 180 | for test_example, ground_truth in zip(test_data["utterance"], test_data["intent"]): 181 | future = executor.submit( 182 | get_intent_confidence_retry, 183 | conversation=conversation, 184 | text_input=test_example, 185 | alternative_intents=True, 186 | user_id=user_id, 187 | assistant_id=assistant_id, 188 | skill_id=skill_id, 189 
| environment_id=environment_id, 190 | retry=0, 191 | ) 192 | futures[future] = (test_example, ground_truth) 193 | 194 | for future in tqdm(futures): 195 | res = future.result(timeout=timeout) 196 | test_example, ground_truth = futures[future] 197 | result.append( 198 | process_result( 199 | test_example, 200 | ground_truth, 201 | res, 202 | intent_to_action_mapping, 203 | sdk_version=sdk_version, 204 | ) 205 | ) 206 | 207 | result_df = pd.DataFrame(data=result) 208 | return result_df 209 | 210 | 211 | def process_result( 212 | utterance, ground_truth, response, intent_to_action_mapping, sdk_version 213 | ): 214 | if sdk_version == "V2": 215 | response = response["output"] 216 | if ( 217 | not response["intents"] 218 | or response["intents"][0]["confidence"] 219 | < skills_util.OFFTOPIC_CONF_THRESHOLD 220 | ): 221 | response["intents"] = [] 222 | if intent_to_action_mapping is not None: 223 | for intents_prediction in response["intents"]: 224 | intents_prediction["intent"] = intent_to_action_mapping[ 225 | intents_prediction["intent"] 226 | ] 227 | if response["intents"]: 228 | top_predicts = response["intents"] 229 | top_intent = response["intents"][0]["intent"] 230 | top_confidence = response["intents"][0]["confidence"] 231 | else: 232 | top_predicts = [] 233 | top_intent = skills_util.OFFTOPIC_LABEL 234 | top_confidence = 0 235 | 236 | if response["entities"]: 237 | entities = response["entities"] 238 | else: 239 | entities = [] 240 | 241 | new_dict = { 242 | "utterance": utterance, 243 | "correct_intent": ground_truth, 244 | "top_intent": top_intent, 245 | "top_confidence": top_confidence, 246 | "top_predicts": top_predicts, 247 | "entities": entities, 248 | } 249 | return new_dict 250 | 251 | 252 | def get_intent_confidence_retry( 253 | conversation, 254 | text_input, 255 | alternative_intents, 256 | user_id, 257 | assistant_id, 258 | skill_id, 259 | environment_id, 260 | retry=0, 261 | ): 262 | try: 263 | return skills_util.retrieve_classifier_response( 264 | 
conversation=conversation, 265 | text_input=text_input, 266 | alternate_intents=True, 267 | user_id=user_id, 268 | assistant_id=assistant_id, 269 | skill_id=skill_id, 270 | environment_id=environment_id, 271 | ) 272 | except Exception as e: 273 | if retry < MAX_RETRY: 274 | return get_intent_confidence_retry( 275 | conversation, 276 | text_input, 277 | alternative_intents, 278 | user_id, 279 | assistant_id, 280 | skill_id, 281 | environment_id=environment_id, 282 | retry=retry + 1, 283 | ) 284 | else: 285 | raise e 286 | 287 | 288 | def calculate_mistakes(results): 289 | """ 290 | retrieve the data frame of miss-classified examples 291 | :param results: results after tersting 292 | :return wrongs_df: data frame of mistakes 293 | """ 294 | wrongs = list() 295 | for idx, row in results.iterrows(): 296 | if row["correct_intent"] != row["top_intent"]: 297 | wrongs.append(row) 298 | wrongs_df = pd.DataFrame(data=wrongs) 299 | wrongs_df.index.name = "Test Example Index" 300 | return wrongs_df 301 | 302 | 303 | def calculate_accuracy(results): 304 | """ 305 | calculate the accuracy on the test set 306 | :param results: the results of testing 307 | :return accuracy: get accuracy on test set 308 | """ 309 | correct = 0 310 | for i in range(0, len(results["correct_intent"])): 311 | correct += 1 if results["top_intent"][i] == results["correct_intent"][i] else 0 312 | accuracy = np.around((correct / len(results["correct_intent"])) * 100, 2) 313 | return accuracy 314 | -------------------------------------------------------------------------------- /assistant_skill_analysis/data_analysis/divergence_analyzer.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | from IPython.display import Markdown, display 3 | import numpy as np 4 | import pandas as pd 5 | from scipy.spatial import distance 6 | 7 | 8 | def _label_percentage(data_frame): 9 | """ 10 | Calculate the percentage of each labels in the data frame 11 
| :param data_frame: dataframe for train or test 12 | :return: label_percentage_dict: dictionary maps label : % of labels 13 | """ 14 | total_examples = len(data_frame) 15 | label_frequency_dict = dict(Counter(data_frame["intent"]).most_common()) 16 | percentage_list = np.array(list(label_frequency_dict.values())) / total_examples 17 | label_percentage_dict = dict( 18 | zip(list(label_frequency_dict.keys()), percentage_list) 19 | ) 20 | return label_percentage_dict 21 | 22 | 23 | def _train_test_coloring(val): 24 | """ 25 | color scheme for train test difference statistics 26 | :param val: 27 | :return: 28 | """ 29 | if val > 25: 30 | color = "red" 31 | elif val > 10: 32 | color = "DarkBlue" 33 | else: 34 | color = "green" 35 | return "color: %s" % color 36 | 37 | 38 | def _train_test_label_difference( 39 | workspace_label_percentage_dict, test_label_percentage_dict 40 | ): 41 | """ 42 | analyze the difference between training set and test set 43 | :param workspace_label_percentage_dict: 44 | :param test_label_percentage_dict: 45 | :return: 46 | missing_label: list of labels that are missing in the test set 47 | difference_dict: dictionary that maps intent:percentage difference 48 | js_distance: jensen-shannon distance between train and test label percentages 49 | """ 50 | difference_dict = dict() 51 | missing_label = list() 52 | distribution1 = list() 53 | distribution2 = list() 54 | 55 | for key in workspace_label_percentage_dict: 56 | workspace_percentage = workspace_label_percentage_dict[key] 57 | distribution1.append(workspace_percentage) 58 | if key in test_label_percentage_dict: 59 | 60 | test_percentage = test_label_percentage_dict[key] 61 | 62 | distribution2.append(test_percentage) 63 | else: 64 | missing_label.append(key) 65 | test_percentage = 0 66 | distribution2.append(test_percentage) 67 | 68 | # L1 dist 69 | current_difference = np.abs(test_percentage - workspace_percentage) 70 | 71 | if key in test_label_percentage_dict: 72 | difference_dict[key] = 
[ 73 | workspace_percentage * 100, 74 | test_percentage * 100, 75 | current_difference * 100, 76 | ] 77 | 78 | js_distance = distance.jensenshannon(distribution1, distribution2, 2.0) 79 | 80 | return missing_label, difference_dict, js_distance 81 | 82 | 83 | def _train_test_vocab_difference(train_set_pd, test_set_pd): 84 | """ 85 | Analyze the training set and test set and retrieve the vocabulary of each set 86 | :param train_set_pd: 87 | :param test_set_pd: 88 | :return: 89 | train vocab: the set that contains the vocabulary of training set 90 | test vocab: the set that contains the vocabulary of test set 91 | """ 92 | train_vocab = set() 93 | test_vocab = set() 94 | 95 | for tokens in train_set_pd["tokens"].tolist(): 96 | train_vocab.update(tokens) 97 | 98 | for tokens in test_set_pd["tokens"].tolist(): 99 | test_vocab.update(tokens) 100 | 101 | return train_vocab, test_vocab 102 | 103 | 104 | def _train_test_utterance_length_difference(train_set_pd, test_set_pd): 105 | """ 106 | Analyze difference in length of utterance of training set and test set per label 107 | :param train_set_pd: 108 | :param test_set_pd: 109 | :return: 110 | train_test_legnth_comparison: pandas dataframe [Intent, Absolute Difference] 111 | """ 112 | train_pd_temp = train_set_pd.copy() 113 | train_pd_temp["Train"] = train_pd_temp["tokens"].apply(len) 114 | train_avg_len_by_label = train_pd_temp[["intent", "Train"]].groupby("intent").mean() 115 | 116 | test_pd_temp = test_set_pd.copy() 117 | test_pd_temp["Test"] = test_pd_temp["tokens"].apply(len) 118 | test_avg_len_by_label = test_pd_temp[["intent", "Test"]].groupby("intent").mean() 119 | 120 | train_test_length_comparison = pd.merge( 121 | train_avg_len_by_label, test_avg_len_by_label, on="intent" 122 | ) 123 | train_test_length_comparison["Absolute Difference"] = np.abs( 124 | train_test_length_comparison["Train"] - train_test_length_comparison["Test"] 125 | ) 126 | train_test_length_comparison = train_test_length_comparison.sort_values( 
127 | by=["Absolute Difference"], ascending=False 128 | ) 129 | train_test_length_comparison = train_test_length_comparison.reset_index() 130 | train_test_length_comparison.rename(columns={"intent": "Intent"}, inplace=True) 131 | return train_test_length_comparison 132 | 133 | 134 | def _get_metrics(results): 135 | """ 136 | compute the metrics of precision, recall and f1 per label 137 | :param results: inference results of the test set 138 | :return: 139 | precision_dict: maps the {intent: precision} 140 | recall_dict: maps the {intent: recall} 141 | f1_dict: maps the {intent:f1} 142 | """ 143 | groundtruth = results["correct_intent"].values.tolist() 144 | top_intent = results["top_intent"].values.tolist() 145 | gt_cnt_dict = dict() 146 | pred_cnt_dict = dict() 147 | true_positive_dict = dict() 148 | for gt, pred in zip(groundtruth, top_intent): 149 | gt_cnt_dict[gt] = gt_cnt_dict.get(gt, 0) + 1 150 | pred_cnt_dict[pred] = pred_cnt_dict.get(pred, 0) + 1 151 | if gt == pred: 152 | true_positive_dict[pred] = true_positive_dict.get(pred, 0) + 1 153 | precision_dict = dict() 154 | recall_dict = dict() 155 | f1_dict = dict() 156 | for lb in true_positive_dict: 157 | 158 | recall_dict[lb] = ( 159 | true_positive_dict[lb] / gt_cnt_dict[lb] if lb in gt_cnt_dict else 0 160 | ) 161 | 162 | precision_dict[lb] = ( 163 | true_positive_dict[lb] / pred_cnt_dict[lb] if lb in pred_cnt_dict else 0 164 | ) 165 | 166 | f1_dict[lb] = ( 167 | 0.0 168 | if recall_dict[lb] == 0 and precision_dict[lb] == 0 169 | else 2.0 170 | * recall_dict[lb] 171 | * precision_dict[lb] 172 | / (recall_dict[lb] + precision_dict[lb]) 173 | ) 174 | return precision_dict, recall_dict, f1_dict 175 | 176 | 177 | def analyze_train_test_diff(train_set_pd, test_set_pd, results): 178 | """ 179 | analyze the difference between training set and test set and generate visualizations 180 | :param train_set_pd: 181 | :param test_set_pd: 182 | :param results: 183 | """ 184 | workspace_label_percentage_dict = 
_label_percentage(train_set_pd) 185 | test_label_percentage_dict = _label_percentage(test_set_pd) 186 | 187 | missing_label, difference_dict, js = _train_test_label_difference( 188 | workspace_label_percentage_dict, test_label_percentage_dict 189 | ) 190 | train_test_length_comparison_pd = _train_test_utterance_length_difference( 191 | train_set_pd, test_set_pd 192 | ) 193 | 194 | train_vocab, test_vocab = _train_test_vocab_difference(train_set_pd, test_set_pd) 195 | 196 | display(Markdown("## Test Data Evaluation")) 197 | 198 | if difference_dict: 199 | 200 | label = list(difference_dict.keys()) 201 | diff = np.round(list(difference_dict.values()), 2) 202 | precision_dict, recall_dict, f1_dict = _get_metrics(results) 203 | precision = np.round( 204 | [precision_dict[l] * 100.0 if l in precision_dict else 0.0 for l in label], 205 | 2, 206 | ) 207 | 208 | recall = np.round( 209 | [recall_dict[l] * 100.0 if l in recall_dict else 0.0 for l in label], 2 210 | ) 211 | 212 | f1 = np.round([f1_dict[l] * 100.0 if l in f1_dict else 0.0 for l in label], 2) 213 | 214 | train_count_dict = dict(Counter(train_set_pd["intent"])) 215 | test_count_dict = dict(Counter(test_set_pd["intent"])) 216 | tr_cnt = [train_count_dict[l] if l in train_count_dict else 0.0 for l in label] 217 | te_cnt = [test_count_dict[l] if l in test_count_dict else 0.0 for l in label] 218 | 219 | difference_pd = pd.DataFrame( 220 | { 221 | "Intent": label, 222 | "% of Train": diff[:, 0], 223 | "% of Test": diff[:, 1], 224 | "Absolute Difference %": diff[:, 2], 225 | "Train Examples": tr_cnt, 226 | "Test Examples": te_cnt, 227 | "Test Precision %": precision, 228 | "Test Recall %": recall, 229 | "Test F1 %": f1, 230 | } 231 | ) 232 | 233 | if not difference_pd[difference_pd["Absolute Difference %"] > 0.001].empty: 234 | table_for_display = difference_pd[ 235 | difference_pd["Absolute Difference %"] > 0.001 236 | ].sort_values(by=["Absolute Difference %"], ascending=False) 237 | table_for_display = 
table_for_display.style.applymap( 238 | _train_test_coloring, subset=pd.IndexSlice[:, ["Absolute Difference %"]] 239 | ) 240 | display(table_for_display) 241 | display(Markdown("\n")) 242 | display(Markdown("Distribution Mismatch Color Code")) 243 | display(Markdown(" Red - Severe ")) 244 | display(Markdown(" Blue - Caution ")) 245 | display(Markdown(" Green - Good ")) 246 | 247 | if js >= 0: 248 | js = np.round(js, 2) * 100 249 | display( 250 | Markdown( 251 | "### Data Distribution Divergence Test vs Train \ 252 | {}%".format( 253 | js 254 | ) 255 | ) 256 | ) 257 | display(Markdown("**Note** Metric used is Jensen Shannon Distance")) 258 | 259 | if missing_label: 260 | display(Markdown("### Missing Intents in Test Data")) 261 | missing_label_pd = pd.DataFrame( 262 | missing_label, columns=["Missing Intents in Test Set "] 263 | ) 264 | missing_label_pd.index = np.arange(1, len(missing_label_pd) + 1) 265 | display(missing_label_pd) 266 | 267 | display(Markdown("### Test Data Example Length")) 268 | condition1 = ( 269 | train_test_length_comparison_pd["Absolute Difference"] 270 | / train_test_length_comparison_pd["Train"] 271 | > 0.3 272 | ) 273 | condition2 = train_test_length_comparison_pd["Absolute Difference"] > 3 274 | 275 | length_comparison_pd = train_test_length_comparison_pd[condition1 & condition2] 276 | 277 | if not length_comparison_pd.empty: 278 | display( 279 | Markdown( 280 | "Divergence found in average length of user examples in test vs training data" 281 | ) 282 | ) 283 | length_comparison_pd.index = np.arange(1, len(length_comparison_pd) + 1) 284 | display(length_comparison_pd.round(2)) 285 | else: 286 | display(Markdown("Average length of user examples is comparable")) 287 | 288 | if train_vocab and test_vocab: 289 | display(Markdown("### Vocabulary Size Test vs Train")) 290 | oov_vocab_percentage = ( 291 | (len(test_vocab) - len(train_vocab.intersection(test_vocab))) 292 | / len(test_vocab) 293 | * 100 294 | ) 295 | 296 | vocab_df = pd.DataFrame( 
297 | data={ 298 | "Train Vocabulary Size": [len(train_vocab)], 299 | "Test Vocabulary Size": [len(test_vocab)], 300 | "% Test Set Vocabulary not found in Train": [oov_vocab_percentage], 301 | } 302 | ) 303 | vocab_df.index = np.arange(1, len(vocab_df) + 1) 304 | display(vocab_df.round(2)) 305 | 306 | display(Markdown(" ")) 307 | -------------------------------------------------------------------------------- /assistant_skill_analysis/resources/nl/stopwords: -------------------------------------------------------------------------------- 1 | # this is adapted from https://github.com/stopwords-iso/stopwords-nl 2 | aan 3 | aangaande 4 | aangezien 5 | achter 6 | achterna 7 | afgelopen 8 | al 9 | aldaar 10 | aldus 11 | alhoewel 12 | alias 13 | alle 14 | allebei 15 | alleen 16 | alsnog 17 | altijd 18 | altoos 19 | ander 20 | andere 21 | anders 22 | anderszins 23 | behalve 24 | behoudens 25 | beide 26 | beiden 27 | ben 28 | beneden 29 | bent 30 | bepaald 31 | betreffende 32 | bij 33 | binnen 34 | binnenin 35 | boven 36 | bovenal 37 | bovendien 38 | bovengenoemd 39 | bovenstaand 40 | bovenvermeld 41 | buiten 42 | daar 43 | daarheen 44 | daarin 45 | daarna 46 | daarnet 47 | daarom 48 | daarop 49 | daarvanlangs 50 | dan 51 | dat 52 | de 53 | die 54 | dikwijls 55 | dit 56 | door 57 | doorgaand 58 | dus 59 | echter 60 | eer 61 | eerdat 62 | eerder 63 | eerlang 64 | eerst 65 | elk 66 | elke 67 | en 68 | enig 69 | enigszins 70 | enkel 71 | er 72 | erdoor 73 | even 74 | eveneens 75 | evenwel 76 | gauw 77 | gedurende 78 | geen 79 | gehad 80 | gekund 81 | geleden 82 | gelijk 83 | gemoeten 84 | gemogen 85 | geweest 86 | gewoon 87 | gewoonweg 88 | haar 89 | had 90 | hadden 91 | hare 92 | heb 93 | hebben 94 | hebt 95 | heeft 96 | hem 97 | hen 98 | het 99 | hierbeneden 100 | hierboven 101 | hij 102 | hoe 103 | hoewel 104 | hun 105 | hunne 106 | ik 107 | ikzelf 108 | in 109 | inmiddels 110 | inzake 111 | is 112 | jezelf 113 | jij 114 | jijzelf 115 | jou 116 | jouw 117 | jouwe 118 | 
juist 119 | jullie 120 | kan 121 | klaar 122 | kon 123 | konden 124 | krachtens 125 | kunnen 126 | kunt 127 | later 128 | liever 129 | maar 130 | mag 131 | meer 132 | met 133 | mezelf 134 | mij 135 | mijn 136 | mijnent 137 | mijner 138 | mijzelf 139 | misschien 140 | mocht 141 | mochten 142 | moest 143 | moesten 144 | moet 145 | moeten 146 | mogen 147 | na 148 | naar 149 | nadat 150 | net 151 | niet 152 | noch 153 | nog 154 | nogal 155 | nu 156 | of 157 | ofschoon 158 | om 159 | omdat 160 | omhoog 161 | omlaag 162 | omstreeks 163 | omtrent 164 | omver 165 | onder 166 | ondertussen 167 | ongeveer 168 | ons 169 | onszelf 170 | onze 171 | ook 172 | op 173 | opnieuw 174 | opzij 175 | over 176 | overeind 177 | overigens 178 | pas 179 | precies 180 | reeds 181 | rond 182 | rondom 183 | sedert 184 | sinds 185 | sindsdien 186 | slechts 187 | sommige 188 | spoedig 189 | steeds 190 | tamelijk 191 | tenzij 192 | terwijl 193 | thans 194 | tijdens 195 | toch 196 | toen 197 | toenmaals 198 | toenmalig 199 | tot 200 | totdat 201 | tussen 202 | uit 203 | uitgezonderd 204 | vaakwat 205 | van 206 | vandaan 207 | vanuit 208 | vanwege 209 | veeleer 210 | verder 211 | vervolgens 212 | vol 213 | volgens 214 | voor 215 | vooraf 216 | vooral 217 | vooralsnog 218 | voorbij 219 | voordat 220 | voordezen 221 | voordien 222 | voorheen 223 | voorop 224 | vooruit 225 | vrij 226 | vroeg 227 | waar 228 | waarom 229 | wanneer 230 | want 231 | waren 232 | was 233 | weer 234 | weg 235 | wegens 236 | wel 237 | weldra 238 | welk 239 | welke 240 | wie 241 | wiens 242 | wier 243 | wij 244 | wijzelf 245 | zal 246 | ze 247 | zelfs 248 | zichzelf 249 | zij 250 | zijn 251 | zijne 252 | zo 253 | zodra 254 | zonder 255 | zou 256 | zouden 257 | zowat 258 | zulke 259 | zullen 260 | zult 261 | aan 262 | af 263 | al 264 | alles 265 | als 266 | altijd 267 | andere 268 | ben 269 | bij 270 | daar 271 | dan 272 | dat 273 | de 274 | der 275 | deze 276 | die 277 | dit 278 | doch 279 | doen 280 | door 281 | dus 282 | 
een 283 | eens 284 | en 285 | er 286 | ge 287 | geen 288 | geweest 289 | haar 290 | had 291 | heb 292 | hebben 293 | heeft 294 | hem 295 | het 296 | hier 297 | hij 298 | hoe 299 | hun 300 | iemand 301 | iets 302 | ik 303 | in 304 | is 305 | ja 306 | je 307 | kan 308 | kon 309 | kunnen 310 | maar 311 | me 312 | meer 313 | men 314 | met 315 | mij 316 | mijn 317 | moet 318 | na 319 | naar 320 | niet 321 | niets 322 | nog 323 | nu 324 | of 325 | om 326 | omdat 327 | ons 328 | ook 329 | op 330 | over 331 | reeds 332 | te 333 | tegen 334 | toch 335 | toen 336 | tot 337 | u 338 | uit 339 | uw 340 | van 341 | veel 342 | voor 343 | want 344 | waren 345 | was 346 | wat 347 | we 348 | wel 349 | werd 350 | wezen 351 | wie 352 | wij 353 | wil 354 | worden 355 | zal 356 | ze 357 | zei 358 | zelf 359 | zich 360 | zij 361 | zijn 362 | zo 363 | zonder 364 | zou 365 | Footer 366 | de 367 | en 368 | van 369 | ik 370 | te 371 | dat 372 | die 373 | in 374 | een 375 | hij 376 | het 377 | niet 378 | zijn 379 | is 380 | was 381 | op 382 | aan 383 | met 384 | als 385 | voor 386 | had 387 | er 388 | maar 389 | om 390 | hem 391 | dan 392 | zou 393 | of 394 | wat 395 | mijn 396 | men 397 | dit 398 | zo 399 | door 400 | over 401 | ze 402 | zich 403 | bij 404 | ook 405 | tot 406 | je 407 | mij 408 | uit 409 | der 410 | daar 411 | haar 412 | naar 413 | heb 414 | hoe 415 | heeft 416 | hebben 417 | deze 418 | u 419 | want 420 | nog 421 | zal 422 | me 423 | zij 424 | nu 425 | ge 426 | geen 427 | omdat 428 | iets 429 | worden 430 | toch 431 | al 432 | waren 433 | veel 434 | meer 435 | doen 436 | toen 437 | moet 438 | ben 439 | zonder 440 | kan 441 | hun 442 | dus 443 | alles 444 | onder 445 | ja 446 | eens 447 | hier 448 | wie 449 | werd 450 | altijd 451 | doch 452 | wordt 453 | wezen 454 | kunnen 455 | ons 456 | zelf 457 | tegen 458 | na 459 | reeds 460 | wil 461 | kon 462 | niets 463 | uw 464 | iemand 465 | geweest 466 | andere 467 | aan 468 | achte 469 | achter 470 | af 471 | al 472 | alle 473 | 
alleen 474 | alles 475 | als 476 | ander 477 | anders 478 | beetje 479 | behalve 480 | beide 481 | beiden 482 | ben 483 | beneden 484 | bent 485 | bij 486 | bijna 487 | bijv 488 | blijkbaar 489 | blijken 490 | boven 491 | bv 492 | daar 493 | daardoor 494 | daarin 495 | daarna 496 | daarom 497 | daaruit 498 | dan 499 | dat 500 | de 501 | deden 502 | deed 503 | derde 504 | derhalve 505 | dertig 506 | deze 507 | dhr 508 | die 509 | dit 510 | doe 511 | doen 512 | doet 513 | door 514 | drie 515 | duizend 516 | echter 517 | een 518 | eens 519 | eerst 520 | eerste 521 | eigen 522 | eigenlijk 523 | elk 524 | elke 525 | en 526 | enige 527 | er 528 | erg 529 | ergens 530 | etc 531 | etcetera 532 | even 533 | geen 534 | genoeg 535 | geweest 536 | haar 537 | haarzelf 538 | had 539 | hadden 540 | heb 541 | hebben 542 | hebt 543 | hedden 544 | heeft 545 | heel 546 | hem 547 | hemzelf 548 | hen 549 | het 550 | hetzelfde 551 | hier 552 | hierin 553 | hierna 554 | hierom 555 | hij 556 | hijzelf 557 | hoe 558 | honderd 559 | hun 560 | ieder 561 | iedere 562 | iedereen 563 | iemand 564 | iets 565 | ik 566 | in 567 | inderdaad 568 | intussen 569 | is 570 | ja 571 | je 572 | jij 573 | jijzelf 574 | jou 575 | jouw 576 | jullie 577 | kan 578 | kon 579 | konden 580 | kun 581 | kunnen 582 | kunt 583 | laatst 584 | later 585 | lijken 586 | lijkt 587 | maak 588 | maakt 589 | maakte 590 | maakten 591 | maar 592 | mag 593 | maken 594 | me 595 | meer 596 | meest 597 | meestal 598 | men 599 | met 600 | mevr 601 | mij 602 | mijn 603 | minder 604 | miss 605 | misschien 606 | missen 607 | mits 608 | mocht 609 | mochten 610 | moest 611 | moesten 612 | moet 613 | moeten 614 | mogen 615 | mr 616 | mrs 617 | mw 618 | na 619 | naar 620 | nam 621 | namelijk 622 | nee 623 | neem 624 | negen 625 | nemen 626 | nergens 627 | niemand 628 | niet 629 | niets 630 | niks 631 | noch 632 | nochtans 633 | nog 634 | nooit 635 | nu 636 | nv 637 | of 638 | om 639 | omdat 640 | ondanks 641 | onder 642 | ondertussen 643 
| ons 644 | onze 645 | onzeker 646 | ooit 647 | ook 648 | op 649 | over 650 | overal 651 | overige 652 | paar 653 | per 654 | recent 655 | redelijk 656 | samen 657 | sinds 658 | steeds 659 | te 660 | tegen 661 | tegenover 662 | thans 663 | tien 664 | tiende 665 | tijdens 666 | tja 667 | toch 668 | toe 669 | tot 670 | totdat 671 | tussen 672 | twee 673 | tweede 674 | u 675 | uit 676 | uw 677 | vaak 678 | van 679 | vanaf 680 | veel 681 | veertig 682 | verder 683 | verscheidene 684 | verschillende 685 | via 686 | vier 687 | vierde 688 | vijf 689 | vijfde 690 | vijftig 691 | volgend 692 | volgens 693 | voor 694 | voordat 695 | voorts 696 | waar 697 | waarom 698 | waarschijnlijk 699 | wanneer 700 | waren 701 | was 702 | wat 703 | we 704 | wederom 705 | weer 706 | weinig 707 | wel 708 | welk 709 | welke 710 | werd 711 | werden 712 | werder 713 | whatever 714 | wie 715 | wij 716 | wijzelf 717 | wil 718 | wilden 719 | willen 720 | word 721 | worden 722 | wordt 723 | zal 724 | ze 725 | zei 726 | zeker 727 | zelf 728 | zelfde 729 | zes 730 | zeven 731 | zich 732 | zij 733 | zijn 734 | zijzelf 735 | zo 736 | zoals 737 | zodat 738 | zou 739 | zouden 740 | zulk 741 | zullen 742 | aan 743 | af 744 | al 745 | als 746 | bij 747 | dan 748 | dat 749 | die 750 | dit 751 | een 752 | en 753 | er 754 | had 755 | heb 756 | hem 757 | het 758 | hij 759 | hoe 760 | hun 761 | ik 762 | in 763 | is 764 | je 765 | kan 766 | me 767 | men 768 | met 769 | mij 770 | nog 771 | nu 772 | of 773 | ons 774 | ook 775 | te 776 | tot 777 | uit 778 | van 779 | was 780 | wat 781 | we 782 | wel 783 | wij 784 | zal 785 | ze 786 | zei 787 | zij 788 | zo 789 | zou 790 | de 791 | en 792 | van 793 | ik 794 | te 795 | dat 796 | die 797 | in 798 | een 799 | hij 800 | het 801 | niet 802 | zijn 803 | is 804 | was 805 | op 806 | aan 807 | met 808 | als 809 | voor 810 | had 811 | er 812 | maar 813 | om 814 | hem 815 | dan 816 | zou 817 | of 818 | wat 819 | mijn 820 | men 821 | dit 822 | zo 823 | door 824 | over 825 | ze 
826 | zich 827 | bij 828 | ook 829 | tot 830 | je 831 | mij 832 | uit 833 | der 834 | daar 835 | haar 836 | naar 837 | heb 838 | hoe 839 | heeft 840 | hebben 841 | deze 842 | u 843 | want 844 | nog 845 | zal 846 | me 847 | zij 848 | nu 849 | ge 850 | geen 851 | omdat 852 | iets 853 | worden 854 | toch 855 | al 856 | waren 857 | veel 858 | meer 859 | doen 860 | toen 861 | moet 862 | ben 863 | zonder 864 | kan 865 | hun 866 | dus 867 | alles 868 | onder 869 | ja 870 | eens 871 | hier 872 | wie 873 | werd 874 | altijd 875 | doch 876 | wordt 877 | wezen 878 | kunnen 879 | ons 880 | zelf 881 | tegen 882 | na 883 | reeds 884 | wil 885 | kon 886 | niets 887 | uw 888 | iemand 889 | geweest 890 | andere 891 | aan 892 | aangaande 893 | aangezien 894 | achte 895 | achter 896 | achterna 897 | af 898 | afgelopen 899 | al 900 | aldaar 901 | aldus 902 | alhoewel 903 | alias 904 | alle 905 | allebei 906 | alleen 907 | alles 908 | als 909 | alsnog 910 | altijd 911 | altoos 912 | ander 913 | andere 914 | anders 915 | anderszins 916 | beetje 917 | behalve 918 | behoudens 919 | beide 920 | beiden 921 | ben 922 | beneden 923 | bent 924 | bepaald 925 | betreffende 926 | bij 927 | bijna 928 | bijv 929 | binnen 930 | binnenin 931 | blijkbaar 932 | blijken 933 | boven 934 | bovenal 935 | bovendien 936 | bovengenoemd 937 | bovenstaand 938 | bovenvermeld 939 | buiten 940 | bv 941 | daar 942 | daardoor 943 | daarheen 944 | daarin 945 | daarna 946 | daarnet 947 | daarom 948 | daarop 949 | daaruit 950 | daarvanlangs 951 | dan 952 | dat 953 | de 954 | deden 955 | deed 956 | der 957 | derde 958 | derhalve 959 | dertig 960 | deze 961 | dhr 962 | die 963 | dikwijls 964 | dit 965 | doch 966 | doe 967 | doen 968 | doet 969 | door 970 | doorgaand 971 | drie 972 | duizend 973 | dus 974 | echter 975 | een 976 | eens 977 | eer 978 | eerdat 979 | eerder 980 | eerlang 981 | eerst 982 | eerste 983 | eigen 984 | eigenlijk 985 | elk 986 | elke 987 | en 988 | enig 989 | enige 990 | enigszins 991 | enkel 992 | 
er 993 | erdoor 994 | erg 995 | ergens 996 | etc 997 | etcetera 998 | even 999 | eveneens 1000 | evenwel 1001 | gauw 1002 | ge 1003 | gedurende 1004 | geen 1005 | gehad 1006 | gekund 1007 | geleden 1008 | gelijk 1009 | gemoeten 1010 | gemogen 1011 | genoeg 1012 | geweest 1013 | gewoon 1014 | gewoonweg 1015 | haar 1016 | haarzelf 1017 | had 1018 | hadden 1019 | hare 1020 | heb 1021 | hebben 1022 | hebt 1023 | hedden 1024 | heeft 1025 | heel 1026 | hem 1027 | hemzelf 1028 | hen 1029 | het 1030 | hetzelfde 1031 | hier 1032 | hierbeneden 1033 | hierboven 1034 | hierin 1035 | hierna 1036 | hierom 1037 | hij 1038 | hijzelf 1039 | hoe 1040 | hoewel 1041 | honderd 1042 | hun 1043 | hunne 1044 | ieder 1045 | iedere 1046 | iedereen 1047 | iemand 1048 | iets 1049 | ik 1050 | ikzelf 1051 | in 1052 | inderdaad 1053 | inmiddels 1054 | intussen 1055 | inzake 1056 | is 1057 | ja 1058 | je 1059 | jezelf 1060 | jij 1061 | jijzelf 1062 | jou 1063 | jouw 1064 | jouwe 1065 | juist 1066 | jullie 1067 | kan 1068 | klaar 1069 | kon 1070 | konden 1071 | krachtens 1072 | kun 1073 | kunnen 1074 | kunt 1075 | laatst 1076 | later 1077 | liever 1078 | lijken 1079 | lijkt 1080 | maak 1081 | maakt 1082 | maakte 1083 | maakten 1084 | maar 1085 | mag 1086 | maken 1087 | me 1088 | meer 1089 | meest 1090 | meestal 1091 | men 1092 | met 1093 | mevr 1094 | mezelf 1095 | mij 1096 | mijn 1097 | mijnent 1098 | mijner 1099 | mijzelf 1100 | minder 1101 | miss 1102 | misschien 1103 | missen 1104 | mits 1105 | mocht 1106 | mochten 1107 | moest 1108 | moesten 1109 | moet 1110 | moeten 1111 | mogen 1112 | mr 1113 | mrs 1114 | mw 1115 | na 1116 | naar 1117 | nadat 1118 | nam 1119 | namelijk 1120 | nee 1121 | neem 1122 | negen 1123 | nemen 1124 | nergens 1125 | net 1126 | niemand 1127 | niet 1128 | niets 1129 | niks 1130 | noch 1131 | nochtans 1132 | nog 1133 | nogal 1134 | nooit 1135 | nu 1136 | nv 1137 | of 1138 | ofschoon 1139 | om 1140 | omdat 1141 | omhoog 1142 | omlaag 1143 | omstreeks 1144 | omtrent 1145 | 
omver 1146 | ondanks 1147 | onder 1148 | ondertussen 1149 | ongeveer 1150 | ons 1151 | onszelf 1152 | onze 1153 | onzeker 1154 | ooit 1155 | ook 1156 | op 1157 | opnieuw 1158 | opzij 1159 | over 1160 | overal 1161 | overeind 1162 | overige 1163 | overigens 1164 | paar 1165 | pas 1166 | per 1167 | precies 1168 | recent 1169 | redelijk 1170 | reeds 1171 | rond 1172 | rondom 1173 | samen 1174 | sedert 1175 | sinds 1176 | sindsdien 1177 | slechts 1178 | sommige 1179 | spoedig 1180 | steeds 1181 | tamelijk 1182 | te 1183 | tegen 1184 | tegenover 1185 | tenzij 1186 | terwijl 1187 | thans 1188 | tien 1189 | tiende 1190 | tijdens 1191 | tja 1192 | toch 1193 | toe 1194 | toen 1195 | toenmaals 1196 | toenmalig 1197 | tot 1198 | totdat 1199 | tussen 1200 | twee 1201 | tweede 1202 | u 1203 | uit 1204 | uitgezonderd 1205 | uw 1206 | vaak 1207 | vaakwat 1208 | van 1209 | vanaf 1210 | vandaan 1211 | vanuit 1212 | vanwege 1213 | veel 1214 | veeleer 1215 | veertig 1216 | verder 1217 | verscheidene 1218 | verschillende 1219 | vervolgens 1220 | via 1221 | vier 1222 | vierde 1223 | vijf 1224 | vijfde 1225 | vijftig 1226 | vol 1227 | volgend 1228 | volgens 1229 | voor 1230 | vooraf 1231 | vooral 1232 | vooralsnog 1233 | voorbij 1234 | voordat 1235 | voordezen 1236 | voordien 1237 | voorheen 1238 | voorop 1239 | voorts 1240 | vooruit 1241 | vrij 1242 | vroeg 1243 | waar 1244 | waarom 1245 | waarschijnlijk 1246 | wanneer 1247 | want 1248 | waren 1249 | was 1250 | wat 1251 | we 1252 | wederom 1253 | weer 1254 | weg 1255 | wegens 1256 | weinig 1257 | wel 1258 | weldra 1259 | welk 1260 | welke 1261 | werd 1262 | werden 1263 | werder 1264 | wezen 1265 | whatever 1266 | wie 1267 | wiens 1268 | wier 1269 | wij 1270 | wijzelf 1271 | wil 1272 | wilden 1273 | willen 1274 | word 1275 | worden 1276 | wordt 1277 | zal 1278 | ze 1279 | zei 1280 | zeker 1281 | zelf 1282 | zelfde 1283 | zelfs 1284 | zes 1285 | zeven 1286 | zich 1287 | zichzelf 1288 | zij 1289 | zijn 1290 | zijne 1291 | zijzelf 1292 | 
zo 1293 | zoals 1294 | zodat 1295 | zodra 1296 | zonder 1297 | zou 1298 | zouden 1299 | zowat 1300 | zulk 1301 | zulke 1302 | zullen 1303 | zult -------------------------------------------------------------------------------- /assistant_skill_analysis/highlighting/highlighter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import numpy as np 4 | import pandas as pd 5 | from IPython.display import display, Markdown 6 | 7 | import seaborn as sns 8 | import matplotlib.pyplot as plt 9 | 10 | import ibm_watson 11 | 12 | from ..utils import skills_util 13 | from ..inferencing import inferencer 14 | 15 | MAX_TOKEN_LENGTH = 20 16 | NGRAM_RANGE = [1] 17 | 18 | 19 | def get_highlights_in_batch_multi_thread( 20 | conversation, 21 | full_results, 22 | output_folder, 23 | confidence_threshold, 24 | show_worst_k, 25 | lang_util, 26 | skill_id=None, 27 | assistant_id=None, 28 | intent_to_action_mapping=None, 29 | environment_id=None, 30 | ): 31 | """ 32 | Given the prediction result, rank prediction results from worst to best 33 | & analyze the top k worst results. 
34 | Term level highlighting on the worst results shows the sensitivity of terms in utterance 35 | :param conversation: conversation object produced by watson api 36 | :param skill_id: skill id 37 | :param full_results: prediction result showing the ranked list of intents by confidence scores 38 | :param output_folder: the output folder where the highlighting images will be saved 39 | :param confidence_threshold: the confidence threshold for offtopic detection 40 | :param show_worst_k: the top worst k results based on heuristics 41 | :param assistant_id: 42 | :param intent_to_action_mapping: 43 | :param environment_id: the environment id 44 | :return: 45 | """ 46 | if isinstance(conversation, ibm_watson.AssistantV1): 47 | assert skill_id is not None 48 | else: 49 | assert assistant_id is not None 50 | wrong_examples_sorted = _filter_results( 51 | full_results, confidence_threshold, lang_util 52 | ) 53 | display( 54 | Markdown( 55 | "### Identified {} problematic utterances ".format( 56 | len(wrong_examples_sorted) 57 | ) 58 | ) 59 | ) 60 | display(Markdown(" ")) 61 | 62 | wrong_examples_sorted = wrong_examples_sorted[:show_worst_k] 63 | 64 | ( 65 | adversarial_results, 66 | adversarial_span_dict, 67 | ) = _adversarial_examples_multi_thread_inference( 68 | wrong_examples_sorted=wrong_examples_sorted, 69 | conversation=conversation, 70 | skill_id=skill_id, 71 | assistant_id=assistant_id, 72 | intent_to_action_mapping=intent_to_action_mapping, 73 | environment_id=environment_id, 74 | ) 75 | 76 | if not adversarial_results.empty: 77 | 78 | display(Markdown("{} examples are shown below:".format(show_worst_k))) 79 | for original_example in wrong_examples_sorted: 80 | if not original_example[2]: 81 | label = skills_util.OFFTOPIC_LABEL 82 | else: 83 | label = original_example[2] 84 | label_idx = label + "\t" + str(original_example[0]) 85 | adversarial_result_subset = adversarial_results[ 86 | adversarial_results["correct_intent"] == label_idx 87 | ] 88 | highlight = 
_highlight_scoring( 89 | original_example, 90 | adversarial_result_subset, 91 | adversarial_span_dict, 92 | lang_util, 93 | ) 94 | _plot_highlight(highlight, original_example, output_folder, lang_util) 95 | 96 | 97 | def _filter_results(full_results, confidence_threshold, lang_util): 98 | """ 99 | Given the full predicted results and confidence threshold, 100 | this function returns a ranked list of the mis-classified examples 101 | :param full_results: 102 | :param confidence_threshold: 103 | :return highlighting_candidates_sorted 104 | """ 105 | highlighting_candidates = list() 106 | for idx in range(len(full_results)): 107 | item = full_results.iloc[idx] 108 | results_intent_list = [predict["intent"] for predict in item["top_predicts"]] 109 | if not item["top_predicts"]: 110 | result_dict = {} 111 | elif ( 112 | isinstance(item["top_predicts"], list) 113 | and "intent" in item["top_predicts"][0] 114 | ): 115 | result_dict = { 116 | pred["intent"]: pred["confidence"] for pred in item["top_predicts"] 117 | } 118 | else: 119 | result_dict = dict(item["top_predicts"]) 120 | if item["correct_intent"] in results_intent_list: 121 | reference_position = results_intent_list.index(item["correct_intent"]) 122 | else: 123 | reference_position = len(results_intent_list) 124 | 125 | rank_score = 0 126 | # for off-topic examples, rank score = off-topic confidence score - confidence threshold 127 | if item["correct_intent"] == skills_util.OFFTOPIC_LABEL: 128 | if item["top_confidence"] > confidence_threshold: 129 | rank_score = item["top_confidence"] - confidence_threshold 130 | 131 | highlighting_candidates.append( 132 | ( 133 | idx, 134 | item["utterance"], 135 | None, 136 | item["top_intent"], 137 | item["top_confidence"], 138 | rank_score, 139 | reference_position, 140 | ) 141 | ) 142 | else: 143 | if (item["top_intent"] != item["correct_intent"]) or ( 144 | item["top_confidence"] <= confidence_threshold 145 | ): 146 | if item["top_intent"] != item["correct_intent"]: 147 | 
# for incorrectly predicted examples, if the correct intent is not in top 10 148 | # rank score = confidence of the predicted intent 149 | if item["correct_intent"] not in result_dict: 150 | rank_score = item["top_confidence"] 151 | else: 152 | # for incorrectly predicted examples, if the correct intent is in top 10, 153 | # rank score = confidence of predicted intent - confidence of correct intent 154 | rank_score = ( 155 | item["top_confidence"] - result_dict[item["correct_intent"]] 156 | ) 157 | elif item["top_confidence"] <= confidence_threshold: 158 | # for correctly predicted examples, if the predicted confidence is less than 159 | # confidence threshold, rank score = confidence threshold - predicted confidence 160 | rank_score = confidence_threshold - item["top_confidence"] 161 | highlighting_candidates.append( 162 | ( 163 | idx, 164 | item["utterance"], 165 | item["correct_intent"], 166 | item["top_intent"], 167 | item["top_confidence"], 168 | rank_score, 169 | reference_position, 170 | ) 171 | ) 172 | 173 | highlighting_candidates_sorted = sorted( 174 | highlighting_candidates, key=lambda x: x[5], reverse=True 175 | ) 176 | highlighting_candidates_sorted = [ 177 | candidate 178 | for candidate in highlighting_candidates_sorted 179 | if len(lang_util.tokenize(candidate[1])) < MAX_TOKEN_LENGTH 180 | ] 181 | 182 | return highlighting_candidates_sorted 183 | 184 | 185 | def _plot_highlight(highlight, original_example, output_folder, lang_util): 186 | """ 187 | Plot the highlighting score into a plot and store the plot in the output folder 188 | :param highlight: 189 | :param original_example: 190 | :param output_folder: 191 | """ 192 | if not original_example[2]: 193 | label = skills_util.OFFTOPIC_LABEL 194 | else: 195 | label = original_example[2] 196 | fig, ax = plt.subplots(figsize=(2, 5)) 197 | ax = sns.heatmap( 198 | [[i] for i in highlight.tolist()], 199 | yticklabels=lang_util.tokenize(original_example[1]), 200 | xticklabels=["Sensitivity to intent: " + 
'"' + label + '"'], 201 | cbar_kws={"orientation": "vertical"}, 202 | linewidths=0, 203 | square=False, 204 | cmap="Blues", 205 | ) 206 | 207 | if output_folder: 208 | conf_str = "%.3f" % (original_example[4]) 209 | if original_example[2]: 210 | filename = ( 211 | str(original_example[0]) 212 | + "_groundtruth_" 213 | + original_example[2] 214 | + "_prediction_" 215 | + original_example[3] 216 | + "_confidence_" 217 | + conf_str 218 | + ".png" 219 | ) 220 | else: 221 | filename = ( 222 | str(original_example[0]) 223 | + "_groundtruth_offtopic_prediction_" 224 | + original_example[3] 225 | + "_confidence_" 226 | + conf_str 227 | + ".png" 228 | ) 229 | 230 | save_path = os.path.join(output_folder, filename) 231 | plt.savefig(os.path.join(save_path), bbox_inches="tight") 232 | 233 | table = list() 234 | table.append(["Test Set Index", original_example[0]]) 235 | table.append(["Utterance", original_example[1]]) 236 | table.append( 237 | [ 238 | "Actual Intent", 239 | original_example[2] 240 | if (original_example[2]) 241 | else skills_util.OFFTOPIC_LABEL, 242 | ] 243 | ) 244 | table.append(["Predicted Intent", original_example[3]]) 245 | table.append(["Confidence", original_example[4]]) 246 | with pd.option_context("max_colwidth", 250): 247 | df = pd.DataFrame(data=table, columns=["Characteristic", "Value"]) 248 | df.index = np.arange(1, len(df) + 1) 249 | display(df) 250 | plt.show() 251 | 252 | 253 | def _adversarial_examples_multi_thread_inference( 254 | wrong_examples_sorted, 255 | conversation, 256 | skill_id=None, 257 | assistant_id=None, 258 | intent_to_action_mapping=None, 259 | environment_id=None, 260 | ): 261 | """ 262 | Perform multi threaded inference on all the adversarial examples 263 | :param wrong_examples_sorted: 264 | :param conversation: 265 | :param skill_id: 266 | :param assistant_id: 267 | :param intent_to_action_mapping: 268 | :param environment_id: 269 | """ 270 | if isinstance(conversation, ibm_watson.AssistantV1): 271 | assert skill_id is not 
None 272 | else: 273 | assert assistant_id is not None 274 | all_adversarial_examples = list() 275 | # the adversarial labels will be label\tidx for later regrouping purposes 276 | all_adversarial_label_idx = list() 277 | # map the adversarial example: span of adversarial 278 | adversarial_span_dict = dict() 279 | for original_example in wrong_examples_sorted: 280 | 281 | adversarial_examples, adversarial_span = _generate_adversarial_examples( 282 | original_example[1], original_example[0] 283 | ) 284 | 285 | if not original_example[2]: 286 | label = skills_util.OFFTOPIC_LABEL 287 | else: 288 | label = original_example[2] 289 | adversarial_label = label + "\t" + str(original_example[0]) 290 | 291 | all_adversarial_examples.extend(adversarial_examples) 292 | all_adversarial_label_idx.extend( 293 | [adversarial_label] * len(adversarial_examples) 294 | ) 295 | adversarial_span_dict.update(adversarial_span) 296 | 297 | adversarial_test_data_frame = pd.DataFrame( 298 | {"utterance": all_adversarial_examples, "intent": all_adversarial_label_idx} 299 | ) 300 | adversarial_results = inferencer.inference( 301 | conversation=conversation, 302 | test_data=adversarial_test_data_frame, 303 | max_thread=min(4, os.cpu_count() if os.cpu_count() else 1), 304 | skill_id=skill_id, 305 | assistant_id=assistant_id, 306 | intent_to_action_mapping=intent_to_action_mapping, 307 | environment_id=environment_id, 308 | ) 309 | display(Markdown(" ")) 310 | return adversarial_results, adversarial_span_dict 311 | 312 | 313 | def _generate_adversarial_examples(utt, original_idx): 314 | """ 315 | Generate adversarial examples by removing single tokens 316 | :param utt: string, utterance for generation of adversarial examples 317 | :param original_idx: the idx of the example in the original input data 318 | :returns 319 | adversarial_examples: list of strings, list of adversarial examples 320 | adversarial_span: dictionary of adversarial examples and the token span of the removed token 321 | """ 
    adversarial_examples = []
    adversarial_span = dict()
    tokens = utt.split()
    # drop every ngram-sized window of tokens to produce perturbed sentences
    for idx in range(len(tokens)):
        for ngram in NGRAM_RANGE:
            new_sent = " ".join(tokens[:idx] + tokens[idx + ngram :])
            adversarial_examples.append(new_sent)
            # key encodes both the perturbed sentence and the originating
            # example index, so identical sentences from different examples
            # do not collide
            adversarial_span[new_sent + "_" + str(original_idx)] = (idx, idx + ngram)
    return adversarial_examples, adversarial_span


def _highlight_scoring(
    original_example, subset_adversarial_result, adversarial_span_dict, lang_util
):
    """
    Calculate the highlighting score using classification results of adversarial examples
    :param original_example: tuple -- indices used: [0] test-set idx, [1] utterance,
        [3] predicted intent, [4] confidence, [6] ranking position of the intent
    :param subset_adversarial_result: inference results for this example's adversarial variants
    :param adversarial_span_dict: maps "<sentence>_<idx>" to the removed token span
    :param lang_util: language utility providing ``tokenize``
    :return: highlight, np.array of shape (n_tokens) with per-token scores
    """
    original_utterance = " ".join(lang_util.tokenize(original_example[1]))
    original_idx = original_example[0]
    original_intent = original_example[3]
    original_confidence = original_example[4]
    original_position = original_example[6]
    tokens = original_utterance.split(" ")
    highlight = np.zeros(len(tokens), dtype="float32")
    for idx in range(len(subset_adversarial_result)):
        adversarial_example = subset_adversarial_result.iloc[idx]
        # skip variants for which the service returned no predictions
        if not adversarial_example["top_predicts"]:
            continue

        predict_dict = dict()
        predict_intent_list = list()
        for prediction in adversarial_example["top_predicts"]:
            predict_dict[prediction["intent"]] = prediction["confidence"]
            predict_intent_list.append(prediction["intent"])

        if original_intent in predict_dict:
            adversarial_position = list(predict_dict.keys()).index(original_intent)
            adversarial_confidence = predict_dict[original_intent]
        else:
            # target intent dropped out of the top predictions entirely:
            # rank it just past the end with zero confidence
            adversarial_position = len(list(predict_dict.keys()))
            adversarial_confidence = 0

        start, end = adversarial_span_dict[
            adversarial_example["utterance"] + "_" + str(original_idx)
        ]

        highlight = _scoring_function(
            highlight,
            original_position,
            adversarial_position,
            original_confidence,
            adversarial_confidence,
            start,
            end,
        )

    return highlight


def _scoring_function(
    highlight,
    original_position,
    adversarial_position,
    original_confidence,
    adversarial_confidence,
    start_idx,
    end_idx,
):
    """
    scoring function for highlighting of the interval start_idx:end_idx
    :param highlight: np.array of shape (n_tokens)
    :param original_position: ranking position of the target intent for the original sentence
    :param adversarial_position: ranking position of the target intent for the adversarial sentence
    :param original_confidence: confidence of the target intent for the original sentence
    :param adversarial_confidence: confidence of the target intent for the adversarial sentence
    :param start_idx: starting index of the adversarial mask
    :param end_idx: ending index of the adversarial mask
    :return: highlight: np.array of shape (n_tokens)
    """
    # position difference accounts for the change in the position of the target intent among
    # the top 10 intents return by the message api
    position_difference = (1 / float(original_position + 1.0)) - (
        1 / float(adversarial_position + 1.0)
    )

    # confidence difference accounts for the change in the confidence
    confidence_difference = original_confidence - adversarial_confidence

    ngram_size = end_idx - start_idx
    # longer masks are quadratically down-weighted via 1/ngram_size^2
    weight = math.pow(1.0 / ngram_size, 2.0)

    # highlight score for the interval of start_idx:end_idx is a weighted average of
    # the position difference and confidence difference
    weighted_difference = (
        weight
        * ((0.2 * confidence_difference) + (0.8 * position_difference))
        / ngram_size
    )

    highlight[start_idx:end_idx] += weighted_difference
425 | 426 | return highlight 427 | -------------------------------------------------------------------------------- /assistant_skill_analysis/utils/skills_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import random 4 | import csv 5 | import re 6 | import getpass 7 | import nbformat 8 | import pandas as pd 9 | import numpy as np 10 | from nbconvert.preprocessors import ExecutePreprocessor 11 | import ibm_watson 12 | import codecs 13 | from ibm_cloud_sdk_core.authenticators import ( 14 | IAMAuthenticator, 15 | BasicAuthenticator, 16 | NoAuthAuthenticator, 17 | CloudPakForDataAuthenticator, 18 | BearerTokenAuthenticator, 19 | ) 20 | 21 | 22 | DEV_DATACENTER = ( 23 | "https://api.us-south.assistant.dev.watson.cloud.ibm.com", 24 | "https://iam.test.cloud.ibm.com/identity/token", 25 | ) 26 | DEFAULT_V1_API_VERSION = "2019-02-28" 27 | DEFAULT_V2_API_VERSION = "2021-11-27" 28 | DEFAULT_PROD_URL = "https://gateway.watsonplatform.net/assistant/api" 29 | DEFAULT_USERNAME = "apikey" 30 | STAGE_IAM_URL = "https://iam.stage1.bluemix.net/identity/token" 31 | DEFAULT_AUTHENTICATOR_URL = "https://iam.cloud.ibm.com/identity/token" 32 | 33 | OFFTOPIC_LABEL = "SYSTEM_OUT_OF_DOMAIN" 34 | 35 | OFFTOPIC_CONF_THRESHOLD = 0.2 36 | 37 | LABEL_FONT = {"family": "normal", "weight": "bold", "size": 17} 38 | 39 | TITLE_FONT = {"family": "normal", "weight": "bold", "size": 25} 40 | 41 | 42 | def stratified_sampling(workspace, sampling_percentage=0.8): 43 | """ 44 | Create a stratified sample of the workspace json 45 | & return a intent json acceptable in Assistant API 46 | 47 | :param workspace: json format output defined by Assistant API 48 | :param sampling_percentage: percentage of original to sample 49 | :return train_workspace_data: list of intents for train 50 | :return test_workspace_data: list of utterance,intent pairs for test 51 | """ 52 | train_workspace_data = list() 53 | test_workspace_data = list() 54 | 
    # per-intent stratified split: shuffle example indices, then cut at the
    # sampling percentage so each intent keeps the same train/test ratio
    for i in range(len(workspace["intents"])):
        intent = workspace["intents"][i]
        sampling_index = list(np.arange(len(intent["examples"])))
        random.shuffle(sampling_index)
        # training set
        train_test_split_cutoff = int(sampling_percentage * len(sampling_index))
        train_examples = [
            intent["examples"][index]
            for index in sampling_index[:train_test_split_cutoff]
        ]
        train_workspace_data.append({"intent": workspace["intents"][i]["intent"]})
        train_workspace_data[i].update({"description": "string"})
        train_workspace_data[i].update({"examples": train_examples})
        # test set
        test_examples = [
            intent["examples"][index]
            for index in sampling_index[train_test_split_cutoff:]
        ]
        # test rows are "utterance\tintent" strings
        test_workspace_data.extend(
            [
                utterances["text"] + "\t" + workspace["intents"][i]["intent"]
                for utterances in test_examples
            ]
        )

    return train_workspace_data, test_workspace_data


def create_workspace(conversation, intent_json=None):
    """
    Create a workspace for testing purpose
    :param conversation: conversation object created by Watson Assistant api
    :param intent_json: nested json of utterance and intent pairs
    :return response: the workspace id and other metadata related to the new workspace
    """
    response = conversation.create_workspace(
        name="test_workspace",
        description="",
        language="en",
        intents=intent_json,
        entities=[],
        counterexamples=[],
        metadata={},
    ).get_result()
    return response


def input_credentials(input_apikey=True, input_skill_id=True, input_assistant_id=False):
    """
    Prompt user to enter apikey and skill id (workspace_id)
    :param input_apikey: prompt for the api key
    :param input_skill_id: prompt for the skill (workspace) id
    :param input_assistant_id: prompt for the assistant id
    :return: (apikey, skill_id, assistant_id), each None when not prompted
    """
    apikey, skill_id, assistant_id = None, None, None
    if input_apikey:
        apikey = getpass.getpass("Please enter apikey: ")
    if input_skill_id:
        skill_id = getpass.getpass("Please enter skill-id (workspace_id): ")
    if input_assistant_id:
        assistant_id = getpass.getpass("Please enter assistant-id: ")
    return apikey, skill_id, assistant_id


def retrieve_conversation(
    iam_apikey=None,
    url=DEFAULT_PROD_URL,
    api_version=None,
    username=DEFAULT_USERNAME,
    password=None,
    authenticator_url=DEFAULT_AUTHENTICATOR_URL,
    sdk_version="V1",
    cp4d_auth=False,
    bearer_token=None,
):
    """
    Retrieve workspace from Assistant instance
    :param iam_apikey: IAM api key; takes precedence over all other credentials
    :param url: service url of the Assistant instance
    :param api_version: api version date; defaults per sdk_version when None
    :param username: basic-auth / CP4D username
    :param password: basic-auth / CP4D password
    :param authenticator_url: token endpoint used by the IAM / CP4D authenticators
    :param sdk_version: V2 is needed for action workspaces
    :param cp4d_auth: when True, authenticate against Cloud Pak for Data
    :param bearer_token: pre-obtained bearer token
    :return conversation: AssistantV1 or AssistantV2 client
    """
    assert sdk_version in ["V1", "V2"]
    if api_version is None:
        if sdk_version == "V1":
            api_version = DEFAULT_V1_API_VERSION
        else:
            api_version = DEFAULT_V2_API_VERSION
    # credential precedence: apikey > username/password > bearer token > no auth
    if iam_apikey:
        authenticator = IAMAuthenticator(apikey=iam_apikey, url=authenticator_url)
    elif username and password:
        if cp4d_auth:
            authenticator = CloudPakForDataAuthenticator(
                username=username, password=password, url=authenticator_url
            )
        else:
            authenticator = BasicAuthenticator(username=username, password=password)
    elif bearer_token is not None:
        authenticator = BearerTokenAuthenticator(bearer_token=bearer_token)
    else:
        authenticator = NoAuthAuthenticator()

    if sdk_version == "V1":
        conversation = ibm_watson.AssistantV1(
            authenticator=authenticator, version=api_version
        )
    else:
        conversation = ibm_watson.AssistantV2(
            authenticator=authenticator, version=api_version
        )

    conversation.set_service_url(url)

    return conversation


def retrieve_workspace(skill_id, conversation, export_flag=True):
    """
    retrieve the workspace based on the workspace id
    :param skill_id: workspace id of the skill
    :param conversation: AssistantV1 client
    :param export_flag: when True, include all workspace content in the response
    :return: workspace_dictionary
    """
    ws_json = conversation.get_workspace(skill_id, export=export_flag)
    return ws_json.get_result()


def parse_workspace_json(workspace_json):
    """
    Parse workspace json and returns list of utterances, list of intents, and list of entities, and intent to action title mapping
    """
    ws_type = workspace_json.get("type", "dialog")
    # a dialog-typed export with the actions dialog setting is really an action skill
    dialog_setting_action_flag = bool(
        workspace_json.get("dialog_settings", {}).get("actions", False)
    )
    if dialog_setting_action_flag:
        ws_type = "action"
    utterances, intents, entities = [], [], []
    raw_intent_name_to_action_title_mapping = None

    if ws_type == "dialog":

        for intent in workspace_json["intents"]:
            for example in intent["examples"]:
                utterances.append(example["text"])
                intents.append(intent["intent"])
        entities = workspace_json["entities"]

    else:
        # intent name to action title mapping for readability
        raw_intent_name_to_action_title_mapping = {
            action["condition"]["intent"]: action["title"]
            for action in workspace_json["workspace"]["actions"]
            if action.get("condition", {}).get("intent")
        }
        for intent in workspace_json["workspace"]["intents"]:
            # NOTE(review): raises KeyError if an intent has no matching action
            # condition -- confirm every intent is referenced by some action
            action_title = raw_intent_name_to_action_title_mapping[intent["intent"]]
            for example in intent["examples"]:
                utterances.append(example["text"])
                intents.append(action_title)
        entities = workspace_json["workspace"]["entities"]
    return utterances, intents, entities, raw_intent_name_to_action_title_mapping


def extract_workspace_data(workspace, language_util):
    """
    Extract relevant data and vocabulary
    :param workspace: workspace json, could be from an action skill or a dialog skill
    :param language_util: language utility used to preprocess and tokenize
    :return: workspace_pd, vocabulary, entities, intent name to
action title mapping 225 | """ 226 | relevant_data = {"utterance": list(), "intent": list(), "tokens": list()} 227 | vocabulary = set() 228 | ( 229 | utterances, 230 | intents, 231 | entities, 232 | raw_intent_name_to_action_title_mapping, 233 | ) = parse_workspace_json(workspace) 234 | 235 | for utterance, intent in zip(utterances, intents): 236 | # preprocess utterance 237 | utterance = language_util.preprocess(utterance) 238 | tokens = language_util.tokenize(utterance) 239 | 240 | relevant_data["utterance"].append(utterance) 241 | relevant_data["intent"].append(intent) 242 | relevant_data["tokens"].append(tokens) 243 | 244 | vocabulary.update(tokens) 245 | 246 | workspace_pd = pd.DataFrame(relevant_data) 247 | return workspace_pd, vocabulary, entities, raw_intent_name_to_action_title_mapping 248 | 249 | 250 | def get_test_workspace(conversation, workspace_json): 251 | 252 | response_json = conversation.create_workspace( 253 | name="test", 254 | language="en", 255 | intents=workspace_json["intents"], 256 | entities=workspace_json["entities"], 257 | ).get_result() 258 | workspace_id = response_json["workspace_id"] 259 | return workspace_id 260 | 261 | 262 | def process_test_set(test_set, lang_util, delim="\t", cos=False): 263 | """ 264 | Process test set given the path to the test fil 265 | :param test_set: path to the test set on the local computer or cos object body of test csv 266 | :param lang_util: language utility 267 | :param delim: delimiter, use "," for cos instance 268 | :param cos: cos flag to indicate whether this is a path from local system or stream body from cos 269 | :return: 270 | """ 271 | user_inputs = list() 272 | intents = list() 273 | tokens_list = list() 274 | file_handle = None 275 | if not cos: 276 | file_handle = open(test_set, "r", encoding="utf-8") 277 | reader = csv.reader(file_handle, delimiter=delim) 278 | else: 279 | reader = csv.reader(codecs.getreader("utf-8")(test_set), delimiter=delim) 280 | 281 | for row in reader: 282 | if 
len(row) == 0: 283 | continue 284 | cur_example = lang_util.preprocess(row[0]) 285 | tokens = lang_util.tokenize(cur_example) 286 | user_inputs.append(cur_example) 287 | tokens_list.append(tokens) 288 | if len(row) == 2: 289 | intents.append(row[1].strip()) 290 | elif len(row) == 1: 291 | intents.append(OFFTOPIC_LABEL) 292 | if file_handle: 293 | file_handle.close() 294 | 295 | test_df = pd.DataFrame( 296 | data={"utterance": user_inputs, "intent": intents, "tokens": tokens_list} 297 | ) 298 | return test_df 299 | 300 | 301 | def export_workspace(conversation, experiment_skill_id, export_path): 302 | """ 303 | Export the workspace to target path 304 | :param conversation: conversation object output by assistant api 305 | :param experiment_skill_id: id of the experimental workspace 306 | :param export_path: the path where the exported workspace will be saved 307 | """ 308 | response = conversation.get_workspace( 309 | skill_id=experiment_skill_id, export=True 310 | ).get_result() 311 | with open(export_path, "w+", encoding="utf-8") as outfile: 312 | json.dump(response, outfile) 313 | 314 | 315 | def run_notebook( 316 | notebook_path, 317 | iam_apikey, 318 | test_file, 319 | output_path, 320 | wksp_id=None, 321 | assistant_id=None, 322 | action_wksp_json_path=None, 323 | ): 324 | """ 325 | Run notebook for end to end test 326 | :param notebook_path: 327 | :param uname: 328 | :param pwd: 329 | :param wksp_id: 330 | :param assistant_id: 331 | :param test_file: 332 | :param action_wksp_json_path: 333 | :param output_path: 334 | """ 335 | notebook_name, _ = os.path.splitext(os.path.basename(notebook_path)) 336 | 337 | with open(notebook_path) as f: 338 | nb = nbformat.read(f, as_version=4) 339 | nb, old_cred_text = _replace_nb_input( 340 | nb, iam_apikey, test_file, wksp_id, assistant_id, action_wksp_json_path 341 | ) 342 | # nb = _remove_experimentation(nb) 343 | 344 | proc = ExecutePreprocessor(timeout=60 * 60, kernel_name="python3") 345 | proc.allow_errors = True 346 
    proc.preprocess(nb, {"metadata": {"path": os.getcwd()}})
    errors = []
    for cell in nb.cells:
        if "outputs" in cell:
            for output in cell["outputs"]:
                if output.output_type == "error":
                    errors.append(output)
        # restore the original credential cell so secrets are not persisted
        # in the executed notebook written below
        if "source" in cell and "iam_apikey = " in cell["source"]:
            cell["source"] = old_cred_text

    with open(output_path + ".ipynb", mode="wt") as f:
        nbformat.write(nb, f)
    return nb, errors


def _replace_nb_input(
    nb, apikey, test_file, wksp_id=None, assistant_id=None, action_wksp_json_path=None
):
    """
    Replace notebook interactive input for tests
    :param nb: parsed notebook object
    :param apikey: api key substituted into the credential cell
    :param test_file: test set path substituted into the notebook
    :param wksp_id: optional workspace id to substitute
    :param assistant_id: optional assistant id to substitute
    :param action_wksp_json_path: optional action workspace json path to substitute
    :return: (nb, old_cred_text) -- old_cred_text is the untouched credential
        cell source so the caller can restore it after execution
    """
    apikey_patt = "iam_apikey = "
    wksp_id_patt = "skill_id = "
    assistant_id_patt = "ASSISTANT_ID = "
    action_wksp_json_patt = "SKILL_FILENAME = "
    test_file_name_patt = "test_set_path = "
    old_cred_text = ""
    test_urls = '"' + DEV_DATACENTER[0] + '",' + '"' + DEV_DATACENTER[1] + '"'
    for cell in nb.cells:
        if "source" in cell and apikey_patt in cell["source"]:
            old_cred_text = cell["source"]
            text = re.sub(
                "(.*)\niam_apikey, (.*)", (r"\1\n#iam_apikey, \2"), cell["source"]
            )  # comment out input_credentials
            text = re.sub("datacenters\['dallas'\]", test_urls, text)

            text = re.sub(
                "(.*)#" + apikey_patt + "'###'(.*)",
                r"\1" + apikey_patt + "'" + apikey + "'" + r"\2",
                text,
            )  # replace pwd
            if wksp_id:
                text = re.sub(
                    "(.*)#" + wksp_id_patt + "'###'(.*)",
                    r"\1" + wksp_id_patt + "'" + wksp_id + "'" + r"\2",
                    text,
                )  # replace wksp_id
            if assistant_id:
                text = re.sub(
                    "(.*)#" + assistant_id_patt + "'###'(.*)",
                    r"\1" + assistant_id_patt + "'" + assistant_id + "'" + r"\2",
                    text,
                )  # replace assistant_id
            if action_wksp_json_path:
                text = re.sub(
                    "(.*)" + action_wksp_json_patt + "'###'(.*)",
                    r"\1"
                    + action_wksp_json_patt
                    + "'"
                    + action_wksp_json_path
                    + "'"
                    + r"\2",
                    text,
                )  # replace action workspace json path
            cell["source"] = text
        elif "source" in cell and test_file_name_patt in cell["source"]:
            text = re.sub(
                "(.*)\n" + test_file_name_patt + '"./test.tsv"(.*)',
                r"\1\n" + test_file_name_patt + "'" + test_file + "'" + r"\2",
                cell["source"],
            )  # replace test file
            cell["source"] = text
    return nb, old_cred_text


def _remove_experimentation(nb):
    """
    Remove the experimentation session from end-to-end test
    :param nb: parsed notebook object
    :return: nb with every cell from the experimentation header onward dropped
    """
    exp_patt = "Part 3: Experimentation"
    new_nb_cells = []
    for cell in nb.cells:
        # keep cells up to (excluding) the experimentation markdown header
        if (
            cell.cell_type == "markdown"
            and "source" in cell
            and exp_patt in cell["source"]
        ):
            break
        else:
            new_nb_cells.append(cell)
    nb.cells = new_nb_cells
    return nb


def retrieve_classifier_response(
    conversation,
    text_input,
    alternate_intents=False,
    user_id="256",
    assistant_id=None,
    skill_id=None,
    environment_id=None,
):
    """
    retrieve classifier response
    :param conversation: AssistantV1 or AssistantV2 instance
    :param text_input: the input utterance
    :param alternate_intents: when True, request alternate intent predictions
    :param user_id: user id attached to the message metadata
    :param assistant_id: required for AssistantV2
    :param skill_id: skill id, required for AssistantV1
    :param environment_id: environment id (V2 only)
    :return response:
    """
    if isinstance(conversation, ibm_watson.AssistantV1):
        assert skill_id is not None
        response = conversation.message(
            input={"message_type": "text", "text": text_input},
            context={"metadata": {"user_id": user_id}},
            workspace_id=skill_id,
            alternate_intents=alternate_intents,
        ).get_result()
else: 478 | assert assistant_id is not None 479 | response = conversation.message_stateless( 480 | input={ 481 | "message_type": "text", 482 | "text": text_input, 483 | "options": {"alternate_intents": alternate_intents}, 484 | }, 485 | context={"metadata": {"user_id": user_id}}, 486 | assistant_id=assistant_id, 487 | environment_id=environment_id, 488 | ).get_result() 489 | return response 490 | -------------------------------------------------------------------------------- /assistant_skill_analysis/confidence_analysis/confidence_analyzer.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | from IPython.display import display, Markdown 6 | from ..utils.skills_util import OFFTOPIC_LABEL 7 | 8 | OFFTOPIC_CNT_THRESHOLD_FOR_DISPLAY = 5 9 | 10 | 11 | def abnormal_conf(full_results, correct_thresh, incorrect_thresh): 12 | """ 13 | perform abnormal confidence analysis on prediction results on the test set 14 | :param full_results: 15 | :param correct_thresh: 16 | :param incorrect_thresh: 17 | :return: 18 | """ 19 | test_pd = pd.DataFrame(full_results) 20 | test_pd = test_pd.loc[~(test_pd["correct_intent"] == "SYSTEM_OUT_OF_DOMAIN")] 21 | correct = test_pd.loc[test_pd["correct_intent"] == test_pd["top_intent"]] 22 | 23 | correct_low_conf = correct.loc[correct["top_confidence"] < correct_thresh] 24 | correct_low_conf = correct_low_conf[ 25 | ["correct_intent", "utterance", "top_confidence", "top_intent"] 26 | ] 27 | 28 | incorrect = test_pd.loc[~(test_pd["correct_intent"] == test_pd["top_intent"])] 29 | incorrect_high_conf = incorrect.loc[incorrect["top_confidence"] > incorrect_thresh] 30 | 31 | top1 = list() 32 | top2 = list() 33 | top3 = list() 34 | 35 | for i in range(len(incorrect_high_conf)): 36 | possible_range = len(incorrect_high_conf.iloc[i, :]["top_predicts"]) 37 | 38 | for j in range(3): 39 | if j == 0: 40 | if 
possible_range >= 1: 41 | top1.append( 42 | incorrect_high_conf.iloc[i, :]["top_predicts"][j]["intent"] 43 | + " " 44 | + "(" 45 | + str( 46 | np.round( 47 | incorrect_high_conf.iloc[i, :]["top_predicts"][j][ 48 | "confidence" 49 | ], 50 | 3, 51 | ) 52 | ) 53 | + ")" 54 | ) 55 | else: 56 | top1.append("NA") 57 | if j == 1: 58 | if possible_range >= 2: 59 | top2.append( 60 | incorrect_high_conf.iloc[i, :]["top_predicts"][j]["intent"] 61 | + " " 62 | + "(" 63 | + str( 64 | np.round( 65 | incorrect_high_conf.iloc[i, :]["top_predicts"][j][ 66 | "confidence" 67 | ], 68 | 3, 69 | ) 70 | ) 71 | + ")" 72 | ) 73 | else: 74 | top2.append("NA") 75 | if j == 2: 76 | if possible_range >= 3: 77 | top3.append( 78 | incorrect_high_conf.iloc[i, :]["top_predicts"][j]["intent"] 79 | + " " 80 | + "(" 81 | + str( 82 | np.round( 83 | incorrect_high_conf.iloc[i, :]["top_predicts"][j][ 84 | "confidence" 85 | ], 86 | 3, 87 | ) 88 | ) 89 | + ")" 90 | ) 91 | else: 92 | top3.append("NA") 93 | 94 | incorrect_high_conf["top1_prediction"] = top1 95 | incorrect_high_conf["top2_prediction"] = top2 96 | incorrect_high_conf["top3_prediction"] = top3 97 | incorrect_high_conf = incorrect_high_conf[ 98 | [ 99 | "correct_intent", 100 | "utterance", 101 | "top1_prediction", 102 | "top2_prediction", 103 | "top3_prediction", 104 | ] 105 | ] 106 | 107 | return correct_low_conf, incorrect_high_conf 108 | 109 | 110 | def analysis(results, intent_list=None): 111 | """ 112 | perform confidence analysis at the overall level or per intent basis 113 | :param results: 114 | :param intent_list: 115 | :return: 116 | """ 117 | 118 | if not intent_list: 119 | _display_analysis_metrics(True) 120 | analysis_df = analysis_pipeline(results) 121 | return analysis_df 122 | 123 | if len(intent_list) == 1 and intent_list[0] == "ALL_INTENTS": 124 | intent_list = list(results["correct_intent"].unique()) 125 | if OFFTOPIC_LABEL in intent_list: 126 | intent_list.remove(OFFTOPIC_LABEL) 127 | analysis_df_list = list() 128 | for 
intent_name in intent_list:
        display(Markdown("### Threshold Analysis for Intent: {}".format(intent_name)))
        analysis_df = analysis_pipeline(results, intent_name)
        # NOTE(review): all(analysis_df) iterates the column labels, so this is
        # truthy for any frame without falsy column names -- confirm intent
        if all(analysis_df):
            analysis_df.index = np.arange(1, len(analysis_df) + 1)
            display(analysis_df)
        analysis_df_list.append(analysis_df)

    return analysis_df_list


def _display_analysis_metrics(display_far):
    """display the explanation for analysis metrics
    :param display_far: when True, also explain the FAR metric
    """
    display(Markdown("### Threshold Metrics"))
    display(
        Markdown(
            "We calculate metrics for responses where the top intent has a confidence above the \
threshold specified on the x-axis. "
        )
    )

    display(
        Markdown(
            "We consider examples which are within the scope of the chatbot's problem formulation as \
on topic or in domain and those examples which are outside the scope of the problem to be \
out of domain or irrelevant"
        )
    )

    display(Markdown("#### 1) Thresholded On Topic Accuracy (TOA)"))
    display(
        Markdown(
            "x-axis: Confidence threshold used || "
            + "y-axis: Intent Detection Accuracy for On Topic utterances"
        )
    )

    display(Markdown("#### 2) Bot Coverage %"))
    display(
        Markdown(
            "x-axis: Confidence threshold used || "
            + "y-axis: Fraction of All utterances above the threshold"
        )
    )

    if display_far:
        display(
            Markdown("#### 3) False Acceptance Rate for Out of Domain Examples (FAR)")
        )
        display(
            Markdown(
                "x-axis: Confidence threshold used || "
                + "y-axis: Fraction of Out of Domain utterances falsely considered on topic"
            )
        )

    display(
        Markdown(
            "#### Note: Default acceptance threshold for Watson Assistant is set at 0.2.\
 Utterances with top intent confidence < 0.2 will be considered irrelevant"
        )
    )


def
generate_unique_thresholds(sorted_results_tuples): 193 | """ 194 | generate list of unique thresholds based off changes in confidence 195 | and sorted list of unique confidences 196 | :return: unique_thresholds 197 | """ 198 | sort_uniq_confs = list(sorted(set([info[2] for info in sorted_results_tuples]))) 199 | thresholds = [0] 200 | thresholds.extend( 201 | [ 202 | (sort_uniq_confs[idx] + sort_uniq_confs[idx + 1]) / 2 203 | for idx in range(len(sort_uniq_confs) - 1) 204 | ] 205 | ) 206 | return thresholds, sort_uniq_confs 207 | 208 | 209 | def _find_threshold(t, thresholds): 210 | """ 211 | find the appropriate cut-off 212 | :param t: 213 | :param thresholds: 214 | :return: 215 | """ 216 | for index in range(len(thresholds) - 1): 217 | if thresholds[index] <= t < thresholds[index + 1]: 218 | return index 219 | 220 | return len(thresholds) - 1 221 | 222 | 223 | def _get_ontopic_accuracy_list(sorted_infos, thresholds): 224 | """ 225 | generate the list of on-topic accuracy and on-topic counts 226 | based on the list of thresholds 227 | :param sorted_infos: 228 | :param thresholds: 229 | :return: 230 | """ 231 | ontopic_infos = [info for info in sorted_infos if info[0] != OFFTOPIC_LABEL] 232 | cor = len([info for info in ontopic_infos if info[0] == info[1]]) 233 | tol = len(ontopic_infos) 234 | accuracy_list = list() 235 | count_list = list() 236 | current_step = 0 237 | for t in thresholds: 238 | while current_step < len(ontopic_infos): 239 | 240 | if ontopic_infos[current_step][2] < t: 241 | tol -= 1 242 | if ontopic_infos[current_step][0] == ontopic_infos[current_step][1]: 243 | cor -= 1 244 | else: 245 | break 246 | current_step += 1 247 | accuracy_list.append(cor / tol) 248 | count_list.append(cor) 249 | 250 | return accuracy_list, count_list 251 | 252 | 253 | def _get_bot_coverage_list(sorted_infos, thresholds): 254 | """ 255 | generate the list of bot coverage ratio and bot coverage counts 256 | based on the list of thresholds 257 | :param sorted_infos: 258 | 
:param thresholds: 259 | :return: 260 | """ 261 | tol = len(sorted_infos) 262 | cur_bot_coverage = tol 263 | bot_coverage_count_list = list() 264 | bot_coverage_list = list() 265 | current_step = 0 266 | for t in thresholds: 267 | while sorted_infos[current_step][2] < t: 268 | cur_bot_coverage -= 1 269 | current_step += 1 270 | bot_coverage_count_list.append(cur_bot_coverage) 271 | bot_coverage_list.append(cur_bot_coverage / tol) 272 | return bot_coverage_list, bot_coverage_count_list 273 | 274 | 275 | def _get_far_list(sorted_infos, thresholds): 276 | """ 277 | find the list of false acceptance rates and false acceptance counts 278 | :param sorted_infos: 279 | :param thresholds: 280 | :return: 281 | """ 282 | offtopic_infos = [info for info in sorted_infos if info[0] == OFFTOPIC_LABEL] 283 | cur_fa_count = len(offtopic_infos) 284 | tol = len(offtopic_infos) 285 | far_list = list() 286 | far_count = list() 287 | current_step = 0 288 | for t in thresholds: 289 | while current_step < len(offtopic_infos): 290 | if offtopic_infos[current_step][2] < t: 291 | cur_fa_count -= 1 292 | current_step += 1 293 | else: 294 | break 295 | far_list.append(cur_fa_count / tol) 296 | far_count.append(cur_fa_count) 297 | return far_list, far_count 298 | 299 | 300 | def _convert_data_format(results, intent_name=None): 301 | """ 302 | convert the dataframe format to tuples of (ground_truth, prediction, confidence) 303 | :param results: results dataframe 304 | :param intent_name: optional parameter to allow different definition of offtopic label in per 305 | intent cases 306 | :return: result_list: list of tuples of (ground_truth, prediction, confidence) sorted by conf 307 | """ 308 | if intent_name: 309 | results = results[ 310 | (results["correct_intent"] == intent_name) 311 | | (results["top_intent"] == intent_name) 312 | ].copy() 313 | 314 | results["correct_intent"] = np.where( 315 | (results["correct_intent"] != results["top_intent"]) 316 | & (results["top_intent"] == intent_name), 
            OFFTOPIC_LABEL,
            results["correct_intent"],
        )

        results_list = [
            (gt, pred, conf)
            for gt, pred, conf in zip(
                results["correct_intent"],
                results["top_intent"],
                results["top_confidence"],
            )
        ]

        results_list = sorted(results_list, key=lambda x: x[2])

    else:
        results_list = [
            (truth, prediction, confidence)
            for truth, prediction, confidence in zip(
                results["correct_intent"],
                results["top_intent"],
                results["top_confidence"],
            )
        ]
        results_list = sorted(results_list, key=lambda x: x[2])

    return results_list


def extract_by_topic(sorted_results):
    """
    extract information by topics
    :param sorted_results: (truth, prediction, confidence) tuples sorted by confidence
    :return:
        ontopic_infos: tuples whose ground truth is on topic
        offtopic_infos: tuples whose ground truth is off topic
    """
    offtopic_infos = [
        prediction for prediction in sorted_results if prediction[0] == OFFTOPIC_LABEL
    ]

    ontopic_infos = [
        prediction for prediction in sorted_results if prediction[0] != OFFTOPIC_LABEL
    ]

    return ontopic_infos, offtopic_infos


def analysis_pipeline(results, intent_name=None):
    """
    perform the operation of extraction of table analysis and produce threshold graph
    :param results: results dataframe of predictions
    :param intent_name: optional intent name for per-intent analysis
    :return: analysis_df
    """
    sorted_results = _convert_data_format(results, intent_name=intent_name)

    ontopic_infos, offtopic_infos = extract_by_topic(sorted_results)

    # if ontopic counts or sorted results are less than 3, the graph will show almost no variation
    # if all confidence of the predicted result are the same, there will be no variation
    if (
        len(ontopic_infos) < 3
        or len(sorted_results) < 3
        or all(ele[2] == sorted_results[0][2] for ele in sorted_results)
    ):
display(Markdown("**Inadequate Data Points**: No analysis will be conducted")) 383 | analysis_df = pd.DataFrame() 384 | return analysis_df 385 | 386 | ( 387 | analysis_df, 388 | toa_list, 389 | bot_coverage_list, 390 | far_list, 391 | thresholds, 392 | ) = extract_table_analysis(sorted_results, ontopic_infos, offtopic_infos) 393 | 394 | if not intent_name and not analysis_df.empty: 395 | line_graph_data = pd.DataFrame( 396 | data={ 397 | "Thresholded On Topic Accuracy": toa_list, 398 | "Bot Coverage %": bot_coverage_list, 399 | "False Acceptance Rate (FAR) for Out of Domain Examples": far_list, 400 | }, 401 | index=thresholds, 402 | ) 403 | 404 | create_threshold_graph(line_graph_data) 405 | 406 | return analysis_df 407 | 408 | 409 | def extract_table_analysis(sorted_results, ontopic_infos, offtopic_infos): 410 | """ 411 | extract informations for table analysis 412 | :param sorted_results: 413 | :return: 414 | analysis_df: pandas dataframe of the table for dispaly 415 | toa_list: list of sorted on-topic accuracy 416 | bot_coverage_list: list of sorted bot coverage ratio 417 | far_list: list of sorted false acceptance rate 418 | thresholds: list of sorted & unique thresholds 419 | """ 420 | thresholds, sort_uniq_confs = generate_unique_thresholds(sorted_results) 421 | 422 | toa_list, toa_count = _get_ontopic_accuracy_list(sorted_results, thresholds) 423 | bot_coverage_list, bot_coverage_count = _get_bot_coverage_list( 424 | sorted_results, thresholds 425 | ) 426 | 427 | if len(offtopic_infos) >= OFFTOPIC_CNT_THRESHOLD_FOR_DISPLAY: 428 | 429 | far_list, _ = _get_far_list(sorted_results, thresholds) 430 | else: 431 | display( 432 | Markdown( 433 | "Out of Domain examples fewer than **%d** thus \ 434 | no False Acceptance Rate (FAR) calculated" 435 | % OFFTOPIC_CNT_THRESHOLD_FOR_DISPLAY 436 | ) 437 | ) 438 | far_list = [-1] * len(thresholds) 439 | 440 | analysis_df = create_display_table( 441 | toa_list, 442 | bot_coverage_list, 443 | bot_coverage_count, 444 | 
sorted_results, 445 | thresholds, 446 | offtopic_infos, 447 | far_list, 448 | ) 449 | 450 | return analysis_df, toa_list, bot_coverage_list, far_list, thresholds 451 | 452 | 453 | def create_threshold_graph(data): 454 | """ 455 | display threshold analysis graph 456 | :param data: 457 | :return: None 458 | """ 459 | sns.set(rc={"figure.figsize": (20.7, 10.27)}) 460 | plt.ylim(0, 1.1) 461 | plt.axvline(0.2, 0, 1) 462 | plot = sns.lineplot(data=data, palette="tab10", linewidth=3.5) 463 | plt.setp(plot.legend().get_texts(), fontsize="22") 464 | plot.set_xlabel("Threshold T", fontsize=18) 465 | plot.set_ylabel("Metrics mentioned above", fontsize=18) 466 | 467 | 468 | def create_display_table( 469 | toa_list, 470 | bot_coverage_list, 471 | bot_coverage_count, 472 | sorted_results, 473 | thresholds, 474 | offtopic_infos, 475 | far_list, 476 | ): 477 | """ 478 | create table for display purpose 479 | :param toa_list: 480 | :param bot_coverage_list: 481 | :param bot_coverage_count: 482 | :param sorted_results: 483 | :param thresholds: 484 | :param offtopic_infos: 485 | :param far_list: 486 | :return: analysis_df, pandas dataframe containing metrics at intervals of 10% 487 | """ 488 | # produce the threhold quantiles for extraction of relevant information 489 | display_thresholds = [t / 100 for t in range(0, 100, 10)] 490 | display_indexes = [_find_threshold(t, thresholds) for t in display_thresholds] 491 | 492 | analysis_data = dict() 493 | analysis_data["Threshold (T)"] = display_thresholds 494 | analysis_data["Ontopic Accuracy (TOA)"] = [ 495 | toa_list[idx] * 100 for idx in display_indexes 496 | ] 497 | analysis_data["Bot Coverage %"] = [ 498 | bot_coverage_list[idx] * 100 for idx in display_indexes 499 | ] 500 | analysis_data["Bot Coverage Counts"] = [ 501 | str(np.round(bot_coverage_count[idx], decimals=0)) 502 | + " / " 503 | + str(len(sorted_results)) 504 | for idx in display_indexes 505 | ] 506 | 507 | if len(offtopic_infos) >= OFFTOPIC_CNT_THRESHOLD_FOR_DISPLAY: 
508 | analysis_data["False Acceptance Rate (FAR)"] = [ 509 | far_list[idx] * 100 for idx in display_indexes 510 | ] 511 | 512 | analysis_df = pd.DataFrame(data=analysis_data) 513 | return analysis_df 514 | --------------------------------------------------------------------------------