├── .bumpversion.cfg
├── .github
└── workflows
│ ├── build.yml
│ └── deploy.yml
├── .gitignore
├── .releaserc
├── .travis.yml
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── notebook
├── Effectiveness Notebook.ipynb
├── IBM Watson Assistant Continuous Improvement Best Practices.pdf
├── Logs Notebook-cp4d.ipynb
├── Logs Notebook.ipynb
├── Measure Notebook-cp4d.ipynb
├── Measure Notebook.ipynb
├── README.md
├── data
│ ├── annotation.xlsx
│ ├── book_recommender_logs.gz
│ ├── book_recommender_skill.json
│ ├── sample_logs.json
│ └── workspace.json
└── imgs
│ ├── analyze_process.png
│ ├── box_zoom.png
│ ├── click.png
│ ├── effectiveness_overall.png
│ ├── effort_computation.png
│ ├── find_data_icon.png
│ ├── measure_overall.png
│ ├── measure_process.png
│ ├── reset.png
│ ├── save.png
│ ├── suggestions.png
│ └── wheel_zoom.png
├── package-lock.json
├── package.json
├── requirements.txt
├── requirements_dev.txt
├── setup.py
├── src
├── __init__.py
├── assistant_improve_toolkit
│ ├── __init__.py
│ ├── computation_func.py
│ ├── cos_op.py
│ ├── export_csv_for_intent_recommendation.py
│ ├── fetch_logs.py
│ ├── version.py
│ ├── visualize_func.py
│ └── watson_assistant_func.py
└── main
│ ├── __init__.py
│ ├── css
│ ├── custom.css
│ ├── custom_jupyter.css
│ └── custom_watson_studio.css
│ └── python
│ ├── __init__.py
│ ├── computation_func.py
│ ├── cos_op.py
│ ├── export_csv_for_intent_recommendation.py
│ ├── fetch_logs.py
│ ├── visualize_func.py
│ ├── watson_assistant_func.py
│ └── watson_assistant_func_skip.py
├── test
├── __init__.py
└── assistant_improve_toolkit
│ ├── __init__.py
│ └── test_computation_func.py
└── tox.ini
/.bumpversion.cfg:
--------------------------------------------------------------------------------
1 | [bumpversion]
2 | current_version = 1.4.1
3 | commit = True
4 |
5 | [bumpversion:file:src/assistant_improve_toolkit/version.py]
6 | search = __version__ = '{current_version}'
7 | replace = __version__ = '{new_version}'
8 |
9 | [bumpversion:file:setup.py]
10 | search = __version__ = '{current_version}'
11 | replace = __version__ = '{new_version}'
12 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | # This workflow uses actions that are not certified by GitHub.
2 | # They are provided by a third-party and are governed by
3 | # separate terms of service, privacy policy, and support documentation.
4 | # This workflow will download a prebuilt Python version, install dependencies, and run the test suite
5 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
6 |
7 | name: Build and Test
8 |
9 | on:
10 | pull_request:
11 | # Sequence of patterns matched against refs/heads
12 | branches:
13 | - master
14 |
15 | jobs:
16 | test:
17 | name: Build and Test
18 | runs-on: ubuntu-latest
19 |
20 | steps:
21 | - uses: actions/checkout@v2
22 | with:
23 | persist-credentials: false
24 | - name: Set up Python
25 | uses: actions/setup-python@v2
26 | with:
27 | python-version: '3.10'
28 | - name: Install dependencies
29 | run: |
30 | python -m pip install --upgrade pip
31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
32 | if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi
33 | - name: Test with pytest
34 | run: |
35 | pytest
36 |
--------------------------------------------------------------------------------
/.github/workflows/deploy.yml:
--------------------------------------------------------------------------------
1 | # This workflow uses actions that are not certified by GitHub.
2 | # They are provided by a third-party and are governed by
3 | # separate terms of service, privacy policy, and support documentation.
4 | # This workflow will download a prebuilt Python version, install dependencies, build and deploy/publish a new release
5 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
6 |
7 | name: Deploy and Publish
8 |
9 | on:
10 | push:
11 | branches:
12 | - master
13 |
14 | jobs:
15 | deploy:
16 | if: "!contains(github.event.head_commit.message, 'skip ci')"
17 | name: Deploy and Publish
18 | runs-on: ubuntu-latest
19 |
20 | steps:
21 | - uses: actions/checkout@v2
22 | with:
23 | persist-credentials: false
24 | - name: Set up Python
25 | uses: actions/setup-python@v2
26 | with:
27 | python-version: '3.10'
28 | - name: Install dependencies
29 | run: |
30 | python -m pip install --upgrade pip
31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
32 | if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi
33 | - name: Test with pytest
34 | run: |
35 | pytest
36 | - name: Setup Node
37 | uses: actions/setup-node@v1
38 | with:
39 | node-version: 18
40 | - name: Install Semantic Release dependencies
41 | run: |
42 | sudo apt-get install bumpversion
43 | npm install -g semantic-release
44 | npm install -g @semantic-release/changelog
45 | npm install -g @semantic-release/exec
46 | npm install -g @semantic-release/git
47 | npm install -g @semantic-release/github
48 | npm install -g @semantic-release/commit-analyzer
49 | npm install -g @semantic-release/release-notes-generator
50 | npm install -g semantic-release-pypi
51 | - name: Install setuptools
52 | run: python -m pip install --upgrade setuptools wheel twine
53 | - name: Publish to Git Releases and Tags
54 | run: npx semantic-release
55 | env:
56 | GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
57 | NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
58 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
59 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Java
2 | *.class
3 |
4 | build/
5 | liberty/
6 | target/
7 | dist/
8 |
9 | # Some Markdown preview plugins generate .filename.html files that we want to ignore
10 | .*.html
11 |
12 | ### Gradle ###
13 | .gradle
14 | gradle-app.setting
15 |
16 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
17 | !gradle-wrapper.jar
18 |
19 | ### Eclipse ###
20 | .classpath
21 | .project
22 | .factorypath
23 | .settings/
24 | .metadata
25 | bin/
26 | logs/
27 |
28 | # Ignore the service manifest project (subtree) files
29 | manifest/
30 |
31 | ### OS X ###
32 | .DS_Store
33 |
34 | ### IntelliJ ###
35 | .idea
36 | *.iws
37 |
38 |
39 | ### Generated files ###
40 | /manifest.yml
41 | generated-sources/
42 |
43 | ### Vagrant ###
44 | .vagrant
45 |
46 | ### Python ###
47 | *.pyc
48 | .pydevproject
49 | __pycache__/
50 | *.egg-info/
51 |
52 |
53 | ### Log files ###
54 | *.log
55 |
56 | ### test coverage ###
57 | .coverage
58 | nosetests.xml
59 |
60 | ### test resources ###
61 | *.gz
62 |
--------------------------------------------------------------------------------
/.releaserc:
--------------------------------------------------------------------------------
1 | {
2 | "debug": true,
3 | "plugins": [
4 | "@semantic-release/commit-analyzer",
5 | "@semantic-release/release-notes-generator",
6 | "@semantic-release/changelog",
7 | "semantic-release-pypi",
8 | [
9 | "@semantic-release/exec",
10 | {
11 | "prepareCmd": "bumpversion --allow-dirty --current-version ${lastRelease.version} --new-version ${nextRelease.version} patch"
12 | }
13 | ],
14 | [
15 | "@semantic-release/git",
16 | {
17 | "message": "chore(release): ${nextRelease.version} release notes\n\n${nextRelease.notes}"
18 | }
19 | ],
20 | "@semantic-release/github"
21 | ]
22 | }
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - 3.7
4 | - 3.8
5 | cache: pip
6 | before_install:
7 | #- python -c "import fcntl; fcntl.fcntl(1, fcntl.F_SETFL, 0)"
8 | - npm install npm@latest -g
9 | install:
10 | - pip3 install tox-travis
11 | before_script:
12 | - pip3 install -r requirements.txt
13 | script:
14 | - pip3 install python-dotenv
15 | #- travis_wait tox
16 | before_deploy:
17 | - pip3 install bumpversion pypandoc
18 | - sudo apt-get update
19 | - sudo apt-get install pandoc
20 | - nvm install 12
21 | - npm install @semantic-release/changelog
22 | - npm install @semantic-release/exec
23 | - npm install @semantic-release/git
24 | - npm install @semantic-release/github
25 | deploy:
26 | - provider: script
27 | script: npx semantic-release
28 | skip_cleanup: true
29 | on:
30 | python: 3.8
31 | branch: master
32 | - provider: pypi
33 | user: "$PYPI_USER"
34 | password: "$PYPI_PASSWORD"
35 | server: https://upload.pypi.org/legacy/
36 | skip_cleanup: true
37 | on:
38 | python: 3.8
39 | tags: true
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## [1.4.1](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.4.0...v1.4.1) (2023-07-08)
2 |
3 |
4 | ### Bug Fixes
5 |
6 | * remove outdated log filter for v2 api ([#155](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/155)) ([58a9407](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/58a9407e89a5ce2abf9fe2de5a82d7269c6be440))
7 |
8 | # [1.4.0](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.9...v1.4.0) (2023-06-30)
9 |
10 |
11 | ### Features
12 |
13 | * support watson assistant sdk v2 ([#154](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/154)) ([0ec1075](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/0ec107522993a549f6d96cbd07a58d1d25920574))
14 |
15 | ## [1.3.9](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.8...v1.3.9) (2023-06-22)
16 |
17 |
18 | ### Bug Fixes
19 |
20 | * update node version ([#153](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/153)) ([4ec5aa8](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/4ec5aa8a3f9410655b6e34eeed4d7d192a3315e4))
21 | * update numpy version to be compatible with python 3.10 ([#152](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/152)) ([bbd5686](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/bbd568696f8f44ee3dff4e0a9f3cadb1d0b87083))
22 |
23 | ## [1.3.8](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.7...v1.3.8) (2022-06-14)
24 |
25 |
26 | ### Bug Fixes
27 |
28 | * update requests version to avoid dependency issues ([#148](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/148)) ([b9fbdde](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/b9fbdde0e241ff3d9189a423ed6ca4d90591976b))
29 |
30 | ## [1.3.7](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.6...v1.3.7) (2022-05-06)
31 |
32 |
33 | ### Bug Fixes
34 |
35 | * keep __version__ variable ([#147](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/147)) ([2714d14](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/2714d146b7e69669eed3474f3b700b3789084927))
36 | * remove version from setup.py ([#146](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/146)) ([2d62284](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/2d622842a4ec002fa340810fe745ef04987f161c))
37 |
38 |
39 | ### Reverts
40 |
41 | * Revert "chore (github_actions): deploy and build configs (#141)" (#144) ([68f0610](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/68f0610e0950d53e4732f428122669b7503353a3)), closes [#141](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/141) [#144](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/144)
42 |
43 | ## [1.3.6](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.5...v1.3.6) (2021-07-20)
44 |
45 |
46 | ### Bug Fixes
47 |
48 | * deploy config and setup ([27d8de0](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/27d8de0b41e0a6df300551460d7ec665f34d3d8c))
49 | * drop reference to dialog_stack field and update deploy config ([c43c8cd](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/c43c8cd243c2779453434826c21054ec7a1ed6df))
50 |
51 | ## [1.3.5](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.4...v1.3.5) (2021-07-19)
52 |
53 |
54 | ### Bug Fixes
55 |
56 | * deploy config ([e045930](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/e045930686b85e870f2201d02850f86040b49788))
57 | * deploy config ([bf66c2b](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/bf66c2b9a5c0e4b2b92397360d481d3fd93aceae))
58 | * deploy config ([7e88da9](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/7e88da9fb48d35239518e81db87f50a52647d6e0))
59 | * deploy config ([ae7ccf9](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/ae7ccf959225b14a8b585eec26b3fa9fc4588bc1))
60 | * deploy config ([0c0096c](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/0c0096c214d43fa304479cd70db14fcca0564fae))
61 | * readme parsing for pypi ([a6fbbad](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/a6fbbaddd530b4a4487219a41819231df3456257))
62 |
63 | ## [1.3.4](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.3...v1.3.4) (2021-07-17)
64 |
65 |
66 | ### Bug Fixes
67 |
68 | * deploy config ([244fe3f](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/244fe3fbe04e274997d912d501cbc6cf6ab596d9))
69 |
70 | ## [1.3.3](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.2...v1.3.3) (2021-07-16)
71 |
72 |
73 | ### Bug Fixes
74 |
75 | * add github actions settings ([ed00f97](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/ed00f9700113e01fc096c20d8a80073b9e185511))
76 | * add github actions settings ([320bfd2](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/320bfd2e915df1655759c97e0c03d31e55c88880))
77 | * deploy config ([63df0d8](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/63df0d8fd12d9898ef0b20b0754b24dcb8a929eb))
78 | * deploy config ([358fe32](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/358fe32dacddf66576111fcabc076a1a188ffc8d))
79 | * deploy config ([a3b5d7e](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/a3b5d7ea4542c9d81a2a748efa2caa5689ce0a0a))
80 | * deploy file ([13495c8](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/13495c890d0e9e3da68180507300419b82b69c12))
81 | * release version number ([be2b821](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/be2b8212cbf95c5ae29edface6857e3387ef4de5))
82 | * release version number ([9f2cbdc](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/9f2cbdc4385899604d22a31c4c5447b342c23730))
83 | * remove response_dialog fiedlds ([923cdf6](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/923cdf654dd25bd6526a2ea8c3a48106c34a2244))
84 | * remove response_dialog fields ([b09f905](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/b09f905bb32979af15b620ca6741c9fb0d7e241c))
85 | * remove response_dialog fields ([2c0b91a](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/2c0b91a50f9c29e8dbec68aebf4eb2c64d4760c3))
86 |
87 | ## [1.3.2](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.1...v1.3.2) (2021-07-15)
88 |
89 |
90 | ### Bug Fixes
91 |
92 | * Remove response_dialog fields ([#119](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/pull/119)) ([2c0b91a50f](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commits/2c0b91a50f9c29e8dbec68aebf4eb2c64d4760c3))
93 |
94 | ## [1.3.1](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.0...v1.3.1) (2021-06-09)
95 |
96 |
97 | ### Bug Fixes
98 |
99 | * update requirement and fix typos in notebooks ([#113](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/113)) ([bc96b95](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/bc96b95e4deae6b7b5ad77ba066b5dc936669ab6))
100 |
101 | # [1.3.0](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.2.3...v1.3.0) (2021-06-04)
102 |
103 |
104 | ### Features
105 |
106 | * log notebook for cp4d ([#111](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/111)) ([526f8f1](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/526f8f1d3bc2a1e1f6bc4404459684073bbdf3e4))
107 |
108 | ## [1.2.3](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.2.2...v1.2.3) (2021-06-03)
109 |
110 |
111 | ### Bug Fixes
112 |
113 | * fix a bug ([#107](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/107)) ([dd7aca8](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/dd7aca876b2f98a2547b87f31181824c4a3fd7f1))
114 |
115 | ## [1.2.2](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.2.1...v1.2.2) (2021-06-03)
116 |
117 |
118 | ### Bug Fixes
119 |
120 | * add handler for nan value in context field ([#105](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/105)) ([41e68ef](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/41e68effbfbb55afb79e9f6daf1725fb92b553bc))
121 |
122 | ## [1.2.1](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.2.0...v1.2.1) (2021-05-27)
123 |
124 |
125 | ### Bug Fixes
126 |
127 | * support cp4d format ([#102](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/102)) ([1e75b80](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/1e75b804a75d3d0745395371405e4d881cf953fa))
128 |
129 | # [1.2.0](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.1.6...v1.2.0) (2021-05-21)
130 |
131 |
132 | ### Features
133 |
134 | * add support for assistant v2 log apis ([#100](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/100)) ([2c8d7bb](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/2c8d7bb3fab4f4dad4a7ba3849aa3fc2763c803d))
135 |
136 | ## [1.1.6](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.1.5...v1.1.6) (2020-09-02)
137 |
138 |
139 | ### Bug Fixes
140 |
141 | * fix visualization with 'week' interval ([#96](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/96)) ([fa0d9e0](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/fa0d9e0fae5eb7901a9b77b580d41c940451f543))
142 |
143 | ## [1.1.5](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.1.4...v1.1.5) (2020-09-01)
144 |
145 |
146 | ### Bug Fixes
147 |
148 | * fix a bug when calculating coverage ([#95](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/95)) ([06ba033](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/06ba03380c1f309309f3e7cb484d736b7d3df1c3))
149 |
150 | ## [1.1.4](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.1.3...v1.1.4) (2020-08-24)
151 |
152 |
153 | ### Bug Fixes
154 |
155 | * delete watson_assistant_func_skip.py ([0da1456](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/0da145654157796a58a50d6a90348ff6b32478a2))
156 |
157 | ## [1.1.3](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.1.2...v1.1.3) (2020-08-22)
158 |
159 |
160 | ### Bug Fixes
161 |
162 | * fix a bug in show disambiguation click api ([#93](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/93)) ([a7eddca](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/a7eddca0b55f9520e1438a646416937ddfc3458e))
163 |
164 | ## [1.1.2](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.1.1...v1.1.2) (2020-08-21)
165 |
166 |
167 | ### Bug Fixes
168 |
169 | * remove tqdm for watson studio support ([#91](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/91)) ([e183b0d](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/e183b0d36a82660484cb3516d4f34b04ee0897e2))
170 |
171 | ## [1.1.1](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.1.0...v1.1.1) (2020-08-21)
172 |
173 |
174 | ### Bug Fixes
175 |
176 | * fix pypi url ([#90](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/90)) ([b0f39f4](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/b0f39f45341729eba954cfdb387abfddaa91ec0f))
177 |
178 | # [1.1.0](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.0.1...v1.1.0) (2020-08-21)
179 |
180 |
181 | ### Features
182 |
183 | * improve ReadMe.md to include all of the related notebooks ([#89](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/89)) ([68c3325](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/68c3325ddfc84e5866728360880f4dca5991303e))
184 |
185 | ## [1.0.1](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.0.0...v1.0.1) (2020-08-17)
186 |
187 |
188 | ### Bug Fixes
189 |
190 | * releaserc ([eb6b2b2](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/eb6b2b260ef7bb43aaa3aab5cbb9b992c3f5049d))
191 |
192 | # 1.0.0 (2020-08-17)
193 |
194 |
195 | ### Bug Fixes
196 |
197 | * bumpversion.cfg ([143460a](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/143460ae166940d041fa674d4e771e1b7e9e2305))
198 | * change release branch ([#88](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/88)) ([a211584](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/a211584dfff38db35e374a03b4835f3ff1d1fea8))
199 | * current version error ([d3aac8c](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/d3aac8c9355822308964a51f79f527ab835ae442))
200 | * version file path ([4b7d0fa](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/4b7d0fa20859c72288945ed9ae9c70ee5756bacd))
201 |
202 |
203 | ### Features
204 |
205 | * Add Customer Effort Analysis notebook ([#83](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/83)) ([593d38d](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/593d38d2ec5595e758e33b96dbc02440ea059bab))
206 | * refactor for assistant improve toolkit ([#87](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/87)) ([f5d6e7e](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/f5d6e7e54661f761f33b20ebfa9e0f8f234c84c2))
207 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | ## Issues
4 |
5 | If you encounter an issue, start by searching through the list of [issues](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues) and active [pull requests](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/pulls) to see if anyone else has raised a similar issue.
6 |
7 | If you don't see an issue listed please submit a [new issue](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/new/choose). Make sure to provide sufficient information on your environment and how to reproduce the issue.
8 |
9 | ## Contributing code
10 |
11 | Please sign our [Contributor License Agreement (CLA)](https://cla-assistant.io/watson-developer-cloud/assistant-improve-recommendations-notebook) before sending PRs
12 |
13 | * **If your contribution is minor,** such as a bug fix, open a pull request.
14 | * **If your contribution is major,** such as a new feature, start by opening an issue first. Others can then weigh in before you commence any work.
15 |
16 | ## Submission Guidelines
17 | 1. Fork the repo
18 | 2. Create a local branch for your change, e.g. `git checkout -b my-new-feature-branch`
19 | 3. Test your changes
20 | 4. Commit your changes to local branch, e.g. `git commit -m "feat: my new feature"`. (see instructions below re: commit message)
21 | 5. Push your changes to remote `git push -u origin my-new-feature-branch`
22 | 6. From GitHub, create a PR from your fork to this repo
23 |
24 | ### Format your commit message to properly document and trigger the semantic release
25 | * feat: A new feature
26 | * fix: A bug fix
27 | * docs: Documentation only changes
28 | * style: Changes that do not affect the meaning of the code (white-space, formatting, missing semi-colons etc)
29 | * refactor: A code change that neither fixes a bug nor adds a feature
30 | * perf: A code change that improves performance
31 | * test: Adding missing tests
32 | * chore: Changes to the build process or auxiliary tools and libraries such as documentation generation
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Watson Assistant Improve Notebooks
2 |
3 | [](https://travis-ci.org/github/watson-developer-cloud/assistant-improve-recommendations-notebook)
4 | [](https://wdc-slack-inviter.mybluemix.net)
5 | [](https://pypi.org/project/assistant-improve-toolkit/)
6 | [](https://cla-assistant.io/watson-developer-cloud/assistant-improve-recommendations-notebook)
7 |
8 | This repository houses Watson Assistant Improve notebooks and the underlying assistant improve toolkit library.
9 |
10 | ## Introduction
11 | To help improve your Watson Assistant after you have deployed it to production, we prepared the following Jupyter notebooks. These notebooks include practical steps for measuring, analyzing, and actively improving your assistant in a continuous manner. Check out [IBM Watson Assistant Continuous Improvement Best Practices](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/raw/master/notebook/IBM%20Watson%20Assistant%20Continuous%20Improvement%20Best%20Practices.pdf) for more details.
12 |
13 | - __Measure notebook__ contains a set of automated metrics that help you monitor and understand the behavior of your system. The goal is to understand where your assistant is doing well vs where it isn’t, and to focus your improvement effort to one of the problem areas identified. This notebook generates an assessment spreadsheet for you to use to label problematic conversations, and then feed to the Effectiveness notebook.
14 |
15 | - __Effectiveness notebook__ helps you understand the relative performance of each intent and entity as well as the confusion between your intents. This information helps you prioritize your improvement effort. The input to this notebook is an assessment spreadsheet generated from the Measure notebook. Update the marked columns in the spreadsheet with your labels and load it into the Effectiveness notebook for analysis.
16 |
17 | - __Logs notebook__ helps you fetch logs using Watson Assistant API. You can fetch logs with various filters, and save them as a JSON file, or export the utterances in the logs into a CSV file. The JSON file can be loaded into the Measure notebook. The CSV file can be used for [intent recommendation service](https://cloud.ibm.com/docs/assistant?topic=assistant-intent-recommendations#intent-recommendations-get-intent-recommendations-task). Alternatively, you can run python scripts [`fetch_logs`](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/blob/master/src/main/python/fetch_logs.py) and [`export_csv_for_intent_recommendation`](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/blob/master/src/main/python/export_csv_for_intent_recommendation.py) to fetch logs and export them to [intent recommendation CSV](https://cloud.ibm.com/docs/assistant?topic=assistant-intent-recommendations#intent-recommendations-data-resources), respectively. Run `python get_logs -h` and `python export_csv_for_intent_recommendation.py -h` for usage.
18 |
19 | - __Dialog Flow Analysis notebook__ helps you assess and analyze user journeys and issues related to the dialog flow of ineffective (low quality) conversations based on production logs. Check out [Dialog Flow Analysis](https://github.com/watson-developer-cloud/assistant-dialog-flow-analysis) for more details.
20 |
21 | - __Dialog Skill Analysis notebook__ helps you analyze characteristics of your data such as the number of training examples for each intent or the terms which seem to be characteristic of a specific intent. Check out [Dialog Skill Analysis](https://github.com/watson-developer-cloud/assistant-dialog-skill-analysis) for more details.
22 |
23 | ## Getting Started
24 |
25 | You can either run the notebooks locally or in [IBM Watson Studio](https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/notebooks-parent.html).
26 |
27 | - **Run locally**
28 |
29 | 1. Install Jupyter Notebook, see [Jupyter/IPython Notebook Quick Start Guide](https://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/install.html) for more details.
30 | 2. Download the Jupyter notebooks available in this repository's [notebook](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/tree/master/notebook) directory. __Note: These notebook files are not designed for Watson Studio environment__
31 | 3. Start jupyter server `jupyter notebook`
32 | 4. Follow the instructions in each of the notebooks. Be sure to add your Watson Assistant credentials if necessary.
33 |
34 | - **Run in Watson Studio**
35 |
36 | 1. Create a Watson Studio account.
37 | Sign up in [Watson Studio](https://www.ibm.com/cloud/watson-studio), or use an existing account. Lite plan is free to use.
38 |
39 | 2. Create a new project and add a Cloud Object Storage (COS) account.
40 | For more information regarding COS plans, see [Pricing](https://www.ibm.com/cloud-computing/bluemix/pricing-object-storage).
41 |
42 | 3. Copy [Measure](https://dataplatform.cloud.ibm.com/exchange/public/entry/view/133dfc4cd1480bbe4eaa78d3f635e568) or [Effectiveness](https://dataplatform.cloud.ibm.com/exchange/public/entry/view/133dfc4cd1480bbe4eaa78d3f636921c) notebook from Watson Studio community into your project.
43 |
44 | 4. Follow the instructions in each notebook to add project tokens and Watson Assistant credentials if necessary.
45 |
46 | ## Guides
47 | * Learn more about our measure and effectiveness notebook on Medium: [Continuously Improve Your Watson Assistant with Jupyter Notebooks](https://medium.com/ibm-watson/continuously-improve-your-watson-assistant-with-jupiter-notebooks-60231df4f01f)
48 |
49 | ## Contributing
50 | See [CONTRIBUTING.md](CONTRIBUTING.md) for more details on how to contribute
51 |
52 | ## License
53 | This library is licensed under the [Apache 2.0 license](http://www.apache.org/licenses/LICENSE-2.0).
54 |
55 |
--------------------------------------------------------------------------------
/notebook/IBM Watson Assistant Continuous Improvement Best Practices.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/IBM Watson Assistant Continuous Improvement Best Practices.pdf
--------------------------------------------------------------------------------
/notebook/Logs Notebook-cp4d.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |     "# Watson Assistant Logs Notebook\n",
8 | "### IBM Cloud Pak for Data version"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
15 | "## Introduction\n",
16 | "This notebook demonstrates how to download Watson Assistant user-generated logs based on different criteria.\n",
17 | "\n",
18 | "### Programming language and environment\n",
19 | "Some familiarity with Python is recommended. This notebook runs on Python 3.7+"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "\n",
27 | "## 1. Configuration and Setup\n",
28 | "\n",
29 | "In this section, we add data and workspace access credentials, import required libraries and functions.\n",
30 | "\n",
31 | "### 1.1 Install Assistant Improve Toolkit"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {
38 | "scrolled": true
39 | },
40 | "outputs": [],
41 | "source": [
42 | "!pip install --user --upgrade \"assistant-improve-toolkit\";"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "### 1.2 Import functions used in the notebook"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "# Import Watson Assistant related functions\n",
59 | "from ibm_cloud_sdk_core.authenticators import IAMAuthenticator\n",
60 | "from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator\n",
61 | "import pandas as pd\n",
62 | "import json\n",
63 | "from ibm_watson import AssistantV1, AssistantV2\n",
64 | "\n",
65 | "from assistant_improve_toolkit.watson_assistant_func import get_logs\n",
66 | "from assistant_improve_toolkit.watson_assistant_func import get_assistant_definition\n",
67 | "from assistant_improve_toolkit.watson_assistant_func import load_logs_from_file\n",
68 | "from assistant_improve_toolkit.watson_assistant_func import export_csv_for_intent_recommendation"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "## 2. Load and format data "
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {
81 | "pycharm": {
82 | "name": "#%% md\n"
83 | }
84 | },
85 | "source": [
86 | "### 2.1 Add Watson Assistant configuration\n",
87 | "\n",
88 | "The notebook uses `CloudPakForDataAuthenticator` to authenticate the APIs.\n",
89 | "\n",
90 | "- Replace `username` and `password` with your Cloud Pak for Data credentials\n",
91 | "- `base_url` is the base URL of your instance. It is in the format of `https://{cpd_cluster_host}{:port}/icp4d-api`\n",
92 | "- The string to set for version is a date in the format version=YYYY-MM-DD. The version date string determines which version of the Watson Assistant v1/v2 API will be called. For more information about version, see [Versioning](https://cloud.ibm.com/apidocs/assistant-data-v1?code=python#versioning)\n",
93 | "- The string to pass into `assistant.set_service_url` is the service URL of your Watson Assistant. The URL follows this pattern: `https://{cpd_cluster_host}{:port}/assistant/{release}/instances/{instance_id}/api`. To find this URL, view the details for the service instance from the Cloud Pak for Data web client. For more information, see [Service Endpoint](https://cloud.ibm.com/apidocs/assistant-data-v1?code=python#service-endpoint)\n",
94 | "\n",
95 | "The notebook requires initializing both v1 API instance `sdk_v1_object` and v2 API instance `sdk_v2_object`."
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "# Provide credentials to connect to assistant\n",
105 | "# Set disable_ssl_verification=True for self-signed certificate\n",
106 | "authenticator = CloudPakForDataAuthenticator(\n",
107 | " username='username',\n",
108 | " password='password',\n",
109 | " url='base_url',\n",
110 | " disable_ssl_verification=False\n",
111 | ")\n",
112 | "\n",
113 | "# Initialize v1 API instance\n",
114 | "sdk_v1_object = AssistantV1(version='2020-04-01', authenticator = authenticator)\n",
115 | "sdk_v1_object.set_service_url('service_url')\n",
116 | "\n",
117 | "# Initialize v2 API instance\n",
118 | "sdk_v2_object = AssistantV2(version='2020-09-24', authenticator = authenticator)\n",
119 | "sdk_v2_object.set_service_url('service_url')\n",
120 | "\n",
121 | "# Set set_disable_ssl_verification to True for self-signed certificate\n",
122 | "# sdk_v1_object.set_disable_ssl_verification(True)\n",
123 | "# sdk_v2_object.set_disable_ssl_verification(True)"
124 | ]
125 | },
126 | {
127 | "cell_type": "markdown",
128 | "metadata": {},
129 | "source": [
130 |     "Add the information of your assistant. To load the skill of an assistant in the next section, you need to provide either Workspace ID or Skill ID. To locate your assistant ID, open the assistant settings and click __API Details__. To locate your workspace ID or skill ID, go to the Skills page and select __View API Details__ from the menu of a skill tile. If you are using versioning in Watson Assistant, this ID represents the Development version of your skill definition."
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {},
137 | "outputs": [],
138 | "source": [
139 | "assistant_information = {'workspace_id' : '',\n",
140 | " 'skill_id' : '',\n",
141 | " 'assistant_id' : ''}"
142 | ]
143 | },
144 | {
145 | "cell_type": "markdown",
146 | "metadata": {},
147 | "source": [
148 | "### 2.2 Fetch and load logs\n",
149 | "\n",
150 | "- `num_logs`: number of logs to fetch\n",
151 | "- Use `filename` to specify if logs are saved as a JSON file (default: `None`)\n",
152 | "- Apply `filters` while fetching logs (default: `[]`), e.g.,\n",
153 | " - removing empty input: `meta.summary.input_text_length_i>0`\n",
154 | " - fetching logs generated after a timestamp: `response_timestamp>=2018-09-18`\n",
155 | " \n",
156 | " Refer to [Filter query reference](https://cloud.ibm.com/docs/services/assistant?topic=assistant-filter-reference) for\n",
157 | " more information.\n",
158 | "- Use `project` to specify project when using Watson Studio (default: `None`)\n",
159 | "- Use `overwrite` to overwrite if `filename` exists (default: `False`)\n"
160 | ]
161 | },
162 | {
163 | "cell_type": "markdown",
164 | "metadata": {},
165 | "source": [
166 | "__A. Download all logs for a period of time (and save as a JSON file for Measure notebook)__"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": null,
172 | "metadata": {},
173 | "outputs": [],
174 | "source": [
175 | "# Add filter queries\n",
176 | "filters = ['language::en', # Logs in English\n",
177 | " 'meta.summary.input_text_length_i>0', # Logs with non empty input \n",
178 | " 'response_timestamp>=2020-03-01'] # Logs with response timestamp later or equal to 2020-03-01\n",
179 | "\n",
180 | "# Query 20,000 logs\n",
181 | "filename = 'logs.json'\n",
182 | "\n",
183 | "# Fetch 20,000 logs, set `overwrite` to True to reload logs, set version=2 to use v2 log apis\n",
184 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n",
185 | " sdk_v2_object=sdk_v2_object,\n",
186 | " assistant_info=assistant_information,\n",
187 | " num_logs=20000,\n",
188 | " filename=filename,\n",
189 | " filters=filters,\n",
190 | " overwrite=True,\n",
191 | " project=None,\n",
192 | " version=2)"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {},
198 | "source": [
199 | "__B. Download and export logs for intent recommendation__\n",
200 | "\n",
201 | "For intent recommendation, by default, an utterance is considered only when:\n",
202 | "- It is the first user utterance in each conversation\n",
203 | "- its confidence `response.intents::confidence` is between 0.1 and 0.6 (exclusive),\n",
204 | "- its token count is between 3 and 20 (exclusive), and\n",
205 | "- it is not a duplicate of the other utterances in the logs.\n",
206 | "\n",
207 | "This example adds confidence filters when calling `get_logs`, and then exports the utterances to a CSV file by calling\n",
208 |     "`export_csv_for_intent_recommendation` with token count filter and deduplication applied.\n"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {
215 | "pycharm": {
216 | "name": "#%%\n"
217 | }
218 | },
219 | "outputs": [],
220 | "source": [
221 | "# Add filter queries\n",
222 | "filters = ['language::en', # Logs in English\n",
223 | " 'request.context.system.dialog_turn_counter::1', # The first user utterance in each conversation\n",
224 | " 'response.intents:confidence<0.6', # filter out high intent confidence utterance\n",
225 |     "           'response.intents:confidence>0.1', # filter out low intent confidence utterance\n",
226 | " ]\n",
227 | "\n",
228 | "# Query 20,000 logs using filename 'log_first_utterances.json'\n",
229 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n",
230 | " sdk_v2_object=sdk_v2_object,\n",
231 | " assistant_info=assistant_information,\n",
232 | " num_logs=20000,\n",
233 | " filename='log_for_intent_recommendation.json',\n",
234 | " filters=filters,\n",
235 | " overwrite=True,\n",
236 | " version=2)\n",
237 | "\n",
238 | "# Or, load previously saved logs.\n",
239 | "logs = load_logs_from_file(filename='log_for_intent_recommendation.json')"
240 | ]
241 | },
242 | {
243 | "cell_type": "markdown",
244 | "metadata": {},
245 | "source": [
246 | "Export logs to a CSV file for intent recommendation\n",
247 | "\n",
248 | "- `logs`: the logs object from `get_logs` or `load_logs_from_file`\n",
249 | "- `filename`: the CSV output filename\n",
250 | "- Use `deduplicate` to specify if duplicate messages should be removed (default: `True`)\n",
251 | "- Use `project` to specify project when using Watson Studio (default: `None`)\n",
252 | "- Use `overwrite` to overwrite if `filename` exists (default: `False`)\n",
253 | "- Use `min_length` to filter out utterances that are less than certain number of tokens (exclusive, default: `3`)\n",
254 | "- Use `max_length` to filter out utterances that are more than certain number of tokens (exclusive, default: `20`)"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": null,
260 | "metadata": {
261 | "pycharm": {
262 | "name": "#%%\n"
263 | }
264 | },
265 | "outputs": [],
266 | "source": [
267 | "export_csv_for_intent_recommendation(logs,\n",
268 | " filename='log_for_intent_recommendation.csv',\n",
269 | " deduplicate=True,\n",
270 | " min_length=3,\n",
271 | " max_length=20,\n",
272 | " overwrite=True)"
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "metadata": {},
278 | "source": [
279 | "__C. More examples__\n",
280 | "\n",
281 | "Download logs of the first user utterance in each conversation for a period of time"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": null,
287 | "metadata": {},
288 | "outputs": [],
289 | "source": [
290 | "# Add filter queries\n",
291 | "filters = ['language::en', # Logs in English \n",
292 | " 'request.context.system.dialog_turn_counter::1', # The first user utterance in each conversation\n",
293 | " 'response_timestamp>=2020-03-01'] # Logs with response timestamp later or equal to 2020-03-01\n",
294 | "\n",
295 | "# Query 20,000 logs using filename 'log_first_utterances.json'\n",
296 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n",
297 | " sdk_v2_object=sdk_v2_object,\n",
298 | " assistant_info=assistant_information,\n",
299 | " num_logs=20000,\n",
300 | " filename='log_first_utterances.json',\n",
301 | " filters=filters,\n",
302 | " overwrite=True,\n",
303 | " version=2)"
304 | ]
305 | },
306 | {
307 | "cell_type": "markdown",
308 | "metadata": {},
309 | "source": [
310 | "Download logs containing specific input text"
311 | ]
312 | },
313 | {
314 | "cell_type": "code",
315 | "execution_count": null,
316 | "metadata": {
317 | "pycharm": {
318 | "name": "#%%\n"
319 | }
320 | },
321 | "outputs": [],
322 | "source": [
323 | "# Add filter queries\n",
324 | "filters = ['language::en', # Logs in English\n",
325 | " 'request.input.text::\"Is there an article on how to make cherry pie?\"'] # Logs with input text: \"Is there an article on how to make cherry pie?\"\n",
326 | "\n",
327 | "# Query 20,000 logs using filename 'log_input.json'\n",
328 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n",
329 | " sdk_v2_object=sdk_v2_object,\n",
330 | " assistant_info=assistant_information,\n",
331 | " num_logs=20000,\n",
332 | " filename='log_input.json',\n",
333 | " filters=filters,\n",
334 | " overwrite=True,\n",
335 | " version=2)"
336 | ]
337 | },
338 | {
339 | "cell_type": "markdown",
340 | "metadata": {},
341 | "source": [
342 |     "Download logs triggering a specific intent"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": null,
348 | "metadata": {},
349 | "outputs": [],
350 | "source": [
351 | "# Add filter queries\n",
352 | "filters = ['language::en', # Logs in English\n",
353 | " 'response.intents:intent::\"article_food\"'] # Intent been triggered: article_food\n",
354 | "# Query 20,000 logs using filename log_intent.json\n",
355 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n",
356 | " sdk_v2_object=sdk_v2_object,\n",
357 | " assistant_info=assistant_information,\n",
358 | " num_logs=20000,\n",
359 | " filename='log_intent.json',\n",
360 | " filters=filters,\n",
361 | " overwrite=True,\n",
362 | " version=2)"
363 | ]
364 | },
365 | {
366 | "cell_type": "markdown",
367 | "metadata": {},
368 | "source": [
369 |     "Download logs triggering a specific intent with a confidence range"
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": null,
375 | "metadata": {},
376 | "outputs": [],
377 | "source": [
378 | "# Add filter queries\n",
379 | "filters = ['language::en', # Logs in English\n",
380 | " 'response.intents:(intent:article_food,confidence<0.25)'] # Intent been triggered: article_food with confidence below 0.25\n",
381 | "# Query 20,000 logs using filename log_intent.json\n",
382 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n",
383 | " sdk_v2_object=sdk_v2_object,\n",
384 | " assistant_info=assistant_information,\n",
385 | " num_logs=20000,\n",
386 | " filename='log_intent_confidence.json',\n",
387 | " filters=filters,\n",
388 | " overwrite=True,\n",
389 | " version=2)"
390 | ]
391 | },
392 | {
393 | "cell_type": "markdown",
394 | "metadata": {},
395 | "source": [
396 |     "Download logs that visited a specific node"
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": null,
402 | "metadata": {},
403 | "outputs": [],
404 | "source": [
405 | "# Fetch assistant definition and save to a JSON file\n",
406 | "df_assistant = get_assistant_definition(sdk_v1_object, assistant_information, filename='assistant_definition.json')\n",
407 | "\n",
408 | "# Get all intents\n",
409 | "assistant_intents = [intent['intent'] for intent in df_assistant['intents'].values[0]] \n",
410 | "\n",
411 | "# Get all dialog nodes\n",
412 | "assistant_nodes = pd.DataFrame(df_assistant['dialog_nodes'].values[0])\n",
413 | "\n",
414 |     "# Find mappings between node name and node id\n",
415 | "node_title_map = dict()\n",
416 | "for idx, node in assistant_nodes.iterrows():\n",
417 | " if str(node['title']) != 'nan':\n",
418 | " node_title_map[node['title']] = node['dialog_node']\n",
419 | "node_df = pd.DataFrame(node_title_map.items())\n",
420 | "node_df.columns = {'node_name', 'node_id'}\n",
421 | "\n",
422 | "# Add filter queries\n",
423 | "intent_name = 'book_short_dialog'\n",
424 | "if intent_name in node_title_map:\n",
425 | " filters = ['language::en', # Logs in English\n",
426 | " 'response.output:nodes_visited::[{}]'.format(node_title_map[intent_name])] # Visited node: book_short_dialog\n",
427 | " # Query 20,000 logs using filename log_node.json\n",
428 | " logs = get_logs(sdk_v1_object=sdk_v1_object,\n",
429 | " sdk_v2_object=sdk_v2_object,\n",
430 | " assistant_info=assistant_information,\n",
431 | " num_logs=20000,\n",
432 | " filename='log_node.json',\n",
433 | " filters=filters,\n",
434 | " overwrite=True,\n",
435 | " version=2)\n",
436 | "else:\n",
437 | " print('Cannot find {} in skill definition.'.format(intent_name))"
438 | ]
439 | },
440 | {
441 | "cell_type": "markdown",
442 | "metadata": {},
443 | "source": [
444 | "Copyright © 2021 IBM. This notebook and its source code are released under the terms of the MIT License."
445 | ]
446 | }
447 | ],
448 | "metadata": {
449 | "kernelspec": {
450 | "display_name": "Python 3",
451 | "language": "python",
452 | "name": "python3"
453 | },
454 | "language_info": {
455 | "codemirror_mode": {
456 | "name": "ipython",
457 | "version": 3
458 | },
459 | "file_extension": ".py",
460 | "mimetype": "text/x-python",
461 | "name": "python",
462 | "nbconvert_exporter": "python",
463 | "pygments_lexer": "ipython3",
464 | "version": "3.7.10"
465 | }
466 | },
467 | "nbformat": 4,
468 | "nbformat_minor": 4
469 | }
470 |
--------------------------------------------------------------------------------
/notebook/Logs Notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |     "# Watson Assistant Logs Notebook"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Introduction\n",
15 | "This notebook demonstrates how to download Watson Assistant user-generated logs based on different criteria.\n",
16 | "\n",
17 | "### Programming language and environment\n",
18 | "Some familiarity with Python is recommended. This notebook runs on Python 3.7+"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "\n",
26 | "## 1. Configuration and Setup\n",
27 | "\n",
28 | "In this section, we add data and workspace access credentials, import required libraries and functions.\n",
29 | "\n",
30 | "### 1.1 Install Assistant Improve Toolkit"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": null,
36 | "metadata": {
37 | "scrolled": true
38 | },
39 | "outputs": [],
40 | "source": [
41 | "!pip install --user --upgrade \"assistant-improve-toolkit\";"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "### 1.2 Import functions used in the notebook"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "# Import Watson Assistant related functions\n",
58 | "from ibm_cloud_sdk_core.authenticators import IAMAuthenticator\n",
59 | "import pandas as pd\n",
60 | "import json\n",
61 | "from ibm_watson import AssistantV1, AssistantV2\n",
62 | "\n",
63 | "from assistant_improve_toolkit.watson_assistant_func import get_logs\n",
64 | "from assistant_improve_toolkit.watson_assistant_func import get_assistant_definition\n",
65 | "from assistant_improve_toolkit.watson_assistant_func import load_logs_from_file\n",
66 | "from assistant_improve_toolkit.watson_assistant_func import export_csv_for_intent_recommendation"
67 | ]
68 | },
69 | {
70 | "cell_type": "markdown",
71 | "metadata": {},
72 | "source": [
73 | "## 2. Load and format data "
74 | ]
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "metadata": {
79 | "pycharm": {
80 | "name": "#%% md\n"
81 | }
82 | },
83 | "source": [
84 | "### 2.1 Add Watson Assistant configuration\n",
85 | "\n",
86 | "This notebook uses Watson Assistant v1 API to access skill definition. To access message logs, the notebook uses both v1 and v2 APIs. You authenticate to the API by using IBM Cloud Identity and Access Management (IAM).\n",
87 | "\n",
88 | "You can access the values you need for this configuration from the Watson Assistant user interface. Go to the Skills page and select View API Details from the menu of a skill title.\n",
89 | "\n",
90 | "- The string to set in the call to `IAMAuthenticator` is your Api Key under Service Credentials\n",
91 | "- The string to set for version is a date in the format version=YYYY-MM-DD. The version date string determines which version of the Watson Assistant V1 API will be called. For more information about version, see [Versioning](https://cloud.ibm.com/apidocs/assistant/assistant-v1#versioning).\n",
92 | "- The string to pass into `assistant.set_service_url` is the base URL of Watson Assistant. For example, for us-south, the endpoint is `https://api.us-south.assistant.watson.cloud.ibm.com`. This value will be different depending on the location of your service instance. For more information, see [Service Endpoint](https://cloud.ibm.com/apidocs/assistant/assistant-v1?code=python#service-endpoint)"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": null,
98 | "metadata": {},
99 | "outputs": [],
100 | "source": [
101 | "# Provide credentials to connect to assistant\n",
102 | "authenticator = IAMAuthenticator('API_KEY')\n",
103 | "\n",
104 | "# Initialize v1 SDK instance\n",
105 | "sdk_v1_object = AssistantV1(version='2020-04-01', authenticator = authenticator)\n",
106 | "sdk_v1_object.set_service_url('https://api.us-south.assistant.watson.cloud.ibm.com')\n",
107 | "\n",
108 | "# Initialize v2 SDK instance\n",
109 | "sdk_v2_object = AssistantV2(version='2020-09-24', authenticator = authenticator)\n",
110 | "sdk_v2_object.set_service_url('https://api.us-south.assistant.watson.cloud.ibm.com')\n"
111 | ]
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "metadata": {},
116 | "source": [
117 | "Add the information of your assistant. To load the skill of an assistant in the next section, you need to provide either Workspace ID or Skill ID. The values can be found on the View API Details page. If you are using versioning in Watson Assistant, this ID represents the Development version of your skill definition.\n",
118 | "\n",
119 | "For more information about authentication and finding credentials in the Watson Assistant UI, please see [Watson Assistant v1 API](https://cloud.ibm.com/apidocs/assistant/assistant-v1) in the offering documentation.\n"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "assistant_information = {'workspace_id' : '',\n",
129 | " 'skill_id' : '',\n",
130 | " 'assistant_id' : ''}"
131 | ]
132 | },
133 | {
134 | "cell_type": "markdown",
135 | "metadata": {},
136 | "source": [
137 | "### 2.2 Fetch and load logs\n",
138 | "\n",
139 | "- `num_logs`: number of logs to fetch\n",
140 | "- Use `filename` to specify if logs are saved as a JSON file (default: `None`)\n",
141 | "- Apply `filters` while fetching logs (default: `[]`), e.g.,\n",
142 | " - removing empty input: `meta.summary.input_text_length_i>0`\n",
143 | " - fetching logs generated after a timestamp: `response_timestamp>=2018-09-18`\n",
144 | " \n",
145 | " Refer to [Filter query reference](https://cloud.ibm.com/docs/services/assistant?topic=assistant-filter-reference) for\n",
146 | " more information.\n",
147 | "- Use `project` to specify project when using Watson Studio (default: `None`)\n",
148 | "- Use `overwrite` to overwrite if `filename` exists (default: `False`)\n"
149 | ]
150 | },
151 | {
152 | "cell_type": "markdown",
153 | "metadata": {},
154 | "source": [
155 | "__A. Download all logs for a period of time (and save as a JSON file for Measure notebook)__"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {},
162 | "outputs": [],
163 | "source": [
164 | "# Add filter queries\n",
165 | "filters = ['language::en', # Logs in English\n",
166 | " 'meta.summary.input_text_length_i>0', # Logs with non empty input \n",
167 | " 'response_timestamp>=2020-03-01'] # Logs with response timestamp later or equal to 2020-03-01\n",
168 | "\n",
169 | "# Query 20,000 logs\n",
170 | "filename = 'logs.json'\n",
171 | "\n",
172 | "# Fetch 20,000 logs, set `overwrite` to True to reload logs, set version=2 to use v2 log apis\n",
173 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n",
174 | " sdk_v2_object=sdk_v2_object,\n",
175 | " assistant_info=assistant_information,\n",
176 | " num_logs=20000,\n",
177 | " filename=filename,\n",
178 | " filters=filters,\n",
179 | " overwrite=True,\n",
180 | " project=None,\n",
181 | " version=2)"
182 | ]
183 | },
184 | {
185 | "cell_type": "markdown",
186 | "metadata": {},
187 | "source": [
188 | "__B. Download and export logs for intent recommendation__\n",
189 | "\n",
190 | "For intent recommendation, by default, an utterance is considered only when:\n",
191 | "- It is the first user utterance in each conversation\n",
192 | "- its confidence `response.intents::confidence` is between 0.1 and 0.6 (exclusive),\n",
193 | "- its token count is between 3 and 20 (exclusive), and\n",
194 | "- it is not a duplicate of the other utterances in the logs.\n",
195 | "\n",
196 | "This example adds confidence filters when calling `get_logs`, and then exports the utterances to a CSV file by calling\n",
197 | "`export_csv_for_intent_recommendation` with token count filter and deduplication applied.\n"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "metadata": {
204 | "pycharm": {
205 | "name": "#%%\n"
206 | }
207 | },
208 | "outputs": [],
209 | "source": [
210 | "# Add filter queries\n",
211 | "filters = ['language::en', # Logs in English\n",
212 | " 'request.context.system.dialog_turn_counter::1', # The first user utterance in each conversation\n",
213 | " 'response.intents:confidence<0.6', # filter out high intent confidence utterance\n",
214 | "            'response.intents:confidence>0.1', # filter out low intent confidence utterance\n",
215 | " ]\n",
216 | "\n",
217 | "# Query 20,000 logs using filename 'log_first_utterances.json'\n",
218 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n",
219 | " sdk_v2_object=sdk_v2_object,\n",
220 | " assistant_info=assistant_information,\n",
221 | " num_logs=20000,\n",
222 | " filename='log_for_intent_recommendation.json',\n",
223 | " filters=filters,\n",
224 | " overwrite=True,\n",
225 | " version=2)\n",
226 | "\n",
227 | "# Or, load previously saved logs.\n",
228 | "logs = load_logs_from_file(filename='log_for_intent_recommendation.json')"
229 | ]
230 | },
231 | {
232 | "cell_type": "markdown",
233 | "metadata": {},
234 | "source": [
235 | "Export logs to a CSV file for intent recommendation\n",
236 | "\n",
237 | "- `logs`: the logs object from `get_logs` or `load_logs_from_file`\n",
238 | "- `filename`: the CSV output filename\n",
239 | "- Use `deduplicate` to specify if duplicate messages should be removed (default: `True`)\n",
240 | "- Use `project` to specify project when using Watson Studio (default: `None`)\n",
241 | "- Use `overwrite` to overwrite if `filename` exists (default: `False`)\n",
242 | "- Use `min_length` to filter out utterances that are less than certain number of tokens (exclusive, default: `3`)\n",
243 | "- Use `max_length` to filter out utterances that are more than certain number of tokens (exclusive, default: `20`)"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": null,
249 | "metadata": {
250 | "pycharm": {
251 | "name": "#%%\n"
252 | }
253 | },
254 | "outputs": [],
255 | "source": [
256 | "export_csv_for_intent_recommendation(logs,\n",
257 | " filename='log_for_intent_recommendation.csv',\n",
258 | " deduplicate=True,\n",
259 | " min_length=3,\n",
260 | " max_length=20,\n",
261 | " overwrite=False)"
262 | ]
263 | },
264 | {
265 | "cell_type": "markdown",
266 | "metadata": {},
267 | "source": [
268 | "__C. More examples__\n",
269 | "\n",
270 | "Download logs of the first user utterance in each conversation for a period of time"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": null,
276 | "metadata": {},
277 | "outputs": [],
278 | "source": [
279 | "# Add filter queries\n",
280 | "filters = ['language::en', # Logs in English \n",
281 | " 'request.context.system.dialog_turn_counter::1', # The first user utterance in each conversation\n",
282 | " 'response_timestamp>=2020-03-01'] # Logs with response timestamp later or equal to 2020-03-01\n",
283 | "\n",
284 | "# Query 20,000 logs using filename 'log_first_utterances.json'\n",
285 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n",
286 | " sdk_v2_object=sdk_v2_object,\n",
287 | " assistant_info=assistant_information,\n",
288 | " num_logs=20000,\n",
289 | " filename='log_first_utterances.json',\n",
290 | " filters=filters,\n",
291 | " overwrite=True,\n",
292 | " version=2)"
293 | ]
294 | },
295 | {
296 | "cell_type": "markdown",
297 | "metadata": {},
298 | "source": [
299 | "Download logs containing specific input text"
300 | ]
301 | },
302 | {
303 | "cell_type": "code",
304 | "execution_count": null,
305 | "metadata": {
306 | "pycharm": {
307 | "name": "#%%\n"
308 | }
309 | },
310 | "outputs": [],
311 | "source": [
312 | "# Add filter queries\n",
313 | "filters = ['language::en', # Logs in English\n",
314 | " 'request.input.text::\"Is there an article on how to make cherry pie?\"'] # Logs with input text: \"Is there an article on how to make cherry pie?\"\n",
315 | "\n",
316 | "# Query 20,000 logs using filename 'log_input.json'\n",
317 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n",
318 | " sdk_v2_object=sdk_v2_object,\n",
319 | " assistant_info=assistant_information,\n",
320 | " num_logs=20000,\n",
321 | " filename='log_input.json',\n",
322 | " filters=filters,\n",
323 | " overwrite=True,\n",
324 | " version=2)"
325 | ]
326 | },
327 | {
328 | "cell_type": "markdown",
329 | "metadata": {},
330 | "source": [
331 | "Download logs triggering a specific intent"
332 | ]
333 | },
334 | {
335 | "cell_type": "code",
336 | "execution_count": null,
337 | "metadata": {},
338 | "outputs": [],
339 | "source": [
340 | "# Add filter queries\n",
341 | "filters = ['language::en', # Logs in English\n",
342 | " 'response.intents:intent::\"article_food\"'] # Intent been triggered: article_food\n",
343 | "# Query 20,000 logs using filename log_intent.json\n",
344 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n",
345 | " sdk_v2_object=sdk_v2_object,\n",
346 | " assistant_info=assistant_information,\n",
347 | " num_logs=20000,\n",
348 | " filename='log_intent.json',\n",
349 | " filters=filters,\n",
350 | " overwrite=True,\n",
351 | " version=2)"
352 | ]
353 | },
354 | {
355 | "cell_type": "markdown",
356 | "metadata": {},
357 | "source": [
358 | "Download logs triggering a specific intent with a confidence range"
359 | ]
360 | },
361 | {
362 | "cell_type": "code",
363 | "execution_count": null,
364 | "metadata": {},
365 | "outputs": [],
366 | "source": [
367 | "# Add filter queries\n",
368 | "filters = ['language::en', # Logs in English\n",
369 | " 'response.intents:(intent:article_food,confidence<0.25)'] # Intent been triggered: article_food with confidence below 0.25\n",
370 | "# Query 20,000 logs using filename log_intent.json\n",
371 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n",
372 | " sdk_v2_object=sdk_v2_object,\n",
373 | " assistant_info=assistant_information,\n",
374 | " num_logs=20000,\n",
375 | " filename='log_intent_confidence.json',\n",
376 | " filters=filters,\n",
377 | " overwrite=True,\n",
378 | " version=2)"
379 | ]
380 | },
381 | {
382 | "cell_type": "markdown",
383 | "metadata": {},
384 | "source": [
385 | "Download logs that visited a specific node"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": null,
391 | "metadata": {},
392 | "outputs": [],
393 | "source": [
394 | "# Fetch assistant definition and save to a JSON file\n",
395 | "df_assistant = get_assistant_definition(sdk_v1_object, assistant_information, filename='assistant_definition.json')\n",
396 | "\n",
397 | "# Get all intents\n",
398 | "assistant_intents = [intent['intent'] for intent in df_assistant['intents'].values[0]] \n",
399 | "\n",
400 | "# Get all dialog nodes\n",
401 | "assistant_nodes = pd.DataFrame(df_assistant['dialog_nodes'].values[0])\n",
402 | "\n",
403 | "# Find mappings between node name and node id\n",
404 | "node_title_map = dict()\n",
405 | "for idx, node in assistant_nodes.iterrows():\n",
406 | " if str(node['title']) != 'nan':\n",
407 | " node_title_map[node['title']] = node['dialog_node']\n",
408 | "node_df = pd.DataFrame(node_title_map.items())\n",
409 | "node_df.columns = {'node_name', 'node_id'}\n",
410 | "\n",
411 | "# Add filter queries\n",
412 | "intent_name = 'book_short_dialog'\n",
413 | "if intent_name in node_title_map:\n",
414 | " filters = ['language::en', # Logs in English\n",
415 | " 'response.output:nodes_visited::[{}]'.format(node_title_map[intent_name])] # Visited node: book_short_dialog\n",
416 | " # Query 20,000 logs using filename log_node.json\n",
417 | " logs = get_logs(sdk_v1_object=sdk_v1_object,\n",
418 | " sdk_v2_object=sdk_v2_object,\n",
419 | " assistant_info=assistant_information,\n",
420 | " num_logs=20000,\n",
421 | " filename='log_node.json',\n",
422 | " filters=filters,\n",
423 | " overwrite=True,\n",
424 | " version=2)\n",
425 | "else:\n",
426 | " print('Cannot find {} in skill definition.'.format(intent_name))"
427 | ]
428 | },
429 | {
430 | "cell_type": "markdown",
431 | "metadata": {},
432 | "source": [
433 | "Copyright © 2021 IBM. This notebook and its source code are released under the terms of the MIT License."
434 | ]
435 | }
436 | ],
437 | "metadata": {
438 | "kernelspec": {
439 | "display_name": "Python 3",
440 | "language": "python",
441 | "name": "python3"
442 | },
443 | "language_info": {
444 | "codemirror_mode": {
445 | "name": "ipython",
446 | "version": 3
447 | },
448 | "file_extension": ".py",
449 | "mimetype": "text/x-python",
450 | "name": "python",
451 | "nbconvert_exporter": "python",
452 | "pygments_lexer": "ipython3",
453 | "version": "3.8.8"
454 | }
455 | },
456 | "nbformat": 4,
457 | "nbformat_minor": 4
458 | }
459 |
--------------------------------------------------------------------------------
/notebook/README.md:
--------------------------------------------------------------------------------
1 | ## Table of Contents
2 |
3 | #### `Effectiveness Notebook.ipynb` ####
4 | - A Jupyter notebook file. Effectiveness notebook helps you understand relative performance of each intent and entity as well as the confusion between your intents. This information helps you prioritize your improvement effort.
5 |
6 | #### `Measure Notebook.ipynb` ####
7 | - A Jupyter notebook file. Measure notebook contains a set of automated metrics that help you monitor and understand the behavior of your system. The goal is to understand where your assistant is doing well vs where it isn’t, and to focus your improvement effort to one of the problem areas identified.
8 |
9 | #### `Logs Notebook.ipynb` ####
10 | - A Jupyter notebook file. Logs notebook helps you fetch logs using Watson Assistant API. You can fetch logs with various filters, and save them as a JSON file, or export the utterances in the logs into a CSV file. The JSON file can be loaded into the Measure notebook. The CSV file can be uploaded to the Watson Assistant service for intent recommendation. Alternatively, you can run python scripts [`fetch_logs`](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/blob/master/src/main/python/fetch_logs.py) and [`export_csv_for_intent_recommendation`](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/blob/master/src/main/python/export_csv_for_intent_recommendation.py) to fetch logs and export them to [intent recommendation CSV](https://cloud.ibm.com/docs/assistant?topic=assistant-intent-recommendations#intent-recommendations-data-resources), respectively. Run `python fetch_logs.py -h` and `python export_csv_for_intent_recommendation.py -h` for usage. For example, to generate intent recommendation CSV from logs:
11 |
12 | ```
13 | # Fetch logs by keeping first user utterances in conversations with confidence `response.intents::confidence` between 0.1 and 0.6, and save to JSON file `OUTPUT_JSON_FILE`
14 |
15 | python src/main/python/fetch_logs.py \
16 | --url URL --version VERSION --apikey API_KEY --skill_id SKILL_ID --assistant_id ASSISTANT_ID \
17 | --filters "language::en" "request.context.system.dialog_turn_counter::1" "response.intents:confidence<0.6" "response.intents:confidence>0.1" \
18 | --output_json OUTPUT_JSON_FILE
19 | ```
20 | ```
21 | # Take the fetched logs in `OUTPUT_JSON_FILE`, filter out utterances that are either too short (fewer than 3 tokens) or too long (more than 20 tokens), remove duplicates, and export them to a CSV file `OUTPUT_CSV_FILE` for intent recommendation
22 |
23 | python src/main/python/export_csv_for_intent_recommendation.py \
24 | --input_json OUTPUT_JSON_FILE --output_csv OUTPUT_CSV_FILE \
25 | --deduplicate --min_length 3 --max_length 20
26 | ```
27 |
28 | #### `IBM Watson Assistant Continuous Improvement Best Practices.pdf` ####
29 | - IBM Watson Assistant Continuous Improvement Best Practices document.
30 |
31 | #### `data` ####
32 | - A folder containing an example workspace with sample logs and an annotated log file for demonstration in notebooks.
33 |
34 | #### `imgs` ####
35 | - A folder containing images used in notebooks.
36 |
--------------------------------------------------------------------------------
/notebook/data/annotation.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/data/annotation.xlsx
--------------------------------------------------------------------------------
/notebook/data/book_recommender_logs.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/data/book_recommender_logs.gz
--------------------------------------------------------------------------------
/notebook/data/book_recommender_skill.json:
--------------------------------------------------------------------------------
1 | {"name": "Book Recommender", "created": "2020-04-24T20:08:40.700Z", "intents": [{"intent": "book_long", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a long book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "I like a long novel", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good, long story.", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "I need a good, long, story, that will last me all summer?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is a good book that will last a long time?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "two", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "two", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "both", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "the", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "dos", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "one", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_money", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a book about making money", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Is there a good book about investing?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best stock book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Is there a great financial book?", 
"created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good stock market book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_history", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a history book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend an book about World War 2?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is a good book about African history?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Tell me a good book about world history?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend an book about early Egypt?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_scifi", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a scifi book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "I like a science fiction book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you give me a science fiction book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good scifi book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good science fiction book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_computer", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a computer article", "created": "2020-04-24T20:08:40.700Z", "updated": 
"2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good computer article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What computer article should I read?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Is there a good computer article that was just releases?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you tell me a good article about computers", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_fantasy", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a fantasy article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good fantasy article?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a fantasy quick read?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you tell me a good trending fantasy article?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Give me a fantasy article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_food", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I would like an book about food", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good cooking book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best book about Indian cuisine?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good book comparing international foods?", "created": "2020-04-24T20:08:40.700Z", "updated": 
"2020-04-24T20:08:40.700Z"}, {"text": "What is the best book about healthy eating?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_money", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a article about making money", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good financial book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "I there a good current article about making money", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good investment article?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good article about stocks?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "hi", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "hello", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "yo", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Hi there!", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Hello there", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "hi", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_history", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a history article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend an article about World War 2?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is 
a good article about African history?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Tell me a good article about world history?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend an article about early Egypt?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_selfimprove", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a self improvement article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good article about self improvement?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best goal setting article?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you tell me a good article about improving my life?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best trending self improvement article?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "hello", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "hi", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "hello", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "hi there", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "hello there", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "oy", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": "hello"}, {"intent": "one", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "two", 
"created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "single", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "the", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "uno", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "one", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_food", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I would like an article about food", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good cooking article?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best article about Indian cuisine?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good article comparing international foods?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is the best article about healthy eating?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_politics", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a article about politics", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a article about the Republicans?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Is there a good article about policies of the Democratic Party?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best article about Indian politics?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is 
the best article about politics in England?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_selfimprove", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a self improvement book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good book about self improvement?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best goal setting book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you tell me a good book about improving my life?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best trending self improvement book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_politics", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a book about politics", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a book about the Republicans?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Is there a good book about policies of the Democratic Party?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best book about Indian politics?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is the best book about politics in England?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_short", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a short book", "created": "2020-04-24T20:08:40.700Z", "updated": 
"2020-04-24T20:08:40.700Z"}, {"text": "I like a short novel", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good, short story.", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "I need a good, short, story, that I can read on the plane?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is a good book that won't take long to read?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_scifi", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a scifi article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "I like a science fiction article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you give me a science fiction article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good scifi article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good science fiction article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_computer", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a computer book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good book about computers?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good book about the history of computers?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best computer book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "I 
want a really good story about the evolution of computers", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_fantasy", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a fantasy book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good fantasy book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a fantasy story?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you tell me a good trending fantasy book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Give me a fantasy book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}], "updated": "2020-04-24T20:10:53.922Z", "entities": [], "language": "en", "metadata": {"api_version": {"major_version": "v1", "minor_version": "2020-02-05"}, "alternate_responses": true}, "description": "", "dialog_nodes": [{"type": "standard", "title": "Anything else", "output": {"generic": [{"values": [{"text": "I didn't understand. You can try rephrasing."}, {"text": "Can you reword your statement? 
I'm not understanding."}, {"text": "I didn't get your meaning."}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "anything_else", "dialog_node": "Anything else", "previous_sibling": "node_5_1582040246957", "disambiguation_opt_out": false}, {"type": "standard", "title": "two", "output": {"generic": [{"values": [{"text": "You chose two"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#two", "user_label": "two", "dialog_node": "node_5_1582040246957", "previous_sibling": "node_10_1582039896349"}, {"type": "standard", "title": "one", "output": {"generic": [{"values": [{"text": "You chose one"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#one", "user_label": "one", "dialog_node": "node_10_1582039896349", "previous_sibling": "node_6_1582040330964"}, {"type": "standard", "title": "hi", "output": {"generic": [{"values": [{"text": "hi"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#hi", "user_label": "hi", "dialog_node": "node_6_1582040330964", "previous_sibling": "node_6_1582040347726"}, {"type": "standard", "title": "hello", "output": {"generic": [{"values": [{"text": "hello"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#hello", "user_label": "hello", "dialog_node": "node_6_1582040347726", "previous_sibling": "node_1_1582049209157"}, {"type": "standard", "title": "book_computer", "output": {"generic": [{"values": [{"text": "Here is a computer book. 
(book_computer)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_computer", "user_label": "computer book", "dialog_node": "node_1_1582049209157", "previous_sibling": "node_6_1582049261648"}, {"type": "standard", "title": "book_fantasy_dialog", "output": {"generic": [{"values": [{"text": "Here is a fantasy book. (book_fantasy)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_fantasy", "user_label": "fantasy book", "dialog_node": "node_6_1582049261648", "previous_sibling": "node_2_1582049297943"}, {"type": "standard", "title": "book_history_dialog", "output": {"generic": [{"values": [{"text": "Here is a history book (book_history)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_history", "user_label": "history book", "dialog_node": "node_2_1582049297943", "previous_sibling": "node_1_1582049324143"}, {"type": "standard", "title": "book_money_dialog", "output": {"generic": [{"values": [{"text": "Here is a book about money (book_money)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_money", "user_label": "money book", "dialog_node": "node_1_1582049324143", "previous_sibling": "node_2_1582049373395"}, {"type": "standard", "title": "book_scifi_dialog", "output": {"generic": [{"values": [{"text": "Here is your scify book (book_scifi)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_scifi", "user_label": "scify book", "dialog_node": "node_2_1582049373395", "previous_sibling": "node_3_1582049417508"}, 
{"type": "standard", "title": "book_selfimprove_dialog", "output": {"generic": [{"values": [{"text": "Here is your self improvement book"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_selfimprove", "user_label": "self improve book", "dialog_node": "node_3_1582049417508", "previous_sibling": "node_5_1582049713134"}, {"type": "standard", "title": "book_politics_dialog", "output": {"generic": [{"values": [{"text": "Here is a book about politics. (book_politics)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_politics", "user_label": "politics book", "dialog_node": "node_5_1582049713134", "previous_sibling": "node_8_1582050051508"}, {"type": "standard", "title": "book_long_dialog", "output": {"generic": [{"values": [{"text": "Here is a long book. (book_long)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_long", "user_label": "long book", "dialog_node": "node_8_1582050051508", "previous_sibling": "node_5_1582050079858"}, {"type": "standard", "title": "book_short_dialog", "output": {"generic": [{"values": [{"text": "Here is a short book. 
(book_short)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_short", "user_label": "short book", "dialog_node": "node_5_1582050079858", "previous_sibling": "node_2_1582133481345"}, {"type": "standard", "title": "article_computer_dialog", "output": {"generic": [{"values": [{"text": "Here is your computer article (article_computer)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_computer", "user_label": "computer article", "dialog_node": "node_2_1582133481345", "previous_sibling": "node_7_1582133509246"}, {"type": "standard", "title": "article_fantasy_dialog", "output": {"generic": [{"values": [{"text": "Here is your fantasy article. (article_fantasy)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_fantasy", "user_label": "fantasy article", "dialog_node": "node_7_1582133509246", "previous_sibling": "node_9_1582133556135"}, {"type": "standard", "title": "article_history_dialog", "output": {"generic": [{"values": [{"text": "Here is your history article. (article_history)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_history", "user_label": "history article", "dialog_node": "node_9_1582133556135", "previous_sibling": "node_1_1582133629623"}, {"type": "standard", "title": "article_money_dialog", "output": {"generic": [{"values": [{"text": "Here is your article about money. 
(article_money)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_money", "user_label": "money article", "dialog_node": "node_1_1582133629623", "previous_sibling": "node_10_1582133662325"}, {"type": "standard", "title": "article_politics_dialog", "output": {"generic": [{"values": [{"text": "Here is your article about politics. (article_politics)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_politics", "user_label": "politics article", "dialog_node": "node_10_1582133662325", "previous_sibling": "node_8_1582133720490"}, {"type": "standard", "title": "article_scifi_dialog", "output": {"generic": [{"values": [{"text": "Here is your article about scifi. (article_scifi)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_scifi", "user_label": "scifi article", "dialog_node": "node_8_1582133720490", "previous_sibling": "node_1_1582134460437"}, {"type": "standard", "title": "article_selfimprove_dialog", "output": {"generic": [{"values": [{"text": "Here is an article about self improvement. (article_selfimprove)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_selfimprove", "user_label": "self improve article", "dialog_node": "node_1_1582134460437", "previous_sibling": "node_2_1582134542638"}, {"type": "standard", "title": "article_food_dialog", "output": {"generic": [{"values": [{"text": "Here is an article about food. 
(article_food)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_food", "user_label": "food article", "dialog_node": "node_2_1582134542638", "previous_sibling": "node_3_1582134595076"}, {"type": "standard", "title": "book_food_dialog", "output": {"generic": [{"values": [{"text": "Here is a book about food. (book_food)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_food", "user_label": "food book", "dialog_node": "node_3_1582134595076", "previous_sibling": "Welcome"}, {"type": "standard", "title": "Welcome", "output": {"generic": [{"values": [{"text": "Hello. How can I help you?"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "welcome", "dialog_node": "Welcome"}], "workspace_id": "1237b67c-f780-46fe-b693-a4b7b2d6f47f", "counterexamples": [], "system_settings": {"auto_learn": {"apply": false}, "disambiguation": {"prompt": "Did you mean:", "enabled": true, "randomize": true, "max_suggestions": 5, "suggestion_text_policy": "title", "none_of_the_above_prompt": "None of the above", "include_alternate_responses": true}, "human_agent_assist": {"prompt": "Did you mean:"}, "alternate_responses": {"enabled": true}, "spelling_auto_correct": true}, "learning_opt_out": true, "status": "Available"}
--------------------------------------------------------------------------------
/notebook/imgs/analyze_process.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/analyze_process.png
--------------------------------------------------------------------------------
/notebook/imgs/box_zoom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/box_zoom.png
--------------------------------------------------------------------------------
/notebook/imgs/click.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/click.png
--------------------------------------------------------------------------------
/notebook/imgs/effectiveness_overall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/effectiveness_overall.png
--------------------------------------------------------------------------------
/notebook/imgs/effort_computation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/effort_computation.png
--------------------------------------------------------------------------------
/notebook/imgs/find_data_icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/find_data_icon.png
--------------------------------------------------------------------------------
/notebook/imgs/measure_overall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/measure_overall.png
--------------------------------------------------------------------------------
/notebook/imgs/measure_process.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/measure_process.png
--------------------------------------------------------------------------------
/notebook/imgs/reset.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/reset.png
--------------------------------------------------------------------------------
/notebook/imgs/save.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/save.png
--------------------------------------------------------------------------------
/notebook/imgs/suggestions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/suggestions.png
--------------------------------------------------------------------------------
/notebook/imgs/wheel_zoom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/wheel_zoom.png
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "assistant_improve_toolkit",
3 | "version": "1.0.0",
4 | "description": "To help improve your Watson Assistant after you have deployed it to production, we have prepared the following two Jupyter notebooks. These notebooks include practical steps for measuring, analyzing, and actively improving your virtual assistant in a continuous manner. Check out IBM Watson Assistant Continuous Improvement Best Practices for more details.",
5 | "repository": {
6 | "type": "git",
7 | "url": "git+https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook.git"
8 | },
9 | "author": "IBM",
10 | "license": "Apache-2.0",
11 | "bugs": {
12 | "url": "https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues"
13 | },
14 | "homepage": "https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook#readme",
15 | "dependencies": {
16 | "@semantic-release/changelog": "^5.0.1",
17 | "@semantic-release/exec": "^5.0.0",
18 | "@semantic-release/git": "^9.0.0",
19 | "@semantic-release/github": "^7.2.3"
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | ipython
2 | pandas==1.2.1
3 | bokeh==3.2.0
4 | tqdm==4.65.0
5 | matplotlib==3.2.1
6 | XlsxWriter==1.2.8
7 | ibm-watson==7.0.0
8 | numpy==1.23.5
9 | requests==2.29.0
10 | scikit-learn>=0.21.3
--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 |
3 | # test dependencies
4 | pytest>=2.8.2
5 | responses>=0.10.6
6 | python_dotenv>=0.1.5;python_version!='3.2'
7 | pylint>=1.4.4
8 | tox>=2.9.1
9 | pytest-rerunfailures>=3.1
10 |
11 | # code coverage
12 | coverage<5
13 | codecov>=1.6.3
14 | pytest-cov>=2.2.1
15 |
16 | # documentation
17 | bumpversion>=0.5.3
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf8 -*-
2 | #
3 | # This file was created by Python Boilerplate. Use Python Boilerplate to start
4 | # simple, usable and best-practices compliant Python projects.
5 | #
6 | # Learn more about it at: http://github.com/fabiommendes/python-boilerplate/
7 | #
8 |
9 | import setuptools
10 | from os import path
11 |
12 | __version__ = '1.4.1'
13 |
14 | # read contents of README file
15 | this_directory = path.abspath(path.dirname(__file__))
16 | with open(path.join(this_directory, 'README.md'), encoding='utf-8') as file:
17 | readme_file = file.read()
18 |
19 | setuptools.setup(
20 | # Basic info
21 | name='assistant_improve_toolkit',
22 | author='IBM Watson',
23 | author_email='watdevex@us.ibm.com',
24 | maintainer='Zhe Zhang',
25 | maintainer_email='zhangzhe@us.ibm.com',
26 | url='https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook',
27 | description='Assistant Improve Toolkit',
28 | license='Apache 2.0',
29 | long_description=readme_file,
30 | long_description_content_type='text/markdown',
31 | classifiers=[
32 | 'Development Status :: 4 - Beta',
33 | 'Intended Audience :: Developers',
34 | 'License :: OSI Approved :: Apache Software License',
35 | 'Programming Language :: Python',
36 | 'Programming Language :: Python :: 3.7',
37 | 'Programming Language :: Python :: 3.8',
38 | 'Operating System :: OS Independent',
39 | 'Topic :: Software Development :: Libraries :: Python Modules'
40 | ],
41 | # Packages and dependencies
42 | package_dir={'': 'src'},
43 | packages=setuptools.find_packages('src'),
44 | install_requires=[
45 | 'pandas==1.2.1',
46 | 'bokeh==3.2.0',
47 | 'tqdm==4.65.0',
48 | 'scikit-learn>=0.21.3',
49 | 'matplotlib==3.2.1',
50 | 'XlsxWriter==1.2.8',
51 | 'ibm-watson==7.0.0',
52 | 'numpy==1.23.5',
53 | 'requests==2.29.0'
54 | ],
55 |
56 | zip_safe=False,
57 | platforms='any',
58 | )
59 |
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | # (C) Copyright IBM Corp. 2019, 2020.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
--------------------------------------------------------------------------------
/src/assistant_improve_toolkit/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | # (C) Copyright IBM Corp. 2019, 2020.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
--------------------------------------------------------------------------------
/src/assistant_improve_toolkit/cos_op.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | # (C) Copyright IBM Corp. 2019, 2020.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import datetime
18 | from requests.utils import quote
19 | import hashlib
20 | import hmac
21 | import pandas as pd
22 | from contextlib import closing
23 | from io import BytesIO
24 | import numpy as np
25 |
26 |
def get_hash(key, msg):
    """Compute the HMAC-SHA256 digest of a message.

    Parameters
    ----------
    key: bytes key used to seed the HMAC.
    msg: string message; encoded as UTF-8 before hashing.

    Returns
    -------
    bytes: the 32-byte HMAC-SHA256 digest.
    """
    mac = hmac.new(key, digestmod=hashlib.sha256)
    mac.update(msg.encode('utf-8'))
    return mac.digest()
35 |
36 |
def create_signature_key(key, datestamp, region, service):
    """Derive a signing key following AWS Signature Version 4.

    The secret key (prefixed with 'AWS4') is chained through HMAC-SHA256
    over the date stamp, region, service name, and the literal
    'aws4_request' terminator, in that order.

    Parameters
    ----------
    key: secret access key
    datestamp: date and timestamp (YYYYMMDD)
    region: service region
    service: service name

    Returns
    -------
    bytes: the derived signing key.
    """
    signing_key = ('AWS4' + key).encode('utf-8')
    for component in (datestamp, region, service, 'aws4_request'):
        signing_key = get_hash(signing_key, component)
    return signing_key
51 |
52 |
def generate_link(filename, project_io, expiration):
    """Generate a presigned (AWS Signature V4) download URL for a COS object.

    Parameters
    ----------
    filename: object key (file name) inside the project's bucket
    project_io: Watson Studio project io instance
    expiration: link expiration time in seconds

    Returns
    -------
    str: the presigned request URL.
    """
    region = ''
    http_method = 'GET'
    # Fetch the storage metadata once instead of re-querying project_io for
    # every field (the original called get_storage_metadata() five times).
    properties = project_io.get_storage_metadata()['properties']
    endpoint = properties['endpoint_url']
    bucket_name = properties['bucket_name']
    credentials = properties['credentials']['editor']

    cur_time = datetime.datetime.utcnow()
    timestamp = cur_time.strftime('%Y%m%dT%H%M%SZ')
    datestamp = cur_time.strftime('%Y%m%d')

    # Canonical query string per the SigV4 presigned-URL layout.
    standardized_querystring = ('X-Amz-Algorithm=AWS4-HMAC-SHA256' +
                                '&X-Amz-Credential=' +
                                credentials['access_key_id'] + '/' + datestamp + '/' + region +
                                '/s3/aws4_request' +
                                '&X-Amz-Date=' + timestamp +
                                '&X-Amz-Expires=' + str(expiration) +
                                '&X-Amz-SignedHeaders=host')

    # '&' and '=' must survive as query-string delimiters.
    standardized_querystring_url_encoded = quote(standardized_querystring, safe='&=')

    standardized_resource = '/' + bucket_name + '/' + filename

    # Presigned URLs sign the request, not the payload.
    payload_hash = 'UNSIGNED-PAYLOAD'
    standardized_headers = 'host:' + endpoint.replace('https://', '')
    signed_headers = 'host'

    standardized_request = (http_method + '\n' +
                            standardized_resource + '\n' +
                            standardized_querystring_url_encoded + '\n' +
                            standardized_headers + '\n' +
                            '\n' +
                            signed_headers + '\n' +
                            payload_hash)

    # assemble string-to-sign
    hashing_algorithm = 'AWS4-HMAC-SHA256'
    credential_scope = datestamp + '/' + region + '/' + 's3' + '/' + 'aws4_request'
    sts = (hashing_algorithm + '\n' +
           timestamp + '\n' +
           credential_scope + '\n' +
           hashlib.sha256(standardized_request.encode('utf-8')).hexdigest())

    # generate the signature
    signature_key = create_signature_key(credentials['secret_access_key'], datestamp,
                                         region, 's3')
    signature = hmac.new(signature_key,
                         sts.encode('utf-8'),
                         hashlib.sha256).hexdigest()

    # assemble the final presigned URL
    request_url = (endpoint + '/' +
                   bucket_name + '/' +
                   filename + '?' +
                   standardized_querystring_url_encoded +
                   '&X-Amz-Signature=' +
                   signature)
    return request_url
119 |
120 |
def generate_excel_measure(dataframe_list, sheet_name_list, filename, project_io):
    """Generate a formatted excel file given a list of dataframes for measure notebook

    Each dataframe is written to its own sheet with a wrapped header row,
    fixed column widths, and alternating row background colors.

    Parameters
    ----------
    dataframe_list: a list of dataframes
    sheet_name_list: a list of sheet names (parallel to dataframe_list)
    filename: output file name
    project_io: Watson Studio project io instance; when None, the file is
                written to the local filesystem instead
    """
    with closing(BytesIO()) as output:
        writer = pd.ExcelWriter(output, engine='xlsxwriter', options={'remove_timezone': True})
        workbook = writer.book
        # Alternating band colors applied to odd/even data rows.
        data_format1 = workbook.add_format({'bg_color': '#BBCCE2'})
        data_format2 = workbook.add_format({'bg_color': '#DEE6EF'})

        format_header = workbook.add_format({'text_wrap': True})

        workbook.formats[0].set_font_size(15)
        for df, name in zip(dataframe_list, sheet_name_list):
            df.to_excel(writer, sheet_name=name)
            worksheet = writer.sheets[name]
            worksheet.set_row(0, 30, format_header)
            worksheet.set_column('A:A', 5)
            worksheet.set_column('B:B', 30)
            worksheet.set_column('C:C', 30)
            worksheet.set_column('D:D', 15)
            worksheet.set_column('F:G', 35)
            worksheet.set_column('H:AH', 20)
            for row in range(1, len(df) + 1, 2):
                worksheet.set_row(row, cell_format=data_format1)
                worksheet.set_row(row + 1, cell_format=data_format2)
        # close() (rather than the deprecated save(), removed in pandas 2.0)
        # finalizes the workbook into the in-memory buffer.
        writer.close()
        if project_io is not None:
            project_io.save_data(filename, output.getvalue(), overwrite=True)
        else:
            with open(filename, 'wb') as out:
                out.write(output.getvalue())
158 |
159 |
def generate_excel_effectiveness(dataframe_list, sheet_name_list, filename, project_io):
    """Generate a formatted excel file given a list of dataframes for effectiveness notebook

    Each dataframe is written to its own sheet with a wrapped header row,
    fixed column widths, and alternating row background colors.

    Parameters
    ----------
    dataframe_list: a list of dataframes
    sheet_name_list: a list of sheet names (parallel to dataframe_list)
    filename: output file name
    project_io: Watson Studio project io instance; when None, the file is
                written to the local filesystem instead
    """
    with closing(BytesIO()) as output:
        writer = pd.ExcelWriter(output, engine='xlsxwriter', options={'remove_timezone': True})
        workbook = writer.book
        # Alternating band colors applied to odd/even data rows.
        data_format1 = workbook.add_format({'bg_color': '#BBCCE2'})
        data_format2 = workbook.add_format({'bg_color': '#DEE6EF'})

        format_header = workbook.add_format({'text_wrap': True})

        workbook.formats[0].set_font_size(15)
        for df, name in zip(dataframe_list, sheet_name_list):
            df.to_excel(writer, sheet_name=name)
            worksheet = writer.sheets[name]
            worksheet.set_row(0, 20, format_header)
            worksheet.set_column('A:A', 5)
            worksheet.set_column('B:D', 30)
            for row in range(1, len(df) + 1, 2):
                worksheet.set_row(row, cell_format=data_format1)
                worksheet.set_row(row + 1, cell_format=data_format2)
        # close() (rather than the deprecated save(), removed in pandas 2.0)
        # finalizes the workbook into the in-memory buffer.
        writer.close()
        if project_io is not None:
            project_io.save_data(filename, output.getvalue(), overwrite=True)
        else:
            with open(filename, 'wb') as out:
                out.write(output.getvalue())
193 |
194 |
def _sample_conversations(df, sample_size):
    """Restrict df to a random sample of at most sample_size unique Conversation IDs,
    sorted by Conversation ID and Response Timestamp."""
    conversation_ids = df['Conversation ID'].unique()
    # Sample without replacement: with replacement (the previous behavior),
    # duplicate ids silently shrank the effective sample. Capping at the
    # population size keeps np.random.choice from raising when fewer
    # conversations exist than requested.
    sample_count = min(sample_size, len(conversation_ids))
    sampled_conversation_ids = np.random.choice(conversation_ids, sample_count, replace=False)
    return df[df['Conversation ID'].isin(sampled_conversation_ids)].sort_values(
        by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True)


def export_result_excel(df_effective, sample_size=100, project_io=None):
    """Export ineffective-conversation logs as annotation-ready Excel workbooks.

    Produces three workbooks, each with All/Covered/Not-Covered sheets and
    extra empty columns for manual annotation: 'All.xlsx' (every utterance),
    'Escalated_sample.xlsx' (a sample of escalated conversations), and
    'NotEscalated_sample.xlsx' (a sample of non-escalated conversations).

    Parameters
    ----------
    df_effective: dataframe of scored conversation log records
    sample_size: number of conversations to sample for the escalated and
        non-escalated workbooks; 0 disables sampling
    project_io: Watson Studio project io instance; None saves files locally
    """
    if df_effective.size == 0:
        print('No ineffective conversations found in logs')
        return
    # Copy the effective dataframe
    df_excel = df_effective.copy(deep=True)
    # Rename columns to generate excel
    df_excel = df_excel.rename(columns={'log_id': 'Log ID', 'response.context.conversation_id': 'Conversation ID',
                                        'response.timestamp': 'Response Timestamp',
                                        'request_input': 'Utterance Text',
                                        'response_text': 'Response Text',
                                        'response.top_intent_intent': 'Detected top intent',
                                        'response.top_intent_confidence': 'Detected top intent confidence',
                                        'Intent 2 intent': 'Intent 2', 'Intent 2 confidence': 'Intent 2 Confidence',
                                        'Intent 3 intent': 'Intent 3', 'Intent 3 confidence': 'Intent 3 Confidence',
                                        'response_entities': 'Detected Entities',
                                        'Escalated_conversation': 'Escalated conversation?',
                                        'Covered': 'Covered?', 'Not Covered cause': 'Not covered - cause',
                                        'response.output.nodes_visited_s': 'Dialog Flow',
                                        'response_dialog_stack': 'Dialog stack',
                                        'response_dialog_request_counter': 'Dialog request counter',
                                        'response_dialog_turn_counter': 'Dialog turn counter'
                                        })

    existing_columns = ['Log ID', 'Conversation ID', 'Response Timestamp', 'Customer ID (must retain for delete)',
                        'Utterance Text', 'Response Text', 'Detected top intent', 'Detected top intent confidence',
                        'Intent 2', 'Intent 2 Confidence', 'Confidence gap (between 1 and 2)', 'Intent 3',
                        'Intent 3 Confidence',
                        'Detected Entities', 'Escalated conversation?', 'Covered?', 'Not covered - cause',
                        'Dialog Flow', 'Dialog stack', 'Dialog request counter', 'Dialog turn counter']
    # Add new columns for annotating problematic logs
    new_columns_excel = ['Response Correct (Y/N)?', 'Response Helpful (Y/N)?',
                         'Root cause (Problem with Intent, entity, dialog)',
                         'Wrong intent? If yes, put the correct intent. Otherwise leave it blank',
                         'New intent needed? (A new intent. Otherwise leave blank)',
                         'Add Utterance to Training data (Y/N)',
                         'Entity missed? If yes, put the missed entity value. Otherwise leave it blank',
                         'New entity needed? If yes, put the entity name',
                         'New entity value? If yes, put the entity value', 'New dialog logic needed?',
                         'Wrong dialog node? If yes, put the node name. Otherwise leave it blank',
                         'No dialog node triggered']

    # Add the new columns to the dataframe
    df_excel = df_excel.reindex(columns=[*existing_columns, *new_columns_excel], fill_value='')

    # Set output filename
    all_file = 'All.xlsx'
    escalated_sample_file = 'Escalated_sample.xlsx'
    non_escalated_sample_file = 'NotEscalated_sample.xlsx'

    # Remove timezone information (xlsxwriter cannot serialize tz-aware datetimes)
    df_excel['Response Timestamp'] = df_excel['Response Timestamp'].dt.tz_localize(None)

    # Prepare dataframe containing all utterances sorted by Conversation ID and Response Timestamp
    df_all = df_excel.sort_values(by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True)

    # Prepare dataframe containing covered utterances sorted by Conversation ID and Response Timestamp
    df_covered = df_excel[df_excel['Covered?']==True].sort_values(
        by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True)

    # Prepare dataframe containing not covered utterances sorted by Conversation ID and Response Timestamp
    df_not_covered = df_excel[df_excel['Covered?']==False].sort_values(
        by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True)

    # Convert to Excel format and save to local or upload to COS if project_io is provided
    generate_excel_measure([df_all, df_covered, df_not_covered],
                           ['All_Utterances', 'Covered_Utterances', 'Not_Covered_Utterances'], filename=all_file,
                           project_io=project_io)

    # Prepare dataframe containing escalated conversations
    df_escalated_true = df_excel.loc[df_excel['Escalated conversation?']==True]

    # Sample escalated conversations
    if sample_size > 0 and len(df_escalated_true) > 0:
        df_escalated_true = _sample_conversations(df_escalated_true, sample_size)

    # Prepare dataframe containing covered utterances in escalated conversations sorted by Conversation ID and Response Timestamp
    df_escalated_covered = df_escalated_true[df_escalated_true['Covered?']==True].reset_index(drop=True)

    # Prepare dataframe containing not covered utterances in escalated conversations sorted by Conversation ID and Response Timestamp
    df_escalated_not_covered = df_escalated_true[df_escalated_true['Covered?']==False].reset_index(drop=True)

    # Convert to Excel format and upload to COS
    generate_excel_measure([df_escalated_true, df_escalated_covered, df_escalated_not_covered],
                           ['All_Utterances', 'Covered_Utterances', 'Not_Covered_Utterances'],
                           filename=escalated_sample_file, project_io=project_io)

    # Prepare dataframe containing non-escalated conversations
    df_not_escalated = df_excel.loc[df_excel['Escalated conversation?']==False].reset_index(drop=True)

    # Sample non-escalated conversations. The emptiness guard mirrors the
    # escalated branch above: previously an empty df_not_escalated made
    # np.random.choice raise ValueError.
    if sample_size > 0 and len(df_not_escalated) > 0:
        df_not_escalated = _sample_conversations(df_not_escalated, sample_size)

    # Prepare dataframe containing covered utterances in non-escalated conversations sorted by Conversation ID and Response Timestamp
    df_not_escalated_covered = df_not_escalated[df_not_escalated['Covered?']==True].reset_index(drop=True)

    # Generate not escalated and not covered sample file
    df_not_escalated_not_covered = df_not_escalated[df_not_escalated['Covered?']==False].reset_index(drop=True)

    # Convert to Excel format and upload to COS
    generate_excel_measure([df_not_escalated, df_not_escalated_covered, df_not_escalated_not_covered],
                           ['All_Utterances', 'Covered_Utterances', 'Not_Covered_Utterances'],
                           filename=non_escalated_sample_file, project_io=project_io)
309 |
--------------------------------------------------------------------------------
/src/assistant_improve_toolkit/export_csv_for_intent_recommendation.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | # (C) Copyright IBM Corp. 2019, 2020.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import argparse
18 | from watson_assistant_func import load_logs_from_file
19 | from watson_assistant_func import export_csv_for_intent_recommendation
20 |
21 |
def parse_args():
    """Build and parse the command-line arguments for generating the intent
    recommendation CSV from a logs JSON file.

    Returns
    ----------
    argparse.Namespace with input_json, output_csv, overwrite, deduplicate,
    min_length, and max_length attributes.
    """
    parser = argparse.ArgumentParser(description='Generate intent recommendation CSV from logs JSON file for Watson '
                                                 'Assistant service.')

    def str2bool(value):
        # argparse's `type=bool` is a known trap: bool('False') is True
        # because every non-empty string is truthy. Parse the common textual
        # spellings explicitly so `--overwrite False` actually means False.
        return str(value).strip().lower() in ('1', 'true', 'yes', 'y', 't')

    # Required arguments
    parser.add_argument(
        '--input_json',
        type=str,
        required=True,
        help="The path of the JSON file of logs, generated by `fetch_logs.py`",
    )
    parser.add_argument(
        '--output_csv',
        type=str,
        required=True,
        help="The path of the CSV file of utterances this script will generate for intent recommendation",
    )

    # Optional arguments
    parser.add_argument(
        '--overwrite',
        type=str2bool,
        default=False,
        help="If overwrite filename if it exists",
    )
    parser.add_argument(
        '--deduplicate',
        action="store_true",
        help="If set, duplicate utterances are discarded when generating CSV",
    )
    parser.add_argument(
        '--min_length',
        type=int,
        default=3,
        help="Minimum number of tokens of a utterance in the generated CSV. Any utterance that has less than or "
             "equal to this number is discarded.",
    )
    parser.add_argument(
        '--max_length',
        type=int,
        default=20,
        help="Maximum number of tokens of a utterance in the generated CSV. Any utterance that has more than or "
             "equal to this number is discarded.",
    )

    return parser.parse_args()
68 |
69 |
if __name__ == '__main__':
    # Parse CLI options and echo them for reproducibility.
    args = parse_args()
    print(vars(args))

    # Load the raw logs JSON produced by fetch_logs.py. project=None means
    # the file is read from the local filesystem, not a Watson Studio project.
    logs = load_logs_from_file(filename=args.input_json,
                               project=None)

    # Sanitize, length-filter, optionally deduplicate, and write one
    # utterance per row into the CSV used for intent recommendation.
    export_csv_for_intent_recommendation(logs,
                                         filename=args.output_csv,
                                         deduplicate=args.deduplicate,
                                         project=None,
                                         overwrite=args.overwrite,
                                         min_length=args.min_length,
                                         max_length=args.max_length)
84 |
--------------------------------------------------------------------------------
/src/assistant_improve_toolkit/fetch_logs.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | # (C) Copyright IBM Corp. 2019, 2020.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import argparse
18 | from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
19 | from ibm_watson import AssistantV1
20 |
21 | from watson_assistant_func import get_logs
22 |
23 |
def parse_args():
    """Build and parse the command-line arguments for fetching logs from the
    Watson Assistant service.

    Returns
    ----------
    argparse.Namespace with url, version, apikey, workspace_id, skill_id,
    assistant_id, filters, num_logs, output_json, and overwrite attributes.
    """
    parser = argparse.ArgumentParser(description='Script to fetch logs from Watson Assistant service.')

    def str2bool(value):
        # argparse's `type=bool` is a known trap: bool('False') is True
        # because every non-empty string is truthy. Parse the common textual
        # spellings explicitly so `--overwrite False` actually means False.
        return str(value).strip().lower() in ('1', 'true', 'yes', 'y', 't')

    # Required arguments
    parser.add_argument(
        "--url",
        type=str,
        required=True,
        help="Watson Asssistant Legacy V1 URLs, for example, https://api.us-east.assistant.watson.cloud.ibm.com."
    )
    parser.add_argument(
        "--version",
        type=str,
        required=True,
        help="API requests require a version parameter that takes a date in the format version=YYYY-MM-DD. When we "
             "change the API in a backwards-incompatible way, we release a new version date. "
    )
    parser.add_argument(
        "--apikey",
        type=str,
        required=True,
        help="The IAM token."
    )

    # Optional arguments
    parser.add_argument(
        "--workspace_id",
        type=str,
        default='',
        help="To load the skill of an assistant in the next section, you need to provide either Workspace ID or Skill "
             "ID. The values can be found on the View API Details page. If you are using versioning in Watson "
             "Assistant, this ID represents the Development version of your skill definition.",
    )
    parser.add_argument(
        "--skill_id",
        type=str,
        default='',
        help="To load the skill of an assistant in the next section, you need to provide either Workspace ID or Skill "
             "ID. The values can be found on the View API Details page. If you are using versioning in Watson "
             "Assistant, this ID represents the Development version of your skill definition.",
    )
    parser.add_argument(
        "--assistant_id",
        type=str,
        default='',
        help="To load the skill of an assistant in the next section, you need to provide Assistant ID. The values can "
             "be found on the View API Details page.",
    )
    parser.add_argument(
        '--filters',
        default=[],
        nargs='*',
        help="List of filters (string), separated by space. For example, '--filters language::en "
             "meta.summary.input_text_length_i>0 response_timestamp>=2020-03-01'",
    )
    parser.add_argument(
        '--num_logs',
        type=int,
        default=20000,
        help="Number of logs to retrieve (default=20000)",
    )
    parser.add_argument(
        '--output_json',
        type=str,
        default=None,
        help="If output_json is set, logs will be saved to filename as a JSON file",
    )
    parser.add_argument(
        '--overwrite',
        type=str2bool,
        default=False,
        help="If overwrite filename if it exists",
    )

    return parser.parse_args()
99 |
100 |
if __name__ == '__main__':
    # Parse CLI options and echo them for reproducibility.
    args = parse_args()
    print(vars(args))

    # Build an authenticated V1 SDK client pointing at the given service URL.
    authenticator = IAMAuthenticator(args.apikey)
    sdk_object = AssistantV1(version=args.version, authenticator=authenticator)
    sdk_object.set_service_url(args.url)

    assistant_information = {'workspace_id': args.workspace_id,
                             'skill_id': args.skill_id,
                             'assistant_id': args.assistant_id}
    print(assistant_information)

    # get_logs takes BOTH a V1 and a V2 sdk object as its first two
    # parameters; the previous call passed only sdk_object, which bound
    # assistant_information to sdk_v2_object and raised a TypeError for the
    # missing assistant_info argument. This script only builds a V1 client,
    # so pass None for the V2 one and request the V1 paging path explicitly.
    logs = get_logs(sdk_object,
                    None,
                    assistant_information,
                    num_logs=args.num_logs,
                    filename=args.output_json,
                    filters=args.filters,
                    project=None,
                    overwrite=args.overwrite,
                    version=1,
                    )
122 |
--------------------------------------------------------------------------------
/src/assistant_improve_toolkit/version.py:
--------------------------------------------------------------------------------
# Package version string; kept in sync with setup.py by bumpversion
# (see .bumpversion.cfg at the repository root).
__version__ = '1.4.1'
--------------------------------------------------------------------------------
/src/assistant_improve_toolkit/watson_assistant_func.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | # (C) Copyright IBM Corp. 2019, 2020.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import json
18 | import pandas as pd
19 | import os
20 | import csv
21 | import traceback
22 | import io
23 | from string import punctuation
24 |
25 | from ibm_watson import AssistantV1, AssistantV2
26 |
# Punctuation set stripped by sanitize_text: string.punctuation extended with
# the right single quotation mark (U+2019) common in English text.
EN_PUNCTUATION = punctuation + '’'
28 |
29 |
def get_assistant_definition(sdk_object, assistant_info, project=None, overwrite=False, filename='assistant_definition'):
    """Fetch an assistant/skill definition and load it into a dataframe.

    The definition is read from a previously exported JSON file when one
    exists (unless overwrite=True); otherwise it is fetched from the Watson
    Assistant API and exported to JSON.

    Parameters
    ----------
    sdk_object: AssistantV1 or AssistantV2 sdk instance
    assistant_info: dict with 'workspace_id', 'assistant_id', and 'skill_id' keys
    project: Watson Studio project io instance (None to use the local filesystem)
    overwrite: whether to re-fetch and overwrite an existing definition file
    filename: prefix of the definition file name

    Returns
    ----------
    df_assistant : DataFrame of the definition, or None on failure
    """
    # Missing keys default to '' so the len() checks below cannot raise
    # TypeError on None.
    workspace_id, assistant_id, skill_id = [assistant_info.get(k) or '' for k in ['workspace_id', 'assistant_id', 'skill_id']]

    if len(workspace_id) > 0:
        filename += '_workspace_{}.json'.format(workspace_id)
    elif len(skill_id) > 0:
        filename += '_skill_{}.json'.format(skill_id)
    elif len(assistant_id) > 0:
        # Previously the AssistantV2/assistant_id path below was unreachable:
        # the function returned here unless a workspace or skill id was given.
        filename += '_assistant_{}.json'.format(assistant_id)
    else:
        print('Please provide a valid Workspace ID, Skill ID, or Assistant ID!')
        return None

    if os.path.isfile(filename) and overwrite is False:
        # Reuse the cached definition file instead of calling the API again.
        print('Reading from file:', filename)
        with open(filename) as data:
            data_json = json.load(data)
        # Read logs into dataframe
        print('Assistant definition is loaded into as a dataframe.')
        df_assistant = pd.json_normalize(data_json)
        return df_assistant

    if type(sdk_object) == AssistantV1:
        if len(workspace_id) > 0:
            # Fetch the workspace definition
            print('Loading workspace definition using workspace id: {}'.format(workspace_id))
            assistant_definition = sdk_object.get_workspace(workspace_id=workspace_id, export=True,
                                                            include_audit=True).get_result()
        elif len(skill_id) > 0:
            # A V1 skill id is passed as the workspace id for the export call.
            print('Loading skill definition using skill id: {}'.format(skill_id))
            assistant_definition = sdk_object.get_workspace(workspace_id=skill_id, export=True,
                                                            include_audit=True).get_result()
        else:
            print('Please provide a valid Workspace ID or Skill ID!')
            assistant_definition = None
    elif type(sdk_object) == AssistantV2:
        if len(assistant_id) > 0:
            print('Loading skill definition using assistant id: {}'.format(assistant_id))
            assistant_definition = None
            assistants = sdk_object.export_skills(assistant_id=assistant_id, include_audit=True).get_result()
            # Only the dialog skill carries a workspace definition.
            for assistant in assistants["assistant_skills"]:
                if assistant["type"] == "dialog":
                    assistant_definition = assistant["workspace"]

            if assistant_definition is None:
                print('Your assistant does not support dialog')
        else:
            print('Please provide a valid Assistant ID!')
            assistant_definition = None
    else:
        print("Please provide a valid watson sdk object")
        assistant_definition = None

    if assistant_definition:
        # Store the workspace details in a dataframe
        df_assistant = pd.json_normalize(assistant_definition)

        # Export the fetched definition unless a file already exists and
        # overwrite is False.
        if not os.path.isfile(filename) or overwrite:
            if project is not None:
                with open(filename, 'wb') as fp:
                    project.save_data(filename, json.dumps(assistant_definition), overwrite=True)
                    # Display success message
                    print('Definition {} exported as a project asset'.format(fp.name))
            else:
                with open(filename, 'w') as f:
                    json.dump(assistant_definition, f)
                print('Definition {} exported'.format(filename))

        return df_assistant
    else:
        return None
103 |
104 | def _get_logs_from_v1_api(sdk_object, workspace_id, log_filter, num_logs):
105 | log_list = list()
106 | try:
107 | current_cursor = None
108 | while num_logs > 0:
109 | if len(workspace_id) > 0:
110 | logs_response = sdk_object.list_logs(
111 | workspace_id=workspace_id,
112 | page_limit=500,
113 | cursor=current_cursor,
114 | filter=log_filter
115 | ).get_result()
116 | else:
117 | logs_response = sdk_object.list_all_logs(
118 | page_limit=500,
119 | cursor=current_cursor,
120 | filter=log_filter
121 | ).get_result()
122 | min_num = min(num_logs, len(logs_response['logs']))
123 | log_list.extend(logs_response['logs'][:min_num])
124 | print('\r{} logs retrieved'.format(len(log_list)), end='')
125 | num_logs = num_logs - min_num
126 | current_cursor = None
127 | # Check if there is another page of logs to be fetched
128 | if 'pagination' in logs_response:
129 | # Get the url from which logs are to fetched
130 | if 'next_cursor' in logs_response['pagination']:
131 | current_cursor = logs_response['pagination']['next_cursor']
132 | else:
133 | break
134 | except Exception as ex:
135 | traceback.print_tb(ex.__traceback__)
136 | raise RuntimeError("Error getting logs using API. Please check if URL/credentials are correct.")
137 |
138 | return log_list
139 |
140 |
141 | def _get_logs_from_v2_api(sdk_object, environment_id, log_filter, num_logs):
142 | log_list = list()
143 | try:
144 | current_cursor = None
145 | while num_logs > 0:
146 | logs_response = sdk_object.list_logs(
147 | assistant_id=environment_id,
148 | page_limit=500,
149 | cursor=current_cursor,
150 | filter=log_filter
151 | ).get_result()
152 | min_num = min(num_logs, len(logs_response['logs']))
153 | log_list.extend(logs_response['logs'][:min_num])
154 | print('\r{} logs retrieved'.format(len(log_list)), end='')
155 | num_logs = num_logs - min_num
156 | current_cursor = None
157 | # Check if there is another page of logs to be fetched
158 | if 'pagination' in logs_response:
159 | # Get the url from which logs are to fetched
160 | if 'next_cursor' in logs_response['pagination']:
161 | current_cursor = logs_response['pagination']['next_cursor']
162 | else:
163 | break
164 | except Exception as ex:
165 | traceback.print_tb(ex.__traceback__)
166 | raise RuntimeError("Error getting logs using API. Please check if URL/credentials are correct.")
167 |
168 | return log_list
169 |
170 |
def get_logs(sdk_v1_object, sdk_v2_object, assistant_info, num_logs, filename, filters=None, project=None,
             overwrite=False, version=1):
    """This function calls Watson Assistant API to retrieve logs, using pagination if necessary.
    The goal is to retrieve utterances (user inputs) from the logs.
    Parameters
    ----------
    sdk_v1_object : authenticated AssistantV1 sdk instance (used when version=1)
    sdk_v2_object : authenticated AssistantV2 sdk instance (used when version=2)
    assistant_info : dict, containing workspace_id, assistant_id, skill_id, and environment_id
    num_logs : int, the number of log records to retrieve
    filename: prefix of the name of the log file
    filters: list of query filter strings; the caller's list is never modified
    project: project io of studio project
    overwrite: boolean, whether to reset log file
    version: which log API to use, 1 (default) or 2
    Returns
    ----------
    logs : list of fetched log records, or None on invalid input
    """
    # Work on a copy: the appends below previously mutated the caller's list,
    # so repeated calls accumulated duplicate filters.
    filters = [] if filters is None else list(filters)

    workspace_id, assistant_id, skill_id, environment_id = [assistant_info.get(k) for k in ['workspace_id', 'assistant_id', 'skill_id', 'environment_id']]

    if (workspace_id is None or len(workspace_id) == 0) \
        and (assistant_id is None or len(assistant_id) == 0) \
        and (skill_id is None or len(skill_id) == 0):
        print('Please provide a valid Workspace ID, Assistant ID, or Skill ID!')
        return None

    # check if filename exists before retrieving logs
    file_exist = False
    if filename:
        if project:
            for file in project.get_files():
                if file['name'] == filename:
                    if not overwrite:
                        print('Load logs from existing file {}, set overwrite=True to overwrite'.format(filename))
                        return load_logs_from_file(filename, project)
                    else:
                        file_exist = True

        elif os.path.exists(filename):
            if not overwrite:
                print('Load logs from existing file {}, set overwrite=True to overwrite'.format(filename))
                return load_logs_from_file(filename, None)
            else:
                file_exist = True
    else:
        print('Please provide a valid filename.')
        return None

    if version == 1:
        # adding default filters based on assistant_id and workspace_id
        if assistant_id is not None and len(assistant_id) > 0:
            filters.append('request.context.system.assistant_id::{}'.format(assistant_id))
        if skill_id is not None and len(skill_id) > 0:
            filters.append('workspace_id::{}'.format(skill_id))

        logs = _get_logs_from_v1_api(sdk_object=sdk_v1_object,
                                     workspace_id=workspace_id,
                                     log_filter=','.join(filters),
                                     num_logs=num_logs)
    elif version == 2:
        logs = _get_logs_from_v2_api(sdk_object=sdk_v2_object,
                                     environment_id=environment_id,
                                     log_filter=','.join(filters),
                                     num_logs=num_logs)
    else:
        # Guard: an unrecognized version previously left `logs` unbound and
        # crashed below with UnboundLocalError.
        print('Please provide a valid version (1 or 2)!')
        return None
    print('\nLoaded {} logs'.format(len(logs)))

    if not file_exist or overwrite:
        print('Saving {} logs into {}... '.format(len(logs), filename))
        if project:
            with open(filename, 'wb') as fp:
                project.save_data(filename, json.dumps(logs, indent=2), overwrite=overwrite)
                # Display success message
                print('File', fp.name, 'saved a project asset')
        else:
            with open(filename, 'w') as fp:
                json.dump(logs, fp, indent=2)
                print('File', fp.name, 'saved')

    return logs
251 |
252 |
def load_logs_from_file(filename, project=None):
    """Load previously fetched logs from a JSON file.

    Parameters
    ----------
    filename: name of the JSON log file
    project: Watson Studio project io instance; when provided, the file is
        read from the project's storage instead of the local filesystem

    Returns
    ----------
    logs : the parsed JSON content (a list of log records)

    Raises
    ----------
    ValueError when project is None and filename is missing or not a file
    """
    print('Reading from file:', filename)
    if project:
        # Fetch the asset bytes from cloud object storage and decode them.
        raw = project.get_file(filename).getvalue().decode('utf8')
        logs = json.loads(raw)
        print('Loaded {} logs'.format(len(logs)))
        return logs
    if not (os.path.exists(filename) and os.path.isfile(filename)):
        raise ValueError('{} either does not exist or is a directory'.format(filename))
    with open(filename) as handle:
        logs = json.load(handle)
    print('Loaded {} logs'.format(len(logs)))
    return logs
271 |
272 |
273 | # From: https://github.ibm.com/watson-engagement-advisor/improve-recommendations-engine/blob/4c996b24bfcac4eb6ab6bbf39cf125cdf30b9027/src/main/python/cluster/utils.py#L44
def sanitize_text(text, remove_punctuation=True, lower=True, tokenize=True):
    """Normalize a raw utterance: strip surrounding whitespace, optionally
    lowercase it, and optionally remove English punctuation (EN_PUNCTUATION).

    NOTE(review): the `tokenize` flag is currently unused (the tokenizing
    code path was removed); it is kept for interface compatibility.
    """
    cleaned = text.strip()
    if lower:
        cleaned = cleaned.lower()
    if remove_punctuation:
        # Delete every character listed in EN_PUNCTUATION.
        cleaned = cleaned.translate(str.maketrans('', '', EN_PUNCTUATION))
    return cleaned
288 |
289 |
def export_csv_for_intent_recommendation(logs,
                                         filename,
                                         deduplicate=True,
                                         project=None,
                                         overwrite=False,
                                         min_length=3,
                                         max_length=20):
    """Write sanitized user utterances from logs to a one-column CSV suitable
    for intent recommendation.

    Utterances are sanitized, then kept only when their token count is
    strictly between min_length and max_length; duplicates are optionally
    dropped. The CSV is saved locally, or as a project asset when `project`
    is provided.

    Returns the exported rows (a list of single-element lists).
    """
    sanitized = (sanitize_text(entry['request']['input']['text']) for entry in logs)
    kept = [utterance for utterance in sanitized
            if min_length < len(utterance.split()) < max_length]
    rows = [[utterance] for utterance in (set(kept) if deduplicate else kept)]
    print('\nExporting {} messages into CSV...'.format(len(rows)))

    if project:
        with open(filename, 'wb') as fp:
            # Build the CSV in memory, then push it to project storage.
            buffer = io.StringIO()
            csv.writer(buffer, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL).writerows(rows)
            project.save_data(filename, buffer.getvalue(), overwrite=overwrite)
            buffer.close()
            # Display success message
            print('File', fp.name, 'saved a project asset')
    else:
        with open(filename, 'w') as f:
            csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL).writerows(rows)
            print('File', f.name, 'saved')

    return rows
--------------------------------------------------------------------------------
/src/main/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/src/main/__init__.py
--------------------------------------------------------------------------------
/src/main/css/custom.css:
--------------------------------------------------------------------------------
1 |
14 |
--------------------------------------------------------------------------------
/src/main/css/custom_jupyter.css:
--------------------------------------------------------------------------------
1 | /**
2 | * (C) Copyright IBM Corp. 2019, 2020.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 |
--------------------------------------------------------------------------------
/src/main/css/custom_watson_studio.css:
--------------------------------------------------------------------------------
1 | /**
2 | * (C) Copyright IBM Corp. 2019, 2020.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 |
30 |
--------------------------------------------------------------------------------
/src/main/python/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | # (C) Copyright IBM Corp. 2019, 2020.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
--------------------------------------------------------------------------------
/src/main/python/cos_op.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | # (C) Copyright IBM Corp. 2019, 2020.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import datetime
18 | from requests.utils import quote
19 | import hashlib
20 | import hmac
21 | import pandas as pd
22 | from contextlib import closing
23 | from io import BytesIO
24 | import numpy as np
25 |
26 |
def get_hash(key, msg):
    """Compute the HMAC-SHA256 digest of a message.
    Parameters
    ----------
    key: key bytes for the HMAC
    msg: text to authenticate; encoded as UTF-8 before hashing
    """
    mac = hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
    return mac.digest()
35 |
36 |
def create_signature_key(key, datestamp, region, service):
    """Create a signing key based on AWS signature version 4
    Parameters
    ----------
    key: Secret access key
    datestamp: date and timestamp
    region: service region
    service: service name
    """
    # Derive the key by successively HMAC-ing each scope component, starting
    # from 'AWS4' + secret and ending with the literal 'aws4_request'.
    signing_key = ('AWS4' + key).encode('utf-8')
    for component in (datestamp, region, service, 'aws4_request'):
        signing_key = get_hash(signing_key, component)
    return signing_key
51 |
52 |
def generate_link(filename, project_io, expiration):
    """Generate a presigned (AWS Signature V4) download URL for a file stored
    in the project's Cloud Object Storage bucket.
    Parameters
    ----------
    filename: file name (object key) in the bucket
    project_io: Watson Studio project io instance
    expiration: expiration time in seconds
    """
    region = ''
    http_method = 'GET'
    # Hoist the storage metadata lookup: the previous version called
    # project_io.get_storage_metadata() five separate times for the same data.
    properties = project_io.get_storage_metadata()['properties']
    endpoint = properties['endpoint_url']
    access_key_id = properties['credentials']['editor']['access_key_id']
    secret_access_key = properties['credentials']['editor']['secret_access_key']
    bucket_name = properties['bucket_name']

    cur_time = datetime.datetime.utcnow()
    timestamp = cur_time.strftime('%Y%m%dT%H%M%SZ')
    datestamp = cur_time.strftime('%Y%m%d')

    standardized_querystring = ('X-Amz-Algorithm=AWS4-HMAC-SHA256' +
                                '&X-Amz-Credential=' +
                                access_key_id + '/' + datestamp + '/' + region +
                                '/s3/aws4_request' +
                                '&X-Amz-Date=' + timestamp +
                                '&X-Amz-Expires=' + str(expiration) +
                                '&X-Amz-SignedHeaders=host')

    standardized_querystring_url_encoded = quote(standardized_querystring, safe='&=')

    standardized_resource = '/' + bucket_name + '/' + filename

    payload_hash = 'UNSIGNED-PAYLOAD'
    standardized_headers = 'host:' + endpoint.replace('https://', '')
    signed_headers = 'host'

    # Canonical request per the Signature V4 specification.
    standardized_request = (http_method + '\n' +
                            standardized_resource + '\n' +
                            standardized_querystring_url_encoded + '\n' +
                            standardized_headers + '\n' +
                            '\n' +
                            signed_headers + '\n' +
                            payload_hash)

    # assemble string-to-sign
    hashing_algorithm = 'AWS4-HMAC-SHA256'
    credential_scope = datestamp + '/' + region + '/' + 's3' + '/' + 'aws4_request'
    sts = (hashing_algorithm + '\n' +
           timestamp + '\n' +
           credential_scope + '\n' +
           hashlib.sha256(standardized_request.encode('utf-8')).hexdigest())

    # generate the signature
    signature_key = create_signature_key(secret_access_key, datestamp, region, 's3')
    signature = hmac.new(signature_key,
                         sts.encode('utf-8'),
                         hashlib.sha256).hexdigest()

    # assemble the final presigned URL
    request_url = (endpoint + '/' +
                   bucket_name + '/' +
                   filename + '?' +
                   standardized_querystring_url_encoded +
                   '&X-Amz-Signature=' +
                   signature)
    return request_url
119 |
120 |
def generate_excel_measure(dataframe_list, sheet_name_list, filename, project_io):
    """Generate a formatted excel file given a list of dataframes for measure notebook
    Parameters
    ----------
    dataframe_list: a list of dataframes, one per sheet
    sheet_name_list: a list of sheet names (parallel to dataframe_list)
    filename: output file name
    project_io: Watson Studio project io instance (None to write to the local filesystem)
    """
    with closing(BytesIO()) as output:
        # NOTE(review): the `options` kwarg was deprecated in pandas 1.5 and removed in
        # pandas 2.0 (replaced by engine_kwargs) — confirm the pinned pandas version
        # before upgrading.
        writer = pd.ExcelWriter(output, engine='xlsxwriter', options={'remove_timezone': True})
        workbook = writer.book
        # Alternating row background colors for readability.
        data_format1 = workbook.add_format({'bg_color': '#BBCCE2'})
        data_format2 = workbook.add_format({'bg_color': '#DEE6EF'})

        format_header = workbook.add_format({'text_wrap': True})

        workbook.formats[0].set_font_size(15)
        for df, name in zip(dataframe_list, sheet_name_list):
            df.to_excel(writer, sheet_name=name)
            worksheet = writer.sheets[name]
            # Tall wrapped header row, then fixed widths for the known columns.
            worksheet.set_row(0, 30, format_header)
            worksheet.set_column('A:A', 5)
            worksheet.set_column('B:B', 30)
            worksheet.set_column('C:C', 30)
            worksheet.set_column('D:D', 15)
            worksheet.set_column('F:G', 35)
            worksheet.set_column('H:AH', 20)
            # Stripe the data rows in alternating pairs.
            for row in range(1, len(df) + 1, 2):
                worksheet.set_row(row, cell_format=data_format1)
                worksheet.set_row(row + 1, cell_format=data_format2)
        # close() flushes and writes the workbook; ExcelWriter.save() was removed in
        # pandas 2.0 while close() works across versions.
        writer.close()
        if project_io is not None:
            project_io.save_data(filename, output.getvalue(), overwrite=True)
        else:
            with open(filename, 'wb') as out:
                out.write(output.getvalue())
158 |
159 |
def generate_excel_effectiveness(dataframe_list, sheet_name_list, filename, project_io):
    """Generate a formatted excel file given a list of dataframes for effectiveness notebook
    Parameters
    ----------
    dataframe_list: a list of dataframes, one per sheet
    sheet_name_list: a list of sheet names (parallel to dataframe_list)
    filename: output file name
    project_io: Watson Studio project io instance (None to write to the local filesystem)
    """
    with closing(BytesIO()) as output:
        # NOTE(review): the `options` kwarg was deprecated in pandas 1.5 and removed in
        # pandas 2.0 (replaced by engine_kwargs) — confirm the pinned pandas version
        # before upgrading.
        writer = pd.ExcelWriter(output, engine='xlsxwriter', options={'remove_timezone': True})
        workbook = writer.book
        # Alternating row background colors for readability.
        data_format1 = workbook.add_format({'bg_color': '#BBCCE2'})
        data_format2 = workbook.add_format({'bg_color': '#DEE6EF'})

        format_header = workbook.add_format({'text_wrap': True})

        workbook.formats[0].set_font_size(15)
        for df, name in zip(dataframe_list, sheet_name_list):
            df.to_excel(writer, sheet_name=name)
            worksheet = writer.sheets[name]
            worksheet.set_row(0, 20, format_header)
            worksheet.set_column('A:A', 5)
            worksheet.set_column('B:D', 30)
            # Stripe the data rows in alternating pairs.
            for row in range(1, len(df) + 1, 2):
                worksheet.set_row(row, cell_format=data_format1)
                worksheet.set_row(row + 1, cell_format=data_format2)
        # close() flushes and writes the workbook; ExcelWriter.save() was removed in
        # pandas 2.0 while close() works across versions.
        writer.close()
        if project_io is not None:
            project_io.save_data(filename, output.getvalue(), overwrite=True)
        else:
            with open(filename, 'wb') as out:
                out.write(output.getvalue())
193 |
194 |
def export_result_excel(df_effective, sample_size=100, project_io=None):
    """Export annotation workbooks (all / escalated sample / non-escalated sample).

    Writes three Excel files — 'All.xlsx', 'Escalated_sample.xlsx' and
    'NotEscalated_sample.xlsx' — each with All/Covered/Not-Covered sheets plus
    empty annotation columns for manual review.

    Parameters
    ----------
    df_effective: DataFrame of ineffective-conversation logs (measure notebook output)
    sample_size: number of conversations to sample for the escalated and
                 non-escalated files; values <= 0 disable sampling
    project_io: Watson Studio project io instance (None to save locally)
    """
    if df_effective.size == 0:
        print('No ineffective conversations found in logs')
        return
    # Copy the effective dataframe
    df_excel = df_effective.copy(deep=True)
    # Rename columns to generate excel
    df_excel = df_excel.rename(columns={'log_id': 'Log ID', 'response.context.conversation_id': 'Conversation ID',
                                        'response.timestamp': 'Response Timestamp',
                                        'request_input': 'Utterance Text',
                                        'response_text': 'Response Text',
                                        'response.top_intent_intent': 'Detected top intent',
                                        'response.top_intent_confidence': 'Detected top intent confidence',
                                        'Intent 2 intent': 'Intent 2', 'Intent 2 confidence': 'Intent 2 Confidence',
                                        'Intent 3 intent': 'Intent 3', 'Intent 3 confidence': 'Intent 3 Confidence',
                                        'response_entities': 'Detected Entities',
                                        'Escalated_conversation': 'Escalated conversation?',
                                        'Covered': 'Covered?', 'Not Covered cause': 'Not covered - cause',
                                        'response.output.nodes_visited_s': 'Dialog Flow',
                                        'response_dialog_stack': 'Dialog stack',
                                        'response_dialog_request_counter': 'Dialog request counter',
                                        'response_dialog_turn_counter': 'Dialog turn counter'
                                        })

    existing_columns = ['Log ID', 'Conversation ID', 'Response Timestamp', 'Customer ID (must retain for delete)',
                        'Utterance Text', 'Response Text', 'Detected top intent', 'Detected top intent confidence',
                        'Intent 2', 'Intent 2 Confidence', 'Confidence gap (between 1 and 2)', 'Intent 3',
                        'Intent 3 Confidence',
                        'Detected Entities', 'Escalated conversation?', 'Covered?', 'Not covered - cause',
                        'Dialog Flow', 'Dialog stack', 'Dialog request counter', 'Dialog turn counter']
    # Add new columns for annotating problematic logs
    new_columns_excel = ['Response Correct (Y/N)?', 'Response Helpful (Y/N)?',
                         'Root cause (Problem with Intent, entity, dialog)',
                         'Wrong intent? If yes, put the correct intent. Otherwise leave it blank',
                         'New intent needed? (A new intent. Otherwise leave blank)',
                         'Add Utterance to Training data (Y/N)',
                         'Entity missed? If yes, put the missed entity value. Otherwise leave it blank',
                         'New entity needed? If yes, put the entity name',
                         'New entity value? If yes, put the entity value', 'New dialog logic needed?',
                         'Wrong dialog node? If yes, put the node name. Otherwise leave it blank',
                         'No dialog node triggered']

    # Add the new columns to the dataframe (missing ones are created empty)
    df_excel = df_excel.reindex(columns=[*existing_columns, *new_columns_excel], fill_value='')

    # Set output filenames
    all_file = 'All.xlsx'
    escalated_sample_file = 'Escalated_sample.xlsx'
    non_escalated_sample_file = 'NotEscalated_sample.xlsx'

    # Remove timezone information (Excel cannot store tz-aware datetimes)
    df_excel['Response Timestamp'] = df_excel['Response Timestamp'].dt.tz_localize(None)

    # Prepare dataframe containing all utterances sorted by Conversation ID and Response Timestamp
    df_all = df_excel.sort_values(by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True)

    # Prepare dataframe containing covered utterances sorted by Conversation ID and Response Timestamp
    df_covered = df_excel[df_excel['Covered?']==True].sort_values(
        by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True)

    # Prepare dataframe containing not covered utterances sorted by Conversation ID and Response Timestamp
    df_not_covered = df_excel[df_excel['Covered?']==False].sort_values(
        by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True)

    # Convert to Excel format and save to local or upload to COS if project_io is provided
    generate_excel_measure([df_all, df_covered, df_not_covered],
                           ['All_Utterances', 'Covered_Utterances', 'Not_Covered_Utterances'], filename=all_file,
                           project_io=project_io)

    # Prepare dataframe containing escalated conversations
    df_escalated_true = df_excel.loc[df_excel['Escalated conversation?']==True]

    # Sample escalated conversations
    if sample_size > 0 and len(df_escalated_true) > 0:
        # Get unique escalated conversation ids
        conversation_ids = df_escalated_true['Conversation ID'].unique()
        # Sample without replacement, clamped so the sample never exceeds the
        # population (the previous call sampled with replacement and could pick
        # the same conversation several times).
        sampled_conversation_ids = np.random.choice(conversation_ids,
                                                    min(sample_size, len(conversation_ids)),
                                                    replace=False)
        df_escalated_true = df_escalated_true[
            df_escalated_true['Conversation ID'].isin(sampled_conversation_ids)].sort_values(
            by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True)

    # Prepare dataframe containing covered utterances in escalated conversations sorted by Conversation ID and Response Timestamp
    df_escalated_covered = df_escalated_true[df_escalated_true['Covered?']==True].reset_index(drop=True)

    # Prepare dataframe containing not covered utterances in escalated conversations sorted by Conversation ID and Response Timestamp
    df_escalated_not_covered = df_escalated_true[df_escalated_true['Covered?']==False].reset_index(drop=True)

    # Convert to Excel format and upload to COS
    generate_excel_measure([df_escalated_true, df_escalated_covered, df_escalated_not_covered],
                           ['All_Utterances', 'Covered_Utterances', 'Not_Covered_Utterances'],
                           filename=escalated_sample_file, project_io=project_io)

    # Prepare dataframe containing non-escalated conversations
    df_not_escalated = df_excel.loc[df_excel['Escalated conversation?']==False].reset_index(drop=True)

    # Sample non-escalated conversations (guard against an empty population:
    # np.random.choice raises on an empty array when a sample is requested)
    if sample_size > 0 and len(df_not_escalated) > 0:
        # Get unique non-escalated conversation ids
        conversation_ids = df_not_escalated['Conversation ID'].unique()
        sampled_conversation_ids = np.random.choice(conversation_ids,
                                                    min(sample_size, len(conversation_ids)),
                                                    replace=False)
        df_not_escalated = df_not_escalated[
            df_not_escalated['Conversation ID'].isin(sampled_conversation_ids)].sort_values(
            by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True)

    # Prepare dataframe containing covered utterances in non-escalated conversations
    df_not_escalated_covered = df_not_escalated[df_not_escalated['Covered?']==True].reset_index(drop=True)

    # Generate not escalated and not covered sample file
    df_not_escalated_not_covered = df_not_escalated[df_not_escalated['Covered?']==False].reset_index(drop=True)

    # Convert to Excel format and upload to COS
    generate_excel_measure([df_not_escalated, df_not_escalated_covered, df_not_escalated_not_covered],
                           ['All_Utterances', 'Covered_Utterances', 'Not_Covered_Utterances'],
                           filename=non_escalated_sample_file, project_io=project_io)
309 |
--------------------------------------------------------------------------------
/src/main/python/export_csv_for_intent_recommendation.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | # (C) Copyright IBM Corp. 2019, 2020.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import argparse
18 | from watson_assistant_func import load_logs_from_file
19 | from watson_assistant_func import export_csv_for_intent_recommendation
20 |
21 |
def parse_args():
    """Parse command-line arguments for generating an intent-recommendation CSV.

    Returns
    ----------
    argparse.Namespace with input_json, output_csv, overwrite, deduplicate,
    min_length and max_length attributes.
    """
    parser = argparse.ArgumentParser(description='Generate intent recommendation CSV from logs JSON file for Watson '
                                                 'Assistant service.')

    # Required arguments
    parser.add_argument(
        '--input_json',
        type=str,
        required=True,
        help="The path of the JSON file of logs, generated by `fetch_logs.py`",
    )
    parser.add_argument(
        '--output_csv',
        type=str,
        required=True,
        help="The path of the CSV file of utterances this script will generate for intent recommendation",
    )

    # Optional arguments
    # NOTE: `type=bool` was a bug here — argparse would treat any non-empty string
    # (including "False") as True. A store_true flag matches --deduplicate and
    # keeps the default of False.
    parser.add_argument(
        '--overwrite',
        action='store_true',
        help="If set, overwrite the output file if it exists",
    )
    parser.add_argument(
        '--deduplicate',
        action="store_true",
        help="If set, duplicate utterances are discarded when generating CSV",
    )
    parser.add_argument(
        '--min_length',
        type=int,
        default=3,
        help="Minimum number of tokens of a utterance in the generated CSV. Any utterance that has less than or "
             "equal to this number is discarded.",
    )
    parser.add_argument(
        '--max_length',
        type=int,
        default=20,
        help="Maximum number of tokens of a utterance in the generated CSV. Any utterance that has more than or "
             "equal to this number is discarded.",
    )

    return parser.parse_args()
68 |
69 |
if __name__ == '__main__':
    # Parse CLI arguments and echo them for traceability.
    args = parse_args()
    print(vars(args))

    # Load previously fetched logs from a local JSON file
    # (project=None: running outside Watson Studio).
    logs = load_logs_from_file(filename=args.input_json,
                               project=None)

    # Sanitize/filter the utterances and write the one-column CSV that
    # Watson Assistant's intent-recommendation feature consumes.
    export_csv_for_intent_recommendation(logs,
                                         filename=args.output_csv,
                                         deduplicate=args.deduplicate,
                                         project=None,
                                         overwrite=args.overwrite,
                                         min_length=args.min_length,
                                         max_length=args.max_length)
84 |
--------------------------------------------------------------------------------
/src/main/python/fetch_logs.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | # (C) Copyright IBM Corp. 2019, 2020.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import argparse
18 | from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
19 | from ibm_watson import AssistantV1
20 |
21 | from watson_assistant_func import get_logs
22 |
23 |
def parse_args():
    """Parse command-line arguments for fetching Watson Assistant logs.

    Returns
    ----------
    argparse.Namespace with url, version, apikey, workspace_id, skill_id,
    assistant_id, filters, num_logs, output_json and overwrite attributes.
    """
    parser = argparse.ArgumentParser(description='Script to fetch logs from Watson Assistant service.')

    # Required arguments
    parser.add_argument(
        "--url",
        type=str,
        required=True,
        help="Watson Asssistant Legacy V1 URLs, for example, https://api.us-east.assistant.watson.cloud.ibm.com."
    )
    parser.add_argument(
        "--version",
        type=str,
        required=True,
        help="API requests require a version parameter that takes a date in the format version=YYYY-MM-DD. When we "
             "change the API in a backwards-incompatible way, we release a new version date. "
    )
    parser.add_argument(
        "--apikey",
        type=str,
        required=True,
        help="The IAM token."
    )

    # Optional arguments
    parser.add_argument(
        "--workspace_id",
        type=str,
        default='',
        help="To load the skill of an assistant in the next section, you need to provide either Workspace ID or Skill "
             "ID. The values can be found on the View API Details page. If you are using versioning in Watson "
             "Assistant, this ID represents the Development version of your skill definition.",
    )
    parser.add_argument(
        "--skill_id",
        type=str,
        default='',
        help="To load the skill of an assistant in the next section, you need to provide either Workspace ID or Skill "
             "ID. The values can be found on the View API Details page. If you are using versioning in Watson "
             "Assistant, this ID represents the Development version of your skill definition.",
    )
    parser.add_argument(
        "--assistant_id",
        type=str,
        default='',
        help="To load the skill of an assistant in the next section, you need to provide Assistant ID. The values can "
             "be found on the View API Details page.",
    )
    parser.add_argument(
        '--filters',
        default=[],
        nargs='*',
        help="List of filters (string), separated by space. For example, '--filters language::en "
             "meta.summary.input_text_length_i>0 response_timestamp>=2020-03-01'",
    )
    parser.add_argument(
        '--num_logs',
        type=int,
        default=20000,
        help="Number of logs to retrieve (default=20000)",
    )
    parser.add_argument(
        '--output_json',
        type=str,
        default=None,
        help="If output_json is set, logs will be saved to filename as a JSON file",
    )
    # NOTE: `type=bool` was a bug here — argparse would treat any non-empty string
    # (including "False") as True. A store_true flag keeps the default of False.
    parser.add_argument(
        '--overwrite',
        action='store_true',
        help="If set, overwrite the output file if it exists",
    )

    return parser.parse_args()
99 |
100 |
if __name__ == '__main__':
    # Parse CLI arguments and echo them for traceability.
    args = parse_args()
    print(vars(args))

    # Build an authenticated Watson Assistant v1 client from the API key.
    authenticator = IAMAuthenticator(args.apikey)
    sdk_object = AssistantV1(version=args.version, authenticator=authenticator)
    sdk_object.set_service_url(args.url)

    # At least one of workspace/skill/assistant id must be non-empty;
    # get_logs validates this and returns None otherwise.
    assistant_information = {'workspace_id': args.workspace_id,
                             'skill_id': args.skill_id,
                             'assistant_id': args.assistant_id}
    print(assistant_information)

    # Fetch logs (or load them from an existing file when not overwriting).
    # NOTE(review): get_logs requires a filename — when --output_json is omitted
    # (default None) it prints an error and returns None without fetching;
    # confirm whether output_json should be required.
    logs = get_logs(sdk_object,
                    assistant_information,
                    num_logs=args.num_logs,
                    filename=args.output_json,
                    filters=args.filters,
                    project=None,
                    overwrite=args.overwrite,
                    )
122 |
--------------------------------------------------------------------------------
/src/main/python/watson_assistant_func.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | # (C) Copyright IBM Corp. 2019, 2020.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import json
18 | import pandas as pd
19 | import os
20 | import csv
21 | import traceback
22 | import io
23 | from string import punctuation
24 |
25 |
# Punctuation characters stripped by sanitize_text(): ASCII string.punctuation
# plus the Unicode right single quotation mark (U+2019) common in chat logs.
EN_PUNCTUATION = punctuation + '’'
27 |
28 |
def get_assistant_definition(sdk_object, assistant_info, project=None, overwrite=False, filename='assistant_definition'):
    """Fetch a workspace/skill definition from Watson Assistant, or load a cached copy.

    Parameters
    ----------
    sdk_object : authenticated AssistantV1 client used to call get_workspace
    assistant_info : dict with 'workspace_id', 'assistant_id' and 'skill_id' keys
    project : Watson Studio project io instance (None to use the local filesystem)
    overwrite : when True, refetch from the API and overwrite any cached file
    filename : prefix for the cached definition JSON file

    Returns
    ----------
    pandas.DataFrame of the flattened definition, or None when no valid id is given
    """
    workspace_id, assistant_id, skill_id = [assistant_info.get(k) for k in ['workspace_id', 'assistant_id', 'skill_id']]

    # Truthiness handles both missing keys (None) and empty strings; the previous
    # len(...) check raised TypeError when a key was absent.
    if workspace_id:
        filename += '_workspace_{}.json'.format(workspace_id)
    elif skill_id:
        filename += '_skill_{}.json'.format(skill_id)
    else:
        print('Please provide a valid Workspace ID or Skill ID!')
        return None

    if os.path.isfile(filename) and overwrite is False:
        # Use the cached local copy instead of calling the API again.
        print('Reading from file:', filename)
        with open(filename) as data:
            data_json = json.load(data)
        # Read logs into dataframe
        print('Assistant definition is loaded into as a dataframe.')
        df_assistant = pd.json_normalize(data_json)
        return df_assistant
    else:
        if workspace_id:
            # Fetch the workspace definition
            print('Loading workspace definition using workspace id: {}'.format(workspace_id))
            assistant_definition = sdk_object.get_workspace(workspace_id=workspace_id, export=True,
                                                            include_audit=True).get_result()
        else:
            # Fetch the skill definition (v1 skills are addressed via workspace_id)
            print('Loading skill definition using skill id: {}'.format(skill_id))
            assistant_definition = sdk_object.get_workspace(workspace_id=skill_id, export=True,
                                                            include_audit=True).get_result()

        if assistant_definition:
            # Store the workspace details in a dataframe
            df_assistant = pd.json_normalize(assistant_definition)

            # Export the definition so subsequent runs can use the cached copy
            if not os.path.isfile(filename) or overwrite:
                if project is not None:
                    # Save as a project asset; no local file is needed (the previous
                    # code also opened an empty local file as a side effect).
                    project.save_data(filename, json.dumps(assistant_definition), overwrite=True)
                    # Display success message
                    print('Definition {} exported as a project asset'.format(filename))
                else:
                    with open(filename, 'w') as f:
                        json.dump(assistant_definition, f)
                    print('Definition {} exported'.format(filename))

            return df_assistant
        else:
            return None
83 |
84 |
85 | def _get_logs_from_api(sdk_object, workspace_id, log_filter, num_logs):
86 | log_list = list()
87 | try:
88 | current_cursor = None
89 | while num_logs > 0:
90 | if len(workspace_id) > 0:
91 | logs_response = sdk_object.list_logs(
92 | workspace_id=workspace_id,
93 | page_limit=500,
94 | cursor=current_cursor,
95 | filter=log_filter
96 | ).get_result()
97 | else:
98 | logs_response = sdk_object.list_all_logs(
99 | page_limit=500,
100 | cursor=current_cursor,
101 | filter=log_filter
102 | ).get_result()
103 | min_num = min(num_logs, len(logs_response['logs']))
104 | log_list.extend(logs_response['logs'][:min_num])
105 | print('\r{} logs retrieved'.format(len(log_list)), end='')
106 | num_logs = num_logs - min_num
107 | current_cursor = None
108 | # Check if there is another page of logs to be fetched
109 | if 'pagination' in logs_response:
110 | # Get the url from which logs are to fetched
111 | if 'next_cursor' in logs_response['pagination']:
112 | current_cursor = logs_response['pagination']['next_cursor']
113 | else:
114 | break
115 | except Exception as ex:
116 | traceback.print_tb(ex.__traceback__)
117 | raise RuntimeError("Error getting logs using API. Please check if URL/credentials are correct.")
118 |
119 | return log_list
120 |
121 |
def get_logs(sdk_object, assistant_info, num_logs, filename, filters=None, project=None, overwrite=False):
    """This function calls Watson Assistant API to retrieve logs, using pagination if necessary.
    The goal is to retrieve utterances (user inputs) from the logs.
    Parameters
    ----------
    sdk_object : authenticated AssistantV1 client used to call the logs API
    assistant_info : dict containing 'workspace_id', 'assistant_id' and 'skill_id'
    num_logs : int, maximum total number of log records to fetch
    filename : name of the JSON cache file; required — when falsy this function
        prints an error and returns None
    filters : list of query filter strings (joined with ',' into one filter)
    project : project io of studio project (None when running outside Watson Studio)
    overwrite : boolean, whether to refetch and overwrite an existing log file
    Returns
    ----------
    logs : list of raw log dicts from the API (not a DataFrame), or None on bad input
    """
    if filters is None:
        filters = []

    workspace_id, assistant_id, skill_id = [assistant_info.get(k) for k in ['workspace_id', 'assistant_id', 'skill_id']]

    # At least one of the three identifiers must be non-empty.
    if (workspace_id is None or len(workspace_id) == 0) \
            and (assistant_id is None or len(assistant_id) == 0) \
            and (skill_id is None or len(skill_id) == 0):
        print('Please provide a valid Workspace ID, Assistant ID, or Skill ID!')
        return None

    # check if filename exists before retrieving logs
    file_exist = False
    if filename:
        if project:
            # Watson Studio: look for the file among the project's assets.
            for file in project.get_files():
                if file['name'] == filename:
                    if not overwrite:
                        print('Load logs from existing file {}, set overwrite=True to overwrite'.format(filename))
                        return load_logs_from_file(filename, project)
                    else:
                        file_exist = True

        elif os.path.exists(filename):
            # Local filesystem: reuse the cached file unless overwriting.
            if not overwrite:
                print('Load logs from existing file {}, set overwrite=True to overwrite'.format(filename))
                return load_logs_from_file(filename, None)
            else:
                file_exist = True
    else:
        print('Please provide a valid filename.')
        return None

    # adding default filters based on assistant_id and workspace_id
    # NOTE(review): these appends mutate the caller's `filters` list when one is
    # passed in — confirm callers do not reuse the list across calls.
    if assistant_id is not None and len(assistant_id) > 0:
        filters.append('request.context.system.assistant_id::{}'.format(assistant_id))
    if skill_id is not None and len(skill_id) > 0:
        filters.append('workspace_id::{}'.format(skill_id))

    logs = _get_logs_from_api(sdk_object=sdk_object,
                              workspace_id=workspace_id,
                              log_filter=','.join(filters),
                              num_logs=num_logs)
    print('\nLoaded {} logs'.format(len(logs)))

    if not file_exist or overwrite:
        print('Saving {} logs into JSON file... '.format(filename))
        if project:
            # NOTE(review): this also creates an empty local file named `filename`;
            # the data itself is stored via project.save_data — confirm intended.
            with open(filename, 'wb') as fp:
                project.save_data(filename, json.dumps(logs, indent=2), overwrite=overwrite)
                # Display success message
                print('File', fp.name, 'saved a project asset')
        else:
            with open(filename, 'w') as fp:
                json.dump(logs, fp, indent=2)
                print('File', fp.name, 'saved')

    return logs
195 |
196 |
def load_logs_from_file(filename, project=None):
    """Load previously fetched logs from a JSON file.

    Parameters
    ----------
    filename : path (or project asset name) of the JSON logs file
    project : Watson Studio project io instance; when None the local
        filesystem is used instead

    Returns
    ----------
    logs : the deserialized JSON content (a list of log records)

    Raises
    ----------
    ValueError when no project is given and `filename` does not exist
    or is not a regular file.
    """
    print('Reading from file:', filename)
    if project:
        # Watson Studio: fetch the asset bytes from cloud object storage.
        raw = project.get_file(filename).getvalue().decode('utf8')
        logs = json.loads(raw)
        print('Loaded {} logs'.format(len(logs)))
    else:
        if not os.path.exists(filename) or not os.path.isfile(filename):
            raise ValueError('{} either does not exist or is a directory'.format(filename))
        with open(filename) as data:
            logs = json.load(data)
        print('Loaded {} logs'.format(len(logs)))
    return logs
215 |
216 |
217 | # From: https://github.ibm.com/watson-engagement-advisor/improve-recommendations-engine/blob/4c996b24bfcac4eb6ab6bbf39cf125cdf30b9027/src/main/python/cluster/utils.py#L44
def sanitize_text(text, remove_punctuation=True, lower=True, tokenize=True):
    """Normalize a raw utterance: strip surrounding whitespace, optionally
    lowercase, and optionally strip punctuation.

    Parameters
    ----------
    text : the raw utterance string
    remove_punctuation : strip ASCII punctuation plus the Unicode right
        single quote (U+2019) when True
    lower : lowercase the text when True
    tokenize : unused; kept for backward compatibility with existing callers
    """
    cleaned = text.strip()
    if lower:
        cleaned = cleaned.lower()
    if remove_punctuation:
        # string.punctuation extended with the Unicode right single quote,
        # which is common in chat transcripts.
        cleaned = cleaned.translate(str.maketrans('', '', punctuation + '’'))
    return cleaned
232 |
233 |
def export_csv_for_intent_recommendation(logs,
                                         filename,
                                         deduplicate=True,
                                         project=None,
                                         overwrite=False,
                                         min_length=3,
                                         max_length=20):
    """Extract user utterances from raw logs and write them as a one-column CSV
    for Watson Assistant intent recommendation.

    Utterances are sanitized, then kept only when their whitespace token count
    is strictly between min_length and max_length. Returns the list of
    single-element rows that were written.
    """
    utterances = (sanitize_text(entry['request']['input']['text']) for entry in logs)
    utterances = (u for u in utterances if min_length < len(u.split()) < max_length)
    if deduplicate:
        messages = [[u] for u in set(utterances)]
    else:
        messages = [[u] for u in utterances]
    print('\nExporting {} messages into CSV...'.format(len(messages)))

    if project:
        # Watson Studio: render the CSV in memory and store it as a project asset.
        with open(filename, 'wb') as fp:
            buffer = io.StringIO()
            csv.writer(buffer, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL).writerows(messages)
            project.save_data(filename, buffer.getvalue(), overwrite=overwrite)
            buffer.close()
            # Display success message
            print('File', fp.name, 'saved a project asset')
    else:
        with open(filename, 'w') as f:
            csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL).writerows(messages)
            print('File', f.name, 'saved')

    return messages
--------------------------------------------------------------------------------
/src/main/python/watson_assistant_func_skip.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | # (C) Copyright IBM Corp. 2019, 2020.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import re
18 | import json
19 | import pandas as pd
20 | import time
21 | import os
22 |
23 |
def get_logs(num_logs, log_list, project_creds, log_filter=None):
    """This function calls Watson Assistant API to retrieve logs, using pagination if necessary.
    The goal is to retrieve utterances (user inputs) from the logs.
    Parameters
    ----------
    num_logs : int, the number of records to return in each page of results.
    log_list : list, a list to store returned logs (mutated in place)
    project_creds : dict, containing information on project, cos, credentials, assistant, workspace id and name
    log_filter: string, a cacheable parameter that limits the results to those matching the specified filter.

    Returns
    ----------
    log_df : Dataframe of fetched logs
    """
    # Unpack the keys from the dictionary to individual variables
    project, sdk_object, ws_id, ws_name = [project_creds.get(k) for k in ['project', 'sdk_object', 'ws_id', 'ws_name']]
    # Create file name by combining workspace name and filter
    filename = 'logs_' + ws_id + '_' + str(num_logs)

    # Remove all special characters from file name
    filename = re.sub(r'[^a-zA-Z0-9_\- .]', '', filename) + '.json'

    if [file['name'] for file in project.get_files() if file['name'] == filename]:
        # Get file from cloud object storage
        print('Reading from file:', filename)
        data = project.get_file(filename).getvalue().decode('utf8')
        data_json = json.loads(data)
        # Read logs into dataframe
        log_df = pd.DataFrame.from_records(data_json)
        # Display success message and return the dataframe
        print('Workspace logs loaded successfully with', log_df.shape[0], 'records')
        return log_df
    else:
        try:
            current_cursor = None
            while num_logs > 0:
                time.sleep(0.5) # allow for a short break to avoid reaching rate limit
                # NOTE(review): the response is subscripted directly below without
                # calling .get_result(); with current ibm-watson SDKs list_logs
                # returns a DetailedResponse — confirm this legacy path still works.
                logs_response = sdk_object.list_logs(
                    workspace_id=ws_id,
                    page_limit=500,
                    cursor=current_cursor,
                    filter=log_filter
                )
                min_num = min(num_logs, len(logs_response['logs']))
                log_list.extend(logs_response['logs'][:min_num])
                print('\r{} logs retrieved'.format(len(log_list)), end='')
                num_logs = num_logs - min_num

                current_cursor = None
                # Check if there is another page of logs to be fetched
                if 'pagination' in logs_response:
                    # Get the url from which logs are to fetched
                    if 'next_cursor' in logs_response['pagination']:
                        current_cursor = logs_response['pagination']['next_cursor']
                else:
                    break

        except Exception as ex:
            print(ex)
        finally:
            # NOTE(review): returning from `finally` suppresses any in-flight
            # exception and returns whatever was collected so far — confirm this
            # best-effort behavior is intended.
            log_df = pd.DataFrame(log_list)
            return log_df
86 |
87 |
def get_logs_jupyter(num_logs, log_list, workspace_creds, log_filter=None):
    """Fetch message logs for a workspace from the Watson Assistant API,
    paginating as needed, and reuse a cached local JSON file when one
    exists (Jupyter variant of get_logs, which caches in object storage).

    Parameters
    ----------
    num_logs : int, total number of log records to fetch (the API is
        queried in pages of up to 500 records)
    log_list : list, accumulator that fetched log records are appended to
    workspace_creds : dict, expects keys 'sdk_object' (Assistant SDK
        client), 'ws_id' and 'ws_name' (workspace id and name)
    log_filter : string, a cacheable parameter that limits the results to
        those matching the specified filter

    Returns
    ----------
    log_df : pandas.DataFrame of fetched (or cached) logs
    """
    # Unpack the values actually used from the credentials dictionary
    # ('ws_name' may be present in the dict but is not needed here)
    sdk_object = workspace_creds.get('sdk_object')
    ws_id = workspace_creds.get('ws_id')

    # Cache file name is derived from the workspace id and requested count
    filename = 'logs_' + ws_id + '_' + str(num_logs)
    # Remove characters that are unsafe in file names
    filename = re.sub(r'[^a-zA-Z0-9_\- .]', '', filename) + '.json'

    # Serve from the cached local file when it exists
    if os.path.isfile(filename):
        print('Reading from file:', filename)
        with open(filename) as data:
            data_json = json.load(data)
        # Read logs into dataframe
        log_df = pd.DataFrame.from_records(data_json)
        print('Workspace logs loaded successfully with', log_df.shape[0], 'records')
        return log_df

    try:
        current_cursor = None
        while num_logs > 0:
            # Short pause between pages to avoid the API rate limit,
            # matching the behavior of get_logs
            time.sleep(0.5)
            logs_response = sdk_object.list_logs(
                workspace_id=ws_id,
                page_limit=500,
                cursor=current_cursor,
                filter=log_filter
            )
            # Keep only as many records as are still needed
            fetched = min(num_logs, len(logs_response['logs']))
            log_list.extend(logs_response['logs'][:fetched])
            print('\r{} logs retrieved'.format(len(log_list)), end='')
            num_logs -= fetched

            # Continue only when the API reports another page of results
            current_cursor = None
            if 'pagination' in logs_response:
                current_cursor = logs_response['pagination'].get('next_cursor')
            if current_cursor is None:
                break
    except Exception as ex:
        # Best effort: report the failure and fall through to return
        # whatever was fetched so far
        print(ex)
    # Return outside try/finally so a `return` in `finally` never masks
    # in-flight exceptions (pylint W0150)
    return pd.DataFrame(log_list)
148 |
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | # (C) Copyright IBM Corp. 2019, 2020.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
--------------------------------------------------------------------------------
/test/assistant_improve_toolkit/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | # (C) Copyright IBM Corp. 2019, 2020.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
--------------------------------------------------------------------------------
/test/assistant_improve_toolkit/test_computation_func.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.insert(0, './src')
3 |
4 | from assistant_improve_toolkit.computation_func import intersection
5 |
6 |
def test_intersection():
    # Overlap of a list with a superset of itself is the list itself.
    expected = ['node_1', 'node_2']
    superset = ['node_1', 'node_2', 'node_3']

    assert intersection(expected, superset) == expected
14 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = lint, py37, py38
3 |
4 | [testenv:lint]
5 | basepython = python3.8
6 | deps = pylint
7 | commands = pylint src test
8 |
9 | [testenv]
10 | passenv = TOXENV CI TRAVIS*
11 | commands =
12 | py.test --reruns 3 --cov=src
13 | codecov -e TOXENV
14 | deps =
15 | -r{toxinidir}/requirements.txt
16 | -r{toxinidir}/requirements_dev.txt
17 | usedevelop = True
18 | exclude = .venv,.git,.tox,docs
--------------------------------------------------------------------------------