├── .bumpversion.cfg ├── .github └── workflows │ ├── build.yml │ └── deploy.yml ├── .gitignore ├── .releaserc ├── .travis.yml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── notebook ├── Effectiveness Notebook.ipynb ├── IBM Watson Assistant Continuous Improvement Best Practices.pdf ├── Logs Notebook-cp4d.ipynb ├── Logs Notebook.ipynb ├── Measure Notebook-cp4d.ipynb ├── Measure Notebook.ipynb ├── README.md ├── data │ ├── annotation.xlsx │ ├── book_recommender_logs.gz │ ├── book_recommender_skill.json │ ├── sample_logs.json │ └── workspace.json └── imgs │ ├── analyze_process.png │ ├── box_zoom.png │ ├── click.png │ ├── effectiveness_overall.png │ ├── effort_computation.png │ ├── find_data_icon.png │ ├── measure_overall.png │ ├── measure_process.png │ ├── reset.png │ ├── save.png │ ├── suggestions.png │ └── wheel_zoom.png ├── package-lock.json ├── package.json ├── requirements.txt ├── requirements_dev.txt ├── setup.py ├── src ├── __init__.py ├── assistant_improve_toolkit │ ├── __init__.py │ ├── computation_func.py │ ├── cos_op.py │ ├── export_csv_for_intent_recommendation.py │ ├── fetch_logs.py │ ├── version.py │ ├── visualize_func.py │ └── watson_assistant_func.py └── main │ ├── __init__.py │ ├── css │ ├── custom.css │ ├── custom_jupyter.css │ └── custom_watson_studio.css │ └── python │ ├── __init__.py │ ├── computation_func.py │ ├── cos_op.py │ ├── export_csv_for_intent_recommendation.py │ ├── fetch_logs.py │ ├── visualize_func.py │ ├── watson_assistant_func.py │ └── watson_assistant_func_skip.py ├── test ├── __init__.py └── assistant_improve_toolkit │ ├── __init__.py │ └── test_computation_func.py └── tox.ini /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 1.4.1 3 | commit = True 4 | 5 | [bumpversion:file:src/assistant_improve_toolkit/version.py] 6 | search = __version__ = '{current_version}' 7 | replace = __version__ = '{new_version}' 8 | 9 | 
[bumpversion:file:setup.py] 10 | search = __version__ = '{current_version}' 11 | replace = __version__ = '{new_version}' 12 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support documentation. 4 | # This workflow will download a prebuilt Python version, install dependencies, build and deploy/publish a new release 5 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 6 | 7 | name: Build and Test 8 | 9 | on: 10 | pull_request: 11 | # Sequence of patterns matched against refs/heads 12 | branches: 13 | - master 14 | 15 | jobs: 16 | test: 17 | name: Build and Test 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | with: 23 | persist-credentials: false 24 | - name: Set up Python 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: '3.10' 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 32 | if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi 33 | - name: Test with pytest 34 | run: | 35 | pytest 36 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support documentation. 
4 | # This workflow will download a prebuilt Python version, install dependencies, build and deploy/publish a new release 5 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 6 | 7 | name: Deploy and Publish 8 | 9 | on: 10 | push: 11 | branches: 12 | - master 13 | 14 | jobs: 15 | deploy: 16 | if: "!contains(github.event.head_commit.message, 'skip ci')" 17 | name: Deploy and Publish 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | with: 23 | persist-credentials: false 24 | - name: Set up Python 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: '3.10' 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 32 | if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi 33 | - name: Test with pytest 34 | run: | 35 | pytest 36 | - name: Setup Node 37 | uses: actions/setup-node@v1 38 | with: 39 | node-version: 18 40 | - name: Install Semantic Release dependencies 41 | run: | 42 | sudo apt-get install bumpversion 43 | npm install -g semantic-release 44 | npm install -g @semantic-release/changelog 45 | npm install -g @semantic-release/exec 46 | npm install -g @semantic-release/git 47 | npm install -g @semantic-release/github 48 | npm install -g @semantic-release/commit-analyzer 49 | npm install -g @semantic-release/release-notes-generator 50 | npm install -g semantic-release-pypi 51 | - name: Install setuptools 52 | run: python -m pip install --upgrade setuptools wheel twine 53 | - name: Publish to Git Releases and Tags 54 | run: npx semantic-release 55 | env: 56 | GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} 57 | NPM_TOKEN: ${{ secrets.NPM_TOKEN }} 58 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} 59 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Java 2 | *.class 3 | 4 | build/ 5 | liberty/ 6 | target/ 7 | dist/ 8 | 9 | # Some MarkDown preview plugin generate .filename.html file that we want to ignore 10 | .*.html 11 | 12 | ### Gradle ### 13 | .gradle 14 | gradle-app.setting 15 | 16 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored) 17 | !gradle-wrapper.jar 18 | 19 | ### Eclipse ### 20 | .classpath 21 | .project 22 | .factorypath 23 | .settings/ 24 | .metadata 25 | bin/ 26 | logs/ 27 | 28 | # Ignore the service manifest project (subtree) files 29 | manifest/ 30 | 31 | ### OS X ### 32 | .DS_Store 33 | 34 | ### Intelli J 35 | .idea 36 | *.iws 37 | 38 | 39 | ### Generated files ### 40 | /manifest.yml 41 | generated-sources/ 42 | 43 | ### Vagrant ### 44 | .vagrant 45 | 46 | ### Python ### 47 | *.pyc 48 | .pydevproject 49 | __pycache__/ 50 | *.egg-info/ 51 | 52 | 53 | ### Log files ### 54 | *.log 55 | 56 | ### test coverage ### 57 | .coverage 58 | nosetests.xml 59 | 60 | ### test resources ### 61 | *.gz 62 | -------------------------------------------------------------------------------- /.releaserc: -------------------------------------------------------------------------------- 1 | { 2 | "debug": true, 3 | "plugins": [ 4 | "@semantic-release/commit-analyzer", 5 | "@semantic-release/release-notes-generator", 6 | "@semantic-release/changelog", 7 | "semantic-release-pypi", 8 | [ 9 | "@semantic-release/exec", 10 | { 11 | "prepareCmd": "bumpversion --allow-dirty --current-version ${lastRelease.version} --new-version ${nextRelease.version} patch" 12 | } 13 | ], 14 | [ 15 | "@semantic-release/git", 16 | { 17 | "message": "chore(release): ${nextRelease.version} release notes\n\n${nextRelease.notes}" 18 | } 19 | ], 20 | "@semantic-release/github" 21 | ] 22 | } -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 3.7 4 | - 3.8 5 | cache: pip 6 | before_install: 7 | #- python -c "import fcntl; fcntl.fcntl(1, fcntl.F_SETFL, 0)" 8 | - npm install npm@latest -g 9 | install: 10 | - pip3 install tox-travis 11 | before_script: 12 | - pip3 install -r requirements.txt 13 | script: 14 | - pip3 install python-dotenv 15 | #- travis_wait tox 16 | before_deploy: 17 | - pip3 install bumpversion pypandoc 18 | - sudo apt-get update 19 | - sudo apt-get install pandoc 20 | - nvm install 12 21 | - npm install @semantic-release/changelog 22 | - npm install @semantic-release/exec 23 | - npm install @semantic-release/git 24 | - npm install @semantic-release/github 25 | deploy: 26 | - provider: script 27 | script: npx semantic-release 28 | skip_cleanup: true 29 | on: 30 | python: 3.8 31 | branch: master 32 | - provider: pypi 33 | user: "$PYPI_USER" 34 | password: "$PYPI_PASSWORD" 35 | server: https://upload.pypi.org/legacy/ 36 | skip_cleanup: true 37 | on: 38 | python: 3.8 39 | tags: true -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [1.4.1](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.4.0...v1.4.1) (2023-07-08) 2 | 3 | 4 | ### Bug Fixes 5 | 6 | * remove outdated log filter for v2 api ([#155](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/155)) ([58a9407](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/58a9407e89a5ce2abf9fe2de5a82d7269c6be440)) 7 | 8 | # [1.4.0](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.9...v1.4.0) (2023-06-30) 9 | 10 | 11 | ### Features 12 | 13 | * support watson assistant sdk v2 
([#154](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/154)) ([0ec1075](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/0ec107522993a549f6d96cbd07a58d1d25920574)) 14 | 15 | ## [1.3.9](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.8...v1.3.9) (2023-06-22) 16 | 17 | 18 | ### Bug Fixes 19 | 20 | * update node version ([#153](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/153)) ([4ec5aa8](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/4ec5aa8a3f9410655b6e34eeed4d7d192a3315e4)) 21 | * update numpy version to be compatible with python 3.10 ([#152](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/152)) ([bbd5686](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/bbd568696f8f44ee3dff4e0a9f3cadb1d0b87083)) 22 | 23 | ## [1.3.8](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.7...v1.3.8) (2022-06-14) 24 | 25 | 26 | ### Bug Fixes 27 | 28 | * update requests version to avoid dependency issues ([#148](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/148)) ([b9fbdde](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/b9fbdde0e241ff3d9189a423ed6ca4d90591976b)) 29 | 30 | ## [1.3.7](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.6...v1.3.7) (2022-05-06) 31 | 32 | 33 | ### Bug Fixes 34 | 35 | * keep __version__ variable ([#147](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/147)) ([2714d14](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/2714d146b7e69669eed3474f3b700b3789084927)) 36 | * remove version 
from setup.py ([#146](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/146)) ([2d62284](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/2d622842a4ec002fa340810fe745ef04987f161c)) 37 | 38 | 39 | ### Reverts 40 | 41 | * Revert "chore (github_actions): deploy and build configs (#141)" (#144) ([68f0610](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/68f0610e0950d53e4732f428122669b7503353a3)), closes [#141](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/141) [#144](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/144) 42 | 43 | ## [1.3.6](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.5...v1.3.6) (2021-07-20) 44 | 45 | 46 | ### Bug Fixes 47 | 48 | * deploy config and setup ([27d8de0](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/27d8de0b41e0a6df300551460d7ec665f34d3d8c)) 49 | * drop reference to dialog_stack field and update deploy config ([c43c8cd](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/c43c8cd243c2779453434826c21054ec7a1ed6df)) 50 | 51 | ## [1.3.5](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.4...v1.3.5) (2021-07-19) 52 | 53 | 54 | ### Bug Fixes 55 | 56 | * deploy config ([e045930](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/e045930686b85e870f2201d02850f86040b49788)) 57 | * deploy config ([bf66c2b](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/bf66c2b9a5c0e4b2b92397360d481d3fd93aceae)) 58 | * deploy config ([7e88da9](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/7e88da9fb48d35239518e81db87f50a52647d6e0)) 59 | * 
deploy config ([ae7ccf9](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/ae7ccf959225b14a8b585eec26b3fa9fc4588bc1)) 60 | * deploy config ([0c0096c](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/0c0096c214d43fa304479cd70db14fcca0564fae)) 61 | * readme parsing for pypi ([a6fbbad](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/a6fbbaddd530b4a4487219a41819231df3456257)) 62 | 63 | ## [1.3.4](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.3...v1.3.4) (2021-07-17) 64 | 65 | 66 | ### Bug Fixes 67 | 68 | * deploy config ([244fe3f](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/244fe3fbe04e274997d912d501cbc6cf6ab596d9)) 69 | 70 | ## [1.3.3](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.2...v1.3.3) (2021-07-16) 71 | 72 | 73 | ### Bug Fixes 74 | 75 | * add github actions settings ([ed00f97](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/ed00f9700113e01fc096c20d8a80073b9e185511)) 76 | * add github actions settings ([320bfd2](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/320bfd2e915df1655759c97e0c03d31e55c88880)) 77 | * deploy config ([63df0d8](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/63df0d8fd12d9898ef0b20b0754b24dcb8a929eb)) 78 | * deploy config ([358fe32](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/358fe32dacddf66576111fcabc076a1a188ffc8d)) 79 | * deploy config ([a3b5d7e](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/a3b5d7ea4542c9d81a2a748efa2caa5689ce0a0a)) 80 | * deploy file 
([13495c8](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/13495c890d0e9e3da68180507300419b82b69c12)) 81 | * release version number ([be2b821](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/be2b8212cbf95c5ae29edface6857e3387ef4de5)) 82 | * release version number ([9f2cbdc](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/9f2cbdc4385899604d22a31c4c5447b342c23730)) 83 | * remove response_dialog fiedlds ([923cdf6](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/923cdf654dd25bd6526a2ea8c3a48106c34a2244)) 84 | * remove response_dialog fields ([b09f905](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/b09f905bb32979af15b620ca6741c9fb0d7e241c)) 85 | * remove response_dialog fields ([2c0b91a](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/2c0b91a50f9c29e8dbec68aebf4eb2c64d4760c3)) 86 | 87 | ## [1.3.2](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.1...v1.3.2) (2021-07-15) 88 | 89 | 90 | ### Bug Fixes 91 | 92 | * Remove response_dialog fields ([#119](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/pull/119)) ([2c0b91a50f](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commits/2c0b91a50f9c29e8dbec68aebf4eb2c64d4760c3)) 93 | 94 | ## [1.3.1](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.3.0...v1.3.1) (2021-06-09) 95 | 96 | 97 | ### Bug Fixes 98 | 99 | * update requirement and fix typos in notebooks ([#113](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/113)) ([bc96b95](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/bc96b95e4deae6b7b5ad77ba066b5dc936669ab6)) 
100 | 101 | # [1.3.0](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.2.3...v1.3.0) (2021-06-04) 102 | 103 | 104 | ### Features 105 | 106 | * log notebook for cp4d ([#111](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/111)) ([526f8f1](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/526f8f1d3bc2a1e1f6bc4404459684073bbdf3e4)) 107 | 108 | ## [1.2.3](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.2.2...v1.2.3) (2021-06-03) 109 | 110 | 111 | ### Bug Fixes 112 | 113 | * fix a bug ([#107](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/107)) ([dd7aca8](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/dd7aca876b2f98a2547b87f31181824c4a3fd7f1)) 114 | 115 | ## [1.2.2](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.2.1...v1.2.2) (2021-06-03) 116 | 117 | 118 | ### Bug Fixes 119 | 120 | * add handler for nan value in context field ([#105](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/105)) ([41e68ef](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/41e68effbfbb55afb79e9f6daf1725fb92b553bc)) 121 | 122 | ## [1.2.1](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.2.0...v1.2.1) (2021-05-27) 123 | 124 | 125 | ### Bug Fixes 126 | 127 | * support cp4d format ([#102](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/102)) ([1e75b80](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/1e75b804a75d3d0745395371405e4d881cf953fa)) 128 | 129 | # 
[1.2.0](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.1.6...v1.2.0) (2021-05-21) 130 | 131 | 132 | ### Features 133 | 134 | * add support for assistant v2 log apis ([#100](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/100)) ([2c8d7bb](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/2c8d7bb3fab4f4dad4a7ba3849aa3fc2763c803d)) 135 | 136 | ## [1.1.6](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.1.5...v1.1.6) (2020-09-02) 137 | 138 | 139 | ### Bug Fixes 140 | 141 | * fix visualization with 'week' interval ([#96](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/96)) ([fa0d9e0](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/fa0d9e0fae5eb7901a9b77b580d41c940451f543)) 142 | 143 | ## [1.1.5](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.1.4...v1.1.5) (2020-09-01) 144 | 145 | 146 | ### Bug Fixes 147 | 148 | * fix a bug when calculating coverage ([#95](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/95)) ([06ba033](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/06ba03380c1f309309f3e7cb484d736b7d3df1c3)) 149 | 150 | ## [1.1.4](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.1.3...v1.1.4) (2020-08-24) 151 | 152 | 153 | ### Bug Fixes 154 | 155 | * delete watson_assistant_func_skip.py ([0da1456](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/0da145654157796a58a50d6a90348ff6b32478a2)) 156 | 157 | ## [1.1.3](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.1.2...v1.1.3) (2020-08-22) 158 | 159 | 160 | ### Bug Fixes 161 | 162 | * fix a 
bug in show disambiguation click api ([#93](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/93)) ([a7eddca](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/a7eddca0b55f9520e1438a646416937ddfc3458e)) 163 | 164 | ## [1.1.2](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.1.1...v1.1.2) (2020-08-21) 165 | 166 | 167 | ### Bug Fixes 168 | 169 | * remove tqdm for watson studio support ([#91](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/91)) ([e183b0d](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/e183b0d36a82660484cb3516d4f34b04ee0897e2)) 170 | 171 | ## [1.1.1](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.1.0...v1.1.1) (2020-08-21) 172 | 173 | 174 | ### Bug Fixes 175 | 176 | * fix pypi url ([#90](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/90)) ([b0f39f4](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/b0f39f45341729eba954cfdb387abfddaa91ec0f)) 177 | 178 | # [1.1.0](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.0.1...v1.1.0) (2020-08-21) 179 | 180 | 181 | ### Features 182 | 183 | * improve ReadMe.md to include all of the related notebooks ([#89](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/89)) ([68c3325](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/68c3325ddfc84e5866728360880f4dca5991303e)) 184 | 185 | ## [1.0.1](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/compare/v1.0.0...v1.0.1) (2020-08-17) 186 | 187 | 188 | ### Bug Fixes 189 | 190 | * releaserc 
([eb6b2b2](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/eb6b2b260ef7bb43aaa3aab5cbb9b992c3f5049d)) 191 | 192 | # 1.0.0 (2020-08-17) 193 | 194 | 195 | ### Bug Fixes 196 | 197 | * bumpversion.cfg ([143460a](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/143460ae166940d041fa674d4e771e1b7e9e2305)) 198 | * change release branch ([#88](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/88)) ([a211584](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/a211584dfff38db35e374a03b4835f3ff1d1fea8)) 199 | * current version error ([d3aac8c](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/d3aac8c9355822308964a51f79f527ab835ae442)) 200 | * version file path ([4b7d0fa](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/4b7d0fa20859c72288945ed9ae9c70ee5756bacd)) 201 | 202 | 203 | ### Features 204 | 205 | * Add Customer Effort Analysis notebook ([#83](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/83)) ([593d38d](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/593d38d2ec5595e758e33b96dbc02440ea059bab)) 206 | * refactor for assistant improve toolkit ([#87](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/87)) ([f5d6e7e](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/commit/f5d6e7e54661f761f33b20ebfa9e0f8f234c84c2)) 207 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ## Issues 4 | 5 | If you encounter an issue, start by searching through the list of 
[issues](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues) and active [pull requests](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/pulls) to see if anyone else has raised a similar issue. 6 | 7 | If you don't see an issue listed please submit a [new issue](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues/new/choose). Make sure to provide sufficient information on your environment and how to reproduce the issue. 8 | 9 | ## Contributing code 10 | 11 | Please sign our [Contributor License Agreement (CLA)](https://cla-assistant.io/watson-developer-cloud/assistant-improve-recommendations-notebook) before sending PRs 12 | 13 | * **If your contribution is minor,** such as a bug fix, open a pull request. 14 | * **If your contribution is major,** such as a new feature, start by opening an issue first. Others can then weigh in before you commence any work. 15 | 16 | ## Submission Guidelines 17 | 1. Fork the repo 18 | 2. Create a local branch for your change, e.g. `git checkout -b my-new-feature-branch` 19 | 3. Test your changes 20 | 4. Commit your changes to local branch, e.g. `git commit -m "feat: my new feature"`. (see instructions below re: commit message) 21 | 5. Push your changes to remote `git push -u origin my-new-feature-branch` 22 | 6. 
From github, create a PR from your fork to this repo 23 | 24 | ### Format your commit message to properly document and trigger the semantic release 25 | * feat: A new feature 26 | * fix: A bug fix 27 | * docs: Documentation only changes 28 | * style: Changes that do not affect the meaning of the code (white-space, formatting, missing semi-colons etc) 29 | * refactor: A code change that neither fixes a bug, nor adds a feature 30 | * perf: A code change that improves performance 31 | * test: Adding missing tests 32 | * chore: Changes to the build process or auxiliary tools and libraries such as documentation generation -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 
176 | 177 | END OF TERMS AND CONDITIONS 178 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Watson Assistant Improve Notebooks 2 | 3 | [![Build Status](https://travis-ci.org/watson-developer-cloud/assistant-improve-recommendations-notebook.svg?branch=master)](https://travis-ci.org/github/watson-developer-cloud/assistant-improve-recommendations-notebook) 4 | [![Slack](https://wdc-slack-inviter.mybluemix.net/badge.svg)](https://wdc-slack-inviter.mybluemix.net) 5 | [![Latest Stable Version](https://img.shields.io/pypi/v/assistant-improve-toolkit)](https://pypi.org/project/assistant-improve-toolkit/) 6 | [![CLA assistant](https://cla-assistant.io/readme/badge/watson-developer-cloud/assistant-improve-recommendations-notebook)](https://cla-assistant.io/watson-developer-cloud/assistant-improve-recommendations-notebook) 7 | 8 | This repository houses Watson Assistant Improve notebooks and the underlying assistant improve toolkit library. 9 | 10 | ## Introduction 11 | To help improving your Watson Assistant after you have deployed it to production, we prepared the following Jupyter notebooks. These notebooks include practical steps for measuring, analyzing, and actively improving your assistant in a continuous manner. Check out [IBM Watson Assistant Continuous Improvement Best Practices](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/raw/master/notebook/IBM%20Watson%20Assistant%20Continuous%20Improvement%20Best%20Practices.pdf) for more details. 12 | 13 | - __Measure notebook__ contains a set of automated metrics that help you monitor and understand the behavior of your system. The goal is to understand where your assistant is doing well vs where it isn’t, and to focus your improvement effort to one of the problem areas identified. 
This notebook generates an assessment spreadsheet for you to use to label problematic conversations, and then feed to the Effectiveness notebook. 14 | 15 | - __Effectiveness notebook__ helps you understand the relative performance of each intent and entity as well as the confusion between your intents. This information helps you prioritize your improvement effort. The input to this notebook is an assessment spreadsheet generated from the Measure notebook. Update the marked columns in the spreadsheet with your labels and load it into the Effectiveness notebook for analysis. 16 | 17 | - __Logs notebook__ helps you fetch logs using Watson Assistant API. You can fetch logs with various filters, and save them as a JSON file, or export the utterances in the logs into a CSV file. The JSON file can be loaded into the Measure notebook. The CSV file can be used for [intent recommendation service](https://cloud.ibm.com/docs/assistant?topic=assistant-intent-recommendations#intent-recommendations-get-intent-recommendations-task). Alternatively, you can run python scripts [`fetch_logs`](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/blob/master/src/main/python/fetch_logs.py) and [`export_csv_for_intent_recommendation`](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/blob/master/src/main/python/export_csv_for_intent_recommendation.py) to fetch logs and export them to [intent recommendation CSV](https://cloud.ibm.com/docs/assistant?topic=assistant-intent-recommendations#intent-recommendations-data-resources), respectively. Run `python get_logs -h` and `python export_csv_for_intent_recommendation.py -h` for usage. 18 | 19 | - __Dialog Flow Analysis notebook__ help you assess and analyze user journeys and issues related to the dialog flow of ineffective (low quality) conversations based on production logs. 
Check out [Dialog Flow Analysis](https://github.com/watson-developer-cloud/assistant-dialog-flow-analysis) for more details. 20 | 21 | - __Dialog Skill Analysis notebook__ help you analyze characteristics of your data such as the number of training examples for each intent or the terms which seem to be characteristic of a specific intent. Check out [Dialog Skill Analysis](https://github.com/watson-developer-cloud/assistant-dialog-skill-analysis) for more details. 22 | 23 | ## Getting Started 24 | 25 | You can either run the notebooks locally or in [IBM Watson Studio](https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/notebooks-parent.html). 26 | 27 | - **Run locally** 28 | 29 | 1. Install Jupyter Notebook, see [Jupyter/IPython Notebook Quick Start Guide](https://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/install.html) for more details. 30 | 2. Download the Jupyter notebooks available in this repository's [notebook](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/tree/master/notebook) directory. __Note: These notebook files are not designed for Watson Studio environment__ 31 | 3. Start jupyter server `jupyter notebook` 32 | 4. Follow the instructions in each of the notebooks. Be sure to add your Watson Assistant credentials if necessary. 33 | 34 | - **Run in Watson Studio** 35 | 36 | 1. Create a Watson Studio account. 37 | Sign up in [Watson Studio](https://www.ibm.com/cloud/watson-studio), or use an existing account. Lite plan is free to use. 38 | 39 | 2. Create a new project and add a Cloud Object Storage (COS) account. 40 | For more information regarding COS plans, see [Pricing](https://www.ibm.com/cloud-computing/bluemix/pricing-object-storage). 41 | 42 | 3. 
Copy [Measure](https://dataplatform.cloud.ibm.com/exchange/public/entry/view/133dfc4cd1480bbe4eaa78d3f635e568) or [Effectiveness](https://dataplatform.cloud.ibm.com/exchange/public/entry/view/133dfc4cd1480bbe4eaa78d3f636921c) notebook from Watson Studio community into your project. 43 | 44 | 4. Follow the instructions in each notebook to add project tokens and Watson Assistant credentials if necessary. 45 | 46 | ## Guides 47 | * Learn more about our measure and effectiveness notebook on Medium: [Continuously Improve Your Watson Assistant with Jupyter Notebooks](https://medium.com/ibm-watson/continuously-improve-your-watson-assistant-with-jupiter-notebooks-60231df4f01f) 48 | 49 | ## Contributing 50 | See [CONTRIBUTING.md](CONTRIBUTING.md) for more details on how to contribute 51 | 52 | ## License 53 | This library is licensed under the [Apache 2.0 license](http://www.apache.org/licenses/LICENSE-2.0). 54 | 55 | -------------------------------------------------------------------------------- /notebook/IBM Watson Assistant Continuous Improvement Best Practices.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/IBM Watson Assistant Continuous Improvement Best Practices.pdf -------------------------------------------------------------------------------- /notebook/Logs Notebook-cp4d.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Watson Assistant Logs Notebook\n", 8 | "### IBM Cloud Pak for Data version" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## Introduction\n", 16 | "This notebook demonstrates how to download Watson Assistant user-generated logs based on different criteria.\n", 17 | "\n", 18 | "### 
Programming language and environment\n", 19 | "Some familiarity with Python is recommended. This notebook runs on Python 3.7+" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "\n", 27 | "## 1. Configuration and Setup\n", 28 | "\n", 29 | "In this section, we add data and workspace access credentials, import required libraries and functions.\n", 30 | "\n", 31 | "### 1.1 Install Assistant Improve Toolkit" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "scrolled": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "!pip install --user --upgrade \"assistant-improve-toolkit\";" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "### 1.2 Import functions used in the notebook" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "# Import Watson Assistant related functions\n", 59 | "from ibm_cloud_sdk_core.authenticators import IAMAuthenticator\n", 60 | "from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator\n", 61 | "import pandas as pd\n", 62 | "import json\n", 63 | "from ibm_watson import AssistantV1, AssistantV2\n", 64 | "\n", 65 | "from assistant_improve_toolkit.watson_assistant_func import get_logs\n", 66 | "from assistant_improve_toolkit.watson_assistant_func import get_assistant_definition\n", 67 | "from assistant_improve_toolkit.watson_assistant_func import load_logs_from_file\n", 68 | "from assistant_improve_toolkit.watson_assistant_func import export_csv_for_intent_recommendation" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## 2. 
Load and format data " 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": { 81 | "pycharm": { 82 | "name": "#%% md\n" 83 | } 84 | }, 85 | "source": [ 86 | "### 2.1 Add Watson Assistant configuration\n", 87 | "\n", 88 | "The notebook uses `CloudPakForDataAuthenticator` to authenticate the APIs.\n", 89 | "\n", 90 | "- Replace `username` and `password` with your Cloud Pak for Data credentials\n", 91 | "- `base_url` is the base URL of your instance. It is in the format of `https://{cpd_cluster_host}{:port}/icp4d-api`\n", 92 | "- The string to set for version is a date in the format version=YYYY-MM-DD. The version date string determines which version of the Watson Assistant v1/v2 API will be called. For more information about version, see [Versioning](https://cloud.ibm.com/apidocs/assistant-data-v1?code=python#versioning)\n", 93 | "- The string to pass into `assistant.set_service_url` is the service URL of your Watson Assistant. The URL follows this pattern: `https://{cpd_cluster_host}{:port}/assistant/{release}/instances/{instance_id}/api`. To find this URL, view the details for the service instance from the Cloud Pak for Data web client. For more information, see [Service Endpoint](https://cloud.ibm.com/apidocs/assistant-data-v1?code=python#service-endpoint)\n", 94 | "\n", 95 | "The notebook requires initializing both v1 API instance `sdk_v1_object` and v2 API instance `sdk_v2_object`." 
96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "# Provide credentials to connect to assistant\n", 105 | "# Set disable_ssl_verification=True for self-signed certificate\n", 106 | "authenticator = CloudPakForDataAuthenticator(\n", 107 | " username='username',\n", 108 | " password='password',\n", 109 | " url='base_url',\n", 110 | " disable_ssl_verification=False\n", 111 | ")\n", 112 | "\n", 113 | "# Initialize v1 API instance\n", 114 | "sdk_v1_object = AssistantV1(version='2020-04-01', authenticator = authenticator)\n", 115 | "sdk_v1_object.set_service_url('service_url')\n", 116 | "\n", 117 | "# Initialize v2 API instance\n", 118 | "sdk_v2_object = AssistantV2(version='2020-09-24', authenticator = authenticator)\n", 119 | "sdk_v2_object.set_service_url('service_url')\n", 120 | "\n", 121 | "# Set set_disable_ssl_verification to True for self-signed certificate\n", 122 | "# sdk_v1_object.set_disable_ssl_verification(True)\n", 123 | "# sdk_v2_object.set_disable_ssl_verification(True)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "Add the information of your assistant. To load the skill of an assistant in the next section, you need to provide either Workspace ID or Skill ID. To locate your assistant ID, open the assistant settings and click __API Details__. To locate your workspace ID or skill ID, go to the Skills page and select __View API Details__ from the menu of a skill tile. If you are using versioning in Watson Assistant, this ID represents the Development version of your skill definition."
131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "assistant_information = {'workspace_id' : '',\n", 140 | " 'skill_id' : '',\n", 141 | " 'assistant_id' : ''}" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "### 2.2 Fetch and load logs\n", 149 | "\n", 150 | "- `num_logs`: number of logs to fetch\n", 151 | "- Use `filename` to specify if logs are saved as a JSON file (default: `None`)\n", 152 | "- Apply `filters` while fetching logs (default: `[]`), e.g.,\n", 153 | " - removing empty input: `meta.summary.input_text_length_i>0`\n", 154 | " - fetching logs generated after a timestamp: `response_timestamp>=2018-09-18`\n", 155 | " \n", 156 | " Refer to [Filter query reference](https://cloud.ibm.com/docs/services/assistant?topic=assistant-filter-reference) for\n", 157 | " more information.\n", 158 | "- Use `project` to specify project when using Watson Studio (default: `None`)\n", 159 | "- Use `overwrite` to overwrite if `filename` exists (default: `False`)\n" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "__A. 
Download all logs for a period of time (and save as a JSON file for Measure notebook)__" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "# Add filter queries\n", 176 | "filters = ['language::en', # Logs in English\n", 177 | " 'meta.summary.input_text_length_i>0', # Logs with non empty input \n", 178 | " 'response_timestamp>=2020-03-01'] # Logs with response timestamp later or equal to 2020-03-01\n", 179 | "\n", 180 | "# Query 20,000 logs\n", 181 | "filename = 'logs.json'\n", 182 | "\n", 183 | "# Fetch 20,000 logs, set `overwrite` to True to reload logs, set version=2 to use v2 log apis\n", 184 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n", 185 | " sdk_v2_object=sdk_v2_object,\n", 186 | " assistant_info=assistant_information,\n", 187 | " num_logs=20000,\n", 188 | " filename=filename,\n", 189 | " filters=filters,\n", 190 | " overwrite=True,\n", 191 | " project=None,\n", 192 | " version=2)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "__B. 
Download and export logs for intent recommendation__\n", 200 | "\n", 201 | "For intent recommendation, by default, an utterance is considered only when:\n", 202 | "- It is the first user utterance in each conversation\n", 203 | "- its confidence `response.intents::confidence` is between 0.1 and 0.6 (exclusive),\n", 204 | "- its token count is between 3 and 20 (exclusive), and\n", 205 | "- it is not a duplicate of the other utterances in the logs.\n", 206 | "\n", 207 | "This example adds confidence filters when calling `get_logs`, and then exports the utterances to a CSV file by calling\n", 208 | "`export_csv_for_intent_recommendation` with token count filter and deduplication applied.\n" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "pycharm": { 216 | "name": "#%%\n" 217 | } 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "# Add filter queries\n", 222 | "filters = ['language::en', # Logs in English\n", 223 | " 'request.context.system.dialog_turn_counter::1', # The first user utterance in each conversation\n", 224 | " 'response.intents:confidence<0.6', # filter out high intent confidence utterance\n", 225 | " 'response.intents:confidence>0.1', # filter out low intent confidence utterance\n", 226 | " ]\n", 227 | "\n", 228 | "# Query 20,000 logs using filename 'log_first_utterances.json'\n", 229 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n", 230 | " sdk_v2_object=sdk_v2_object,\n", 231 | " assistant_info=assistant_information,\n", 232 | " num_logs=20000,\n", 233 | " filename='log_for_intent_recommendation.json',\n", 234 | " filters=filters,\n", 235 | " overwrite=True,\n", 236 | " version=2)\n", 237 | "\n", 238 | "# Or, load previously saved logs.\n", 239 | "logs = load_logs_from_file(filename='log_for_intent_recommendation.json')" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "Export logs to a CSV file for intent recommendation\n",
"\n", 248 | "- `logs`: the logs object from `get_logs` or `load_logs_from_file`\n", 249 | "- `filename`: the CSV output filename\n", 250 | "- Use `deduplicate` to specify if duplicate messages should be removed (default: `True`)\n", 251 | "- Use `project` to specify project when using Watson Studio (default: `None`)\n", 252 | "- Use `overwrite` to overwrite if `filename` exists (default: `False`)\n", 253 | "- Use `min_length` to filter out utterances that are less than certain number of tokens (exclusive, default: `3`)\n", 254 | "- Use `max_length` to filter out utterances that are more than certain number of tokens (exclusive, default: `20`)" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": { 261 | "pycharm": { 262 | "name": "#%%\n" 263 | } 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "export_csv_for_intent_recommendation(logs,\n", 268 | " filename='log_for_intent_recommendation.csv',\n", 269 | " deduplicate=True,\n", 270 | " min_length=3,\n", 271 | " max_length=20,\n", 272 | " overwrite=True)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "__C. 
More examples__\n", 280 | "\n", 281 | "Download logs of the first user utterance in each conversation for a period of time" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "# Add filter queries\n", 291 | "filters = ['language::en', # Logs in English \n", 292 | " 'request.context.system.dialog_turn_counter::1', # The first user utterance in each conversation\n", 293 | " 'response_timestamp>=2020-03-01'] # Logs with response timestamp later or equal to 2020-03-01\n", 294 | "\n", 295 | "# Query 20,000 logs using filename 'log_first_utterances.json'\n", 296 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n", 297 | " sdk_v2_object=sdk_v2_object,\n", 298 | " assistant_info=assistant_information,\n", 299 | " num_logs=20000,\n", 300 | " filename='log_first_utterances.json',\n", 301 | " filters=filters,\n", 302 | " overwrite=True,\n", 303 | " version=2)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": {}, 309 | "source": [ 310 | "Download logs containing specific input text" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": { 317 | "pycharm": { 318 | "name": "#%%\n" 319 | } 320 | }, 321 | "outputs": [], 322 | "source": [ 323 | "# Add filter queries\n", 324 | "filters = ['language::en', # Logs in English\n", 325 | " 'request.input.text::\"Is there an article on how to make cherry pie?\"'] # Logs with input text: \"Is there an article on how to make cherry pie?\"\n", 326 | "\n", 327 | "# Query 20,000 logs using filename 'log_input.json'\n", 328 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n", 329 | " sdk_v2_object=sdk_v2_object,\n", 330 | " assistant_info=assistant_information,\n", 331 | " num_logs=20000,\n", 332 | " filename='log_input.json',\n", 333 | " filters=filters,\n", 334 | " overwrite=True,\n", 335 | " version=2)" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": 
{}, 341 | "source": [ 342 | "Download logs triggering specific intent" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "# Add filter queries\n", 352 | "filters = ['language::en', # Logs in English\n", 353 | " 'response.intents:intent::\"article_food\"'] # Intent triggered: article_food\n", 354 | "# Query 20,000 logs using filename log_intent.json\n", 355 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n", 356 | " sdk_v2_object=sdk_v2_object,\n", 357 | " assistant_info=assistant_information,\n", 358 | " num_logs=20000,\n", 359 | " filename='log_intent.json',\n", 360 | " filters=filters,\n", 361 | " overwrite=True,\n", 362 | " version=2)" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "Download logs triggering specific intent with a confidence range" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "# Add filter queries\n", 379 | "filters = ['language::en', # Logs in English\n", 380 | " 'response.intents:(intent:article_food,confidence<0.25)'] # Intent triggered: article_food with confidence below 0.25\n", 381 | "# Query 20,000 logs using filename log_intent.json\n", 382 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n", 383 | " sdk_v2_object=sdk_v2_object,\n", 384 | " assistant_info=assistant_information,\n", 385 | " num_logs=20000,\n", 386 | " filename='log_intent_confidence.json',\n", 387 | " filters=filters,\n", 388 | " overwrite=True,\n", 389 | " version=2)" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": {}, 395 | "source": [ 396 | "Download logs that visited a specific node" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": {}, 403 | "outputs": [], 404 | "source": [ 405 | "# Fetch assistant definition and save to a JSON file\n",
406 | "df_assistant = get_assistant_definition(sdk_v1_object, assistant_information, filename='assistant_definition.json')\n", 407 | "\n", 408 | "# Get all intents\n", 409 | "assistant_intents = [intent['intent'] for intent in df_assistant['intents'].values[0]] \n", 410 | "\n", 411 | "# Get all dialog nodes\n", 412 | "assistant_nodes = pd.DataFrame(df_assistant['dialog_nodes'].values[0])\n", 413 | "\n", 414 | "# Find mappings between node name and node id\n", 415 | "node_title_map = dict()\n", 416 | "for idx, node in assistant_nodes.iterrows():\n", 417 | " if str(node['title']) != 'nan':\n", 418 | " node_title_map[node['title']] = node['dialog_node']\n", 419 | "node_df = pd.DataFrame(node_title_map.items())\n", 420 | "node_df.columns = {'node_name', 'node_id'}\n", 421 | "\n", 422 | "# Add filter queries\n", 423 | "intent_name = 'book_short_dialog'\n", 424 | "if intent_name in node_title_map:\n", 425 | " filters = ['language::en', # Logs in English\n", 426 | " 'response.output:nodes_visited::[{}]'.format(node_title_map[intent_name])] # Visited node: book_short_dialog\n", 427 | " # Query 20,000 logs using filename log_node.json\n", 428 | " logs = get_logs(sdk_v1_object=sdk_v1_object,\n", 429 | " sdk_v2_object=sdk_v2_object,\n", 430 | " assistant_info=assistant_information,\n", 431 | " num_logs=20000,\n", 432 | " filename='log_node.json',\n", 433 | " filters=filters,\n", 434 | " overwrite=True,\n", 435 | " version=2)\n", 436 | "else:\n", 437 | " print('Cannot find {} in skill definition.'.format(intent_name))" 438 | ] 439 | }, 440 | { 441 | "cell_type": "markdown", 442 | "metadata": {}, 443 | "source": [ 444 | "Copyright © 2021 IBM. This notebook and its source code are released under the terms of the MIT License."
445 | ] 446 | } 447 | ], 448 | "metadata": { 449 | "kernelspec": { 450 | "display_name": "Python 3", 451 | "language": "python", 452 | "name": "python3" 453 | }, 454 | "language_info": { 455 | "codemirror_mode": { 456 | "name": "ipython", 457 | "version": 3 458 | }, 459 | "file_extension": ".py", 460 | "mimetype": "text/x-python", 461 | "name": "python", 462 | "nbconvert_exporter": "python", 463 | "pygments_lexer": "ipython3", 464 | "version": "3.7.10" 465 | } 466 | }, 467 | "nbformat": 4, 468 | "nbformat_minor": 4 469 | } 470 | -------------------------------------------------------------------------------- /notebook/Logs Notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Watson Assistant Logs Notebook" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Introduction\n", 15 | "This notebook demonstrates how to download Watson Assistant user-generated logs based on different criteria.\n", 16 | "\n", 17 | "### Programming language and environment\n", 18 | "Some familiarity with Python is recommended. This notebook runs on Python 3.7+" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "\n", 26 | "## 1. 
Configuration and Setup\n", 27 | "\n", 28 | "In this section, we add data and workspace access credentials, import required libraries and functions.\n", 29 | "\n", 30 | "### 1.1 Install Assistant Improve Toolkit" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "scrolled": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "!pip install --user --upgrade \"assistant-improve-toolkit\";" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "### 1.2 Import functions used in the notebook" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "# Import Watson Assistant related functions\n", 58 | "from ibm_cloud_sdk_core.authenticators import IAMAuthenticator\n", 59 | "import pandas as pd\n", 60 | "import json\n", 61 | "from ibm_watson import AssistantV1, AssistantV2\n", 62 | "\n", 63 | "from assistant_improve_toolkit.watson_assistant_func import get_logs\n", 64 | "from assistant_improve_toolkit.watson_assistant_func import get_assistant_definition\n", 65 | "from assistant_improve_toolkit.watson_assistant_func import load_logs_from_file\n", 66 | "from assistant_improve_toolkit.watson_assistant_func import export_csv_for_intent_recommendation" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "## 2. Load and format data " 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": { 79 | "pycharm": { 80 | "name": "#%% md\n" 81 | } 82 | }, 83 | "source": [ 84 | "### 2.1 Add Watson Assistant configuration\n", 85 | "\n", 86 | "This notebook uses Watson Assistant v1 API to access skill definition. To access message logs, the notebook uses both v1 and v2 APIs. 
You authenticate to the API by using IBM Cloud Identity and Access Management (IAM).\n", 87 | "\n", 88 | "You can access the values you need for this configuration from the Watson Assistant user interface. Go to the Skills page and select View API Details from the menu of a skill title.\n", 89 | "\n", 90 | "- The string to set in the call to `IAMAuthenticator` is your Api Key under Service Credentials\n", 91 | "- The string to set for version is a date in the format version=YYYY-MM-DD. The version date string determines which version of the Watson Assistant V1 API will be called. For more information about version, see [Versioning](https://cloud.ibm.com/apidocs/assistant/assistant-v1#versioning).\n", 92 | "- The string to pass into `assistant.set_service_url` is the base URL of Watson Assistant. For example, for us-south, the endpoint is `https://api.us-south.assistant.watson.cloud.ibm.com`. This value will be different depending on the location of your service instance. For more information, see [Service Endpoint](https://cloud.ibm.com/apidocs/assistant/assistant-v1?code=python#service-endpoint)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "# Provide credentials to connect to assistant\n", 102 | "authenticator = IAMAuthenticator('API_KEY')\n", 103 | "\n", 104 | "# Initialize v1 SDK instance\n", 105 | "sdk_v1_object = AssistantV1(version='2020-04-01', authenticator = authenticator)\n", 106 | "sdk_v1_object.set_service_url('https://api.us-south.assistant.watson.cloud.ibm.com')\n", 107 | "\n", 108 | "# Initialize v2 SDK instance\n", 109 | "sdk_v2_object = AssistantV2(version='2020-09-24', authenticator = authenticator)\n", 110 | "sdk_v2_object.set_service_url('https://api.us-south.assistant.watson.cloud.ibm.com')\n" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "Add the information of your assistant. 
To load the skill of an assistant in the next section, you need to provide either Workspace ID or Skill ID. The values can be found on the View API Details page. If you are using versioning in Watson Assistant, this ID represents the Development version of your skill definition.\n", 118 | "\n", 119 | "For more information about authentication and finding credentials in the Watson Assistant UI, please see [Watson Assistant v1 API](https://cloud.ibm.com/apidocs/assistant/assistant-v1) in the offering documentation.\n" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "assistant_information = {'workspace_id' : '',\n", 129 | " 'skill_id' : '',\n", 130 | " 'assistant_id' : ''}" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### 2.2 Fetch and load logs\n", 138 | "\n", 139 | "- `num_logs`: number of logs to fetch\n", 140 | "- Use `filename` to specify if logs are saved as a JSON file (default: `None`)\n", 141 | "- Apply `filters` while fetching logs (default: `[]`), e.g.,\n", 142 | " - removing empty input: `meta.summary.input_text_length_i>0`\n", 143 | " - fetching logs generated after a timestamp: `response_timestamp>=2018-09-18`\n", 144 | " \n", 145 | " Refer to [Filter query reference](https://cloud.ibm.com/docs/services/assistant?topic=assistant-filter-reference) for\n", 146 | " more information.\n", 147 | "- Use `project` to specify project when using Watson Studio (default: `None`)\n", 148 | "- Use `overwrite` to overwrite if `filename` exists (default: `False`)\n" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "__A. 
Download all logs for a period of time (and save as a JSON file for Measure notebook)__" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "# Add filter queries\n", 165 | "filters = ['language::en', # Logs in English\n", 166 | " 'meta.summary.input_text_length_i>0', # Logs with non empty input \n", 167 | " 'response_timestamp>=2020-03-01'] # Logs with response timestamp later or equal to 2020-03-01\n", 168 | "\n", 169 | "# Query 20,000 logs\n", 170 | "filename = 'logs.json'\n", 171 | "\n", 172 | "# Fetch 20,000 logs, set `overwrite` to True to reload logs, set version=2 to use v2 log apis\n", 173 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n", 174 | " sdk_v2_object=sdk_v2_object,\n", 175 | " assistant_info=assistant_information,\n", 176 | " num_logs=20000,\n", 177 | " filename=filename,\n", 178 | " filters=filters,\n", 179 | " overwrite=True,\n", 180 | " project=None,\n", 181 | " version=2)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "__B. 
Download and export logs for intent recommendation__\n", 189 | "\n", 190 | "For intent recommendation, by default, an utterance is considered only when:\n", 191 | "- It is the first user utterance in each conversation\n", 192 | "- its confidence `response.intents::confidence` is between 0.1 and 0.6 (exclusive),\n", 193 | "- its token count is between 3 and 20 (exclusive), and\n", 194 | "- it is not a duplicate of the other utterances in the logs.\n", 195 | "\n", 196 | "This example adds confidence filters when calling `get_logs`, and then exports the utterances to a CSV file by calling\n", 197 | "`export_csv_for_intent_recommendation` with token count filter and deduplication applied.\n" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "pycharm": { 205 | "name": "#%%\n" 206 | } 207 | }, 208 | "outputs": [], 209 | "source": [ 210 | "# Add filter queries\n", 211 | "filters = ['language::en', # Logs in English\n", 212 | " 'request.context.system.dialog_turn_counter::1', # The first user utterance in each conversation\n", 213 | " 'response.intents:confidence<0.6', # filter out high intent confidence utterance\n", 214 | " 'response.intents:confidence>0.1', # filter out low intent confidence utterance\n", 215 | " ]\n", 216 | "\n", 217 | "# Query 20,000 logs using filename 'log_first_utterances.json'\n", 218 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n", 219 | " sdk_v2_object=sdk_v2_object,\n", 220 | " assistant_info=assistant_information,\n", 221 | " num_logs=20000,\n", 222 | " filename='log_for_intent_recommendation.json',\n", 223 | " filters=filters,\n", 224 | " overwrite=True,\n", 225 | " version=2)\n", 226 | "\n", 227 | "# Or, load previously saved logs.\n", 228 | "logs = load_logs_from_file(filename='log_for_intent_recommendation.json')" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "Export logs to a CSV file for intent recommendation\n", 236 | 
"\n", 237 | "- `logs`: the logs object from `get_logs` or `load_logs_from_file`\n", 238 | "- `filename`: the CSV output filename\n", 239 | "- Use `deduplicate` to specify if duplicate messages should be removed (default: `True`)\n", 240 | "- Use `project` to specify project when using Watson Studio (default: `None`)\n", 241 | "- Use `overwrite` to overwrite if `filename` exists (default: `False`)\n", 242 | "- Use `min_length` to filter out utterances that are less than certain number of tokens (exclusive, default: `3`)\n", 243 | "- Use `max_length` to filter out utterances that are more than certain number of tokens (exclusive, default: `20`)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": { 250 | "pycharm": { 251 | "name": "#%%\n" 252 | } 253 | }, 254 | "outputs": [], 255 | "source": [ 256 | "export_csv_for_intent_recommendation(logs,\n", 257 | " filename='log_for_intent_recommendation.csv',\n", 258 | " deduplicate=True,\n", 259 | " min_length=3,\n", 260 | " max_length=20,\n", 261 | " overwrite=False)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "__C. 
More examples__\n", 269 | "\n", 270 | "Download logs of the first user utterance in each conversation for a period of time" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "# Add filter queries\n", 280 | "filters = ['language::en', # Logs in English \n", 281 | " 'request.context.system.dialog_turn_counter::1', # The first user utterance in each conversation\n", 282 | " 'response_timestamp>=2020-03-01'] # Logs with response timestamp later or equal to 2020-03-01\n", 283 | "\n", 284 | "# Query 20,000 logs using filename 'log_first_utterances.json'\n", 285 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n", 286 | " sdk_v2_object=sdk_v2_object,\n", 287 | " assistant_info=assistant_information,\n", 288 | " num_logs=20000,\n", 289 | " filename='log_first_utterances.json',\n", 290 | " filters=filters,\n", 291 | " overwrite=True,\n", 292 | " version=2)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "Download logs containing specific input text" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": { 306 | "pycharm": { 307 | "name": "#%%\n" 308 | } 309 | }, 310 | "outputs": [], 311 | "source": [ 312 | "# Add filter queries\n", 313 | "filters = ['language::en', # Logs in English\n", 314 | " 'request.input.text::\"Is there an article on how to make cherry pie?\"'] # Logs with input text: \"Is there an article on how to make cherry pie?\"\n", 315 | "\n", 316 | "# Query 20,000 logs using filename 'log_input.json'\n", 317 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n", 318 | " sdk_v2_object=sdk_v2_object,\n", 319 | " assistant_info=assistant_information,\n", 320 | " num_logs=20000,\n", 321 | " filename='log_input.json',\n", 322 | " filters=filters,\n", 323 | " overwrite=True,\n", 324 | " version=2)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": 
{}, 330 | "source": [ 331 | "Download logs triggering specific intent" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [ 340 | "# Add filter queries\n", 341 | "filters = ['language::en', # Logs in English\n", 342 | " 'response.intents:intent::\"article_food\"'] # Intent been triggered: article_food\n", 343 | "# Query 20,000 logs using filename log_intent.json\n", 344 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n", 345 | " sdk_v2_object=sdk_v2_object,\n", 346 | " assistant_info=assistant_information,\n", 347 | " num_logs=20000,\n", 348 | " filename='log_intent.json',\n", 349 | " filters=filters,\n", 350 | " overwrite=True,\n", 351 | " version=2)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "Download logs triggering specific intent with a confidence range" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "# Add filter queries\n", 368 | "filters = ['language::en', # Logs in English\n", 369 | " 'response.intents:(intent:article_food,confidence<0.25)'] # Intent been triggered: article_food with confidence below 0.25\n", 370 | "# Query 20,000 logs using filename log_intent.json\n", 371 | "logs = get_logs(sdk_v1_object=sdk_v1_object,\n", 372 | " sdk_v2_object=sdk_v2_object,\n", 373 | " assistant_info=assistant_information,\n", 374 | " num_logs=20000,\n", 375 | " filename='log_intent_confidence.json',\n", 376 | " filters=filters,\n", 377 | " overwrite=True,\n", 378 | " version=2)" 379 | ] 380 | }, 381 | { 382 | "cell_type": "markdown", 383 | "metadata": {}, 384 | "source": [ 385 | "Download logs visited specific node" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": {}, 392 | "outputs": [], 393 | "source": [ 394 | "# Fetch assistant definition and save to a JSON file\n", 
395 | "df_assistant = get_assistant_definition(sdk_v1_object, assistant_information, filename='assistant_definition.json')\n", 396 | "\n", 397 | "# Get all intents\n", 398 | "assistant_intents = [intent['intent'] for intent in df_assistant['intents'].values[0]] \n", 399 | "\n", 400 | "# Get all dialog nodes\n", 401 | "assistant_nodes = pd.DataFrame(df_assistant['dialog_nodes'].values[0])\n", 402 | "\n", 403 | "# Find mappings between node name and node id\n", 404 | "node_title_map = dict()\n", 405 | "for idx, node in assistant_nodes.iterrows():\n", 406 | " if str(node['title']) != 'nan':\n", 407 | " node_title_map[node['title']] = node['dialog_node']\n", 408 | "node_df = pd.DataFrame(node_title_map.items())\n", 409 | "node_df.columns = {'node_name', 'node_id'}\n", 410 | "\n", 411 | "# Add filter queries\n", 412 | "intent_name = 'book_short_dialog'\n", 413 | "if intent_name in node_title_map:\n", 414 | " filters = ['language::en', # Logs in English\n", 415 | " 'response.output:nodes_visited::[{}]'.format(node_title_map[intent_name])] # Visited node: book_short_dialog\n", 416 | " # Query 20,000 logs using filename log_node.json\n", 417 | " logs = get_logs(sdk_v1_object=sdk_v1_object,\n", 418 | " sdk_v2_object=sdk_v2_object,\n", 419 | " assistant_info=assistant_information,\n", 420 | " num_logs=20000,\n", 421 | " filename='log_node.json',\n", 422 | " filters=filters,\n", 423 | " overwrite=True,\n", 424 | " version=2)\n", 425 | "else:\n", 426 | " print('Cannot find {} in skill definition.'.format(intent_name))" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "Copyright © 2021 IBM. This notebook and its source code are released under the terms of the MIT License." 
434 | ] 435 | } 436 | ], 437 | "metadata": { 438 | "kernelspec": { 439 | "display_name": "Python 3", 440 | "language": "python", 441 | "name": "python3" 442 | }, 443 | "language_info": { 444 | "codemirror_mode": { 445 | "name": "ipython", 446 | "version": 3 447 | }, 448 | "file_extension": ".py", 449 | "mimetype": "text/x-python", 450 | "name": "python", 451 | "nbconvert_exporter": "python", 452 | "pygments_lexer": "ipython3", 453 | "version": "3.8.8" 454 | } 455 | }, 456 | "nbformat": 4, 457 | "nbformat_minor": 4 458 | } 459 | -------------------------------------------------------------------------------- /notebook/README.md: -------------------------------------------------------------------------------- 1 | ## Table of Contents 2 | 3 | #### `Effectiveness Notebook.ipynb` #### 4 | - A Jupyter notebook file. Effectiveness notebook helps you understand relative performance of each intent and entity as well as the confusion between your intents. This information helps you prioritize your improvement effort. 5 | 6 | #### `Measure Notebook.ipynb` #### 7 | - A Jupyter notebook file. Measure notebook contains a set of automated metrics that help you monitor and understand the behavior of your system. The goal is to understand where your assistant is doing well vs where it isn’t, and to focus your improvement effort to one of the problem areas identified. 8 | 9 | #### `Logs Notebook.ipynb` #### 10 | - A Jupyter notebook file. Logs notebook helps you fetch logs using Watson Assistant API. You can fetch logs with various filters, and save them as a JSON file, or export the utterances in the logs into a CSV file. The JSON file can be loaded into the Measure notebook. The CSV file can be uploaded to Watson Assistant service for intent recommendation. 
Alternatively, you can run python scripts [`fetch_logs`](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/blob/master/src/main/python/fetch_logs.py) and [`export_csv_for_intent_recommendation`](https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/blob/master/src/main/python/export_csv_for_intent_recommendation.py) to fetch logs and export them to [intent recommendation CSV](https://cloud.ibm.com/docs/assistant?topic=assistant-intent-recommendations#intent-recommendations-data-resources), respectively. Run `python get_logs -h` and `python export_csv_for_intent_recommendation.py -h` for usage. For example, to generate intent recommendation CSV from logs: 11 | 12 | ``` 13 | # Fetch logs by keeping first user utterances in conversations with confidence `response.intents::confidence` between 0.1 and 0.6, and save to JSON file `OUTPUT_JSON_FILE` 14 | 15 | python src/main/python/fetch_logs.py \ 16 | --url URL --version VERSION --apikey API_KEY --skill_id SKILL_ID --assistant_id ASSISTANT_ID \ 17 | --filters "language::en" "request.context.system.dialog_turn_counter::1" "response.intents:confidence<0.6" "response.intents:confidence>0.1" \ 18 | --output_json OUTPUT_JSON_FILE 19 | ``` 20 | ``` 21 | # Take the fetch logs in `OUTPUT_JSON_FILE`, filter out utterances that is either too short (less than 3 tokens) or too long (more than 20 tokens), remove duplicates, and export them to a CSV file `OUTPUT_CSV_FILE` for intent recommendation 22 | 23 | python src/main/python/export_csv_for_intent_recommendation.py \ 24 | --input_json OUTPUT_JSON_FILE --output_csv OUTPUT_CSV_FILE \ 25 | --deduplicate --min_length 3 --max_length 20 26 | ``` 27 | 28 | #### `IBM Watson Assistant Continuous Improvement Best Practices.pdf` #### 29 | - IBM Watson Assistant Continuous Improvement Best Practices document. 
30 | 31 | #### `data` #### 32 | - A folder contains an example workspace with sample logs and an annotated log file for demonstration in notebooks. 33 | 34 | #### `imgs` #### 35 | - A folder contains images used in notebooks. 36 | -------------------------------------------------------------------------------- /notebook/data/annotation.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/data/annotation.xlsx -------------------------------------------------------------------------------- /notebook/data/book_recommender_logs.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/data/book_recommender_logs.gz -------------------------------------------------------------------------------- /notebook/data/book_recommender_skill.json: -------------------------------------------------------------------------------- 1 | {"name": "Book Recommender", "created": "2020-04-24T20:08:40.700Z", "intents": [{"intent": "book_long", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a long book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "I like a long novel", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good, long story.", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "I need a good, long, story, that will last me all summer?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is a good book that will last a long time?", "created": "2020-04-24T20:08:40.700Z", "updated": 
"2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "two", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "two", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "both", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "the", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "dos", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "one", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_money", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a book about making money", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Is there a good book about investing?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best stock book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Is there a great financial book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good stock market book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_history", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a history book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend an book about World War 2?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is a good book about African history?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Tell me a good book about world history?", "created": 
"2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend an book about early Egypt?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_scifi", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a scifi book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "I like a science fiction book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you give me a science fiction book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good scifi book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good science fiction book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_computer", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a computer article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good computer article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What computer article should I read?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Is there a good computer article that was just releases?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you tell me a good article about computers", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_fantasy", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a fantasy article", "created": "2020-04-24T20:08:40.700Z", "updated": 
"2020-04-24T20:08:40.700Z"}, {"text": "What's a good fantasy article?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a fantasy quick read?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you tell me a good trending fantasy article?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Give me a fantasy article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_food", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I would like an book about food", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good cooking book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best book about Indian cuisine?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good book comparing international foods?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is the best book about healthy eating?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_money", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a article about making money", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good financial book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "I there a good current article about making money", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good investment article?", "created": "2020-04-24T20:08:40.700Z", "updated": 
"2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good article about stocks?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "hi", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "hello", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "yo", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Hi there!", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Hello there", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "hi", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_history", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a history article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend an article about World War 2?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is a good article about African history?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Tell me a good article about world history?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend an article about early Egypt?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_selfimprove", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a self improvement article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good article about self improvement?", "created": "2020-04-24T20:08:40.700Z", "updated": 
"2020-04-24T20:08:40.700Z"}, {"text": "What's the best goal setting article?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you tell me a good article about improving my life?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best trending self improvement article?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "hello", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "hi", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "hello", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "hi there", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "hello there", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "oy", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": "hello"}, {"intent": "one", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "two", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "single", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "the", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "uno", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "one", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_food", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I would like an article about food", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good cooking article?", "created": 
"2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best article about Indian cuisine?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good article comparing international foods?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is the best article about healthy eating?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_politics", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a article about politics", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a article about the Republicans?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Is there a good article about policies of the Democratic Party?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best article about Indian politics?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is the best article about politics in England?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_selfimprove", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a self improvement book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good book about self improvement?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best goal setting book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you tell me a good book about improving my life?", "created": "2020-04-24T20:08:40.700Z", "updated": 
"2020-04-24T20:08:40.700Z"}, {"text": "What's the best trending self improvement book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_politics", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a book about politics", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a book about the Republicans?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Is there a good book about policies of the Democratic Party?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best book about Indian politics?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is the best book about politics in England?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_short", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a short book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "I like a short novel", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good, short story.", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "I need a good, short, story, that I can read on the plane?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What is a good book that won't take long to read?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "article_scifi", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a scifi article", "created": "2020-04-24T20:08:40.700Z", "updated": 
"2020-04-24T20:08:40.700Z"}, {"text": "I like a science fiction article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you give me a science fiction article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good scifi article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good science fiction article", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_computer", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a computer book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a good book about computers?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good book about the history of computers?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's the best computer book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "I want a really good story about the evolution of computers", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}, {"intent": "book_fantasy", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "examples": [{"text": "I like a fantasy book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "What's a good fantasy book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you recommend a fantasy story?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Can you tell me a good trending fantasy book?", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}, {"text": "Give 
me a fantasy book", "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z"}], "description": ""}], "updated": "2020-04-24T20:10:53.922Z", "entities": [], "language": "en", "metadata": {"api_version": {"major_version": "v1", "minor_version": "2020-02-05"}, "alternate_responses": true}, "description": "", "dialog_nodes": [{"type": "standard", "title": "Anything else", "output": {"generic": [{"values": [{"text": "I didn't understand. You can try rephrasing."}, {"text": "Can you reword your statement? I'm not understanding."}, {"text": "I didn't get your meaning."}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "anything_else", "dialog_node": "Anything else", "previous_sibling": "node_5_1582040246957", "disambiguation_opt_out": false}, {"type": "standard", "title": "two", "output": {"generic": [{"values": [{"text": "You chose two"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#two", "user_label": "two", "dialog_node": "node_5_1582040246957", "previous_sibling": "node_10_1582039896349"}, {"type": "standard", "title": "one", "output": {"generic": [{"values": [{"text": "You chose one"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#one", "user_label": "one", "dialog_node": "node_10_1582039896349", "previous_sibling": "node_6_1582040330964"}, {"type": "standard", "title": "hi", "output": {"generic": [{"values": [{"text": "hi"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#hi", "user_label": "hi", "dialog_node": "node_6_1582040330964", "previous_sibling": "node_6_1582040347726"}, {"type": "standard", "title": 
"hello", "output": {"generic": [{"values": [{"text": "hello"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#hello", "user_label": "hello", "dialog_node": "node_6_1582040347726", "previous_sibling": "node_1_1582049209157"}, {"type": "standard", "title": "book_computer", "output": {"generic": [{"values": [{"text": "Here is a computer book. (book_computer)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_computer", "user_label": "computer book", "dialog_node": "node_1_1582049209157", "previous_sibling": "node_6_1582049261648"}, {"type": "standard", "title": "book_fantasy_dialog", "output": {"generic": [{"values": [{"text": "Here is a fantasy book. (book_fantasy)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_fantasy", "user_label": "fantasy book", "dialog_node": "node_6_1582049261648", "previous_sibling": "node_2_1582049297943"}, {"type": "standard", "title": "book_history_dialog", "output": {"generic": [{"values": [{"text": "Here is a history book (book_history)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_history", "user_label": "history book", "dialog_node": "node_2_1582049297943", "previous_sibling": "node_1_1582049324143"}, {"type": "standard", "title": "book_money_dialog", "output": {"generic": [{"values": [{"text": "Here is a book about money (book_money)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_money", "user_label": "money book", "dialog_node": "node_1_1582049324143", 
"previous_sibling": "node_2_1582049373395"}, {"type": "standard", "title": "book_scifi_dialog", "output": {"generic": [{"values": [{"text": "Here is your scify book (book_scifi)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_scifi", "user_label": "scify book", "dialog_node": "node_2_1582049373395", "previous_sibling": "node_3_1582049417508"}, {"type": "standard", "title": "book_selfimprove_dialog", "output": {"generic": [{"values": [{"text": "Here is your self improvement book"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_selfimprove", "user_label": "self improve book", "dialog_node": "node_3_1582049417508", "previous_sibling": "node_5_1582049713134"}, {"type": "standard", "title": "book_politics_dialog", "output": {"generic": [{"values": [{"text": "Here is a book about politics. (book_politics)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_politics", "user_label": "politics book", "dialog_node": "node_5_1582049713134", "previous_sibling": "node_8_1582050051508"}, {"type": "standard", "title": "book_long_dialog", "output": {"generic": [{"values": [{"text": "Here is a long book. (book_long)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_long", "user_label": "long book", "dialog_node": "node_8_1582050051508", "previous_sibling": "node_5_1582050079858"}, {"type": "standard", "title": "book_short_dialog", "output": {"generic": [{"values": [{"text": "Here is a short book. 
(book_short)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_short", "user_label": "short book", "dialog_node": "node_5_1582050079858", "previous_sibling": "node_2_1582133481345"}, {"type": "standard", "title": "article_computer_dialog", "output": {"generic": [{"values": [{"text": "Here is your computer article (article_computer)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_computer", "user_label": "computer article", "dialog_node": "node_2_1582133481345", "previous_sibling": "node_7_1582133509246"}, {"type": "standard", "title": "article_fantasy_dialog", "output": {"generic": [{"values": [{"text": "Here is your fantasy article. (article_fantasy)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_fantasy", "user_label": "fantasy article", "dialog_node": "node_7_1582133509246", "previous_sibling": "node_9_1582133556135"}, {"type": "standard", "title": "article_history_dialog", "output": {"generic": [{"values": [{"text": "Here is your history article. (article_history)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_history", "user_label": "history article", "dialog_node": "node_9_1582133556135", "previous_sibling": "node_1_1582133629623"}, {"type": "standard", "title": "article_money_dialog", "output": {"generic": [{"values": [{"text": "Here is your article about money. 
(article_money)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_money", "user_label": "money article", "dialog_node": "node_1_1582133629623", "previous_sibling": "node_10_1582133662325"}, {"type": "standard", "title": "article_politics_dialog", "output": {"generic": [{"values": [{"text": "Here is your article about politics. (article_politics)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_politics", "user_label": "politics article", "dialog_node": "node_10_1582133662325", "previous_sibling": "node_8_1582133720490"}, {"type": "standard", "title": "article_scifi_dialog", "output": {"generic": [{"values": [{"text": "Here is your article about scifi. (article_scifi)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_scifi", "user_label": "scifi article", "dialog_node": "node_8_1582133720490", "previous_sibling": "node_1_1582134460437"}, {"type": "standard", "title": "article_selfimprove_dialog", "output": {"generic": [{"values": [{"text": "Here is an article about self improvement. (article_selfimprove)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_selfimprove", "user_label": "self improve article", "dialog_node": "node_1_1582134460437", "previous_sibling": "node_2_1582134542638"}, {"type": "standard", "title": "article_food_dialog", "output": {"generic": [{"values": [{"text": "Here is an article about food. 
(article_food)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#article_food", "user_label": "food article", "dialog_node": "node_2_1582134542638", "previous_sibling": "node_3_1582134595076"}, {"type": "standard", "title": "book_food_dialog", "output": {"generic": [{"values": [{"text": "Here is a book about food. (book_food)"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "#book_food", "user_label": "food book", "dialog_node": "node_3_1582134595076", "previous_sibling": "Welcome"}, {"type": "standard", "title": "Welcome", "output": {"generic": [{"values": [{"text": "Hello. How can I help you?"}], "response_type": "text", "selection_policy": "sequential"}]}, "created": "2020-04-24T20:08:40.700Z", "updated": "2020-04-24T20:08:40.700Z", "conditions": "welcome", "dialog_node": "Welcome"}], "workspace_id": "1237b67c-f780-46fe-b693-a4b7b2d6f47f", "counterexamples": [], "system_settings": {"auto_learn": {"apply": false}, "disambiguation": {"prompt": "Did you mean:", "enabled": true, "randomize": true, "max_suggestions": 5, "suggestion_text_policy": "title", "none_of_the_above_prompt": "None of the above", "include_alternate_responses": true}, "human_agent_assist": {"prompt": "Did you mean:"}, "alternate_responses": {"enabled": true}, "spelling_auto_correct": true}, "learning_opt_out": true, "status": "Available"} -------------------------------------------------------------------------------- /notebook/imgs/analyze_process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/analyze_process.png 
-------------------------------------------------------------------------------- /notebook/imgs/box_zoom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/box_zoom.png -------------------------------------------------------------------------------- /notebook/imgs/click.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/click.png -------------------------------------------------------------------------------- /notebook/imgs/effectiveness_overall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/effectiveness_overall.png -------------------------------------------------------------------------------- /notebook/imgs/effort_computation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/effort_computation.png -------------------------------------------------------------------------------- /notebook/imgs/find_data_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/find_data_icon.png -------------------------------------------------------------------------------- /notebook/imgs/measure_overall.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/measure_overall.png -------------------------------------------------------------------------------- /notebook/imgs/measure_process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/measure_process.png -------------------------------------------------------------------------------- /notebook/imgs/reset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/reset.png -------------------------------------------------------------------------------- /notebook/imgs/save.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/save.png -------------------------------------------------------------------------------- /notebook/imgs/suggestions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/suggestions.png -------------------------------------------------------------------------------- /notebook/imgs/wheel_zoom.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/watson-developer-cloud/assistant-improve-recommendations-notebook/8f339a95f0895d08df27811420f5ff74324ebb71/notebook/imgs/wheel_zoom.png -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "assistant_improve_toolkit", 3 | "version": "1.0.0", 4 | "description": "To help improving your Watson Assistant after you have deployed it to production, we prepared the following two Jupyter notebooks. These notebooks include practical steps for measuring, analyzing, and actively improving your virtual assistant in a continuous manner. Check out IBM Watson Assistant Continuous Improvement Best Practices for more details.", 5 | "repository": { 6 | "type": "git", 7 | "url": "git+https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook.git" 8 | }, 9 | "author": "IBM", 10 | "license": "Apache-2.0", 11 | "bugs": { 12 | "url": "https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook/issues" 13 | }, 14 | "homepage": "https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook#readme", 15 | "dependencies": { 16 | "@semantic-release/changelog": "^5.0.1", 17 | "@semantic-release/exec": "^5.0.0", 18 | "@semantic-release/git": "^9.0.0", 19 | "@semantic-release/github": "^7.2.3" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ipython 2 | pandas==1.2.1 3 | bokeh==3.2.0 4 | tqdm==4.65.0 5 | matplotlib==3.2.1 6 | XlsxWriter==1.2.8 7 | ibm-watson==7.0.0 8 | numpy==1.23.5 9 | requests==2.29.0 10 | scikit-learn>=0.21.3 -------------------------------------------------------------------------------- /requirements_dev.txt: 
-------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | 3 | # test dependencies 4 | pytest>=2.8.2 5 | responses>=0.10.6 6 | python_dotenv>=0.1.5;python_version!='3.2' 7 | pylint>=1.4.4 8 | tox>=2.9.1 9 | pytest-rerunfailures>=3.1 10 | 11 | # code coverage 12 | coverage<5 13 | codecov>=1.6.3 14 | pytest-cov>=2.2.1 15 | 16 | # documentation 17 | bumpversion>=0.5.3 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | # 3 | # This file were created by Python Boilerplate. Use Python Boilerplate to start 4 | # simple, usable and best-practices compliant Python projects. 5 | # 6 | # Learn more about it at: http://github.com/fabiommendes/python-boilerplate/ 7 | # 8 | 9 | import setuptools 10 | from os import path 11 | 12 | __version__ = '1.4.1' 13 | 14 | # read contents of README file 15 | this_directory = path.abspath(path.dirname(__file__)) 16 | with open(path.join(this_directory, 'README.md'), encoding='utf-8') as file: 17 | readme_file = file.read() 18 | 19 | setuptools.setup( 20 | # Basic info 21 | name='assistant_improve_toolkit', 22 | author='IBM Watson', 23 | author_email='watdevex@us.ibm.com', 24 | maintainer='Zhe Zhang', 25 | maintainer_email='zhangzhe@us.ibm.com', 26 | url='https://github.com/watson-developer-cloud/assistant-improve-recommendations-notebook', 27 | description='Assistant Improve Toolkit', 28 | license='Apache 2.0', 29 | long_description=readme_file, 30 | long_description_content_type='text/markdown', 31 | classifiers=[ 32 | 'Development Status :: 4 - Beta', 33 | 'Intended Audience :: Developers', 34 | 'License :: OSI Approved :: Apache Software License', 35 | 'Programming Language :: Python', 36 | 'Programming Language :: Python :: 3.7', 37 | 'Programming Language :: Python :: 3.8', 38 | 'Operating System :: OS Independent', 39 | 'Topic :: 
Software Development :: Libraries :: Python Modules' 40 | ], 41 | # Packages and depencies 42 | package_dir={'': 'src'}, 43 | packages=setuptools.find_packages('src'), 44 | install_requires=[ 45 | 'pandas==1.2.1', 46 | 'bokeh==3.2.0', 47 | 'tqdm==4.65.0', 48 | 'scikit-learn>=0.21.3', 49 | 'matplotlib==3.2.1', 50 | 'XlsxWriter==1.2.8', 51 | 'ibm-watson==7.0.0', 52 | 'numpy==1.23.5', 53 | 'requests==2.29.0' 54 | ], 55 | 56 | zip_safe=False, 57 | platforms='any', 58 | ) 59 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | # (C) Copyright IBM Corp. 2019, 2020. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | -------------------------------------------------------------------------------- /src/assistant_improve_toolkit/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | # (C) Copyright IBM Corp. 2019, 2020. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
# coding: utf-8

# (C) Copyright IBM Corp. 2019, 2020.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
# urllib.parse.quote is the exact object requests.utils re-exports; importing
# it directly removes an unnecessary third-party indirection.
from urllib.parse import quote
import hashlib
import hmac
import pandas as pd
from contextlib import closing
from io import BytesIO
import numpy as np


def get_hash(key, msg):
    """Return the HMAC-SHA256 digest of ``msg`` keyed with ``key``.

    Parameters
    ----------
    key: bytes key for the HMAC
    msg: str message; UTF-8 encoded before hashing

    Returns
    -------
    bytes: raw 32-byte digest
    """
    return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()


def create_signature_key(key, datestamp, region, service):
    """Derive a signing key per AWS Signature Version 4.

    Chains HMAC-SHA256 over date, region, service and the literal
    'aws4_request', starting from 'AWS4' + secret key (per the AWS
    SigV4 key-derivation recipe).

    Parameters
    ----------
    key: secret access key (str)
    datestamp: date in YYYYMMDD form
    region: service region (may be '' for IBM COS)
    service: service name, e.g. 's3'

    Returns
    -------
    bytes: the derived signing key
    """
    key_date = get_hash(('AWS4' + key).encode('utf-8'), datestamp)
    key_region = get_hash(key_date, region)
    key_service = get_hash(key_region, service)
    key_signing = get_hash(key_service, 'aws4_request')
    return key_signing


def generate_link(filename, project_io, expiration):
    """Generate a pre-signed (AWS Signature V4) download URL for a COS object.

    Parameters
    ----------
    filename: object (file) name inside the project bucket
    project_io: Watson Studio project io instance; must expose
        get_storage_metadata() with endpoint/bucket/editor credentials
    expiration: link expiration time in seconds

    Returns
    -------
    str: pre-signed GET URL
    """
    # Hoist the storage-metadata lookup: the original implementation called
    # project_io.get_storage_metadata() eight separate times per invocation.
    properties = project_io.get_storage_metadata()['properties']
    endpoint = properties['endpoint_url']
    bucket = properties['bucket_name']
    credentials = properties['credentials']['editor']

    # IBM COS uses an empty region in the credential scope, which yields the
    # '//s3/aws4_request' form below — intentional, do not "fix".
    region = ''
    http_method = 'GET'

    cur_time = datetime.datetime.utcnow()
    timestamp = cur_time.strftime('%Y%m%dT%H%M%SZ')
    datestamp = cur_time.strftime('%Y%m%d')

    standardized_querystring = ('X-Amz-Algorithm=AWS4-HMAC-SHA256' +
                                '&X-Amz-Credential=' +
                                credentials['access_key_id'] + '/' + datestamp + '/' + region +
                                '/s3/aws4_request' +
                                '&X-Amz-Date=' + timestamp +
                                '&X-Amz-Expires=' + str(expiration) +
                                '&X-Amz-SignedHeaders=host')

    standardized_querystring_url_encoded = quote(standardized_querystring, safe='&=')

    standardized_resource = '/' + bucket + '/' + filename

    payload_hash = 'UNSIGNED-PAYLOAD'
    standardized_headers = 'host:' + endpoint.replace('https://', '')
    signed_headers = 'host'

    standardized_request = (http_method + '\n' +
                            standardized_resource + '\n' +
                            standardized_querystring_url_encoded + '\n' +
                            standardized_headers + '\n' +
                            '\n' +
                            signed_headers + '\n' +
                            payload_hash)

    # assemble string-to-sign
    hashing_algorithm = 'AWS4-HMAC-SHA256'
    credential_scope = datestamp + '/' + region + '/' + 's3' + '/' + 'aws4_request'
    sts = (hashing_algorithm + '\n' +
           timestamp + '\n' +
           credential_scope + '\n' +
           hashlib.sha256(standardized_request.encode('utf-8')).hexdigest())

    # generate the signature
    signature_key = create_signature_key(credentials['secret_access_key'],
                                         datestamp, region, 's3')
    signature = hmac.new(signature_key,
                         sts.encode('utf-8'),
                         hashlib.sha256).hexdigest()

    # create and send the request
    request_url = (endpoint + '/' +
                   bucket + '/' +
                   filename + '?' +
                   standardized_querystring_url_encoded +
                   '&X-Amz-Signature=' +
                   signature)
    return request_url
def _write_formatted_excel(dataframe_list, sheet_name_list, filename, project_io,
                           header_height, column_widths):
    """Write dataframes to a formatted xlsx workbook (shared implementation).

    The measure and effectiveness exporters were byte-for-byte duplicates
    except for the header row height and the column-width layout; both now
    delegate here.

    Parameters
    ----------
    dataframe_list: a list of dataframes, one per sheet
    sheet_name_list: a list of sheet names (parallel to dataframe_list)
    filename: output file name
    project_io: Watson Studio project io instance, or None to write locally
    header_height: height of the header row (row 0)
    column_widths: list of ('A:B', width) column ranges to size
    """
    with closing(BytesIO()) as output:
        # options= / writer.save() match the pandas version pinned in
        # requirements.txt (1.2.1); newer pandas renamed both.
        writer = pd.ExcelWriter(output, engine='xlsxwriter', options={'remove_timezone': True})
        workbook = writer.book
        data_format1 = workbook.add_format({'bg_color': '#BBCCE2'})
        data_format2 = workbook.add_format({'bg_color': '#DEE6EF'})

        format_header = workbook.add_format({'text_wrap': True})

        workbook.formats[0].set_font_size(15)
        for df, name in zip(dataframe_list, sheet_name_list):
            df.to_excel(writer, sheet_name=name)
            worksheet = writer.sheets[name]
            worksheet.set_row(0, header_height, format_header)
            for col_range, width in column_widths:
                worksheet.set_column(col_range, width)
            # Alternate row background colours for readability.
            for row in range(1, len(df) + 1, 2):
                worksheet.set_row(row, cell_format=data_format1)
                worksheet.set_row(row + 1, cell_format=data_format2)
        writer.save()
        if project_io is not None:
            project_io.save_data(filename, output.getvalue(), overwrite=True)
        else:
            with open(filename, 'wb') as out:
                out.write(output.getvalue())


def generate_excel_measure(dataframe_list, sheet_name_list, filename, project_io):
    """Generate a formatted excel file given a list of dataframes for measure notebook
    Parameters
    ----------
    dataframe_list: a list of dataframes
    sheet_name_list: a list of sheet names
    filename: output file name
    project_io: Watson Studio project io instance
    """
    _write_formatted_excel(dataframe_list, sheet_name_list, filename, project_io,
                           header_height=30,
                           column_widths=[('A:A', 5), ('B:B', 30), ('C:C', 30),
                                          ('D:D', 15), ('F:G', 35), ('H:AH', 20)])


def generate_excel_effectiveness(dataframe_list, sheet_name_list, filename, project_io):
    """Generate a formatted excel file given a list of dataframes for effectiveness notebook
    Parameters
    ----------
    dataframe_list: a list of dataframes
    sheet_name_list: a list of sheet names
    filename: output file name
    project_io: Watson Studio project io instance
    """
    _write_formatted_excel(dataframe_list, sheet_name_list, filename, project_io,
                           header_height=20,
                           column_widths=[('A:A', 5), ('B:D', 30)])
intent': 'Intent 3', 'Intent 3 confidence': 'Intent 3 Confidence', 210 | 'response_entities': 'Detected Entities', 211 | 'Escalated_conversation': 'Escalated conversation?', 212 | 'Covered': 'Covered?', 'Not Covered cause': 'Not covered - cause', 213 | 'response.output.nodes_visited_s': 'Dialog Flow', 214 | 'response_dialog_stack': 'Dialog stack', 215 | 'response_dialog_request_counter': 'Dialog request counter', 216 | 'response_dialog_turn_counter': 'Dialog turn counter' 217 | }) 218 | 219 | existing_columns = ['Log ID', 'Conversation ID', 'Response Timestamp', 'Customer ID (must retain for delete)', 220 | 'Utterance Text', 'Response Text', 'Detected top intent', 'Detected top intent confidence', 221 | 'Intent 2', 'Intent 2 Confidence', 'Confidence gap (between 1 and 2)', 'Intent 3', 222 | 'Intent 3 Confidence', 223 | 'Detected Entities', 'Escalated conversation?', 'Covered?', 'Not covered - cause', 224 | 'Dialog Flow', 'Dialog stack', 'Dialog request counter', 'Dialog turn counter'] 225 | # Add new columns for annotating problematic logs 226 | new_columns_excel = ['Response Correct (Y/N)?', 'Response Helpful (Y/N)?', 227 | 'Root cause (Problem with Intent, entity, dialog)', 228 | 'Wrong intent? If yes, put the correct intent. Otherwise leave it blank', 229 | 'New intent needed? (A new intent. Otherwise leave blank)', 230 | 'Add Utterance to Training data (Y/N)', 231 | 'Entity missed? If yes, put the missed entity value. Otherwise leave it blank', 232 | 'New entity needed? If yes, put the entity name', 233 | 'New entity value? If yes, put the entity value', 'New dialog logic needed?', 234 | 'Wrong dialog node? If yes, put the node name. 
Otherwise leave it blank', 235 | 'No dialog node triggered'] 236 | 237 | # Add the new columns to the dataframe 238 | df_excel = df_excel.reindex(columns=[*existing_columns, *new_columns_excel], fill_value='') 239 | 240 | # Set output filename 241 | all_file = 'All.xlsx' 242 | escalated_sample_file = 'Escalated_sample.xlsx' 243 | non_escalated_sample_file = 'NotEscalated_sample.xlsx' 244 | 245 | # Remove timezone infomation 246 | df_excel['Response Timestamp'] = df_excel['Response Timestamp'].dt.tz_localize(None) 247 | 248 | # Prepare dataframe containing all utterances sorted by Conversation ID and Response Timestamp 249 | df_all = df_excel.sort_values(by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True) 250 | 251 | # Prepare dataframe containing covered utterances sorted by Conversation ID and Response Timestamp 252 | df_covered = df_excel[df_excel['Covered?']==True].sort_values( 253 | by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True) 254 | 255 | # Prepare dataframe containing not covered utterances sorted by Conversation ID and Response Timestamp 256 | df_not_covered = df_excel[df_excel['Covered?']==False].sort_values( 257 | by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True) 258 | 259 | # Convert to Excel format and save to local or upload to COS if project_io is provided 260 | generate_excel_measure([df_all, df_covered, df_not_covered], 261 | ['All_Utterances', 'Covered_Utterances', 'Not_Covered_Utterances'], filename=all_file, 262 | project_io=project_io) 263 | 264 | # Prepare dataframe containing escalated conversations 265 | df_escalated_true = df_excel.loc[df_excel['Escalated conversation?']==True] 266 | 267 | # Sample escalated conversations 268 | if sample_size > 0 and len(df_escalated_true) > 0: 269 | # Get unique escalated conversation ids 270 | conversation_ids = df_escalated_true['Conversation ID'].unique() 271 | sampled_conversation_ids = np.random.choice(conversation_ids, sample_size) 272 | 
df_escalated_true = df_escalated_true[ 273 | df_escalated_true['Conversation ID'].isin(sampled_conversation_ids)].sort_values( 274 | by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True) 275 | 276 | # Prepare dataframe containing covered utterances in escalated conversations sorted by Conversation ID and Response Timestamp 277 | df_escalated_covered = df_escalated_true[df_escalated_true['Covered?']==True].reset_index(drop=True) 278 | 279 | # Prepare dataframe containing not covered utterances in escalated conversations sorted by Conversation ID and Response Timestamp 280 | df_escalated_not_covered = df_escalated_true[df_escalated_true['Covered?']==False].reset_index(drop=True) 281 | 282 | # Covert to Excel format and upload to COS 283 | generate_excel_measure([df_escalated_true, df_escalated_covered, df_escalated_not_covered], 284 | ['All_Utterances', 'Covered_Utterances', 'Not_Covered_Utterances'], 285 | filename=escalated_sample_file, project_io=project_io) 286 | 287 | # Prepare dataframe containing non-escalated conversations 288 | df_not_escalated = df_excel.loc[df_excel['Escalated conversation?']==False].reset_index(drop=True) 289 | 290 | # Sample escalated conversations 291 | if sample_size > 0: 292 | # Get unique non-escalated conversation ids 293 | conversation_ids = df_not_escalated['Conversation ID'].unique() 294 | sampled_conversation_ids = np.random.choice(conversation_ids, sample_size) 295 | df_not_escalated = df_not_escalated[ 296 | df_not_escalated['Conversation ID'].isin(sampled_conversation_ids)].sort_values( 297 | by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True) 298 | 299 | # Prepare dataframe containing covered utterances in escalated conversations sorted by Conversation ID and Response Timestamp 300 | df_not_escalated_covered = df_not_escalated[df_not_escalated['Covered?']==True].reset_index(drop=True) 301 | 302 | # Generate not escalated and not covered sample file 303 | df_not_escalated_not_covered = 
df_not_escalated[df_not_escalated['Covered?']==False].reset_index(drop=True) 304 | 305 | # Covert to Excel format and upload to COS 306 | generate_excel_measure([df_not_escalated, df_not_escalated_covered, df_not_escalated_not_covered], 307 | ['All_Utterances', 'Covered_Utterances', 'Not_Covered_Utterances'], 308 | filename=non_escalated_sample_file, project_io=project_io) 309 | -------------------------------------------------------------------------------- /src/assistant_improve_toolkit/export_csv_for_intent_recommendation.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | # (C) Copyright IBM Corp. 2019, 2020. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
import argparse
from watson_assistant_func import load_logs_from_file
from watson_assistant_func import export_csv_for_intent_recommendation


def parse_args():
    """Build and parse the command-line arguments for CSV generation."""
    parser = argparse.ArgumentParser(description='Generate intent recommendation CSV from logs JSON file for Watson '
                                                 'Assistant service.')

    # Required arguments
    parser.add_argument(
        '--input_json',
        type=str,
        required=True,
        help="The path of the JSON file of logs, generated by `fetch_logs.py`",
    )
    parser.add_argument(
        '--output_csv',
        type=str,
        required=True,
        help="The path of the CSV file of utterances this script will generate for intent recommendation",
    )

    # Optional arguments
    # NOTE: the original used `type=bool`, which is broken with argparse --
    # bool('False') is True, so ANY supplied value (including "False") enabled
    # overwriting.  A store_true flag matches --deduplicate and behaves as
    # users expect.
    parser.add_argument(
        '--overwrite',
        action="store_true",
        help="If set, overwrite the output file if it exists",
    )
    parser.add_argument(
        '--deduplicate',
        action="store_true",
        help="If set, duplicate utterances are discarded when generating CSV",
    )
    parser.add_argument(
        '--min_length',
        type=int,
        default=3,
        help="Minimum number of tokens of a utterance in the generated CSV. Any utterance that has less than or "
             "equal to this number is discarded.",
    )
    parser.add_argument(
        '--max_length',
        type=int,
        default=20,
        help="Maximum number of tokens of a utterance in the generated CSV. Any utterance that has more than or "
             "equal to this number is discarded.",
    )

    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    print(vars(args))

    logs = load_logs_from_file(filename=args.input_json,
                               project=None)

    export_csv_for_intent_recommendation(logs,
                                         filename=args.output_csv,
                                         deduplicate=args.deduplicate,
                                         project=None,
                                         overwrite=args.overwrite,
                                         min_length=args.min_length,
                                         max_length=args.max_length)
import argparse
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson import AssistantV1

from watson_assistant_func import get_logs


def parse_args():
    """Build and parse the command-line arguments for fetching logs."""
    parser = argparse.ArgumentParser(description='Script to fetch logs from Watson Assistant service.')

    # Required arguments
    parser.add_argument(
        "--url",
        type=str,
        required=True,
        help="Watson Asssistant Legacy V1 URLs, for example, https://api.us-east.assistant.watson.cloud.ibm.com."
    )
    parser.add_argument(
        "--version",
        type=str,
        required=True,
        help="API requests require a version parameter that takes a date in the format version=YYYY-MM-DD. When we "
             "change the API in a backwards-incompatible way, we release a new version date. "
    )
    parser.add_argument(
        "--apikey",
        type=str,
        required=True,
        help="The IAM token."
    )

    # Optional arguments
    parser.add_argument(
        "--workspace_id",
        type=str,
        default='',
        help="To load the skill of an assistant in the next section, you need to provide either Workspace ID or Skill "
             "ID. The values can be found on the View API Details page. If you are using versioning in Watson "
             "Assistant, this ID represents the Development version of your skill definition.",
    )
    parser.add_argument(
        "--skill_id",
        type=str,
        default='',
        help="To load the skill of an assistant in the next section, you need to provide either Workspace ID or Skill "
             "ID. The values can be found on the View API Details page. If you are using versioning in Watson "
             "Assistant, this ID represents the Development version of your skill definition.",
    )
    parser.add_argument(
        "--assistant_id",
        type=str,
        default='',
        help="To load the skill of an assistant in the next section, you need to provide Assistant ID. The values can "
             "be found on the View API Details page.",
    )
    parser.add_argument(
        '--filters',
        default=[],
        nargs='*',
        help="List of filters (string), separated by space. For example, '--filters language::en "
             "meta.summary.input_text_length_i>0 response_timestamp>=2020-03-01'",
    )
    parser.add_argument(
        '--num_logs',
        type=int,
        default=20000,
        help="Number of logs to retrieve (default=20000)",
    )
    parser.add_argument(
        '--output_json',
        type=str,
        default=None,
        help="If output_json is set, logs will be saved to filename as a JSON file",
    )
    # NOTE: `type=bool` is broken with argparse (bool('False') is True), so the
    # original flag enabled overwriting for any supplied value.  Use a
    # store_true flag instead.
    parser.add_argument(
        '--overwrite',
        action="store_true",
        help="If set, overwrite the output file if it exists",
    )

    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    print(vars(args))

    authenticator = IAMAuthenticator(args.apikey)
    sdk_object = AssistantV1(version=args.version, authenticator=authenticator)
    sdk_object.set_service_url(args.url)

    assistant_information = {'workspace_id': args.workspace_id,
                             'skill_id': args.skill_id,
                             'assistant_id': args.assistant_id}
    print(assistant_information)

    # get_logs expects (sdk_v1_object, sdk_v2_object, assistant_info, ...).
    # The original call omitted the sdk_v2 slot, so assistant_information was
    # bound to sdk_v2_object and the call failed with a TypeError.  This
    # script only uses the V1 API, hence sdk_v2_object=None.
    logs = get_logs(sdk_object,
                    None,
                    assistant_information,
                    num_logs=args.num_logs,
                    filename=args.output_json,
                    filters=args.filters,
                    project=None,
                    overwrite=args.overwrite,
                    )
import json
import pandas as pd
import os
import csv
import traceback
import io
from string import punctuation

from ibm_watson import AssistantV1, AssistantV2

# ASCII punctuation plus the Unicode right single quotation mark, which
# appears as an apostrophe in user utterances.
EN_PUNCTUATION = punctuation + '’'


def get_assistant_definition(sdk_object, assistant_info, project=None, overwrite=False, filename='assistant_definition'):
    """Fetch a workspace/skill definition and return it as a dataframe.

    Parameters
    ----------
    sdk_object: an ibm_watson AssistantV1 or AssistantV2 instance
    assistant_info: dict with 'workspace_id', 'assistant_id' and 'skill_id' keys
    project: Watson Studio project io instance (None for local filesystem)
    overwrite: whether to re-fetch and overwrite an existing definition file
    filename: prefix of the definition file

    Returns
    ----------
    pandas.DataFrame of the flattened definition, or None on failure
    """
    workspace_id, assistant_id, skill_id = [assistant_info.get(k) for k in ['workspace_id', 'assistant_id', 'skill_id']]

    # Truthiness instead of len(): a missing id is None and len(None) raises.
    if workspace_id:
        filename += '_workspace_{}.json'.format(workspace_id)
    elif skill_id:
        filename += '_skill_{}.json'.format(skill_id)
    elif assistant_id:
        # The original returned early here, which made the AssistantV2 branch
        # below unreachable for callers that only supply an assistant id.
        filename += '_assistant_{}.json'.format(assistant_id)
    else:
        print('Please provide a valid Workspace ID or Skill ID!')
        return None

    if os.path.isfile(filename) and overwrite is False:
        # Reuse the cached local copy instead of calling the API again.
        print('Reading from file:', filename)
        with open(filename) as data:
            data_json = json.load(data)
        print('Assistant definition is loaded into as a dataframe.')
        return pd.json_normalize(data_json)

    if isinstance(sdk_object, AssistantV1):
        if workspace_id:
            # Fetch the workspace definition
            print('Loading workspace definition using workspace id: {}'.format(workspace_id))
            assistant_definition = sdk_object.get_workspace(workspace_id=workspace_id, export=True,
                                                            include_audit=True).get_result()
        elif skill_id:
            # On the V1 API a skill id is a workspace id.
            print('Loading skill definition using skill id: {}'.format(skill_id))
            assistant_definition = sdk_object.get_workspace(workspace_id=skill_id, export=True,
                                                            include_audit=True).get_result()
        else:
            print('Please provide a valid Workspace ID or Skill ID!')
            assistant_definition = None
    elif isinstance(sdk_object, AssistantV2):
        if assistant_id:
            print('Loading skill definition using assistant id: {}'.format(assistant_id))
            assistant_definition = None
            assistants = sdk_object.export_skills(assistant_id=assistant_id, include_audit=True).get_result()
            # Only the dialog skill carries a workspace definition.
            for assistant in assistants["assistant_skills"]:
                if assistant["type"] == "dialog":
                    assistant_definition = assistant["workspace"]
            if assistant_definition is None:
                print('Your assistant does not support dialog')
        else:
            print('Please provide a valid Assistant ID!')
            assistant_definition = None
    else:
        print("Please provide a valid watson sdk object")
        assistant_definition = None

    if assistant_definition:
        # Store the workspace details in a dataframe
        df_assistant = pd.json_normalize(assistant_definition)

        # Export the definition unless the file already exists and overwrite is False
        if not os.path.isfile(filename) or overwrite:
            if project is not None:
                # Save only as a project asset.  The original also opened a
                # local file in 'wb' mode just to read fp.name, leaving a
                # spurious empty local file behind.
                project.save_data(filename, json.dumps(assistant_definition), overwrite=True)
                print('Definition {} exported as a project asset'.format(filename))
            else:
                with open(filename, 'w') as f:
                    json.dump(assistant_definition, f)
                print('Definition {} exported'.format(filename))

        return df_assistant
    else:
        return None


def _get_logs_from_v1_api(sdk_object, workspace_id, log_filter, num_logs):
    """Page through the V1 logs API until num_logs are fetched or no page remains.

    workspace_id may be empty/None, in which case the instance-wide
    list_all_logs endpoint is used.
    """
    log_list = list()
    try:
        current_cursor = None
        while num_logs > 0:
            # Truthiness instead of len(): workspace_id can be None when the
            # caller only supplied an assistant id.
            if workspace_id:
                logs_response = sdk_object.list_logs(
                    workspace_id=workspace_id,
                    page_limit=500,
                    cursor=current_cursor,
                    filter=log_filter
                ).get_result()
            else:
                logs_response = sdk_object.list_all_logs(
                    page_limit=500,
                    cursor=current_cursor,
                    filter=log_filter
                ).get_result()
            if not logs_response['logs']:
                # Empty page: nothing more to fetch (also prevents looping forever).
                break
            min_num = min(num_logs, len(logs_response['logs']))
            log_list.extend(logs_response['logs'][:min_num])
            print('\r{} logs retrieved'.format(len(log_list)), end='')
            num_logs = num_logs - min_num
            current_cursor = None
            # Check if there is another page of logs to be fetched
            if 'pagination' in logs_response:
                if 'next_cursor' in logs_response['pagination']:
                    current_cursor = logs_response['pagination']['next_cursor']
                else:
                    break
    except Exception as ex:
        traceback.print_tb(ex.__traceback__)
        raise RuntimeError("Error getting logs using API. Please check if URL/credentials are correct.")

    return log_list


def _get_logs_from_v2_api(sdk_object, environment_id, log_filter, num_logs):
    """Page through the V2 logs API until num_logs are fetched or no page remains."""
    log_list = list()
    try:
        current_cursor = None
        while num_logs > 0:
            logs_response = sdk_object.list_logs(
                assistant_id=environment_id,
                page_limit=500,
                cursor=current_cursor,
                filter=log_filter
            ).get_result()
            if not logs_response['logs']:
                # Empty page: nothing more to fetch (also prevents looping forever).
                break
            min_num = min(num_logs, len(logs_response['logs']))
            log_list.extend(logs_response['logs'][:min_num])
            print('\r{} logs retrieved'.format(len(log_list)), end='')
            num_logs = num_logs - min_num
            current_cursor = None
            # Check if there is another page of logs to be fetched
            if 'pagination' in logs_response:
                if 'next_cursor' in logs_response['pagination']:
                    current_cursor = logs_response['pagination']['next_cursor']
                else:
                    break
    except Exception as ex:
        traceback.print_tb(ex.__traceback__)
        raise RuntimeError("Error getting logs using API. Please check if URL/credentials are correct.")

    return log_list


def get_logs(sdk_v1_object, sdk_v2_object, assistant_info, num_logs, filename, filters=None, project=None,
             overwrite=False, version=1):
    """Call the Watson Assistant API to retrieve logs, using pagination if necessary.

    Parameters
    ----------
    sdk_v1_object: AssistantV1 sdk instance (used when version == 1)
    sdk_v2_object: AssistantV2 sdk instance (used when version == 2)
    assistant_info : dict with 'workspace_id', 'assistant_id', 'skill_id' and
        (for version 2) 'environment_id'
    num_logs : int, maximum number of log records to fetch
    filename: name of the file logs are cached in / loaded from
    filters: list of query filter strings
    project: project io of studio project (None for local filesystem)
    overwrite: whether to re-fetch and overwrite an existing log file
    version: Watson Assistant API version, 1 or 2

    Returns
    ----------
    list of fetched log records, or None on invalid arguments
    """
    # Copy so the caller's list is not mutated by the default-filter appends below.
    filters = [] if filters is None else list(filters)

    workspace_id, assistant_id, skill_id, environment_id = [
        assistant_info.get(k) for k in ['workspace_id', 'assistant_id', 'skill_id', 'environment_id']]

    if not workspace_id and not assistant_id and not skill_id:
        print('Please provide a valid Workspace ID, Assistant ID, or Skill ID!')
        return None

    # Check if filename exists before retrieving logs
    file_exist = False
    if filename:
        if project:
            for file in project.get_files():
                if file['name'] == filename:
                    if not overwrite:
                        print('Load logs from existing file {}, set overwrite=True to overwrite'.format(filename))
                        return load_logs_from_file(filename, project)
                    else:
                        file_exist = True
        elif os.path.exists(filename):
            if not overwrite:
                print('Load logs from existing file {}, set overwrite=True to overwrite'.format(filename))
                return load_logs_from_file(filename, None)
            else:
                file_exist = True
    else:
        print('Please provide a valid filename.')
        return None

    if version == 1:
        # Add default filters based on assistant_id and workspace_id
        if assistant_id:
            filters.append('request.context.system.assistant_id::{}'.format(assistant_id))
        if skill_id:
            filters.append('workspace_id::{}'.format(skill_id))

        logs = _get_logs_from_v1_api(sdk_object=sdk_v1_object,
                                     workspace_id=workspace_id,
                                     log_filter=','.join(filters),
                                     num_logs=num_logs)
    elif version == 2:
        logs = _get_logs_from_v2_api(sdk_object=sdk_v2_object,
                                     environment_id=environment_id,
                                     log_filter=','.join(filters),
                                     num_logs=num_logs)
    else:
        # Fail fast: the original fell through and crashed later with an
        # UnboundLocalError on `logs`.
        raise ValueError('Unsupported API version: {} (expected 1 or 2)'.format(version))

    print('\nLoaded {} logs'.format(len(logs)))

    if not file_exist or overwrite:
        print('Saving {} logs into {}... '.format(len(logs), filename))
        if project:
            # Save only as a project asset.  The original also opened a local
            # file in 'wb' mode just to read fp.name, leaving a spurious empty
            # local file behind.
            project.save_data(filename, json.dumps(logs, indent=2), overwrite=overwrite)
            print('File', filename, 'saved a project asset')
        else:
            with open(filename, 'w') as fp:
                json.dump(logs, fp, indent=2)
            print('File', fp.name, 'saved')

    return logs


def load_logs_from_file(filename, project=None):
    """Load previously fetched logs from a JSON file (local or project asset)."""
    print('Reading from file:', filename)
    if project:
        # Get file from cloud object storage
        data = project.get_file(filename).getvalue().decode('utf8')
        logs = json.loads(data)
        print('Loaded {} logs'.format(len(logs)))
    else:
        if not os.path.exists(filename) or not os.path.isfile(filename):
            raise ValueError('{} either does not exist or is a directory'.format(filename))
        with open(filename) as data:
            logs = json.load(data)
        print('Loaded {} logs'.format(len(logs)))
    return logs


# From: https://github.ibm.com/watson-engagement-advisor/improve-recommendations-engine/blob/4c996b24bfcac4eb6ab6bbf39cf125cdf30b9027/src/main/python/cluster/utils.py#L44
def sanitize_text(text, remove_punctuation=True, lower=True, tokenize=True):
    """Normalize an utterance: strip whitespace and optionally lowercase and
    remove punctuation.

    The `tokenize` parameter is retained for backward compatibility with the
    original tokenizing implementation but is currently unused.
    """
    text = text.strip()
    if lower:
        text = text.lower()
    if remove_punctuation:
        # One C-level pass over the string instead of chained replace() calls.
        text = text.translate(str.maketrans('', '', EN_PUNCTUATION))
    return text


def export_csv_for_intent_recommendation(logs,
                                         filename,
                                         deduplicate=True,
                                         project=None,
                                         overwrite=False,
                                         min_length=3,
                                         max_length=20):
    """Export sanitized user utterances from logs to a CSV for intent recommendation.

    Utterances with <= min_length or >= max_length tokens are discarded.
    Returns the exported rows (each a single-element list).
    """
    messages = [sanitize_text(l['request']['input']['text']) for l in logs]
    messages = filter(lambda m: min_length < len(m.split()) < max_length, messages)
    if deduplicate:
        # set() discards duplicates (order is not preserved, as before)
        messages = [[m] for m in set(messages)]
    else:
        messages = [[m] for m in messages]
    print('\nExporting {} messages into CSV...'.format(len(messages)))

    if project:
        # Save only as a project asset; do not create a spurious local file.
        data = io.StringIO()
        writer = csv.writer(data, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerows(messages)
        project.save_data(filename, data.getvalue(), overwrite=overwrite)
        data.close()
        print('File', filename, 'saved a project asset')
    else:
        # Honour `overwrite` for local files too (the original silently clobbered).
        if os.path.exists(filename) and not overwrite:
            print('File {} exists, set overwrite=True to overwrite'.format(filename))
            return messages
        with open(filename, 'w') as f:
            writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            writer.writerows(messages)
        print('File', f.name, 'saved')

    return messages
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | -------------------------------------------------------------------------------- /src/main/css/custom_watson_studio.css: -------------------------------------------------------------------------------- 1 | /** 2 | * (C) Copyright IBM Corp. 2019, 2020. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 30 | -------------------------------------------------------------------------------- /src/main/python/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | # (C) Copyright IBM Corp. 2019, 2020. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | -------------------------------------------------------------------------------- /src/main/python/cos_op.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | # (C) Copyright IBM Corp. 2019, 2020. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import datetime 18 | from requests.utils import quote 19 | import hashlib 20 | import hmac 21 | import pandas as pd 22 | from contextlib import closing 23 | from io import BytesIO 24 | import numpy as np 25 | 26 | 27 | def get_hash(key, msg): 28 | """Generates keyed-hash for a message using HMAC 29 | Parameters 30 | ---------- 31 | key: The starting key for the hash. 
32 | msg: message value that will be hashed 33 | """ 34 | return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest() 35 | 36 | 37 | def create_signature_key(key, datestamp, region, service): 38 | """Create a signing key based on AWS signature version 4 39 | Parameters 40 | ---------- 41 | key: Secret access key 42 | datestamp: date and timestamp 43 | region: service region 44 | service: service name 45 | """ 46 | key_date = get_hash(('AWS4' + key).encode('utf-8'), datestamp) 47 | key_region = get_hash(key_date, region) 48 | key_service = get_hash(key_region, service) 49 | key_signing = get_hash(key_service, 'aws4_request') 50 | return key_signing 51 | 52 | 53 | def generate_link(filename, project_io, expiration): 54 | """Generate a signing downloadable link of a file 55 | Parameters 56 | ---------- 57 | filename: file name 58 | project_io: Watson Studio project io instance 59 | expiration: expiration time in seconds 60 | """ 61 | region = '' 62 | http_method = 'GET' 63 | endpoint = project_io.get_storage_metadata()['properties']['endpoint_url'] 64 | 65 | cur_time = datetime.datetime.utcnow() 66 | timestamp = cur_time.strftime('%Y%m%dT%H%M%SZ') 67 | datestamp = cur_time.strftime('%Y%m%d') 68 | 69 | standardized_querystring = ('X-Amz-Algorithm=AWS4-HMAC-SHA256' + 70 | '&X-Amz-Credential=' + 71 | project_io.get_storage_metadata()['properties']['credentials']['editor'][ 72 | 'access_key_id'] + '/' + datestamp + '/' + region + 73 | '/s3/aws4_request' + 74 | '&X-Amz-Date=' + timestamp + 75 | '&X-Amz-Expires=' + str(expiration) + 76 | '&X-Amz-SignedHeaders=host') 77 | 78 | standardized_querystring_url_encoded = quote(standardized_querystring, safe='&=') 79 | 80 | standardized_resource = '/' + project_io.get_storage_metadata()['properties']['bucket_name'] + '/' + filename 81 | 82 | payload_hash = 'UNSIGNED-PAYLOAD' 83 | standardized_headers = 'host:' + project_io.get_storage_metadata()['properties']['endpoint_url'].replace('https://', 84 | '') 85 | signed_headers = 
'host' 86 | 87 | standardized_request = (http_method + '\n' + 88 | standardized_resource + '\n' + 89 | standardized_querystring_url_encoded + '\n' + 90 | standardized_headers + '\n' + 91 | '\n' + 92 | signed_headers + '\n' + 93 | payload_hash) 94 | 95 | # assemble string-to-sign 96 | hashing_algorithm = 'AWS4-HMAC-SHA256' 97 | credential_scope = datestamp + '/' + region + '/' + 's3' + '/' + 'aws4_request' 98 | sts = (hashing_algorithm + '\n' + 99 | timestamp + '\n' + 100 | credential_scope + '\n' + 101 | hashlib.sha256(standardized_request.encode('utf-8')).hexdigest()) 102 | 103 | # generate the signature 104 | signature_key = create_signature_key( 105 | project_io.get_storage_metadata()['properties']['credentials']['editor']['secret_access_key'], datestamp, 106 | region, 's3') 107 | signature = hmac.new(signature_key, 108 | sts.encode('utf-8'), 109 | hashlib.sha256).hexdigest() 110 | 111 | # create and send the request 112 | request_url = (endpoint + '/' + 113 | project_io.get_storage_metadata()['properties']['bucket_name'] + '/' + 114 | filename + '?' 
+ 115 | standardized_querystring_url_encoded + 116 | '&X-Amz-Signature=' + 117 | signature) 118 | return request_url 119 | 120 | 121 | def generate_excel_measure(dataframe_list, sheet_name_list, filename, project_io): 122 | """Generate a formatted excel file given a list of dataframes for measure notebook 123 | Parameters 124 | ---------- 125 | dataframe_list: a list of dataframes 126 | sheet_name_list: a list of sheet names 127 | filename: output file name 128 | project_io: Watson Studio project io instance 129 | """ 130 | with closing(BytesIO()) as output: 131 | writer = pd.ExcelWriter(output, engine='xlsxwriter', options={'remove_timezone': True}) 132 | workbook = writer.book 133 | data_format1 = workbook.add_format({'bg_color': '#BBCCE2'}) 134 | data_format2 = workbook.add_format({'bg_color': '#DEE6EF'}) 135 | 136 | format_header = workbook.add_format({'text_wrap': True}) 137 | 138 | workbook.formats[0].set_font_size(15) 139 | for df, name in zip(dataframe_list, sheet_name_list): 140 | df.to_excel(writer, sheet_name=name) 141 | worksheet = writer.sheets[name] 142 | worksheet.set_row(0, 30, format_header) 143 | worksheet.set_column('A:A', 5) 144 | worksheet.set_column('B:B', 30) 145 | worksheet.set_column('C:C', 30) 146 | worksheet.set_column('D:D', 15) 147 | worksheet.set_column('F:G', 35) 148 | worksheet.set_column('H:AH', 20) 149 | for row in range(1, len(df) + 1, 2): 150 | worksheet.set_row(row, cell_format=data_format1) 151 | worksheet.set_row(row + 1, cell_format=data_format2) 152 | writer.save() 153 | if project_io is not None: 154 | project_io.save_data(filename, output.getvalue(), overwrite=True) 155 | else: 156 | with open(filename, 'wb') as out: 157 | out.write(output.getvalue()) 158 | 159 | 160 | def generate_excel_effectiveness(dataframe_list, sheet_name_list, filename, project_io): 161 | """Generate a formatted excel file given a list of dataframes for effectiveness notebook 162 | Parameters 163 | ---------- 164 | dataframe_list: a list of 
def generate_excel_effectiveness(dataframe_list, sheet_name_list, filename, project_io):
    """Generate a formatted Excel file from a list of dataframes (effectiveness notebook).

    Parameters
    ----------
    dataframe_list: list of pandas DataFrames, one per sheet
    sheet_name_list: list of sheet names, parallel to ``dataframe_list``
    filename: output file name
    project_io: Watson Studio project io instance; when None the file is
        written to the local filesystem instead of project storage
    """
    with closing(BytesIO()) as output:
        # 'remove_timezone' strips tz info, which xlsxwriter cannot serialize.
        # NOTE(review): the ``options`` keyword was removed in pandas 2.0 in
        # favour of ``engine_kwargs`` -- confirm the pinned pandas version.
        writer = pd.ExcelWriter(output, engine='xlsxwriter', options={'remove_timezone': True})
        workbook = writer.book
        # Alternating row colours for readability.
        data_format1 = workbook.add_format({'bg_color': '#BBCCE2'})
        data_format2 = workbook.add_format({'bg_color': '#DEE6EF'})

        format_header = workbook.add_format({'text_wrap': True})

        workbook.formats[0].set_font_size(15)
        for df, name in zip(dataframe_list, sheet_name_list):
            df.to_excel(writer, sheet_name=name)
            worksheet = writer.sheets[name]
            worksheet.set_row(0, 20, format_header)
            worksheet.set_column('A:A', 5)
            worksheet.set_column('B:D', 30)
            # Stripe data rows in pairs.
            for row in range(1, len(df) + 1, 2):
                worksheet.set_row(row, cell_format=data_format1)
                worksheet.set_row(row + 1, cell_format=data_format2)
        # ExcelWriter.save() was deprecated in pandas 1.3 and removed in 2.0;
        # close() finalizes the workbook on every pandas version.
        writer.close()
        if project_io is not None:
            project_io.save_data(filename, output.getvalue(), overwrite=True)
        else:
            with open(filename, 'wb') as out:
                out.write(output.getvalue())


def export_result_excel(df_effective, sample_size=100, project_io=None):
    """Export ineffective-conversation logs to annotated Excel workbooks.

    Produces three workbooks -- all utterances, a sample of escalated
    conversations, and a sample of non-escalated conversations -- each split
    into All/Covered/Not_Covered sheets, with empty annotation columns
    appended for manual review.

    Parameters
    ----------
    df_effective: DataFrame produced by the measure notebook pipeline; must
        contain the log/response columns renamed below
    sample_size: number of conversations to sample for the escalated and
        non-escalated workbooks; <= 0 disables sampling
    project_io: Watson Studio project io instance, or None to save locally
    """
    if df_effective.size == 0:
        print('No ineffective conversations found in logs')
        return
    # Work on a copy so the caller's dataframe is untouched
    df_excel = df_effective.copy(deep=True)
    # Rename columns to human-readable Excel headers
    df_excel = df_excel.rename(columns={'log_id': 'Log ID', 'response.context.conversation_id': 'Conversation ID',
                                        'response.timestamp': 'Response Timestamp',
                                        'request_input': 'Utterance Text',
                                        'response_text': 'Response Text',
                                        'response.top_intent_intent': 'Detected top intent',
                                        'response.top_intent_confidence': 'Detected top intent confidence',
                                        'Intent 2 intent': 'Intent 2', 'Intent 2 confidence': 'Intent 2 Confidence',
                                        'Intent 3 intent': 'Intent 3', 'Intent 3 confidence': 'Intent 3 Confidence',
                                        'response_entities': 'Detected Entities',
                                        'Escalated_conversation': 'Escalated conversation?',
                                        'Covered': 'Covered?', 'Not Covered cause': 'Not covered - cause',
                                        'response.output.nodes_visited_s': 'Dialog Flow',
                                        'response_dialog_stack': 'Dialog stack',
                                        'response_dialog_request_counter': 'Dialog request counter',
                                        'response_dialog_turn_counter': 'Dialog turn counter'
                                        })

    existing_columns = ['Log ID', 'Conversation ID', 'Response Timestamp', 'Customer ID (must retain for delete)',
                        'Utterance Text', 'Response Text', 'Detected top intent', 'Detected top intent confidence',
                        'Intent 2', 'Intent 2 Confidence', 'Confidence gap (between 1 and 2)', 'Intent 3',
                        'Intent 3 Confidence',
                        'Detected Entities', 'Escalated conversation?', 'Covered?', 'Not covered - cause',
                        'Dialog Flow', 'Dialog stack', 'Dialog request counter', 'Dialog turn counter']
    # New (empty) columns for annotating problematic logs during review
    new_columns_excel = ['Response Correct (Y/N)?', 'Response Helpful (Y/N)?',
                         'Root cause (Problem with Intent, entity, dialog)',
                         'Wrong intent? If yes, put the correct intent. Otherwise leave it blank',
                         'New intent needed? (A new intent. Otherwise leave blank)',
                         'Add Utterance to Training data (Y/N)',
                         'Entity missed? If yes, put the missed entity value. Otherwise leave it blank',
                         'New entity needed? If yes, put the entity name',
                         'New entity value? If yes, put the entity value', 'New dialog logic needed?',
                         'Wrong dialog node? If yes, put the node name. Otherwise leave it blank',
                         'No dialog node triggered']

    # Add the new columns to the dataframe (missing ones filled with '')
    df_excel = df_excel.reindex(columns=[*existing_columns, *new_columns_excel], fill_value='')

    # Output filenames
    all_file = 'All.xlsx'
    escalated_sample_file = 'Escalated_sample.xlsx'
    non_escalated_sample_file = 'NotEscalated_sample.xlsx'

    # Remove timezone information (xlsxwriter cannot store tz-aware datetimes)
    df_excel['Response Timestamp'] = df_excel['Response Timestamp'].dt.tz_localize(None)

    # All utterances sorted by Conversation ID and Response Timestamp
    df_all = df_excel.sort_values(by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True)

    # Covered utterances sorted by Conversation ID and Response Timestamp
    df_covered = df_excel[df_excel['Covered?'] == True].sort_values(
        by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True)

    # Not-covered utterances sorted by Conversation ID and Response Timestamp
    df_not_covered = df_excel[df_excel['Covered?'] == False].sort_values(
        by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True)

    # Convert to Excel format and save locally, or upload to COS if project_io is provided
    generate_excel_measure([df_all, df_covered, df_not_covered],
                           ['All_Utterances', 'Covered_Utterances', 'Not_Covered_Utterances'], filename=all_file,
                           project_io=project_io)

    # Escalated conversations
    df_escalated_true = df_excel.loc[df_excel['Escalated conversation?'] == True]

    # Sample escalated conversations
    if sample_size > 0 and len(df_escalated_true) > 0:
        # Get unique escalated conversation ids
        conversation_ids = df_escalated_true['Conversation ID'].unique()
        # Sample without replacement, capped at the population size: the
        # previous default (replace=True) could pick the same conversation
        # several times and silently return fewer unique conversations.
        sampled_conversation_ids = np.random.choice(
            conversation_ids, min(sample_size, len(conversation_ids)), replace=False)
        df_escalated_true = df_escalated_true[
            df_escalated_true['Conversation ID'].isin(sampled_conversation_ids)].sort_values(
            by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True)

    # Covered utterances in escalated conversations
    df_escalated_covered = df_escalated_true[df_escalated_true['Covered?'] == True].reset_index(drop=True)

    # Not-covered utterances in escalated conversations
    df_escalated_not_covered = df_escalated_true[df_escalated_true['Covered?'] == False].reset_index(drop=True)

    # Convert to Excel format and save/upload
    generate_excel_measure([df_escalated_true, df_escalated_covered, df_escalated_not_covered],
                           ['All_Utterances', 'Covered_Utterances', 'Not_Covered_Utterances'],
                           filename=escalated_sample_file, project_io=project_io)

    # Non-escalated conversations
    df_not_escalated = df_excel.loc[df_excel['Escalated conversation?'] == False].reset_index(drop=True)

    # Sample non-escalated conversations (guard against an empty frame --
    # np.random.choice raises on an empty population; the escalated branch
    # above already had this guard)
    if sample_size > 0 and len(df_not_escalated) > 0:
        # Get unique non-escalated conversation ids
        conversation_ids = df_not_escalated['Conversation ID'].unique()
        sampled_conversation_ids = np.random.choice(
            conversation_ids, min(sample_size, len(conversation_ids)), replace=False)
        df_not_escalated = df_not_escalated[
            df_not_escalated['Conversation ID'].isin(sampled_conversation_ids)].sort_values(
            by=['Conversation ID', 'Response Timestamp']).reset_index(drop=True)

    # Covered utterances in non-escalated conversations
    df_not_escalated_covered = df_not_escalated[df_not_escalated['Covered?'] == True].reset_index(drop=True)

    # Not-covered utterances in non-escalated conversations
    df_not_escalated_not_covered = df_not_escalated[df_not_escalated['Covered?'] == False].reset_index(drop=True)

    # Convert to Excel format and save/upload
    generate_excel_measure([df_not_escalated, df_not_escalated_covered, df_not_escalated_not_covered],
                           ['All_Utterances', 'Covered_Utterances', 'Not_Covered_Utterances'],
                           filename=non_escalated_sample_file, project_io=project_io)
import argparse
from watson_assistant_func import load_logs_from_file
from watson_assistant_func import export_csv_for_intent_recommendation


def _str_to_bool(value):
    """Parse a boolean command-line value.

    argparse's ``type=bool`` is broken: ``bool('False')`` is True, so any
    non-empty value -- including ``--overwrite False`` -- enabled the flag.
    """
    if isinstance(value, bool):
        return value
    if value.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if value.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got {!r}'.format(value))


def parse_args():
    """Build and parse the command-line arguments for the CSV export script."""
    parser = argparse.ArgumentParser(description='Generate intent recommendation CSV from logs JSON file for Watson '
                                                 'Assistant service.')

    # Required arguments
    parser.add_argument(
        '--input_json',
        type=str,
        required=True,
        help="The path of the JSON file of logs, generated by `fetch_logs.py`",
    )
    parser.add_argument(
        '--output_csv',
        type=str,
        required=True,
        help="The path of the CSV file of utterances this script will generate for intent recommendation",
    )

    # Optional arguments
    parser.add_argument(
        '--overwrite',
        # Explicit parser instead of type=bool (see _str_to_bool); still
        # accepts the same `--overwrite True/False` form as before.
        type=_str_to_bool,
        default=False,
        help="If overwrite filename if it exists",
    )
    parser.add_argument(
        '--deduplicate',
        action="store_true",
        help="If set, duplicate utterances are discarded when generating CSV",
    )
    parser.add_argument(
        '--min_length',
        type=int,
        default=3,
        help="Minimum number of tokens of a utterance in the generated CSV. Any utterance that has less than or "
             "equal to this number is discarded.",
    )
    parser.add_argument(
        '--max_length',
        type=int,
        default=20,
        help="Maximum number of tokens of a utterance in the generated CSV. Any utterance that has more than or "
             "equal to this number is discarded.",
    )

    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    print(vars(args))

    # Load logs previously fetched with fetch_logs.py ...
    logs = load_logs_from_file(filename=args.input_json,
                               project=None)

    # ... and export the sanitized, length-filtered utterances as CSV.
    export_csv_for_intent_recommendation(logs,
                                         filename=args.output_csv,
                                         deduplicate=args.deduplicate,
                                         project=None,
                                         overwrite=args.overwrite,
                                         min_length=args.min_length,
                                         max_length=args.max_length)
def _str_to_bool(value):
    """Parse a boolean command-line value.

    argparse's ``type=bool`` is broken: ``bool('False')`` is True, so any
    non-empty value -- including ``--overwrite False`` -- enabled the flag.
    """
    if isinstance(value, bool):
        return value
    if value.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if value.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got {!r}'.format(value))


def parse_args():
    """Build and parse the command-line arguments for the log-fetching script."""
    parser = argparse.ArgumentParser(description='Script to fetch logs from Watson Assistant service.')

    # Required arguments
    parser.add_argument(
        "--url",
        type=str,
        required=True,
        help="Watson Asssistant Legacy V1 URLs, for example, https://api.us-east.assistant.watson.cloud.ibm.com."
    )
    parser.add_argument(
        "--version",
        type=str,
        required=True,
        help="API requests require a version parameter that takes a date in the format version=YYYY-MM-DD. When we "
             "change the API in a backwards-incompatible way, we release a new version date. "
    )
    parser.add_argument(
        "--apikey",
        type=str,
        required=True,
        help="The IAM token."
    )

    # Optional arguments
    parser.add_argument(
        "--workspace_id",
        type=str,
        default='',
        help="To load the skill of an assistant in the next section, you need to provide either Workspace ID or Skill "
             "ID. The values can be found on the View API Details page. If you are using versioning in Watson "
             "Assistant, this ID represents the Development version of your skill definition.",
    )
    parser.add_argument(
        "--skill_id",
        type=str,
        default='',
        help="To load the skill of an assistant in the next section, you need to provide either Workspace ID or Skill "
             "ID. The values can be found on the View API Details page. If you are using versioning in Watson "
             "Assistant, this ID represents the Development version of your skill definition.",
    )
    parser.add_argument(
        "--assistant_id",
        type=str,
        default='',
        help="To load the skill of an assistant in the next section, you need to provide Assistant ID. The values can "
             "be found on the View API Details page.",
    )
    parser.add_argument(
        '--filters',
        default=[],
        nargs='*',
        help="List of filters (string), separated by space. For example, '--filters language::en "
             "meta.summary.input_text_length_i>0 response_timestamp>=2020-03-01'",
    )
    parser.add_argument(
        '--num_logs',
        type=int,
        default=20000,
        help="Number of logs to retrieve (default=20000)",
    )
    parser.add_argument(
        '--output_json',
        type=str,
        default=None,
        help="If output_json is set, logs will be saved to filename as a JSON file",
    )
    parser.add_argument(
        '--overwrite',
        # Explicit parser instead of type=bool (see _str_to_bool); still
        # accepts the same `--overwrite True/False` form as before.
        type=_str_to_bool,
        default=False,
        help="If overwrite filename if it exists",
    )

    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    print(vars(args))

    # Authenticate against the legacy V1 Assistant API.
    authenticator = IAMAuthenticator(args.apikey)
    sdk_object = AssistantV1(version=args.version, authenticator=authenticator)
    sdk_object.set_service_url(args.url)

    assistant_information = {'workspace_id': args.workspace_id,
                             'skill_id': args.skill_id,
                             'assistant_id': args.assistant_id}
    print(assistant_information)

    logs = get_logs(sdk_object,
                    assistant_information,
                    num_logs=args.num_logs,
                    filename=args.output_json,
                    filters=args.filters,
                    project=None,
                    overwrite=args.overwrite,
                    )
import json
import pandas as pd
import os
import csv
import traceback
import io
from string import punctuation


# English punctuation used by sanitize_text(); the right single quote is
# appended because it is common in smart-quote user input.
EN_PUNCTUATION = punctuation + '’'


def get_assistant_definition(sdk_object, assistant_info, project=None, overwrite=False, filename='assistant_definition'):
    """Fetch a workspace/skill definition, or load it from a cached JSON file.

    Parameters
    ----------
    sdk_object : authenticated AssistantV1 SDK instance
    assistant_info : dict with optional keys 'workspace_id', 'assistant_id', 'skill_id'
    project : Watson Studio project io instance, or None to use the local filesystem
    overwrite : when True, re-fetch and re-export even if the file exists
    filename : prefix of the definition file name

    Returns
    ----------
    DataFrame of the (flattened) definition, or None when no valid ID was given
    """
    # Coerce missing keys and None to '' so a partially-filled dict does not
    # crash the len() checks below (matches the None handling in get_logs()).
    workspace_id, assistant_id, skill_id = [assistant_info.get(k) or '' for k in
                                            ['workspace_id', 'assistant_id', 'skill_id']]

    if len(workspace_id) > 0:
        filename += '_workspace_{}.json'.format(workspace_id)
    elif len(skill_id) > 0:
        filename += '_skill_{}.json'.format(skill_id)
    else:
        print('Please provide a valid Workspace ID or Skill ID!')
        return None

    if os.path.isfile(filename) and overwrite is False:
        # Reuse the cached local copy instead of calling the API again
        print('Reading from file:', filename)
        with open(filename) as data:
            data_json = json.load(data)
        # Read the definition into a dataframe
        print('Assistant definition is loaded into as a dataframe.')
        df_assistant = pd.json_normalize(data_json)
        return df_assistant
    else:
        if len(workspace_id) > 0:
            # Fetch the workspace definition
            print('Loading workspace definition using workspace id: {}'.format(workspace_id))
            assistant_definition = sdk_object.get_workspace(workspace_id=workspace_id, export=True,
                                                            include_audit=True).get_result()
        elif len(skill_id) > 0:
            # Fetch the skill definition (the V1 API addresses skills as workspaces)
            print('Loading skill definition using skill id: {}'.format(skill_id))
            assistant_definition = sdk_object.get_workspace(workspace_id=skill_id, export=True,
                                                            include_audit=True).get_result()
        else:
            print('Please provide a valid Workspace ID or Skill ID!')
            assistant_definition = None

        if assistant_definition:
            # Store the workspace details in a dataframe
            df_assistant = pd.json_normalize(assistant_definition)

            # Set `overwrite` to True for exporting assistant definition to json file
            if not os.path.isfile(filename) or overwrite:
                if project is not None:
                    # Save directly as a project (COS) asset. The previous code
                    # also opened a local file in 'wb' mode just to read its
                    # name, leaving a spurious empty file behind.
                    project.save_data(filename, json.dumps(assistant_definition), overwrite=True)
                    print('Definition {} exported as a project asset'.format(filename))
                else:
                    with open(filename, 'w') as f:
                        json.dump(assistant_definition, f)
                    print('Definition {} exported'.format(filename))

            return df_assistant
        else:
            return None


def _get_logs_from_api(sdk_object, workspace_id, log_filter, num_logs):
    """Page through the Assistant logs API and collect up to num_logs events.

    Uses list_logs when a workspace_id is given, list_all_logs otherwise
    (the latter requires an assistant_id/skill filter in log_filter).
    """
    log_list = list()
    try:
        current_cursor = None
        while num_logs > 0:
            if len(workspace_id) > 0:
                logs_response = sdk_object.list_logs(
                    workspace_id=workspace_id,
                    page_limit=500,
                    cursor=current_cursor,
                    filter=log_filter
                ).get_result()
            else:
                logs_response = sdk_object.list_all_logs(
                    page_limit=500,
                    cursor=current_cursor,
                    filter=log_filter
                ).get_result()
            # Never collect more than the remaining budget from this page
            min_num = min(num_logs, len(logs_response['logs']))
            log_list.extend(logs_response['logs'][:min_num])
            print('\r{} logs retrieved'.format(len(log_list)), end='')
            num_logs = num_logs - min_num
            current_cursor = None
            # Check if there is another page of logs to be fetched
            if 'pagination' in logs_response:
                # Get the cursor for the next page, if any
                if 'next_cursor' in logs_response['pagination']:
                    current_cursor = logs_response['pagination']['next_cursor']
                else:
                    break
    except Exception as ex:
        traceback.print_tb(ex.__traceback__)
        raise RuntimeError("Error getting logs using API. Please check if URL/credentials are correct.")

    return log_list


def get_logs(sdk_object, assistant_info, num_logs, filename, filters=None, project=None, overwrite=False):
    """This function calls Watson Assistant API to retrieve logs, using pagination if necessary.
    The goal is to retrieve utterances (user inputs) from the logs.
    Parameters
    ----------
    num_logs : int, total number of log events to retrieve
    assistant_info : dict, containing workspace_id, assistant_id, and skill_id
    filters: list of string query filters
    overwrite: boolean, whether to reset log file
    project: project io of studio project
    filename: name of the log file
    Returns
    ----------
    list of fetched log events (or None on invalid input)
    """
    if filters is None:
        filters = []

    workspace_id, assistant_id, skill_id = [assistant_info.get(k) for k in ['workspace_id', 'assistant_id', 'skill_id']]

    if (workspace_id is None or len(workspace_id) == 0) \
            and (assistant_id is None or len(assistant_id) == 0) \
            and (skill_id is None or len(skill_id) == 0):
        print('Please provide a valid Workspace ID, Assistant ID, or Skill ID!')
        return None

    # check if filename exists before retrieving logs
    file_exist = False
    if filename:
        if project:
            for file in project.get_files():
                if file['name'] == filename:
                    if not overwrite:
                        print('Load logs from existing file {}, set overwrite=True to overwrite'.format(filename))
                        return load_logs_from_file(filename, project)
                    else:
                        file_exist = True

        elif os.path.exists(filename):
            if not overwrite:
                print('Load logs from existing file {}, set overwrite=True to overwrite'.format(filename))
                return load_logs_from_file(filename, None)
            else:
                file_exist = True
    else:
        print('Please provide a valid filename.')
        return None

    # adding default filters based on assistant_id and workspace_id
    if assistant_id is not None and len(assistant_id) > 0:
        filters.append('request.context.system.assistant_id::{}'.format(assistant_id))
    if skill_id is not None and len(skill_id) > 0:
        filters.append('workspace_id::{}'.format(skill_id))

    logs = _get_logs_from_api(sdk_object=sdk_object,
                              workspace_id=workspace_id,
                              log_filter=','.join(filters),
                              num_logs=num_logs)
    print('\nLoaded {} logs'.format(len(logs)))

    if not file_exist or overwrite:
        # Previously this printed the filename where the count belongs
        # ("Saving {} logs".format(filename)); report both correctly.
        print('Saving {} logs into JSON file {}... '.format(len(logs), filename))
        if project:
            # Save directly as a project (COS) asset; the previous code also
            # created a spurious empty local file via open(filename, 'wb').
            project.save_data(filename, json.dumps(logs, indent=2), overwrite=overwrite)
            # Display success message
            print('File', filename, 'saved a project asset')
        else:
            with open(filename, 'w') as fp:
                json.dump(logs, fp, indent=2)
                print('File', fp.name, 'saved')

    return logs


def load_logs_from_file(filename, project=None):
    """Load previously exported log events from a JSON file.

    Reads from project (COS) storage when project is given, otherwise from
    the local filesystem. Raises ValueError for a missing local file.
    """
    print('Reading from file:', filename)
    logs = None
    if project:
        # Get file from cloud object storage
        data = project.get_file(filename).getvalue().decode('utf8')
        logs = json.loads(data)
        print('Loaded {} logs'.format(len(logs)))
    else:
        if not os.path.exists(filename) or not os.path.isfile(filename):
            raise ValueError('{} either does not exist or is a directory'.format(filename))
        else:
            with open(filename) as data:
                logs = json.load(data)
            print('Loaded {} logs'.format(len(logs)))
    return logs
# From: https://github.ibm.com/watson-engagement-advisor/improve-recommendations-engine/blob/4c996b24bfcac4eb6ab6bbf39cf125cdf30b9027/src/main/python/cluster/utils.py#L44
def sanitize_text(text, remove_punctuation=True, lower=True, tokenize=True):
    """Normalize an utterance for export.

    Parameters
    ----------
    text: raw utterance string
    remove_punctuation: strip English punctuation (incl. the right single quote)
    lower: lowercase the text
    tokenize: unused; kept only for backward compatibility with callers

    Returns
    ----------
    The stripped, optionally lowercased and punctuation-free string.
    """
    text = text.strip()
    if lower:
        text = text.lower()
    if remove_punctuation:
        # Single pass removing every punctuation character (same set as the
        # module-level EN_PUNCTUATION constant).
        text = text.translate(str.maketrans('', '', punctuation + '’'))
    return text


def export_csv_for_intent_recommendation(logs,
                                         filename,
                                         deduplicate=True,
                                         project=None,
                                         overwrite=False,
                                         min_length=3,
                                         max_length=20):
    """Export user utterances from logs as a one-column CSV for intent recommendation.

    Parameters
    ----------
    logs: list of Watson Assistant log events (as returned by get_logs)
    filename: output CSV path (or project asset name)
    deduplicate: drop duplicate utterances when True (output order is then
        unspecified, since deduplication goes through a set)
    project: Watson Studio project io instance, or None to save locally
    overwrite: overwrite an existing project asset
    min_length: utterances with <= this many tokens are discarded
    max_length: utterances with >= this many tokens are discarded

    Returns
    ----------
    The exported messages, as a list of single-element lists (CSV rows).
    """
    messages = [sanitize_text(l['request']['input']['text']) for l in logs]
    # Keep utterances whose token count is strictly between the bounds
    messages = filter(lambda m: min_length < len(m.split()) < max_length, messages)
    if deduplicate:
        messages = [[m] for m in set(messages)]
    else:
        messages = [[m] for m in messages]
    print('\nExporting {} messages into CSV...'.format(len(messages)))

    if project:
        # Build the CSV in memory and save it as a project asset; the previous
        # code also opened a local file in 'wb' mode just to read its name,
        # leaving a spurious empty file behind.
        data = io.StringIO()
        writer = csv.writer(data, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerows(messages)
        project.save_data(filename, data.getvalue(), overwrite=overwrite)
        data.close()
        # Display success message
        print('File', filename, 'saved a project asset')
    else:
        with open(filename, 'w') as f:
            writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            writer.writerows(messages)
        print('File', f.name, 'saved')

    return messages
# coding: utf-8

# (C) Copyright IBM Corp. 2019, 2020.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Legacy ("skip") variants of the log fetchers kept for older notebooks.
# NOTE(review): logs_response is subscripted directly (no .get_result()),
# which suggests this module targets the older watson-developer-cloud SDK
# rather than ibm-watson>=4 -- confirm before reusing these functions.

import re
import json
import pandas as pd
import time
import os


def get_logs(num_logs, log_list, project_creds, log_filter=None):
    """This function calls Watson Assistant API to retrieve logs, using pagination if necessary.
    The goal is to retrieve utterances (user inputs) from the logs.
    Parameters
    ----------
    num_logs : int, the number of records to return in each page of results.
    log_list : list, a list to store returned logs
    project_creds : dict, containing information on project, cos, credentials, assistant, workspace id and name
    log_filter: string, a cacheable parameter that limits the results to those matching the specified filter.

    Returns
    ----------
    log_df : Dataframe of fetched logs
    """
    # Unpack the keys from the dictionary to individual variables
    project, sdk_object, ws_id, ws_name = [project_creds.get(k) for k in ['project', 'sdk_object', 'ws_id', 'ws_name']]
    # Create file name by combining workspace name and filter
    filename = 'logs_' + ws_id + '_' + str(num_logs)

    # Remove all special characters from file name
    filename = re.sub(r'[^a-zA-Z0-9_\- .]', '', filename) + '.json'

    # Reuse a previously saved project asset with the same name, if any
    if [file['name'] for file in project.get_files() if file['name'] == filename]:
        # Get file from cloud object storage
        print('Reading from file:', filename)
        data = project.get_file(filename).getvalue().decode('utf8')
        data_json = json.loads(data)
        # Read logs into dataframe
        log_df = pd.DataFrame.from_records(data_json)
        # Display success message and return the dataframe
        print('Workspace logs loaded successfully with', log_df.shape[0], 'records')
        return log_df
    else:
        try:
            current_cursor = None
            while num_logs > 0:
                time.sleep(0.5)  # allow for a short break to avoid reaching rate limit
                logs_response = sdk_object.list_logs(
                    workspace_id=ws_id,
                    page_limit=500,
                    cursor=current_cursor,
                    filter=log_filter
                )
                # Never collect more than the remaining budget from this page
                min_num = min(num_logs, len(logs_response['logs']))
                log_list.extend(logs_response['logs'][:min_num])
                print('\r{} logs retrieved'.format(len(log_list)), end='')
                num_logs = num_logs - min_num

                current_cursor = None
                # Check if there is another page of logs to be fetched
                if 'pagination' in logs_response:
                    # Get the url from which logs are to fetched
                    if 'next_cursor' in logs_response['pagination']:
                        current_cursor = logs_response['pagination']['next_cursor']
                    else:
                        break

        except Exception as ex:
            # Best-effort fetch: errors are only printed, partial results kept
            print(ex)
        finally:
            # NOTE(review): returning from the finally block suppresses any
            # exception that escapes the handler above -- verify this
            # best-effort behavior is intended before reusing.
            log_df = pd.DataFrame(log_list)
            return log_df


def get_logs_jupyter(num_logs, log_list, workspace_creds, log_filter=None):
    """This function calls Watson Assistant API to retrieve logs, using pagination if necessary.
    The goal is to retrieve utterances (user inputs) from the logs.
    Parameters
    ----------
    num_logs : int, the number of records to return in each page of results.
    log_list : list, a list to store returned logs
    workspace_creds : dict, containing information regarding sdk_object, workspace id, and name
    log_filter: string, a cacheable parameter that limits the results to those matching the specified filter.

    Returns
    ----------
    log_df : Dataframe of fetched logs
    """
    # Unpack the keys from the dictionary to individual variables
    sdk_object, ws_id, ws_name = [workspace_creds.get(k) for k in ['sdk_object', 'ws_id', 'ws_name']]
    # Create file name by combining workspace name and filter
    filename = 'logs_' + ws_id + '_' + str(num_logs)

    # Remove all special characters from file name
    filename = re.sub(r'[^a-zA-Z0-9_\- .]', '', filename) + '.json'

    # Reuse a previously saved local file with the same name, if any
    if os.path.isfile(filename):
        # Get file from the local filesystem
        print('Reading from file:', filename)
        with open(filename) as data:
            data_json = json.load(data)
        # Read logs into dataframe
        log_df = pd.DataFrame.from_records(data_json)
        # Display success message and return the dataframe
        print('Workspace logs loaded successfully with', log_df.shape[0], 'records')
        return log_df
    else:
        try:
            current_cursor = None
            while num_logs > 0:
                logs_response = sdk_object.list_logs(
                    workspace_id=ws_id,
                    page_limit=500,
                    cursor=current_cursor,
                    filter=log_filter
                )
                # Never collect more than the remaining budget from this page
                min_num = min(num_logs, len(logs_response['logs']))
                log_list.extend(logs_response['logs'][:min_num])
                print('\r{} logs retrieved'.format(len(log_list)), end='')
                num_logs = num_logs - min_num
                current_cursor = None
                # Check if there is another page of logs to be fetched
                if 'pagination' in logs_response:
                    # Get the url from which logs are to fetched
                    if 'next_cursor' in logs_response['pagination']:
                        current_cursor = logs_response['pagination']['next_cursor']
                    else:
                        break

        except Exception as ex:
            # Best-effort fetch: errors are only printed, partial results kept
            print(ex)
        finally:
            # NOTE(review): return inside finally suppresses in-flight
            # exceptions (see get_logs above).
            log_df = pd.DataFrame(log_list)
            return log_df
import sys

# Make the package under ./src importable when tests run from the repo root.
sys.path.insert(0, './src')

from assistant_improve_toolkit.computation_func import intersection


def test_intersection():
    """intersection() keeps exactly the elements present in both lists."""
    subset = ['node_1', 'node_2']
    superset = ['node_1', 'node_2', 'node_3']

    assert intersection(subset, superset) == ['node_1', 'node_2']