├── .environment.yml ├── .flake8 ├── .github ├── actions │ └── build_docs │ │ └── action.yml └── workflows │ ├── build.yaml │ ├── build_docs.yaml │ └── merge.yaml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── config ├── default │ └── config_default.yaml ├── llm_interface │ ├── config_ollama_default.yaml │ ├── config_ollama_information_need.yaml │ └── config_openai_default.yaml └── nlu │ ├── agent_dialogue_acts_extraction_config_default.yaml │ └── user_dialogue_acts_extraction_config_default.yaml ├── conftest.py ├── crs_agent_wrapper ├── __init__.py └── ievalm_agent.py ├── data ├── datasets │ ├── README.md │ ├── iard │ │ ├── agent_dialogue_acts_extraction_prompt.txt │ │ ├── formatted_IARD_annotated_gold.json │ │ ├── formatted_IARD_annotated_partially.json │ │ ├── user_dialogue_acts_extraction_prompt.txt │ │ └── user_utterance_nlg_prompt.txt │ └── moviebot │ │ └── annotated_dialogues.json ├── domains │ └── moviebot.yaml ├── interaction_models │ ├── cir6.yaml │ ├── crs_v1.yaml │ └── qrfa.yaml └── item_collections │ ├── movielens-20m-sample │ ├── README.md │ ├── movies.csv │ └── ratings.csv │ └── movielens-25m-sample │ ├── movies_w_keywords.csv │ └── ratings.csv ├── docs ├── README.md └── source │ ├── _static │ ├── UserSimCRS-Overview.png │ └── favicon.png │ ├── _templates │ ├── layout.html │ └── versions.html │ ├── agenda_based.rst │ ├── components.rst │ ├── conf.py │ ├── configuration.rst │ ├── dataset.rst │ ├── index.rst │ ├── information_need.rst │ ├── installation.rst │ ├── interaction_model.rst │ ├── llm_based.rst │ ├── setup_agent.rst │ ├── usage.rst │ └── user_modeling.rst ├── pyproject.toml ├── requirements ├── docs_requirements.txt ├── experimentation_requirements.txt └── requirements.txt ├── scripts ├── datasets │ ├── README.md │ ├── information_need_annotation │ │ ├── __init__.py │ │ ├── information_need_annotator.py │ │ └── information_need_prompt_movies_default.txt │ └── redial │ │ ├── augment_redial.py │ │ └── 
redial_to_dialoguekit.py ├── evaluation │ └── satisfaction_evaluation.py ├── nlg │ └── generative_lm_nlg.py └── nlu │ ├── dialogue_acts_extraction.py │ └── metrics.py ├── tests ├── __init__.py ├── conftest.py ├── core │ └── test_information_need.py ├── data │ ├── annotated_dialogues.json │ ├── domains │ │ └── movies.yaml │ ├── interaction_models │ │ └── crs_v1.yaml │ └── items │ │ ├── movies.csv │ │ ├── movies_w_keywords.csv │ │ └── ratings.csv ├── dialogue_management │ └── test_dialogue_state_tracker.py ├── items │ ├── test_item_collection.py │ └── test_ratings.py └── simulator │ ├── agenda_based │ └── test_interaction_model.py │ ├── llm │ ├── test_stop_prompt.py │ └── test_utterance_generation_prompt.py │ ├── tus │ └── test_tus_feature_handler.py │ └── user_modeling │ └── test_simple_preference_model.py └── usersimcrs ├── __init__.py ├── core ├── __init__.py ├── information_need.py └── simulation_domain.py ├── dialogue_management ├── __init__.py ├── dialogue_state.py └── dialogue_state_tracker.py ├── items ├── __init__.py ├── item.py ├── item_collection.py └── ratings.py ├── nlg └── lm │ └── nlg_generative_lm.py ├── nlu └── lm │ └── lm_dialogue_act_extractor.py ├── run_simulation.py ├── simulation_platform.py ├── simulator ├── __init__.py ├── agenda_based │ ├── agenda.py │ ├── agenda_based_simulator.py │ └── interaction_model.py ├── llm │ ├── __init__.py │ ├── dual_prompt_user_simulator.py │ ├── interfaces │ │ ├── llm_interface.py │ │ ├── ollama_interface.py │ │ └── openai_interface.py │ ├── prompt │ │ ├── prompt.py │ │ ├── stop_prompt.py │ │ └── utterance_generation_prompt.py │ └── simple_prompt_user_simulator.py ├── neural │ ├── core │ │ ├── __init__.py │ │ ├── feature_handler.py │ │ └── transformer.py │ └── tus │ │ └── tus_feature_handler.py └── user_simulator.py ├── user_modeling ├── context_model.py ├── persona.py ├── pkg_preference_model.py ├── preference_model.py └── simple_preference_model.py └── utils ├── annotation_converter_rasa.py └── 
simulation_utils.py /.environment.yml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - black 3 | - flake8 4 | - pytest 5 | - pre-commit -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 80 3 | max-complexity = 10 4 | exclude = 5 | .git, 6 | __pycache__ 7 | -------------------------------------------------------------------------------- /.github/actions/build_docs/action.yml: -------------------------------------------------------------------------------- 1 | name: 'Sphinx docs to GitHub Pages' 2 | description: 'Automatic Sphinx html docs compilation and deployment through the gh-pages branch.' 3 | author: "Diego Prada-Gracia" 4 | branding: 5 | icon: "upload-cloud" 6 | color: "orange" 7 | inputs: 8 | branch: 9 | description: Name of the branch where the sphinx documentation is located 10 | required: false 11 | default: 'main' 12 | dir_docs: 13 | description: Path where the sphinx documentation is located 14 | required: false 15 | default: 'docs' 16 | sphinxapiexclude: 17 | description: Files/directories to exclude from sphinx-apidoc 18 | require: false 19 | default: '*setup* tests*' 20 | sphinxapiopts: 21 | description: Options for sphinx-apidoc (default outputs to dir_docs and searches for modules one level up) 22 | require: false 23 | default: '-o . 
../' 24 | sphinxopts: 25 | description: Compilation options for sphinx-multiversion 26 | required: false 27 | default: '' 28 | 29 | runs: 30 | using: "composite" 31 | steps: 32 | - name: setting the committer name and email 33 | id: committer 34 | shell: bash 35 | run: | 36 | author_name="$(git show --format=%an -s)" 37 | author_email="$(git show --format=%ae -s)" 38 | echo "::group::Set committer" 39 | echo "git config user.name $author_name" 40 | git config user.name $author_name 41 | echo "git config user.email $author_email" 42 | git config user.email $author_email 43 | echo "::endgroup::" 44 | - name: gh-pages branch creation if needed 45 | id: gh-pages-branch-creation 46 | shell: bash 47 | run: | 48 | echo "::group::Checking if gh-pages branch exists" 49 | if [[ -z $(git ls-remote --heads origin gh-pages) ]]; then 50 | echo "Creating gh-pages branch" 51 | git checkout --orphan gh-pages 52 | git reset --hard 53 | git commit --allow-empty -m "First commit to create gh-pages branch" 54 | git push origin gh-pages 55 | echo "Created gh-pages branch" 56 | else 57 | echo "Branch gh-pages already exists" 58 | fi 59 | echo "::endgroup::" 60 | - name: Moving to branch where sphinx docs are located 61 | id: to-branch-with-docs 62 | shell: bash 63 | run: | 64 | git checkout ${{ inputs.branch }} 65 | - name: sphinx apidoc generation 66 | shell: bash -l {0} 67 | working-directory: ./${{ inputs.dir_docs }} 68 | run: | 69 | echo ::group::Sphinx apidocs generation 70 | sphinx-apidoc ${{ inputs.sphinxapiopts }} ${{ inputs.sphinxapiexclude }} 71 | echo ::endgroup:: 72 | - name: sphinx html docs compilation 73 | shell: bash -l {0} # This is needed to work with conda here. See:https://github.com/marketplace/actions/setup-miniconda#IMPORTANT 74 | working-directory: ./${{ inputs.dir_docs }} 75 | run: | 76 | echo ::group::Sphinx docs compilation 77 | sphinx-multiversion . 
_build ${{ inputs.sphinxopts }} 78 | echo ::endgroup:: 79 | - name: pushing to gh-pages 80 | shell: bash 81 | run: | 82 | echo ::group::Create README for gh-pages 83 | SHA=$GITHUB_SHA 84 | echo "$SHA $GITHUB_EVENT_NAME" 85 | if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then 86 | SHA=$(cat $GITHUB_EVENT_PATH | jq -r .pull_request.head.sha) 87 | fi 88 | SHORT_SHA="$(git rev-parse --short $SHA)" 89 | DIR_HTML=${{ inputs.dir_docs }}/_build/ 90 | echo "#GitHub Pages" > $DIR_HTML/README.md 91 | echo "" >> $DIR_HTML/README.md 92 | echo "Last update of sphinx html documentation from [$SHORT_SHA](https://github.com/$GITHUB_REPOSITORY/tree/$SHA)" >> $DIR_HTML/README.md 93 | cat $DIR_HTML/README.md 94 | echo ::endgroup:: 95 | echo ::group::Create .nojekyll in case 'sphinx.ext.githubpages' is not used 96 | touch $DIR_HTML/.nojekyll 97 | echo ::endgroup:: 98 | echo ::group::Push to gh-pages 99 | git add -f $DIR_HTML 100 | git commit -m "From $GITHUB_REF $SHA" 101 | git push origin `git subtree split --prefix $DIR_HTML ${{ inputs.branch }}`:gh-pages --force 102 | echo ::endgroup:: -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | pull_request: 4 | types: 5 | - opened 6 | - reopened 7 | - synchronize 8 | paths-ignore: 9 | - "**.md" 10 | 11 | env: 12 | REPO_NAME: ${{ github.event.repository.name }} 13 | 14 | jobs: 15 | pre-commit: 16 | if: always() 17 | runs-on: ubuntu-latest 18 | timeout-minutes: 10 19 | steps: 20 | - uses: actions/checkout@v3 21 | with: 22 | fetch-depth: 0 23 | 24 | - uses: actions/setup-python@v4 25 | with: 26 | python-version: "3.9" 27 | cache: "pip" 28 | 29 | - name: Install Dependencies 30 | run: | 31 | pip install --upgrade pip 32 | pip install -r requirements/requirements.txt 33 | 34 | - name: Run black 35 | shell: bash 36 | run: pre-commit run black --all-files 37 | 38 | - name: Run 
flake8 39 | shell: bash 40 | run: pre-commit run flake8 --all-files 41 | 42 | - name: Run docformatter 43 | shell: bash 44 | run: pre-commit run docformatter --all-files 45 | 46 | mypy: 47 | if: always() 48 | runs-on: ubuntu-latest 49 | timeout-minutes: 10 50 | steps: 51 | - uses: actions/checkout@v3 52 | with: 53 | fetch-depth: 0 54 | 55 | - uses: actions/setup-python@v4 56 | with: 57 | python-version: "3.9" 58 | cache: "pip" 59 | 60 | - name: Install Dependencies 61 | run: | 62 | pip install --upgrade pip 63 | pip install -r requirements/requirements.txt 64 | - name: Run mypy 65 | shell: bash 66 | run: pre-commit run mypy --all-file 67 | 68 | build-and-test: 69 | name: "Build and Test Python 3.9" 70 | runs-on: ubuntu-latest 71 | if: always() 72 | timeout-minutes: 20 73 | 74 | steps: 75 | - uses: actions/checkout@v3 76 | - name: Setup python 77 | uses: actions/setup-python@v4 78 | with: 79 | python-version: "3.9" 80 | cache: "pip" 81 | 82 | - name: Install Dependencies 83 | run: | 84 | pip install --upgrade pip 85 | pip install -r requirements/requirements.txt 86 | pip install pytest-github-actions-annotate-failures 87 | 88 | - name: PyTest with code coverage 89 | continue-on-error: true 90 | run: | 91 | pytest --junitxml pytest.xml --cov=. 
--cov-report=term-missing --cov-report=xml --cov-branch | tee pytest-coverage.txt 92 | 93 | - name: Upload Coverage Results txt 94 | if: always() 95 | uses: actions/upload-artifact@v3 96 | with: 97 | name: coverage-results-txt 98 | path: ./pytest-coverage.txt 99 | 100 | - name: Upload Coverage Results xml 101 | if: always() 102 | uses: actions/upload-artifact@v3 103 | with: 104 | name: coverage-results-xml 105 | path: ./coverage.xml 106 | 107 | - name: Upload Unit Test Results 108 | if: always() 109 | uses: actions/upload-artifact@v3 110 | with: 111 | name: unit-test-py39 112 | path: ./pytest.xml 113 | 114 | publish-test-results: 115 | name: "Publish Unit Tests Results" 116 | needs: build-and-test 117 | runs-on: ubuntu-latest 118 | if: always() 119 | timeout-minutes: 20 120 | 121 | steps: 122 | - name: Download Artifacts 123 | uses: actions/download-artifact@v3 124 | with: 125 | path: artifacts 126 | 127 | - name: Publish Unit Test Results 128 | uses: EnricoMi/publish-unit-test-result-action@v1 129 | with: 130 | files: artifacts/unit-test-py39/*.xml 131 | comment_mode: off 132 | 133 | - name: Get the Coverage 134 | shell: bash 135 | run: | 136 | regex='' 137 | line=$(grep -oP $regex artifacts/coverage-results-xml/coverage.xml) 138 | [[ $line =~ $regex ]] 139 | coverage=$( bc <<< ${BASH_REMATCH[1]}*100 ) 140 | if (( $(echo "$coverage > 90" |bc -l) )); then 141 | COLOR=green 142 | else 143 | COLOR=red 144 | fi 145 | echo "COVERAGE=${coverage%.*}%" >> $GITHUB_ENV 146 | echo "COLOR=$COLOR" >> $GITHUB_ENV 147 | 148 | - name: Create the Badge 149 | uses: schneegans/dynamic-badges-action@v1.7.0 150 | with: 151 | auth: ${{ secrets.GIST_SECRET }} 152 | gistID: cd558f4b76df656b67277f8ae214b7e0 153 | filename: coverage.${{ env.REPO_NAME }}.${{ github.event.number }}.json 154 | label: coverage 155 | message: ${{ env.COVERAGE }} 156 | color: ${{ env.COLOR }} 157 | 158 | - name: Find Comment 159 | uses: peter-evans/find-comment@v2 160 | id: fc 161 | with: 162 | issue-number: ${{ 
github.event.pull_request.number }} 163 | comment-author: "github-actions[bot]" 164 | body-includes: Current Branch | Main Branch | 165 | 166 | - name: Create coverage comment 167 | uses: peter-evans/create-or-update-comment@v2.1.0 168 | with: 169 | comment-id: ${{ steps.fc.outputs.comment-id }} 170 | issue-number: ${{ github.event.pull_request.number }} 171 | body: | 172 | Current Branch | Main Branch | 173 | | ------ | ------ | 174 | ![Coverage Badge](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/NoB0/cd558f4b76df656b67277f8ae214b7e0/raw/coverage.${{ env.REPO_NAME }}.${{ github.event.number }}.json) | ![Coverage Badge](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/NoB0/cd558f4b76df656b67277f8ae214b7e0/raw/coverage.${{ env.REPO_NAME }}.main.json) | 175 | edit-mode: replace 176 | -------------------------------------------------------------------------------- /.github/workflows/build_docs.yaml: -------------------------------------------------------------------------------- 1 | name: Sphinx docs to gh-pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | # workflow_dispatch: # Un comment line if you also want to trigger action manually 9 | 10 | jobs: 11 | sphinx_docs_to_gh-pages: 12 | runs-on: ubuntu-latest 13 | name: Sphinx docs to gh-pages 14 | steps: 15 | - name: Checkout branch 16 | uses: actions/checkout@v3 17 | with: 18 | ref: main 19 | fetch-depth: 0 20 | - name: Setup Python 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: 3.9.15 24 | cache: "pip" 25 | - name: Installing the Documentation requirements 26 | run: | 27 | pip3 install -r requirements/docs_requirements.txt 28 | - name: Sphinx docs to GitHub Pages 29 | uses: ./.github/actions/build_docs/ 30 | with: 31 | branch: main 32 | dir_docs: docs/source 33 | sphinxapiopts: "--separate -o . 
../" 34 | sphinxapiexclude: "../*setup* ../*.ipynb" 35 | sphinxopts: "" 36 | -------------------------------------------------------------------------------- /.github/workflows/merge.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy 2 | on: 3 | push: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | build-and-test: 9 | name: "Build and Test Python 3.9" 10 | runs-on: ubuntu-latest 11 | if: always() 12 | timeout-minutes: 20 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Setup python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: "3.9" 20 | cache: "pip" 21 | 22 | - name: Install Dependencies 23 | run: | 24 | pip install --upgrade pip 25 | pip install -r requirements/requirements.txt 26 | pip install pytest-github-actions-annotate-failures 27 | 28 | - name: PyTest with code coverage 29 | continue-on-error: true 30 | run: | 31 | pytest --cov=. --cov-report=term-missing --cov-report=xml --cov-branch 32 | 33 | - name: Get the Coverage 34 | shell: bash 35 | run: | 36 | regex='' 37 | line=$(grep -oP $regex coverage.xml) 38 | [[ $line =~ $regex ]] 39 | coverage=$( bc <<< ${BASH_REMATCH[1]}*100 ) 40 | if (( $(echo "$coverage > 90" |bc -l) )); then 41 | COLOR=green 42 | else 43 | COLOR=red 44 | fi 45 | echo "COVERAGE=${coverage%.*}%" >> $GITHUB_ENV 46 | echo "COLOR=$COLOR" >> $GITHUB_ENV 47 | 48 | - name: Create the Badge 49 | uses: schneegans/dynamic-badges-action@v1.7.0 50 | with: 51 | auth: ${{ secrets.GIST_SECRET }} 52 | gistID: cd558f4b76df656b67277f8ae214b7e0 53 | filename: coverage.${{ github.event.repository.name }}.main.json 54 | label: coverage 55 | message: ${{ env.COVERAGE }} 56 | color: ${{ env.COLOR }} 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea/ 3 | .pyc 4 | __pycache__/ 5 | .vscode/* 6 | .rasa 7 | 8 | # Documentation build 
9 | docs/build 10 | 11 | # Runs 12 | data/runs 13 | 14 | .coverage -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/ambv/black 3 | rev: 22.6.0 4 | hooks: 5 | - id: black 6 | language_version: python3.9 7 | - repo: https://github.com/pycqa/flake8 8 | rev: 5.0.4 9 | hooks: 10 | - id: flake8 11 | - repo: https://github.com/PyCQA/docformatter 12 | rev: v1.5.0 13 | hooks: 14 | - id: docformatter 15 | name: docformatter 16 | description: "Formats docstrings to follow PEP 257." 17 | entry: docformatter 18 | args: 19 | - --in-place 20 | - --wrap-summaries=80 21 | - --wrap-descriptions=80 22 | language: python 23 | types: [python] 24 | - repo: local 25 | hooks: 26 | - id: mypy 27 | name: mypy 28 | entry: "mypy" 29 | language: python 30 | exclude: ^tests/ 31 | types: [python] 32 | verbose: true 33 | args: 34 | - --no-strict-optional 35 | - --ignore-missing-imports 36 | - --no-incremental 37 | - --disable-error-code=override 38 | additional_dependencies: [] 39 | - repo: local 40 | hooks: 41 | - id: pytest 42 | name: run tests 43 | entry: pytest tests -vv 44 | language: system 45 | always_run: true 46 | pass_filenames: false -------------------------------------------------------------------------------- /config/default/config_default.yaml: -------------------------------------------------------------------------------- 1 | output_name: "moviebot" 2 | debug: False 3 | fix_random_seed: False 4 | num_simulated_dialogues: 100 5 | 6 | ### AGENT CONFIGURATION ### 7 | agent_class_path: "sample_agents.moviebot_agent.MovieBotAgent" 8 | agent_id: "IAI MovieBot" 9 | # By default, the agent has an HTTP API. 
10 | agent_uri: "http://127.0.0.1:5001" 11 | 12 | ### SIMULATOR CONFIGURATION ### 13 | simulator_class_path: "usersimcrs.simulator.agenda_based.agenda_based_simulator.AgendaBasedSimulator" 14 | simulator_id: "User Simulator" 15 | 16 | domain: data/domains/moviebot.yaml 17 | intents: data/interaction_models/crs_v1.yaml 18 | 19 | collection_db_path: "data/item_collections.db" 20 | collection_name: "movies_w_keywords" 21 | items: data/item_collections/movielens-25m-sample/movies_w_keywords.csv 22 | id_col: movieId 23 | domain_mapping: 24 | title: 25 | slot: TITLE 26 | genres: 27 | slot: GENRE 28 | multi-valued: True 29 | delimiter: "|" 30 | keywords: 31 | slot: KEYWORD 32 | multi-valued: True 33 | delimiter: "|" 34 | ratings: data/item_collections/movielens-25m-sample/ratings.csv 35 | historical_ratings_ratio: 0.8 36 | 37 | dialogues: data/datasets/moviebot/annotated_dialogues.json 38 | intent_classifier: "cosine" 39 | # If using the DIET classifier the following file needs to be provided. 
40 | # rasa_dialogues: data/agents/moviebot/annotated_dialogues_rasa_agent.yml 41 | 42 | -------------------------------------------------------------------------------- /config/llm_interface/config_ollama_default.yaml: -------------------------------------------------------------------------------- 1 | host: "https://ollama.ux.uis.no" 2 | model: "mistral-nemo:latest" 3 | stream: False 4 | options: 5 | temperature: 0 -------------------------------------------------------------------------------- /config/llm_interface/config_ollama_information_need.yaml: -------------------------------------------------------------------------------- 1 | host: "https://ollama.ux.uis.no" 2 | model: "mistral-nemo:latest" 3 | stream: False 4 | format: "json" 5 | options: 6 | temperature: 0 -------------------------------------------------------------------------------- /config/llm_interface/config_openai_default.yaml: -------------------------------------------------------------------------------- 1 | model: "gpt-3.5-turbo" 2 | api_key: "YOUR_API_KEY" 3 | options: 4 | temperature: 0 -------------------------------------------------------------------------------- /config/nlu/agent_dialogue_acts_extraction_config_default.yaml: -------------------------------------------------------------------------------- 1 | extraction_prompt: data/datasets/iard/agent_dialogue_acts_extraction_prompt.txt 2 | 3 | intent_labels: 4 | - REQ 5 | - CLA 6 | - RES 7 | - ANS 8 | - REC-S 9 | - REC-E 10 | - EXP-I 11 | - EXP-P 12 | - EXP-S 13 | - OTH 14 | 15 | slot_labels: 16 | - GENRE 17 | - TITLE 18 | - PLOT 19 | - ACTOR 20 | - DIRECTOR 21 | - RATING 22 | - KEYWORDS 23 | - YEAR 24 | - OTHER 25 | 26 | ollama_host: "https://ollama.ux.uis.no" 27 | ollama_model: "mistral-nemo:latest" 28 | ollama_options: 29 | temperature: 0 -------------------------------------------------------------------------------- /config/nlu/user_dialogue_acts_extraction_config_default.yaml: 
-------------------------------------------------------------------------------- 1 | extraction_prompt: data/datasets/iard/user_dialogue_acts_extraction_prompt.txt 2 | 3 | intent_labels: 4 | - IQU 5 | - CON 6 | - REF 7 | - STO 8 | - PRO 9 | - ANS 10 | - ASK 11 | - SEE 12 | - ACC 13 | - REJ 14 | - INQ 15 | - CRI-F 16 | - CRI-A 17 | - NRE 18 | - CRI-C 19 | - OTH 20 | 21 | slot_labels: 22 | - GENRE 23 | - TITLE 24 | - PLOT 25 | - ACTOR 26 | - DIRECTOR 27 | - RATING 28 | - KEYWORDS 29 | - YEAR 30 | - OTHER 31 | 32 | ollama_host: "https://ollama.ux.uis.no" 33 | ollama_model: "mistral-nemo:latest" 34 | ollama_options: 35 | temperature: 0 -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iai-group/UserSimCRS/13f72923d9be0b27472e3f684b2bca4563332c3d/conftest.py -------------------------------------------------------------------------------- /crs_agent_wrapper/__init__.py: -------------------------------------------------------------------------------- 1 | """CRS agent wrappers level init.""" 2 | -------------------------------------------------------------------------------- /crs_agent_wrapper/ievalm_agent.py: -------------------------------------------------------------------------------- 1 | """Wrapper for CRS agents available in iEvaLM fork. 2 | 3 | The models are served via Flask API, the wrapper is responsible for sending 4 | and receiving messages from an agent via the API. 
class iEvaLMAgent(Agent):
    """Agent wrapping a CRS model served via the iEvaLM Flask API.

    The wrapper forwards the dialogue context and the latest user message to
    the remote model and registers the model's reply as an agent utterance.
    """

    def __init__(
        self,
        id: str,
        agent_type: AgentType = AgentType.BOT,
        stop_intent: Intent = Intent("EXIT"),
        uri: str = DEFAULT_IEVALM_URI,
        crs_model: Optional[str] = None,
    ) -> None:
        """Initializes iEvaLM agent.

        Args:
            id: Agent ID.
            agent_type: Agent type. Defaults to AgentType.BOT.
            stop_intent: Label of the exit intent. Defaults to "EXIT".
            uri: URI of the iEvaLM agent. Defaults to DEFAULT_IEVALM_URI.
            crs_model: CRS model served by iEvaLM. Defaults to None.
        """
        super().__init__(id=id, agent_type=agent_type, stop_intent=stop_intent)
        self._uri = uri
        self._crs_model = crs_model

    def welcome(self) -> None:
        """Sends the agent's welcome message."""
        welcome_message = (
            f"Hello! I am {self._crs_model}. How can I help you?"
            if self._crs_model
            else "Hello! How can I help you?"
        )
        response = AnnotatedUtterance(
            text=welcome_message,
            participant=self._type,
        )
        self._dialogue_connector.register_agent_utterance(response)

    def goodbye(self) -> None:
        """Sends the agent's goodbye message."""
        goodbye_message = "Goodbye!"
        response = AnnotatedUtterance(
            text=goodbye_message,
            participant=self._type,
            dialogue_acts=[DialogueAct(self.stop_intent)],
        )
        self._dialogue_connector.register_agent_utterance(response)

    def receive_utterance(self, utterance: Utterance) -> None:
        """Responds to the other participant with an utterance.

        Args:
            utterance: The other participant's utterance.
        """
        context = []
        # Models expect the first utterance to be from the user. The agent
        # utterances before the user utterance are skipped.
        skipped_first_agent_utterance = False
        # Bug fix: the loop variable previously reused the name `utterance`,
        # shadowing the method parameter. As a result, the "message" field
        # sent to the API was the text of the *last utterance in the history*
        # rather than the utterance this method received. The loop variable
        # is renamed so the parameter reaches the API call intact.
        # NOTE(review): if the dialogue connector registers the incoming
        # utterance in the history before calling this method, `context` also
        # contains the current message — confirm against the connector and
        # drop the last element here if the API expects prior turns only.
        history = self._dialogue_connector.dialogue_history.utterances
        for hist_utterance in history:
            speaker = hist_utterance.participant
            if (
                not skipped_first_agent_utterance
                and speaker == DialogueParticipant.AGENT
            ):
                skipped_first_agent_utterance = True
                continue
            context.append(hist_utterance.text)

        r = requests.post(
            self._uri, json={"context": context, "message": utterance.text}
        )
        response = AnnotatedUtterance(
            text=r.text,
            participant=self._type,
        )
        self._dialogue_connector.register_agent_utterance(response)
4 | The generation of these files can be done with this command: 5 | 6 | ```shell 7 | cd usersimcrs/utils 8 | python -m annotation_converter_rasa -source PathToAnnotatedDialoguesFile -destination PathToDestinationFolder 9 | ``` 10 | 11 | It creates the following files: 12 | 13 | - `_reformat.yaml`: The original file saved as a yaml file 14 | - `_types_w_examples.yaml`: Slots and example values extracted from the dialogues 15 | - `_rasa_agent.yaml`: Examples of agent utterances for all possible intents/actions that the agent can take 16 | - `_rasa_user.yaml`: Similar to the agent file, but for users 17 | -------------------------------------------------------------------------------- /data/datasets/iard/agent_dialogue_acts_extraction_prompt.txt: -------------------------------------------------------------------------------- 1 | | Intent (Code) | Description | Example | 2 | | ---------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | 3 | | Request Information (REQ) | Recommender requests for the seeker's preference or feedback. | "What kind of movies do you like?" | 4 | | Clarify Question (CLA) | Recommender asks a clarifying question for more details. | "What kind of animated movie are you thinking of?" | 5 | | Respond-Feedback (RES) | Recommender responds to any other feedback from the seeker. | "That's my favourite Christmas movie too! " (U: "My absolute favourite!!") | 6 | | Answer (ANS) | Recommender answers the question asked by the seeker. | "Steve Martin and John Candy." (Q: "Who is in that?") | 7 | | Recommend-Show (REC-S) | Recommender provides recommendation by showing it directly. | "The Invitation (2015) is a movie kids like." | 8 | | Recommend-Explore (REC-E) | Recommender provides recommendation by inquiring about the seeker's preference. | "Have you seen Cult of Chucky (2017) that one as pretty scary." 
| 9 | | Explain-Introduction (EXP-I) | Recommender explains recommendation with non-personalized introduction. | "What about Sleepless in Seattle (1993)? Hanks and Ryan?" | 10 | | Explain-Preference (EXP-P) | Recommender explains recommendation based on the seeker's past preference. | "Will Ferrell is also very good in Elf (2003) if you're in need of another comedy" | 11 | | Explain-Suggestion (EXP-S) | Recommender explains recommendation in a suggestive way. | "If you like gory then I would suggest The Last House on the Left (2009)." | 12 | | Other (OTH) | Greetings, gratitude expression, or chit-chat utterances. | "Have a good night." | 13 | 14 | | Slot | 15 | | ---- | 16 | | GENRE | 17 | | TITLE | 18 | | PLOT | 19 | | ACTOR | 20 | | DIRECTOR | 21 | | RATING | 22 | | KEYWORDS | 23 | | YEAR | 24 | | OTHER | 25 | 26 | Given the list of available intents and slots, extract a list of dialogue acts from a given utterance. A dialogue act is a pair of an intent and an optional list of slot-value pairs represented as intent_code(slot="value",slot,...) where the value of a slot is optional or taken from the utterance without any modifications. Multiple dialogue acts are separated by '|'. If no dialogue acts are found, say None. Strictly follow the specified format. 27 | 28 | Example 1: 29 | Input utterance: you're welcome. 30 | Dialogue acts: OTH() 31 | 32 | Example 2: 33 | Input utterance: Hi, what kind of movies do you like? 34 | Dialogue acts: OTH()|REQ(GENRE) 35 | 36 | Example 3: 37 | Input utterance: Okay! How about Horrible Bosses (2011) or Superbad (2007)? 38 | Dialogue acts: REC-S(TITLE='Horrible Bosses', TITLE='Superbad') 39 | 40 | Example 4: 41 | Input utterance: It came out in 2011...No problem! You're welcome! 
42 | Dialogue acts: RES(YEAR="2011")|OTH() 43 | 44 | Example 5: 45 | Input utterance: how about Eddie Murphy Raw (1987) it is a stand up comedy 46 | Dialogue acts: REC-S(TITLE='Eddie Murphy Raw')|EXP-I(GENRE='stand up comedy') 47 | 48 | 49 | Input utterance: {utterance} 50 | Dialogue acts: -------------------------------------------------------------------------------- /data/datasets/iard/user_dialogue_acts_extraction_prompt.txt: -------------------------------------------------------------------------------- 1 | | Intent (Code) | Description | Example | 2 | | -------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | 3 | | Initial Query (IQU) | Seeker asks for a recommendation in the first query. | "I like comedy do you know of any good ones?" | 4 | | Continue (CON) | Seeker asks for more recommendations in the subsequent query. | "Do you have any other suggestions?" | 5 | | Reformulate (REF) | Seeker restates her/his query with or without clarification/further constraints. | "Maybe I am not being clear. I want something that is in the theater now." | 6 | | Start Over (STO) | Seeker starts a new query to ask for recommendations. | "Anything that I can watch with my kids under 10." | 7 | | Provide Preference (PRO) | Seeker provides specific preference for the item s/he is looking for. | "I usually enjoy movies with Seth Rogen and Jonah Hill." | 8 | | Answer (ANS) | Seeker answers the question issued by the recommender. | "Maybe something with more action." (Q: "What kind of fun movie you look for?") | 9 | | Ask Opinion (ASK) | Seeker asks the recommender's personal opinions. | "I really like Reese Witherspoon. How about you?" | 10 | | Seen (SEE) | Seeker has seen the recommended item before. | "I have seen that one and enjoyed it." | 11 | | Accept (ACC) | Seeker likes the recommended item. | "Awesome, I will check it out." 
| 12 | | Reject (REJ) | Seeker dislikes the recommended item. | "I hated that movie. I did not even crack a smile once." | 13 | | Inquire (INQ) | Seeker wants to know more about the recommended item. | "I haven't seen that one yet. What's it about?" | 14 | | Critique-Feature (CRI-F) | Seeker makes critiques on specific features of the current recommendation. | "That's a bit too scary for me." | 15 | | Critique-Add (CRI-A) | Seeker adds further constraints on top of the current recommendation. | "I would like something more recent." | 16 | | Neutral Response (NRE) | Seeker does not indicate her/his preference for the current recommendation. | "I have actually never seen that one." | 17 | | Critique-Compare (CRI-C) | Seeker requests sth similar to the current recommendation in order to compare. | "Den of Thieves (2018) sounds amazing. Any others like that?" | 18 | | Other (OTH) | Greetings, gratitude expression, or chit-chat utterances. | "Sorry about the weird typing." | 19 | 20 | | Slot | 21 | | ---- | 22 | | GENRE | 23 | | TITLE | 24 | | PLOT | 25 | | ACTOR | 26 | | DIRECTOR | 27 | | RATING | 28 | | KEYWORDS | 29 | | YEAR | 30 | | OTHER | 31 | 32 | Given the list of available intents and slots, extract a list of dialogue acts from a given utterance. A dialogue act is a pair of an intent and an optional list of slot-value pairs represented as intent_code(slot="value",slot,...) where the value of a slot is optional or taken from the utterance without any modifications. Multiple dialogue acts are separated by '|'. If no dialogue acts are found, say None. Strictly follow the specified format. 33 | 34 | Example 1: 35 | Input utterance: I don't really like horror movies what about thrillers 36 | Dialogue acts: REJ(GENRE='horror')|CRI-A(GENRE='thrillers') 37 | 38 | Example 2: 39 | Input utterance: Thank you bye. 40 | Dialogue acts: OTH() 41 | 42 | Example 3: 43 | Input utterance: I like things like The Parent Trap...Oh yeah that was a good one. 
the kids enjoyed it too....maybe something not animated 44 | Dialogue acts: PRO(TITLE="The Parent Trap")|SEE()|CRI-A(GENRE="animated") 45 | 46 | Example 4: 47 | Input utterance: I haven't seen The River Wild. What's that about? 48 | Dialogue acts: INQ(TITLE="The River Wild", PLOT) 49 | 50 | Example 5: 51 | Input utterance: Yes. oh okay. I'll check it out. Thanks! 52 | Dialogue acts: ACC() 53 | 54 | 55 | Input utterance: {utterance} 56 | Dialogue acts: -------------------------------------------------------------------------------- /data/datasets/iard/user_utterance_nlg_prompt.txt: -------------------------------------------------------------------------------- 1 | | Intent (Code) | Description | Example | 2 | | -------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | 3 | | Initial Query (IQU) | Seeker asks for a recommendation in the first query. | "I like comedy do you know of any good ones?" | 4 | | Continue (CON) | Seeker asks for more recommendations in the subsequent query. | "Do you have any other suggestions?" | 5 | | Reformulate (REF) | Seeker restates her/his query with or without clarification/further constraints. | "Maybe I am not being clear. I want something that is in the theater now." | 6 | | Start Over (STO) | Seeker starts a new query to ask for recommendations. | "Anything that I can watch with my kids under 10." | 7 | | Provide Preference (PRO) | Seeker provides specific preference for the item s/he is looking for. | "I usually enjoy movies with Seth Rogen and Jonah Hill." | 8 | | Answer (ANS) | Seeker answers the question issued by the recommender. | "Maybe something with more action." (Q: "What kind of fun movie you look for?") | 9 | | Ask Opinion (ASK) | Seeker asks the recommender's personal opinions. | "I really like Reese Witherspoon. How about you?" | 10 | | Seen (SEE) | Seeker has seen the recommended item before. 
| "I have seen that one and enjoyed it." | 11 | | Accept (ACC) | Seeker likes the recommended item. | "Awesome, I will check it out." | 12 | | Reject (REJ) | Seeker dislikes the recommended item. | "I hated that movie. I did not even crack a smile once." | 13 | | Inquire (INQ) | Seeker wants to know more about the recommended item. | "I haven't seen that one yet. What's it about?" | 14 | | Critique-Feature (CRI-F) | Seeker makes critiques on specific features of the current recommendation. | "That's a bit too scary for me." | 15 | | Critique-Add (CRI-A) | Seeker adds further constraints on top of the current recommendation. | "I would like something more recent." | 16 | | Neutral Response (NRE) | Seeker does not indicate her/his preference for the current recommendation. | "I have actually never seen that one." | 17 | | Critique-Compare (CRI-C) | Seeker requests sth similar to the current recommendation in order to compare. | "Den of Thieves (2018) sounds amazing. Any others like that?" | 18 | | Other (OTH) | Greetings, gratitude expression, or chit-chat utterances. | "Sorry about the weird typing." | 19 | 20 | | Slot | 21 | | ---- | 22 | | GENRE | 23 | | TITLE | 24 | | PLOT | 25 | | ACTOR | 26 | | DIRECTOR | 27 | | RATING | 28 | | KEYWORDS | 29 | | YEAR | 30 | | OTHER | 31 | 32 | Generate a user utterance that corresponds to the given dialogue acts. The generated utterance should be straight to the point and include all the information provided in the dialogue acts. A dialogue act is a pair with an intent and an optional list of slot-value pairs. A description of the intents and slots is provided in the tables above. 33 | 34 | Example 1: 35 | Dialogue acts: REJ(GENRE='horror')|CRI-A(GENRE='thrillers') 36 | Generated utterance: I don't really like horror movies what about thrillers 37 | 38 | Example 2: 39 | Dialogue acts: OTH() 40 | Generated utterance: Thank you bye. 
41 | 42 | Example 3: 43 | Dialogue acts: PRO(TITLE="The Parent Trap")|SEE()|CRI-A(GENRE="animated") 44 | Generated utterance: I like things like The Parent Trap...Oh yeah that was a good one. the kids enjoyed it too....maybe something not animated 45 | 46 | Example 4: 47 | Dialogue acts: INQ(TITLE="The River Wild", PLOT) 48 | Generated utterance: I haven't seen The River Wild. What's that about? 49 | 50 | Example 5: 51 | Dialogue acts: ACC() 52 | Generated utterance: Yes. oh okay. I'll check it out. Thanks! 53 | 54 | 55 | Dialogue acts: {dialogue_acts} 56 | Generated utterance: 57 | -------------------------------------------------------------------------------- /data/domains/moviebot.yaml: -------------------------------------------------------------------------------- 1 | name: iai_moviebot 2 | slot_names: 3 | TITLE: 4 | GENRE: 5 | ACTOR: 6 | KEYWORD: 7 | DIRECTOR: -------------------------------------------------------------------------------- /data/interaction_models/cir6.yaml: -------------------------------------------------------------------------------- 1 | name: CIR6 2 | description: CIR6 intent scheme by Zhang&Balog (KDD'20) 3 | 4 | # Possible user intents with optional description. 5 | user_intents: 6 | COMPLETE: 7 | DISCLOSE: 8 | - DISCLOSE.REVIEW 9 | - DISCLOSE.NON-DISCLOSE 10 | - DISCLOSE.NON-DISCLOSE-REVIEW 11 | REVEAL: 12 | - REVEAL.EXPAND 13 | - REVEAL.REFINE 14 | - REVEAL.REVISE 15 | INQUIRE: 16 | - INQUIRE.LIST 17 | - INQUIRE.COMPARE 18 | - INQUIRE.SUBSET 19 | - INQUIRE.SIMILAR 20 | NAVIGATE: 21 | - NAVIGATE.BACK 22 | - NAVIGATE.MORE 23 | - NAVIGATE.REPEAT 24 | NOTE: 25 | - NOTE.NOTE-DISLIKE 26 | - NOTE.NOTE-END 27 | - NOTE.NOTE-YES 28 | INTERROGATE: 29 | COMPLETE: 30 | 31 | # Possible agent intents with optional description. 
32 | agent_intents: 33 | END: 34 | INQUIRE: 35 | - INQUIRE.ELICIT-REVIEW 36 | - INQUIRE.ELICIT 37 | - INQUIRE.CLARIFY 38 | REVEAL: 39 | - REVEAL.LIST 40 | - REVEAL.SUBSET 41 | - REVEAL.SIMILAR 42 | TRAVERSE: 43 | - TRAVERSE.BACK 44 | - TRAVERSE.MORE 45 | - TRAVERSE.REPEAT 46 | RECORD: 47 | END: 48 | 49 | # List of agent intents (including sub-intents) that elicit preferences. 50 | agent_elicit_intents: 51 | - INQUIRE 52 | - INQUIRE.* 53 | 54 | # List of agent intents (including sub-intents) that are for set retrieval. 55 | agent_set_retrieval: 56 | - REVEAL 57 | - REVEAL.* 58 | - TRAVERSE 59 | - TRAVERSE.* 60 | - RECORD 61 | 62 | 63 | # Expected agent intents in response to a (simulated) user intent. 64 | expected_responses: 65 | DISCLOSE.NON-DISCLOSE: 66 | - INQUIRE 67 | - INQUIRE.ELICIT 68 | DISCLOSE.NON-DISCLOSE-REVIEW: 69 | - INQUIRE 70 | - INQUIRE.ELICIT 71 | DISCLOSE: 72 | - INQUIRE.ELICIT 73 | - SHOW 74 | - REVEAL.LIST 75 | - SUGGEST 76 | - INQUIRE.CLARIFY 77 | DISCLOSE.REVIEW: 78 | - INQUIRE.ELICIT 79 | - SHOW 80 | - REVEAL.LIST 81 | - SUGGEST 82 | - INQUIRE.CLARIFY 83 | REVEAL.REVISE: 84 | - INQUIRE.ELICIT 85 | - SHOW 86 | - REVEAL.LIST 87 | - SUGGEST 88 | - INQUIRE.CLARIFY 89 | REVEAL.REFINE: 90 | - INQUIRE.ELICIT 91 | - SHOW 92 | - REVEAL.LIST 93 | - SUGGEST 94 | - INQUIRE.CLARIFY 95 | REVEAL.EXPAND: 96 | - INQUIRE.ELICIT 97 | - SHOW 98 | - REVEAL.LIST 99 | - SUGGEST 100 | - INQUIRE.CLARIFY 101 | NOTE: 102 | - RECORD 103 | - TRAVERSE.REPEAT 104 | NOTE.NOTE-END: 105 | - RECORD 106 | - TRAVERSE.REPEAT 107 | NOTE.NOTE-YES: 108 | - INQUIRE.ELICIT-REVIEW 109 | NOTE.NOTE-DISLIKE: 110 | - REVEAL.LIST 111 | - INQUIRE.ELICIT 112 | INTERROGATE: 113 | - EXPLAIN 114 | INQUIRE: 115 | - REVEAL.LIST 116 | - INQUIRE.ELICIT 117 | NAVIGATE.REPEAT: 118 | - TRAVERSE.REPEAT 119 | NAVIGATE.MORE: 120 | - REVEAL.LIST 121 | - TRAVERSE.MORE 122 | INQUIRE.SIMILAR: 123 | - INQUIRE.ELICIT 124 | - REVEAL.SIMILAR 125 | - REVEAL.LIST 126 | INQUIRE.LIST: 127 | - REVEAL.LIST 128 | 
INQUIRE.SUBSET: 129 | - REVEAL.SUBSET 130 | NAVIGATE: 131 | - SHOW 132 | COMPLETE: 133 | - END 134 | -------------------------------------------------------------------------------- /data/interaction_models/crs_v1.yaml: -------------------------------------------------------------------------------- 1 | name: intent_schema 2 | description: Intent schema for CIR by Afzali, Drzewiecki, and Balog 3 | 4 | # Minimum intents required 5 | required_intents: 6 | INTENT_START: DISCLOSE.NON-DISCLOSE 7 | INTENT_STOP: COMPLETE 8 | INTENT_ITEM_CONSUMED: NOTE.YES 9 | INTENT_LIKE: NOTE.LIKE 10 | INTENT_DISLIKE: NOTE.DISLIKE 11 | INTENT_NEUTRAL: NOTE.NEUTRAL 12 | INTENT_DISCLOSE: DISCLOSE 13 | INTENT_INQUIRE: INQUIRE 14 | INTENT_DONT_KNOW: DONT-KNOW 15 | INTENT_YES: YES 16 | INTENT_NO: NO 17 | 18 | # Possible user intents with optional description. 19 | user_intents: 20 | COMPLETE: 21 | expected_agent_intents: 22 | - END 23 | DISCLOSE.NON-DISCLOSE: 24 | expected_agent_intents: 25 | - INQUIRE 26 | - INQUIRE.ELICIT 27 | - DISCLOSE.NON-DISCLOSE 28 | DISCLOSE: 29 | expected_agent_intents: 30 | - INQUIRE.ELICIT 31 | - REVEAL 32 | - REVEAL.NONE 33 | REVEAL: 34 | expected_agent_intents: 35 | REVEAL.EXPAND: 36 | expected_agent_intents: 37 | - INQUIRE.ELICIT 38 | - REVEAL 39 | - REVEAL.NONE 40 | REVEAL.REFINE: 41 | expected_agent_intents: 42 | - INQUIRE.ELICIT 43 | - REVEAL 44 | - REVEAL.NONE 45 | REVEAL.REVISE: 46 | expected_agent_intents: 47 | - INQUIRE.ELICIT 48 | - REVEAL 49 | - REVEAL.NONE 50 | remove_user_preference: true 51 | INQUIRE: 52 | expected_agent_intents: 53 | - INQUIRE.ELICIT 54 | - REVEAL 55 | - REVEAL.SIMILAR 56 | - REVEAL.NONE 57 | INQUIRE.SIMILAR: 58 | expected_agent_intents: 59 | - REVEAL 60 | - REVEAL.SIMILAR 61 | - REVEAL.NONE 62 | INQUIRE.ITEMINFO: 63 | expected_agent_intents: 64 | - INQUIRE.MORE 65 | INQUIRE.MORE: 66 | expected_agent_intents: 67 | - DISCLOSE.MORE 68 | NOTE: 69 | expected_agent_intents: 70 | - INQUIRE.NEXT 71 | - INQUIRE.MORE 72 | - END 73 | - REVEAL 
74 | - REVEAL.SIMILAR 75 | NOTE.DISLIKE: 76 | expected_agent_intents: 77 | preference_contingent: NEGATIVE 78 | NOTE.LIKE: 79 | expected_agent_intents: 80 | - INQUIRE.NEXT 81 | - REVEAL 82 | - REVEAL.SIMILAR 83 | preference_contingent: POSITIVE 84 | NOTE.NO: 85 | expected_agent_intents: 86 | - REVEAL 87 | - INQUIRE.NEXT 88 | preference_contingent: NOT_CONSUMED 89 | NOTE.YES: 90 | expected_agent_intents: 91 | - INQUIRE.ELICIT 92 | - REVEAL 93 | - REVEAL.SIMILAR 94 | preference_contingent: CONSUMED 95 | NOTE.ACCEPT: 96 | expected_agent_intents: 97 | - INQUIRE.NEXT 98 | 99 | # List of agent intents (including sub-intents) that elicit preferences. 100 | agent_elicit_intents: 101 | - INQUIRE 102 | - INQUIRE.ELICIT 103 | 104 | # List of agent intents (including sub-intents) that are for set retrieval. 105 | agent_set_retrieval: 106 | - REVEAL 107 | - REVEAL.SIMILAR 108 | - REVEAL.NONE 109 | 110 | # List of agent intents (including sub-intents) that are for inquiries. 111 | agent_inquire_intents: 112 | - INQUIRE 113 | - INQUIRE.ELICIT 114 | - INQUIRE.MORE 115 | - INQUIRE.NEXT 116 | 117 | # Reward settings 118 | REWARD: 119 | full_set_points: 20 120 | intents: 121 | INQUIRE: 4 122 | repeat_penalty: 1 123 | cost: 1 -------------------------------------------------------------------------------- /data/interaction_models/qrfa.yaml: -------------------------------------------------------------------------------- 1 | name: QRFA 2 | description: QRFA intent scheme by ... used in ... -------------------------------------------------------------------------------- /data/item_collections/movielens-20m-sample/README.md: -------------------------------------------------------------------------------- 1 | # MovieLens-20M sample 2 | 3 | This folder contains a small sample from the [MovieLens-20M Dataset](https://grouplens.org/datasets/movielens/20m/). 4 | 5 | * `movies.csv` contains the first 1000 records from the original dataset. 
6 | * `ratings.csv` contains a sample of 1000 ratings for items present in the `movies.csv` file. 7 | - The file is generated using (1018 is the last movieId in the sample): 8 | ``` 9 | ml-20m$ head -n1 ratings.csv >{DIALOGUEKIT}/tests/data/movielens-20m-sample/ratings.csv 10 | ml-20m$ awk -F, '$2<=1018' ratings.csv | head -n 1000 >>{DIALOGUEKIT}/tests/data/movielens-20m-sample/ratings.csv 11 | ``` 12 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | 3 | The usage-oriented documentation (in Read the Docs style) is available [here](https://iai-group.github.io/UserSimCRS), with the corresponding sources contained in the source folder. Documentation related to development is found in the [Wiki](https://github.com/iai-group/UserSimCRS/wiki). 4 | -------------------------------------------------------------------------------- /docs/source/_static/UserSimCRS-Overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iai-group/UserSimCRS/13f72923d9be0b27472e3f684b2bca4563332c3d/docs/source/_static/UserSimCRS-Overview.png -------------------------------------------------------------------------------- /docs/source/_static/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iai-group/UserSimCRS/13f72923d9be0b27472e3f684b2bca4563332c3d/docs/source/_static/favicon.png -------------------------------------------------------------------------------- /docs/source/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {% block body %} 3 | {% if current_version and latest_version and current_version != latest_version %} 4 |

5 | 6 | {% if current_version.is_released %} 7 | You're reading an old version of this documentation. 8 | If you want up-to-date information, please have a look at {{latest_version.name}}. 10 | {% else %} 11 | You're reading the documentation for a development version. 12 | For the latest released version, please have a look at {{latest_version.name}}. 14 | {% endif %} 15 | 16 |

17 | {% endif %} 18 | {{ super() }} 19 | {% endblock %}% -------------------------------------------------------------------------------- /docs/source/_templates/versions.html: -------------------------------------------------------------------------------- 1 | {%- if current_version %} 2 |
3 | 4 | Other Versions 5 | v: {{ current_version.name }} 6 | 7 | 8 |
9 | {%- if versions %} 10 |
11 |
Branches
12 | {%- for item in versions.branches %} 13 |
{{ item.name }}
14 | {%- endfor %} 15 |
Tags
16 | {%- for item in versions.tags %} 17 |
{{ item.name }}
18 | {%- endfor %} 19 |
20 | {%- endif %} 21 |
22 |
23 | {%- endif %} -------------------------------------------------------------------------------- /docs/source/agenda_based.rst: -------------------------------------------------------------------------------- 1 | Agenda-based simulator 2 | ====================== 3 | 4 | The agenda-based simulator `[Schatzmann et al., 2007] `_ is designed to ensure that the simulated user adheres to a predetermined dialogue strategy by maintaining an agenda (or stack) of actions. The simulated user's decision-making is modeled as a Markov Decision Process. At each turn, it determines the next action to execute based on the current state of this agenda. 5 | 6 | Specifically, the simulator's next action is determined by the agent response. 7 | If the agent responds expectedly, the next user action is pulled from the top of the agenda; otherwise, the simulator samples the next user action based on transition probabilities from responses in historical dialogues. 8 | 9 | Agenda initialization 10 | --------------------- 11 | 12 | The agenda (:py:class:`usersimcrs.simulator.agenda_based.agenda.Agenda`) is initialized based on the :doc:`information need ` of the simulated user. Specifically, the agenda is initialized with the following steps: 13 | 14 | 1. (optional) Add the start intent 15 | 2. Add dialogue acts with the *INFORM* intent for each constraint in the information need 16 | 3. Add dialogue acts with the *REQUEST* intent for each request in the information need 17 | 4. Add the stop intent 18 | 19 | Example 20 | ^^^^^^^ 21 | 22 | For example, the following information need: 23 | 24 | .. code-block:: json 25 | 26 | { 27 | "constraints": { 28 | "genre": "comedy" 29 | }, 30 | "requests": ["plot"], 31 | "target_items": ["Jump Street", "The Hangover"] 32 | } 33 | 34 | will result in the following agenda: 35 | 36 | .. 
code-block:: json 37 | 38 | [ 39 | START(), 40 | INFORM("genre", "comedy"), 41 | REQUEST("plot"), 42 | STOP() 43 | ] 44 | 45 | Agenda update 46 | ------------- 47 | 48 | The agenda is updated after each agent utterance by the :doc:`interaction model `. The interaction model determines if new actions should be created or sampled and added to the agenda. For example, if the agent recommends an item, the interaction model may decide to create an action to express a preference regarding the recommended item. 49 | 50 | **Reference** 51 | 52 | Jost Schatzmann, Blaise Thomson, Karl Weilhammer, Hui Ye, and Steve Young. 2007. Agenda-Based User Simulation for Bootstrapping a POMDP Dialogue System. In Human Language Technologies 2007: The Conference of the North American Chapter of the Association for Computational Linguistics; Companion Volume, Short Papers (NAACL '07). -------------------------------------------------------------------------------- /docs/source/components.rst: -------------------------------------------------------------------------------- 1 | Main Components 2 | =============== 3 | 4 | .. image:: _static/UserSimCRS-Overview.png 5 | :width: 700 6 | :alt: Conceptual overview of the user simulator. 7 | 8 | 9 | Natural language understanding (NLU) 10 | ------------------------------------ 11 | 12 | The NLU is responsible for obtaining a structured representation of text utterances. This entails intent classification and entity recognition. In addition to this, the NLU can also do satisfaction prediction. This is the user's satisfaction with the agent's response. 13 | 14 | Response generation 15 | ------------------- 16 | 17 | Response generation is currently developed with an agenda-based simulator `[Schatzmann et al., 2007] `_ in mind, however, it could be replaced with other approaches in the future. 18 | Following `Zhang and Balog, 2020 `_, response generation is based on an interaction model, which is responsible for initializing the agenda and updating it.
19 | Updates to the agenda can be summarized as follows: if the agent responds in an expected manner, the interaction model pulls the next action off the agenda; otherwise, it either repeats the same action as the previous turn or samples a new action. 20 | More details on the agenda-based simulator can be found :doc:`here `. 21 | Additionally, motivated by recent work in `[Salle et al., 2021] `_ and `[Sun et al., 2021] `_, we also introduce a user satisfaction prediction component. 22 | In addition to deciding which action to take next, the generated response also includes a turn-level user satisfaction estimate. 23 | This may be utilized by the NLG component when generating the text of the user utterance. 24 | 25 | User modeling 26 | ------------- 27 | 28 | User modeling consists of three sub-components: preference model, context model, and persona. 29 | 30 | Preference model 31 | ^^^^^^^^^^^^^^^^ 32 | 33 | Preference modeling refers to modeling users' individual tastes and allows for a personalized experience. We model preferences as a Personal Knowledge Graph (PKG), where nodes can be either items or attributes. The preference model is built such that it remains consistent across simulations. 34 | 35 | Context model 36 | ^^^^^^^^^^^^^ 37 | 38 | In addition to preference and interaction modeling, we also model other user contexts, specifically temporal and relational. Temporal context refers to time context such as *time of the day* and whether it is a *weekday* or *weekend*. Relational context on the other hand is used to indicate the group setting of the user. 39 | 40 | Persona 41 | ^^^^^^^ 42 | 43 | Persona is used to capture user-specific traits, e.g., cooperativeness. 
44 | 45 | Natural language generation (NLG) 46 | --------------------------------- 47 | 48 | Following the work in `[Zhang and Balog, 2020] `_, the NLG component is template-based, that is, given the output of the response generation module, a fitting textual response is chosen and may be instantiated with preferences. Additionally, we extend template-based generation to be conditioned with metadata, specifically on user satisfaction, such that users could use for example stronger language when getting dissatisfied with the system. 49 | 50 | **References** 51 | 52 | Alexandre Salle, Shervin Malmasi, Oleg Rokhlenko, and Eugene Agichtein. 2021. Studying the Effectiveness of Conversational Search Refinement Through User Simulation. In Proceedings of the 43rd European Conference on IR Research (ECIR '21). 587--602. 53 | 54 | Jost Schatzmann, Blaise Thomson, Karl Weilhammer, Hui Ye, and Steve Young. 2007. Agenda-Based User Simulation for Bootstrapping a POMDP Dialogue System. In Human Language Technologies 2007: The Conference of the North American Chapter of the Association for Computational Linguistics; Companion Volume, Short Papers (NAACL '07). 55 | 56 | Weiwei Sun, Shuo Zhang, Krisztian Balog, Zhaochun Ren, Pengjie Ren, Zhumin Chen, and Maarten de Rijke. 2021. Simulating User Satisfaction for the Evaluation of Task-Oriented Dialogue Systems. In Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR '21). 2499--2506. 57 | 58 | Shuo Zhang and Krisztian Balog. 2020. Evaluating Conversational Recommender Systems via User Simulation. In Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining (KDD '20). 1512--1520. -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | from typing import Dict, List 16 | 17 | import sphinx_rtd_theme 18 | 19 | sys.path.insert(0, os.path.abspath("../../")) 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = "UserSimCRS" 23 | copyright = "2022, IAI group, University of Stavanger" 24 | author = "Jafar Afzali, Krisztian Balog, Aleksander Drzewiecki \ 25 | and Shuo Zhang" 26 | 27 | # The short X.Y version. 28 | version = "0.0.1" 29 | # The full version, including alpha/beta/rc tags. 30 | release = "0.0.1" 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # Add any Sphinx extension module names here, as strings. They can be 35 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 36 | # ones. 37 | extensions = [ 38 | "sphinx.ext.autodoc", 39 | "sphinx.ext.intersphinx", 40 | "sphinx.ext.napoleon", 41 | "sphinx.ext.githubpages", 42 | "autoapi.extension", 43 | "myst_parser", 44 | ] 45 | 46 | # Add any paths that contain templates here, relative to this directory. 47 | templates_path = ["_templates"] 48 | 49 | html_sidebars = { 50 | "**": [ 51 | "versions.html", 52 | ], 53 | } 54 | 55 | # The suffix(es) of source filenames. 56 | # You can specify multiple suffix as a list of string: 57 | # source_suffix = ['.rst', '.md'] 58 | source_suffix = [".rst", ".md"] 59 | 60 | # The encoding of source files. 
61 | # source_encoding = 'utf-8-sig' 62 | 63 | # The master toctree document. 64 | master_doc = "index" 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 68 | # This pattern also affects html_static_path and html_extra_path. 69 | exclude_patterns: List[str] = [] 70 | 71 | # -- Options for HTML output ------------------------------------------------- 72 | 73 | # The theme to use for HTML and HTML Help pages. See the documentation for 74 | # a list of builtin themes. 75 | # 76 | html_theme = "sphinx_rtd_theme" 77 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 78 | 79 | # Add any paths that contain custom static files (such as style sheets) here, 80 | # relative to this directory. They are copied after the builtin static files, 81 | # so a file named 'default.css' will overwrite the builtin 'default.css'. 82 | html_static_path = ["_static"] 83 | html_theme_options: Dict[str, str] = {} 84 | html_favicon = "_static/favicon.png" 85 | 86 | # Auto api 87 | autoapi_type = "python" 88 | autoapi_dirs = ["../../usersimcrs"] 89 | autoapi_ignore = ["*tests/*"] 90 | autoapi_options = [ 91 | "members", 92 | "undoc-members", 93 | "show-inheritance", 94 | "show-module-summary", 95 | ] 96 | autoapi_python_class_content = "init" 97 | 98 | # -- Options for versioning ------------------------------------------------- 99 | # See documentation at: 100 | # https://holzhaus.github.io/sphinx-multiversion/master/configuration.html# 101 | smv_tag_whitelist = r"^.*$" # Include all tags 102 | smv_branch_whitelist = r"^main$" # Include only main branch 103 | smv_remote_whitelist = ( 104 | r"^(origin|upstream)$" # Use branches from origin and upstream 105 | ) 106 | smv_released_pattern = r"^tags/.*$" # Tags only 107 | smv_outputdir_format = "{ref.name}" # Use the branch/tag name 108 | 109 | # Determines whether remote or local git branches/tags are preferred if their 110 | # output dirs 
conflict 111 | smv_prefer_remote_refs = True 112 | -------------------------------------------------------------------------------- /docs/source/configuration.rst: -------------------------------------------------------------------------------- 1 | Setting up a user simulator 2 | =========================== 3 | 4 | Requirements 5 | ------------ 6 | 7 | To run the simulation, the following are needed: 8 | 9 | 1. **Domain:** A YAML file with domain-specific slot names that will be used for the preference model. 10 | 2. **Item collection:** A CSV file containing the item collection. This file must contain at least 2 columns labeled *ID* and *NAME*. 11 | 3. **Preference data:** A CSV file containing the item ratings in the shape of user ID, item ID, and rating triples. 12 | 4. **Interaction model:** A YAML file containing the users’ and agent’s intent space, as well as the set of expected agent intents for each user intent, is required for the interaction model. The CIR6 interaction model shipped with library offers a baseline starting point, which may be further tailored according to the behavior and capabilities of the CRS. 13 | 5. **Annotated sample dialogues:** A small sample of annotated dialogues with the CRS. The sample size depends on the complexity of the system, in terms of action space and language variety, but is generally in the order of 5-50 dialogues. The sample of dialogues must contain utterance-level annotations in terms of intents and entities, as this is required to train the NLU and NLG components. Note that the slots used for annotation should be the same as the ones defined in the domain file (1) and intents should follow the ones defined in the interaction model (4). 14 | 15 | Configuration 16 | ------------- 17 | 18 | The agenda based simulator has a number of parameters that can be customized. 19 | These can be either provided in a YAML config file and/or via the command line. 
Note that arguments passed through the command line will override those in the config file. 20 | 21 | Simulator parameters 22 | ^^^^^^^^^^^^^^^^^^^^ 23 | 24 | * `agent_id`: Id of the agent tested. 25 | * `output_name`: Specifies the output name for the simulation configuration that will be stored under `data/runs` at the end of the simulation. 26 | * `agent_uri`: URI to communicate with the agent. By default we assume that the agent has an HTTP API. 27 | * `domain`: A YAML file with domain-specific slot names. 28 | * `intents`: Path to the intent schema file. 29 | * `items`: Path to items file. 30 | * `id_col`: Name of the CSV field containing item id. 31 | * `domain_mapping`: CSV field mapping to create item based on domain slots. 32 | * `ratings`: Path to ratings file. 33 | * `historical_ratings_ratio`: Ratio ([0..1]) of ratings to be used as historical data. 34 | * `dialogues`: Path to the annotated dialogues file. 35 | * `intent_classifier`: Intent classifier model to be used. Only supports DialogueKit intent classifiers. 36 | * `rasa_dialogues`: File with Rasa annotated dialogues. Only needed when using a DIET intent classifier. 37 | * `debug`: Flag (boolean) to activate debug mode. 38 | 39 | Configuration example 40 | ^^^^^^^^^^^^^^^^^^^^^ 41 | 42 | Below is the default configuration to run simulation with the IAI MovieBot as the conversational agent. 43 | 44 | .. todo:: Add args for context and persona. 45 | 46 | .. code-block:: yaml 47 | 48 | agent_id: "IAI MovieBot" 49 | output_name: "moviebot" 50 | # By default, the agent has an HTTP API. 
51 | agent_uri: "http://127.0.0.1:5001" 52 | 53 | domain: data/domains/movies.yaml 54 | intents: data/interaction_models/crs_v1.yaml 55 | 56 | items: data/movielens-25m-sample/movies_w_keywords.csv 57 | id_col: movieId 58 | domain_mapping: 59 | title: 60 | slot: TITLE 61 | genres: 62 | slot: GENRE 63 | multi-valued: True 64 | delimiter: "|" 65 | keywords: 66 | slot: KEYWORD 67 | multi-valued: True 68 | delimiter: "|" 69 | ratings: data/movielens-25m-sample/ratings.csv 70 | historical_ratings_ratio: 0.8 71 | 72 | dialogues: data/agents/moviebot/annotated_dialogues.json 73 | intent_classifier: "cosine" 74 | # If using the DIET classifier the following file needs to be provided. 75 | # rasa_dialogues: data/agents/moviebot/annotated_dialogues_rasa_agent.yml 76 | 77 | debug: False 78 | 79 | -------------------------------------------------------------------------------- /docs/source/dataset.rst: -------------------------------------------------------------------------------- 1 | Dataset 2 | ======= 3 | 4 | UserSimCRS is shipped with a sample of MovieLens 25M dataset. We specifically chose this dataset as it contains *tags*, which are user-generated metadata about *movies*. 5 | We merge *tags* and *movies* to one file, where each movie in the dataset contains up to 5 most relevant tags. Furthermore, the sampled *ratings* data contains user ratings for 20 users. 6 | 7 | The file containing *movies* consists of four headers, separated by ",": 8 | 9 | #. movieId 10 | #. title 11 | #. genres (separated by the pipe character) 12 | #. keywords (separated by the pipe character) 13 | 14 | Similarly, the *ratings* file consists of four headers, separated by ",": 15 | 16 | #. userId 17 | #. movieId 18 | #. rating 19 | #. timestamp 20 | 21 | If other datasets are used, we expect them to be in the same format as the *movies* and *ratings* files above. 22 | Note that the *timestamp* field is not in use as of yet. 
(Can be omitted from the file) 23 | 24 | Examples 25 | -------- 26 | 27 | The following is an entry from the *movies* file: 28 | 29 | * 1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,animation|kids and family|pixar animation|computer animation|toys 30 | 31 | And an entry from *ratings* file: 32 | 33 | * 1,296,5.0,1147880044 -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | UserSimCRS |release| documentation 2 | ================================== 3 | 4 | UserSimCRS is an extensible user simulation toolkit for evaluating conversational recommender systems. 5 | 6 | It is built on top of the `DialogueKit `_ library, which provides basic dialogue management functionalities. 7 | 8 | UserSimCRS follows the architecture of a typical task-based dialogue system, which consists of natural language understanding, response generation, and natural language generation components. Additionally, there is a dedicated user modeling component in order to make simulation more human-like. 9 | 10 | .. image:: _static/UserSimCRS-Overview.png 11 | :width: 700 12 | :alt: Conceptual overview of the user simulator. 13 | 14 | This toolkit offers repeatable and reproducible means of evaluation that can complement human evaluation. 15 | UserSimCRS is designed to work with existing conversational recommender systems, without needing access to source code or knowledge of their inner workings. 16 | UserSimCRS can also be extended with other simulation approaches and more advanced user modeling capabilities. 17 | 18 | 19 | .. 
toctree:: 20 | :maxdepth: 1 21 | :caption: Contents: 22 | 23 | installation 24 | usage 25 | interaction_model 26 | components 27 | configuration 28 | :ref:`modindex` 29 | 30 | Indices and tables 31 | ================== 32 | 33 | * :ref:`genindex` 34 | * :ref:`modindex` 35 | * :ref:`search` 36 | 37 | -------------------------------------------------------------------------------- /docs/source/information_need.rst: -------------------------------------------------------------------------------- 1 | Information need 2 | ================ 3 | 4 | The information need defines, in a structured manner, what the user is looking for. It comprises: 5 | * *Constraints*: they specify the slot-value pairs that the item of interest must satisfy. 6 | * *Requests*: they specify the slots for which the user wants information. 7 | * *Target items*: they represent the "ground truth" items that the user is interested in. 8 | 9 | 10 | For example, the information need of a user looking for a comedy movie and its associated plot can be represented as follows: 11 | 12 | .. code-block:: json 13 | 14 | { 15 | "constraints": { 16 | "genre": "comedy" 17 | }, 18 | "requests": ["plot"], 19 | "target_items": ["Jump Street", "The Hangover"] 20 | } 21 | 22 | The information need is a core element of the user simulator as it is used to customize the generated responses and can serve as the reference for the evaluation of the conversational recommender system. -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | The recommended version of Python is 3.9. 5 | The easiest way to install UserSimCRS and all of its dependencies is by using pip: 6 | 7 | .. 
code-block:: shell 8 | 9 | python -m pip install -r requirements/requirements.txt 10 | 11 | 12 | To work on the documentation you also need to install other dependencies: 13 | 14 | .. code-block:: shell 15 | 16 | python -m pip install -r requirements/docs_requirements.txt 17 | 18 | -------------------------------------------------------------------------------- /docs/source/interaction_model.rst: -------------------------------------------------------------------------------- 1 | Interaction model 2 | ================= 3 | 4 | The interaction model is based on `[Zhang & Balog, 2020] `_ and defines the allowed transitions between dialogue acts based on their intents. In our implementation, the interaction model is also responsible for the updating of the agenda based on a predefined dialogue strategy. 5 | 6 | Define allowed transitions 7 | -------------------------- 8 | 9 | The interaction model defines the user-agent interactions in terms of *intents* from their respective dialogue acts. The model specifies a set of *user intents* including required ones, a set of *agent intents*, and *expected agent responses* to each user intent. 10 | 11 | Format 12 | ^^^^^^ 13 | 14 | Below, we specify the YAML format that is used for defining an interaction model. 15 | 16 | * **required_intents**: List of minimum required intents for the user. 17 | * **user_intents**: List of all user intents; each should minimally specify **expected_agent_intents**. 18 | 19 | - Additionally, if a user intent is dependent on the preference model, this should be indicated via another key, i.e., **preference_contingent**. 20 | - Similarly, intents that are used to remove preferences should be indicated in another key, **remove_user_preference**. 21 | 22 | * **sub_intents**: Intents that are a variation of another intent can be listed as **sub_intents** of the same main intent. We separate them with a **"."**, where the former part indicates the main intent, and the latter the sub-intent. 
For example, REVEAL.EXPAND is a sub-intent of REVEAL (main intent). 23 | * **agent_elicit_intents**: List of intents that the agent can use to elicit preferences/need from the user. 24 | * **agent_set_retrieval**: List of intents that the agent can use to reveal information to the user. 25 | * **agent_inquire_intents**: List of intents that the agent can use to ask the user if they want to know more. 26 | * **REWARD**: The reward settings for automatic assessment of simulated dialogues. 27 | 28 | An example of interaction model is available at: `data/interaction_models/crs_v1.yaml`. 29 | 30 | New interaction models can be added by providing a YAML file with the same format as the example above. The path to this file can be provided either in the configuration file or the command line, see :ref:`Configuration`. 31 | 32 | Agenda update 33 | ------------- 34 | 35 | The agenda is updated based on the last agent dialogue acts and the current state of the conversation. For each agent dialogue act, the interaction model performs a push operation on the agenda stack. We consider four cases: 36 | 37 | 1. **Agent elicits**: In case the agent elicits information from the user, the interaction model may push a new dialogue act to disclose the information elicited. 38 | 2. **Agent recommends**: In case the agent recommends an item, the interaction model may push a new dialogue act to express a preference regarding the recommended item (e.g., like, dislike, already consumed). 39 | 3. **Agent inquires**: In case the agent inquires if the user wants to know more about a specific item, the interaction model may push a new dialogue act to request a slot in the information need or a random one. 40 | 4. **None of the above**: In case none of the above cases apply, the interaction model checks if it is coherent to continue with the current agenda or if a new dialogue act should be sampled to keep the conversation going. 
In the latter case, the new dialogue act is sampled based on the transition probabilities from historical dialogues. 41 | 42 | Once all the push operations are done, the agenda is cleaned to discard unnecessary dialogue acts, e.g., duplicates. 43 | 44 | Transition probabilities matrices 45 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 46 | 47 | The interaction model uses transition probabilities matrices to sample new dialogue acts. These matrices are built from historical dialogues when the model is initialized. The transition probabilities are calculated based on the frequency of transitions between intents in the historical dialogues. We consider two matrices: 48 | 49 | * *Single intent*: The set of intents from an utterance is considered individually. That is, the transition probabilities are calculated based on the frequency of each intent following another intent. 50 | * *Compound intent*: The set of intents from an utterance is considered as a whole, i.e., the sequence of intents is considered as a single entity. That is, the transition probabilities are calculated based on the frequency of each sequence of intents following another sequence of intents. 
51 | 52 | For example the following consecutive sequence of dialogue acts: 53 | 54 | | > Agent: [GREETINGS(), ELICIT(genre=?)] 55 | | > User: [GREETINGS(), DISCLOSE(genre=action)] 56 | 57 | will result in the following transition probabilities matrices: 58 | 59 | *Single intent*: 60 | 61 | +-----------+-----------+----------+ 62 | | | GREETINGS | DISCLOSE | 63 | +-----------+-----------+----------+ 64 | | GREETINGS | 0.5 | 0.5 | 65 | +-----------+-----------+----------+ 66 | | ELICIT | 0.5 | 0.5 | 67 | +-----------+-----------+----------+ 68 | 69 | *Compound intent*: 70 | 71 | +-------------------+--------------------+ 72 | | | GREETINGS_DISCLOSE | 73 | +-------------------+--------------------+ 74 | | GREETINGS_ELICIT | 1 | 75 | +-------------------+--------------------+ 76 | 77 | **Reference** 78 | 79 | Shuo Zhang and Krisztian Balog. 2020. Evaluating Conversational Recommender Systems via User Simulation. In Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining (KDD '20). 1512--1520. -------------------------------------------------------------------------------- /docs/source/llm_based.rst: -------------------------------------------------------------------------------- 1 | LLM-based simulator 2 | =================== 3 | 4 | This simulator relies on a large language model (LLM) to generate utterances. Currently, only single zero-shot prompts are supported for generating responses. The interactions with the LLM are managed by the LLM interface. 5 | 6 | Prompt 7 | ------ 8 | 9 | :py:class:`usersimcrs.simulator.llm.prompt.Prompt` 10 | 11 | The prompt is inspired by `[Terragni et al., 2023] `_ It includes the following information: task description, information need, conversational context, and optionally the simulated user persona. The prompt is built as follows: 12 | 13 | | {task_description} PERSONA: {persona} REQUIREMENTS: You are looking for a {item_type} with the following characteristics: {constraints}. 
Once you find a suitable {item_type}, make sure to get the following information: {requests}. 14 | | {conversational_context} 15 | 16 | The persona section is included if the simulated user persona is provided. The placeholder *item_type* is replaced by the type of item the user is looking for such as a restaurant or a movie. The *constraints* and *requests* are extracted from the information need. The *conversational_context* is the history of the conversation up to the current utterance, hence, it is updated each time an utterance is received (agent utterance) or generated (simulated user utterance). 17 | 18 | 19 | LLM interface 20 | ------------- 21 | 22 | :py:mod:`usersimcrs.simulator.llm.interfaces` 23 | 24 | The LLM interface is responsible for interacting with the large language model to generate responses. Currently, two LLM interfaces are supported: Ollama and OpenAI. 25 | 26 | Ollama 27 | ^^^^^^ 28 | 29 | :py:class:`usersimcrs.simulator.llm.interfaces.ollama_interface.OllamaLLMInterface` 30 | 31 | This interface is used to interact with a LLM that is hosted on the `Ollama platform `_. The interface sends requests to the `Ollama API `_ to generate the responses. 32 | 33 | This interface is configured with a YAML file that includes: the model name, the host URL, whether to stream the responses, and the LLM specific options. An example configuration is shown below: 34 | 35 | .. code-block:: yaml 36 | 37 | model: "llama3" 38 | host: OLLAMA_HOST_URL 39 | stream: true 40 | options: 41 | max_tokens: 100 42 | temperature: 0.5 43 | top_p: 0.9 44 | top_k: 0 45 | ... 46 | 47 | 48 | OpenAI 49 | ^^^^^^ 50 | 51 | :py:class:`usersimcrs.simulator.llm.interfaces.openai_interface.OpenAILLMInterface` 52 | 53 | This interface interacts with models hosted on the OpenAI platform using their `API `_. The interface sends requests to the OpenAI API to generate the responses. 
54 | 55 | This interface is configured with a YAML file that includes: the model name, the API key, and the LLM specific options. An example configuration is shown below: 56 | 57 | .. code-block:: yaml 58 | 59 | model: "GPT-4o" 60 | api_key: YOUR_API_KEY 61 | options: 62 | max_tokens: 100 63 | seed: 42 64 | temperature: 0.5 65 | ... 66 | 67 | 68 | **Reference** 69 | 70 | Silvia Terragni, Modestas Filipavicius, Nghia Khau, Bruna Guedes, André Manso, and Roland Mathis. 2023. In-Context Learning User Simulators for Task-Oriented Dialog Systems. arXiv:2306.00774 [cs.CL]. -------------------------------------------------------------------------------- /docs/source/setup_agent.rst: -------------------------------------------------------------------------------- 1 | Setting up an agent 2 | =================== 3 | 4 | 5 | .. contents:: Table of Contents 6 | :depth: 3 7 | 8 | 1. Prepare domain and item collection 9 | ------------------------------------- 10 | 11 | A YAML file with domain-specific slot names must be prepared for the preference model. Additionally, a file containing the item collection is required; currently, this is expected in CSV format. 12 | 13 | .. code-block:: YAML 14 | :caption: domain.yaml 15 | 16 | slot_names: 17 | TITLE: 18 | GENRE: 19 | ACTOR: 20 | KEYWORD: 21 | DIRECTOR: 22 | 23 | .. code-block:: YAML 24 | :caption: item_collection.csv 25 | 26 | movieId,title,genres 27 | 1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy 28 | 2,Jumanji (1995),Adventure|Children|Fantasy 29 | 3,Grumpier Old Men (1995),Comedy|Romance 30 | 4,Waiting to Exhale (1995),Comedy|Drama|Romance 31 | 5,Father of the Bride Part II (1995),Comedy 32 | 6,Heat (1995),Action|Crime|Thriller 33 | 7,Sabrina (1995),Comedy|Romance 34 | 35 | 36 | 2. Provide preference data 37 | -------------------------- 38 | 39 | Preference data is consumed in the form of item ratings and needs to be provided in a CSV file in the shape of user ID, item ID, and rating triples. 40 | 41 | 3. 
Dialogue sample 42 | ------------------ 43 | 44 | A small sample of dialogues with the target CRS needs to be collected. The sample size depends on the complexity of the system, in terms of action space and language variety, but is generally in the order of 5-50 dialogues 45 | 46 | 47 | 4. Annotate sample 48 | ------------------ 49 | 50 | The sample of dialogues must contain utterance-level annotations in terms of intents and entities, as this is required to train the NLU and NLG components. Note that the slots used for annotation should be the same as the ones defined in the domain file (cf. Step 1) and intents should follow the ones defined in the interaction model (cf. Step 4.) 51 | 52 | 53 | 5. Define interaction model 54 | --------------------------- 55 | 56 | A config file containing the users’ and agent’s intent space, as well as the set of expected agents for each user intent, is required for the interaction model. The CIR6 interaction model shipped with library offers a baseline starting point, which may be further tailored according to the behavior and capabilities of the target CRS 57 | 58 | 6. Define user model/population 59 | ------------------------------- 60 | 61 | .. todo:: Change this when we decide more specifically on our use of context and persona. 62 | 63 | Simulation is seeded with a user population that needs to be characterized, in terms of the different contexts (e.g., weekday vs. weekend, alone vs. group setting) and personas (e.g., patient and impatient users) and the number of users to be generated for each combination of context and persona. Each user has their own preference model, which may be instantiated by grounding it to actual preferences (i.e., the ratings dataset given in Step 2) 64 | 65 | 66 | **Note:** The next three steps are done when running the ``run_simulator.py`` script. 67 | 68 | 7. 
Train simulator 69 | ------------------ 70 | 71 | The NLU and NLG components of the simulator are trained using the annotated dialogue sample. 72 | 73 | 8. Run simulation 74 | ----------------- 75 | 76 | Running the simulator will generate a set of simulated conversations for each user with the target CRS and save those to files. 77 | 78 | 9. Perform evaluation 79 | --------------------- 80 | 81 | Evaluation takes the set of simulated dialogues generated in the previous step as input, and measures the performance of the target CRS in terms of the metrics implemented in DialogueKit (cf. Section 3.1) 82 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | A YAML configuration file is necessary to start the simulation; see `default configuration `_ for an example. 5 | Run the following command to start the simulation: 6 | 7 | .. code-block:: shell 8 | 9 | python -m usersimcrs.run_simulation -c 10 | 11 | 12 | Example 13 | ------- 14 | 15 | This example shows how to run simulation using the default configuration and the `IAI MovieBot `_ as the conversational agent. 16 | 17 | 1. Start IAI MovieBot locally 18 | 19 | * Download IAI MovieBot v1.0.1 `here `_. 20 | * Follow the IAI MovieBot installation instructions. 21 | * Start the IAI MovieBot locally: 22 | 23 | .. code-block:: shell 24 | 25 | python -m run_bot -c config/moviebot_config_no_integration.yaml 26 | 27 | Note: the parameter `agent_uri` needs to be updated in the configuration in case IAI MovieBot does not run on the default URI (i.e., `http://127.0.0.1:5001`). 28 | 29 | 2. Run simulation 30 | 31 | .. code-block:: shell 32 | 33 | python -m usersimcrs.run_simulation -c config/default/config_default.yaml 34 | 35 | 36 | After the simulation, the YAML configuration is saved under `data/runs` using the `output_name` parameter. 
37 | The simulated dialogue is saved under `dialogue_export`. -------------------------------------------------------------------------------- /docs/source/user_modeling.rst: -------------------------------------------------------------------------------- 1 | User modeling 2 | ============= 3 | 4 | Preference model 5 | ---------------- 6 | 7 | Preference modeling refers to modeling users' individual tastes and allows for a personalized experience. We model preferences as a Personal Knowledge Graph (PKG), where nodes can be either items or attributes. The preference model assumes a temporal ordering of the preference data and is built such that it remains consistent across simulations and assumes a temporal ordering. 8 | 9 | Context model 10 | ------------- 11 | 12 | In addition to preference and interaction modeling, we also model other user contexts, such as temporal and relational context. 13 | For example, during *weekdays*, a person might be busy with work and have other obligations that can change e.g., the information that they are willing to provide. On the other hand, during *weekends*, people are usually more relaxed and in a better mood in general, which could lead to the opposite effect. 14 | 15 | Persona 16 | ------- 17 | 18 | Persona contains user variables, e.g., age, education level, etc. These characteristics also affect the behaviour and language usage of a person. People with higher education might use more formal or refined language. Age might also affect word choices and sentence structure. 
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 80 3 | target-version = ['py37'] 4 | -------------------------------------------------------------------------------- /requirements/docs_requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==7.3.7 2 | nbsphinx 3 | sphinx-autoapi==3.1.1 4 | sphinx-markdown-tables 5 | sphinx-multiversion==0.2.4 6 | sphinx-rtd-theme==2.0.0 7 | sphinxcontrib-bibtex 8 | recommonmark 9 | myst-parser -------------------------------------------------------------------------------- /requirements/experimentation_requirements.txt: -------------------------------------------------------------------------------- 1 | evaluate==0.4.3 2 | sacrebleu==2.4.3 -------------------------------------------------------------------------------- /requirements/requirements.txt: -------------------------------------------------------------------------------- 1 | pre-commit==3.8.0 2 | flake8==5.0.4 3 | black 4 | pytest==8.2.2 5 | pytest-cov==5.0.0 6 | mypy==1.11.1 7 | docformatter 8 | types-PyYAML 9 | types-requests 10 | botocore>=1.29.29 11 | wget 12 | confuse 13 | websockets<11.0 14 | nltk 15 | joblib 16 | torch==2.0.1 17 | openai==1.30.5 18 | ollama==0.3.1 19 | pandas==2.2.2 20 | git+https://github.com/iai-group/DialogueKit.git 21 | setuptools<=70.0.0 -------------------------------------------------------------------------------- /scripts/datasets/README.md: -------------------------------------------------------------------------------- 1 | # Add new dataset 2 | 3 | This folder contains scripts to add new datasets for the simulators. 
4 | 5 | ## ReDial 6 | 7 | To download and format the dialogues to DialogueKit format, use the following command: 8 | 9 | ```bash 10 | python scripts/datasets/redial/redial_to_dialoguekit.py 11 | ``` 12 | 13 | This will create a folder in `data/datasets/` containing the formatted dialogues. 14 | 15 | You can artificially augment the dialogues with an information need and the utterances with dialogue acts using the following command: 16 | 17 | ```bash 18 | python -m scripts.datasets.redial.augment_redial 19 | ``` 20 | 21 | For more information on the arguments, use the `--help` flag. 22 | -------------------------------------------------------------------------------- /scripts/datasets/information_need_annotation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iai-group/UserSimCRS/13f72923d9be0b27472e3f684b2bca4563332c3d/scripts/datasets/information_need_annotation/__init__.py -------------------------------------------------------------------------------- /scripts/datasets/information_need_annotation/information_need_annotator.py: -------------------------------------------------------------------------------- 1 | """Annotate information need based on annotated dialogue. 2 | 3 | This annotator uses the annotated dialogue to identify a possible information 4 | need that the user had during the conversation. The information need comprises 5 | constraints and requests that user expressed during the conversation. 
6 | """ 7 | 8 | import json 9 | import logging 10 | import os 11 | 12 | import yaml 13 | from ollama import Client, Options 14 | 15 | from dialoguekit.core.dialogue import Dialogue 16 | from usersimcrs.core.information_need import InformationNeed 17 | 18 | DEFAULT_INITIAL_PROMPT_MOVIES_FILE = "scripts/datasets/information_need_annotation/information_need_prompt_movies_default.txt" # noqa: E501 19 | 20 | 21 | class InformationNeedAnnotator: 22 | def __init__( 23 | self, 24 | ollama_config_file: str, 25 | prompt_file: str = DEFAULT_INITIAL_PROMPT_MOVIES_FILE, 26 | ) -> None: 27 | """Initializes the annotator. 28 | 29 | Args: 30 | ollama_config_file: Configuration file for Ollama. 31 | prompt_file: File containing prompt, it should have a placeholder 32 | for the dialogue. Defaults to DEFAULT_INITIAL_PROMPT_MOVIES_FILE. 33 | 34 | Raises: 35 | FileNotFoundError: If the configuration file is not found. 36 | """ 37 | if not os.path.exists(ollama_config_file): 38 | raise FileNotFoundError( 39 | f"No configuration file: {ollama_config_file}" 40 | ) 41 | 42 | configuration = yaml.safe_load(open(ollama_config_file)) 43 | 44 | # Ollama 45 | self.client = Client(host=configuration.get("host")) 46 | self._model = configuration.get("model") 47 | self._options = Options(**configuration.get("options", {})) 48 | 49 | # Prompt 50 | self.prompt = open(prompt_file).read() 51 | 52 | def annotate_information_need(self, dialogue: Dialogue) -> Dialogue: 53 | """Annotates information need in the dialogue. 54 | 55 | Args: 56 | dialogue: Dialogue to annotate. 57 | 58 | Returns: 59 | Dialogue with annotated information need. 
60 | """ 61 | json_dialogue = json.dumps(dialogue.to_dict(), indent=2) 62 | prompt = self.prompt.replace("{dialogue}", json_dialogue) 63 | 64 | response = self.client.generate( 65 | prompt=prompt, model=self._model, options=self._options 66 | ).get("response", "") 67 | 68 | try: 69 | information_need = self.parse_model_output(response) 70 | dialogue.metadata["information_need"] = information_need.to_dict() 71 | except Exception as e: 72 | logging.error( 73 | "Failed to parse model output for dialogue " 74 | f"{dialogue.conversation_id}: {response}\n{e}" 75 | ) 76 | return dialogue 77 | 78 | def parse_model_output(self, response: str) -> InformationNeed: 79 | """Parses the model output to extract information need. 80 | 81 | Args: 82 | response: Model output. 83 | 84 | Returns: 85 | Extracted information need. 86 | """ 87 | json_response = json.loads(response) 88 | json_response["target_items"] = [] 89 | return InformationNeed.from_dict(json_response) 90 | -------------------------------------------------------------------------------- /scripts/datasets/information_need_annotation/information_need_prompt_movies_default.txt: -------------------------------------------------------------------------------- 1 | Given the annotated dialogue below, your task is to identify the underlying information need of the user. The information need corresponds to what the user is looking for in the dialogue. The information need comprises constraints and requests. Constraints are conditions that the recommended movie must satisfy. Requests are properties of the movie that the user inquires about in the dialogue. 2 | 3 | Your should be formatted as a JSON object with two fields: constraints and requests. Strictly consider the content of the dialogue to identify the information need. Do not make assumptions beyond what is stated in the dialogue. 
4 | Constraints are represented as a dictionary where the keys are the slots, and the values are the values that the movie must have for that slot. The possible slots are: 5 | - GENRE: Movie genre 6 | - ACTOR: Actor starring in the movie 7 | - KEYWORD: Keyword associated with the movie 8 | - DIRECTOR: Director of the movie 9 | Requests are represented as a list of slots. The possible slots are: 10 | - PLOT: Movie plot 11 | - RATING: Movie rating as a number 12 | - ACTOR: Actor starring in the movie 13 | - DIRECTOR: Director of the movie 14 | 15 | Annotated dialogue: 16 | {dialogue} 17 | 18 | Information need: -------------------------------------------------------------------------------- /scripts/datasets/redial/augment_redial.py: -------------------------------------------------------------------------------- 1 | """Augment the formatted ReDial dataset with additional information. 2 | 3 | The augmentation is artificial and done to create training data for neural user 4 | simulators. Each dialogue is augmented with an information need and each 5 | utterance is augmented with dialogue acts. The information need is inferred from 6 | the dialogue acts, e.g., the annotations of a inquire dialogue act can serve as 7 | requests. 8 | """ 9 | 10 | import argparse 11 | import json 12 | 13 | from tqdm import tqdm 14 | 15 | from dialoguekit.core.dialogue import Dialogue 16 | from dialoguekit.nlu.nlu import NLU 17 | from dialoguekit.participant.participant import DialogueParticipant 18 | from dialoguekit.utils.dialogue_reader import json_to_dialogues 19 | from scripts.datasets.information_need_annotation.information_need_annotator import ( # noqa: E501 20 | InformationNeedAnnotator, 21 | ) 22 | from usersimcrs.nlu.lm.lm_dialogue_act_extractor import LMDialogueActsExtractor 23 | 24 | 25 | def annotate_dialogue_acts( 26 | user_nlu: NLU, agent_nlu: NLU, dialogue: Dialogue 27 | ) -> Dialogue: 28 | """Annotates the dialogue acts in the dialogue. 
29 | 30 | Args: 31 | user_nlu: NLU to use for user dialogue act annotation. 32 | agent_nlu: NLU to use for agent dialogue act annotation. 33 | dialogue: Dialogue to annotate. 34 | 35 | Returns: 36 | Dialogue with annotated dialogue acts. 37 | """ 38 | for utterance in dialogue.utterances: 39 | if utterance.participant == DialogueParticipant.USER: 40 | dialogue_acts = user_nlu.extract_dialogue_acts(utterance) 41 | else: 42 | dialogue_acts = agent_nlu.extract_dialogue_acts(utterance) 43 | utterance.add_dialogue_acts(dialogue_acts) 44 | return dialogue 45 | 46 | 47 | def augment_dialogue( 48 | user_nlu: NLU, 49 | agent_nlu: NLU, 50 | information_need_annotator: InformationNeedAnnotator, 51 | dialogue: Dialogue, 52 | ) -> Dialogue: 53 | """Augments a dialogue with dialogue acts and information need. 54 | 55 | Args: 56 | user_nlu: NLU to use for user dialogue act annotation. 57 | agent_nlu: NLU to use for agent dialogue act annotation. 58 | information_need_annotator: Information need annotator. 59 | dialogue: Dialogue to augment. 60 | 61 | Returns: 62 | Augmented dialogue. 63 | """ 64 | dialogue = annotate_dialogue_acts(user_nlu, agent_nlu, dialogue) 65 | dialogue = information_need_annotator.annotate_information_need(dialogue) 66 | return dialogue 67 | 68 | 69 | def parse_args() -> argparse.Namespace: 70 | """Parses command-line arguments. 71 | 72 | Returns: 73 | Parsed command-line arguments. 74 | """ 75 | parser = argparse.ArgumentParser( 76 | description=( 77 | "Augment the formatted ReDial dataset with additional information." 
if __name__ == "__main__":
    args = parse_args()

    dialogues = json_to_dialogues(args.input_path)

    # Separate NLU components for user and agent utterances.
    user_nlu = NLU(LMDialogueActsExtractor(args.user_nlu_config))
    agent_nlu = NLU(LMDialogueActsExtractor(args.agent_nlu_config))

    information_need_annotator = InformationNeedAnnotator(
        args.ollama_config, args.information_need_prompt
    )

    augmented_dialogues = []
    for dialogue in tqdm(dialogues):
        augmented_dialogues.append(
            augment_dialogue(
                user_nlu, agent_nlu, information_need_annotator, dialogue
            )
        )

    # Serialize all augmented dialogues to the output JSON file.
    serialized_dialogues = [
        dialogue.to_dict() for dialogue in augmented_dialogues
    ]
    with open(args.output_path, "w") as output_file:
        json.dump(serialized_dialogues, output_file, indent=4)
23 | """ 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument( 26 | "--dialogues", 27 | type=str, 28 | required=True, 29 | help="Path to the dialogues.", 30 | ) 31 | return parser.parse_args() 32 | 33 | 34 | if __name__ == "__main__": 35 | args = parse_args() 36 | 37 | # Load dialogues 38 | dialogues = json_to_dialogues(args.dialogues) 39 | print(f"Loaded {len(dialogues)} dialogues.") 40 | 41 | # Satisfaction classifier 42 | satisfaction_classifier = SatisfactionClassifierSVM() 43 | 44 | # Evaluate dialogues 45 | scores: Dict[str, Dict[str, float]] = defaultdict(dict) 46 | 47 | for i, dialogue in enumerate(dialogues): 48 | scores[dialogue.agent_id][ 49 | i 50 | ] = satisfaction_classifier.classify_last_n_dialogue( 51 | dialogue, last_n=None 52 | ) 53 | 54 | # Summary 55 | for agent, agent_scores in scores.items(): 56 | avg_score = mean(agent_scores.values()) 57 | stdev_score = stdev(agent_scores.values()) 58 | max_score = max(agent_scores.values()) 59 | min_score = min(agent_scores.values()) 60 | print(f"Agent: {agent} / Num. dialogues: {len(agent_scores)}") 61 | print(f"Min score: {min_score}") 62 | print(f"Max score: {max_score}") 63 | print(f"Average score: {avg_score:.3f} (stdev: {stdev_score:.3f})") 64 | -------------------------------------------------------------------------------- /scripts/nlg/generative_lm_nlg.py: -------------------------------------------------------------------------------- 1 | """Script to experiment with LMGenerativeNLG. 2 | 3 | To run this script, you need to install the additional requirements in 4 | `requirements/experimentation_requirements.txt`. 
5 | """ 6 | 7 | import argparse 8 | from typing import List 9 | 10 | import evaluate 11 | from tqdm import tqdm 12 | 13 | from dialoguekit.core.annotated_utterance import AnnotatedUtterance 14 | from dialoguekit.core.dialogue import Dialogue 15 | from dialoguekit.core.utterance import Utterance 16 | from dialoguekit.participant.participant import DialogueParticipant 17 | from dialoguekit.utils.dialogue_reader import json_to_dialogues 18 | from usersimcrs.nlg.lm.nlg_generative_lm import LMGenerativeNLG 19 | 20 | 21 | def parse_args() -> argparse.Namespace: 22 | """Parses command-line arguments. 23 | 24 | Returns: 25 | Parsed command-line arguments. 26 | """ 27 | parser = argparse.ArgumentParser( 28 | description="Experimentation with generative NLG." 29 | ) 30 | parser.add_argument( 31 | "--ollama-config-file", 32 | type=str, 33 | default="config/llm_interface/config_ollama_default.yaml", 34 | help="Ollama configuration file.", 35 | ) 36 | parser.add_argument( 37 | "--prompt-file", 38 | type=str, 39 | default="data/datasets/iard/user_utterance_nlg_prompt.txt", 40 | help="Prompt file.", 41 | ) 42 | parser.add_argument( 43 | "--prompt-prefix", 44 | type=str, 45 | default="Generated utterance:", 46 | help="Prefix to remove from generated utterances.", 47 | ) 48 | parser.add_argument( 49 | "--input-dialogues", 50 | type=str, 51 | default="data/datasets/iard/formatted_IARD_annotated_gold.json", 52 | help="Input dialogues JSON file.", 53 | ) 54 | return parser.parse_args() 55 | 56 | 57 | def filter_user_utterances(dialogue: Dialogue) -> List[Utterance]: 58 | """Filters dialogue utterances to keep only user utterances. 59 | 60 | Args: 61 | dialogue: Dialogue. 62 | 63 | Returns: 64 | List of user utterances. 
65 | """ 66 | return [ 67 | utterance 68 | for utterance in dialogue.utterances 69 | if utterance.participant == DialogueParticipant.USER 70 | ] 71 | 72 | 73 | def compute_sacrebleu_score( 74 | gold_utterances: List[Utterance], 75 | generated_utterances: List[AnnotatedUtterance], 76 | ) -> float: 77 | """Computes the SacreBLEU score for the generated utterances. 78 | 79 | Args: 80 | gold_utterances: Gold utterances. 81 | generated_utterances: Generated utterances. 82 | 83 | Returns: 84 | SacreBLEU score. 85 | """ 86 | metric = evaluate.load("sacrebleu") 87 | gold_nl_utterances = [[utterance.text] for utterance in gold_utterances] 88 | generated_nl_utterances = [ 89 | utterance.text for utterance in generated_utterances 90 | ] 91 | return metric.compute( 92 | predictions=generated_nl_utterances, references=gold_nl_utterances 93 | ) 94 | 95 | 96 | def generate_utterances( 97 | nlg: LMGenerativeNLG, gold_utterances: List[Utterance] 98 | ) -> List[AnnotatedUtterance]: 99 | """Generates utterances using the NLG. 100 | 101 | Args: 102 | nlg: NLG. 103 | gold_utterances: Gold utterances. 104 | 105 | Returns: 106 | List of generated utterances. 
107 | """ 108 | generated_utterances = [] 109 | for utterance in tqdm(gold_utterances): 110 | generated_utterances.append( 111 | nlg.generate_utterance_text( 112 | utterance.dialogue_acts, 113 | utterance.annotations, 114 | ) 115 | ) 116 | return generated_utterances 117 | 118 | 119 | if __name__ == "__main__": 120 | args = parse_args() 121 | 122 | dialogues = json_to_dialogues(args.input_dialogues) 123 | user_utterances = [] 124 | for dialogue in dialogues: 125 | user_utterances.extend(filter_user_utterances(dialogue)) 126 | 127 | nlg = LMGenerativeNLG( 128 | args.ollama_config_file, 129 | args.prompt_file, 130 | args.prompt_prefix, 131 | ) 132 | 133 | generated_utterances = generate_utterances(nlg, user_utterances) 134 | 135 | sacrebleu_score = compute_sacrebleu_score( 136 | user_utterances, generated_utterances 137 | ) 138 | print(f"SacreBLEU score:\n{sacrebleu_score}") 139 | -------------------------------------------------------------------------------- /scripts/nlu/dialogue_acts_extraction.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from typing import Dict, List 3 | 4 | import pandas as pd 5 | from tqdm import tqdm 6 | 7 | from dialoguekit.participant.participant import DialogueParticipant 8 | from dialoguekit.utils.dialogue_reader import json_to_dialogues 9 | from scripts.nlu.metrics import ( 10 | dialogue_acts_f1_score, 11 | dialogue_acts_precision, 12 | dialogue_acts_recall, 13 | intent_error_rate, 14 | slot_error_rate, 15 | ) 16 | from usersimcrs.nlu.lm.lm_dialogue_act_extractor import LMDialogueActsExtractor 17 | 18 | 19 | def parse_args() -> argparse.Namespace: 20 | """Parses command-line arguments. 21 | 22 | Returns: 23 | Parsed command-line arguments. 24 | """ 25 | parser = argparse.ArgumentParser( 26 | description="Experimentation with dialogue acts extraction." 
if __name__ == "__main__":
    args = parse_args()

    user_dialogue_acts_extractor = LMDialogueActsExtractor(
        args.user_extractor_config
    )
    agent_dialogue_acts_extractor = LMDialogueActsExtractor(
        args.agent_extractor_config
    )

    annotated_dialogues = json_to_dialogues(args.annotated_dialogues)

    print(
        f"Testing dialogue acts extraction with {args.model} model on "
        f"{args.annotated_dialogues}"
    )

    metric_names = ["ER_slot", "ER_intent", "Recall_DA", "Prec_DA", "F1_DA"]
    # BUG FIX: the previous dict.fromkeys([...], {...}) shared a SINGLE
    # inner dict (and its lists) across "Global", "User", and "Agent", so
    # all three columns were identical and every utterance was counted
    # twice. Build an independent inner dict per key instead.
    scores: Dict[str, Dict[str, List[float]]] = {
        key: {metric: [] for metric in metric_names}
        for key in ["Global", "User", "Agent"]
    }

    for dialogue in tqdm(annotated_dialogues):
        for utterance in dialogue.utterances:
            is_user = utterance.participant == DialogueParticipant.USER
            extractor = (
                user_dialogue_acts_extractor
                if is_user
                else agent_dialogue_acts_extractor
            )
            extracted_dialogue_acts = extractor.extract_dialogue_acts(
                utterance
            )

            # Compute all metrics once for this utterance.
            utterance_scores = {
                "ER_slot": slot_error_rate(
                    extracted_dialogue_acts, utterance.dialogue_acts
                ),
                "ER_intent": intent_error_rate(
                    extracted_dialogue_acts, utterance.dialogue_acts
                ),
                "Recall_DA": dialogue_acts_recall(
                    extracted_dialogue_acts, utterance.dialogue_acts
                ),
                "Prec_DA": dialogue_acts_precision(
                    extracted_dialogue_acts, utterance.dialogue_acts
                ),
                "F1_DA": dialogue_acts_f1_score(
                    extracted_dialogue_acts, utterance.dialogue_acts
                ),
            }

            # Record under "Global" and under the participant-specific key.
            participant_key = "User" if is_user else "Agent"
            for key in ("Global", participant_key):
                for metric, value in utterance_scores.items():
                    scores[key][metric].append(value)

    # Average each metric per key; empty lists (e.g. no agent utterances)
    # average to 0.0 instead of raising ZeroDivisionError.
    evaluation_results = pd.DataFrame(
        {
            k: {
                m: sum(v) / len(v) if v else 0.0
                for m, v in scores[k].items()
            }
            for k in scores
        }
    )

    print("\nEvaluation results:")
    print(evaluation_results.round(3))
-------------------------------------------------------------------------------- 1 | """Define the evaluation metric for the dialogue act extraction. 2 | 3 | We define 5 metrics: 4 | 1. Slot Error Rate (ER_slot) 5 | 2. Intent Error Rate (ER_intent) 6 | 3. Dialogue Acts Recall (Recall_DA) 7 | 4. Dialogue Acts Precision (Prec_DA) 8 | 5. Dialogue Acts F1 Score (F1_DA) 9 | """ 10 | 11 | from typing import List, Tuple 12 | 13 | from dialoguekit.core.dialogue_act import DialogueAct 14 | 15 | 16 | def _get_slot_value_pairs( 17 | dialogue_acts: List[DialogueAct], 18 | ) -> List[Tuple[str, str]]: 19 | """Gets the slot-value pairs from given dialogue acts.""" 20 | return [ 21 | (annotation.slot, annotation.value) 22 | for da in dialogue_acts 23 | for annotation in da.annotations 24 | ] 25 | 26 | 27 | def _get_intents(dialogue_acts: List[DialogueAct]) -> List[str]: 28 | """Gets the intents from given dialogue acts.""" 29 | return [da.intent.label for da in dialogue_acts] 30 | 31 | 32 | def slot_error_rate( 33 | predicted_dialogue_acts: List[DialogueAct], 34 | target_dialogue_acts: List[DialogueAct], 35 | ) -> float: 36 | """Calculates the Slot Error Rate (ER_slot). 37 | 38 | Args: 39 | predicted_dialogue_acts: Predicted dialogue acts. 40 | target_dialogue_acts: Target dialogue acts. 41 | 42 | Returns: 43 | Slot Error Rate. 
44 | """ 45 | predicted_slot_value_pairs = _get_slot_value_pairs(predicted_dialogue_acts) 46 | target_slot_value_pairs = _get_slot_value_pairs(target_dialogue_acts) 47 | 48 | num_correct_slot_value_pairs = len( 49 | set(predicted_slot_value_pairs) & set(target_slot_value_pairs) 50 | ) 51 | num_total_slot_value_pairs = len(target_slot_value_pairs) 52 | if num_total_slot_value_pairs == 0: 53 | return 0.0 54 | return 1 - num_correct_slot_value_pairs / num_total_slot_value_pairs 55 | 56 | 57 | def intent_error_rate( 58 | predicted_dialogue_acts: List[DialogueAct], 59 | target_dialogue_acts: List[DialogueAct], 60 | ) -> float: 61 | """Calculates the Intent Error Rate (ER_intent). 62 | 63 | Args: 64 | predicted_dialogue_acts: Predicted dialogue acts. 65 | target_dialogue_acts: Target dialogue acts. 66 | 67 | Returns: 68 | Intent Error Rate. 69 | """ 70 | predicted_intents = _get_intents(predicted_dialogue_acts) 71 | target_intents = _get_intents(target_dialogue_acts) 72 | 73 | num_correct_intents = len(set(predicted_intents) & set(target_intents)) 74 | num_total_intents = len(target_intents) 75 | if num_total_intents == 0: 76 | return 0.0 77 | return 1 - num_correct_intents / num_total_intents 78 | 79 | 80 | def dialogue_acts_recall( 81 | predicted_dialogue_acts: List[DialogueAct], 82 | target_dialogue_acts: List[DialogueAct], 83 | ) -> float: 84 | """Calculates the Dialogue Acts Recall (Recall_DA). 85 | 86 | Args: 87 | predicted_dialogue_acts: Predicted dialogue acts. 88 | target_dialogue_acts: Target dialogue acts. 89 | 90 | Returns: 91 | Dialogue Acts Recall. 
92 | """ 93 | num_correct_dialogue_acts = len( 94 | set(predicted_dialogue_acts) & set(target_dialogue_acts) 95 | ) 96 | num_total_dialogue_acts = len(target_dialogue_acts) 97 | if num_total_dialogue_acts == 0: 98 | return 0.0 99 | return num_correct_dialogue_acts / num_total_dialogue_acts 100 | 101 | 102 | def dialogue_acts_precision( 103 | predicted_dialogue_acts: List[DialogueAct], 104 | target_dialogue_acts: List[DialogueAct], 105 | ) -> float: 106 | """Calculates the Dialogue Acts Precision (Prec_DA). 107 | 108 | Args: 109 | predicted_dialogue_acts: Predicted dialogue acts. 110 | target_dialogue_acts: Target dialogue acts. 111 | 112 | Returns: 113 | Dialogue Acts Precision. 114 | """ 115 | num_correct_dialogue_acts = len( 116 | set(predicted_dialogue_acts) & set(target_dialogue_acts) 117 | ) 118 | num_total_dialogue_acts = len(predicted_dialogue_acts) 119 | if num_total_dialogue_acts == 0: 120 | return 0.0 121 | return num_correct_dialogue_acts / num_total_dialogue_acts 122 | 123 | 124 | def dialogue_acts_f1_score( 125 | predicted_dialogue_acts: List[DialogueAct], 126 | target_dialogue_acts: List[DialogueAct], 127 | ) -> float: 128 | """Calculates the Dialogue Acts F1 Score (F1_DA). 129 | 130 | Args: 131 | predicted_dialogue_acts: Predicted dialogue acts. 132 | target_dialogue_acts: Target dialogue acts. 133 | 134 | Returns: 135 | Dialogue Acts F1 Score. 
136 | """ 137 | recall = dialogue_acts_recall(predicted_dialogue_acts, target_dialogue_acts) 138 | precision = dialogue_acts_precision( 139 | predicted_dialogue_acts, target_dialogue_acts 140 | ) 141 | if precision + recall == 0.0: 142 | return 0.0 143 | return 2 * (precision * recall) / (precision + recall) 144 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append(".") 4 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Fixtures for the tests.""" 2 | 3 | import os 4 | 5 | import pytest 6 | 7 | from usersimcrs.core.information_need import InformationNeed 8 | from usersimcrs.core.simulation_domain import SimulationDomain 9 | from usersimcrs.items.item import Item 10 | from usersimcrs.items.item_collection import ItemCollection 11 | from usersimcrs.simulator.neural.tus.tus_feature_handler import ( 12 | TUSFeatureHandler, 13 | ) 14 | 15 | DOMAIN_YAML_FILE = "tests/data/domains/movies.yaml" 16 | ITEMS_CSV_FILE = "tests/data/items/movies_w_keywords.csv" 17 | 18 | 19 | @pytest.fixture(scope="session") 20 | def domain() -> SimulationDomain: 21 | """Domain fixture.""" 22 | return SimulationDomain(DOMAIN_YAML_FILE) 23 | 24 | 25 | @pytest.fixture(scope="module") 26 | def information_need() -> InformationNeed: 27 | """Information need fixture.""" 28 | constraints = {"GENRE": "Comedy", "DIRECTOR": "Steven Spielberg"} 29 | requests = ["PLOT", "RATING"] 30 | target_items = [ 31 | Item( 32 | "1", 33 | { 34 | "GENRE": "Comedy", 35 | "DIRECTOR": "Steven Spielberg", 36 | "RATING": 4.5, 37 | "PLOT": "A movie plot", 38 | }, 39 | ) 40 | ] 41 | return InformationNeed(target_items, constraints, requests) 42 | 43 | 44 | @pytest.fixture(scope="session") 45 | def item_collection(domain: 
@pytest.fixture(scope="module")
def feature_handler(domain: SimulationDomain) -> TUSFeatureHandler:
    """Returns the feature handler."""
    handler = TUSFeatureHandler(
        domain=domain,
        max_turn_feature_length=40,
        context_depth=2,
        user_actions=["inform", "request"],
        agent_actions=["elicit", "recommend", "bye"],
    )

    # Sanity-check that the action vocabularies were stored as given.
    assert handler._user_actions == ["inform", "request"]
    assert handler._agent_actions == ["elicit", "recommend", "bye"]

    return handler
@pytest.mark.parametrize(
    "slot,expected_value",
    [
        ("GENRE", "Comedy"),
        ("DIRECTOR", "Steven Spielberg"),
        ("KEYWORDS", None),
    ],
)
def test_get_constraint_value(
    information_need: InformationNeed, slot: str, expected_value: str
) -> None:
    """Test get_constraint_value.

    Args:
        information_need: Information need.
        slot: Slot to look up.
        expected_value: Expected constraint value (None for unknown slots).
    """
    value = information_need.get_constraint_value(slot)
    assert value == expected_value
71 | """ 72 | assert information_need.to_dict() == { 73 | "target_items": [ 74 | { 75 | "item_id": "1", 76 | "properties": { 77 | "GENRE": "Comedy", 78 | "DIRECTOR": "Steven Spielberg", 79 | "RATING": 4.5, 80 | "PLOT": "A movie plot", 81 | }, 82 | } 83 | ], 84 | "constraints": {"GENRE": "Comedy", "DIRECTOR": "Steven Spielberg"}, 85 | "requests": ["PLOT", "RATING"], 86 | } 87 | 88 | 89 | def test_from_dict() -> None: 90 | """Tests from_dict.""" 91 | data = { 92 | "target_items": [ 93 | { 94 | "item_id": "1", 95 | "properties": { 96 | "GENRE": "Comedy", 97 | "DIRECTOR": "Steven Spielberg", 98 | "PLOT": "A movie plot", 99 | }, 100 | }, 101 | { 102 | "item_id": "2", 103 | "properties": { 104 | "GENRE": "Drama", 105 | "ACTOR": "Steven Spielberg", 106 | "RATING": 4.5, 107 | }, 108 | }, 109 | ], 110 | "constraints": {"GENRE": "Comedy", "DIRECTOR": "Steven Spielberg"}, 111 | "requests": ["PLOT", "RATING"], 112 | } 113 | expected_information_need = InformationNeed( 114 | [ 115 | Item( 116 | "1", 117 | { 118 | "GENRE": "Comedy", 119 | "DIRECTOR": "Steven Spielberg", 120 | "PLOT": "A movie plot", 121 | }, 122 | ), 123 | Item( 124 | "2", 125 | {"GENRE": "Drama", "ACTOR": "Steven Spielberg", "RATING": 4.5}, 126 | ), 127 | ], 128 | {"GENRE": "Comedy", "DIRECTOR": "Steven Spielberg"}, 129 | ["PLOT", "RATING"], 130 | ) 131 | loaded_information_need = InformationNeed.from_dict(data) 132 | assert [target.id for target in loaded_information_need.target_items] == [ 133 | target.id for target in expected_information_need.target_items 134 | ] 135 | assert ( 136 | loaded_information_need.constraints 137 | == expected_information_need.constraints 138 | ) 139 | assert ( 140 | loaded_information_need.requested_slots 141 | == expected_information_need.requested_slots 142 | ) 143 | -------------------------------------------------------------------------------- /tests/data/domains/movies.yaml: -------------------------------------------------------------------------------- 1 | name: 
test_movies 2 | slot_names: 3 | TITLE: 4 | GENRE: 5 | ACTOR: 6 | KEYWORD: 7 | DIRECTOR: 8 | PLOT: 9 | RATING: 10 | RELEASE_DATE: 11 | 12 | requestable_slots: 13 | - PLOT 14 | - RATING 15 | - RELEASE_DATE 16 | - TITLE 17 | - GENRE 18 | - ACTOR 19 | - KEYWORD 20 | - DIRECTOR 21 | 22 | informable_slots: 23 | - GENRE 24 | - ACTOR 25 | - KEYWORD 26 | - DIRECTOR 27 | -------------------------------------------------------------------------------- /tests/data/interaction_models/crs_v1.yaml: -------------------------------------------------------------------------------- 1 | name: intent_schema 2 | description: Intent schema for CIR by Afzali, Drzewiecki, and Balog 3 | 4 | # Minimum intents required 5 | required_intents: 6 | INTENT_START: DISCLOSE.NON-DISCLOSE 7 | INTENT_STOP: STOP 8 | INTENT_ITEM_CONSUMED: NOTE.YES 9 | INTENT_LIKE: NOTE.LIKE 10 | INTENT_DISLIKE: NOTE.DISLIKE 11 | INTENT_NEUTRAL: NOTE.NEUTRAL 12 | INTENT_DISCLOSE: DISCLOSE 13 | INTENT_INQUIRE: INQUIRE 14 | INTENT_DONT_KNOW: DONT-KNOW 15 | INTENT_YES: YES 16 | INTENT_NO: NO 17 | 18 | # Possible user intents with optional description. 
19 | user_intents: 20 | COMPLETE: 21 | expected_agent_intents: 22 | - END 23 | DISCLOSE.NON-DISCLOSE: 24 | expected_agent_intents: 25 | - INQUIRE 26 | - INQUIRE.ELICIT 27 | - DISCLOSE.NON-DISCLOSE 28 | DISCLOSE: 29 | expected_agent_intents: 30 | - INQUIRE.ELICIT 31 | - REVEAL 32 | - REVEAL.NONE 33 | REVEAL: 34 | expected_agent_intents: 35 | REVEAL.EXPAND: 36 | expected_agent_intents: 37 | - INQUIRE.ELICIT 38 | - REVEAL 39 | - REVEAL.NONE 40 | REVEAL.REFINE: 41 | expected_agent_intents: 42 | - INQUIRE.ELICIT 43 | - REVEAL 44 | - REVEAL.NONE 45 | REVEAL.REVISE: 46 | expected_agent_intents: 47 | - INQUIRE.ELICIT 48 | - REVEAL 49 | - REVEAL.NONE 50 | remove_user_preference: true 51 | INQUIRE: 52 | expected_agent_intents: 53 | - INQUIRE.ELICIT 54 | - REVEAL 55 | - REVEAL.SIMILAR 56 | - REVEAL.NONE 57 | INQUIRE.SIMILAR: 58 | expected_agent_intents: 59 | - REVEAL 60 | - REVEAL.SIMILAR 61 | - REVEAL.NONE 62 | INQUIRE.ITEMINFO: 63 | expected_agent_intents: 64 | - INQUIRE.MORE 65 | INQUIRE.MORE: 66 | expected_agent_intents: 67 | - DISCLOSE.MORE 68 | NOTE: 69 | expected_agent_intents: 70 | - INQUIRE.NEXT 71 | - INQUIRE.MORE 72 | - END 73 | - REVEAL 74 | - REVEAL.SIMILAR 75 | NOTE.DISLIKE: 76 | expected_agent_intents: 77 | preference_contingent: NEGATIVE 78 | NOTE.LIKE: 79 | expected_agent_intents: 80 | - INQUIRE.NEXT 81 | - REVEAL 82 | - REVEAL.SIMILAR 83 | preference_contingent: POSITIVE 84 | NOTE.NO: 85 | expected_agent_intents: 86 | - REVEAL 87 | - INQUIRE.NEXT 88 | preference_contingent: NOT_CONSUMED 89 | NOTE.YES: 90 | expected_agent_intents: 91 | - INQUIRE.ELICIT 92 | - REVEAL 93 | - REVEAL.SIMILAR 94 | preference_contingent: CONSUMED 95 | NOTE.ACCEPT: 96 | expected_agent_intents: 97 | - INQUIRE.NEXT 98 | 99 | # List of agent intents (including sub-intents) that elicit preferences. 100 | agent_elicit_intents: 101 | - INQUIRE 102 | - INQUIRE.ELICIT 103 | 104 | # List of agent intents (including sub-intents) that are for set retrieval. 
105 | agent_set_retrieval: 106 | - REVEAL 107 | - REVEAL.SIMILAR 108 | - REVEAL.NONE 109 | 110 | # List of agent intents (including sub-intents) that are for inquiries. 111 | agent_inquire_intents: 112 | - INQUIRE 113 | 114 | # Reward settings 115 | REWARD: 116 | full_set_points: 20 117 | intents: 118 | INQUIRE: 4 119 | repeat_penalty: 1 120 | cost: 1 -------------------------------------------------------------------------------- /tests/dialogue_management/test_dialogue_state_tracker.py: -------------------------------------------------------------------------------- 1 | """Tests for the dialogue state tracker module.""" 2 | 3 | import pytest 4 | from dialoguekit.core.dialogue_act import DialogueAct 5 | from dialoguekit.core.intent import Intent 6 | from dialoguekit.core.slot_value_annotation import SlotValueAnnotation 7 | from dialoguekit.participant import DialogueParticipant 8 | 9 | from usersimcrs.dialogue_management.dialogue_state_tracker import ( 10 | DialogueStateTracker, 11 | ) 12 | 13 | 14 | @pytest.fixture(scope="module") 15 | def dialogue_state_tracker() -> DialogueStateTracker: 16 | """Fixture for the dialogue state tracker.""" 17 | dst = DialogueStateTracker() 18 | 19 | initial_state = dst.get_current_state() 20 | assert initial_state.utterance_count == 0 21 | assert initial_state.agent_dialogue_acts == [] 22 | assert initial_state.user_dialogue_acts == [] 23 | assert initial_state.belief_state == {} 24 | return dst 25 | 26 | 27 | def test_update_state_agent( 28 | dialogue_state_tracker: DialogueStateTracker, 29 | ) -> None: 30 | """Tests dialogue state update with agent dialogue acts.""" 31 | dialogue_acts = [ 32 | DialogueAct(Intent("greet")), 33 | DialogueAct( 34 | Intent("elicit"), annotations=[SlotValueAnnotation("GENRE")] 35 | ), 36 | ] 37 | 38 | dialogue_state_tracker.update_state( 39 | dialogue_acts, DialogueParticipant.AGENT 40 | ) 41 | 42 | current_state = dialogue_state_tracker.get_current_state() 43 | assert current_state.utterance_count 
== 1 44 | assert current_state.agent_dialogue_acts == [dialogue_acts] 45 | assert current_state.user_dialogue_acts == [] 46 | print(current_state.belief_state) 47 | assert current_state.belief_state == {"GENRE": []} 48 | 49 | 50 | def test_update_state_user( 51 | dialogue_state_tracker: DialogueStateTracker, 52 | ) -> None: 53 | """Tests dialogue state update with user dialogue acts.""" 54 | dialogue_acts = [ 55 | DialogueAct( 56 | Intent("inform"), 57 | annotations=[SlotValueAnnotation("GENRE", "comedy")], 58 | ), 59 | DialogueAct( 60 | Intent("request"), annotations=[SlotValueAnnotation("YEAR")] 61 | ), 62 | ] 63 | 64 | dialogue_state_tracker.update_state(dialogue_acts, DialogueParticipant.USER) 65 | 66 | current_state = dialogue_state_tracker.get_current_state() 67 | assert current_state.utterance_count == 2 68 | assert len(current_state.agent_dialogue_acts) == 1 69 | assert current_state.user_dialogue_acts == [dialogue_acts] 70 | assert current_state.belief_state == {"GENRE": ["comedy"], "YEAR": []} 71 | -------------------------------------------------------------------------------- /tests/items/test_item_collection.py: -------------------------------------------------------------------------------- 1 | """Tests for ItemCollection.""" 2 | 3 | from typing import Any, Dict, List 4 | 5 | import pytest 6 | from dialoguekit.core.slot_value_annotation import SlotValueAnnotation 7 | 8 | from usersimcrs.items.item_collection import ItemCollection 9 | 10 | 11 | @pytest.fixture 12 | def movie() -> Dict[str, Any]: 13 | """Movie fixture representing the first item of the collection.""" 14 | return { 15 | "ID": "1", 16 | "TITLE": "Toy Story (1995)", 17 | "GENRE": ["Adventure", "Animation", "Children", "Comedy", "Fantasy"], 18 | "KEYWORD": [ 19 | "animation", 20 | "kids and family", 21 | "pixar animation", 22 | "computer animation", 23 | "toys", 24 | ], 25 | "YEAR": "1995", 26 | } 27 | 28 | 29 | def test_load_items_csv( 30 | item_collection: ItemCollection, movie: Dict[str, 
Any] 31 | ) -> None: 32 | """Tests items loading with a domain and mapping.""" 33 | 34 | item = item_collection.get_item(movie["ID"]) 35 | 36 | for property in ["TITLE", "GENRE"]: 37 | assert item.get_property(property) == movie[property] 38 | assert item.get_property("YEAR") is None 39 | 40 | 41 | def test_get_possible_property_values( 42 | item_collection: ItemCollection, 43 | ) -> None: 44 | """Tests using slot with different types (str, list) and unknown slot.""" 45 | 46 | genres = item_collection.get_possible_property_values("GENRE") 47 | assert len(genres) == 20 48 | assert { 49 | "Adventure", 50 | "Animation", 51 | "Children", 52 | "Comedy", 53 | "Fantasy", 54 | }.issubset(genres) 55 | assert not {"Biography", "Short Film"}.issubset(genres) 56 | 57 | titles = item_collection.get_possible_property_values("TITLE") 58 | assert len(titles) == 13813 59 | assert "Toy Story (1995)" in titles 60 | assert "Toy Story 4" not in titles 61 | 62 | unknown_property = item_collection.get_possible_property_values("UNKNOWN") 63 | assert len(unknown_property) == 0 64 | 65 | 66 | @pytest.mark.parametrize( 67 | "annotations, expected_num_matching_items", 68 | [ 69 | ([], 0), 70 | ([SlotValueAnnotation("GENRE", "Adventure")], 1507), 71 | ( 72 | [ 73 | SlotValueAnnotation("GENRE", "Adventure"), 74 | SlotValueAnnotation("GENRE", "Comedy"), 75 | ], 76 | 464, 77 | ), 78 | ], 79 | ) 80 | def test_get_items_by_properties( 81 | item_collection: ItemCollection, 82 | annotations: List[SlotValueAnnotation], 83 | expected_num_matching_items: int, 84 | ) -> None: 85 | """Tests getting items by properties.""" 86 | matching_items = item_collection.get_items_by_properties(annotations) 87 | assert len(matching_items) == expected_num_matching_items 88 | -------------------------------------------------------------------------------- /tests/items/test_ratings.py: -------------------------------------------------------------------------------- 1 | """Tests for Ratings.""" 2 | 3 | from typing import 
def simple_user_item_sampler(
    item_ratings: Dict[str, float],
    historical_ratio: float,
) -> List[str]:
    """Samples the first portion of items rated by a given user.

    Args:
        item_ratings: Item ratings to sample.
        historical_ratio: Ratio of items ratings to be used as historical
            data.

    Returns:
        List of sampled item ids.
    """
    # Number of leading items (in insertion order) to keep as historical data.
    sample_size = int(historical_ratio * len(item_ratings))
    sampled_ids: List[str] = []
    for item_id in item_ratings:
        if len(sampled_ids) >= sample_size:
            break
        sampled_ids.append(item_id)
    return sampled_ids
def test_add_user_item_rating_nonexistent_item(ratings: Ratings) -> None:
    """Tests that rating an item absent from the collection has no effect.

    Args:
        ratings: Ratings fixture loaded from the test CSV file.
    """
    user_id = "1"
    item_id = "1342"  # Not present in the item collection.
    rating = 0.3

    original_item_ratings = ratings.get_item_ratings(item_id)
    original_user_ratings = ratings.get_user_ratings(user_id)

    ratings.add_user_item_rating(user_id, item_id, rating)

    # Neither the item's nor the user's ratings should have changed.
    assert original_item_ratings == ratings.get_item_ratings(item_id)
    assert original_user_ratings == ratings.get_user_ratings(user_id)
def test_initialize_agenda(
    im_crsv1: InteractionModel, information_need: InformationNeed
) -> None:
    """Verifies the agenda content after (re-)initialization."""
    im_crsv1.initialize_agenda(information_need)

    agenda_stack = im_crsv1.agenda.stack
    assert len(agenda_stack) == 6
    # The agenda opens with START followed by DISCLOSE...
    assert agenda_stack[0].intent == im_crsv1.INTENT_START
    assert agenda_stack[1].intent == im_crsv1.INTENT_DISCLOSE
    # ...and closes with an INQUIRE on RATING followed by STOP.
    expected_inquire = DialogueAct(
        im_crsv1.INTENT_INQUIRE, [SlotValueAnnotation("RATING")]
    )
    assert agenda_stack[-2] == expected_inquire
    assert agenda_stack[-1].intent == im_crsv1.INTENT_STOP
def test_is_transition_allowed(monkeypatch, im_crsv1: InteractionModel) -> None:
    """Checks which transitions are allowed from a DISCLOSE dialogue act."""
    monkeypatch.setattr(
        im_crsv1, "_current_dialogue_acts", [DialogueAct(Intent("DISCLOSE"))]
    )
    allowed = [
        DialogueAct(Intent("INQUIRE")),
        DialogueAct(Intent("REVEAL")),
    ]
    forbidden = [DialogueAct(Intent("END"))]

    assert im_crsv1._is_transition_allowed(allowed) is True
    assert im_crsv1._is_transition_allowed(forbidden) is False
def test_build_new_prompt(prompt: StopPrompt) -> None:
    """Tests the build_new_prompt method."""
    expected_requirements = (
        "\nREQUIREMENTS: You are looking for a item with the following "
        "characteristics: genre=Comedy, director=Steven Spielberg and want to "
        "know the following information about it: plot, rating.\nHISTORY:\n"
    )
    expected_prompt = DEFAULT_STOP_DEFINITION + expected_requirements
    assert prompt.build_new_prompt() == expected_prompt
@pytest.fixture
def persona() -> Persona:
    """Persona fixture with fixed characteristics."""
    traits = {"curiosity": "high", "education": "MSc"}
    return Persona(characteristics=traits)
Once you " 38 | "find a suitable item, make sure to get the following information: " 39 | "plot, rating.\nHISTORY:\n" 40 | ) 41 | 42 | assert prompt.build_new_prompt() == ( 43 | DEFAULT_TASK_DEFINITION + stringified_persona + stringified_requirements 44 | ) 45 | 46 | 47 | def test_update_prompt_context(prompt: UtteranceGenerationPrompt) -> None: 48 | """Tests the update_prompt_context method.""" 49 | user_utterance = Utterance( 50 | "I am looking for a comedy movie.", DialogueParticipant.USER 51 | ) 52 | agent_utterance = Utterance( 53 | "I suggest 'The terminal'.", DialogueParticipant.AGENT 54 | ) 55 | 56 | prompt.update_prompt_context(user_utterance, DialogueParticipant.USER) 57 | assert prompt._prompt_context == "USER: I am looking for a comedy movie.\n" 58 | prompt.update_prompt_context(agent_utterance, DialogueParticipant.AGENT) 59 | assert prompt._prompt_context == ( 60 | "USER: I am looking for a comedy movie.\nASSISTANT: I suggest 'The " 61 | "terminal'.\nUSER: " 62 | ) 63 | -------------------------------------------------------------------------------- /tests/simulator/user_modeling/test_simple_preference_model.py: -------------------------------------------------------------------------------- 1 | """Tests for SimplePreferenceModel.""" 2 | 3 | import os 4 | 5 | import pytest 6 | 7 | from usersimcrs.core.simulation_domain import SimulationDomain 8 | from usersimcrs.items.item_collection import ItemCollection 9 | from usersimcrs.items.ratings import Ratings 10 | from usersimcrs.user_modeling.simple_preference_model import ( 11 | SimplePreferenceModel, 12 | ) 13 | 14 | RATINGS_CSV_FILE = "tests/data/items/ratings.csv" 15 | 16 | 17 | @pytest.fixture 18 | def preference_model( 19 | domain: SimulationDomain, item_collection: ItemCollection 20 | ) -> SimplePreferenceModel: 21 | """Preference model fixture.""" 22 | ratings = Ratings() 23 | ratings.load_ratings_csv(RATINGS_CSV_FILE) 24 | return SimplePreferenceModel( 25 | domain, 26 | item_collection, 27 | 
def test_get_item_preference(preference_model: SimplePreferenceModel) -> None:
    """Tests that an item preference is one of the two allowed values."""
    preference = preference_model.get_item_preference("527")
    # The simple model only produces binary like/dislike preferences.
    assert preference in (1.0, -1.0)
def test_save_preference_model(
    preference_model: SimplePreferenceModel,
) -> None:
    """Tests that a saved preference model can be reloaded unchanged.

    The serialized model file is always removed, even when an assertion
    fails, so that no state leaks into other tests.

    Args:
        preference_model: Preference model fixture.
    """
    model_path = "tests/data/preference_model.joblib"
    preference_model._slot_value_preferences.set_preference(
        "GENRE", "Action", 1
    )
    preference_model._item_preferences.set_preference("ID", "527", -1)
    preference_model.save_preference_model(model_path)

    try:
        loaded_model = SimplePreferenceModel.load_preference_model(model_path)

        assert loaded_model._user_id == preference_model._user_id
        assert (
            loaded_model._item_preferences._preferences
            == preference_model._item_preferences._preferences
        )
        assert (
            loaded_model._slot_value_preferences._preferences
            == preference_model._slot_value_preferences._preferences
        )
    finally:
        # Clean up regardless of test outcome (fix: previously the file was
        # left behind when an assertion failed).
        os.remove(model_path)
def generate_random_information_need(
    domain: SimulationDomain, item_collection: ItemCollection
) -> InformationNeed:
    """Generates a random information need based on the domain.

    It randomly selects one target item and sets constraints and requests
    slots. The values of constraints are derived from the target's
    properties. The numbers of constraints and requests are also randomly
    determined.

    Args:
        domain: Domain knowledge.
        item_collection: Collection of items.

    Returns:
        Information need.
    """
    target_item = item_collection.get_random_item()

    constraints = {}
    informable_slots = set(domain.get_informable_slots()).intersection(
        target_item.properties.keys()
    )
    num_constraints = random.randint(1, len(informable_slots))
    # Fix: random.sample() on a set was deprecated in Python 3.9 and raises
    # TypeError since 3.11; sample from a sorted list instead (sorting also
    # makes the draw deterministic under a fixed random seed).
    for slot in random.sample(sorted(informable_slots), num_constraints):
        constraints[slot] = target_item.get_property(slot)

    # NOTE(review): symmetric_difference also includes constraint slots that
    # are not requestable; difference() may be intended — confirm.
    requestable_slots = set(
        domain.get_requestable_slots()
    ).symmetric_difference(constraints.keys())
    num_requests = random.randint(1, len(requestable_slots))
    requests = random.sample(sorted(requestable_slots), num_requests)

    return InformationNeed([target_item], constraints, requests)


class InformationNeed:
    """Information need comprising constraints, requests, and target items."""

    def __init__(
        self,
        target_items: List[Item],
        constraints: Dict[str, Any],
        requests: List[str],
    ) -> None:
        """Initializes an information need.

        Args:
            target_items: Items that the user is interested in.
            constraints: Slot-value pairs representing constraints on the item
                of interest.
            requests: Slots representing the desired information.
        """
        self.target_items = target_items
        self.constraints = constraints
        # Maps each requested slot to the value obtained so far (None until
        # the information has been fulfilled).
        self.requested_slots = defaultdict(
            None, {slot: None for slot in requests}
        )

    def get_constraint_value(self, slot: str) -> Any:
        """Returns the value of a constraint slot.

        Args:
            slot: Slot.

        Returns:
            Value of the slot, or None if the slot is not constrained.
        """
        return self.constraints.get(slot)

    def get_requestable_slots(self) -> List[str]:
        """Returns the list of slots whose information is still unfulfilled."""
        return [
            slot
            for slot in self.requested_slots
            if not self.requested_slots[slot]
        ]

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> InformationNeed:
        """Creates information need from a dictionary.

        Args:
            data: Dictionary with keys "target_items", "constraints", and
                "requests".

        Returns:
            Information need.
        """
        target_items = [Item(**item) for item in data["target_items"]]
        return cls(
            target_items=target_items,
            constraints=data["constraints"],
            requests=data["requests"],
        )

    def to_dict(self) -> Dict[str, Any]:
        """Returns information need as a dictionary."""
        return {
            "target_items": [
                {"item_id": item.id, "properties": item.properties}
                for item in self.target_items
            ],
            "constraints": self.constraints,
            "requests": list(self.requested_slots.keys()),
        }
@dataclass
class DialogueState:
    """Dialogue state.

    Attributes:
        utterance_count: Number of utterances exchanged so far.
        agent_dialogue_acts: List of dialogue acts per turn for the agent.
        user_dialogue_acts: List of dialogue acts per turn for the user.
        belief_state: Mapping from each slot to the values provided by the
            user so far; missing slots default to an empty list.
    """

    utterance_count: int = 0
    agent_dialogue_acts: List[List[DialogueAct]] = field(default_factory=list)
    user_dialogue_acts: List[List[DialogueAct]] = field(default_factory=list)
    belief_state: DefaultDict[str, List[str]] = field(
        default_factory=lambda: defaultdict(list)
    )
class Item:
    def __init__(
        self,
        item_id: str,
        properties: Dict[str, Any] = None,
        domain: SimulationDomain = None,
    ) -> None:
        """Creates an item.

        Each item has minimally an ID and can optionally have any number of
        properties, which are represented as key-value pairs. If a domain is
        given, only properties whose key is a domain slot are kept.

        Args:
            item_id: Item ID.
            properties: Dictionary of item properties (key-value pairs).
                Defaults to None.
            domain: Domain of the item. Defaults to None.
        """
        self._item_id = item_id
        self._domain = domain
        self._slot_names = None
        # Fix: previously a None `properties` was stored as-is, making
        # get_property/set_property crash on an item created without
        # properties.
        properties = properties if properties is not None else {}

        if self._domain:
            allowed_slots = set(self._domain.get_slot_names())
            self._properties = {
                key: value
                for key, value in properties.items()
                if key in allowed_slots
            }
        else:
            self._properties = properties

    @property
    def id(self) -> str:
        """Returns the item id."""
        return self._item_id

    @property
    def properties(self) -> Dict[str, Any]:
        """Returns the item properties."""
        return self._properties

    def get_property(self, key: str) -> Any:
        """Returns a given item property.

        Args:
            key: Name of property.

        Returns:
            Value of property or None.
        """
        return self._properties.get(key)

    def set_property(self, key: str, value: Any) -> None:
        """Sets the value of a given item property.

        If the item property exists it will be overwritten.

        Args:
            key: Property name.
            value: Property value.

        Raises:
            ValueError: if the property is not part of the domain knowledge.
        """
        if self._domain and key not in self._domain.get_slot_names():
            raise ValueError(
                f"The property {key} is not part of the slots specified by "
                "the domain."
            )
        self._properties[key] = value
34 | """ 35 | if not os.path.exists(ollama_config_file): 36 | raise FileNotFoundError(f"File '{ollama_config_file}' not found.") 37 | if not os.path.exists(prompt_file): 38 | raise FileNotFoundError(f"File '{prompt_file}' not found.") 39 | 40 | # Ollama 41 | ollama_config = yaml.safe_load(open(ollama_config_file, "r")) 42 | self.client = Client(ollama_config.get("host")) 43 | self._model = ollama_config.get("model") 44 | self._stream = ollama_config.get("stream", False) 45 | self._options = Options(**ollama_config.get("options", {})) 46 | 47 | # Prompt 48 | self.prompt = open(prompt_file, "r").read() 49 | self.prompt_prefix = prompt_prefix 50 | 51 | def generate_utterance_text( 52 | self, 53 | dialogue_acts: List[DialogueAct], 54 | annotations: Optional[Union[List[Annotation], None]] = None, 55 | force_annotation: bool = False, 56 | ) -> Union[AnnotatedUtterance, bool]: 57 | """Turns a structured utterance into a textual one. 58 | 59 | Args: 60 | dialogue_acts: Dialogue acts of the utterance to be generated. 61 | annotations: If provided, these annotations should be considered 62 | during generation. 63 | force_annotation: A flag to indicate whether annotations should be 64 | forced or not. Not used in this NLG. 65 | 66 | Raises: 67 | RuntimeError: If generation fails. 68 | 69 | Returns: 70 | Natural language utterance. 
71 | """ 72 | try: 73 | dialogue_acts_str = self._stringify_dialogue_acts(dialogue_acts) 74 | 75 | if annotations: 76 | annonations_str = self._stringify_annotations(annotations) 77 | prompt = self.prompt.format( 78 | dialogue_acts=dialogue_acts_str, 79 | annotations=annonations_str, 80 | ) 81 | else: 82 | prompt = self.prompt.format(dialogue_acts=dialogue_acts_str) 83 | 84 | response = self.client.generate( 85 | prompt=prompt, 86 | model=self._model, 87 | options=self._options, 88 | stream=self._stream, 89 | ).get("response", "") 90 | response = response.strip().replace(self.prompt_prefix, "") 91 | except Exception as e: 92 | raise RuntimeError(f"Failed to generate utterance: {e}") 93 | 94 | return AnnotatedUtterance( 95 | text=response, 96 | participant=DialogueParticipant.USER, 97 | dialogue_acts=dialogue_acts, 98 | annotations=annotations, 99 | ) 100 | 101 | def _stringify_dialogue_acts(self, dialogue_acts: List[DialogueAct]) -> str: 102 | """Stringifies dialogue acts. 103 | 104 | The stringified dialogue acts are in the format: 105 | "intent(slot=value,...)|intent(slot,...)|..." 106 | 107 | Args: 108 | dialogue_acts: List of dialogue acts. 109 | 110 | Returns: 111 | List of dialogue acts as a string. 112 | """ 113 | dialogue_acts_str = [] 114 | for dialogue_act in dialogue_acts: 115 | dialogue_act_str = dialogue_act.intent.label 116 | if dialogue_act.annotations: 117 | annotations_str = ",".join( 118 | [ 119 | f"{a.slot}={a.value}" if a.value else f"{a.slot}" 120 | for a in dialogue_act.annotations 121 | ] 122 | ) 123 | dialogue_act_str = f"{dialogue_act_str}({annotations_str})" 124 | else: 125 | dialogue_act_str = f"{dialogue_act_str}()" 126 | dialogue_acts_str.append(dialogue_act_str) 127 | return "|".join(dialogue_acts_str) 128 | 129 | def _stringify_annotations(self, annotations: List[Annotation]) -> str: 130 | """Stringifies annotations. 131 | 132 | The stringified annotations are in the format: 133 | "slot=value\nslot\n..." 
134 | 135 | Args: 136 | annotations: List of annotations. 137 | 138 | Returns: 139 | List of annotations as a string. 140 | """ 141 | return "\n".join( 142 | [ 143 | f"{a.key}={a.value}" if a.value else f"{a.key}" 144 | for a in annotations 145 | ] 146 | ) 147 | -------------------------------------------------------------------------------- /usersimcrs/simulation_platform.py: -------------------------------------------------------------------------------- 1 | """Simulation platform to connect simulator and agent.""" 2 | 3 | import logging 4 | import sys 5 | from typing import Any, Dict, Type 6 | 7 | import requests 8 | from dialoguekit.connector import DialogueConnector 9 | from dialoguekit.core import Utterance 10 | from dialoguekit.participant import Agent 11 | from dialoguekit.platforms import Platform 12 | 13 | from usersimcrs.simulator.user_simulator import UserSimulator 14 | 15 | 16 | class SimulationPlatform(Platform): 17 | def __init__( 18 | self, agent_class: Type[Agent], agent_config: Dict[str, Any] = {} 19 | ) -> None: 20 | """Initializes the simulation platform. 21 | 22 | Args: 23 | agent_class: Agent class. 24 | agent_config: Configuration of the agent. Defaults to empty 25 | dictionary. 26 | """ 27 | super().__init__(agent_class) 28 | self._agent_class = agent_class 29 | self._agent_config = agent_config 30 | 31 | def start(self) -> None: 32 | """Starts the simulation platform. 33 | 34 | It creates the agent. 35 | 36 | Raises: 37 | RuntimeError: If the connection to the agent is refused. 38 | ValueError: If the agent URI is not specified in the agent 39 | configuration. 40 | """ 41 | try: 42 | agent_uri = self._agent_config["uri"] 43 | response = requests.get(agent_uri, timeout=60) 44 | assert response.status_code == 200 45 | self.agent = self._agent_class(**self._agent_config) 46 | except requests.exceptions.RequestException: 47 | raise RuntimeError( 48 | f"Connection refused to {agent_uri}. 
Please check that " 49 | "the conversational agent is running at this address. See the " 50 | "full traceback above." 51 | ) 52 | except KeyError: 53 | raise ValueError( 54 | "The agent URI is not specified in the agent configuration." 55 | ) 56 | 57 | def connect( 58 | self, 59 | user_id: str, 60 | simulator_class: Type[UserSimulator], 61 | simulator_config: Dict[str, Any] = {}, 62 | ) -> None: 63 | """Connects a user simulator to an agent. 64 | 65 | Args: 66 | user_id: User ID. 67 | simulator_class: User simulator class. 68 | simulator_config: Configuration of the user simulator. Defaults to 69 | empty dictionary. 70 | 71 | Raises: 72 | Exception: If an error occurs during the dialogue. 73 | """ 74 | self._active_users[user_id] = simulator_class( 75 | user_id, **simulator_config 76 | ) 77 | dialogue_connector = DialogueConnector( 78 | agent=self.agent, 79 | user=self._active_users[user_id], 80 | platform=self, 81 | ) 82 | 83 | try: 84 | dialogue_connector.start() 85 | except Exception as e: 86 | tb = sys.exc_info() 87 | dialogue_connector._dialogue_history._metadata.update( 88 | { 89 | "error": { 90 | "error_type": type(e).__name__, 91 | "trace": str(e.with_traceback(tb[2])), 92 | } 93 | } 94 | ) 95 | return 96 | 97 | def display_agent_utterance( 98 | self, agent_id: str, utterance: Utterance 99 | ) -> None: 100 | """Displays an agent utterance. 101 | 102 | Args: 103 | agent_id: Agent ID. 104 | utterance: An instance of Utterance. 105 | """ 106 | logging.debug(f"Agent {agent_id}: {utterance.text}") 107 | 108 | def display_user_utterance( 109 | self, user_id: str, utterance: Utterance 110 | ) -> None: 111 | """Displays a user utterance. 112 | 113 | Args: 114 | user_id: User ID. 115 | utterance: An instance of Utterance. 
116 | """ 117 | logging.debug(f"User {user_id}: {utterance.text}") 118 | -------------------------------------------------------------------------------- /usersimcrs/simulator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iai-group/UserSimCRS/13f72923d9be0b27472e3f684b2bca4563332c3d/usersimcrs/simulator/__init__.py -------------------------------------------------------------------------------- /usersimcrs/simulator/agenda_based/agenda.py: -------------------------------------------------------------------------------- 1 | """Interface to represent an agenda. 2 | 3 | The agenda is a stack of dialogue acts that the user wants to perform to fulfill 4 | their information need. The representation is based on the description in: 5 | Agenda-Based User Simulation for Bootstrapping a POMDP Dialogue System, 6 | Schatzmann et al., 2007. 7 | """ 8 | 9 | from collections import deque 10 | from typing import Deque, List 11 | 12 | from dialoguekit.core.dialogue_act import DialogueAct 13 | from dialoguekit.core.intent import Intent 14 | from dialoguekit.core.slot_value_annotation import SlotValueAnnotation 15 | from usersimcrs.core.information_need import InformationNeed 16 | 17 | 18 | class Agenda: 19 | def __init__( 20 | self, 21 | information_need: InformationNeed, 22 | inform_intent: Intent, 23 | request_intent: Intent, 24 | stop_intent: Intent, 25 | start_intent: Intent = None, 26 | ) -> None: 27 | """Initializes the agenda. 28 | 29 | Args: 30 | information_need: Information need. 31 | inform_intent: Inform intent. 32 | request_intent: Request intent. 33 | stop_intent: Stop intent. 34 | start_intent: Start intent. Defaults to None. 
35 | """ 36 | self._dialogue_acts_stack: Deque[DialogueAct] = deque() 37 | self.inform_intent = inform_intent 38 | self.request_intent = request_intent 39 | 40 | if start_intent is not None: 41 | self._dialogue_acts_stack.append(DialogueAct(start_intent)) 42 | 43 | # All constraints are converted to inform dialogue acts 44 | for slot, value in information_need.constraints.items(): 45 | if isinstance(value, list): 46 | annotations = [SlotValueAnnotation(slot, v) for v in value] 47 | else: 48 | annotations = [SlotValueAnnotation(slot, value)] 49 | self._dialogue_acts_stack.append( 50 | DialogueAct(inform_intent, annotations) 51 | ) 52 | 53 | # All requests are converted to request dialogue acts 54 | for slot in information_need.get_requestable_slots(): 55 | self._dialogue_acts_stack.append( 56 | DialogueAct(request_intent, [SlotValueAnnotation(slot)]) 57 | ) 58 | 59 | # Finish with a stop dialogue act 60 | self._dialogue_acts_stack.append(DialogueAct(stop_intent)) 61 | 62 | @property 63 | def stack(self) -> Deque[DialogueAct]: 64 | """Returns the dialogue acts stack.""" 65 | return self._dialogue_acts_stack 66 | 67 | def get_next_dialogue_acts(self, n: int) -> List[DialogueAct]: 68 | """Returns the next n dialogue acts from the stack. 69 | 70 | Args: 71 | n: Number of dialogue acts to return. 72 | 73 | Returns: 74 | List of dialogue acts. 75 | """ 76 | return list([self._dialogue_acts_stack.popleft() for _ in range(n)]) 77 | 78 | def push_dialogue_act(self, dialogue_act: DialogueAct) -> None: 79 | """Pushes a dialogue act onto the stack. 80 | 81 | Args: 82 | dialogue_act: Dialogue act. 83 | """ 84 | self._dialogue_acts_stack.appendleft(dialogue_act) 85 | 86 | def push_dialogue_acts(self, dialogue_acts: List[DialogueAct]) -> None: 87 | """Pushes dialogue acts onto the stack. 88 | 89 | Args: 90 | dialogue_acts: Dialogue acts. 
91 | """ 92 | for dialogue_act in dialogue_acts: 93 | self.push_dialogue_act(dialogue_act) 94 | 95 | def clean_agenda(self, information_need: InformationNeed) -> None: 96 | """Cleans the agenda. 97 | 98 | Removes duplicate dialogue acts, null dialogue acts, and requests for 99 | already informed slots. 100 | 101 | Args: 102 | information_need: Information need. 103 | """ 104 | new_stack: Deque[DialogueAct] = deque() 105 | informed_slots = information_need.get_requestable_slots() 106 | for dialogue_act in self._dialogue_acts_stack: 107 | if dialogue_act is None: 108 | continue 109 | if dialogue_act.intent == self.request_intent and any( 110 | annotation.slot in informed_slots 111 | for annotation in dialogue_act.annotations 112 | ): 113 | continue 114 | if dialogue_act not in new_stack: 115 | new_stack.append(dialogue_act) 116 | 117 | self._dialogue_acts_stack = new_stack 118 | -------------------------------------------------------------------------------- /usersimcrs/simulator/agenda_based/agenda_based_simulator.py: -------------------------------------------------------------------------------- 1 | """Agenda-based user simulator from [Zhang and Balog, KDD'20].""" 2 | 3 | from dialoguekit.core.annotated_utterance import AnnotatedUtterance 4 | from dialoguekit.core.utterance import Utterance 5 | from dialoguekit.nlg import ConditionalNLG 6 | from dialoguekit.nlu import NLU 7 | from dialoguekit.participant.participant import DialogueParticipant 8 | from usersimcrs.core.simulation_domain import SimulationDomain 9 | from usersimcrs.dialogue_management.dialogue_state_tracker import ( 10 | DialogueStateTracker, 11 | ) 12 | from usersimcrs.items.item_collection import ItemCollection 13 | from usersimcrs.items.ratings import Ratings 14 | from usersimcrs.simulator.agenda_based.interaction_model import ( 15 | InteractionModel, 16 | ) 17 | from usersimcrs.simulator.user_simulator import UserSimulator 18 | from usersimcrs.user_modeling.preference_model import PreferenceModel 
19 | 20 | 21 | class AgendaBasedSimulator(UserSimulator): 22 | def __init__( 23 | self, 24 | id: str, 25 | domain: SimulationDomain, 26 | item_collection: ItemCollection, 27 | preference_model: PreferenceModel, 28 | interaction_model: InteractionModel, 29 | nlu: NLU, 30 | nlg: ConditionalNLG, 31 | ratings: Ratings, 32 | ) -> None: 33 | """Initializes the agenda-based simulated user. 34 | 35 | Args: 36 | id: Simulator ID. 37 | domain: Domain. 38 | item_collection: Item collection. 39 | preference_model: Preference model. 40 | interaction_model: Interaction model. 41 | nlu: NLU module performing dialogue act extraction. 42 | nlg: NLG module generating textual responses. 43 | ratings: Historical ratings. 44 | """ 45 | super().__init__(id=id, domain=domain, item_collection=item_collection) 46 | self._preference_model = preference_model 47 | self._interaction_model = interaction_model 48 | self._interaction_model.initialize_agenda(self.information_need) 49 | self._nlu = nlu 50 | self._nlg = nlg 51 | self._dialogue_state_tracker = DialogueStateTracker() 52 | self._ratings = ratings 53 | 54 | def _generate_response(self, agent_utterance: Utterance) -> Utterance: 55 | """Generates response to the agent's utterance. 56 | 57 | Args: 58 | agent_utterance: Agent utterance. 59 | 60 | Return: 61 | User utterance. 62 | """ 63 | return self.generate_response(agent_utterance) 64 | 65 | def generate_response( 66 | self, agent_utterance: Utterance 67 | ) -> AnnotatedUtterance: 68 | """Generates response to the agent's utterance. 69 | 70 | Args: 71 | agent_utterance: Agent utterance. 72 | 73 | Return: 74 | User utterance. 75 | """ 76 | # Run agent utterance through NLU. 77 | agent_dialogue_acts = self._nlu.extract_dialogue_acts(agent_utterance) 78 | 79 | self._interaction_model.dialogue_state_tracker.update_state( 80 | agent_dialogue_acts, DialogueParticipant.AGENT 81 | ) 82 | 83 | # Test for the agent's stopping intent. 
Note that this would normally 84 | # handled by the dialogue connector. However, since intent annotations 85 | # for the agent's utterance are not available when the response is 86 | # received by the dialogue connector, an extra check is needed here. 87 | if any( 88 | da.intent == self._dialogue_connector._agent.stop_intent 89 | for da in agent_dialogue_acts 90 | ): 91 | self._dialogue_connector.close() 92 | quit() 93 | 94 | # Update agenda based on the agent's dialogue acts. 95 | self._interaction_model.update_agenda( 96 | self.information_need, 97 | self._preference_model, 98 | self._item_collection, 99 | ) 100 | # Get next user dialogue acts based on the current agenda. 101 | response_dialogue_acts = ( 102 | self._interaction_model.get_next_dialogue_acts() 103 | ) 104 | 105 | # Generating natural language response through NLG. 106 | response = self._nlg.generate_utterance_text(response_dialogue_acts) 107 | 108 | response.participant = DialogueParticipant.USER 109 | 110 | # Update dialogue state. 111 | self._interaction_model.dialogue_state_tracker.update_state( 112 | response_dialogue_acts, DialogueParticipant.USER 113 | ) 114 | 115 | return response 116 | -------------------------------------------------------------------------------- /usersimcrs/simulator/llm/__init__.py: -------------------------------------------------------------------------------- 1 | """LLM-based simulator module.""" 2 | -------------------------------------------------------------------------------- /usersimcrs/simulator/llm/dual_prompt_user_simulator.py: -------------------------------------------------------------------------------- 1 | """User simulator leveraging a large language model to generate responses. 2 | 3 | The generation of responses is based on two prompts. The first one establishes 4 | if the conversation should continue or not. The second one is used to generate 5 | the user response. 
6 | """ 7 | 8 | from dialoguekit.core.utterance import Utterance 9 | from dialoguekit.participant import DialogueParticipant 10 | from usersimcrs.core.simulation_domain import SimulationDomain 11 | from usersimcrs.items.item_collection import ItemCollection 12 | from usersimcrs.simulator.llm.interfaces.llm_interface import LLMInterface 13 | from usersimcrs.simulator.llm.prompt.stop_prompt import ( 14 | DEFAULT_STOP_DEFINITION, 15 | StopPrompt, 16 | ) 17 | from usersimcrs.simulator.llm.prompt.utterance_generation_prompt import ( 18 | DEFAULT_TASK_DEFINITION, 19 | UtteranceGenerationPrompt, 20 | ) 21 | from usersimcrs.simulator.user_simulator import UserSimulator 22 | from usersimcrs.user_modeling.persona import Persona 23 | 24 | 25 | class DualPromptUserSimulator(UserSimulator): 26 | def __init__( 27 | self, 28 | id: str, 29 | domain: SimulationDomain, 30 | item_collection: ItemCollection, 31 | llm_interface: LLMInterface, 32 | item_type: str, 33 | task_definition: str = DEFAULT_TASK_DEFINITION, 34 | stop_definition: str = DEFAULT_STOP_DEFINITION, 35 | persona: Persona = None, 36 | ) -> None: 37 | """Initializes the user simulator. 38 | 39 | Args: 40 | id: User simulator ID. 41 | llm_interface: Interface to the large language model. 42 | item_type: Type of the item to be recommended. Defaults to None. 43 | task_definition: Definition of the task to be performed. 44 | Defaults to DEFAULT_TASK_DEFINITION. 45 | stop_definition: Definition of the stop task. Defaults to 46 | DEFAULT_STOP_DEFINITION. 47 | persona: Persona of the user. Defaults to None. 
48 | """ 49 | super().__init__(id, domain, item_collection) 50 | self.llm_interface = llm_interface 51 | self.generation_prompt = UtteranceGenerationPrompt( 52 | self.information_need, item_type, task_definition, persona 53 | ) 54 | self.stop_prompt = StopPrompt( 55 | self.information_need, item_type, stop_definition, persona 56 | ) 57 | 58 | def _generate_response(self, agent_utterance: Utterance) -> Utterance: 59 | """Generates response to the agent utterance. 60 | 61 | Args: 62 | agent_utterance: Agent utterance. 63 | 64 | Returns: 65 | User utterance. 66 | """ 67 | self.generation_prompt.update_prompt_context( 68 | agent_utterance, DialogueParticipant.AGENT 69 | ) 70 | self.stop_prompt.update_prompt_context( 71 | agent_utterance, DialogueParticipant.AGENT 72 | ) 73 | 74 | # Check if the conversation should continue 75 | b_continue = self.llm_interface.get_llm_api_response( 76 | self.stop_prompt.prompt_text 77 | ) 78 | if b_continue.strip().lower() == "false": 79 | user_utterance = Utterance("\\end", DialogueParticipant.USER) 80 | else: 81 | user_utterance = self.llm_interface.generate_utterance( 82 | self.generation_prompt 83 | ) 84 | self.generation_prompt.update_prompt_context( 85 | user_utterance, DialogueParticipant.USER 86 | ) 87 | return user_utterance 88 | -------------------------------------------------------------------------------- /usersimcrs/simulator/llm/interfaces/llm_interface.py: -------------------------------------------------------------------------------- 1 | """Interface for the LLM model.""" 2 | 3 | from abc import ABC, abstractmethod 4 | 5 | from dialoguekit.core import Utterance 6 | from usersimcrs.simulator.llm.prompt.utterance_generation_prompt import ( 7 | UtteranceGenerationPrompt, 8 | ) 9 | 10 | 11 | class LLMInterface(ABC): 12 | def __init__(self, default_response: str = None) -> None: 13 | """Initializes the LLM interface. 
14 | 15 | Args: 16 | default_response: Default response to be used if the LLM fails to 17 | generate a response. 18 | """ 19 | self.default_response = default_response 20 | 21 | @abstractmethod 22 | def generate_utterance( 23 | self, prompt: UtteranceGenerationPrompt 24 | ) -> Utterance: 25 | """Generates an utterance given a prompt. 26 | 27 | Args: 28 | prompt: Prompt for generating the utterance. 29 | 30 | Raises: 31 | NotImplementedError: If the method is not implemented in subclass. 32 | 33 | Returns: 34 | Utterance in natural language. 35 | """ 36 | raise NotImplementedError() 37 | 38 | @abstractmethod 39 | def get_llm_api_response(self, prompt: str, **kwargs) -> str: 40 | """Gets the raw response from the LLM API. 41 | 42 | This method should be used to interact directly with the LLM API, i.e., 43 | for everything that is not related to the generation of an utterance. 44 | 45 | Args: 46 | prompt: Prompt for the LLM. 47 | **kwargs: Additional arguments to be passed to the API call. 48 | 49 | Raises: 50 | NotImplementedError: If the method is not implemented in subclass. 51 | 52 | Returns: 53 | Response from the LLM API without any post-processing. 
54 | """ 55 | raise NotImplementedError() 56 | -------------------------------------------------------------------------------- /usersimcrs/simulator/llm/interfaces/ollama_interface.py: -------------------------------------------------------------------------------- 1 | """Interface to use a LLM served by Ollama.""" 2 | 3 | import os 4 | 5 | import yaml 6 | from ollama import Client, Options 7 | 8 | from dialoguekit.core import Utterance 9 | from dialoguekit.participant import DialogueParticipant 10 | from usersimcrs.simulator.llm.interfaces.llm_interface import LLMInterface 11 | from usersimcrs.simulator.llm.prompt.utterance_generation_prompt import ( 12 | UtteranceGenerationPrompt, 13 | ) 14 | 15 | 16 | class OllamaLLMInterface(LLMInterface): 17 | def __init__( 18 | self, 19 | configuration_path: str, 20 | default_response: str = None, 21 | ) -> None: 22 | """Initializes interface for ollama served LLM. 23 | 24 | Args: 25 | configuration_path: Path to the configuration file. 26 | default_response: Default response to be used if the LLM fails to 27 | generate a response. 28 | 29 | Raises: 30 | FileNotFoundError: If the configuration file is not found. 31 | ValueError: If the model or host is not specified in the config. 32 | """ 33 | super().__init__(default_response) 34 | if not os.path.exists(configuration_path): 35 | raise FileNotFoundError( 36 | f"Configuration file not found: {configuration_path}" 37 | ) 38 | 39 | with open(configuration_path, "r") as f: 40 | self._llm_configuration = yaml.safe_load(f) 41 | 42 | if "model" not in self._llm_configuration: 43 | raise ValueError( 44 | "No model specified in the config, e.g., 'llama2'." 
45 | ) 46 | if "host" not in self._llm_configuration: 47 | raise ValueError("No host specified in the config.") 48 | 49 | self.client = Client(host=self._llm_configuration.get("host")) 50 | self.model = self._llm_configuration.get("model") 51 | self._stream = self._llm_configuration.get("stream", False) 52 | self._llm_options = Options( 53 | **self._llm_configuration.get("options", {}) 54 | ) 55 | 56 | def generate_utterance( 57 | self, prompt: UtteranceGenerationPrompt 58 | ) -> Utterance: 59 | """Generates a user utterance given a prompt. 60 | 61 | Args: 62 | prompt: Prompt for generating the utterance. 63 | 64 | Returns: 65 | Utterance in natural language. 66 | """ 67 | response = self.get_llm_api_response(prompt.prompt_text) 68 | if response == "": 69 | response = self.default_response 70 | response = response.replace("USER: ", "") 71 | return Utterance(response, participant=DialogueParticipant.USER) 72 | 73 | def get_llm_api_response(self, prompt: str) -> str: 74 | """Gets the raw response from the LLM API. 75 | 76 | This method should be used to interact directly with the LLM API, i.e., 77 | for everything that is not related to the generation of an utterance. 78 | 79 | Args: 80 | prompt: Prompt for the LLM. 81 | **kwargs: Additional arguments to be passed to the API call. 82 | 83 | Returns: 84 | Response from the LLM API without any post-processing. 
85 | """ 86 | ollama_response = self.client.generate( 87 | self.model, 88 | prompt, 89 | options=self._llm_options, 90 | stream=self._stream, 91 | ) 92 | return ollama_response.get("response", "") 93 | -------------------------------------------------------------------------------- /usersimcrs/simulator/llm/interfaces/openai_interface.py: -------------------------------------------------------------------------------- 1 | """Interface to use a LLM served by OpenAI.""" 2 | 3 | import os 4 | import re 5 | from typing import Dict, List 6 | 7 | import yaml 8 | from openai import OpenAI 9 | 10 | from dialoguekit.core import Utterance 11 | from dialoguekit.participant.participant import DialogueParticipant 12 | from usersimcrs.simulator.llm.interfaces.llm_interface import LLMInterface 13 | from usersimcrs.simulator.llm.prompt.utterance_generation_prompt import ( 14 | UtteranceGenerationPrompt, 15 | ) 16 | 17 | 18 | class OpenAILLMInterface(LLMInterface): 19 | def __init__( 20 | self, 21 | configuration_path: str, 22 | use_chat_api: bool = False, 23 | default_response: str = None, 24 | ) -> None: 25 | """Initializes interface for OpenAI served LLM. 26 | 27 | Args: 28 | configuration_path: Path to the configuration file. 29 | use_chat_api: Whether to use the chat or completion API. Defaults to 30 | False (i.e., completion API). 31 | default_response: Default response to be used if the LLM fails to 32 | generate a response. 33 | 34 | Raises: 35 | FileNotFoundError: If the configuration file is not found. 
36 | """ 37 | super().__init__(default_response) 38 | 39 | if not os.path.exists(configuration_path): 40 | raise FileNotFoundError( 41 | f"Configuration file not found: {configuration_path}" 42 | ) 43 | 44 | with open(configuration_path, "r") as f: 45 | self._llm_configuration = yaml.safe_load(f) 46 | 47 | if "api_key" not in self._llm_configuration: 48 | raise ValueError( 49 | "No API key specified in the config, see how to get one at " 50 | "https://platform.openai.com/docs/quickstart/account-setup" 51 | ) 52 | 53 | if "model" not in self._llm_configuration: 54 | raise ValueError( 55 | "No model specified in the config, see supported models at " 56 | "https://platform.openai.com/docs/models" 57 | ) 58 | 59 | self.model = self._llm_configuration.get("model") 60 | self._llm_options = self._llm_configuration.get("options", {}) 61 | 62 | self.client = OpenAI(api_key=self._llm_configuration.get("api_key")) 63 | self.use_chat_api = use_chat_api 64 | 65 | def generate_utterance( 66 | self, prompt: UtteranceGenerationPrompt 67 | ) -> Utterance: 68 | """Generates a user utterance given a prompt. 69 | 70 | Args: 71 | prompt: Prompt for generating the utterance. 72 | 73 | Returns: 74 | Utterance in natural language. 75 | """ 76 | response = self.get_llm_api_response( 77 | prompt.prompt_text, initial_prompt=prompt.build_new_prompt() 78 | ) 79 | response = response.replace("USER: ", "") 80 | return Utterance(response, DialogueParticipant.USER) 81 | 82 | def _parse_prompt_context( 83 | self, prompt_context: str 84 | ) -> List[Dict[str, str]]: 85 | """Parses the prompt context to a list of messages. 86 | 87 | Args: 88 | prompt_context: Prompt context. 
89 | """ 90 | messages = [] 91 | utterances = prompt_context.split("\n") 92 | role_pattern = re.compile(r"^\[(USER|ASSISTANT)\]: (.+)$") 93 | for utterance in utterances: 94 | match = role_pattern.match(utterance) 95 | if match: 96 | role = match.group(1) 97 | text = match.group(2) 98 | messages.append({"role": role.lower(), "content": text}) 99 | return messages 100 | 101 | def get_llm_api_response( 102 | self, prompt: str, initial_prompt: str = None 103 | ) -> str: 104 | """Gets the raw response from the LLM API. 105 | 106 | This method should be used to interact directly with the LLM API, i.e., 107 | for everything that is not related to the generation of an utterance. 108 | 109 | Args: 110 | prompt: Prompt for the LLM. 111 | initial_prompt: Initial prompt for the chat API. Defaults to None. 112 | 113 | Returns: 114 | Response from the LLM API without any post-processing. 115 | """ 116 | if self.use_chat_api: 117 | messages = [ 118 | {"role": "system", "content": initial_prompt}, 119 | *self._parse_prompt_context(prompt), 120 | ] 121 | response = ( 122 | self.client.chat.completions.create( 123 | messages=messages, model=self.model, **self._llm_options # type: ignore[arg-type] # noqa 124 | ) 125 | .choices[0] 126 | .message.content 127 | ) 128 | else: 129 | response = ( 130 | self.client.completions.create( 131 | model=self.model, prompt=prompt, **self._llm_options 132 | ) 133 | .choices[0] 134 | .text 135 | ) 136 | 137 | return response 138 | -------------------------------------------------------------------------------- /usersimcrs/simulator/llm/prompt/prompt.py: -------------------------------------------------------------------------------- 1 | """Interface for prompt.""" 2 | 3 | from abc import ABC, abstractmethod 4 | 5 | from dialoguekit.core.utterance import Utterance 6 | from dialoguekit.participant.participant import DialogueParticipant 7 | from usersimcrs.core.information_need import InformationNeed 8 | from usersimcrs.user_modeling.persona import 
Persona 9 | 10 | 11 | class Prompt(ABC): 12 | def __init__( 13 | self, 14 | information_need: InformationNeed, 15 | item_type: str, 16 | prompt_definition: str, 17 | persona: Persona = None, 18 | ) -> None: 19 | """Initializes the prompt. 20 | 21 | Args: 22 | information_need: The information need of the user. 23 | item_type: The type of the item to be recommended. 24 | prompt_definition: The definition of the task to be performed. 25 | persona: The persona of the user. Defaults to None. 26 | """ 27 | self.information_need = information_need 28 | self.item_type = item_type 29 | self.prompt_definition = prompt_definition 30 | self.persona = persona 31 | self._initial_prompt = self.build_new_prompt() 32 | self._prompt_context = "" 33 | 34 | @property 35 | def prompt_text(self) -> str: 36 | """Prompt for the user simulator.""" 37 | return self._initial_prompt + "\n" + self._prompt_context 38 | 39 | @abstractmethod 40 | def build_new_prompt(self, **kwargs) -> str: 41 | """Builds the initial prompt without any context. 42 | 43 | Raises: 44 | NotImplementedError: If the method is not implemented in subclasses. 45 | 46 | Returns: 47 | Initial prompt. 48 | """ 49 | raise NotImplementedError 50 | 51 | def update_prompt_context( 52 | self, utterance: Utterance, participant: DialogueParticipant 53 | ) -> None: 54 | """Updates the context provided in the prompt. 55 | 56 | Args: 57 | utterance: Utterance to be added to the prompt. 58 | participant: Participant of the conversation. 
59 | """ 60 | role = ( 61 | "ASSISTANT" if participant == DialogueParticipant.AGENT else "USER" 62 | ) 63 | 64 | self._prompt_context += f"{role}: {utterance.text}\n" 65 | -------------------------------------------------------------------------------- /usersimcrs/simulator/llm/prompt/stop_prompt.py: -------------------------------------------------------------------------------- 1 | """Define the prompt for stopping the conversation.""" 2 | 3 | from usersimcrs.core.information_need import InformationNeed 4 | from usersimcrs.simulator.llm.prompt.prompt import Prompt 5 | from usersimcrs.user_modeling.persona import Persona 6 | 7 | DEFAULT_STOP_DEFINITION = ( 8 | "You are a USER discussing with an ASSISTANT to get a recommendation " 9 | "meeting your REQUIREMENTS. Given the conversation history, you need to " 10 | "decide whether to continue the conversation or not. Detect if the " 11 | "conversation is not progressing towards your goal or if the ASSISTANT is " 12 | "not helpful. In such cases, you should terminate the conversation by " 13 | "returning TRUE. Otherwise, return FALSE." 14 | ) 15 | 16 | 17 | class StopPrompt(Prompt): 18 | def __init__( 19 | self, 20 | information_need: InformationNeed, 21 | item_type: str, 22 | prompt_definition: str = DEFAULT_STOP_DEFINITION, 23 | persona: Persona = None, 24 | ) -> None: 25 | """Initializes the prompt. 26 | 27 | Args: 28 | information_need: The information need of the user. 29 | item_type: The type of the item to be recommended. 30 | prompt_definition: The definition of the task to be performed. 31 | Defaults to DEFAULT_STOP_DEFINITION. 32 | persona: The persona of the user. Defaults to None. 
33 | """ 34 | super().__init__( 35 | information_need, item_type, prompt_definition, persona 36 | ) 37 | 38 | @property 39 | def prompt_text(self) -> str: 40 | """Prompt for the user simulator.""" 41 | return ( 42 | self._initial_prompt 43 | + "\n" 44 | + self._prompt_context 45 | + "\n" 46 | + "CONTINUE: " 47 | ) 48 | 49 | def build_new_prompt(self) -> str: 50 | """Builds the initial prompt without any context. 51 | 52 | Returns: 53 | Initial prompt with task definition, requirements, and persona. 54 | """ 55 | initial_prompt = self.prompt_definition 56 | 57 | if self.persona: 58 | initial_prompt += ( 59 | " Take into account your PERSONA when deciding to stop the " 60 | "conversation.\n" 61 | ) 62 | stringified_characteristics = ", ".join( 63 | [ 64 | f"{key}={value}" 65 | for key, value in self.persona.characteristics.items() 66 | ] 67 | ) 68 | initial_prompt += f"PERSONA: {stringified_characteristics}\n" 69 | 70 | stringified_constraints = ", ".join( 71 | [ 72 | f"{key.lower()}={value}" 73 | for key, value in self.information_need.constraints.items() 74 | ] 75 | ) 76 | requestable_slot = ", ".join( 77 | [k.lower() for k in self.information_need.requested_slots.keys()] 78 | ) 79 | initial_prompt += ( 80 | f"\nREQUIREMENTS: You are looking for a {self.item_type} with the " 81 | f"following characteristics: {stringified_constraints} and want to " 82 | f"know the following information about it: {requestable_slot}.\n" 83 | "HISTORY:\n" 84 | ) 85 | return initial_prompt 86 | -------------------------------------------------------------------------------- /usersimcrs/simulator/llm/prompt/utterance_generation_prompt.py: -------------------------------------------------------------------------------- 1 | """Define the prompt for the simulator. 2 | 3 | The structure of the prompt is inspired by the work of Terragni et al. It 4 | includes the task description, the information need, and optionally a persona 5 | and an example of a conversation. 
"""Define the prompt for the simulator.

The structure of the prompt is inspired by the work of Terragni et al. It
includes the task description, the information need, and optionally a persona
and an example of a conversation. Unlike the original work, we consider a zero-
shot setting, i.e., the prompt does not include any examples of conversations.

Reference: Terragni, S., et al. (2023). "In-Context Learning User Simulators
for Task-Oriented Dialog Systems", arXiv 2306.00774.
"""

from dialoguekit.core.utterance import Utterance
from dialoguekit.participant.participant import DialogueParticipant
from usersimcrs.core.information_need import InformationNeed
from usersimcrs.simulator.llm.prompt.prompt import Prompt
from usersimcrs.user_modeling.persona import Persona

DEFAULT_TASK_DEFINITION = (
    "You are a USER discussing with an ASSISTANT. Given the conversation "
    "history, you need to generate the next USER message in the most natural "
    "way possible. The conversation is about getting a recommendation "
    "according to the REQUIREMENTS. You must fulfill all REQUIREMENTS as the "
    "conversation progresses (you don't need to fulfill them all at once). "
    "After getting all the necessary information, you can terminate the "
    "conversation by sending '\\end'. You may also terminate the conversation "
    "if it is not going anywhere or the ASSISTANT is not helpful by sending "
    "'\\giveup'. "
)


class UtteranceGenerationPrompt(Prompt):
    """Prompt used to generate the next user utterance."""

    def __init__(
        self,
        information_need: InformationNeed,
        item_type: str,
        prompt_definition: str = DEFAULT_TASK_DEFINITION,
        persona: Persona = None,
    ) -> None:
        """Initializes the prompt.

        Args:
            information_need: The information need of the user.
            item_type: The type of the item to be recommended.
            prompt_definition: The definition of the task to be performed.
                Defaults to DEFAULT_TASK_DEFINITION.
            persona: The persona of the user. Defaults to None.
        """
        super().__init__(
            information_need, item_type, prompt_definition, persona
        )

    def build_new_prompt(self) -> str:
        """Builds the initial prompt without any dialogue context.

        Returns:
            Initial prompt with task definition, requirements, and persona.
        """
        prompt = self.prompt_definition

        if self.persona:
            prompt += " Adapt your responses considering your PERSONA.\n"
            traits = ", ".join(
                f"{key}={value}"
                for key, value in self.persona.characteristics.items()
            )
            prompt += f"PERSONA: {traits}\n"
        else:
            prompt += "Be precise with the REQUIREMENTS, clear and concise.\n"

        constraints = ", ".join(
            f"{slot.lower()}={value}"
            for slot, value in self.information_need.constraints.items()
        )
        requested = ", ".join(
            slot.lower() for slot in self.information_need.requested_slots
        )
        prompt += (
            f"\nREQUIREMENTS: You are looking for a {self.item_type} with the "
            f"following characteristics: {constraints}. Once you "
            f"find a suitable {self.item_type}, make sure to get the "
            f"following information: {requested}.\nHISTORY:\n"
        )
        return prompt

    def update_prompt_context(
        self, utterance: Utterance, participant: DialogueParticipant
    ) -> None:
        """Updates the context provided in the prompt.

        After an agent turn, a "USER: " marker is appended so that the LLM
        completes the user side of the dialogue next.

        Args:
            utterance: Utterance to be added to the prompt.
            participant: Participant of the conversation.
        """
        super().update_prompt_context(utterance, participant)
        if participant == DialogueParticipant.AGENT:
            self._prompt_context += "USER: "
class SinglePromptUserSimulator(UserSimulator):
    """User simulator generating responses with a single LLM prompt."""

    def __init__(
        self,
        id: str,
        domain: SimulationDomain,
        item_collection: ItemCollection,
        llm_interface: LLMInterface,
        item_type: str,
        task_definition: str = DEFAULT_TASK_DEFINITION,
        persona: Persona = None,
    ) -> None:
        """Initializes the user simulator.

        Args:
            id: User simulator ID.
            domain: Simulation domain.
            item_collection: Collection of items available for recommendation.
            llm_interface: Interface to the large language model.
            item_type: Type of the item to be recommended.
            task_definition: Definition of the task to be performed.
                Defaults to DEFAULT_TASK_DEFINITION.
            persona: Persona of the user. Defaults to None.
        """
        super().__init__(id, domain, item_collection)
        self.llm_interface = llm_interface
        # The prompt is seeded with the information need sampled by the base
        # class during initialization.
        self.prompt = UtteranceGenerationPrompt(
            self.information_need, item_type, task_definition, persona
        )

    def _generate_response(self, agent_utterance: Utterance) -> Utterance:
        """Generates response to the agent utterance.

        Both the agent utterance and the generated user utterance are added
        to the prompt context so that subsequent turns see the full history.

        Args:
            agent_utterance: Agent utterance.

        Returns:
            User utterance.
        """
        self.prompt.update_prompt_context(
            agent_utterance, DialogueParticipant.AGENT
        )
        user_utterance = self.llm_interface.generate_utterance(self.prompt)
        self.prompt.update_prompt_context(
            user_utterance, DialogueParticipant.USER
        )
        return user_utterance
"""Encoder-only transformer model for neural user simulator."""

import math

import torch
import torch.nn as nn


class PositionalEncoding(nn.Module):
    """Adds sinusoidal positional information to input embeddings."""

    def __init__(
        self,
        d_model: int,
        dropout: float = 0.1,
        max_len: int = 5000,
        **kwargs,
    ) -> None:
        """Initializes positional encoding layer.

        Args:
            d_model: Dimension of the model.
            dropout: Dropout rate. Defaults to 0.1.
            max_len: Maximum length of the input sequence. Defaults to 5000.
        """
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Precompute the (max_len, 1, d_model) sinusoid table once. It is
        # registered as a buffer so it moves with the module (.to(), state
        # dict) without becoming a trainable parameter.
        positions = torch.arange(max_len).unsqueeze(1)
        frequencies = torch.exp(
            torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model)
        )
        encoding = torch.zeros(max_len, 1, d_model)
        encoding[:, 0, 0::2] = torch.sin(positions * frequencies)
        encoding[:, 0, 1::2] = torch.cos(positions * frequencies)
        self.register_buffer("pe", encoding)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Performs forward pass.

        Args:
            x: Input tensor; assumed shape (seq_len, batch, d_model) —
                TODO confirm against callers.

        Returns:
            Positional encoded tensor.
        """
        return self.dropout(x + self.pe[: x.size(0)])


class TransformerEncoderModel(nn.Module):
    """Encoder-only transformer mapping token sequences to output vectors."""

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        nhead: int,
        hidden_dim: int,
        num_encoder_layers: int,
        num_token: int,
        dropout: float = 0.5,
    ) -> None:
        """Initializes an encoder-only transformer model.

        Args:
            input_dim: Size of the input vector.
            output_dim: Size of the output vector.
            nhead: Number of heads.
            hidden_dim: Hidden dimension.
            num_encoder_layers: Number of encoder layers.
            num_token: Number of tokens in the vocabulary.
            dropout: Dropout rate. Defaults to 0.5.
        """
        super(TransformerEncoderModel, self).__init__()
        self.d_model = input_dim

        self.pos_encoder = PositionalEncoding(input_dim, dropout)
        self.embedding = nn.Embedding(num_token, input_dim)

        # Encoder stack with a final layer normalization.
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=input_dim,
            nhead=nhead,
            dim_feedforward=hidden_dim,
        )
        self.encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_encoder_layers,
            norm=nn.LayerNorm(input_dim),
        )

        self.linear = nn.Linear(input_dim, output_dim)
        # NOTE(review): softmax is defined but not applied in forward();
        # presumably callers apply it (or a loss with built-in softmax).
        self.softmax = nn.Softmax(dim=-1)

        self.init_weights()

    def init_weights(self) -> None:
        """Initializes weights of the network."""
        bound = 0.1
        self.embedding.weight.data.uniform_(-bound, bound)
        self.linear.bias.data.zero_()
        self.linear.weight.data.uniform_(-bound, bound)

    def forward(
        self, src: torch.Tensor, src_mask: torch.Tensor = None
    ) -> torch.Tensor:
        """Performs forward pass.

        Args:
            src: Source tensor of token ids.
            src_mask: Mask tensor. Defaults to None.

        Returns:
            Output tensor.
        """
        # Scale embeddings by sqrt(d_model) as in the original transformer.
        embedded = self.embedding(src) * math.sqrt(self.d_model)
        embedded = self.pos_encoder(embedded)
        encoded = self.encoder(embedded, mask=src_mask)
        return self.linear(encoded)
"""User simulator abstract class."""

from abc import ABC, abstractmethod

from dialoguekit.core.annotated_utterance import AnnotatedUtterance
from dialoguekit.core.utterance import Utterance
from dialoguekit.participant.user import User, UserType
from usersimcrs.core.information_need import generate_random_information_need
from usersimcrs.core.simulation_domain import SimulationDomain
from usersimcrs.items.item_collection import ItemCollection


class UserSimulator(User, ABC):
    """Abstract base class for user simulators.

    A simulator is a dialogue participant of type SIMULATOR; concrete
    subclasses implement _generate_response to produce user utterances.
    """

    def __init__(
        self,
        id: str,
        domain: SimulationDomain,
        item_collection: ItemCollection,
    ) -> None:
        """Initializes the user simulator.

        Args:
            id: User simulator ID.
            domain: Simulation domain.
            item_collection: Collection of items used for the simulation.
        """
        super().__init__(id, UserType.SIMULATOR)
        self._domain = domain
        self._item_collection = item_collection
        # Every simulator starts with a freshly sampled information need.
        self.get_new_information_need()

    def get_new_information_need(self) -> None:
        """Generates a new information need."""
        self.information_need = generate_random_information_need(
            self._domain, self._item_collection
        )

    @abstractmethod
    def _generate_response(self, agent_utterance: Utterance) -> Utterance:
        """Generates response to the agent utterance.

        Args:
            agent_utterance: Agent utterance.

        Raises:
            NotImplementedError: If not implemented in derived class.

        Returns:
            User utterance.
        """
        raise NotImplementedError

    def receive_utterance(self, utterance: Utterance) -> None:
        """Gets called every time there is a new agent utterance.

        Args:
            utterance: Agent utterance.
        """
        response = self._generate_response(utterance)
        # The dialogue connector expects annotated utterances; wrap plain
        # utterances before registering them.
        if not isinstance(response, AnnotatedUtterance):
            response = AnnotatedUtterance.from_utterance(response)
        self._dialogue_connector.register_user_utterance(response)


"""Context model including multiple context dimensions, e.g., temporal."""


from typing import Dict


class ContextModel:
    """Samples situational context (e.g., temporal, relational) for a user.

    NOTE(review): All methods are unimplemented stubs.
    """

    # Default (empty) probability mapping used when none is provided.
    _DEFAULT_CONTEXT_PROBABILITIES: Dict[str, Dict[str, float]] = dict()

    def __init__(
        self,
        context_probability_mapping: Dict[
            str, Dict[str, float]
        ] = _DEFAULT_CONTEXT_PROBABILITIES,
    ) -> None:
        """Instantiates a context model.

        NOTE(review): The default argument is a shared mutable dict; if the
        implementation ever mutates it, switch to a None default.

        Args:
            context_probability_mapping: A dictionary with necessary
                probabilities to sample context. If it is not provided, we use
                default values. The outer key is the context dimension and
                the value is a dictionary mapping events to probabilities,
                e.g.:
                {
                    temporal: {weekend: 0.50, weekday: 0.50},
                    relational: {group: 0.50, alone: 0.50}
                }
        """
        pass

    def sample_context(self):
        """Samples context along each of the dimensions independently.

        NOTE(review): Stub — returns None until implemented.
        """
        pass

    def _sample_context_dimension(self, dimension: str):
        """Samples a context along the given dimension.

        NOTE(review): Stub — returns None until implemented.

        Args:
            dimension: The dimension which context is to be sampled along.
        """
        pass
"""Persona, which is a profile of the user to represent different backgrounds
(e.g., age, gender, education), personality types, and behavioral tendencies
(e.g., patience, conscientiousness, or curiosity)."""

from dataclasses import dataclass
from typing import Any, Dict


@dataclass
class Persona:
    """Container for personal user characteristics.

    Characteristics are stored as arbitrary key-value pairs, e.g.,
    {"patience": "high", "age": 35}.
    """

    # Arbitrary key-value pairs describing the user.
    characteristics: Dict[str, Any]
class PKGPreferenceModel(PreferenceModel):
    """Preference model backed by a personal knowledge graph (PKG).

    NOTE(review): Preference lookups are stubs pending the release of the
    PKG API; they currently return None despite the float annotation.
    """

    def __init__(
        self,
        domain: SimulationDomain,
        item_collection: ItemCollection,
        historical_ratings: Ratings,
        historical_user_id: str = None,
    ) -> None:
        """Initializes the preference model of a simulated user based on a PKG.

        Args:
            domain: Domain.
            item_collection: Item collection.
            historical_ratings: Historical ratings.
            historical_user_id (Optional): If provided, the simulated user is
                based on this particular historical user; otherwise, it is
                based on a randomly sampled user. This is mostly added to
                make the class testable.
        """
        super().__init__(
            domain, item_collection, historical_ratings, historical_user_id
        )
        # TODO: Open connection to PKG.

    def get_item_preference(self, item_id: str) -> float:
        """Returns a preference for a given item.

        Args:
            item_id: Item ID.

        Returns:
            Item preference, which is in [-1,1].

        Raises:
            ValueError: If the item does not exist in the collection.
        """
        self._assert_item_exists(item_id)
        # TODO: Query PKG to retrieve item preference.
        preference = None
        return preference

    def get_slot_value_preference(self, slot: str, value: str) -> float:
        """Returns a preference on a given slot-value pair.

        Args:
            slot: Slot name (needs to exist in the domain).
            value: Slot value.

        Returns:
            Slot-value preference, which is in [-1,1].
        """
        self._assert_slot_exists(slot)
        # TODO: Query PKG to retrieve slot-value preference.
        preference = None
        return preference
8 | - Whenever the user is prompted for their preference on a given item or 9 | slot-value pair, we provide a positive/negative response by flipping a coin 10 | (i.e., either -1 or +1 as the preference). 11 | - Whenever the user is prompted for a preference on a given slot, a random value 12 | among the existing slot values is picked and returned as positive preference. 13 | 14 | The responses given are remembered so that the user would respond the same way 15 | if they are asked the same question again. 16 | 17 | This approach offers limited consistency. Items that are seen/consumed are 18 | rooted in real user behavior, but the preferences expressed about them are not. 19 | Hence, the user might express a preference about a slot that is inconsistent 20 | with the answers given to other questions (e.g., likes "action" as a genre, but 21 | has not seen a single action movie). 22 | """ 23 | 24 | import random 25 | 26 | from dialoguekit.participant.user_preferences import UserPreferences 27 | 28 | from usersimcrs.core.simulation_domain import SimulationDomain 29 | from usersimcrs.items.item_collection import ItemCollection 30 | from usersimcrs.items.ratings import Ratings 31 | from usersimcrs.user_modeling.preference_model import ( 32 | KEY_ITEM_ID, 33 | PreferenceModel, 34 | ) 35 | 36 | 37 | class SimplePreferenceModel(PreferenceModel): 38 | def __init__( 39 | self, 40 | domain: SimulationDomain, 41 | item_collection: ItemCollection, 42 | historical_ratings: Ratings, 43 | historical_user_id: str = None, 44 | ) -> None: 45 | """Initializes the simple preference model of a simulated user. 46 | 47 | Args: 48 | domain: Domain. 49 | item_collection: Item collection. 50 | historical_ratings: Historical ratings. 51 | historical_user_id (Optional): If provided, the simulated user is 52 | based on this particular historical user; otherwise, it is based 53 | on a randomly sampled user. This is mostly added to make the 54 | class testable. Defaults to None. 
class SimplePreferenceModel(PreferenceModel):
    """Preference model that fabricates random but persistent preferences.

    Preferences are drawn uniformly from {-1, +1} on first request and
    remembered, so repeated questions get consistent answers.
    """

    def __init__(
        self,
        domain: SimulationDomain,
        item_collection: ItemCollection,
        historical_ratings: Ratings,
        historical_user_id: str = None,
    ) -> None:
        """Initializes the simple preference model of a simulated user.

        Args:
            domain: Domain.
            item_collection: Item collection.
            historical_ratings: Historical ratings.
            historical_user_id (Optional): If provided, the simulated user is
                based on this particular historical user; otherwise, it is
                based on a randomly sampled user. This is mostly added to
                make the class testable. Defaults to None.
        """
        super().__init__(
            domain, item_collection, historical_ratings, historical_user_id
        )

        # Store item and slot-value preferences separately so that answers to
        # repeated questions stay consistent.
        self._item_preferences = UserPreferences(self._user_id)
        self._slot_value_preferences = UserPreferences(self._user_id)

    def get_item_preference(self, item_id: str) -> float:
        """Returns a preference for a given item.

        Args:
            item_id: Item ID.

        Returns:
            Randomly chosen preference, which is either -1 or +1.

        Raises:
            ValueError: If the item does not exist in the collection.
        """
        self._assert_item_exists(item_id)
        preference = self._item_preferences.get_preference(
            KEY_ITEM_ID, item_id
        )
        # Explicit None check: a falsy check would also trigger on a stored
        # preference of 0 and silently resample it.
        if preference is None:
            preference = random.choice([-1, 1])
            self._item_preferences.set_preference(
                KEY_ITEM_ID, item_id, preference
            )
        return preference

    def get_slot_value_preference(self, slot: str, value: str) -> float:
        """Returns a preference on a given slot-value pair.

        Args:
            slot: Slot name (needs to exist in the domain).
            value: Slot value.

        Returns:
            Randomly chosen preference, which is either -1 or +1.
        """
        self._assert_slot_exists(slot)
        preference = self._slot_value_preferences.get_preference(slot, value)
        # Explicit None check, see get_item_preference.
        if preference is None:
            preference = random.choice([-1, 1])
            self._slot_value_preferences.set_preference(
                slot, value, preference
            )
        return preference
"""Generates Rasa NLU files from the JSON formatted annotated dialogues.

Usage:
  $ python -m usersimcrs.utils.annotation_converter_rasa \
    -source PathToAnnotatedDialoguesFile \
    -destination PathToDestinationFolder
"""

import argparse
import os
import sys

from dialoguekit.utils.annotation_converter_dialoguekit_to_rasa import (
    AnnotationConverterRasa,
)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-source", type=str, help="Path to the annotated dialogues file."
    )
    parser.add_argument(
        "-destination", type=str, help="Path to the destination folder."
    )
    args = parser.parse_args()

    # Fail fast with an explicit message if either path is missing.
    if not os.path.exists(args.source):
        sys.exit(f"FileNotFound: {args.source}")
    if not os.path.exists(args.destination):
        sys.exit(f"FileNotFound: {args.destination}")

    converter = AnnotationConverterRasa(args.source, args.destination)
    converter.read_original()
    converter.run()