├── .github └── workflows │ └── documentation.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── dialz ├── __init__.py ├── dataset.py ├── datasets │ ├── create │ │ ├── age.json │ │ ├── disability.json │ │ ├── gender.json │ │ ├── question-answer.json │ │ ├── race.json │ │ ├── religion.json │ │ ├── sentence-starters.json │ │ ├── sexuality.json │ │ ├── socioeconomic.json │ │ └── tasks.json │ └── load │ │ ├── coordinate-other-ais.json │ │ ├── corrigible-neutral-HHH.json │ │ ├── hallucination.json │ │ ├── morality.json │ │ ├── myopic-reward.json │ │ ├── refusal.json │ │ ├── stereoset-gender.json │ │ ├── stereoset-profession.json │ │ ├── stereoset-race.json │ │ ├── stereoset-religion.json │ │ ├── survival-instinct.json │ │ └── sycophancy.json ├── score.py ├── vector.py └── visualize.py ├── docs ├── api │ ├── datasets.rst │ ├── score.rst │ ├── vectors.rst │ └── visualize.rst ├── conf.py ├── index.rst └── tutorials │ ├── basic_tutorial.ipynb │ └── datasets_tutorial.ipynb ├── notebooks ├── basic_tutorial.ipynb ├── data │ ├── cm_test.csv │ ├── cm_train_400.csv │ ├── gender_stereotype.csv │ ├── race_stereotype.csv │ └── religion_stereotype.csv ├── datasets_tutorial.ipynb ├── paper_examples.ipynb ├── paper_results.ipynb └── results │ ├── outputs │ ├── run1_gender_outputs.csv │ ├── run1_race_outputs.csv │ ├── run1_religion_outputs.csv │ ├── run2_gender_outputs.csv │ ├── run2_race_outputs.csv │ ├── run2_religion_outputs.csv │ ├── run3_gender_outputs.csv │ ├── run3_race_outputs.csv │ ├── run3_religion_outputs.csv │ ├── run4_gender_outputs.csv │ ├── run4_race_outputs.csv │ ├── run4_religion_outputs.csv │ ├── run5_gender_outputs.csv │ ├── run5_race_outputs.csv │ └── run5_religion_outputs.csv │ └── scores │ ├── run1_gender_scores.csv │ ├── run1_race_scores.csv │ ├── run1_religion_scores.csv │ ├── run2_gender_scores.csv │ ├── run2_race_scores.csv │ ├── run2_religion_scores.csv │ ├── run3_gender_scores.csv │ ├── run3_race_scores.csv │ ├── run3_religion_scores.csv │ ├── run4_gender_scores.csv │ ├── run4_race_scores.csv │ ├── run4_religion_scores.csv │ ├── run5_gender_scores.csv │ ├── run5_race_scores.csv │ └── run5_religion_scores.csv ├── poetry.lock ├── pyproject.toml ├── requirements.txt └── tests └── __init__.py /.github/workflows/documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/.github/workflows/documentation.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/README.md -------------------------------------------------------------------------------- /dialz/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/__init__.py -------------------------------------------------------------------------------- /dialz/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/dataset.py -------------------------------------------------------------------------------- /dialz/datasets/create/age.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/create/age.json -------------------------------------------------------------------------------- /dialz/datasets/create/disability.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/create/disability.json -------------------------------------------------------------------------------- /dialz/datasets/create/gender.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/create/gender.json -------------------------------------------------------------------------------- /dialz/datasets/create/question-answer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/create/question-answer.json -------------------------------------------------------------------------------- /dialz/datasets/create/race.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/create/race.json -------------------------------------------------------------------------------- /dialz/datasets/create/religion.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/create/religion.json -------------------------------------------------------------------------------- /dialz/datasets/create/sentence-starters.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/create/sentence-starters.json -------------------------------------------------------------------------------- /dialz/datasets/create/sexuality.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/create/sexuality.json -------------------------------------------------------------------------------- /dialz/datasets/create/socioeconomic.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/create/socioeconomic.json -------------------------------------------------------------------------------- /dialz/datasets/create/tasks.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/create/tasks.json -------------------------------------------------------------------------------- /dialz/datasets/load/coordinate-other-ais.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/load/coordinate-other-ais.json -------------------------------------------------------------------------------- /dialz/datasets/load/corrigible-neutral-HHH.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/load/corrigible-neutral-HHH.json -------------------------------------------------------------------------------- /dialz/datasets/load/hallucination.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/load/hallucination.json -------------------------------------------------------------------------------- /dialz/datasets/load/morality.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/load/morality.json -------------------------------------------------------------------------------- /dialz/datasets/load/myopic-reward.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/load/myopic-reward.json -------------------------------------------------------------------------------- /dialz/datasets/load/refusal.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/load/refusal.json -------------------------------------------------------------------------------- /dialz/datasets/load/stereoset-gender.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/load/stereoset-gender.json -------------------------------------------------------------------------------- /dialz/datasets/load/stereoset-profession.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/load/stereoset-profession.json -------------------------------------------------------------------------------- /dialz/datasets/load/stereoset-race.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/load/stereoset-race.json -------------------------------------------------------------------------------- /dialz/datasets/load/stereoset-religion.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/load/stereoset-religion.json -------------------------------------------------------------------------------- /dialz/datasets/load/survival-instinct.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/load/survival-instinct.json -------------------------------------------------------------------------------- /dialz/datasets/load/sycophancy.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/datasets/load/sycophancy.json -------------------------------------------------------------------------------- /dialz/score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/score.py -------------------------------------------------------------------------------- /dialz/vector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/vector.py -------------------------------------------------------------------------------- /dialz/visualize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/dialz/visualize.py -------------------------------------------------------------------------------- /docs/api/datasets.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/docs/api/datasets.rst -------------------------------------------------------------------------------- /docs/api/score.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/docs/api/score.rst -------------------------------------------------------------------------------- /docs/api/vectors.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/docs/api/vectors.rst -------------------------------------------------------------------------------- /docs/api/visualize.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/docs/api/visualize.rst -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/docs/conf.py -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/docs/index.rst -------------------------------------------------------------------------------- /docs/tutorials/basic_tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/docs/tutorials/basic_tutorial.ipynb -------------------------------------------------------------------------------- /docs/tutorials/datasets_tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/docs/tutorials/datasets_tutorial.ipynb -------------------------------------------------------------------------------- /notebooks/basic_tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/basic_tutorial.ipynb -------------------------------------------------------------------------------- /notebooks/data/cm_test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/data/cm_test.csv -------------------------------------------------------------------------------- /notebooks/data/cm_train_400.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/data/cm_train_400.csv -------------------------------------------------------------------------------- /notebooks/data/gender_stereotype.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/data/gender_stereotype.csv -------------------------------------------------------------------------------- /notebooks/data/race_stereotype.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/data/race_stereotype.csv -------------------------------------------------------------------------------- /notebooks/data/religion_stereotype.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/data/religion_stereotype.csv -------------------------------------------------------------------------------- /notebooks/datasets_tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/datasets_tutorial.ipynb -------------------------------------------------------------------------------- /notebooks/paper_examples.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/paper_examples.ipynb -------------------------------------------------------------------------------- /notebooks/paper_results.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/paper_results.ipynb -------------------------------------------------------------------------------- /notebooks/results/outputs/run1_gender_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run1_gender_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/outputs/run1_race_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run1_race_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/outputs/run1_religion_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run1_religion_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/outputs/run2_gender_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run2_gender_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/outputs/run2_race_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run2_race_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/outputs/run2_religion_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run2_religion_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/outputs/run3_gender_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run3_gender_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/outputs/run3_race_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run3_race_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/outputs/run3_religion_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run3_religion_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/outputs/run4_gender_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run4_gender_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/outputs/run4_race_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run4_race_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/outputs/run4_religion_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run4_religion_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/outputs/run5_gender_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run5_gender_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/outputs/run5_race_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run5_race_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/outputs/run5_religion_outputs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/outputs/run5_religion_outputs.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run1_gender_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run1_gender_scores.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run1_race_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run1_race_scores.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run1_religion_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run1_religion_scores.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run2_gender_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run2_gender_scores.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run2_race_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run2_race_scores.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run2_religion_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run2_religion_scores.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run3_gender_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run3_gender_scores.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run3_race_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run3_race_scores.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run3_religion_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run3_religion_scores.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run4_gender_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run4_gender_scores.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run4_race_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run4_race_scores.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run4_religion_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run4_religion_scores.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run5_gender_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run5_gender_scores.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run5_race_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run5_race_scores.csv -------------------------------------------------------------------------------- /notebooks/results/scores/run5_religion_scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/notebooks/results/scores/run5_religion_scores.csv -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/poetry.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardiffnlp/dialz/HEAD/requirements.txt -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # to do --------------------------------------------------------------------------------