├── .flake8 ├── .github └── workflows │ └── python-lint.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── assets ├── XH.jpeg ├── cmd_example.gif ├── css │ ├── app.min.css │ ├── factcheck.css │ ├── images │ │ ├── cover.jpg │ │ └── pattern.png │ └── vendor.min.css ├── js │ ├── app.min.js │ └── vendor.min.js ├── librai_librai.png ├── online_screenshot.png ├── web_input.png ├── web_result.png └── webfonts │ ├── fa-brands-400.ttf │ ├── fa-brands-400.woff2 │ ├── fa-regular-400.ttf │ ├── fa-regular-400.woff2 │ ├── fa-solid-900.ttf │ ├── fa-solid-900.woff2 │ ├── fa-v4compatibility.ttf │ └── fa-v4compatibility.woff2 ├── demo_data ├── image.webp ├── speech.mp3 ├── text.txt └── video.m4v ├── docs ├── README.md ├── RELEASE_LOG.md ├── development_guide.md └── user_guide.md ├── factcheck ├── __init__.py ├── __main__.py ├── config │ ├── api_config.yaml │ └── sample_prompt.yaml ├── core │ ├── CheckWorthy.py │ ├── ClaimVerify.py │ ├── Decompose.py │ ├── QueryGenerator.py │ ├── Retriever │ │ ├── __init__.py │ │ ├── base.py │ │ ├── google_retriever.py │ │ └── serper_retriever.py │ └── __init__.py └── utils │ ├── api_config.py │ ├── data_class.py │ ├── llmclient │ ├── __init__.py │ ├── base.py │ ├── claude_client.py │ ├── gpt_client.py │ └── local_openai_client.py │ ├── logger.py │ ├── multimodal.py │ ├── prompt │ ├── __init__.py │ ├── base.py │ ├── chatgpt_prompt.py │ ├── chatgpt_prompt_zh.py │ ├── claude_prompt.py │ └── customized_prompt.py │ ├── utils.py │ └── web_util.py ├── poetry.lock ├── pyproject.toml ├── requirements.txt ├── script ├── minimal_test.py ├── minimal_test_en.json └── minimal_test_zh.json ├── templates ├── LibrAI_fc.html └── input.html └── webapp.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203, E266, E501, W503, F403, F401, C901 3 | max-line-length = 127 4 | max-complexity = 10 5 | select = B,C,E,F,W,T4,B9 6 | -------------------------------------------------------------------------------- /.github/workflows/python-lint.yml: -------------------------------------------------------------------------------- 1 | name: Python Linting 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | - master 8 | 9 | jobs: 10 | flake8: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: Set up Python 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: '3.8' 20 | 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install flake8 25 | 26 | - name: Pre-Commit 27 | uses: pre-commit/action@v3.0.0 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.pyc 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | *.log 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | *.log.* 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # poetry 102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 103 | # This is especially recommended for binary packages to ensure reproducibility, and is more 104 | # commonly ignored for libraries. 105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 106 | #poetry.lock 107 | 108 | # pdm 109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 110 | #pdm.lock 111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 112 | # in version control. 113 | # https://pdm.fming.dev/#use-with-ide 114 | .pdm.toml 115 | 116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 117 | __pypackages__/ 118 | 119 | # Celery stuff 120 | celerybeat-schedule 121 | celerybeat.pid 122 | 123 | # SageMath parsed files 124 | *.sage.py 125 | 126 | # Environments 127 | .env 128 | .venv 129 | env/ 130 | venv/ 131 | ENV/ 132 | env.bak/ 133 | venv.bak/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | .spyproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | 142 | # mkdocs documentation 143 | /site 144 | 145 | # mypy 146 | .mypy_cache/ 147 | .dmypy.json 148 | dmypy.json 149 | 150 | # Pyre type checker 151 | .pyre/ 152 | 153 | # pytype static type analyzer 154 | .pytype/ 155 | 156 | # Cython debug symbols 157 | cython_debug/ 158 | debug.ipynb 159 | 160 | # PyCharm 161 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 162 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 163 | # and can be added to the global gitignore or merged into this file. For a more nuclear 164 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
165 | #.idea/ 166 | # gunicorn log 167 | server_run/*.log 168 | test_data/ 169 | log/ 170 | # Mac 171 | *.DS_Store 172 | 173 | # lark key and utils 174 | factcheck/utils/lark.py 175 | # openai_key_file 176 | factcheck/utils/openai_key.py 177 | factcheck/config/secret_dict.py 178 | 179 | # gptcache file 180 | data_map.txt 181 | .gptcache_data_map.txt 182 | dump.rdb 183 | faiss.index 184 | 185 | # test data 186 | demo_data/test_api_config.yaml 187 | assets/response.json 188 | api_config.yaml 189 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Ignore test linting to avoid conflicting changes to version stability. 2 | exclude: | 3 | (?x)^( 4 | factcheck/utils/prompt/| 5 | demo_data/| 6 | templates/| 7 | assets/| 8 | fig/ 9 | ) 10 | repos: 11 | - repo: https://github.com/pre-commit/pre-commit-hooks 12 | rev: v4.1.0 13 | hooks: 14 | - id: check-added-large-files 15 | - id: check-ast 16 | - id: check-byte-order-marker 17 | - id: check-case-conflict 18 | - id: check-json 19 | - id: check-merge-conflict 20 | - id: check-symlinks 21 | - id: check-yaml 22 | args: ["--unsafe"] 23 | - id: destroyed-symlinks 24 | - id: detect-private-key 25 | - id: end-of-file-fixer 26 | - id: requirements-txt-fixer 27 | - id: trailing-whitespace 28 | - id: fix-encoding-pragma 29 | args: [--remove] 30 | - id: mixed-line-ending 31 | args: [--fix=lf] 32 | - repo: https://github.com/psf/black 33 | rev: 23.12.1 34 | hooks: 35 | - id: black 36 | args: [--line-length=127] 37 | - repo: https://github.com/pycqa/flake8 38 | rev: 7.0.0 39 | hooks: 40 | - id: flake8 41 | - repo: https://github.com/codespell-project/codespell 42 | rev: v2.2.4 43 | hooks: 44 | - id: codespell 45 | exclude: > 46 | (?x)^( 47 | .*\.json|.*\.ipynb 48 | )$ 49 | args: [--check-filenames, -L=fpt, '--ignore-regex=.{1024}|.*codespell-ignore.*'] 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 LibrAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |
3 | 4 | LibrAI Logo 5 | 6 |
7 |
8 | 9 | # Loki: An Open-source Tool for Fact Verification 10 | 11 | ## Overview 12 | Loki is our open-source solution designed to automate the process of verifying factuality. It provides a comprehensive pipeline for dissecting long texts into individual claims, assessing their worthiness for verification, generating queries for evidence search, crawling for evidence, and ultimately verifying the claims. This tool is especially useful for journalists, researchers, and anyone interested in the factuality of information. To stay updated, please subscribe to our newsletter at [our website](https://www.librai.tech/) or join us on [Discord](https://discord.gg/ssxtFVbDdT)! 13 | 14 | 15 | ## Quick Start 16 | 17 | ### Clone the repository and navigate to the project directory 18 | ```bash 19 | git clone https://github.com/Libr-AI/OpenFactVerification.git 20 | cd OpenFactVerification 21 | ``` 22 | 23 | ### Installation with poetry (option 1) 24 | 1. Install Poetry by following its [installation guide](https://python-poetry.org/docs/). 25 | 2. Install all dependencies by running: 26 | ```bash 27 | poetry install 28 | ``` 29 | 30 | ### Installation with pip (option 2) 31 | 1. Create a Python environment at version 3.9 or newer and activate it. 32 | 33 | 2. Navigate to the project directory and install the required packages: 34 | ```bash 35 | pip install -r requirements.txt 36 | ``` 37 | 38 | ### Configure API keys 39 | 40 | You can export the essential API keys as environment variables. 41 | 42 | - Example: Export the essential API keys to the environment 43 | ```bash 44 | export SERPER_API_KEY=... # this is required for evidence retrieval if Serper is used 45 | export OPENAI_API_KEY=... # this is required for all tasks 46 | ``` 47 | 48 | Alternatively, you can configure API keys via a YAML file; see the [user guide](docs/user_guide.md) for more details. 49 | 50 | A sample test case: 51 |
52 | drawing 53 |
54 | 55 | ## Usage 56 | 57 | The main interface of the Loki fact-checker is located in `factcheck/__init__.py`, which contains the `check_text` method. This method integrates the complete fact verification pipeline, where each functionality is encapsulated in its own class as described in the Features section. 58 | 59 | #### Used as a Library 60 | 61 | ```python 62 | from factcheck import FactCheck 63 | 64 | factcheck_instance = FactCheck() 65 | 66 | # Example text 67 | text = "Your text here" 68 | 69 | # Run the fact-check pipeline 70 | results = factcheck_instance.check_text(text) 71 | print(results) 72 | ``` 73 | 74 | #### Used as a Web App 75 | ```bash 76 | python webapp.py --api_config demo_data/api_config.yaml 77 | ``` 78 | 79 | #### Multimodal Usage 80 | 81 | ```bash 82 | # String 83 | python -m factcheck --modal string --input "MBZUAI is the first AI university in the world" 84 | # Text 85 | python -m factcheck --modal text --input demo_data/text.txt 86 | # Speech 87 | python -m factcheck --modal speech --input demo_data/speech.mp3 88 | # Image 89 | python -m factcheck --modal image --input demo_data/image.webp 90 | # Video 91 | python -m factcheck --modal video --input demo_data/video.m4v 92 | ``` 93 | 94 | 95 | #### Customize Your Experience 96 | For advanced usage, please see our [user guide](docs/user_guide.md). 97 | 98 | ## [Try Our Online Service](https://aip.librai.tech/login) 99 | 100 | 101 | 102 | As we continue to evolve and enhance our fact-checking solution, we're excited to invite you to become an integral part of our journey. By registering for our Supporter Edition, you're not just unlocking a suite of advanced features and benefits; you're also fueling the future of trustworthy information. 103 | 104 | 105 | Below is a screenshot of our online service. 106 | [Click here to try it now!](https://aip.librai.tech/login) 107 | 108 |
109 | drawing 110 |
111 | 112 | 138 | 139 | 140 | 141 | ## Contributing to Loki project 142 | 143 | Welcome and thank you for your interest in the Loki project! We welcome contributions and feedback from the community. To get started, please refer to our [Contribution Guidelines](https://github.com/Libr-AI/OpenFactVerification/tree/main/docs/CONTRIBUTING.md). 144 | 145 | ### Acknowledgments 146 | - Special thanks to all contributors who have helped in shaping this project. 147 | 148 | 151 | 152 | 153 | ### Stay Connected and Informed 154 | 155 | Don’t miss out on the latest updates, feature releases, and community insights! We invite you to subscribe to our newsletter and become a part of our growing community. 156 | 157 | 💌 Subscribe now at [our website](https://www.librai.tech/)! 158 | 159 | 160 | 161 | ## Star History 162 | 163 | > [![Star History Chart](https://api.star-history.com/svg?repos=Libr-AI/OpenFactVerification&type=Date)](https://star-history.com/#Libr-AI/OpenFactVerification&Date) 164 | 165 | ## Cite as 166 | ``` 167 | @misc{li2024lokiopensourcetoolfact, 168 | title={Loki: An Open-Source Tool for Fact Verification}, 169 | author={Haonan Li and Xudong Han and Hao Wang and Yuxia Wang and Minghan Wang and Rui Xing and Yilin Geng and Zenan Zhai and Preslav Nakov and Timothy Baldwin}, 170 | year={2024}, 171 | eprint={2410.01794}, 172 | archivePrefix={arXiv}, 173 | primaryClass={cs.CL}, 174 | url={https://arxiv.org/abs/2410.01794}, 175 | } 176 | ``` 177 | -------------------------------------------------------------------------------- /assets/XH.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/XH.jpeg -------------------------------------------------------------------------------- /assets/cmd_example.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/cmd_example.gif -------------------------------------------------------------------------------- /assets/css/factcheck.css: -------------------------------------------------------------------------------- 1 | .controversial-span { 2 | padding: 2px; 3 | text-decoration: underline wavy rgba(255, 197, 61, 0.996); 4 | border-bottom: 2px solid rgba(255, 197, 61, 0.996); 5 | cursor: pointer; 6 | } 7 | 8 | .controversial-span:hover { 9 | padding: 2px; 10 | text-decoration: underline wavy rgba(255, 197, 61, 0.996); 11 | border-bottom: 2px solid rgba(255, 197, 61, 0.996); 12 | background: rgb(217, 217, 217); 13 | cursor: pointer; 14 | } 15 | 16 | .refutes-span { 17 | padding: 2px; 18 | text-decoration: underline wavy rgba(209, 2, 0, 0.533); 19 | border-bottom: 2px solid rgba(209, 2, 0, 0.533); 20 | cursor: pointer; 21 | } 22 | 23 | .refutes-span:hover { 24 | padding: 2px; 25 | text-decoration: underline wavy rgba(209, 2, 0, 0.533); 26 | border-bottom: 2px solid rgba(209, 2, 0, 0.533); 27 | background: rgb(217, 217, 217); 28 | cursor: pointer; 29 | } 30 | 31 | .support-span { 32 | padding: 2px; 33 | text-decoration: none; 34 | border-bottom: 2px solid rgba(255, 197, 61, 0); 35 | cursor: pointer; 36 | } 37 | 38 | .support-span:hover { 39 | padding: 2px; 40 | text-decoration: none; 41 | border-bottom: 2px solid rgba(255, 197, 61, 0); 42 | background: rgb(217, 217, 217); 43 | cursor: pointer; 44 | } 45 | 
-------------------------------------------------------------------------------- /assets/css/images/cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/css/images/cover.jpg -------------------------------------------------------------------------------- /assets/css/images/pattern.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/css/images/pattern.png -------------------------------------------------------------------------------- /assets/librai_librai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/librai_librai.png -------------------------------------------------------------------------------- /assets/online_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/online_screenshot.png -------------------------------------------------------------------------------- /assets/web_input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/web_input.png -------------------------------------------------------------------------------- /assets/web_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/web_result.png -------------------------------------------------------------------------------- /assets/webfonts/fa-brands-400.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/webfonts/fa-brands-400.ttf -------------------------------------------------------------------------------- /assets/webfonts/fa-brands-400.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/webfonts/fa-brands-400.woff2 -------------------------------------------------------------------------------- /assets/webfonts/fa-regular-400.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/webfonts/fa-regular-400.ttf -------------------------------------------------------------------------------- /assets/webfonts/fa-regular-400.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/webfonts/fa-regular-400.woff2 -------------------------------------------------------------------------------- /assets/webfonts/fa-solid-900.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/webfonts/fa-solid-900.ttf -------------------------------------------------------------------------------- /assets/webfonts/fa-solid-900.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/webfonts/fa-solid-900.woff2 -------------------------------------------------------------------------------- /assets/webfonts/fa-v4compatibility.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/webfonts/fa-v4compatibility.ttf -------------------------------------------------------------------------------- /assets/webfonts/fa-v4compatibility.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/assets/webfonts/fa-v4compatibility.woff2 -------------------------------------------------------------------------------- /demo_data/image.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/demo_data/image.webp -------------------------------------------------------------------------------- /demo_data/speech.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/demo_data/speech.mp3 -------------------------------------------------------------------------------- /demo_data/text.txt: -------------------------------------------------------------------------------- 1 | University (SDU) is a prestigious and comprehensive research university located in Jinan, the capital city of Shandong Province, China. It is one of the oldest and largest universities in China, with a rich history dating back to the 19th century.Founded in 1901, Shandong University has evolved into a multi-disciplinary institution offering a wide range of academic programs across various fields, including natural sciences, engineering, humanities, social sciences, medicine, management, and more. The university is committed to excellence in education, research, and innovation, with a mission to contribute to the advancement of knowledge and the development of society. 2 | -------------------------------------------------------------------------------- /demo_data/video.m4v: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Libr-AI/OpenFactVerification/6e1ee9e5159abe544c342689093afe12b908a419/demo_data/video.m4v -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # OpenFactVerification Documentation 2 | 3 | Welcome to the OpenFactVerification (Loki) documentation! This repository contains the codebase for the Loki project, which is a fact-checking pipeline that leverages state-of-the-art language models to verify the veracity of textual claims. 
The pipeline is designed to be modular, allowing users to easily customize the evidence retrieval, language model, and prompt used in the fact-checking process. 4 | 5 | ## Related Documents 6 | 7 | * For users who want to try advanced features, please refer to the [User Guide](https://github.com/Libr-AI/OpenFactVerification/tree/main/docs/user_guide.md). 8 | 9 | * For developers who want to contribute to the project, please go to the [How-to-contribute](#how-to-contribute) section, and also [Development Guide](https://github.com/Libr-AI/OpenFactVerification/tree/main/docs/development_guide.md). 10 | 11 | 12 | ## How to Contribute 13 | We welcome contributions and feedback from the community and recommend a few best practices to make your contributions or reported errors easier to assist with. 14 | 15 | ### For Pull Requests 16 | 17 | * PRs should be titled descriptively, and be opened with a brief description of the scope and intent of the new contribution. 18 | * New features should have appropriate documentation added alongside them. 19 | * Aim for code maintainability, and minimize code copying. 20 | 21 | * Please make sure the code style is checked and aligned, see [Code Style](#code-style) for more details. 22 | 23 | ### For Feature Requests 24 | 25 | * Provide a short paragraph's worth of description. What is the feature you are requesting? What is its motivation, and an example use case of it? How does this differ from what is currently supported? 26 | 27 | ### For Bug Reports 28 | 29 | * Provide a short description of the bug. 30 | * Provide a reproducible example--what is the command you run with our library that results in this error? Have you tried any other steps to resolve it? 31 | * Provide a full error traceback of the error that occurs, if applicable. A one-line error message or small screenshot snippet is unhelpful without the surrounding context. 32 | * Note what version of the codebase you are using, and any specifics of your environment and setup that may be relevant. 33 | 34 | ## Code Style 35 | 36 | Loki uses [black](https://github.com/psf/black) and [flake8](https://pypi.org/project/flake8/) to enforce code style, via [pre-commit](https://pre-commit.com/). Before submitting a pull request, please run the following commands to ensure your code is properly formatted: 37 | 38 | ```bash 39 | pip install pre-commit 40 | pre-commit install 41 | pre-commit run --all-files 42 | ``` 43 | 44 | ## How Can I Get Involved? 45 | 46 | There are a number of distinct ways to contribute to Loki: 47 | 48 | * Implement new features or fix bugs by submitting a pull request: If you want to use a new model or retriever, or if you have an idea for a new feature, we would love to see your contributions. 49 | * We have our [development plan](https://github.com/Libr-AI/OpenFactVerification/tree/main/docs/development_plan.md) that outlines the roadmap for the project. If you are interested in contributing to any of the tasks, please join our [Discord](https://discord.gg/ssxtFVbDdT) and direct message to @Haonan Li. 50 | 51 | We hope you find this project interesting and would like to contribute to it. If you have any questions, please feel free to reach out to us on our [Discord](https://discord.gg/ssxtFVbDdT). 52 | -------------------------------------------------------------------------------- /docs/RELEASE_LOG.md: -------------------------------------------------------------------------------- 1 | # Release Log 2 | 3 | ## v0.0.3 4 | 5 | ### New Features 6 | 1. 
**Keep Original Text:** Add a mapping from each claim to its position in the original text, and add a `restore_claims` function to the **decomposer** to restore the decomposed claims to the original user input. 7 | 2. **Data Structure:** Define the data structures for several intermediate processing functions and the final output in `utils/data_class.py`. 8 | 3. **Speed Up:** Parallelize the `restore_claims`, `identify_checkworthiness`, and `query_generation` functions to speed up the pipeline. 9 | 4. **Token Count:** Add token counting for all components. 10 | 5. **Evidence-wise Verification:** Change the verification logic from passing all evidence to a single LLM call to verifying the claim against each piece of evidence in a separate LLM call. 11 | 6. **Factuality Value:** Remove the deterministic output and change factuality to a number in the range [0,1], calculated from the judgments against the individual pieces of evidence. 12 | 7. **Webpage:** Redesign the webpage. 13 | 8. **Default LLM:** Change to GPT-4o. 14 | 15 | ### Bug Fixes 16 | 1. **Serper Max Queries:** The Serper API allows a maximum of 100 queries per request; queries are now split into multiple requests when this limit is exceeded. 17 | 2. **Evidence and URL:** Link each piece of evidence to its corresponding URL. 18 | 19 | ## v0.0.2 20 | 21 | ### New Features 22 | 1. **API Key Handling:** Transitioned from creating key files via copying to dynamically reading all API keys from a YAML file, streamlining configuration processes. 23 | 2. **Unified Configuration Dictionary:** Replaced platform-specific dictionaries with a unified dictionary that aligns with environment variable naming conventions, enhancing consistency and maintainability. 24 | 3. **Model Switching:** Introduced a `--model` parameter that allows switching between different models, currently supporting OpenAI and Anthropic. 25 | 4. **Modular Architecture:** Restructured the codebase into one Base class file and individual class files for each model, enhancing modularity and clarity. 26 | 5. **Base Class Redefinition:** Redefined the Base class to abstract asynchronous operations and other functionalities. Users customizing models need only override three functions. 27 | 6. **Prompt Switching:** Added a `--prompt` parameter for switching between predefined prompts, initially supporting prompts for OpenAI and Anthropic. 28 | 7. **Prompt Definitions via YAML and JSON:** Enabled prompt definitions using YAML and JSON, allowing prompts to be automatically read from corresponding YAML or JSON files when the prompt parameter ends with `.yaml` or `.json`. 29 | 8. **Search Engine Switching:** Introduced a `--retriever` parameter to switch between different search engines, currently supporting Serper and Google. 30 | 9. **Webapp Frontend Optimization:** Optimized the web application frontend to prevent duplicate requests during processing, including disabling the submit button after a click and displaying a timer during processing. 31 | 10. **Client Switching:** Introduced a `--client` parameter that allows switching between different clients (chat APIs), currently supporting OpenAI-compatible APIs (for both local and official models) and the Anthropic chat API client. 32 | 33 | 34 | 35 | ## v0.0.1 36 | 37 | Initial release of Loki.
38 | -------------------------------------------------------------------------------- /docs/development_guide.md: -------------------------------------------------------------------------------- 1 | # Development Guide 2 | 3 | This documentation page provides a guide for developers who want to contribute to the Loki project, for versions v0.0.3 and later. 4 | 5 | - [Development Guide](#development-guide) 6 | - [Framework Introduction](#framework-introduction) 7 | - [Development Plan](#development-plan) 8 | 9 | 10 | 11 | ## Framework Introduction 12 | 13 | Loki leverages state-of-the-art language models to verify the veracity of textual claims. The pipeline is designed to be modular in `factcheck/core/`, which includes the following components: 14 | 15 | - **Decomposer:** Breaks down extensive texts into digestible, independent claims, setting the stage for detailed analysis. It also provides the mapping between the original text and the decomposed claims. 16 | - **Checkworthy:** Assesses each claim's potential checkworthiness, filtering out vague or ambiguous statements as well as statements of opinion. For example, vague claims like "MBZUAI has a vast campus" are considered unworthy because of the ambiguous nature of "vast." 17 | - **Query Generator:** Transforms check-worthy claims into precise queries, ready to navigate the vast expanse of the internet in search of evidence. 18 | - **Evidence Retriever:** Retrieves relevant evidence that forms the foundation of informed verification; currently, for open-domain questions, we use Google Search (via the Serper API). 19 | - **ClaimVerify:** Judges each piece of evidence against the claim, determining whether it supports, refutes, or is irrelevant to the claim. 20 | 21 | To support each component's functionality, Loki relies on the following utils: 22 | - **Language Model:** Currently, 4 out of 5 components (Decomposer, Checkworthy, Query Generator, and ClaimVerify) use language models (LLMs) to perform their tasks. The supported LLMs are defined in `factcheck/utils/llmclient/` and can be easily extended to support more LLMs. 23 | - **Prompt:** Prompts are a crucial part of using LLMs, and are usually optimized for each LLM to achieve the best performance. The prompts are defined in `factcheck/utils/prompt/` and can be easily extended to support more prompts. 24 | 25 | ### Support a New LLM Client 26 | 27 | A new LLM client should be defined in `factcheck/utils/llmclient/` and should be a subclass of `BaseClient` from `factcheck/utils/llmclient/base.py`. The client should implement the `_call` method, which takes a single string input and returns a string output. 28 | 29 | > **_Note_:** 30 | > To ensure the sanity of the pipeline, the output of the LLM should be a string that can be directly parsed by Python's `eval` method. Usually, the output should be a `list` or `dict` in the form of a string. 31 | 32 | We find that ChatGPT [json_mode](https://platform.openai.com/docs/guides/text-generation/json-mode) is a good choice for the LLM, as it can generate structured output. 33 | To support a new LLM, you may need to implement a post-processing step to convert the output of the LLM to a structured format. 34 | 35 | ### Support a New Search Engine (Retriever) 36 | A new evidence retriever should be defined in `factcheck/core/Retriever/` and should be a subclass of `EvidenceRetriever` from `factcheck/core/Retriever/base.py`. The retriever should implement the `retrieve_evidence` method.
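Below is a minimal sketch of such a retriever. It assumes the constructor arguments used in `factcheck/__init__.py` (`llm_client` and `api_config`) and that `retrieve_evidence` maps each claim to a list of evidence items; the `MyRetriever` name, the `_search_single_query` helper, and the call to the base constructor are illustrative assumptions rather than part of the codebase.

```python
from factcheck.core.Retriever.base import EvidenceRetriever


class MyRetriever(EvidenceRetriever):
    """Illustrative skeleton for plugging a custom search backend into Loki."""

    def __init__(self, llm_client=None, api_config: dict = None):
        # Assumption: the base class accepts the API config dict; adjust this to
        # the actual signature in factcheck/core/Retriever/base.py if it differs.
        super().__init__(api_config)
        self.llm_client = llm_client

    def retrieve_evidence(self, claim_queries_dict: dict) -> dict:
        """Return a mapping from each claim to a list of evidence items."""
        claim_evidences_dict = {}
        for claim, queries in claim_queries_dict.items():
            evidences = []
            for query in queries:
                evidences.extend(self._search_single_query(query))
            claim_evidences_dict[claim] = evidences
        return claim_evidences_dict

    def _search_single_query(self, query: str) -> list:
        # TODO: call your search engine here and normalise the hits into evidence items.
        return []
```

You will likely also need to register the new class wherever `retriever_mapper` resolves retriever names (see `factcheck/core/Retriever/__init__.py`) so that it can be selected with the `--retriever` argument.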
37 | 38 | ### Support a New Language 39 | 40 | To support a new language, you need to create a new file in `factcheck/utils/prompt/` with a name following the pattern `<model>_prompt_<language>.py`. For example, to create a prompt suite for ChatGPT in Chinese, you can create a file named `chatgpt_prompt_zh.py`. 41 | 42 | The prompt file should contain a class that is a subclass of `BasePrompt` from `factcheck/utils/prompt/base.py`, and the class should be registered in `factcheck/utils/prompt/__init__.py`. 43 | 44 | 45 | ### Prompt Optimization 46 | 47 | To optimize the prompt for a specific LLM, you can modify the prompt in `factcheck/utils/prompt/`. After optimization, you can run our minimal test in `script/minimal_test.py`; you are also welcome to add more test cases to the minimal test sets in `script/minimal_test_en.json` and `script/minimal_test_zh.json`. 48 | 49 | 50 | 51 | ## Development Plan 52 | 53 | As Loki continues to evolve, our development plan focuses on broadening capabilities and enhancing flexibility to meet the diverse needs of our users. Here are the key areas we are working on: 54 | 55 | ### 1. Support for Multiple Models 56 | - **Broader Model Compatibility:** 57 | - Integration with leading AI models besides ChatGPT and Claude to diversify fact-checking capabilities, including Command R and Gemini. 58 | - Implementation of self-hosted model options for enhanced privacy and control, e.g., FastChat, TGI, and vLLM. 59 | 60 | ### 2. Model-specific Prompt Engineering 61 | - **Unit Testing for Prompts:** 62 | - Develop robust unit tests for each step to ensure prompt reliability and accuracy across different scenarios. 63 | 64 | ### 3. Expanded Search Engine Support 65 | - **Diverse Search Engines:** 66 | - Incorporate a variety of search engines, including Bing and ScraperAPI, to broaden search capabilities. 67 | - Integration with [Searxng](https://github.com/searxng/searxng), an open-source metasearch engine. 68 | - Support for specialized indexes like LlamaIndex and LangChain, and the ability to search local documents. 69 | 70 | ### 4. Deployment and Scalability 71 | - **Dockerization:** 72 | - Packaging Loki into Docker containers to simplify deployment and scale-up operations, ensuring Loki can be easily set up and maintained across different environments. 73 | 74 | ### 5. Multi-lingual Support 75 | - **Language Expansion:** 76 | - Support for additional languages beyond English, including Chinese, Arabic, etc., to cater to a global user base. 77 | 78 | 79 | We are committed to these enhancements to make Loki not just more powerful, but also more adaptable to the needs of a global user base. Stay tuned as we roll out these exciting developments!
80 | -------------------------------------------------------------------------------- /docs/user_guide.md: -------------------------------------------------------------------------------- 1 | # User Guide 2 | 3 | - [User Guide](#user-guide) 4 | - [Installation](#installation) 5 | - [Clone the repository and navigate to the project directory](#clone-the-repository-and-navigate-to-the-project-directory) 6 | - [Installation with poetry (option 1)](#installation-with-poetry-option-1) 7 | - [Installation with pip (option 2)](#installation-with-pip-option-2) 8 | - [Configure API Keys](#configure-api-keys) 9 | - [Environment Variables](#environment-variables) 10 | - [Configuration Files](#configuration-files) 11 | - [Additional API Configurations](#additional-api-configurations) 12 | - [Basic Usage](#basic-usage) 13 | - [Used in Command Line](#used-in-command-line) 14 | - [Used as a Library](#used-as-a-library) 15 | - [Used as a Web App](#used-as-a-web-app) 16 | - [Advanced Features](#advanced-features) 17 | - [Multimodality](#multimodality) 18 | - [Customized Prompts](#customized-prompts) 19 | - [Switch Between Models](#switch-between-models) 20 | - [Switch Between Search Engine](#switch-between-search-engine) 21 | 22 | ## Installation 23 | 24 | ### Clone the repository and navigate to the project directory 25 | ```bash 26 | git clone https://github.com/Libr-AI/OpenFactVerification.git 27 | cd OpenFactVerification 28 | ``` 29 | 30 | ### Installation with poetry (option 1) 31 | 1. Install Poetry by following its [installation guide](https://python-poetry.org/docs/). 32 | 2. Install all dependencies by running: 33 | ```bash 34 | poetry install 35 | ``` 36 | 37 | ### Installation with pip (option 2) 38 | 1. Create a Python environment at version 3.9 or newer and activate it. 39 | 40 | 2. Navigate to the project directory and install the required packages: 41 | ```bash 42 | pip install -r requirements.txt 43 | ``` 44 | 45 | ## Configure API Keys 46 | 47 | API keys can be configured from both **Environment Variables** and **Configuration Files**. 48 | Specifically, the tool loads API keys from environment variables or from a configuration file at initialization; the configuration file takes precedence. The related implementation can be found in `factcheck/utils/api_config.py`. 49 | 50 | ### Environment Variables 51 | Example: Export the essential API keys to the environment 52 | ```bash 53 | export SERPER_API_KEY=... # this is required for evidence retrieval if Serper is used 54 | export OPENAI_API_KEY=... # this is required for all tasks 55 | export ANTHROPIC_API_KEY=... # this is required only if you want to replace OpenAI with Anthropic 56 | export LOCAL_API_KEY=... # this is required only if you want to use a local LLM 57 | export LOCAL_API_URL=... # this is required only if you want to use a local LLM 58 | ``` 59 | 60 | ### Configuration Files 61 | 62 | Alternatively, you can store the API information in a YAML file with the same key names as the environment variables and pass the path to the YAML file via the `--api_config` argument.
63 | 64 | Example: Pass the path to the API configuration file 65 | 66 | ```YAML 67 | SERPER_API_KEY: null 68 | 69 | OPENAI_API_KEY: null 70 | 71 | ANTHROPIC_API_KEY: null 72 | 73 | LOCAL_API_KEY: null 74 | LOCAL_API_URL: null 75 | ``` 76 | 77 | To load API configurations from a YAML file, please specify the path to the YAML file with the `--api_config` argument: 78 | 79 | ```bash 80 | python -m factcheck --modal string --input "MBZUAI is the first AI university in the world" --api_config PATH_TO_CONFIG_FILE/api_config.yaml 81 | ``` 82 | 83 | ### Additional API Configurations 84 | 85 | The supported API configuration variables are pre-defined in `factcheck/utils/api_config.py`. 86 | ```python 87 | # Define all keys for the API configuration 88 | keys = [ 89 | "SERPER_API_KEY", 90 | "OPENAI_API_KEY", 91 | "ANTHROPIC_API_KEY", 92 | "LOCAL_API_KEY", 93 | "LOCAL_API_URL", 94 | ] 95 | ``` 96 | 97 | Only these variables can be loaded from **Environment Variables**. If additional variables are required, we recommend defining them in a YAML file. All variables in the API configuration file will be loaded automatically. 98 | 99 | ## Basic Usage 100 | 101 | ### Used in Command Line 102 | 103 | ```bash 104 | python -m factcheck --input "MBZUAI is the first AI university in the world" 105 | ``` 106 | 107 | ### Used as a Library 108 | 109 | ```python 110 | from factcheck import FactCheck 111 | 112 | factcheck_instance = FactCheck() 113 | 114 | # Example text 115 | text = "Your text here" 116 | 117 | # Run the fact-check pipeline 118 | results = factcheck_instance.check_text(text) 119 | print(results) 120 | ``` 121 | ### Used as a Web App 122 | 123 | ```bash 124 | python webapp.py --api_config demo_data/api_config.yaml 125 | ``` 126 | 127 |

128 |
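The command-line and web-app entry points above read the API configuration YAML for you (via `--api_config`). When using Loki as a library, you can do the same by loading the file yourself and passing the resulting dictionary to `FactCheck`. The sketch below assumes the bundled `factcheck/config/api_config.yaml` has been filled in with your keys; any YAML file with the same key names works.

```python
from factcheck import FactCheck
from factcheck.utils.utils import load_yaml

# Load API keys from a YAML file (same key names as the environment variables).
api_config = load_yaml("factcheck/config/api_config.yaml")

factcheck_instance = FactCheck(default_model="gpt-4o", api_config=api_config)

# Run the fact-check pipeline and inspect the overall factuality score.
results = factcheck_instance.check_text("MBZUAI is the first AI university in the world")
print(results["summary"]["factuality"])
```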

129 | 130 | ## Advanced Features 131 | 132 | ### Multimodality 133 | 134 | Different modalities (text, speech, image, and video) are unified in this tool by converting them into text, which is then verified by the standard text fact verification pipeline. 135 | 136 | ```bash 137 | # String 138 | python -m factcheck --modal string --input "MBZUAI is the first AI university in the world" 139 | # Text 140 | python -m factcheck --modal text --input demo_data/text.txt 141 | # Speech 142 | python -m factcheck --modal speech --input demo_data/speech.mp3 143 | # Image 144 | python -m factcheck --modal image --input demo_data/image.webp 145 | # Video 146 | python -m factcheck --modal video --input demo_data/video.m4v 147 | ``` 148 | 149 | ### Customized Prompts 150 | Prompts for each step can be specified in a YAML/JSON file. Please see `factcheck/config/sample_prompt.yaml` as an example. 151 | 152 | For now, each file contains four prompts, for claim decomposition, claim checkworthiness, query generation, and claim verification, respectively. 153 | 154 | When using your own prompts, please use `--prompt` to specify the prompt file path. 155 | 156 | 157 | ```bash 158 | python -m factcheck --input "MBZUAI is the first AI university in the world" --prompt PATH_TO_PROMPT/sample_prompt.yaml 159 | ``` 160 | 161 | 162 | ### Switch Between Models 163 | 164 | Currently, Loki supports models from OpenAI and Anthropic, as well as locally hosted models. To specify the model version used for fact-checking, there are two arguments: `--model` and `--client`. 165 | Please see `factcheck/utils/llmclient/__init__.py` for details. 166 | 167 | | Model | --model | --client | 168 | |-----------|----------------|--------------| 169 | | OpenAI | gpt-VERSION | None | 170 | | Anthropic | claude-VERSION | None | 171 | | Local | MODEL_NAME | local_openai | 172 | 173 | 174 | ```bash 175 | # OpenAI 176 | python -m factcheck --modal string --input "MBZUAI is the first AI university in the world" --model gpt-4-turbo 177 | 178 | # Anthropic 179 | python -m factcheck --modal string --input "MBZUAI is the first AI university in the world" --model claude-3-opus-20240229 180 | 181 | # Local 182 | python -m factcheck --modal string --input "MBZUAI is the first AI university in the world" --client local_openai --model wizardlm2 183 | ``` 184 | 185 | The prompt can be model-specific, especially if we ask the model to output a JSON-format response. We have not yet had a chance to support all models, so you will have to adapt your own prompts when using models other than OpenAI's. 186 | 187 | In addition, when using `local_openai` models, please make sure to specify `LOCAL_API_KEY` and `LOCAL_API_URL`. 188 | 189 | ### Switch Between Search Engine 190 | Currently, Google Search and Serper are supported. You can switch between them with the `--retriever` argument.
191 | 192 | 193 | ```bash 194 | # Serper 195 | python -m factcheck --modal string --input "MBZUAI is the first AI university in the world" --retriever serper 196 | 197 | # Google 198 | python -m factcheck --modal string --input "MBZUAI is the first AI university in the world" --retriever google 199 | ``` 200 | 201 | You can get a serper key from https://serper.dev/ 202 | -------------------------------------------------------------------------------- /factcheck/__init__.py: -------------------------------------------------------------------------------- 1 | import concurrent.futures 2 | import time 3 | import tiktoken 4 | 5 | from dataclasses import dataclass, asdict 6 | from factcheck.utils.llmclient import CLIENTS, model2client 7 | from factcheck.utils.prompt import prompt_mapper 8 | from factcheck.utils.logger import CustomLogger 9 | from factcheck.utils.api_config import load_api_config 10 | from factcheck.utils.data_class import PipelineUsage, FactCheckOutput, ClaimDetail, FCSummary 11 | from factcheck.core import ( 12 | Decompose, 13 | Checkworthy, 14 | QueryGenerator, 15 | retriever_mapper, 16 | ClaimVerify, 17 | ) 18 | 19 | logger = CustomLogger(__name__).getlog() 20 | 21 | 22 | class FactCheck: 23 | def __init__( 24 | self, 25 | default_model: str = "gpt-4o", 26 | client: str = None, 27 | prompt: str = "chatgpt_prompt", 28 | retriever: str = "serper", 29 | decompose_model: str = None, 30 | checkworthy_model: str = None, 31 | query_generator_model: str = None, 32 | evidence_retrieval_model: str = None, 33 | claim_verify_model: str = None, # "gpt-3.5-turbo", 34 | api_config: dict = None, 35 | num_seed_retries: int = 3, 36 | ): 37 | # TODO: better handle raw token count 38 | self.encoding = tiktoken.get_encoding("cl100k_base") 39 | 40 | self.prompt = prompt_mapper(prompt_name=prompt) 41 | 42 | # load configures for API 43 | self.load_config(api_config=api_config) 44 | 45 | # llms for each step (sub-module) 46 | step_models = { 47 | "decompose_model": decompose_model, 48 | "checkworthy_model": checkworthy_model, 49 | "query_generator_model": query_generator_model, 50 | "evidence_retrieval_model": evidence_retrieval_model, 51 | "claim_verify_model": claim_verify_model, 52 | } 53 | 54 | for key, _model_name in step_models.items(): 55 | _model_name = default_model if _model_name is None else _model_name 56 | print(f"== Init {key} with model: {_model_name}") 57 | if client is not None: 58 | logger.info(f"== Use specified client: {client}") 59 | LLMClient = CLIENTS[client] 60 | else: 61 | logger.info("== LLMClient is not specified, use default llm client.") 62 | LLMClient = model2client(_model_name) 63 | setattr(self, key, LLMClient(model=_model_name, api_config=self.api_config)) 64 | 65 | # sub-modules 66 | self.decomposer = Decompose(llm_client=self.decompose_model, prompt=self.prompt) 67 | self.checkworthy = Checkworthy(llm_client=self.checkworthy_model, prompt=self.prompt) 68 | self.query_generator = QueryGenerator(llm_client=self.query_generator_model, prompt=self.prompt) 69 | self.evidence_crawler = retriever_mapper(retriever_name=retriever)( 70 | llm_client=self.evidence_retrieval_model, api_config=self.api_config 71 | ) 72 | self.claimverify = ClaimVerify(llm_client=self.claim_verify_model, prompt=self.prompt) 73 | self.attr_list = ["decomposer", "checkworthy", "query_generator", "evidence_crawler", "claimverify"] 74 | self.num_seed_retries = num_seed_retries 75 | 76 | logger.info("===Sub-modules Init Finished===") 77 | 78 | def load_config(self, api_config: dict) -> None: 79 | # 
Load API config 80 | self.api_config = load_api_config(api_config) 81 | 82 | def check_text(self, raw_text: str): 83 | # first clear current usage 84 | self._reset_usage() 85 | 86 | st_time = time.time() 87 | # step 1 88 | claims = self.decomposer.getclaims(doc=raw_text, num_retries=self.num_seed_retries) 89 | # Parallel run restore claims and checkworthy 90 | with concurrent.futures.ThreadPoolExecutor() as executor: 91 | future_claim2doc = executor.submit( 92 | self.decomposer.restore_claims, doc=raw_text, claims=claims, num_retries=self.num_seed_retries 93 | ) 94 | # step 2 95 | future_checkworthy_claims = executor.submit( 96 | self.checkworthy.identify_checkworthiness, claims, num_retries=self.num_seed_retries 97 | ) 98 | # step 3 99 | future_claim_queries_dict = executor.submit(self.query_generator.generate_query, claims=claims) 100 | 101 | # Wait for all futures to complete 102 | claim2doc = future_claim2doc.result() 103 | checkworthy_claims, claim2checkworthy = future_checkworthy_claims.result() 104 | claim_queries_dict = future_claim_queries_dict.result() 105 | 106 | checkworthy_claims_S = set(checkworthy_claims) 107 | claim_queries_dict = {k: v for k, v in claim_queries_dict.items() if k in checkworthy_claims_S} 108 | 109 | for i, (claim, origin) in enumerate(claim2doc.items()): 110 | logger.info(f"== raw_text claims {i} --- {claim} --- {origin}") 111 | for i, claim in enumerate(checkworthy_claims): 112 | logger.info(f"== Checkworthy claims {i}: {claim}") 113 | 114 | if checkworthy_claims == []: 115 | return self._finalize_factcheck(raw_text=raw_text, claim_detail=[], return_dict=True) 116 | 117 | for k, v in claim_queries_dict.items(): 118 | logger.info(f"== Claim: {k} --- Queries: {v}") 119 | 120 | step123_time = time.time() 121 | 122 | # step 4 123 | claim_evidences_dict = self.evidence_crawler.retrieve_evidence(claim_queries_dict=claim_queries_dict) 124 | for claim, evidences in claim_evidences_dict.items(): 125 | logger.info(f"== Claim: {claim}") 126 | logger.info(f"== Evidence: {evidences}\n") 127 | step4_time = time.time() 128 | 129 | # step 5 130 | claim_verifications_dict = self.claimverify.verify_claims(claim_evidences_dict=claim_evidences_dict) 131 | for k, v in claim_verifications_dict.items(): 132 | logger.info(f"== Claim: {k} --- Verify: {v}") 133 | step5_time = time.time() 134 | logger.info( 135 | f"== State: Done! \n Total time: {step5_time-st_time:.2f}s. 
(create claims:{step123_time-st_time:.2f}s ||| retrieve:{step4_time-step123_time:.2f}s ||| verify:{step5_time-step4_time:.2f}s)" 136 | ) 137 | 138 | claim_detail = self._merge_claim_details( 139 | claim2doc=claim2doc, 140 | claim2checkworthy=claim2checkworthy, 141 | claim2queries=claim_queries_dict, 142 | claim2evidences=claim_evidences_dict, 143 | claim2verifications=claim_verifications_dict, 144 | ) 145 | 146 | return self._finalize_factcheck(raw_text=raw_text, claim_detail=claim_detail, return_dict=True) 147 | 148 | def _get_usage(self): 149 | return PipelineUsage(**{attr: getattr(self, attr).llm_client.usage for attr in self.attr_list}) 150 | 151 | def _reset_usage(self): 152 | for attr in self.attr_list: 153 | getattr(self, attr).llm_client.reset_usage() 154 | 155 | def _merge_claim_details( 156 | self, claim2doc: dict, claim2checkworthy: dict, claim2queries: dict, claim2evidences: dict, claim2verifications: dict 157 | ) -> list[ClaimDetail]: 158 | claim_details = [] 159 | for i, (claim, origin) in enumerate(claim2doc.items()): 160 | if claim in claim2verifications: 161 | assert claim in claim2queries, f"Claim {claim} not found in claim2queries." 162 | assert claim in claim2evidences, f"Claim {claim} not found in claim2evidences." 163 | 164 | evidences = claim2verifications.get(claim, {}) 165 | labels = list(map(lambda x: x.relationship, evidences)) 166 | if labels.count("SUPPORTS") + labels.count("REFUTES") == 0: 167 | factuality = "No evidence found." 168 | else: 169 | factuality = labels.count("SUPPORTS") / (labels.count("REFUTES") + labels.count("SUPPORTS")) 170 | 171 | claim_obj = ClaimDetail( 172 | id=i, 173 | claim=claim, 174 | checkworthy=True, 175 | checkworthy_reason=claim2checkworthy.get(claim, "No reason provided, please report issue."), 176 | origin_text=origin["text"], 177 | start=origin["start"], 178 | end=origin["end"], 179 | queries=claim2queries[claim], 180 | evidences=evidences, 181 | factuality=factuality, 182 | ) 183 | else: 184 | claim_obj = ClaimDetail( 185 | id=i, 186 | claim=claim, 187 | checkworthy=False, 188 | checkworthy_reason=claim2checkworthy.get(claim, "No reason provided, please report issue."), 189 | origin_text=origin["text"], 190 | start=origin["start"], 191 | end=origin["end"], 192 | queries=[], 193 | evidences=[], 194 | factuality="Nothing to check.", 195 | ) 196 | claim_details.append(claim_obj) 197 | return claim_details 198 | 199 | def _finalize_factcheck( 200 | self, raw_text: str, claim_detail: list[ClaimDetail] = None, return_dict: bool = True 201 | ) -> FactCheckOutput: 202 | verified_claims = list(filter(lambda x: not isinstance(x.factuality, str), claim_detail)) 203 | num_claims = len(claim_detail) 204 | num_checkworthy_claims = len(list(filter(lambda x: x.factuality != "Nothing to check.", claim_detail))) 205 | num_verified_claims = len(verified_claims) 206 | num_supported_claims = len(list(filter(lambda x: x.factuality == 1, verified_claims))) 207 | num_refuted_claims = len(list(filter(lambda x: x.factuality == 0, verified_claims))) 208 | num_controversial_claims = num_verified_claims - num_supported_claims - num_refuted_claims 209 | factuality = sum(map(lambda x: x.factuality, verified_claims)) / num_verified_claims if num_verified_claims != 0 else 0 210 | 211 | summary = FCSummary( 212 | num_claims, 213 | num_checkworthy_claims, 214 | num_verified_claims, 215 | num_supported_claims, 216 | num_refuted_claims, 217 | num_controversial_claims, 218 | factuality, 219 | ) 220 | 221 | num_tokens = len(self.encoding.encode(raw_text)) 222 | 
output = FactCheckOutput( 223 | raw_text=raw_text, 224 | token_count=num_tokens, 225 | usage=self._get_usage(), 226 | claim_detail=claim_detail, 227 | summary=summary, 228 | ) 229 | 230 | if not output.attribute_check(): 231 | raise ValueError("Output attribute check failed.") 232 | 233 | logger.info(f"== Overall Factuality: {output.summary.factuality}\n") 234 | 235 | if return_dict: 236 | return asdict(output) 237 | else: 238 | return output 239 | -------------------------------------------------------------------------------- /factcheck/__main__.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | 4 | from factcheck.utils.llmclient import CLIENTS 5 | from factcheck.utils.multimodal import modal_normalization 6 | from factcheck.utils.utils import load_yaml 7 | from factcheck import FactCheck 8 | 9 | 10 | def check(args): 11 | """factcheck 12 | 13 | Args: 14 | model (str): gpt model used for factchecking 15 | modal (str): input type, supported types are str, text file, speech, image, and video 16 | input (str): input content or path to the file 17 | """ 18 | # Load API config from yaml file 19 | try: 20 | api_config = load_yaml(args.api_config) 21 | except Exception as e: 22 | print(f"Error loading api config: {e}") 23 | api_config = {} 24 | 25 | factcheck = FactCheck( 26 | default_model=args.model, client=args.client, api_config=api_config, prompt=args.prompt, retriever=args.retriever 27 | ) 28 | 29 | content = modal_normalization(args.modal, args.input) 30 | res = factcheck.check_text(content) 31 | print(json.dumps(res, indent=4)) 32 | 33 | # Save the results to lark (only for local testing) 34 | try: 35 | from factcheck.utils import lark 36 | 37 | lark.save_json_to_lark_by_level(res) 38 | except: # noqa 39 | pass 40 | 41 | 42 | if __name__ == "__main__": 43 | parser = argparse.ArgumentParser() 44 | parser.add_argument("--model", type=str, default="gpt-4o") 45 | parser.add_argument("--client", type=str, default=None, choices=CLIENTS.keys()) 46 | parser.add_argument("--prompt", type=str, default="chatgpt_prompt") 47 | parser.add_argument("--retriever", type=str, default="serper") 48 | parser.add_argument("--modal", type=str, default="text") 49 | parser.add_argument("--input", type=str, default="demo_data/text.txt") 50 | parser.add_argument("--api_config", type=str, default="factcheck/config/api_config.yaml") 51 | args = parser.parse_args() 52 | 53 | check(args) 54 | -------------------------------------------------------------------------------- /factcheck/config/api_config.yaml: -------------------------------------------------------------------------------- 1 | SERPER_API_KEY: null 2 | 3 | OPENAI_API_KEY: null 4 | 5 | ANTHROPIC_API_KEY: null 6 | 7 | LOCAL_API_KEY: null 8 | LOCAL_API_URL: null 9 | -------------------------------------------------------------------------------- /factcheck/config/sample_prompt.yaml: -------------------------------------------------------------------------------- 1 | decompose_prompt: | 2 | Your task is to decompose the text into atomic claims. 3 | The answer should be a JSON with a single key "claims", with the value of a list of strings, where each string should be a context-independent claim, representing one fact. 4 | Note that: 5 | 1. Each claim should be concise (less than 15 words) and self-contained. 6 | 2. Avoid vague references like 'he', 'she', 'it', 'this', 'the company', 'the man' and using complete names. 7 | 3. 
Generate at least one claim for each single sentence in the texts. 8 | 9 | For example, 10 | Text: Mary is a five-year old girl, she likes playing piano and she doesn't like cookies. 11 | Output: 12 | {{"claims": ["Mary is a five-year old girl.", "Mary likes playing piano.", "Mary doesn't like cookies."]}} 13 | 14 | Text: {doc} 15 | Output: 16 | 17 | checkworthy_prompt: | 18 | Your task is to evaluate each provided statement to determine if it presents information whose factuality can be objectively verified by humans, irrespective of the statement's current accuracy. Consider the following guidelines: 19 | 1. Opinions versus Facts: Distinguish between opinions, which are subjective and not verifiable, and statements that assert factual information, even if broad or general. Focus on whether there's a factual claim that can be investigated. 20 | 2. Clarity and Specificity: Statements must have clear and specific references to be verifiable (e.g., "he is a professor" is not verifiable without knowing who "he" is). 21 | 3. Presence of Factual Information: Consider a statement verifiable if it includes factual elements that can be checked against evidence or reliable sources, even if the overall statement might be broad or incorrect. 22 | Your response should be in JSON format, with each statement as a key and either "Yes" or "No" as the value, along with a brief rationale for your decision. 23 | 24 | For example, given these statements: 25 | 1. Gary Smith is a distinguished professor of economics. 26 | 2. He is a professor at MBZUAI. 27 | 3. Obama is the president of the UK. 28 | 29 | The expected output is: 30 | {{ 31 | "Gary Smith is a distinguished professor of economics.": "Yes (The statement contains verifiable factual information about Gary Smith's professional title and field.)", 32 | "He is a professor at MBZUAI.": "No (The statement cannot be verified due to the lack of clear reference to who 'he' is.)", 33 | "Obama is the president of the UK.": "Yes (This statement contain verifiable information regarding the political leadership of a country.)" 34 | }} 35 | 36 | For these statements: 37 | {texts} 38 | 39 | The output should be: 40 | 41 | 42 | qgen_prompt: | 43 | Given a claim, your task is to create minimum number of questions need to be check to verify the correctness of the claim. Output in JSON format with a single key "Questions", the value is a list of questions. For example: 44 | 45 | Claim: Your nose switches back and forth between nostrils. When you sleep, you switch about every 45 minutes. This is to prevent a buildup of mucus. It’s called the nasal cycle. 46 | Output: {{"Questions": ["Does your nose switch between nostrils?", "How often does your nostrils switch?", "Why does your nostril switch?", "What is nasal cycle?"]}} 47 | 48 | Claim: The Stanford Prison Experiment was conducted in the basement of Encina Hall, Stanford’s psychology building. 49 | Output: 50 | {{"Question":["Where was Stanford Prison Experiment was conducted?"]}} 51 | 52 | Claim: The Havel-Hakimi algorithm is an algorithm for converting the adjacency matrix of a graph into its adjacency list. It is named after Vaclav Havel and Samih Hakimi. 53 | Output: 54 | {{"Questions":["What does Havel-Hakimi algorithm do?", "Who are Havel-Hakimi algorithm named after?"]}} 55 | 56 | Claim: Social work is a profession that is based in the philosophical tradition of humanism. It is an intellectual discipline that has its roots in the 1800s. 
57 | Output: 58 | {{"Questions":["What philosophical tradition is social work based on?", "What year does social work have its root in?"]}} 59 | 60 | Claim: {claim} 61 | Output: 62 | 63 | 64 | verify_prompt: | 65 | Your task is to evaluate the accuracy of a provided statement using the accompanying evidence. Carefully review the evidence, noting that it may vary in detail and sometimes present conflicting information. Your judgment should be informed by this evidence, taking into account its relevance and reliability. 66 | 67 | Keep in mind that a lack of detail in the evidence does not necessarily indicate that the statement is inaccurate. When assessing the statement's factuality, distinguish between errors and areas where the evidence supports the statement. 68 | 69 | Please structure your response in JSON format, including the following four keys: 70 | - "reasoning": explain the thought process behind your judgment. 71 | - "error": none if the text is factual; otherwise, identify any specific inaccuracies in the statement. 72 | - "correction": none if the text is factual; otherwise, provide corrections to any identified inaccuracies, using the evidence to support your corrections. 73 | - "factuality": true if the given text is factual, false otherwise, indicating whether the statement is factual, or non-factual based on the evidence. 74 | 75 | For example: 76 | Input: 77 | [text]: MBZUAI is located in Abu Dhabi, United Arab Emirates. 78 | [evidence]: Where is MBZUAI located?\nAnswer: Masdar City - Abu Dhabi - United Arab Emirates 79 | 80 | Output: 81 | {{ 82 | "reasoning": "The evidence confirms that MBZUAI is located in Masdar City, Abu Dhabi, United Arab Emirates, so the statement is factually correct", 83 | "error": none, 84 | "correction": none, 85 | "factuality": true 86 | }} 87 | 88 | 89 | Input: 90 | [text]: Copper reacts with ferrous sulfate (FeSO4). 91 | [evidence]: Copper is less reactive metal. It has positive value of standard reduction potential. Metal with high standard reduction potential can not displace other metal with low standard reduction potential values. Hence copper can not displace iron from ferrous sulphate solution. So no change will take place. 92 | 93 | Output: 94 | {{ 95 | "reasoning": "The evidence provided confirms that copper cannot displace iron from ferrous sulphate solution, and no change will take place.", 96 | "error": "Copper does not react with ferrous sulfate as stated in the text.", 97 | "correction": "Copper does not react with ferrous sulfate as it cannot displace iron from ferrous sulfate solution.", 98 | "factuality": false 99 | }} 100 | 101 | 102 | Input 103 | [text]: {claim} 104 | [evidences]: {evidence} 105 | 106 | Output: 107 | -------------------------------------------------------------------------------- /factcheck/core/CheckWorthy.py: -------------------------------------------------------------------------------- 1 | from factcheck.utils.logger import CustomLogger 2 | 3 | logger = CustomLogger(__name__).getlog() 4 | 5 | 6 | class Checkworthy: 7 | def __init__(self, llm_client, prompt): 8 | """Initialize the Checkworthy class 9 | 10 | Args: 11 | llm_client (BaseClient): The LLM client used for identifying checkworthiness of claims. 12 | prompt (BasePrompt): The prompt used for identifying checkworthiness of claims. 
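        One way to wire this up (illustrative only; the pipeline normally constructs it for you,
        and the exact model/config values here are placeholders):
            Checkworthy(llm_client=GPTClient(model="gpt-4o", api_config=api_config), prompt=ChatGPTPrompt())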
13 | """ 14 | self.llm_client = llm_client 15 | self.prompt = prompt 16 | 17 | def identify_checkworthiness(self, texts: list[str], num_retries: int = 3, prompt: str = None) -> list[str]: 18 | """Use GPT to identify whether candidate claims are worth fact checking. if gpt is unable to return correct checkworthy_claims, we assume all texts are checkworthy. 19 | 20 | Args: 21 | texts (list[str]): a list of texts to identify whether they are worth fact checking 22 | num_retries (int, optional): maximum attempts for GPT to identify checkworthy claims. Defaults to 3. 23 | 24 | Returns: 25 | list[str]: a list of checkworthy claims, pairwise outputs 26 | """ 27 | checkworthy_claims = texts 28 | joint_texts = "\n".join([str(i + 1) + ". " + j for i, j in enumerate(texts)]) 29 | 30 | if prompt is None: 31 | user_input = self.prompt.checkworthy_prompt.format(texts=joint_texts) 32 | else: 33 | user_input = prompt.format(texts=joint_texts) 34 | 35 | messages = self.llm_client.construct_message_list([user_input]) 36 | for i in range(num_retries): 37 | response = self.llm_client.call(messages, num_retries=1, seed=42 + i) 38 | try: 39 | claim2checkworthy = eval(response) 40 | valid_answer = list( 41 | filter( 42 | lambda x: x[1].startswith("Yes") or x[1].startswith("No"), 43 | claim2checkworthy.items(), 44 | ) 45 | ) 46 | checkworthy_claims = list(filter(lambda x: x[1].startswith("Yes"), claim2checkworthy.items())) 47 | checkworthy_claims = list(map(lambda x: x[0], checkworthy_claims)) 48 | assert len(valid_answer) == len(claim2checkworthy) 49 | break 50 | except Exception as e: 51 | logger.error(f"====== Error: {e}, the LLM response is: {response}") 52 | logger.error(f"====== Our input is: {messages}") 53 | return checkworthy_claims, claim2checkworthy 54 | -------------------------------------------------------------------------------- /factcheck/core/ClaimVerify.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | from factcheck.utils.logger import CustomLogger 5 | from factcheck.utils.data_class import Evidence 6 | 7 | logger = CustomLogger(__name__).getlog() 8 | 9 | 10 | class ClaimVerify: 11 | def __init__(self, llm_client, prompt): 12 | """Initialize the ClaimVerify class 13 | 14 | Args: 15 | llm_client (BaseClient): The LLM client used for verifying the factuality of claims. 16 | prompt (BasePrompt): The prompt used for verifying the factuality of claims. 17 | """ 18 | self.llm_client = llm_client 19 | self.prompt = prompt 20 | 21 | def verify_claims(self, claim_evidences_dict, prompt: str = None) -> dict[str, list[Evidence]]: 22 | """Verify the factuality of the claims with respect to the given evidences 23 | 24 | Args: 25 | claim_evidences_dict (dict): a dictionary of claims and their corresponding evidences. 26 | 27 | Returns: 28 | dict: a dictionary of claims and their relationship to each evidence, including evidence, reasoning, relationship. 29 | """ 30 | 31 | claim_verifications_dict = self._verify_all_claims(claim_evidences_dict, prompt=prompt) 32 | 33 | return claim_verifications_dict 34 | 35 | def _verify_all_claims( 36 | self, 37 | claim_evidences_dict: dict[str, list[str]], 38 | num_retries=3, 39 | prompt: str = None, 40 | ) -> dict[str, list[Evidence]]: 41 | """Verify the factuality of the claims with respect to the given evidences 42 | 43 | Args: 44 | claim_evidences_dict (dict): a dictionary of claims and their corresponding evidences. 
45 | num_retries (int, optional): maximum attempts for GPT to verify the factuality of the claims. Defaults to 3. 46 | 47 | Returns: 48 | list[dict[str, any]]: a list of relationship results, including evidence, reasoning, relationship. 49 | """ 50 | attempts = 0 51 | # construct user inputs with respect to each claim and its evidences 52 | claim_evidence_list = [] 53 | messages_list = [] 54 | for claim, _evidences in claim_evidences_dict.items(): 55 | for e in _evidences: 56 | if prompt is None: 57 | user_input = self.prompt.verify_prompt.format(claim=claim, evidence=e) 58 | else: 59 | user_input = prompt.format(claim=claim, evidence=e) 60 | claim_evidence_list.append((claim, e)) 61 | messages_list.append(user_input) 62 | factual_results = [None] * len(messages_list) 63 | 64 | while (attempts < num_retries) and (None in factual_results): 65 | _messages = [_message for _i, _message in enumerate(messages_list) if factual_results[_i] is None] 66 | _indices = [_i for _i, _message in enumerate(messages_list) if factual_results[_i] is None] 67 | 68 | _message_list = self.llm_client.construct_message_list(_messages) 69 | _response_list = self.llm_client.multi_call(_message_list) 70 | for _response, _index in zip(_response_list, _indices): 71 | try: 72 | _response_json = json.loads(_response) 73 | assert all(k in _response_json for k in ["reasoning", "relationship"]) 74 | factual_results[_index] = _response_json 75 | except: # noqa: E722 76 | logger.info(f"Warning: LLM response parse fail, retry {attempts}.") 77 | attempts += 1 78 | 79 | _template_results = { 80 | "reasoning": "[System Warning] Can not identify the factuality of the claim.", 81 | "relationship": "IRRELEVANT", 82 | } 83 | 84 | # construct the evidence list with the verification results 85 | evidences = [] 86 | for (claim, evidence), verification in zip(claim_evidence_list, factual_results): 87 | # if cannot get correct response within num_retries times. 88 | if verification is None: 89 | verification = _template_results 90 | evidences.append(Evidence(claim=claim, **evidence, **verification)) 91 | 92 | # aggregate the results from list to dict 93 | claim_verifications_dict = {k: [] for k in claim_evidences_dict.keys()} 94 | for e in evidences: 95 | claim_verifications_dict[e.claim].append(e) 96 | 97 | return claim_verifications_dict 98 | -------------------------------------------------------------------------------- /factcheck/core/Decompose.py: -------------------------------------------------------------------------------- 1 | from factcheck.utils.logger import CustomLogger 2 | import nltk 3 | 4 | logger = CustomLogger(__name__).getlog() 5 | 6 | 7 | class Decompose: 8 | def __init__(self, llm_client, prompt): 9 | """Initialize the Decompose class 10 | 11 | Args: 12 | llm_client (BaseClient): The LLM client used for decomposing documents into claims. 13 | prompt (BasePrompt): The prompt used for fact checking. 
14 | """ 15 | self.llm_client = llm_client 16 | self.prompt = prompt 17 | self.doc2sent = self._nltk_doc2sent 18 | 19 | def _nltk_doc2sent(self, text: str): 20 | """Split the document into sentences using nltk 21 | 22 | Args: 23 | text (str): the document to be split into sentences 24 | 25 | Returns: 26 | list: a list of sentences 27 | """ 28 | 29 | sentences = nltk.sent_tokenize(text) 30 | sentence_list = [s.strip() for s in sentences if len(s.strip()) >= 3] 31 | return sentence_list 32 | 33 | def getclaims(self, doc: str, num_retries: int = 3, prompt: str = None) -> list[str]: 34 | """Use GPT to decompose a document into claims 35 | 36 | Args: 37 | doc (str): the document to be decomposed into claims 38 | num_retries (int, optional): maximum attempts for GPT to decompose the document into claims. Defaults to 3. 39 | 40 | Returns: 41 | list: a list of claims 42 | """ 43 | if prompt is None: 44 | user_input = self.prompt.decompose_prompt.format(doc=doc).strip() 45 | else: 46 | user_input = prompt.format(doc=doc).strip() 47 | 48 | claims = None 49 | messages = self.llm_client.construct_message_list([user_input]) 50 | for i in range(num_retries): 51 | response = self.llm_client.call( 52 | messages=messages, 53 | num_retries=1, 54 | seed=42 + i, 55 | ) 56 | try: 57 | claims = eval(response)["claims"] 58 | if isinstance(claims, list) and len(claims) > 0: 59 | break 60 | except Exception as e: 61 | logger.error(f"Parse LLM response error {e}, response is: {response}") 62 | logger.error(f"Parse LLM response error, prompt is: {messages}") 63 | if isinstance(claims, list): 64 | return claims 65 | else: 66 | logger.info("It does not output a list of sentences correctly, return self.doc2sent_tool split results.") 67 | claims = self.doc2sent(doc) 68 | return claims 69 | 70 | def restore_claims(self, doc: str, claims: list, num_retries: int = 3, prompt: str = None) -> dict[str, dict]: 71 | """Use GPT to map claims back to the document 72 | 73 | Args: 74 | doc (str): the document to be decomposed into claims 75 | claims (list[str]): a list of claims to be mapped back to the document 76 | num_retries (int, optional): maximum attempts for GPT to decompose the document into claims. Defaults to 3. 77 | 78 | Returns: 79 | dict: a dictionary of claims and their corresponding text spans and start/end indices. 
80 | """ 81 | 82 | def restore(claim2doc): 83 | claim2doc_detail = {} 84 | flag = True 85 | for claim, sent in claim2doc.items(): 86 | st = doc.find(sent) 87 | if st != -1: 88 | claim2doc_detail[claim] = {"text": sent, "start": st, "end": st + len(sent)} 89 | else: 90 | flag = False 91 | 92 | cur_pos = -1 93 | texts = [] 94 | for k, v in claim2doc_detail.items(): 95 | if v["start"] < cur_pos + 1 and v["end"] > cur_pos: 96 | v["start"] = cur_pos + 1 97 | flag = False 98 | elif v["start"] < cur_pos + 1 and v["end"] <= cur_pos: 99 | v["start"] = v["end"] # temporarily ignore this span 100 | flag = False 101 | elif v["start"] > cur_pos + 1: 102 | v["start"] = cur_pos + 1 103 | flag = False 104 | v["text"] = doc[v["start"] : v["end"]] 105 | texts.append(v["text"]) 106 | claim2doc_detail[k] = v 107 | cur_pos = v["end"] 108 | 109 | return claim2doc_detail, flag 110 | 111 | if prompt is None: 112 | user_input = self.prompt.restore_prompt.format(doc=doc, claims=claims).strip() 113 | else: 114 | user_input = prompt.format(doc=doc, claims=claims).strip() 115 | 116 | messages = self.llm_client.construct_message_list([user_input]) 117 | 118 | tmp_restore = {} 119 | for i in range(num_retries): 120 | response = self.llm_client.call( 121 | messages=messages, 122 | num_retries=1, 123 | seed=42 + i, 124 | ) 125 | try: 126 | claim2doc = eval(response) 127 | assert len(claim2doc) == len(claims) 128 | claim2doc_detail, flag = restore(claim2doc) 129 | if flag: 130 | return claim2doc_detail 131 | else: 132 | tmp_restore = claim2doc_detail 133 | raise Exception("Restore claims not satisfied.") 134 | except Exception as e: 135 | logger.error(f"Parse LLM response error {e}, response is: {response}") 136 | logger.error(f"Parse LLM response error, prompt is: {messages}") 137 | 138 | return tmp_restore 139 | -------------------------------------------------------------------------------- /factcheck/core/QueryGenerator.py: -------------------------------------------------------------------------------- 1 | from factcheck.utils.logger import CustomLogger 2 | 3 | logger = CustomLogger(__name__).getlog() 4 | 5 | 6 | class QueryGenerator: 7 | def __init__(self, llm_client, prompt, max_query_per_claim: int = 5): 8 | """Initialize the QueryGenerator class 9 | 10 | Args: 11 | llm_client (BaseClient): The LLM client used for generating questions. 12 | prompt (BasePrompt): The prompt used for generating questions. 13 | """ 14 | self.llm_client = llm_client 15 | self.prompt = prompt 16 | self.max_query_per_claim = max_query_per_claim 17 | 18 | def generate_query(self, claims: list[str], generating_time: int = 3, prompt: str = None) -> dict[str, list[str]]: 19 | """Generate questions for the given claims 20 | 21 | Args: 22 | claims ([str]): a list of claims to generate questions for. 23 | generating_time (int, optional): maximum attempts for GPT to generate questions. Defaults to 3. 24 | 25 | Returns: 26 | dict: a dictionary of claims and their corresponding generated questions. 
27 | """ 28 | generated_questions = [[]] * len(claims) 29 | attempts = 0 30 | 31 | # construct messages 32 | messages_list = [] 33 | for claim in claims: 34 | if prompt is None: 35 | user_input = self.prompt.qgen_prompt.format(claim=claim) 36 | else: 37 | user_input = prompt.format(claim=claim) 38 | messages_list.append(user_input) 39 | 40 | while (attempts < generating_time) and ([] in generated_questions): 41 | _messages = [_message for _i, _message in enumerate(messages_list) if generated_questions[_i] == []] 42 | _indices = [_i for _i, _message in enumerate(messages_list) if generated_questions[_i] == []] 43 | 44 | _message_list = self.llm_client.construct_message_list(_messages) 45 | _response_list = self.llm_client.multi_call(_message_list) 46 | 47 | for _response, _index in zip(_response_list, _indices): 48 | try: 49 | _questions = eval(_response)["Questions"] 50 | generated_questions[_index] = _questions 51 | except: # noqa: E722 52 | logger.info(f"Warning: LLM response parse fail, retry {attempts}.") 53 | attempts += 1 54 | 55 | # ensure that each claim has at least one question which is the claim itself 56 | claim_query_dict = { 57 | _claim: [_claim] + _generated_questions[: (self.max_query_per_claim - 1)] 58 | for _claim, _generated_questions in zip(claims, generated_questions) 59 | } 60 | return claim_query_dict 61 | -------------------------------------------------------------------------------- /factcheck/core/Retriever/__init__.py: -------------------------------------------------------------------------------- 1 | from .google_retriever import GoogleEvidenceRetriever 2 | from .serper_retriever import SerperEvidenceRetriever 3 | 4 | retriever_map = { 5 | "google": GoogleEvidenceRetriever, 6 | "serper": SerperEvidenceRetriever, 7 | } 8 | 9 | 10 | def retriever_mapper(retriever_name: str): 11 | if retriever_name not in retriever_map: 12 | raise NotImplementedError(f"Retriever {retriever_name} not found!") 13 | return retriever_map[retriever_name] 14 | -------------------------------------------------------------------------------- /factcheck/core/Retriever/base.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ProcessPoolExecutor 2 | import os 3 | from copy import deepcopy 4 | from factcheck.utils.web_util import parse_response, crawl_web 5 | from factcheck.utils.logger import CustomLogger 6 | 7 | logger = CustomLogger(__name__).getlog() 8 | 9 | 10 | class BaseRetriever: 11 | def __init__(self, llm_client, api_config: dict = None): 12 | """Initialize the EvidenceRetrieve class.""" 13 | import spacy 14 | 15 | self.tokenizer = spacy.load("en_core_web_sm", disable=["ner", "tagger", "lemmatizer"]) 16 | from sentence_transformers import CrossEncoder 17 | import torch 18 | 19 | self.passage_ranker = CrossEncoder( 20 | "cross-encoder/ms-marco-MiniLM-L-6-v2", 21 | max_length=512, 22 | device=torch.device("cuda" if torch.cuda.is_available() else "cpu"), 23 | ) 24 | self.lang = "en" 25 | self.max_search_result_per_query = 3 26 | self.sentences_per_passage = 10 27 | self.sliding_distance = 8 28 | self.max_passages_per_search_result_to_return = 5 29 | assert self.sentences_per_passage > self.sliding_distance 30 | self.llm_client = llm_client 31 | 32 | def set_lang(self, lang: str): 33 | """Set the language for evidence retrieval. 34 | 35 | Args: 36 | lang (str): The language for evidence retrieval. 
37 | """ 38 | self.lang = lang 39 | 40 | def set_max_search_result_per_query(self, m: int): 41 | """Set the maximum number of search results per query. 42 | 43 | Args: 44 | m (int): The maximum number of search results per query. 45 | """ 46 | self.max_search_result_per_query = m 47 | 48 | def retrieve_evidence(self, claim_query_dict): 49 | """Retrieve evidence for a list of claims. 50 | 1. get google search page result by generated questions 51 | 2. crawl all web from urls and extract text 52 | 3. get relevant snippets from these text 53 | 4. Take top-5 evidences for each question 54 | 5. return single claims evidences; 55 | 56 | Args: 57 | claim_query_dict (dict): A dictionary of claims and their corresponding queries. 58 | 59 | Returns: 60 | dict: A dictionary of claims and their corresponding evidences. 61 | """ 62 | claim_evidence_dict = {} 63 | for claim, query_list in claim_query_dict.items(): 64 | logger.info(f"Collecting evidences for claim : {claim}") 65 | evidences = self._retrieve_evidence4singleclaim(claim, query_list=query_list) 66 | claim_evidence_dict[claim] = evidences 67 | return claim_evidence_dict 68 | 69 | def _retrieve_evidence4singleclaim(self, claim: str, query_list: list[str]): 70 | """Retrieve evidence for a single claim. 71 | 72 | Args: 73 | claim (str): The claim to retrieve evidence for. 74 | query_list (list[str]): A list of queries for the claim. 75 | 76 | Returns: 77 | dict: A dictionary of claims and their corresponding evidences. 78 | """ 79 | 80 | query_url_dict = self._get_query_urls(query_list) 81 | query_scraped_results_dict = self._crawl_and_parse_web(query_url_dict=query_url_dict) 82 | evidences = self._get_relevant_snippets(query_scraped_results_dict=query_scraped_results_dict) 83 | return evidences 84 | 85 | def _crawl_and_parse_web(self, query_url_dict: dict[str, list]): 86 | responses = crawl_web(query_url_dict=query_url_dict) 87 | query_responses_dict = dict() 88 | for flag, response, url, query in responses: 89 | if flag and ".pdf" not in str(response.url): 90 | response_list = query_responses_dict.get(query, []) 91 | response_list.append([response, url]) 92 | query_responses_dict[query] = response_list 93 | 94 | query_scraped_results_dict = dict() 95 | with ProcessPoolExecutor(max_workers=os.cpu_count()) as executor: 96 | futures = list() 97 | for query, response_list in query_responses_dict.items(): 98 | for response, url in response_list: 99 | future = executor.submit(parse_response, response, url, query) 100 | futures.append(future) 101 | for future in futures: 102 | web_text, url, query = future.result() 103 | scraped_results_list = query_scraped_results_dict.get(query, []) 104 | scraped_results_list.append([web_text, url]) 105 | query_scraped_results_dict[query] = scraped_results_list 106 | # Remove URLs if we weren't able to scrape anything or if they are a PDF. 107 | for query in query_scraped_results_dict.keys(): 108 | scraped_results_list = query_scraped_results_dict.get(query) 109 | # remove if crawled web text is null 110 | scraped_results = [pair for pair in scraped_results_list if pair[0]] 111 | # get top scraped results by self.max_search_result_per_query 112 | query_scraped_results_dict[query] = scraped_results[: self.max_search_result_per_query] 113 | # print("query_scraped_res", query_scraped_results_dict) 114 | return query_scraped_results_dict 115 | 116 | def _get_relevant_snippets(self, query_scraped_results_dict: dict[str:list]): 117 | """Get relevant snippets from the scraped web text. 
118 | 119 | Args: 120 | query_scraped_results_dict (dict): A dictionary of queries and their corresponding scraped web text. 121 | 122 | Returns: 123 | dict: A dictionary of queries and their corresponding relevant snippets. 124 | """ 125 | # 4+ 5 chunk to split web text to several passage and score and sort 126 | snippets_dict = {} 127 | for query, scraped_results in query_scraped_results_dict.items(): 128 | snippets_dict[query] = self._sorted_passage_by_relevant_score(query, scraped_results=scraped_results) 129 | snippets_dict[query] = deepcopy( 130 | sorted( 131 | snippets_dict[query], 132 | key=lambda snippet: snippet["retrieval_score"], 133 | reverse=True, 134 | )[:5] 135 | ) 136 | 137 | evidences = {} 138 | evidences["aggregated"] = [] 139 | evidences["question_wise"] = deepcopy(snippets_dict) 140 | for key in evidences["question_wise"]: 141 | # Take top evidences for each question 142 | index = int(len(evidences["aggregated"]) / len(evidences["question_wise"])) 143 | evidences["aggregated"].append(evidences["question_wise"][key][index]) 144 | if len(evidences["aggregated"]) >= self.max_passages_per_search_result_to_return: 145 | break 146 | # 6 147 | return evidences["aggregated"] 148 | 149 | def _sorted_passage_by_relevant_score(self, query: str, scraped_results: list[str]): 150 | """Sort the passages by relevance to the query using a cross-encoder. 151 | 152 | Args: 153 | query (str): The query to sort the passages by relevance. 154 | scraped_results (list[str]): A list of scraped web text. 155 | 156 | Returns: 157 | list: a list of relevant snippets, where each snippet is a dictionary containing the text, url, sentences per passage, and retrieval score. 158 | """ 159 | retrieved_passages = list() 160 | weball = "" 161 | for webtext, url in scraped_results: 162 | weball += webtext 163 | passages = self._chunk_text(text=weball, tokenizer=self.tokenizer) 164 | if not passages: 165 | return [] 166 | # Score the passages by relevance to the query using a cross-encoder. 167 | scores = self.passage_ranker.predict([(query, p[0]) for p in passages]).tolist() 168 | passage_scores = list(zip(passages, scores)) 169 | 170 | # Take the top passages_per_search passages for the current search result. 171 | passage_scores.sort(key=lambda x: x[1], reverse=True) 172 | 173 | relevant_items = list() 174 | for passage_item, score in passage_scores: 175 | overlap = False 176 | if len(relevant_items) > 0: 177 | for item in relevant_items: 178 | if passage_item[1] >= item[1] and passage_item[1] <= item[2]: 179 | overlap = True 180 | break 181 | if passage_item[2] >= item[1] and passage_item[2] <= item[2]: 182 | overlap = True 183 | break 184 | 185 | # Only consider top non-overlapping relevant passages to maximise for information 186 | if not overlap: 187 | relevant_items.append(deepcopy(passage_item)) 188 | retrieved_passages.append( 189 | { 190 | "text": passage_item[0], 191 | "url": url, 192 | "sents_per_passage": self.sentences_per_passage, 193 | "retrieval_score": score, # Cross-encoder score as retr score 194 | } 195 | ) 196 | if len(relevant_items) >= self.max_passages_per_search_result_to_return: 197 | break 198 | # print("Total snippets extracted: ", len(retrieved_passages)) 199 | return retrieved_passages 200 | 201 | def _chunk_text( 202 | self, 203 | text: str, 204 | tokenizer, 205 | min_sentence_len: int = 3, 206 | max_sentence_len: int = 250, 207 | ) -> list[str]: 208 | """Chunks text into passages using a sliding window. 209 | 210 | Args: 211 | text: Text to chunk into passages. 
212 | max_sentence_len: Maximum number of chars of each sentence before being filtered. 213 | Returns: 214 | passages: Chunked passages from the text. 215 | """ 216 | passages = [] 217 | try: 218 | logger.info("========web text len: {} =======".format((len(text)))) 219 | doc = tokenizer(text[:500000]) # Take 500k chars to not break tokenization. 220 | sents = [ 221 | s.text.replace("\n", " ") 222 | for s in doc.sents 223 | if min_sentence_len <= len(s.text) <= max_sentence_len # Long sents are usually metadata. 224 | ] 225 | for idx in range(0, len(sents), self.sliding_distance): 226 | passages.append( 227 | ( 228 | " ".join(sents[idx : idx + self.sentences_per_passage]), 229 | idx, 230 | idx + self.sentences_per_passage - 1, 231 | ) 232 | ) 233 | except UnicodeEncodeError as e: # Sometimes run into Unicode error when tokenizing. 234 | logger.error(f"Unicode error when using Spacy. Skipping text. Error message {e}") 235 | return passages 236 | -------------------------------------------------------------------------------- /factcheck/core/Retriever/google_retriever.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ThreadPoolExecutor 2 | from factcheck.utils.web_util import common_web_request, crawl_google_web 3 | from .base import BaseRetriever 4 | from factcheck.utils.logger import CustomLogger 5 | 6 | logger = CustomLogger(__name__).getlog() 7 | 8 | 9 | class GoogleEvidenceRetriever(BaseRetriever): 10 | def __init__(self, api_config: dict = None) -> None: 11 | super(GoogleEvidenceRetriever, self).__init__(api_config) 12 | self.num_web_pages = 10 13 | 14 | def _get_query_urls(self, questions: list[str]): 15 | all_request_url_dict = dict() 16 | for query in questions: 17 | query = query.replace(" ", "+") 18 | curr_query_list = all_request_url_dict.get(query, []) 19 | for page in range(0, self.num_web_pages, 10): 20 | # here page is google search's bottom page meaning, click 2 -> start=10 21 | # url = "https://www.google.com/search?q={}&start={}".format(query, page) 22 | url = "https://www.google.com/search?q={}&lr=lang_{}&hl={}&start={}".format(query, self.lang, self.lang, page) 23 | curr_query_list.append(url) 24 | all_request_url_dict[query] = curr_query_list 25 | 26 | crawled_all_page_urls_dict = dict() 27 | with ThreadPoolExecutor(max_workers=len(all_request_url_dict.values())) as executor: 28 | futures = list() 29 | for query, urls in all_request_url_dict.items(): 30 | for url in urls: 31 | future = executor.submit(common_web_request, url, query) 32 | futures.append(future) 33 | for future in futures: 34 | response, query = future.result() 35 | content_list = crawled_all_page_urls_dict.get(query, []) 36 | content_list.extend(crawl_google_web(response)) 37 | crawled_all_page_urls_dict[query] = content_list 38 | for query, urls in crawled_all_page_urls_dict.items(): 39 | # urls = sorted(list(set(urls))) 40 | crawled_all_page_urls_dict[query] = urls[: self.max_search_result_per_query] 41 | return crawled_all_page_urls_dict 42 | -------------------------------------------------------------------------------- /factcheck/core/Retriever/serper_retriever.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ThreadPoolExecutor 2 | import json 3 | import requests 4 | import os 5 | import re 6 | import bs4 7 | from factcheck.utils.logger import CustomLogger 8 | from factcheck.utils.web_util import crawl_web 9 | 10 | logger = CustomLogger(__name__).getlog() 11 | 
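# Sketch of the data flow through this retriever (illustrative, simplified; URLs and queries are placeholders):
# retrieve_evidence({"claim A": ["query 1", "query 2"]}) flattens the queries, sends them to the
# Serper API in batches of up to 100, and returns a mapping such as
#   {"claim A": [{"text": "query 1\nAnswer: ...", "url": "Google Answer Box"},
#                {"text": "<extended snippet>", "url": "https://example.com/article"}]}
# where each claim collects the evidence snippets gathered for all of its queries.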
12 | 13 | class SerperEvidenceRetriever: 14 | def __init__(self, llm_client, api_config: dict = None): 15 | """Initialize the SerperEvidenceRetrieve class""" 16 | self.lang = "en" 17 | self.serper_key = api_config["SERPER_API_KEY"] 18 | self.llm_client = llm_client 19 | 20 | def retrieve_evidence(self, claim_queries_dict, top_k: int = 3, snippet_extend_flag: bool = True): 21 | """Retrieve evidences for the given claims 22 | 23 | Args: 24 | claim_queries_dict (dict): a dictionary of claims and their corresponding queries. 25 | top_k (int, optional): the number of top relevant results to retrieve. Defaults to 3. 26 | snippet_extend_flag (bool, optional): whether to extend the snippet. Defaults to True. 27 | 28 | Returns: 29 | dict: a dictionary of claims and their corresponding evidences. 30 | """ 31 | logger.info("Collecting evidences ...") 32 | query_list = [y for x in claim_queries_dict.items() for y in x[1]] 33 | evidence_list = self._retrieve_evidence_4_all_claim( 34 | query_list=query_list, top_k=top_k, snippet_extend_flag=snippet_extend_flag 35 | ) 36 | 37 | i = 0 38 | claim_evidence_dict = {} 39 | for claim, queries in claim_queries_dict.items(): 40 | evidences_per_query_L = evidence_list[i : i + len(queries)] 41 | claim_evidence_dict[claim] = [e for evidences in evidences_per_query_L for e in evidences] 42 | i += len(queries) 43 | assert i == len(evidence_list) 44 | logger.info("Collect evidences done!") 45 | return claim_evidence_dict 46 | 47 | def _retrieve_evidence_4_all_claim( 48 | self, query_list: list[str], top_k: int = 3, snippet_extend_flag: bool = True 49 | ) -> list[list[str]]: 50 | """Retrieve evidences for the given queries 51 | 52 | Args: 53 | query_list (list[str]): a list of queries to retrieve evidences for. 54 | top_k (int, optional): the number of top relevant results to retrieve. Defaults to 3. 55 | snippet_extend_flag (bool, optional): whether to extend the snippet. Defaults to True. 56 | 57 | Returns: 58 | list[list[]]: a list of [a list of evidences for each given query]. 59 | """ 60 | 61 | # init the evidence list with None 62 | evidences = [[] for _ in query_list] 63 | 64 | # get the response from serper 65 | serper_responses = [] 66 | for i in range(0, len(query_list), 100): 67 | batch_query_list = query_list[i : i + 100] 68 | batch_response = self._request_serper_api(batch_query_list) 69 | if batch_response is None: 70 | logger.error("Serper API request error!") 71 | return evidences 72 | else: 73 | serper_responses += batch_response.json() 74 | 75 | # get the responses for queries with an answer box 76 | query_url_dict = {} 77 | url_to_date = {} # TODO: decide whether to use date 78 | _snippet_to_check = [] 79 | for i, (query, response) in enumerate(zip(query_list, serper_responses)): 80 | if query != response.get("searchParameters").get("q"): 81 | logger.error("Serper change query from {} TO {}".format(query, response.get("searchParameters").get("q"))) 82 | 83 | # TODO: provide the link for the answer box 84 | if "answerBox" in response: 85 | if "answer" in response["answerBox"]: 86 | evidences[i] = [ 87 | { 88 | "text": f"{query}\nAnswer: {response['answerBox']['answer']}", 89 | "url": "Google Answer Box", 90 | } 91 | ] 92 | else: 93 | evidences[i] = [ 94 | { 95 | "text": f"{query}\nAnswer: {response['answerBox']['snippet']}", 96 | "url": "Google Answer Box", 97 | } 98 | ] 99 | # TODO: currently --- if there is google answer box, we only got 1 evidence, otherwise, we got multiple, this will deminish the value of the google answer. 
100 | else: 101 | topk_results = response.get("organic", [])[:top_k] # Choose top 5 response 102 | 103 | if (len(_snippet_to_check) == 0) or (not snippet_extend_flag): 104 | evidences[i] += [ 105 | {"text": re.sub(r"\n+", "\n", _result["snippet"]), "url": _result["link"]} for _result in topk_results 106 | ] 107 | 108 | # Save date for each url 109 | url_to_date.update({_result.get("link"): _result.get("date") for _result in topk_results}) 110 | # Save query-url pair, 1 query may have multiple urls 111 | query_url_dict.update({query: [_result.get("link") for _result in topk_results]}) 112 | _snippet_to_check += [_result["snippet"] if "snippet" in _result else "" for _result in topk_results] 113 | 114 | # return if there is no snippet to check or snippet_extend_flag is False 115 | if (len(_snippet_to_check) == 0) or (not snippet_extend_flag): 116 | return evidences 117 | 118 | # crawl web for queries without answer box 119 | responses = crawl_web(query_url_dict) 120 | # Get extended snippets based on the snippet from serper 121 | flag_to_check = [_item[0] for _item in responses] 122 | response_to_check = [_item[1] for _item in responses] 123 | url_to_check = [_item[2] for _item in responses] 124 | query_to_check = [_item[3] for _item in responses] 125 | 126 | def bs4_parse_text(response, snippet, flag): 127 | """Parse the text from the response and extend the snippet 128 | 129 | Args: 130 | response (web response): the response from the web 131 | snippet (str): the snippet to extend from the search result 132 | flag (bool): flag to extend the snippet 133 | 134 | Returns: 135 | _type_: _description_ 136 | """ 137 | if flag and ".pdf" not in str(response.url): 138 | soup = bs4.BeautifulSoup(response.text, "html.parser") 139 | text = soup.get_text() 140 | # Search for the snippet in text 141 | snippet_start = text.find(snippet[:-10]) 142 | if snippet_start == -1: 143 | return snippet 144 | else: 145 | pre_context_range = 0 # Number of characters around the snippet to display 146 | post_context_range = 500 # Number of characters around the snippet to display 147 | start = max(0, snippet_start - pre_context_range) 148 | end = snippet_start + len(snippet) + post_context_range 149 | return text[start:end] + " ..." 150 | else: 151 | return snippet 152 | 153 | # Question: if os.cpu_count() cause problems when running in parallel? 154 | with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor: 155 | _extended_snippet = list( 156 | executor.map( 157 | lambda _r, _s, _f: bs4_parse_text(_r, _s, _f), 158 | response_to_check, 159 | _snippet_to_check, 160 | flag_to_check, 161 | ) 162 | ) 163 | 164 | # merge the snippets by query 165 | query_snippet_url_dict = {} 166 | for _query, _url, _snippet in zip(query_to_check, url_to_check, _extended_snippet): 167 | _snippet_url_list = query_snippet_url_dict.get(_query, []) 168 | _snippet_url_list.append((_snippet, _url)) 169 | query_snippet_url_dict[_query] = _snippet_url_list 170 | 171 | # extend the evidence list for each query 172 | for _query in query_snippet_url_dict.keys(): 173 | _query_index = query_list.index(_query) 174 | _snippet_url_list = query_snippet_url_dict[_query] 175 | evidences[_query_index] += [ 176 | {"text": re.sub(r"\n+", "\n", snippet), "url": _url} for snippet, _url in _snippet_url_list 177 | ] 178 | 179 | return evidences 180 | 181 | def _request_serper_api(self, questions): 182 | """Request the serper api 183 | 184 | Args: 185 | questions (list): a list of questions to request the serper api. 
186 | 187 | Returns: 188 | web response: the response from the serper api 189 | """ 190 | url = "https://google.serper.dev/search" 191 | 192 | headers = { 193 | "X-API-KEY": self.serper_key, 194 | "Content-Type": "application/json", 195 | } 196 | 197 | questions_data = [{"q": question, "autocorrect": False} for question in questions] 198 | payload = json.dumps(questions_data) 199 | response = None 200 | response = requests.request("POST", url, headers=headers, data=payload) 201 | 202 | if response.status_code == 200: 203 | return response 204 | elif response.status_code == 403: 205 | raise Exception("Failed to authenticate. Check your API key.") 206 | else: 207 | raise Exception(f"Error occurred: {response.text}") 208 | 209 | 210 | if __name__ == "__main__": 211 | import argparse 212 | 213 | parser = argparse.ArgumentParser() 214 | parser.add_argument("--serper_api_key", type=str, help="API key for serper") 215 | args = parser.parse_args() 216 | 217 | api_config = {"SERPER_API_KEY": args.serper_api_key} 218 | retriever = SerperEvidenceRetriever(api_config) 219 | 220 | result = retriever._request_serper_api(["Apple", "IBM"]) 221 | print(result.json()) 222 | -------------------------------------------------------------------------------- /factcheck/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .Decompose import Decompose 2 | from .CheckWorthy import Checkworthy 3 | from .QueryGenerator import QueryGenerator 4 | from .Retriever import retriever_mapper 5 | from .ClaimVerify import ClaimVerify 6 | -------------------------------------------------------------------------------- /factcheck/utils/api_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Define all keys for the API configuration 4 | keys = [ 5 | "SERPER_API_KEY", 6 | "OPENAI_API_KEY", 7 | "ANTHROPIC_API_KEY", 8 | "LOCAL_API_KEY", 9 | "LOCAL_API_URL", 10 | ] 11 | 12 | 13 | def load_api_config(api_config: dict = None): 14 | """Load API keys from environment variables or config file, config file take precedence 15 | 16 | Args: 17 | api_config (dict, optional): _description_. Defaults to None. 18 | """ 19 | if api_config is None: 20 | api_config = dict() 21 | assert type(api_config) is dict, "api_config must be a dictionary." 
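    # Illustrative merge below (dummy values): with OPENAI_API_KEY set only as an environment
    # variable and api_config = {"SERPER_API_KEY": "abc"}, the returned dict carries both keys;
    # a value from api_config wins when present, the environment variable is the fallback, and
    # keys outside the known list are passed through unchanged.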
22 | 23 | merged_config = {} 24 | 25 | for key in keys: 26 | merged_config[key] = api_config.get(key, None) 27 | if merged_config[key] is None: 28 | merged_config[key] = os.environ.get(key, None) 29 | 30 | for key in api_config.keys(): 31 | if key not in keys: 32 | merged_config[key] = api_config[key] 33 | return merged_config 34 | -------------------------------------------------------------------------------- /factcheck/utils/data_class.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | from typing import Dict, List, Any, Optional 3 | from enum import Enum 4 | from dataclasses import dataclass 5 | 6 | 7 | @dataclass 8 | class TokenUsage: 9 | model: str = "" 10 | prompt_tokens: int = 0 11 | completion_tokens: Optional[int] = 0 12 | 13 | 14 | @dataclass 15 | class PipelineUsage: 16 | decomposer: TokenUsage = None 17 | checkworthy: TokenUsage = None 18 | query_generator: TokenUsage = None 19 | evidence_crawler: TokenUsage = None 20 | claimverify: TokenUsage = None 21 | 22 | 23 | @dataclass 24 | class Evidence: 25 | claim: str = None 26 | text: str = None # evidence text 27 | url: str = None 28 | reasoning: str = None 29 | relationship: str = None 30 | 31 | def attribute_check(self) -> bool: 32 | for field in self.__dataclass_fields__.values(): 33 | if getattr(self, field.name) is None: 34 | print(f"Field {field.name} is None") 35 | return False 36 | return True 37 | 38 | 39 | @dataclass 40 | class ClaimDetail: 41 | """Dataclass to store the details of a claim. 42 | 43 | Attributes: 44 | id (int): The unique identifier of the claim. [create from checkworthy] 45 | claim (str): The claim text. [create from checkworthy] 46 | checkworthy (bool): Whether the claim is checkworthy. [create from checkworthy] 47 | checkworthy_reason (str): The reason why the claim is checkworthy. [create from checkworthy] 48 | origin_text (str): The original text from which the claim was extracted. [create from decompose] 49 | start (int): The start index of the claim in the original text. [create from decompose] 50 | end (int): The end index of the claim in the original text. [create from decompose] 51 | queries (List[str]): The list of queries generated for the claim. [create from query_generator] 52 | evidences (List[Evidence]): The list of evidences retrieved for the claim. [createfrom evidence_crawler] 53 | factuality (any): The factuality of the claim. [create by summarize evidences] 54 | possible values: "Nothing to check.", "No evidence found", float in [0, 1] 55 | """ 56 | 57 | id: int = None 58 | claim: str = None 59 | checkworthy: bool = None 60 | checkworthy_reason: str = None 61 | origin_text: str = None 62 | start: int = None 63 | end: int = None 64 | queries: List[str] = None 65 | evidences: List[dict] = None 66 | factuality: any = None 67 | 68 | def attribute_check(self) -> bool: 69 | for field in self.__dataclass_fields__.values(): 70 | if getattr(self, field.name) is None: 71 | print(f"Field {field.name} is None") 72 | return False 73 | for evidence in self.evidences: 74 | if not evidence.attribute_check(): 75 | print(f"Field {field.name} is None") 76 | return False 77 | return True 78 | 79 | 80 | @dataclass 81 | class FCSummary: 82 | """Dataclass to store the summary of the fact-checking process. 83 | 84 | Attributes: 85 | num_claims (int): The number of claims processed. [create from decompose] 86 | num_checkworthy_claims (int): The number of claims identified as checkworthy. 
[create from checkworthy] 87 | num_verified_claims (int): The number of claims that were verified. [create from claimverify - no evidence founded claims] 88 | num_supported_claims (int) 89 | num_refuted_claims (int) 90 | num_controversial_claims (int) 91 | factuality (float): The overall factuality. 92 | """ 93 | 94 | num_claims: int = None 95 | num_checkworthy_claims: int = None 96 | num_verified_claims: int = None 97 | num_supported_claims: int = None 98 | num_refuted_claims: int = None 99 | num_controversial_claims: int = None 100 | factuality: float = None 101 | 102 | def attribute_check(self) -> bool: 103 | for field in self.__dataclass_fields__.values(): 104 | if getattr(self, field.name) is None: 105 | print(f"Field {field.name} is None") 106 | return False 107 | return True 108 | 109 | 110 | @dataclass 111 | class FactCheckOutput: 112 | raw_text: str = None 113 | token_count: int = None 114 | usage: PipelineUsage = None 115 | claim_detail: List[ClaimDetail] = None 116 | summary: FCSummary = None 117 | 118 | def attribute_check(self) -> bool: 119 | for field in self.__dataclass_fields__.values(): 120 | if getattr(self, field.name) is None: 121 | print(f"Field {field.name} is None") 122 | return False 123 | 124 | for claim in self.claim_detail: 125 | if not claim.attribute_check(): 126 | print(f"Field {field.name} is None") 127 | return False 128 | 129 | self.summary.attribute_check() 130 | 131 | return True 132 | -------------------------------------------------------------------------------- /factcheck/utils/llmclient/__init__.py: -------------------------------------------------------------------------------- 1 | from .gpt_client import GPTClient 2 | from .claude_client import ClaudeClient 3 | from .local_openai_client import LocalOpenAIClient 4 | 5 | # fmt: off 6 | CLIENTS = { 7 | "gpt": GPTClient, 8 | "claude": ClaudeClient, 9 | "local_openai": LocalOpenAIClient 10 | } 11 | # fmt: on 12 | 13 | 14 | def model2client(model_name: str): 15 | """If the client is not specified, use this function to map the model name to the corresponding client.""" 16 | if model_name.startswith("gpt"): 17 | return GPTClient 18 | elif model_name.startswith("claude"): 19 | return ClaudeClient 20 | elif model_name.startswith("vicuna"): 21 | return LocalOpenAIClient 22 | else: 23 | raise ValueError(f"Model {model_name} not supported.") 24 | -------------------------------------------------------------------------------- /factcheck/utils/llmclient/base.py: -------------------------------------------------------------------------------- 1 | import time 2 | import asyncio 3 | from abc import abstractmethod 4 | from functools import partial 5 | from collections import deque 6 | 7 | from ..data_class import TokenUsage 8 | 9 | 10 | class BaseClient: 11 | def __init__( 12 | self, 13 | model: str, 14 | api_config: dict, 15 | max_requests_per_minute: int, 16 | request_window: int, 17 | ) -> None: 18 | self.model = model 19 | self.api_config = api_config 20 | self.max_requests_per_minute = max_requests_per_minute 21 | self.request_window = request_window 22 | self.traffic_queue = deque() 23 | self.total_traffic = 0 24 | self.usage = TokenUsage(model=model) 25 | 26 | @abstractmethod 27 | def _call(self, messages: str): 28 | """Internal function to call the API.""" 29 | pass 30 | 31 | @abstractmethod 32 | def _log_usage(self): 33 | """Log the usage of tokens, should be used in each client's _call method.""" 34 | pass 35 | 36 | def get_usage(self): 37 | return self.usage 38 | 39 | def reset_usage(self): 40 | 
self.usage.prompt_tokens = 0 41 | self.usage.completion_tokens = 0 42 | 43 | @abstractmethod 44 | def construct_message_list(self, prompt_list: list[str]) -> list[str]: 45 | """Construct a list of messages for the function self.multi_call.""" 46 | raise NotImplementedError 47 | 48 | @abstractmethod 49 | def get_request_length(self, messages): 50 | """Get the length of the request. Used for tracking traffic.""" 51 | raise NotImplementedError 52 | 53 | def call(self, messages: list[str], num_retries=3, waiting_time=1, **kwargs): 54 | seed = kwargs.get("seed", 42) 55 | assert type(seed) is int, "Seed must be an integer." 56 | assert len(messages) == 1, "Only one message is allowed for this function." 57 | 58 | r = "" 59 | for _ in range(num_retries): 60 | try: 61 | r = self._call(messages[0], seed=seed) 62 | break 63 | except Exception as e: 64 | print(f"Error LLM Client call: {e} Retrying...") 65 | time.sleep(waiting_time) 66 | 67 | if r == "": 68 | raise ValueError("Failed to get response from LLM Client.") 69 | return r 70 | 71 | def set_model(self, model: str): 72 | self.model = model 73 | 74 | async def _async_call(self, messages: list, **kwargs): 75 | """Calls ChatGPT asynchronously, tracks traffic, and enforces rate limits.""" 76 | while len(self.traffic_queue) >= self.max_requests_per_minute: 77 | await asyncio.sleep(1) 78 | self._expire_old_traffic() 79 | 80 | loop = asyncio.get_running_loop() 81 | response = await loop.run_in_executor(None, partial(self._call, messages, **kwargs)) 82 | 83 | self.total_traffic += self.get_request_length(messages) 84 | self.traffic_queue.append((time.time(), self.get_request_length(messages))) 85 | 86 | return response 87 | 88 | def multi_call(self, messages_list, **kwargs): 89 | tasks = [self._async_call(messages=messages, **kwargs) for messages in messages_list] 90 | asyncio.set_event_loop(asyncio.SelectorEventLoop()) 91 | loop = asyncio.get_event_loop() 92 | responses = loop.run_until_complete(asyncio.gather(*tasks)) 93 | return responses 94 | 95 | def _expire_old_traffic(self): 96 | """Expires traffic older than the request window.""" 97 | current_time = time.time() 98 | while self.traffic_queue and self.traffic_queue[0][0] + self.request_window < current_time: 99 | self.total_traffic -= self.traffic_queue.popleft()[1] 100 | -------------------------------------------------------------------------------- /factcheck/utils/llmclient/claude_client.py: -------------------------------------------------------------------------------- 1 | import time 2 | from anthropic import Anthropic 3 | from .base import BaseClient 4 | 5 | 6 | class ClaudeClient(BaseClient): 7 | def __init__( 8 | self, 9 | model: str = "claude-3-opus-20240229", 10 | api_config: dict = None, 11 | max_requests_per_minute=200, 12 | request_window=60, 13 | ): 14 | super().__init__(model, api_config, max_requests_per_minute, request_window) 15 | self.client = Anthropic(api_key=self.api_config["ANTHROPIC_API_KEY"]) 16 | 17 | def _call(self, messages: str, **kwargs): 18 | response = self.client.messages.create( 19 | messages=messages, 20 | model=self.model, 21 | max_tokens=2048, 22 | ) 23 | return response.content[0].text 24 | 25 | def get_request_length(self, messages): 26 | return 1 27 | 28 | def construct_message_list( 29 | self, 30 | prompt_list: list[str], 31 | system_role: str = None, 32 | ): 33 | if system_role is None: 34 | Warning("system_role is not used in this case") 35 | # system role is not used in this case 36 | messages_list = list() 37 | for prompt in prompt_list: 38 | 
messages = [ 39 | {"role": "user", "content": prompt}, 40 | ] 41 | messages_list.append(messages) 42 | return messages_list 43 | -------------------------------------------------------------------------------- /factcheck/utils/llmclient/gpt_client.py: -------------------------------------------------------------------------------- 1 | import time 2 | from openai import OpenAI 3 | from .base import BaseClient 4 | 5 | 6 | class GPTClient(BaseClient): 7 | def __init__( 8 | self, 9 | model: str = "gpt-4-turbo", 10 | api_config: dict = None, 11 | max_requests_per_minute=200, 12 | request_window=60, 13 | ): 14 | super().__init__(model, api_config, max_requests_per_minute, request_window) 15 | self.client = OpenAI(api_key=self.api_config["OPENAI_API_KEY"]) 16 | 17 | def _call(self, messages: str, **kwargs): 18 | seed = kwargs.get("seed", 42) # default seed is 42 19 | assert type(seed) is int, "Seed must be an integer." 20 | 21 | response = self.client.chat.completions.create( 22 | response_format={"type": "json_object"}, 23 | seed=seed, 24 | model=self.model, 25 | messages=messages, 26 | ) 27 | r = response.choices[0].message.content 28 | 29 | if hasattr(response, "usage"): 30 | self._log_usage(usage_dict=response.usage) 31 | else: 32 | print("Warning: ChatGPT API Usage is not logged.") 33 | 34 | return r 35 | 36 | def _log_usage(self, usage_dict): 37 | try: 38 | self.usage.prompt_tokens += usage_dict.prompt_tokens 39 | self.usage.completion_tokens += usage_dict.completion_tokens 40 | except: # noqa E722 41 | print("Warning: prompt_tokens or completion_token not found in usage_dict") 42 | 43 | def get_request_length(self, messages): 44 | # TODO: check if we should return the len(menages) instead 45 | return 1 46 | 47 | def construct_message_list( 48 | self, 49 | prompt_list: list[str], 50 | system_role: str = "You are a helpful assistant designed to output JSON.", 51 | ): 52 | messages_list = list() 53 | for prompt in prompt_list: 54 | messages = [ 55 | {"role": "system", "content": system_role}, 56 | {"role": "user", "content": prompt}, 57 | ] 58 | messages_list.append(messages) 59 | return messages_list 60 | -------------------------------------------------------------------------------- /factcheck/utils/llmclient/local_openai_client.py: -------------------------------------------------------------------------------- 1 | import time 2 | import openai 3 | from openai import OpenAI 4 | from .base import BaseClient 5 | 6 | 7 | class LocalOpenAIClient(BaseClient): 8 | """Support Local host LLM chatbot with OpenAI API. 9 | see https://github.com/lm-sys/FastChat/blob/main/docs/openai_api.md for example usage. 10 | """ 11 | 12 | def __init__( 13 | self, 14 | model: str = "", 15 | api_config: dict = None, 16 | max_requests_per_minute=200, 17 | request_window=60, 18 | ): 19 | super().__init__(model, api_config, max_requests_per_minute, request_window) 20 | 21 | openai.api_key = api_config["LOCAL_API_KEY"] 22 | openai.base_url = api_config["LOCAL_API_URL"] 23 | 24 | def _call(self, messages: str, **kwargs): 25 | seed = kwargs.get("seed", 42) # default seed is 42 26 | assert type(seed) is int, "Seed must be an integer." 
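        # Illustrative api_config for a locally hosted OpenAI-compatible endpoint (example values
        # only; adjust to your own server): {"LOCAL_API_KEY": "EMPTY", "LOCAL_API_URL": "http://localhost:8000/v1/"}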
27 | 28 | response = openai.chat.completions.create( 29 | response_format={"type": "json_object"}, 30 | seed=seed, 31 | model=self.model, 32 | messages=messages, 33 | ) 34 | r = response.choices[0].message.content 35 | return r 36 | 37 | def get_request_length(self, messages): 38 | # TODO: check if we should return the len(menages) instead 39 | return 1 40 | 41 | def construct_message_list( 42 | self, 43 | prompt_list: list[str], 44 | system_role: str = "You are a helpful assistant designed to output JSON.", 45 | ): 46 | messages_list = list() 47 | for prompt in prompt_list: 48 | messages = [ 49 | {"role": "system", "content": system_role}, 50 | {"role": "user", "content": prompt}, 51 | ] 52 | messages_list.append(messages) 53 | return messages_list 54 | -------------------------------------------------------------------------------- /factcheck/utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from flask import g 4 | from logging.handlers import TimedRotatingFileHandler 5 | 6 | 7 | class CustomLogger: 8 | def __init__(self, name: str, loglevel=logging.INFO): 9 | """Initialize the CustomLogger class 10 | 11 | Args: 12 | name (str): the name of the logger (e.g., __name__). 13 | loglevel (_type_, optional): the log level. Defaults to logging.INFO. 14 | """ 15 | # Create a custom logger 16 | self.logger = logging.getLogger("FactCheck") 17 | self.logger.setLevel(loglevel) 18 | # Create a handler for writing to the log file 19 | if not os.path.exists("./log"): 20 | # If the directory does not exist, create it 21 | os.makedirs("./log") 22 | env = os.environ.get("env", "dev") 23 | fh = TimedRotatingFileHandler(filename="./log/factcheck_{}.log".format(env), when="D", encoding="utf-8") 24 | fh.setLevel(loglevel) 25 | if not self.logger.handlers: 26 | # Create another handler for output to the console 27 | ch = logging.StreamHandler() 28 | ch.setLevel(loglevel) 29 | # Define the output format of the handler 30 | formatter = logging.Formatter("[%(levelname)s]%(asctime)s %(filename)s:%(lineno)d: %(message)s") 31 | fh.setFormatter(formatter) 32 | ch.setFormatter(formatter) 33 | # Add handler to logger 34 | self.logger.addHandler(fh) 35 | self.logger.addHandler(ch) 36 | 37 | def getlog(self): 38 | return self.logger 39 | -------------------------------------------------------------------------------- /factcheck/utils/multimodal.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import cv2 3 | import base64 4 | import requests 5 | from .logger import CustomLogger 6 | 7 | logger = CustomLogger(__name__).getlog() 8 | 9 | 10 | def voice2text(input, openai_key): 11 | # voice to input 12 | client = OpenAI(api_key=openai_key) 13 | audio_file = open(input, "rb") 14 | transcription = client.audio.transcriptions.create(model="whisper-1", file=audio_file) 15 | return transcription.text 16 | 17 | 18 | def image2text(input, openai_key): 19 | # Function to encode the image 20 | def encode_image(image_path): 21 | with open(image_path, "rb") as image_file: 22 | return base64.b64encode(image_file.read()).decode("utf-8") 23 | 24 | # Getting the base64 string 25 | base64_image = encode_image(input) 26 | 27 | headers = { 28 | "Content-Type": "application/json", 29 | "Authorization": f"Bearer {openai_key}", 30 | } 31 | 32 | payload = { 33 | "model": "gpt-4-vision-preview", 34 | "messages": [ 35 | { 36 | "role": "user", 37 | "content": [ 38 | {"type": "text", "text": "What’s 
in this image?"}, 39 | { 40 | "type": "image_url", 41 | "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}, 42 | }, 43 | ], 44 | } 45 | ], 46 | "max_tokens": 300, 47 | } 48 | 49 | caption = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) 50 | return caption.json()["choices"][0]["message"]["content"] 51 | 52 | 53 | def video2text(input, openai_key): 54 | # Read the video and convert it to pictures 55 | video = cv2.VideoCapture(input) 56 | 57 | base64Frames = [] 58 | while video.isOpened(): 59 | success, frame = video.read() 60 | if not success: 61 | break 62 | _, buffer = cv2.imencode(".jpg", frame) 63 | base64Frames.append(base64.b64encode(buffer).decode("utf-8")) 64 | 65 | video.release() 66 | 67 | # Process the pictures with GPT4-V 68 | client = OpenAI(api_key=openai_key) 69 | PROMPT_MESSAGES = [ 70 | { 71 | "role": "user", 72 | "content": [ 73 | "These are frames from a video that I want to upload. Generate a compelling description that I can upload along with the video. Only return the description after the video is fully uploaded. without any other words.", 74 | *map(lambda x: {"image": x, "resize": 768}, base64Frames[0::50]), 75 | ], 76 | }, 77 | ] 78 | params = { 79 | "model": "gpt-4-vision-preview", 80 | "messages": PROMPT_MESSAGES, 81 | "max_tokens": 500, 82 | } 83 | 84 | result = client.chat.completions.create(**params) 85 | return result.choices[0].message.content 86 | 87 | 88 | def modal_normalization(modal="text", input=None, openai_key=None): 89 | logger.info(f"== Processing: Modal: {modal}, Input: {input}") 90 | if modal == "string": 91 | response = str(input) 92 | elif modal == "text": 93 | with open(input, "r") as f: 94 | response = f.read() 95 | elif modal == "speech": 96 | response = voice2text(input, openai_key) 97 | elif modal == "image": 98 | response = image2text(input, openai_key) 99 | elif modal == "video": 100 | response = video2text(input, openai_key) 101 | else: 102 | raise NotImplementedError 103 | logger.info(f"== Processed: Modal: {modal}, Input: {input}") 104 | return response 105 | -------------------------------------------------------------------------------- /factcheck/utils/prompt/__init__.py: -------------------------------------------------------------------------------- 1 | from .chatgpt_prompt import ChatGPTPrompt 2 | from .chatgpt_prompt_zh import ChatGPTPromptZH 3 | from .claude_prompt import ClaudePrompt 4 | from .customized_prompt import CustomizedPrompt 5 | 6 | prompt_map = { 7 | "chatgpt_prompt": ChatGPTPrompt, 8 | "chatgpt_prompt_zh": ChatGPTPromptZH, 9 | "claude_prompt": ClaudePrompt, 10 | } 11 | 12 | 13 | def prompt_mapper(prompt_name: str): 14 | if prompt_name in prompt_map: 15 | return prompt_map[prompt_name]() 16 | elif prompt_name.endswith("yaml") or prompt_name.endswith("json"): 17 | return CustomizedPrompt(prompt_name) 18 | else: 19 | raise NotImplementedError(f"Prompt {prompt_name} not implemented.") 20 | -------------------------------------------------------------------------------- /factcheck/utils/prompt/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class BasePrompt: 6 | decompose_prompt: str = None 7 | checkworthy_prompt: str = None 8 | qgen_prompt: str = None 9 | verify_prompt: str = None 10 | -------------------------------------------------------------------------------- /factcheck/utils/prompt/chatgpt_prompt.py: 
-------------------------------------------------------------------------------- 1 | decompose_prompt = """ 2 | Your task is to decompose the text into atomic claims. 3 | The answer should be a JSON with a single key "claims", with the value of a list of strings, where each string should be a context-independent claim, representing one fact. 4 | Note that: 5 | 1. Each claim should be concise (less than 15 words) and self-contained. 6 | 2. Avoid vague references like 'he', 'she', 'it', 'this', 'the company', 'the man' and using complete names. 7 | 3. Generate at least one claim for each single sentence in the texts. 8 | 9 | For example, 10 | Text: Mary is a five-year old girl, she likes playing piano and she doesn't like cookies. 11 | Output: 12 | {{"claims": ["Mary is a five-year old girl.", "Mary likes playing piano.", "Mary doesn't like cookies."]}} 13 | 14 | Text: {doc} 15 | Output: 16 | """ 17 | 18 | # restore_prompt = """Given a text and a list of facts derived from the text, your task is to identify the corresponding words in the text that derive each fact. 19 | # For each fact, please find the minimal continues span in the original text that contains the information to derive the fact. The answer should be a JSON dict where the keys are the facts and the values are the corresponding spans copied from the original text. 20 | # 21 | # For example, 22 | # Text: Mary is a five-year old girl, she likes playing piano and she doesn't like cookies. 23 | # Facts: ["Mary is a five-year old girl.", "Mary likes playing piano.", "Mary doesn't like cookies."] 24 | # 25 | # Output: 26 | # {{"Mary is a five-year old girl.":"Mary is a five-year old girl", 27 | # "Mary likes playing piano.":"she likes playing piano", 28 | # "Mary doesn't like cookies.":"she doesn't like cookies."] 29 | # 30 | # Text: {doc} 31 | # Facts: {claims} 32 | # Output: 33 | # """ 34 | 35 | # use this for demo 36 | restore_prompt = """Given a text and a list of facts derived from the text, your task is to split the text into chunks that derive each fact. 37 | For each fact, please find the corresponding continues span in the original text that contains the information to derive the fact. The answer should be a JSON dict where the keys are the facts and the values are the corresponding spans copied from the original text. 38 | Please make sure the returned spans can be concatenated to the full original doc. 39 | 40 | For example, 41 | Text: Mary is a five-year old girl, she likes playing piano and she doesn't like cookies. 42 | Facts: ["Mary is a five-year old girl.", "Mary likes playing piano.", "Mary doesn't like cookies."] 43 | 44 | Output: 45 | {{"Mary is a five-year old girl.":"Mary is a five-year old girl,", 46 | "Mary likes playing piano.":"she likes playing piano", 47 | "Mary doesn't like cookies.":"and she doesn't like cookies."] 48 | 49 | Text: {doc} 50 | Facts: {claims} 51 | Output: 52 | 53 | """ 54 | 55 | checkworthy_prompt = """ 56 | Your task is to evaluate each provided statement to determine if it presents information whose factuality can be objectively verified by humans, irrespective of the statement's current accuracy. Consider the following guidelines: 57 | 1. Opinions versus Facts: Distinguish between opinions, which are subjective and not verifiable, and statements that assert factual information, even if broad or general. Focus on whether there's a factual claim that can be investigated. 58 | 2. 
Clarity and Specificity: Statements must have clear and specific references to be verifiable (e.g., "he is a professor" is not verifiable without knowing who "he" is). 59 | 3. Presence of Factual Information: Consider a statement verifiable if it includes factual elements that can be checked against evidence or reliable sources, even if the overall statement might be broad or incorrect. 60 | Your response should be in JSON format, with each statement as a key and either "Yes" or "No" as the value, along with a brief rationale for your decision. 61 | 62 | For example, given these statements: 63 | 1. Gary Smith is a distinguished professor of economics. 64 | 2. He is a professor at MBZUAI. 65 | 3. Obama is the president of the UK. 66 | 67 | The expected output is: 68 | {{ 69 | "Gary Smith is a distinguished professor of economics.": "Yes (The statement contains verifiable factual information about Gary Smith's professional title and field.)", 70 | "He is a professor at MBZUAI.": "No (The statement cannot be verified due to the lack of clear reference to who 'he' is.)", 71 | "Obama is the president of the UK.": "Yes (This statement contain verifiable information regarding the political leadership of a country.)" 72 | }} 73 | 74 | For these statements: 75 | {texts} 76 | 77 | The output should be: 78 | """ 79 | 80 | qgen_prompt = """Given a claim, your task is to create minimum number of questions need to be check to verify the correctness of the claim. Output in JSON format with a single key "Questions", the value is a list of questions. For example: 81 | 82 | Claim: Your nose switches back and forth between nostrils. When you sleep, you switch about every 45 minutes. This is to prevent a buildup of mucus. It’s called the nasal cycle. 83 | Output: {{"Questions": ["Does your nose switch between nostrils?", "How often does your nostrils switch?", "Why does your nostril switch?", "What is nasal cycle?"]}} 84 | 85 | Claim: The Stanford Prison Experiment was conducted in the basement of Encina Hall, Stanford’s psychology building. 86 | Output: 87 | {{"Question":["Where was Stanford Prison Experiment was conducted?"]}} 88 | 89 | Claim: The Havel-Hakimi algorithm is an algorithm for converting the adjacency matrix of a graph into its adjacency list. It is named after Vaclav Havel and Samih Hakimi. 90 | Output: 91 | {{"Questions":["What does Havel-Hakimi algorithm do?", "Who are Havel-Hakimi algorithm named after?"]}} 92 | 93 | Claim: Social work is a profession that is based in the philosophical tradition of humanism. It is an intellectual discipline that has its roots in the 1800s. 94 | Output: 95 | {{"Questions":["What philosophical tradition is social work based on?", "What year does social work have its root in?"]}} 96 | 97 | Claim: {claim} 98 | Output: 99 | """ 100 | 101 | verify_prompt = """ 102 | Your task is to decide whether the evidence supports, refutes, or is irrelevant to the claim. Carefully review the evidence, noting that it may vary in detail and sometimes present conflicting information. Your judgment should be informed by this evidence, taking into account its relevance and reliability. 103 | Please structure your response in JSON format, including the following four keys: 104 | - "reasoning": explain the thought process behind your judgment. 105 | - "relationship": the stance label, which can be one of "SUPPORTS", "REFUTES", or "IRRELEVANT". 106 | For example, 107 | Input: 108 | [claim]: MBZUAI is located in Abu Dhabi, United Arab Emirates. 
109 | [evidence]: Where is MBZUAI located?\nAnswer: Masdar City - Abu Dhabi - United Arab Emirates 110 | Output: 111 | {{ 112 | "reasoning": "The evidence confirms that MBZUAI is located in Masdar City, Abu Dhabi, United Arab Emirates, so the relationship is SUPPORTS.", 113 | "relationship": "SUPPORTS" 114 | }} 115 | Input: 116 | [claim]: Copper reacts with ferrous sulfate (FeSO4). 117 | [evidence]: Copper is less reactive metal. It has positive value of standard reduction potential. Metal with high standard reduction potential can not displace other metal with low standard reduction potential values. Hence copper can not displace iron from ferrous sulphate solution. So no change will take place. 118 | Output: 119 | {{ 120 | "reasoning": "The evidence provided confirms that copper cannot displace iron from ferrous sulphate solution, and no change will take place. Therefore, the evidence refutes the claim", 121 | "relationship": "REFUTES" 122 | }} 123 | Input: 124 | [claim]: Apple is a leading technology company in UK. 125 | [evidence]: International Business Machines Corporation, nicknamed Big Blue, is an American multinational technology company headquartered in Armonk, New York and present in over 175 countries. 126 | Output: 127 | {{ 128 | "reasoning": "The evidence is about IBM, while the claim is about Apple. Therefore, the evidence is irrelevant to the claim", 129 | "relationship": "IRRELEVANT" 130 | }} 131 | Input 132 | [claim]: {claim} 133 | [evidences]: {evidence} 134 | Output: 135 | """ 136 | 137 | 138 | class ChatGPTPrompt: 139 | decompose_prompt = decompose_prompt 140 | restore_prompt = restore_prompt 141 | checkworthy_prompt = checkworthy_prompt 142 | qgen_prompt = qgen_prompt 143 | verify_prompt = verify_prompt 144 | -------------------------------------------------------------------------------- /factcheck/utils/prompt/chatgpt_prompt_zh.py: -------------------------------------------------------------------------------- 1 | from .base import BasePrompt 2 | from dataclasses import dataclass 3 | 4 | decompose_prompt_zh = """你的任务是将文本分解为原子命题(atomic claim)。 5 | 答案应该是一个带有单个key为"claims"的JSON,其value是一个字符串列表,其中每个字符串应该是一个独立于上下文的命题,代表一个事实。 6 | 注意: 7 | 1. 每一个原子命题应简明扼要(少于15个字),并且是独立完整的。 8 | 2. 避免使用"他"、"她"、"它"、"这个"、"此公司"、"此人"等模糊的称呼,请使用全名。 9 | 3. 为文本每个句子至少生成一个命题。 10 | 11 | 例如, 12 | 玛丽是一个五岁的女孩,她喜欢弹钢琴,她不喜欢饼干。 13 | 输出: 14 | {{"claim": ["玛丽是一个五岁的女孩。", "玛丽是一个五岁的女孩。", "玛丽不喜欢饼干。"]}} 15 | 16 | 文本: {doc} 17 | 输出: 18 | """ 19 | 20 | checkworthy_prompt_zh = """你的任务是评估每个陈述,以确定它提供的信息的真实性是否可以被人类客观验证,而不考虑当前陈述的准确性。请遵循以下规则: 21 | 1. 观点与事实: 区分主观的、无法证实的观点和表达事实信息的陈述,即使表述较为宽泛和笼统。请关注是否有可以验证其事实性的命题。 22 | 2. 清晰和专一性: 陈述必须有清晰、具体且可验证的参考(例如,"他是教授"这个陈述在不知道"他"是谁的情况下是不可验证的)。 23 | 3. 存在事实信息: 如果一个陈述包含了可以根据证据或可靠来源进行验证的事实元素,那么就认为它是可验证的,即使整个陈述可能较为笼统或不正确。 24 | 你的输出是JSON格式,key是每个陈述,value是"Yes"或"No"并简述原因。 25 | 26 | 例如,给定这些语句: 27 | 1. 加里·史密斯是一位杰出的经济学教授。 28 | 2. 他是麻省理工学院的教授。 29 | 3. 奥巴马是英国总统。 30 | 4. 
上海大学是中国江苏的大学。 31 | 32 | 预期输出为: 33 | {{ 34 | "加里·史密斯是一位杰出的经济学教授": "Yes(该陈述包含有关Gary Smith的专业头衔和领域的可验证的事实信息。)", 35 | "他是麻省理工学院的教授。": "No(该陈述没有明确提到‘他’是谁,因此无法核实。)", 36 | "奥巴马是英国总统。": "Yes(该陈述包含了关于一个国家政治领导的可核实的信息。)", 37 | "上海大学是中国江苏的大学。": "Yes(该陈述包含了关于一个大学地理位置的可验证的信息。)" 38 | }} 39 | 40 | 对于这些陈述: 41 | {texts} 42 | 43 | 输出应是: 44 | """ 45 | 46 | qgen_prompt_zh = """给定一个命题,你的任务是创建最少数量的问题,以验证命题的正确性。输出为JSON格式,其有单个key"Questions",value是问题列表。例如: 47 | 48 | 命题:你的鼻子呼吸在两个鼻孔之间来回切换。当你睡觉时,大约每45分钟换一次。这是为了防止粘液积聚。这个现象叫做鼻腔循环。 49 | 输出:{{"Questions": ["你的鼻子呼吸会在鼻孔之间交换吗?","你的鼻子呼吸多久交换一次?","你的鼻孔呼吸为什么会交换?","什么是鼻循环?"]}} 50 | 51 | 命题:斯坦福监狱实验是在斯坦福大学心理学大楼恩西纳大厅的地下室进行的。 52 | 输出: 53 | {{"Questions": ["斯坦福监狱实验是在哪里进行的?"]}} 54 | 55 | 命题:Havel-Hakimi算法是一种将图的邻接矩阵转换为其邻接表的算法。它以瓦茨拉夫·哈维尔和萨米·哈基米的名字命名。 56 | 输出: 57 | {{"Questions": ["Havel-Hakimi算法是做什么的?","Havel-Hakimi算法是以谁命名的?"]}} 58 | 59 | 命题:社会工作是一种基于人文主义哲学传统的职业。这是一门起源于19世纪的知识学科。 60 | 输出: 61 | {{"Questions": ["社会工作是基于什么哲学传统?","社会工作起源于哪一年?"]}} 62 | 63 | 命题:{claim} 64 | 输出: 65 | """ 66 | 67 | verify_prompt_zh = """你的任务是使用陈述附带的证据(evidence)来评估其陈述的准确性。仔细审查这些证据,注意它可能在细节上有所不同,有时会呈现相互矛盾的信息。你的判断应该根据这些证据,并考虑其相关性和可靠性。 68 | 69 | 请记住,证据中缺乏细节并不一定表明陈述是不准确的。在评估陈述的真实性时,要区分错误和证据支持陈述的地方。 70 | 71 | 请以JSON格式构建您的回答,包含以下四个key: 72 | - "reasoning": 解释你的判断的推理过程。 73 | - "error": 如果文本事实正确,该项为"none";否则,请指出命题中任何具体的不准确之处。 74 | - "correction": 如果文本是事实,该项为"none";否则,对任何发现的不准确之处进行纠正,并在纠正时候使用证据来支持。 75 | - "factuality": 如果给定的文本是事实,该项为"true",表明该陈述是事实。否则为"false",说明根据证据,该陈述并非事实。 76 | 77 | 例如: 78 | 输入: 79 | [文本]: MBZUAI位于阿拉伯联合酋长国的阿布扎比。 80 | [证据]: MBZUAI位于哪里?\n答案:马斯达尔城-阿布扎比-阿拉伯联合酋长国 81 | 82 | 输出: 83 | {{ 84 | "reasoning": "证据证实MBZUAI位于阿拉伯联合酋长国阿布扎比的马斯达尔城,因此该陈述在事实上是正确的", 85 | "error": none, 86 | "correction": none, 87 | "factuality": true 88 | }} 89 | 90 | 91 | 输入: 92 | [文本]: 铜会与硫酸亚铁(FeSO4)发生反应。 93 | [证据]: 铜是较不活泼的金属。它具有正的标准还原电位。具有高标准还原电位的金属不能取代具有低标准还原电位的金属。因此,铜不能从硫酸亚铁溶液中取代铁。所以不会发生反应。 94 | 95 | 输出: 96 | {{ 97 | "reasoning": "所提供的证据证实,铜不能从硫酸亚铁溶液中取代铁,也不会发生变化。", 98 | "error": "铜不能与文中所述的硫酸亚铁反应。", 99 | "correction": "铜不能与硫酸亚铁反应,因为它不能取代硫酸亚铁溶液中的铁。", 100 | "factuality": false 101 | }} 102 | 103 | 输入 104 | [文本]:{claim} 105 | [证据]:{evidence} 106 | 107 | 输出: 108 | """ 109 | 110 | class ChatGPTPromptZH: 111 | decompose_prompt = decompose_prompt_zh 112 | checkworthy_prompt = checkworthy_prompt_zh 113 | qgen_prompt = qgen_prompt_zh 114 | verify_prompt = verify_prompt_zh 115 | -------------------------------------------------------------------------------- /factcheck/utils/prompt/claude_prompt.py: -------------------------------------------------------------------------------- 1 | decompose_prompt = """ 2 | Your task is to decompose the text into atomic claims. 3 | The answer should be a JSON with a single key "claims", with the value of a list of strings, where each string should be a context-independent claim, representing one fact. 4 | Note that: 5 | 1. Each claim should be concise (less than 15 words) and self-contained. 6 | 2. Avoid vague references like 'he', 'she', 'it', 'this', 'the company', 'the man' and using complete names. 7 | 3. Generate at least one claim for each single sentence in the texts. 8 | 9 | For example, 10 | Text: Mary is a five-year old girl, she likes playing piano and she doesn't like cookies. 
11 | Output: 12 | {{"claims": ["Mary is a five-year old girl.", "Mary likes playing piano.", "Mary doesn't like cookies."]}} 13 | 14 | Text: {doc} 15 | Output: 16 | """ 17 | 18 | checkworthy_prompt = """ 19 | Your task is to evaluate each provided statement to determine if it presents information whose factuality can be objectively verified by humans, irrespective of the statement's current accuracy. Consider the following guidelines: 20 | 1. Opinions versus Facts: Distinguish between opinions, which are subjective and not verifiable, and statements that assert factual information, even if broad or general. Focus on whether there's a factual claim that can be investigated. 21 | 2. Clarity and Specificity: Statements must have clear and specific references to be verifiable (e.g., "he is a professor" is not verifiable without knowing who "he" is). 22 | 3. Presence of Factual Information: Consider a statement verifiable if it includes factual elements that can be checked against evidence or reliable sources, even if the overall statement might be broad or incorrect. 23 | Your response should be in JSON format, with each statement as a key and either "Yes" or "No" as the value, along with a brief rationale for your decision. 24 | 25 | For example, given these statements: 26 | 1. Gary Smith is a distinguished professor of economics. 27 | 2. He is a professor at MBZUAI. 28 | 3. Obama is the president of the UK. 29 | 30 | The expected output is a JSON: 31 | {{ 32 | "Gary Smith is a distinguished professor of economics.": "Yes (The statement contains verifiable factual information about Gary Smith's professional title and field.)", 33 | "He is a professor at MBZUAI.": "No (The statement cannot be verified due to the lack of clear reference to who 'he' is.)", 34 | "Obama is the president of the UK.": "Yes (This statement contain verifiable information regarding the political leadership of a country.)" 35 | }} 36 | 37 | For these statements: 38 | {texts} 39 | 40 | The output should be a JSON: 41 | """ 42 | 43 | qgen_prompt = """Given a claim, your task is to create minimum number of questions need to be check to verify the correctness of the claim. Output in JSON format with a single key "Questions", the value is a list of questions. For example: 44 | 45 | Claim: Your nose switches back and forth between nostrils. When you sleep, you switch about every 45 minutes. This is to prevent a buildup of mucus. It’s called the nasal cycle. 46 | JSON Output: {{"Questions": ["Does your nose switch between nostrils?", "How often does your nostrils switch?", "Why does your nostril switch?", "What is nasal cycle?"]}} 47 | 48 | Claim: The Stanford Prison Experiment was conducted in the basement of Encina Hall, Stanford’s psychology building. 49 | JSON Output: 50 | {{"Question":["Where was Stanford Prison Experiment was conducted?"]}} 51 | 52 | Claim: The Havel-Hakimi algorithm is an algorithm for converting the adjacency matrix of a graph into its adjacency list. It is named after Vaclav Havel and Samih Hakimi. 53 | JSON Output: 54 | {{"Questions":["What does Havel-Hakimi algorithm do?", "Who are Havel-Hakimi algorithm named after?"]}} 55 | 56 | Claim: Social work is a profession that is based in the philosophical tradition of humanism. It is an intellectual discipline that has its roots in the 1800s. 
57 | Output: 58 | {{"Questions":["What philosophical tradition is social work based on?", "What year does social work have its root in?"]}} 59 | 60 | Claim: {claim} 61 | JSON Output: 62 | """ 63 | 64 | verify_prompt = """ 65 | Your task is to evaluate the accuracy of a provided statement using the accompanying evidence. Carefully review the evidence, noting that it may vary in detail and sometimes present conflicting information. Your judgment should be informed by this evidence, taking into account its relevance and reliability. 66 | 67 | Keep in mind that a lack of detail in the evidence does not necessarily indicate that the statement is inaccurate. When assessing the statement's factuality, distinguish between errors and areas where the evidence supports the statement. 68 | 69 | Please structure your response in JSON format, including the following four keys: 70 | - "reasoning": explain the thought process behind your judgment. 71 | - "error": none if the text is factual; otherwise, identify any specific inaccuracies in the statement. 72 | - "correction": none if the text is factual; otherwise, provide corrections to any identified inaccuracies, using the evidence to support your corrections. 73 | - "factuality": true if the given text is factual, false otherwise, indicating whether the statement is factual, or non-factual based on the evidence. 74 | 75 | For example: 76 | Input: 77 | [text]: MBZUAI is located in Abu Dhabi, United Arab Emirates. 78 | [evidence]: Where is MBZUAI located?\nAnswer: Masdar City - Abu Dhabi - United Arab Emirates 79 | 80 | JSON Output: 81 | {{ 82 | "reasoning": "The evidence confirms that MBZUAI is located in Masdar City, Abu Dhabi, United Arab Emirates, so the statement is factually correct", 83 | "error": none, 84 | "correction": none, 85 | "factuality": true 86 | }} 87 | 88 | 89 | Input: 90 | [text]: Copper reacts with ferrous sulfate (FeSO4). 91 | [evidence]: Copper is less reactive metal. It has positive value of standard reduction potential. Metal with high standard reduction potential can not displace other metal with low standard reduction potential values. Hence copper can not displace iron from ferrous sulphate solution. So no change will take place. 
92 | 93 | JSON Output: 94 | {{ 95 | "reasoning": "The evidence provided confirms that copper cannot displace iron from ferrous sulphate solution, and no change will take place.", 96 | "error": "Copper does not react with ferrous sulfate as stated in the text.", 97 | "correction": "Copper does not react with ferrous sulfate as it cannot displace iron from ferrous sulfate solution.", 98 | "factuality": false 99 | }} 100 | 101 | 102 | Input 103 | [text]: {claim} 104 | [evidences]: {evidence} 105 | 106 | JSON Output: 107 | """ 108 | 109 | 110 | class ClaudePrompt: 111 | decompose_prompt = decompose_prompt 112 | checkworthy_prompt = checkworthy_prompt 113 | qgen_prompt = qgen_prompt 114 | verify_prompt = verify_prompt 115 | -------------------------------------------------------------------------------- /factcheck/utils/prompt/customized_prompt.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import json 3 | from .base import BasePrompt 4 | 5 | 6 | class CustomizedPrompt(BasePrompt): 7 | def __init__(self, CustomizedPrompt): 8 | if CustomizedPrompt.endswith("yaml"): 9 | self.prompts = self.load_prompt_yaml(CustomizedPrompt) 10 | elif CustomizedPrompt.endswith("json"): 11 | self.prompts = self.load_prompt_json(CustomizedPrompt) 12 | else: 13 | raise NotImplementedError(f"File type of {CustomizedPrompt} not implemented.") 14 | keys = [ 15 | "decompose_prompt", 16 | "checkworthy_prompt", 17 | "qgen_prompt", 18 | "verify_prompt", 19 | ] 20 | 21 | for key in keys: 22 | assert key in self.prompts, f"Key {key} not found in the prompt yaml file." 23 | setattr(self, key, self.prompts[key]) 24 | 25 | def load_prompt_yaml(self, prompt_name): 26 | # Load the prompt from a yaml file 27 | with open(prompt_name, "r") as file: 28 | return yaml.safe_load(file) 29 | 30 | def load_prompt_json(self, prompt_name): 31 | # Load the prompt from a json file 32 | with open(prompt_name, "r") as file: 33 | return json.load(file) 34 | -------------------------------------------------------------------------------- /factcheck/utils/utils.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | 4 | def load_yaml(filepath): 5 | with open(filepath, "r") as file: 6 | return yaml.safe_load(file) 7 | -------------------------------------------------------------------------------- /factcheck/utils/web_util.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import backoff 3 | import time 4 | import bs4 5 | import asyncio 6 | from httpx import AsyncHTTPTransport 7 | from httpx._client import AsyncClient 8 | 9 | 10 | USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0" 11 | # mobile user-agent 12 | MOBILE_USER_AGENT = "Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.36" 13 | headers = {"User-Agent": USER_AGENT} 14 | 15 | 16 | def is_tag_visible(element: bs4.element) -> bool: 17 | """Determines if an HTML element is visible. 18 | 19 | Args: 20 | element: A BeautifulSoup element to check the visibility of. 21 | returns: 22 | Whether the element is visible. 
23 | """ 24 | if element.parent.name in [ 25 | "style", 26 | "script", 27 | "head", 28 | "title", 29 | "meta", 30 | "[document]", 31 | ] or isinstance(element, bs4.element.Comment): 32 | return False 33 | return True 34 | 35 | 36 | transport = AsyncHTTPTransport(retries=3) 37 | 38 | 39 | async def httpx_get(url: str, headers: dict): 40 | try: 41 | async with AsyncClient(transport=transport) as client: 42 | response = await client.get(url, headers=headers, timeout=3) 43 | response = response if response.status_code == 200 else None 44 | if not response: 45 | return False, None 46 | else: 47 | return True, response 48 | except Exception as e: # noqa: F841 49 | return False, None 50 | 51 | 52 | async def httpx_bind_key(url: str, headers: dict, key: str = ""): 53 | flag, response = await httpx_get(url, headers) 54 | return flag, response, url, key 55 | 56 | 57 | def crawl_web(query_url_dict: dict): 58 | tasks = list() 59 | for query, urls in query_url_dict.items(): 60 | for url in urls: 61 | task = httpx_bind_key(url=url, headers=headers, key=query) 62 | tasks.append(task) 63 | asyncio.set_event_loop(asyncio.SelectorEventLoop()) 64 | loop = asyncio.get_event_loop() 65 | responses = loop.run_until_complete(asyncio.gather(*tasks)) 66 | return responses 67 | 68 | 69 | # @backoff.on_exception(backoff.expo, (requests.exceptions.RequestException, requests.exceptions.Timeout), max_tries=1,max_time=3) 70 | def common_web_request(url: str, query: str = None, timeout: int = 3): 71 | resp = requests.get(url, headers=headers, timeout=timeout) 72 | if query: 73 | return resp, query 74 | else: 75 | return resp 76 | 77 | 78 | def parse_response(response: requests.Response, url: str, query: str = None): 79 | html_content = response.text 80 | url = url 81 | try: 82 | soup = bs4.BeautifulSoup(html_content, "html.parser") 83 | texts = soup.findAll(text=True) 84 | # Filter out invisible text from the page. 85 | visible_text = filter(is_tag_visible, texts) 86 | except Exception as _: # noqa: F841 87 | return None, url, query 88 | 89 | # Returns all the text concatenated as a string. 90 | web_text = " ".join(t.strip() for t in visible_text).strip() 91 | # Clean up spacing. 92 | web_text = " ".join(web_text.split()) 93 | return web_text, url, query 94 | 95 | 96 | def scrape_url(url: str, timeout: float = 3): 97 | """Scrapes a URL for all text information. 98 | 99 | Args: 100 | url: URL of webpage to scrape. 101 | timeout: Timeout of the requests call. 102 | Returns: 103 | web_text: The visible text of the scraped URL. 104 | url: URL input. 105 | """ 106 | # Scrape the URL 107 | try: 108 | response = requests.get(url, timeout=timeout) 109 | response.raise_for_status() 110 | except requests.exceptions.RequestException as _: # noqa: F841 111 | return None, url 112 | 113 | # Extract out all text from the tags 114 | try: 115 | soup = bs4.BeautifulSoup(response.text, "html.parser") 116 | texts = soup.findAll(text=True) 117 | # Filter out invisible text from the page. 118 | visible_text = filter(is_tag_visible, texts) 119 | except Exception as _: # noqa: F841 120 | return None, url 121 | 122 | # Returns all the text concatenated as a string. 123 | web_text = " ".join(t.strip() for t in visible_text).strip() 124 | # Clean up spacing. 
125 | web_text = " ".join(web_text.split()) 126 | return web_text, url 127 | 128 | 129 | def crawl_google_web(response, top_k: int = 10): 130 | soup = bs4.BeautifulSoup(response.text, "html.parser") 131 | # with open("text%d.html"%time.time(), 'w') as fw: 132 | # fw.write(response.text) 133 | valid_node_list = list() 134 | for node in soup.find_all("a", {"href": True}): 135 | if node.findChildren("h3"): 136 | valid_node_list.append(node) 137 | result_urls = list() 138 | for node in valid_node_list: 139 | result_urls.append(node.get("href")) 140 | # result_urls = [link.get("href") for link in node if link.get("href")] 141 | return result_urls[:top_k] 142 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "openfactverification" 3 | version = "0.1.0" 4 | packages = [ 5 | {include = "factcheck"} 6 | ] 7 | description = "Loki: An Open-source Tool for Fact Verification" 8 | authors = [ 9 | "Hao Wang ", 10 | "Yuxia Wang ", 11 | "Minghan Wang ", 12 | "Yilin Geng ", 13 | "Zhen Zhao ", 14 | "Preslav Nakov ", 15 | "Timothy Baldwin ", 16 | "Zenan Zhai ", 17 | "Xudong Han ", 18 | "Haonan Li " 19 | ] 20 | license = "MIT" 21 | readme = "README.md" 22 | repository = "https://github.com/Libr-AI/OpenFactVerification/" 23 | 24 | [tool.poetry.dependencies] 25 | python = "^3.9" 26 | anthropic = "^0.23.1" 27 | backoff = "^2.2.1" 28 | bs4 = "^0.0.2" 29 | Flask = { version = "^3.0.3", optional = true } 30 | httpx = "^0.27.0" 31 | nltk = "^3.8.1" 32 | openai = "^1.16.2" 33 | opencv-python = "^4.9.0.80" 34 | pandas = "^2.2.1" 35 | playwright = "^1.42.0" 36 | playwright-stealth = "^1.0.6" 37 | tiktoken = "^0.6.0" 38 | 39 | [tool.poetry.extras] 40 | api = ["Flask"] 41 | 42 | [build-system] 43 | requires = ["poetry-core"] 44 | build-backend = "poetry.core.masonry.api" 45 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | anthropic 2 | backoff 3 | bs4 4 | flask 5 | httpx 6 | nltk 7 | openai>=1.0.0 8 | opencv-python 9 | pandas 10 | playwright 11 | playwright_stealth 12 | tiktoken 13 | -------------------------------------------------------------------------------- /script/minimal_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import json 4 | from tqdm import tqdm 5 | 6 | sys.path.append("..") 7 | from factcheck import FactCheck # noqa: E402 8 | 9 | # ANSI escape codes for colors 10 | green = "\033[92m" 11 | red = "\033[91m" 12 | reset = "\033[0m" 13 | 14 | 15 | def minimal_test(lang="en"): 16 | # Initialize the FactCheck class 17 | prompt = "chatgpt_prompt" 18 | if lang == "zh": 19 | prompt = "chatgpt_prompt_zh" 20 | factcheck = FactCheck(prompt=prompt) 21 | 22 | def atom_test(instance): 23 | response = instance["response"] 24 | res = factcheck.check_text(response) 25 | try: 26 | for k, v in instance["attributes"].items(): 27 | print(f"{k}: {res[k]}, {v}") 28 | assert res[k] == v 29 | return True 30 | except: # noqa E722 31 | return False 32 | 33 | with open(f"minimal_test_{lang}.json", encoding="utf-8") as f: 34 | test_data = json.load(f) 35 | num_tests = len(test_data) 36 | 37 | with tqdm(total=num_tests, position=0) as pbar: 38 | success_count = 0 39 | fail_count = 0 40 | for i, test_piece in enumerate(test_data): 41 | result = atom_test(test_piece) 
42 | 43 | if result is True: 44 | success_count += 1 45 | pbar.set_postfix_str("█", refresh=False) 46 | pbar.colour = "green" 47 | else: 48 | fail_count += 1 49 | pbar.set_postfix_str("█", refresh=False) 50 | pbar.colour = "red" 51 | 52 | pbar.set_description(f"| Success: {success_count}, Failed: {fail_count}", refresh=True) 53 | pbar.update(1) 54 | time.sleep(0.1) # Sleep for 0.1 seconds 55 | 56 | 57 | if __name__ == "__main__": 58 | minimal_test() 59 | -------------------------------------------------------------------------------- /script/minimal_test_en.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 1, 4 | "language": "en", 5 | "response": "I am a famous scientist.", 6 | "attributes": { 7 | "factuality": "Nothing to check." 8 | } 9 | }, 10 | { 11 | "id": 2, 12 | "language": "en", 13 | "response": "Steve Jobs is the founder of Apple.", 14 | "attributes": { 15 | "factuality": true 16 | } 17 | }, 18 | { 19 | "id": 3, 20 | "language": "en", 21 | "response": "Elon Musk bought Twitter in 2023 and renamed it to X.", 22 | "attributes": { 23 | "factuality": false 24 | } 25 | }, 26 | { 27 | "id": 4, 28 | "language": "en", 29 | "response": "Blockchain technology is only used for cryptocurrencies.", 30 | "attributes": { 31 | "factuality": false 32 | } 33 | }, 34 | { 35 | "id": 5, 36 | "language": "en", 37 | "response": "Facial recognition technology is infallible and cannot be tricked.", 38 | "attributes": { 39 | "factuality": false 40 | } 41 | }, 42 | { 43 | "id": 6, 44 | "language": "en", 45 | "response": "Shanghai Jiao Tong University is one of the top public universities in Guangdong, China", 46 | "attributes": { 47 | "factuality": false 48 | } 49 | }, 50 | { 51 | "id": 7, 52 | "language": "en", 53 | "response": "William Yarnel Slack (August 1, 1816 - March 21, 1862) was an American lawyer, politician, and military officer who fought for the Confederate States of America during the American Civil War. Born in Kentucky, Slack moved to Missouri as a child and later entered the legal profession. After serving in the Missouri General Assembly from 1842 to 1843, he fought as a captain in the United States Army for fourteen months during the Mexican–American War, beginning in 1846. He saw action at the Battle of Embudo Pass and the Siege of Pueblo de Taos. Returning to a legal career, Slack became influential in his local area. After the outbreak of the American Civil War in April 1861, Slack, who held pro-slavery views, supported the Confederate cause. When the Missouri State Guard (MSG) was formed the next month to oppose the Union Army, he was appointed as a brigadier general in the MSG's 4th Division. After participating in the Battle of Carthage in July, he fought in the Battle of Wilson's Creek on August 10. After a surprise Union attack, Slack's deployment of his division gave time for further Confederate States Army and MSG troops to deploy. Suffering a bad hip wound at Wilson's Creek, he was unable to rejoin his command until October. Along with other Missouri State Guard officers, Slack transferred to the Confederate States Army in late 1861 where he commanded a brigade with the rank of colonel. On March 7, 1862, during the Battle of Pea Ridge, Slack suffered another wound that was close to the injury he had received at Wilson's Creek. Infection set in, and he died on March 21. 
He was posthumously promoted to brigadier general in the Confederate army on April 17; the Confederate States Senate might not have known that he was dead at the time of the promotion. (1786)", 54 | "attributes": { 55 | "factuality": false 56 | } 57 | } 58 | ] 59 | -------------------------------------------------------------------------------- /script/minimal_test_zh.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 1, 4 | "response": "我是一个NLP科学家。", 5 | "attributes": { 6 | "factuality": "Nothing to check." 7 | } 8 | }, 9 | { 10 | "id": 2, 11 | "response": "马云创立了阿里巴巴公司。", 12 | "attributes": { 13 | "factuality": true 14 | } 15 | }, 16 | { 17 | "id": 3, 18 | "response": "埃隆·马斯克(Elon Musk)在2023年收购了Twitter,并将其更名为X。", 19 | "attributes": { 20 | "factuality": false 21 | } 22 | }, 23 | { 24 | "id": 4, 25 | "response": "马化腾创办了京东和腾讯。", 26 | "attributes": { 27 | "factuality": false 28 | } 29 | }, 30 | { 31 | "id": 5, 32 | "response": "人脸识别系统研究始于20世纪20年代。", 33 | "attributes": { 34 | "factuality": false 35 | } 36 | }, 37 | { 38 | "id": 6, 39 | "response": "天津大学是中国江苏的大学。", 40 | "attributes": { 41 | "factuality": false 42 | } 43 | } 44 | ] 45 | -------------------------------------------------------------------------------- /templates/LibrAI_fc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | LibrAI-Eval | Fact-checking 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 |
[The remainder of templates/LibrAI_fc.html — the results page titled "LibrAI-Eval | Fact-checking" — lost its HTML markup during extraction; only the Jinja logic is recoverable. The page iterates over responses['claim_detail'], numbering each claim with {{ loop.index }}, picking a status icon from claim_detail['factuality'] (0, 1, 'Nothing to check', 'No evidence found', or a fallback) and showing {{ claim_detail['origin_text'] }}. For the selected claim it renders "Claim: {{ responses['claim_detail'][shown_claim]['claim'] }}" with an evidence count, then groups that claim's evidences through the filter_evidences filter into "REFUTES", "IRRELEVANT", and "SUPPORTS" tabs, each entry showing Evidence {{ evi["text"] }}, Source {{ evi.url }}, and Reasoning {{ evi.reasoning }}. A summary panel displays "Overall Factuality" as {{ 100 * responses["summary"]["factuality"] }}% alongside counts labelled Detected claims ({{ responses["summary"]["num_claims"] }}), Well Supported ({{ responses["summary"]["num_supported_claims"] }}), Conflict ({{ responses["summary"]["num_refuted_claims"] }}), and Controversial ({{ responses["summary"]["num_controversial_claims"] }}).]
--------------------------------------------------------------------------------
/templates/input.html:
--------------------------------------------------------------------------------
[templates/input.html — the "LIBRAI Fact-Checking App" input page — also lost its markup during extraction. Its recoverable content: a LibrAI logo, the text form whose "response" field is posted to the Flask index route in webapp.py, and a footer reading "© LibrAI. All rights reserved".]
145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /webapp.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, render_template, jsonify 2 | from factcheck.utils.llmclient import CLIENTS 3 | import argparse 4 | import json 5 | 6 | from factcheck.utils.utils import load_yaml 7 | from factcheck import FactCheck 8 | 9 | app = Flask(__name__, static_folder="assets") 10 | 11 | 12 | # Define the custom filter 13 | def zip_lists(a, b): 14 | return zip(a, b) 15 | 16 | 17 | # Register the filter with the Jinja2 environment 18 | app.jinja_env.filters["zip"] = zip_lists 19 | 20 | 21 | # Occurrences count filter 22 | def count_occurrences(input_dict, target_string, key): 23 | input_list = [item[key] for item in input_dict] 24 | return input_list.count(target_string) 25 | 26 | 27 | app.jinja_env.filters["count_occurrences"] = count_occurrences 28 | 29 | 30 | # Occurrences count filter 31 | def filter_evidences(input_dict, target_string, key): 32 | return [item for item in input_dict if target_string == item[key]] 33 | 34 | 35 | app.jinja_env.filters["filter_evidences"] = filter_evidences 36 | 37 | 38 | @app.route("/", methods=["GET", "POST"]) 39 | def index(): 40 | if request.method == "POST": 41 | response = request.form["response"] 42 | if response == "": 43 | return render_template("input.html") 44 | response = factcheck_instance.check_text(response) 45 | 46 | # save the response json file 47 | with open("assets/response.json", "w") as f: 48 | json.dump(response, f) 49 | 50 | return render_template("LibrAI_fc.html", responses=response, shown_claim=0) 51 | 52 | return render_template("input.html") 53 | 54 | 55 | @app.route("/shownClaim/") 56 | def get_content(content_id): 57 | # load the response json file 58 | import json 59 | 60 | with open("assets/response.json") as f: 61 | response = json.load(f) 62 | 63 | return render_template("LibrAI_fc.html", responses=response, shown_claim=(int(content_id) - 1)) 64 | 65 | 66 | if __name__ == "__main__": 67 | parser = argparse.ArgumentParser() 68 | parser.add_argument("--model", type=str, default="gpt-4o") 69 | parser.add_argument("--client", type=str, default=None, choices=CLIENTS.keys()) 70 | parser.add_argument("--prompt", type=str, default="chatgpt_prompt") 71 | parser.add_argument("--retriever", type=str, default="serper") 72 | parser.add_argument("--modal", type=str, default="text") 73 | parser.add_argument("--input", type=str, default="demo_data/text.txt") 74 | parser.add_argument("--api_config", type=str, default="factcheck/config/api_config.yaml") 75 | args = parser.parse_args() 76 | 77 | # Load API config from yaml file 78 | try: 79 | api_config = load_yaml(args.api_config) 80 | except Exception as e: 81 | print(f"Error loading api config: {e}") 82 | api_config = {} 83 | 84 | factcheck_instance = FactCheck( 85 | default_model=args.model, 86 | api_config=api_config, 87 | prompt=args.prompt, 88 | retriever=args.retriever, 89 | ) 90 | 91 | app.run(host="0.0.0.0", port=2024, debug=True) 92 | --------------------------------------------------------------------------------
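A minimal end-to-end sketch of how the pieces above fit together, assembled from the calls that appear in script/minimal_test.py and webapp.py. The model name, prompt name, retriever, and config path simply mirror webapp.py's defaults; the sketch assumes factcheck/config/api_config.yaml has been filled in with working OpenAI and Serper keys, and the shape of the returned dictionary is inferred from the keys read by the templates and the test script, so treat it as illustrative rather than definitive.

from factcheck import FactCheck
from factcheck.utils.utils import load_yaml

# Mirror webapp.py: load API keys from the YAML config and build the pipeline.
api_config = load_yaml("factcheck/config/api_config.yaml")
factcheck = FactCheck(
    default_model="gpt-4o",      # webapp.py default
    api_config=api_config,
    prompt="chatgpt_prompt",     # or "chatgpt_prompt_zh", "claude_prompt", or a custom .yaml/.json path
    retriever="serper",
)

# check_text() is the same entry point used by minimal_test.py and the Flask index route.
result = factcheck.check_text("Steve Jobs is the founder of Apple.")

# The web template reads result["summary"] (overall factuality score and claim counts)
# and result["claim_detail"] (per-claim evidences); these key names are taken from
# templates/LibrAI_fc.html and may differ in other versions of the library.
print(result["summary"])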