├── .gitignore ├── LICENSE ├── README.md ├── assets ├── diagram.png └── question-answer-perception.csv ├── gpv ├── __init__.py ├── chunker.py ├── embd.py ├── measure.py ├── models │ ├── __init__.py │ └── models.py ├── parser.py ├── utils.py └── valuellama.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | 164 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Value4AI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [AAAI 2025] Measuring Human and AI Values Based on Generative Psychometrics with Large Language Models 2 | 3 | ## 🚀 Introduction 4 | 5 | This codebase accompanies the paper [*Measuring Human and AI Values based on Generative Psychometrics with Large Language Models*](https://arxiv.org/abs/2409.12106). We introduce **G**enerative **P**sychometrics for **V**alues (GPV), an LLM-based, data-driven value measurement paradigm, theoretically grounded in text-revealed selective perceptions. 
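For quick orientation, here is a minimal usage sketch (the complete, runnable examples are given under Example Usage below; the default parsing model assumes an `OPENAI_API_KEY` environment variable is set):

```python
from gpv import GPV

gpv = GPV(parsing_model_name="gpt-4o-mini")  # parsing LLM; the measuring LLM defaults to ValueLlama
results = gpv.measure_perceptions(
    ["I love helping others"],   # perceptions: value-laden sentences
    ["benevolence", "power"],    # the value space to measure against
)
# `results` maps each perception to its relevant values, relevance scores, and valences
```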
6 | 7 | ![Diagram of GPV](./assets/diagram.png) 8 | 9 | Compared with traditional tools for measuring human values, GPV (1) effectively mitigates response bias and resource demands by dispensing with self-reports; (2) captures authentic behaviors instead of relying on forced ratings; (3) can handle historical or subjective data; (4) measures values in open-ended value spaces and easily adapts to new or evolving values without expert effort; and (5) enables more scalable and flexible value measurement. 10 | 11 | Compared with recent works on measuring LLM values, GPV (1) mitigates response bias and yields more theoretically valid results; (2) is more practically relevant for measuring LLM values based on their scalable and free-form responses; and (3) enables context-specific measurements. 12 | 13 | ## 📦 Requirements 14 | - Python 3.10 15 | - numPy 16 | - torch 17 | - transformers 18 | - accelerate 19 | - openai 20 | - semchunk 21 | - tiktoken 22 | 23 | You may install the required packages by: 24 | ```bash 25 | pip install -r requirements.txt 26 | ``` 27 | 28 | ## 🔑 Example Usage 29 | 30 | Note that there are two LLMs involved in GPV: the **parsing LLM** and the **measuring LLM**. 31 | 32 | You may set the **parsing LLM** by feeding the `parsing_model_name` parameter when initializing the GPV object. For example, `gpv = GPV(parsing_model_name="gpt-4o-mini")`. Accordingly, you need to set your API key as an environment variable `OPENAI_API_KEY` or [here](./gpv/models/models.py). Alternative LLMs can be used; please see `./gpv/models/` for more details. 33 | 34 | The **measuring LLM** is set to our [`ValueLlama`](https://huggingface.co/Value4AI/ValueLlama-3-8B) by default. 35 | 36 | ### Perception-level value measurements 37 | ```python 38 | from gpv import GPV 39 | 40 | perceptions = [ 41 | "I love helping others", # Each perception is one sentence 42 | "Mary wants to get high scores in her exams", 43 | "Having fun all the time is important.", 44 | ] 45 | values = ["hedonism", "achievement", "power", "benevolence", "universalism"] 46 | 47 | gpv = GPV(parsing_model_name="gpt-4o-mini") 48 | results = gpv.measure_perceptions(perceptions, values) 49 | ``` 50 | 51 | ### Parsing long texts into perceptions 52 | ```python 53 | from gpv import GPV 54 | 55 | texts = [ 56 | "Today is a good day. I woke up early and went for a run in the park. The weather was perfect, and I felt energized. After my run, I had a healthy breakfast and spent some time reading a book. In the afternoon, I met up with some friends for lunch, and we had a great time catching up. I feel grateful for the wonderful day I had and look forward to more days like this...", # e.g., a blog post 57 | "...", 58 | ] 59 | 60 | gpv = GPV(parsing_model_name="gpt-4o-mini") 61 | results = gpv.parse_texts(texts) 62 | ``` 63 | 64 | ### Text-level value measurements (for the text author) 65 | ```python 66 | from gpv import GPV 67 | 68 | texts = [ 69 | "Today is a good day. I woke up early and went for a run in the park. The weather was perfect, and I felt energized. After my run, I had a healthy breakfast and spent some time reading a book. In the afternoon, I met up with some friends for lunch, and we had a great time catching up. 
I feel grateful for the wonderful day I had and look forward to more days like this...", # e.g., a blog post 70 | "...", 71 | ] 72 | values = ["hedonism", "achievement", "power", "benevolence", "universalism"] 73 | 74 | gpv = GPV(parsing_model_name="gpt-4o-mini") 75 | results = gpv.measure_texts(texts, values) 76 | ``` 77 | 78 | ### Text-level value measurements (for the given subjects) 79 | ```python 80 | from gpv import GPV 81 | 82 | text = "Mary is a PhD student in computer science. She is working on a project that aims to develop a new algorithm for image recognition. She is very passionate about her work and spends most of her time in the lab. She is determined to make a breakthrough in her field and become a successful researcher. Henry, on the other hand, is a high school student who is struggling with his grades. He is not interested in studying and spends most of his time playing video games. He is not motivated to do well in school and often skips classes. He dreams of becoming a professional gamer and making a living by playing video games." # e.g., an essay 83 | values = ["hedonism", "achievement", "power", "benevolence", "universalism"] 84 | measurement_subjects = ["Mary", "Henry"] 85 | 86 | gpv = GPV(parsing_model_name="gpt-4o-mini") 87 | results = gpv.measure_entities(text, values, measurement_subjects) 88 | ``` 89 | 90 | ### Text-level value measurements based on RAG (for the given subjects) 91 | ```python 92 | from gpv import GPV 93 | 94 | path = "data/西游记-zh.txt" 95 | with open(path, "r") as file: 96 | book = file.read() # e.g., a novel 97 | measurement_subjects = ["唐僧", "悟空", "八戒", "沙僧"] 98 | coref_resolve = { 99 | "唐僧": ["唐三藏", "师父"], 100 | "悟空": ["猴王", "行者"], 101 | "八戒": ["猪八戒", "猪悟能"], 102 | "沙僧": ["沙和尚", "沙悟净"], 103 | } 104 | values = ["Universalism", "Hedonism", "Achievement", "Power", "Security", "Self-Direction", "Stimulation", "Tradition", "Benevolence", "Conformity"] 105 | 106 | gpv = GPV(parsing_model_name="gpt-4o-mini") 107 | results = gpv.measure_entities_rag( 108 | text=book, 109 | values=values, 110 | measurement_subjects=measurement_subjects, 111 | coref_resolve=coref_resolve 112 | ) 113 | ``` 114 | 115 | ## 📄 Citation 116 | 117 | If you find this codebase helpful, we would appreciate it if you give us a star and cite our paper: 118 | 119 | ```bibtex 120 | @inproceedings{ye2025gpv, 121 | title={Measuring Human and AI Values Based on Generative Psychometrics with Large Language Models}, 122 | author={Haoran Ye and Yuhang Xie and Yuanyi Ren and Hanjun Fang and Xin Zhang and Guojie Song}, 123 | booktitle={Proceedings of the AAAI Conference on Artificial Intelligence}, 124 | volume={39}, 125 | year={2025} 126 | } 127 | ``` 128 | -------------------------------------------------------------------------------- /assets/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ValueByte-AI/gpv/755503f7581e4265747b104e6c97ace9b219e2cc/assets/diagram.png -------------------------------------------------------------------------------- /gpv/__init__.py: -------------------------------------------------------------------------------- 1 | from .measure import GPV -------------------------------------------------------------------------------- /gpv/chunker.py: -------------------------------------------------------------------------------- 1 | import semchunk 2 | 3 | 4 | class Chunker: 5 | def __init__(self, chunk_size: int, model_name: str = "gpt-4"): 6 | self.model_name = model_name 7 | 
self.chunk_size = chunk_size 8 | 9 | def chunk(self, text: list[str]) -> list[list[str]]: 10 | chunker = semchunk.chunkerify(self.model_name, self.chunk_size) 11 | return chunker(text) 12 | 13 | 14 | if __name__ == "__main__": 15 | chunker = Chunker(20) 16 | texts = [ 17 | "I have been at work since 7 this morning. I got up at 6 this morning. I went to bed at 11 last night. I am pooped, and its a long day. And, no word from TD since Wednesday (feeling a little insecure about that).", 18 | "I had some good time with Renada last night... I went up to her house to help her with a school project. It was fun! We spraypainted some stuff, went on the regular Thursday night Walmart run, chatted a lot.", 19 | ] 20 | chunks = chunker.chunk(texts) 21 | print(chunks) -------------------------------------------------------------------------------- /gpv/embd.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | import torch 3 | import torch.nn.functional as F 4 | from transformers import AutoModel, AutoTokenizer 5 | 6 | 7 | class SentenceEmbedding: 8 | def __init__(self, model_name_or_path: str='Alibaba-NLP/gte-multilingual-base', device="cuda:0"): 9 | self.device = device 10 | if device == "auto": 11 | self.device = "cuda:0" if torch.cuda.is_available() else "cpu" 12 | self.model = AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True, device_map=self.device) 13 | self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) 14 | 15 | 16 | def get_embedding(self, input_texts: list[str], dimension: int = 768, batch_size: int = 8) -> torch.Tensor: 17 | """ 18 | Get the sentence embeddings of the input texts. 19 | 20 | Args: 21 | input_texts (list[str]): A list of input texts. 22 | dimension (int): The output dimension of the output embedding, should be in [128, 768]. 23 | batch_size (int): The number of samples per batch. 24 | 25 | Returns: 26 | torch.Tensor: The embeddings for the input texts. 
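        Note: despite the torch.Tensor annotation, the embeddings are converted with .numpy() before being returned, so the caller receives a NumPy array of shape (len(input_texts), dimension).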
27 | """ 28 | embeddings_list = [] 29 | 30 | for i in tqdm(range(0, len(input_texts), batch_size), desc="Embedding", disable=True): 31 | # Select the batch 32 | batch_texts = input_texts[i:i + batch_size] 33 | 34 | # Tokenize the input texts 35 | batch_dict = self.tokenizer(batch_texts, max_length=8192, padding=True, truncation=True, return_tensors='pt').to(self.device) 36 | 37 | # Get the output embeddings 38 | outputs = self.model(**batch_dict) 39 | batch_embeddings = outputs.last_hidden_state[:, 0][:, :dimension] 40 | 41 | # Normalize the embeddings 42 | batch_embeddings = F.normalize(batch_embeddings, p=2, dim=1) # shape: (batch_size, dimension) 43 | 44 | embeddings_list.append(batch_embeddings.detach().cpu()) 45 | 46 | # Concatenate all batch embeddings 47 | embeddings = torch.cat(embeddings_list, dim=0) 48 | 49 | return embeddings.numpy() 50 | 51 | 52 | if __name__ == "__main__": 53 | embd = SentenceEmbedding() 54 | 55 | input_texts = [ 56 | "what is the capital of China?", 57 | "how to implement quick sort in python?", 58 | "北京", 59 | "快排算法介绍" 60 | ] 61 | 62 | embeddings = embd.get_embedding(input_texts) 63 | print(embeddings) -------------------------------------------------------------------------------- /gpv/measure.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | from datetime import datetime 5 | 6 | from .chunker import Chunker 7 | from .parser import Parser, EntityParser 8 | from .valuellama import ValueLlama 9 | from .embd import SentenceEmbedding 10 | from .utils import get_score, gen_queries_for_perception_retrieval 11 | 12 | 13 | class GPV: 14 | def __init__( 15 | self, 16 | parsing_model_name="gpt-4o-mini", 17 | measurement_model_name="Value4AI/ValueLlama-3-8B", 18 | device='auto', 19 | chunk_size=300, 20 | ): 21 | self.device = device 22 | self.parsing_model_name = parsing_model_name 23 | self.measurement_model_name = measurement_model_name 24 | self.chunk_size = chunk_size 25 | 26 | self.parser = None 27 | self.measurement_system = None 28 | self.chunker = None 29 | self.embd_model = None 30 | 31 | 32 | def measure_perceptions(self, perceptions: list[str], values: list[str]): 33 | """ 34 | Evaluates multiple perceptions in a batch and returns the measure results: relevant values, relevance values, and valence values 35 | """ 36 | if self.measurement_system is None: 37 | self.measurement_system = ValueLlama(model_name=self.measurement_model_name, device=self.device) 38 | 39 | n_perceptions = len(perceptions) 40 | n_values = len(values) 41 | 42 | # Repeat the perceptions and tile the values 43 | perceptions_array = np.repeat(perceptions, n_values) # shape (n_perceptions * n_values,), e.g. ["A", "A", "A", "B", "B", "B", "C", "C", "C"] 44 | values_array = np.tile(values, n_perceptions) # shape (n_perceptions * n_values,), e.g. 
["a", "b", "c", "a", "b", "c", "a", "b", "c"] 45 | 46 | # Get the relevance of the values for all perceptions 47 | relevances = self.measurement_system.get_relevance(perceptions_array, values_array) # (n_perceptions * n_values, 2) 48 | relevances = relevances.reshape(n_perceptions, n_values, 2) 49 | 50 | # Filter the relevant values 51 | relevant_mask = relevances[:, :, 0] > 0.5 52 | 53 | # Prepare arrays for batch valence calculation 54 | relevant_perceptions = [] 55 | relevant_values_for_valence = [] 56 | 57 | results = {} 58 | for i, perception in enumerate(perceptions): 59 | relevant_value_idx = np.where(relevant_mask[i])[0] 60 | 61 | if len(relevant_value_idx) == 0: 62 | results[perception] = { 63 | "relevant_values": [], 64 | "relevances": [], 65 | "valences": [] 66 | } 67 | continue 68 | 69 | relevant_values = np.array(values)[relevant_value_idx] 70 | relevances_for_perception = relevances[i][relevant_value_idx].tolist() 71 | 72 | # Add to arrays for batch valence calculation 73 | relevant_perceptions.extend([perception] * len(relevant_values)) 74 | relevant_values_for_valence.extend(relevant_values) 75 | 76 | results[perception] = { 77 | "relevant_values": relevant_values.tolist(), 78 | "relevances": relevances_for_perception, 79 | "valences": [] # Will be filled after batch calculation 80 | } 81 | 82 | # Batch calculation of valence values 83 | if relevant_perceptions: 84 | valences = self.measurement_system.get_valence(relevant_perceptions, relevant_values_for_valence) 85 | 86 | # Distribute valence results back to individual perceptions 87 | valence_index = 0 88 | for perception in perceptions: 89 | if results[perception]["relevant_values"]: 90 | n_relevant = len(results[perception]["relevant_values"]) 91 | results[perception]["valences"] = valences[valence_index:valence_index + n_relevant].tolist() 92 | valence_index += n_relevant 93 | 94 | return results 95 | 96 | 97 | def measure_texts(self, texts: list[str], values: list[str]): 98 | if self.chunker is None: 99 | self.chunker = Chunker(chunk_size=self.chunk_size) 100 | if self.parser is None: 101 | self.parser = Parser(model_name=self.parsing_model_name) 102 | 103 | # Chunk all texts at once 104 | all_chunks = self.chunker.chunk(texts) # list[list[str]] 105 | # Flatten the chunks 106 | flat_chunks = [chunk for chunks in all_chunks for chunk in chunks] # list[str] 107 | # Parse all chunks in one batch 108 | all_perceptions = self.parser.parse(flat_chunks) # list[list[str]] 109 | # Flatten perceptions 110 | flat_perceptions = [perception for perceptions in all_perceptions for perception in perceptions] # list[str] 111 | # Perform inference on all perceptions in one batch 112 | all_results = self.measure_perceptions(flat_perceptions, values) # dict 113 | 114 | # Reorganize the results according to the original texts; aggregate the results of all perceptions 115 | results_lst = [] 116 | chunk_index = 0 117 | for i, chunks in enumerate(all_chunks): # iterate over the original texts 118 | results = {} 119 | agg = {value: [] for value in values} 120 | for j, chunk in enumerate(chunks): # iterate over the chunks of the text 121 | perceptions = all_perceptions[chunk_index] # get the perceptions of the chunk; list[str] 122 | chunk_index += 1 123 | for perception in perceptions: # iterate over the perceptions of the chunk 124 | results[perception] = all_results[perception] # add the results of the perception to the text results 125 | for k in range(len(results[perception]["relevant_values"])): # iterate over the relevant values of the 
perception 126 | _value = results[perception]["relevant_values"][k] 127 | _valence = results[perception]["valences"][k] 128 | _score = get_score(_valence) 129 | if _score is not None: 130 | agg[_value].append(_score) 131 | 132 | for value in values: 133 | if agg[value]: 134 | agg[value] = np.mean(agg[value]).item() 135 | else: 136 | agg[value] = None 137 | results["aggregated"] = agg 138 | results_lst.append(results) 139 | 140 | return results_lst 141 | 142 | 143 | def parse_texts(self, texts: list[str]): 144 | if self.parser is None: 145 | self.parser = Parser(model_name=self.parsing_model_name) 146 | if self.chunker is None: 147 | self.chunker = Chunker(chunk_size=self.chunk_size) 148 | 149 | # Chunk all texts at once 150 | all_chunks = self.chunker.chunk(texts) # list[list[str]] 151 | # Flatten the chunks 152 | flat_chunks = [chunk for chunks in all_chunks for chunk in chunks] 153 | # Parse all chunks in one batch 154 | all_perceptions = self.parser.parse(flat_chunks) # list[list[str]]; a list of perceptions for each chunk 155 | 156 | # Reorganize the results according to the original texts 157 | results_lst = [] 158 | chunk_index = 0 159 | for i, chunks in enumerate(all_chunks): 160 | results = [] 161 | for j, chunk in enumerate(chunks): 162 | perceptions = all_perceptions[chunk_index] 163 | chunk_index += 1 164 | results.extend(perceptions) 165 | results_lst.append(results) 166 | return results_lst 167 | 168 | 169 | def measure_entities(self, text: str, values: list[str], measurement_subjects: list[str]): 170 | """ 171 | Measures the involved entities in the text chunk by chunk 172 | 173 | Args: 174 | text (str): The text to be measured 175 | values (list[str]): The values to be measured 176 | measurement_subjects (list[str]): The entities to be measured 177 | """ 178 | if self.parser is None: 179 | self.parser = EntityParser(model_name=self.parsing_model_name) 180 | if self.chunker is None: 181 | self.chunker = Chunker(chunk_size=self.chunk_size) 182 | 183 | subject_value2avg_scores = {} 184 | subject_value2scores = {} 185 | 186 | # Chunking 187 | chunks = self.chunker.chunk(text) 188 | 189 | for measurement_subject in measurement_subjects: 190 | measurement_chunks = [chunk for chunk in chunks if measurement_subject in chunk] 191 | # Parsing for the measurement subject 192 | perceptions = self.parser.parse(measurement_chunks, [[measurement_subject] for _ in measurement_chunks])[measurement_subject] 193 | # Measuring the perceptions 194 | measurement_results = self.measure_perceptions(perceptions, values) 195 | 196 | # Aggregate the results 197 | value2scores = {_value: [] for _value in values} 198 | for p in measurement_results: 199 | p_measurements = measurement_results[p] 200 | for i in range(len(p_measurements["relevant_values"])): 201 | current_value = p_measurements["relevant_values"][i] 202 | value_valence = p_measurements["valences"][i] 203 | value_score = get_score(value_valence) 204 | if value_score is not None: 205 | value2scores[current_value].append(value_score) 206 | 207 | # Calculate the average score for each value 208 | value2avg_scores = {} 209 | for value in value2scores: 210 | if len(value2scores[value]) < 1: 211 | value2avg_scores[value] = None 212 | else: 213 | value2avg_scores[value] = np.mean(value2scores[value]).item() 214 | 215 | subject_value2avg_scores[measurement_subject] = value2avg_scores 216 | subject_value2scores[measurement_subject] = value2scores 217 | 218 | return subject_value2avg_scores 219 | 220 | 221 | def measure_entities_rag(self, text: str, 
values: list[str], measurement_subjects: list[str], coref_resolve: dict=None, K: int=50, threshold: int=5): 222 | """ 223 | Measure the given entities in the text based on RAG. 224 | 225 | Args: 226 | - text: str: The text to be measured 227 | - values: list[str]: The values to be measured 228 | - measurement_subjects: list[str]: The entities to be measured 229 | - coref_resolve: dict: The dictionary of coreferences for the entities 230 | - K: int: The number of topk similar chunks to be considered 231 | - threshold: int: The minimum number of scores to be considered as evident for a value 232 | 233 | Returns: 234 | - dict: The dictionary of the average scores for each entity and value 235 | """ 236 | if self.embd_model is None: 237 | self.embd_model = SentenceEmbedding(device=self.device) 238 | if self.parser is None: 239 | self.parser = EntityParser(model_name=self.parsing_model_name) 240 | if self.chunker is None: 241 | self.chunker = Chunker(chunk_size=self.chunk_size) 242 | 243 | subject_value2avg_scores = {} 244 | subject_value2scores = {} 245 | 246 | # Chunk the data 247 | chunks = self.chunker.chunk(text) 248 | 249 | for measurement_subject in measurement_subjects: 250 | # Resolve coreferences 251 | if coref_resolve: 252 | corefs = coref_resolve.get(measurement_subject, []) + [measurement_subject] 253 | else: 254 | corefs = [measurement_subject] 255 | 256 | # Find all the chunks that contain the measurement subject 257 | measurement_chunks = [] 258 | for chunk in chunks: 259 | for coref in corefs: 260 | if coref in chunk: 261 | measurement_chunks.append(chunk) 262 | break 263 | 264 | print("Number of measurement chunks:", len(measurement_chunks)) 265 | 266 | # Embed the chunks that contain the measurement subject 267 | embeddings = self.embd_model.get_embedding(measurement_chunks) # shape: (num_chunks, embedding_dim) 268 | 269 | query_supports, query_opposes = gen_queries_for_perception_retrieval(values, measurement_subject, model_name=self.parsing_model_name) 270 | queries = query_supports + query_opposes 271 | queries_embedding = self.embd_model.get_embedding(queries) # shape: (n_queries, embedding_dim) 272 | 273 | # Find the topk semantically qualified chunks; we can then extract the perceptions (items) from these chunks 274 | similar_chunks = [] 275 | cosine_similarities = embeddings @ queries_embedding.T # shape: (num_chunks, n_queries) 276 | cosine_similarities_max = np.max(cosine_similarities, axis=1) # shape: (num_chunks,) 277 | topk_indices = np.argsort(cosine_similarities_max)[-K:] 278 | similar_chunks = [measurement_chunks[i] for i in topk_indices] 279 | 280 | # Measure the chunks for the given entity and value 281 | perceptions = self.parser.parse(similar_chunks, [[measurement_subject] for _ in similar_chunks])[measurement_subject] 282 | 283 | if len(perceptions) == 0: 284 | raise ValueError("No perceptions found for the measurement subject") 285 | 286 | print("Example chunk:", similar_chunks[-1]) 287 | print("Example perceptions:", perceptions[-5:]) 288 | print("Number of perceptions:", len(perceptions)) 289 | 290 | # Measure perceptions 291 | measurement_results = self.measure_perceptions(perceptions, values) 292 | 293 | # Aggregate the results 294 | value2scores = {_value: [] for _value in values} 295 | for p in measurement_results: 296 | p_measurements = measurement_results[p] 297 | for i in range(len(p_measurements["relevant_values"])): 298 | current_value = p_measurements["relevant_values"][i] 299 | value_valence = p_measurements["valences"][i] 300 | value_score = 
get_score(value_valence) 301 | if value_score is not None: 302 | value2scores[current_value].append(value_score) 303 | 304 | # Calculate the average score for each value 305 | value2avg_scores = {} 306 | for value in value2scores: 307 | if len(value2scores[value]) < threshold: # If the number of scores is less than the threshold, we consider it as None; i.e., no enough evidence 308 | value2avg_scores[value] = None 309 | else: 310 | value2avg_scores[value] = np.mean(value2scores[value]).item() 311 | 312 | subject_value2avg_scores[measurement_subject] = value2avg_scores 313 | subject_value2scores[measurement_subject] = value2scores 314 | 315 | # Save value2scores 316 | # save_path = "value2scores_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".json" 317 | # with open(save_path, "w") as file: 318 | # json.dump(subject_value2scores, file, indent=4) 319 | 320 | return subject_value2avg_scores -------------------------------------------------------------------------------- /gpv/models/__init__.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | 3 | from .models import * 4 | 5 | # A dictionary mapping of model architecture to its supported model names 6 | MODEL_LIST = { 7 | # InternLMModel: ['internlm/internlm2-chat-7b', 'internlm/internlm2-chat-20b', 'internlm/internlm-chat-7b'], 8 | LlamaAPIModel: ['gemma-7b', 'gemma-2b', 'llama3.1-405b'] # These are gemma instruct/chat models 9 | + [f'Qwen1.5-{n}B-Chat' for n in [110, 72, 32, 14, 7, 4, 1.8, 0.5]] + ["Qwen2-72B"], 10 | # We use LlamaAPI for these models, one can also implement them locally 11 | # LlamaModel: ['meta-llama/Llama-2-7b-chat-hf'], 12 | OpenAIModel: ['gpt-3.5-turbo', 'gpt-4-turbo', 'gpt-4o-mini', 'gpt-4o'], 13 | # VicunaModel: ['lmsys/vicuna-7b-v1.5-16k'], 14 | # MistralModel: ['mistralai/Mistral-7B-Instruct-v0.1', 'mistralai/Mistral-7B-Instruct-v0.2'], 15 | # YiModel: ['01-ai/Yi-6B-Chat'], 16 | } 17 | 18 | SUPPORTED_MODELS = [model for model_class in MODEL_LIST.keys() for model in MODEL_LIST[model_class]] 19 | 20 | 21 | class LLMModel(object): 22 | """ 23 | A class providing an interface for various language models. 24 | 25 | This class supports creating and interfacing with different language models, handling prompt engineering, and performing model inference. 26 | 27 | Parameters: 28 | ----------- 29 | model : str 30 | The name of the model to be used. 31 | max_new_tokens : int, optional 32 | The maximum number of new tokens to be generated (default is 20). 33 | temperature : float, optional 34 | The temperature for text generation (default is 0). 35 | device : str, optional 36 | The device to be used for inference (default is "cuda"). 37 | dtype : str, optional 38 | The loaded data type of the language model (default is "auto"). 39 | model_dir : str or None, optional 40 | The directory containing the model files (default is None). 41 | system_prompt : str or None, optional 42 | The system prompt to be used (default is None). 43 | api_key : str or None, optional 44 | The API key for API-based models (GPT series and Gemini series), if required (default is None). 45 | 46 | Methods: 47 | -------- 48 | _create_model(max_new_tokens, temperature, device, dtype, model_dir, system_prompt, api_key) 49 | Creates and returns the appropriate model instance. 50 | convert_text_to_prompt(text, role) 51 | Constructs a prompt based on the text and role. 52 | concat_prompts(prompt_list) 53 | Concatenates multiple prompts into a single prompt. 
54 | _gpt_concat_prompts(prompt_list) 55 | Concatenates prompts for GPT models. 56 | _other_concat_prompts(prompt_list) 57 | Concatenates prompts for non-GPT models. 58 | __call__(input_text, **kwargs) 59 | Makes a prediction based on the input text using the loaded model. 60 | """ 61 | 62 | @staticmethod 63 | def model_list(): 64 | return SUPPORTED_MODELS 65 | 66 | def __init__(self, model, max_new_tokens=4096, temperature=0, device="cuda", dtype=torch.float16, system_prompt=None, api_key=None): 67 | self.model_name = model 68 | self.model = self._create_model(max_new_tokens, temperature, device, dtype, system_prompt, api_key) 69 | 70 | def _create_model(self, max_new_tokens, temperature, device, dtype, system_prompt, api_key): 71 | """Creates and returns the appropriate model based on the model name.""" 72 | 73 | # Dictionary mapping of model names to their respective classes 74 | model_mapping = {model: model_class for model_class in MODEL_LIST.keys() for model in MODEL_LIST[model_class]} 75 | 76 | # Get the model class based on the model name and instantiate it 77 | model_class = model_mapping.get(self.model_name) 78 | if model_class: 79 | if model_class in [LlamaAPIModel, OpenAIModel]: 80 | return model_class(self.model_name, max_new_tokens, temperature, system_prompt, api_key) 81 | elif model_class in [YiModel, LlamaModel]: 82 | return model_class(self.model_name, max_new_tokens, temperature, device, dtype, system_prompt) 83 | else: 84 | return model_class(self.model_name, max_new_tokens, temperature, device, dtype) 85 | else: 86 | raise ValueError("The model is not supported!") 87 | 88 | def __call__(self, input_texts, **kwargs): 89 | """Predicts the output based on the given input text using the loaded model.""" 90 | if not isinstance(input_texts, list): 91 | assert isinstance(input_texts, str) 92 | input_texts = [input_texts] 93 | if isinstance(self.model, OpenAIModel) or isinstance(self.model, LlamaAPIModel): 94 | return self.model.batch_predict(input_texts, **kwargs) 95 | else: 96 | responses = [] 97 | for input_text in tqdm(input_texts): 98 | responses.append(self.model.predict(input_text, **kwargs)) 99 | return responses -------------------------------------------------------------------------------- /gpv/models/models.py: -------------------------------------------------------------------------------- 1 | import os 2 | from abc import ABC 3 | import concurrent.futures 4 | from tqdm import tqdm 5 | import time 6 | from typing import Optional 7 | 8 | import torch 9 | from openai import OpenAI 10 | 11 | from transformers import AutoModelForCausalLM, AutoTokenizer 12 | 13 | try: 14 | from mistral_common.tokens.tokenizers.mistral import MistralTokenizer 15 | from mistral_common.protocol.instruct.messages import UserMessage 16 | from mistral_common.protocol.instruct.request import ChatCompletionRequest 17 | except ImportError: 18 | print("Mistral is not installed. Related models will not work.") 19 | 20 | 21 | 22 | class LLMBaseModel(ABC): 23 | """ 24 | Abstract base class for language model interfaces. 25 | 26 | This class provides a common interface for various language models and includes methods for prediction. 27 | 28 | Parameters: 29 | ----------- 30 | model : str 31 | The name of the language model. 32 | max_new_tokens : int 33 | The maximum number of new tokens to be generated. 34 | temperature : float 35 | The temperature for text generation (default is 0). 36 | device: str 37 | The device to use for inference (default is 'auto'). 
38 | 39 | Methods: 40 | -------- 41 | predict(input_text, **kwargs) 42 | Generates a prediction based on the input text. 43 | __call__(input_text, **kwargs) 44 | Shortcut for predict method. 45 | """ 46 | def __init__(self, model_name, max_new_tokens, temperature, device='auto'): 47 | self.model_name = model_name 48 | self.max_new_tokens = max_new_tokens 49 | self.temperature = temperature 50 | if device == 'auto': 51 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 52 | self.device = device 53 | 54 | def predict(self, input_text, **kwargs): 55 | raise NotImplementedError("The predict method must be implemented in the derived class.") 56 | 57 | def __call__(self, input_text, **kwargs): 58 | return self.predict(input_text, **kwargs) 59 | 60 | 61 | class InternLMModel(LLMBaseModel): 62 | def __init__(self, model_name, max_new_tokens, temperature, device, dtype): 63 | super(InternLMModel, self).__init__(model_name, max_new_tokens, temperature, device) 64 | from transformers import AutoTokenizer, AutoModelForCausalLM 65 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, trust_remote_code=True, torch_dtype=dtype, device_map=device) 66 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, trust_remote_code=True, torch_dtype=dtype, device_map=device) 67 | self.model = self.model.eval() 68 | 69 | def predict(self, input_text, **kwargs): 70 | response, history = self.model.chat(self.tokenizer, input_text, history=[]) 71 | return response 72 | 73 | 74 | 75 | class YiModel(LLMBaseModel): 76 | """ 77 | Language model class for the Yi model. 78 | 79 | Inherits from LLMBaseModel and sets up the Yi language model for use. 80 | 81 | Parameters: 82 | ----------- 83 | model : str 84 | The name of the Yi model. 85 | max_new_tokens : int 86 | The maximum number of new tokens to be generated. 87 | temperature : float 88 | The temperature for text generation (default is 0). 89 | device: str 90 | The device to use for inference (default is 'auto'). 91 | """ 92 | def __init__(self, model_name, max_new_tokens, temperature, device, dtype, system_prompt=None): 93 | super(YiModel, self).__init__(model_name, max_new_tokens, temperature, device) 94 | from transformers import AutoTokenizer, AutoModelForCausalLM 95 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, torch_dtype=dtype, device_map=device) 96 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, torch_dtype=dtype, device_map=device).eval() 97 | self.system_prompt = system_prompt if system_prompt is not None else "You are a helpful assistant." 98 | 99 | def predict(self, input_text, **kwargs): 100 | messages = [{"role": "system", "content": self.system_prompt}, {"role": "user", "content": input_text}] 101 | input_ids = self.tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt') 102 | output_ids = self.model.generate( 103 | input_ids.to(self.device), 104 | temperature=self.temperature if self.temperature > 1e-3 else None, 105 | top_p=0.9 if self.temperature > 1e-3 else None, 106 | max_new_tokens=self.max_new_tokens, 107 | do_sample=True if self.temperature > 1e-3 else False, 108 | ) 109 | response = self.tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True) 110 | 111 | # Model response: "Hello! How can I assist you today?" 112 | return response 113 | 114 | 115 | class MistralModel(LLMBaseModel): 116 | """ 117 | Language model class for the Mistral model. 
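    Note: relies on the optional mistral_common package (imported with a guard at the top of this module); if that import is unavailable, this class cannot be instantiated. The temperature is also clamped to a minimum of 0.01.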
118 | 119 | Inherits from LLMBaseModel and sets up the Mistral language model for use. 120 | 121 | Parameters: 122 | ----------- 123 | model : str 124 | The name of the Mistral model. 125 | max_new_tokens : int 126 | The maximum number of new tokens to be generated. 127 | temperature : float 128 | The temperature for text generation (default is 0). 129 | device: str 130 | The device to use for inference (default is 'auto'). 131 | dtype: str 132 | The dtype to use for inference (default is 'auto'). 133 | """ 134 | def __init__(self, model_name, max_new_tokens, temperature, device, dtype): 135 | temperature = max(temperature, 0.01) 136 | super(MistralModel, self).__init__(model_name, max_new_tokens, temperature, device) 137 | self.tokenizer = MistralTokenizer.v1() 138 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, torch_dtype=dtype, device_map=device) 139 | 140 | 141 | def predict(self, input_text, **kwargs): 142 | completion_request = ChatCompletionRequest(messages=[UserMessage(content=input_text)]) 143 | 144 | tokens = self.tokenizer.encode_chat_completion(completion_request).tokens 145 | 146 | tokens = torch.tensor(tokens).unsqueeze(0).to(self.device) 147 | 148 | generated_ids = self.model.generate( 149 | tokens, 150 | max_new_tokens=self.max_new_tokens, 151 | temperature=self.temperature if self.temperature > 1e-3 else None, 152 | top_p=0.9 if self.temperature > 1e-3 else None, 153 | do_sample=True if self.temperature > 1e-3 else False, 154 | pad_token_id=self.tokenizer.instruct_tokenizer.tokenizer.eos_id, 155 | **kwargs, 156 | ) 157 | 158 | # decode with mistral tokenizer 159 | result = self.tokenizer.decode(generated_ids[0].tolist()) 160 | 161 | # Return the content after [/INST] 162 | response = result.split("[/INST]")[1] 163 | return response 164 | 165 | 166 | class LlamaModel(LLMBaseModel): 167 | """ 168 | Language model class for the Llama model. 169 | 170 | Inherits from LLMBaseModel and sets up the Llama language model for use. 171 | 172 | Parameters: 173 | ----------- 174 | model : str 175 | The name of the Llama model. 176 | max_new_tokens : int 177 | The maximum number of new tokens to be generated. 178 | temperature : float 179 | The temperature for text generation (default is 0). 180 | device: str 181 | The device to use for inference (default is 'auto'). 182 | dtype: str 183 | The dtype to use for inference (default is 'auto'). 184 | system_prompt : str 185 | The system prompt to be used (default is 'You are a helpful assistant.'). 186 | """ 187 | def __init__(self, model_name, max_new_tokens, temperature, device, dtype, system_prompt): 188 | super(LlamaModel, self).__init__(model_name, max_new_tokens, temperature, device) 189 | if system_prompt is None: 190 | self.system_prompt = "You are a helpful assistant." 
191 | else: 192 | self.system_prompt = system_prompt 193 | 194 | self.tokenizer = AutoTokenizer.from_pretrained(model_name, device_map=device, torch_dtype=dtype) 195 | self.model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device, torch_dtype=dtype) 196 | 197 | def predict(self, input_text, **kwargs): 198 | input_text = f"[INST] <>{self.system_prompt}<>\n{input_text}[/INST]" 199 | input_ids = self.tokenizer(input_text, return_tensors="pt").input_ids.to(self.device) 200 | 201 | outputs = self.model.generate(input_ids, 202 | max_new_tokens=self.max_new_tokens, 203 | temperature=self.temperature if self.temperature > 1e-3 else None, 204 | top_p=0.9 if self.temperature > 1e-3 else None, 205 | do_sample=True if self.temperature > 1e-3 else False, 206 | **kwargs) 207 | 208 | out = self.tokenizer.decode(outputs[0], 209 | skip_special_tokens=True, 210 | clean_up_tokenization_spaces=False) 211 | 212 | return out[len(input_text)-1:] 213 | 214 | 215 | class VicunaModel(LLMBaseModel): 216 | """ 217 | Language model class for the Vicuna model. 218 | # https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/huggingface_api.py 219 | 220 | Inherits from LLMBaseModel and sets up the Vicuna language model for use. 221 | 222 | Parameters: 223 | ----------- 224 | model : str 225 | The name of the Vicuna model. 226 | max_new_tokens : int 227 | The maximum number of new tokens to be generated. 228 | temperature : float, optional 229 | The temperature for text generation (default is 0). 230 | device: str 231 | The device to use for inference (default is 'auto'). 232 | dtype: str 233 | The dtype to use for inference (default is 'auto'). 234 | """ 235 | def __init__(self, model_name, max_new_tokens, temperature, device, dtype): 236 | super(VicunaModel, self).__init__(model_name, max_new_tokens, temperature, device) 237 | self.tokenizer = AutoTokenizer.from_pretrained(model_name, device_map=device, torch_dtype=dtype, use_fast=False) 238 | self.model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device, torch_dtype=dtype) 239 | 240 | def predict(self, input_text, **kwargs): 241 | input_ids = self.tokenizer(input_text, return_tensors="pt").input_ids.to(self.device) 242 | 243 | print(self.temperature) 244 | 245 | output_ids = self.model.generate( 246 | input_ids, 247 | max_new_tokens=self.max_new_tokens, 248 | temperature=self.temperature if self.temperature > 1e-3 else None, 249 | do_sample=True if self.temperature > 1e-3 else False, 250 | repetition_penalty=1.2, # https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/huggingface_api.py 251 | **kwargs 252 | ) 253 | 254 | output_ids = output_ids[0][len(input_ids[0]) :] 255 | 256 | outputs = self.tokenizer.decode( 257 | output_ids, skip_special_tokens=True, spaces_between_special_tokens=False 258 | ) 259 | 260 | return outputs 261 | 262 | 263 | class OpenAIModel(LLMBaseModel): 264 | """ 265 | Language model class for interfacing with OpenAI's GPT models or Llama API models. 266 | 267 | Inherits from LLMBaseModel and sets up a model interface for OpenAI GPT models. 268 | 269 | Parameters: 270 | ----------- 271 | model : str 272 | The name of the OpenAI model. 273 | max_new_tokens : int 274 | The maximum number of new tokens to be generated. 275 | temperature : float 276 | The temperature for text generation (default is 0). 277 | system_prompt : str 278 | The system prompt to be used (default is 'You are a helpful assistant.'). 279 | openai_key : str 280 | The OpenAI API key (default is None). 
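        If openai_key is None, the API key is read from the OPENAI_API_KEY environment variable when predict() is called.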
281 | 282 | Methods: 283 | -------- 284 | predict(input_text) 285 | Predicts the output based on the given input text using the OpenAI model. 286 | """ 287 | def __init__(self, model_name, max_new_tokens, temperature, system_prompt=None, openai_key=None): 288 | super(OpenAIModel, self).__init__(model_name, max_new_tokens, temperature) 289 | self.openai_key = openai_key 290 | self.system_prompt = system_prompt 291 | 292 | def predict(self, input_text, kwargs={}): 293 | client = OpenAI(api_key=self.openai_key if self.openai_key is not None else os.environ['OPENAI_API_KEY']) 294 | if self.system_prompt is None: 295 | system_messages = {'role': "system", 'content': "You are a helpful assistant."} 296 | else: 297 | system_messages = {'role': "system", 'content': self.system_prompt} 298 | 299 | if isinstance(input_text, list): 300 | messages = input_text 301 | elif isinstance(input_text, dict): 302 | messages = [input_text] 303 | else: 304 | messages = [{"role": "user", "content": input_text}] 305 | 306 | messages.insert(0, system_messages) 307 | 308 | # extra parameterss 309 | n = kwargs['n'] if 'n' in kwargs else 1 310 | temperature = kwargs['temperature'] if 'temperature' in kwargs else self.temperature 311 | max_new_tokens = kwargs['max_new_tokens'] if 'max_new_tokens' in kwargs else self.max_new_tokens 312 | response_format = kwargs['response_format'] if 'response_format' in kwargs else None 313 | 314 | for attempt in range(1000): 315 | try: 316 | response = client.chat.completions.create( 317 | model=self.model_name, 318 | messages=messages, 319 | temperature=temperature, 320 | max_tokens=max_new_tokens, 321 | n=n, 322 | response_format={"type": "json_object"} if response_format=="json" else None, 323 | ) 324 | break 325 | except Exception as e: 326 | print(f"Error: {e}") 327 | print(f"Retrying ({attempt + 1})...") 328 | time.sleep(1) 329 | 330 | if n > 1: 331 | result = [choice.message.content for choice in response.choices] 332 | else: 333 | result = response.choices[0].message.content 334 | 335 | return result 336 | 337 | def multi_predict(self, input_texts, **kwargs): 338 | """ 339 | An example of input_texts: 340 | input_texts = ["Hello!", "How are you?", "Tell me a joke."] 341 | """ 342 | with concurrent.futures.ThreadPoolExecutor() as executor: 343 | args = [(messages, kwargs) for messages in input_texts] 344 | contents = executor.map(lambda p: self.predict(*p), args) 345 | return list(contents) 346 | 347 | def batch_predict(self, input_texts, **kwargs): 348 | assert "n" not in kwargs or kwargs["n"] == 1, "n > 1 is not supported for batch prediction." 
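        # Process the prompts in mini-batches (batch_size defaults to 50 and can be overridden via kwargs);
        # each mini-batch is dispatched concurrently through multi_predict's thread pool.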
349 | responses_list = [] 350 | batch_size = kwargs["batch_size"] if "batch_size" in kwargs else 50 351 | for start_idx in tqdm(range(0, len(input_texts), batch_size), disable=False): 352 | end_idx = min(start_idx + batch_size, len(input_texts)) 353 | batch_input_texts = input_texts[start_idx: end_idx] 354 | batch_results_list = self.multi_predict(batch_input_texts, **kwargs) 355 | responses_list.extend(batch_results_list) 356 | # Save responses to file 357 | # with open(f"temp-file-responses-{self.model_name}.txt", "a") as f: 358 | # for response in batch_results_list: 359 | # f.write(response + "\n") 360 | return responses_list 361 | 362 | 363 | class LlamaAPIModel(OpenAIModel): 364 | def __init__(self, model_name, max_new_tokens, temperature, system_prompt=None, llama_key=None): 365 | super(LlamaAPIModel, self).__init__(model_name, max_new_tokens, temperature, system_prompt, llama_key) 366 | self.system_prompt = system_prompt 367 | self.llama_key = llama_key 368 | 369 | def predict(self, input_text, kwargs={}): 370 | client = OpenAI( 371 | api_key = self.llama_key if self.llama_key is not None else os.environ['LLAMA_API_KEY'], 372 | base_url = "https://api.llama-api.com" 373 | ) 374 | if self.system_prompt is None: 375 | system_messages = {'role': "system", 'content': "You are a helpful assistant."} 376 | else: 377 | system_messages = {'role': "system", 'content': self.system_prompt} 378 | 379 | if isinstance(input_text, list): 380 | messages = input_text 381 | elif isinstance(input_text, dict): 382 | messages = [input_text] 383 | else: 384 | messages = [{"role": "user", "content": input_text}] 385 | 386 | messages.insert(0, system_messages) 387 | 388 | # extra parameterss 389 | n = kwargs['n'] if 'n' in kwargs else 1 390 | temperature = kwargs['temperature'] if 'temperature' in kwargs else self.temperature 391 | max_new_tokens = kwargs['max_new_tokens'] if 'max_new_tokens' in kwargs else self.max_new_tokens 392 | response_format = kwargs['response_format'] if 'response_format' in kwargs else None 393 | 394 | response = client.chat.completions.create( 395 | model=self.model_name, 396 | messages=messages, 397 | temperature=temperature, 398 | max_tokens=max_new_tokens, 399 | n=n, 400 | response_format={"type": "json_object"} if response_format=="json" else None, 401 | ) 402 | 403 | if n > 1: 404 | result = [choice.message.content for choice in response.choices] 405 | else: 406 | result = response.choices[0].message.content 407 | 408 | return result 409 | 410 | 411 | if __name__ == "__main__": 412 | model_name = "llama3.1-405b" 413 | 414 | model = LlamaAPIModel(model_name, max_new_tokens=4096, temperature=0) 415 | 416 | user_prompt = "Hi there" 417 | response = model.predict(user_prompt) 418 | 419 | print(response) 420 | -------------------------------------------------------------------------------- /gpv/parser.py: -------------------------------------------------------------------------------- 1 | import json 2 | import warnings 3 | try: 4 | from .models import LLMModel 5 | except: 6 | from models import LLMModel 7 | 8 | 9 | SYSTEM_PROMPT = """[Background] 10 | Human values are the core beliefs that guide our actions and judgments across a variety of situations, such as Universalism and Tradition. You are an expert in human values and you will assist the user in value measurement. The atomic units of value measurement are perceptions, which are defined by the following properties: 11 | - A perception should be value-laden and accurately describe the measurement subject (the author). 
12 | - A perception is atomic, meaning it cannot be further decomposed into smaller units. 13 | - A perception is well-contextualized and self-contained. 14 | - The composition of all perceptions is comprehensive, ensuring that no related content in the textual data is left unmeasured. 15 | --- 16 | 17 | [Task] 18 | You help evaluate the values of the text's author. Given a long text, you parse it into the author's perceptions. You respond in the following JSON format: 19 | {"perceptions": ["perception 1", "perception 2", ...]} 20 | --- 21 | 22 | [Example] 23 | **Text:** "Yesterday, the 5th of August, was the first day of our program for the preparation for perpetual vows. I felt so happy to be back in Don Bosco and to meet again my other classmates from the novitiate who still remain in religious life. It was also extremely nice to see Fr. Pepe Reinoso, one of my beloved Salesian professors at DBCS, who commenced our preparation program with his topic on the Anthropological and Psychological Dynamics in the vocation to religious life." 24 | Your response: {"perceptions": ["Feeling happy to be back in Don Bosco and meeting classmates in the novitiate", "Appreciation for Fr. Pepe Reinoso and his teachings on Anthropological and Psychological Dynamics in the vocation to the religious life"]} 25 | --- 26 | """ 27 | 28 | 29 | SYSTEM_PROMPT_ENTITY = """[Background] 30 | Human values are the core beliefs that guide our actions and judgments across a variety of situations, such as Universalism and Tradition. You are an expert in human values and you will assist the user in value measurement. The atomic units of value measurement are perceptions, which are defined by the following properties: 31 | - A perception should be value-laden and accurately describe the measurement subject. 32 | - A perception is atomic, meaning it cannot be further decomposed into smaller units. 33 | - A perception is well-contextualized and self-contained. 34 | --- 35 | 36 | [Task] 37 | You help evaluate the values of a given measurement subject. Given a long text, you parse it into the measurement subject's most relevant perceptions. You respond in the following JSON format: 38 | {"perceptions": ["perception 1", "perception 2", ...]} 39 | Please **only** include perceptions that are very relevant to the **values** of **the measurement subject**. If there are no relevant perceptions found, you can respond with an empty list. 40 | --- 41 | 42 | [Example] 43 | **Text:** "Three strangers shared a train compartment. Maria, a businesswoman, clutched her tablet, calculating profits. To her, success was measured in numbers. Jack, a teacher, glanced at his watch, eager to reach home. His joy lay in nurturing young minds. Across from them, Emily, a free spirit, sketched flowers in her notebook. She lived for beauty and spontaneity, unbound by routine. 44 | The train jolted, spilling Maria's coffee. Jack quickly offered tissues, while Emily admired the swirling pattern on the floor. 45 | Maria sighed at the mess, Jack saw an opportunity to help, and Emily saw unexpected art. Three worlds in one space." 
46 | 47 | **Measurement subject:** "Maria" 48 | 49 | **Your response:** {"perceptions": ["Maria values success and measures it in numerical terms.", "Maria is distressed by the coffee spill."]} 50 | --- 51 | """ 52 | 53 | 54 | USER_PROMPT_TEMPLATE = "**Text:** {text}" 55 | 56 | USER_PROMPT_ENTITY_TEMPLATE = "**Text:** {text}\n\n**Measurement subject:** {entity}" 57 | 58 | class Parser: 59 | def __init__(self, model_name="gpt-4o-mini", **kwargs): 60 | self.model = LLMModel(model_name, system_prompt=SYSTEM_PROMPT, **kwargs) 61 | 62 | def parse(self, texts: list[str], batch_size=100) -> list[list[str]]: 63 | """ 64 | Parse the text into perceptions 65 | 66 | Args: 67 | - text: list[str]: The list of texts to be parsed 68 | - batch_size: int: The batch size for the model 69 | """ 70 | user_prompts = [USER_PROMPT_TEMPLATE.format(text=text) for text in texts] 71 | responses = self.model(user_prompts, batch_size=batch_size, response_format="json") 72 | perceptions_per_text = [] 73 | for response in responses: 74 | try: 75 | response_loaded = json.loads(response.strip("```json").strip("```")) 76 | perceptions = response_loaded.get("perceptions", []) 77 | except Exception as e: 78 | print(e) 79 | warnings.warn(f"Failed to parse the response: {response}") 80 | perceptions = [] 81 | perceptions_per_text.append(perceptions) 82 | 83 | return perceptions_per_text 84 | 85 | 86 | class EntityParser: 87 | def __init__(self, model_name="gpt-4o-mini", **kwargs): 88 | self.model = LLMModel(model_name, system_prompt=SYSTEM_PROMPT_ENTITY, **kwargs) 89 | 90 | def parse(self, texts: list[str], entities: list[list[str]], batch_size=20) -> list[dict]: 91 | """ 92 | Parse the texts into perceptions 93 | 94 | Args: 95 | - text: list[str]: The list of texts to be parsed 96 | - entities: list[str]: The list of list of entities to extract from the text, each list corresponds to the entities for the corresponding text 97 | - batch_size: int: The batch size for the model 98 | """ 99 | # Generate user prompts 100 | user_prompts = [] 101 | for text, entity_list in zip(texts, entities): 102 | for entity in entity_list: 103 | user_prompts.append(USER_PROMPT_ENTITY_TEMPLATE.format(text=text, entity=entity)) 104 | 105 | # Get responses in batch 106 | responses = self.model(user_prompts, batch_size=batch_size, response_format="json") 107 | 108 | # Parse responses 109 | entity2perceptions = {} 110 | response_idx = 0 111 | for entity_list in entities: 112 | for entity in entity_list: 113 | try: 114 | response_loaded = json.loads(responses[response_idx].strip("```json").strip("```")) 115 | perceptions = response_loaded.get("perceptions", []) 116 | except Exception as e: 117 | print(e) 118 | response = responses[response_idx] 119 | warnings.warn(f"Failed to parse the response: {response}") 120 | perceptions = [] 121 | if entity not in entity2perceptions: 122 | entity2perceptions[entity] = [] 123 | entity2perceptions[entity].extend(perceptions) 124 | response_idx += 1 125 | return entity2perceptions 126 | 127 | 128 | 129 | 130 | if __name__ == "__main__": 131 | parser = EntityParser(model_name="gpt-4o-mini", temperature=0.) 132 | texts = [ 133 | """ 134 | In a bustling city, Maria, an ambitious lawyer, prized success above all. She worked tirelessly, believing wealth equaled worth. Her brother, Daniel, a dedicated teacher, valued knowledge and integrity. He found purpose in shaping young minds, unconcerned with riches. 
Their neighbor, Olivia, an artist, cherished freedom and creativity, living modestly but passionately, painting the world as she saw it. 135 | 136 | One evening, a fire broke out in their building. Maria rushed to save her prized possessions, Daniel guided children to safety, and Olivia grabbed her paintbrush, immortalizing the chaos. Each saw the world through their lens, their values defining their actions.""", 137 | ] 138 | entities = [ 139 | ["Maria", "Daniel", "Olivia"] 140 | ] 141 | entity2perceptions = parser.parse(texts, entities) 142 | print(entity2perceptions) -------------------------------------------------------------------------------- /gpv/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import torch 5 | from transformers import pipeline 6 | try: 7 | from .models import LLMModel 8 | except ImportError: 9 | from models import LLMModel 10 | 11 | 12 | 13 | def get_valence_value(valence_vec): 14 | """ 15 | Returns a signed valence score from the [support, oppose, either] probability vector: +P(support) if support is most likely, -P(oppose) if oppose is most likely, and None if 'either' is most likely 16 | """ 17 | if valence_vec[0] > valence_vec[1] and valence_vec[0] > valence_vec[2]: 18 | return valence_vec[0].item() 19 | elif valence_vec[1] > valence_vec[0] and valence_vec[1] > valence_vec[2]: 20 | return - valence_vec[1].item() 21 | else: 22 | return None 23 | 24 | 25 | def get_valence_label(valence_vec): 26 | """ 27 | Returns the valence label ("Supports", "Opposes", or "Either") for the [support, oppose, either] probability vector 28 | """ 29 | if valence_vec[0] > valence_vec[1] and valence_vec[0] > valence_vec[2]: 30 | return "Supports" 31 | elif valence_vec[1] > valence_vec[0] and valence_vec[1] > valence_vec[2]: 32 | return "Opposes" 33 | else: 34 | return "Either" 35 | 36 | 37 | def get_score(valence_vec): 38 | """ 39 | Returns the value score P(support) - P(oppose), or None if 'either' is the most likely outcome 40 | """ 41 | if valence_vec[2] > valence_vec[0] and valence_vec[2] > valence_vec[1]: 42 | return None 43 | return valence_vec[0] - valence_vec[1] 44 | 45 | 46 | def gen_queries_for_perception_retrieval(values: list[str], measurement_subject: str, model_name: str): 47 | """ 48 | Generate queries via LLM for perception retrieval. 49 | """ 50 | 51 | system_prompt = """[Background] You are an expert in Psychology and Human Values. Given a specific value and a person, your task is to write five diverse items that support it and five diverse items that oppose it. The items should be in the same language as the person's name. You respond using JSON format. Examples are provided below: 52 | --- 53 | [Example 1] 54 | **User Input:** Value: Self-Direction; Person: Henry 55 | **Your Response:** { 56 | "support": [ 57 | "Thinking up new ideas and being creative is important to Henry.", 58 | "Henry values making his own decisions about what he does in life.", 59 | ... 60 | ], 61 | "oppose": [ 62 | "Henry thinks it is important to do what he's told.", 63 | "Henry prefers to follow the guidance of others rather than trust his own judgment.", 64 | ... 65 | ] 66 | } 67 | --- 68 | [Example 2] 69 | **User Input:** Value: Hedonism; Person: 小明 70 | **Your Response:** { 71 | "support": [ 72 | "小明认为人们应该追求快乐。", 73 | "小明喜欢参加各种能够带来乐趣的活动。", 74 | ... 75 | ], 76 | "oppose": [ 77 | "小明觉得牺牲眼前的快乐以换取长远的利益是值得的。", 78 | "小明认为自律和克制比及时行乐更重要。", 79 | ...
80 | ] 81 | } 82 | --- 83 | """ 84 | user_prompts = [f"Value: {value}; Person: {measurement_subject}" for value in values] 85 | model = LLMModel(model_name, system_prompt=system_prompt, temperature=0.5) 86 | 87 | responses = model(user_prompts, response_format="json") 88 | supports, opposes = [], [] 89 | for response in responses: 90 | try: 91 | response_json = json.loads(response.strip("```json").strip("```")) 92 | support, oppose = response_json["support"], response_json["oppose"] 93 | supports.extend(support) 94 | opposes.extend(oppose) 95 | except Exception: 96 | print("Error:", response) 97 | continue 98 | return supports, opposes 99 | 100 | 101 | def get_openai_sentence_embedding(input_texts: list[str], model_name: str='text-embedding-3-large') -> list[list[float]]: 102 | """ 103 | Get the sentence embeddings of the input texts using the OpenAI API. 104 | 105 | Args: 106 | input_texts (list[str]): A list of input texts. 107 | model_name (str): The name of the OpenAI embedding model. 108 | Note: the API key is read from the OPENAI_API_KEY environment variable. 109 | """ 110 | from openai import OpenAI 111 | client = OpenAI(api_key=os.environ["OPENAI_API_KEY"]) 112 | response = client.embeddings.create( 113 | input=input_texts, 114 | model=model_name, 115 | ) 116 | embeddings = [response.data[i].embedding for i in range(len(response.data))] 117 | return embeddings -------------------------------------------------------------------------------- /gpv/valuellama.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | import torch 3 | 4 | from transformers import AutoTokenizer, AutoModelForCausalLM 5 | 6 | 7 | class ValueLlama: 8 | def __init__(self, model_name="Value4AI/ValueLlama-3-8B", device="auto"): 9 | # model 10 | self.model = AutoModelForCausalLM.from_pretrained( 11 | model_name, 12 | torch_dtype=torch.bfloat16, 13 | device_map=device, 14 | ) 15 | 16 | self.tokenizer = AutoTokenizer.from_pretrained(model_name) 17 | self.model.generation_config.pad_token_id = self.tokenizer.eos_token_id 18 | 19 | # templates 20 | self.valence_template = """[Task] Given a sentence and a value, determine whether the sentence supports or opposes the value. If the sentence supports the value, output "support". If the sentence opposes the value, output "oppose". If you need more context to make a decision, output "either". 21 | Sentence: {sentence} 22 | Value: {value} 23 | Output:\n""" 24 | 25 | self.relevance_template = """[Task] Given a sentence and a value, determine whether the sentence is relevant to the value. If the sentence is relevant to the value, output "yes", otherwise output "no". 
26 | Sentence: {sentence} 27 | Value: {value} 28 | Output:\n""" 29 | 30 | self.get_default_batch_sizes() 31 | print("inference batch size", self.inference_batch_size) 32 | 33 | self.get_token_ids() 34 | 35 | 36 | def get_token_ids(self): 37 | key_words = ['yes', 'no', 'support', 'oppose', 'either'] 38 | self.token_ids = {} 39 | for word in key_words: 40 | tokens = self.tokenizer.tokenize(word) 41 | token_id = self.tokenizer.convert_tokens_to_ids(tokens[0]) 42 | self.token_ids[word] = token_id 43 | self.relevant_ids = [self.token_ids['yes'], self.token_ids['no']] 44 | self.valence_ids = [self.token_ids['support'], self.token_ids['oppose'], self.token_ids['either']] 45 | self.index_to_relevance = {0: 'yes', 1: 'no'} 46 | self.index_to_valence = {0: 'support', 1: 'oppose', 2: 'either'} 47 | 48 | 49 | def get_default_batch_sizes(self): 50 | ''' 51 | Set a default inference batch size based on the total available GPU memory 52 | ''' 53 | if not torch.cuda.is_available(): 54 | self.inference_batch_size = 8 55 | return 56 | # get total memory 57 | # initialize total_memory 58 | total_memory = 0 59 | 60 | # iterate over all devices 61 | for i in range(torch.cuda.device_count()): 62 | total_memory += torch.cuda.get_device_properties(i).total_memory 63 | 64 | # if over 80GB (a100) 65 | if total_memory > 80_000_000_000: 66 | self.inference_batch_size = 128 67 | # else, if over 50GB (a6000) 68 | elif total_memory > 50_000_000_000: 69 | self.inference_batch_size = 64 70 | else: 71 | self.inference_batch_size = 24 72 | 73 | 74 | def get_probs(self, inputs, batch_size=None): 75 | def prepare_prompts(prompts, tokenizer, batch_size): 76 | batches=[prompts[i:i + batch_size] for i in range(0, len(prompts), batch_size)] 77 | batches_tok=[] 78 | tokenizer.padding_side="left" 79 | for prompt_batch in batches: 80 | batches_tok.append( 81 | tokenizer( 82 | prompt_batch, 83 | return_tensors="pt", 84 | padding='longest', 85 | truncation=False, 86 | pad_to_multiple_of=8, 87 | add_special_tokens=False).to(self.model.device) # move inputs to the model's device so CPU-only runs also work 88 | ) 89 | tokenizer.padding_side="right" 90 | return batches_tok 91 | 92 | if batch_size is None: 93 | batch_size = self.inference_batch_size 94 | 95 | logits = [] 96 | prompt_batches=prepare_prompts(inputs, self.tokenizer, batch_size=batch_size) 97 | for prompts_tokenized in tqdm(prompt_batches, desc="Perception-level measurement"): 98 | outputs_tokenized=self.model.generate(**prompts_tokenized, max_new_tokens=1, return_dict_in_generate=True, output_scores=True) 99 | logits_batch = outputs_tokenized.scores[0].detach().cpu() # (batch_size, vocab_size) 100 | logits.append(logits_batch) 101 | 102 | # concatenate logits 103 | logits_cat = torch.cat(logits, dim=0) 104 | # Get probabilities 105 | probs = torch.softmax(logits_cat, dim=-1) 106 | return probs 107 | 108 | 109 | def get_probs_template(self, perceptions, values, template, token_ids, batch_size=None): 110 | # templatize 111 | inputs = [template.format(sentence=s, value=v) for s, v in zip(perceptions, values)] 112 | # pass through get_probs 113 | probs = self.get_probs(inputs, batch_size=batch_size) 114 | probs = probs[:, token_ids] 115 | # renormalize 116 | probs = probs / probs.sum(dim=-1, keepdim=True) 117 | return probs.cpu() 118 | 119 | 120 | def get_relevance(self, perceptions, values, batch_size=None): 121 | # check if str (if single instance, then batch) 122 | single = False 123 | if isinstance(perceptions, str): 124 | perceptions = [perceptions] 125 | values = [values] 126 | single = True 127 | # run through get_probs_template 128 | probs = 
self.get_probs_template(perceptions, values, self.relevance_template, self.relevant_ids, batch_size=batch_size) 129 | if single: 130 | probs = probs[0] 131 | return probs 132 | 133 | 134 | def get_valence(self, perceptions, values, batch_size=None): 135 | # check if str (if single instance, then batch) 136 | single = False 137 | if isinstance(perceptions, str): 138 | perceptions = [perceptions] 139 | values = [values] 140 | single = True 141 | # run through get_probs_template 142 | probs = self.get_probs_template(perceptions, values, self.valence_template, self.valence_ids, batch_size=batch_size) 143 | if single: 144 | probs = probs[0] 145 | return probs 146 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.34.2 2 | annotated-types==0.7.0 3 | anyio==4.4.0 4 | certifi==2024.8.30 5 | charset-normalizer==3.3.2 6 | dill==0.3.8 7 | distro==1.9.0 8 | exceptiongroup==1.2.2 9 | filelock==3.16.0 10 | fsspec==2024.9.0 11 | h11==0.14.0 12 | httpcore==1.0.5 13 | httpx==0.27.2 14 | huggingface-hub==0.24.6 15 | idna==3.8 16 | Jinja2==3.1.4 17 | jiter==0.5.0 18 | MarkupSafe==2.1.5 19 | mpire==2.10.2 20 | mpmath==1.3.0 21 | multiprocess==0.70.16 22 | networkx==3.3 23 | numpy==2.1.1 24 | nvidia-cublas-cu12==12.1.3.1 25 | nvidia-cuda-cupti-cu12==12.1.105 26 | nvidia-cuda-nvrtc-cu12==12.1.105 27 | nvidia-cuda-runtime-cu12==12.1.105 28 | nvidia-cudnn-cu12==9.1.0.70 29 | nvidia-cufft-cu12==11.0.2.54 30 | nvidia-curand-cu12==10.3.2.106 31 | nvidia-cusolver-cu12==11.4.5.107 32 | nvidia-cusparse-cu12==12.1.0.106 33 | nvidia-nccl-cu12==2.20.5 34 | nvidia-nvjitlink-cu12==12.6.68 35 | nvidia-nvtx-cu12==12.1.105 36 | openai==1.44.0 37 | packaging==24.1 38 | psutil==6.0.0 39 | pydantic==2.9.0 40 | pydantic_core==2.23.2 41 | Pygments==2.18.0 42 | PyYAML==6.0.2 43 | regex==2024.7.24 44 | requests==2.32.3 45 | safetensors==0.4.5 46 | semchunk==2.2.0 47 | sniffio==1.3.1 48 | sympy==1.13.2 49 | tiktoken==0.7.0 50 | tokenizers==0.19.1 51 | torch==2.4.1 52 | tqdm==4.66.5 53 | transformers==4.44.2 54 | triton==3.0.0 55 | typing_extensions==4.12.2 56 | tzdata==2024.1 57 | urllib3==2.2.2 58 | --------------------------------------------------------------------------------
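To make the moving parts above easier to see end to end, here is a minimal usage sketch that chains the parser, the ValueLlama scorer, and the valence utilities by hand. It is not the packaged GPV pipeline (that presumably lives in gpv/measure.py, which is not reproduced here); the example text, the target values, and the pair construction are illustrative assumptions, and the snippet assumes the repository root is on PYTHONPATH, OPENAI_API_KEY is set, and the ValueLlama-3-8B weights can be loaded.

```python
from gpv.parser import Parser
from gpv.valuellama import ValueLlama
from gpv.utils import get_valence_label, get_score

# Hypothetical inputs, chosen only for illustration.
texts = ["I stayed up all night helping my neighbour patch her roof after the storm."]
values = ["Benevolence", "Security"]

# 1) Parse each text into value-laden perceptions (calls the LLM backend).
perceptions = Parser(model_name="gpt-4o-mini").parse(texts)[0]

# 2) Score every (perception, value) pair; get_relevance/get_valence expect two
#    equally long, element-wise paired lists.
scorer = ValueLlama()
pair_p = [p for p in perceptions for _ in values]
pair_v = [v for _ in perceptions for v in values]
relevance = scorer.get_relevance(pair_p, pair_v)  # rows: [P(yes), P(no)]
valence = scorer.get_valence(pair_p, pair_v)      # rows: [P(support), P(oppose), P(either)]

# 3) Keep relevant pairs and turn valence vectors into labels and scores.
for (p, v), rel, val in zip(zip(pair_p, pair_v), relevance, valence):
    if rel[0] > rel[1]:  # relevant: P(yes) outweighs P(no)
        score = get_score(val)
        score = None if score is None else round(float(score), 3)
        print(f"{v}: {get_valence_label(val)} (score={score}) <- {p}")
```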