├── templates ├── mobile_device.txt ├── narrative.txt ├── plan_template.txt ├── face.txt ├── multi_modal_event.txt ├── commonsense-goal.txt ├── commonsense-experience.txt ├── commonsense-relationship.txt ├── commonsense-characteristic.txt ├── commonsense-routine.txt ├── persona.txt ├── dialogue_summary.txt ├── first_stark_dialogue.txt └── next_stark_dialogue.txt ├── assets └── stark_mcu_overview.PNG ├── photomaker ├── __init__.py └── model.py ├── runner ├── __init__.py ├── alignment_runner.py ├── summarizer_runner.py ├── narrative_runner.py ├── face_runner.py ├── album_runner.py ├── event_runner.py ├── base_runner.py ├── commonsense_runner.py ├── dialogue_runner.py └── persona_runner.py ├── prepare_image_db ├── download_data.sh ├── build_index.sh └── build_embedding.sh ├── utils ├── etc_utils.py └── persona_utils.py ├── LICENSE ├── postprocess_final_dataset.py ├── execute_web_search.py ├── generate_stark_dialogue.py ├── execute_sdxl.py ├── execute_retrieval.py ├── generate_face_image.py ├── execute_photomaker.py ├── scripts └── run_mcu.sh ├── README.md ├── make_final_dataset.py └── plan_runner.py /templates/mobile_device.txt: -------------------------------------------------------------------------------- 1 | {sentence} 2 | 3 | Image descriptions stored on {name}'s mobile device: 4 | 1. -------------------------------------------------------------------------------- /assets/stark_mcu_overview.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/passing2961/Stark/HEAD/assets/stark_mcu_overview.PNG -------------------------------------------------------------------------------- /templates/narrative.txt: -------------------------------------------------------------------------------- 1 | {sentence} 2 | 3 | Rewrite this sentence with more specific details in two or three sentences: -------------------------------------------------------------------------------- /templates/plan_template.txt: -------------------------------------------------------------------------------- 1 | Name: {name} 2 | Gender: {gender} 3 | Age: {age} 4 | Image Description: {image_description} 5 | Module: -------------------------------------------------------------------------------- /templates/face.txt: -------------------------------------------------------------------------------- 1 | Profile Information: 2 | - Age: {age} 3 | - Gender: {gender} 4 | - Nationality: {nationality} 5 | 6 | Human Description: -------------------------------------------------------------------------------- /templates/multi_modal_event.txt: -------------------------------------------------------------------------------- 1 | {name}'s initial personal event: {event} 2 | 3 | Given the {name}'s initial personal event, generate the temporal event graph containing more than five events. 4 | Temporal Event Graph: -------------------------------------------------------------------------------- /photomaker/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import PhotoMakerIDEncoder 2 | from .pipeline import PhotoMakerStableDiffusionXLPipeline 3 | 4 | __all__ = [ 5 | "PhotoMakerIDEncoder", 6 | "PhotoMakerStableDiffusionXLPipeline", 7 | ] -------------------------------------------------------------------------------- /templates/commonsense-goal.txt: -------------------------------------------------------------------------------- 1 | {demo_sent} {persona_attr} I plan . 
2 | 3 | Generate the most appropriate sentence for "" in the given sentence. You must provide the answer corresponding to "". 4 | : -------------------------------------------------------------------------------- /templates/commonsense-experience.txt: -------------------------------------------------------------------------------- 1 | I . Now, {demo_sent} {persona_attr} 2 | 3 | Generate the most appropriate sentence for "" in the given sentence. You must provide the answer corresponding to "". 4 | : -------------------------------------------------------------------------------- /templates/commonsense-relationship.txt: -------------------------------------------------------------------------------- 1 | {demo_sent} {persona_attr} So, I . 2 | 3 | Generate the most appropriate sentence for "" in the given sentence. You must provide the answer corresponding to "". 4 | : -------------------------------------------------------------------------------- /templates/commonsense-characteristic.txt: -------------------------------------------------------------------------------- 1 | {demo_sent} {persona_attr} I . 2 | 3 | Generate the most appropriate sentence for "" in the given sentence. You must provide the answer corresponding to "". 4 | : -------------------------------------------------------------------------------- /templates/commonsense-routine.txt: -------------------------------------------------------------------------------- 1 | {demo_sent} {persona_attr} I regularly . 2 | 3 | Generate the most appropriate sentence for "" in the given sentence. You must provide the answer corresponding to "". 4 | : -------------------------------------------------------------------------------- /templates/persona.txt: -------------------------------------------------------------------------------- 1 | Profile Information: 2 | - Age: {age} 3 | - Gender: {gender} 4 | - Birthplace: {birthplace} 5 | - Residence: {residence} 6 | 7 | Persona Category: {target_persona_category} 8 | Persona Entity Key: {target_persona_entity} 9 | Persona Sentences: 10 | 1. -------------------------------------------------------------------------------- /templates/dialogue_summary.txt: -------------------------------------------------------------------------------- 1 | The current time and date are {current_date}. {name} and AI assistant talked today and had the following conversation: 2 | 3 | {dialogue} 4 | 5 | Summarize the conversation between {name} and AI assistant so far. Include key details and include time references wherever possible. 
6 | 7 | Summarization: -------------------------------------------------------------------------------- /runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .persona_runner import PersonaRunner 2 | from .commonsense_runner import CommonsenseRunner 3 | from .narrative_runner import NarrativeRunner 4 | from .event_runner import EventRunner 5 | from .dialogue_runner import DialogueRunner 6 | from .album_runner import AlbumRunner 7 | from .face_runner import FaceRunner 8 | from .summarizer_runner import SummarizerRunner -------------------------------------------------------------------------------- /prepare_image_db/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | wget https://storage.googleapis.com/conceptual_12m/cc12m.tsv 4 | 5 | sed -i '1s/^/url\tcaption\n/' cc12m.tsv 6 | 7 | img2dataset --url_list cc12m.tsv --input_format "tsv" \ 8 | --url_col "url" --caption_col "caption" --output_format webdataset \ 9 | --output_folder cc12m --processes_count 16 --thread_count 64 --image_size 256 \ 10 | --enable_wandb False -------------------------------------------------------------------------------- /utils/etc_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def load_json(datadir: str): 5 | with open(datadir, 'r') as f: 6 | return json.load(f) 7 | 8 | def load_jsonl(datadir: str): 9 | output = [] 10 | with open(datadir) as f: 11 | for line in f.readlines(): 12 | output.append(json.loads(line)) 13 | return output 14 | 15 | def load_txt(datadir: str): 16 | with open(datadir, 'r') as f: 17 | return f.read() -------------------------------------------------------------------------------- /templates/first_stark_dialogue.txt: -------------------------------------------------------------------------------- 1 | {name}'s Profile Information: 2 | - Age: {age} 3 | - Gender: {gender} 4 | - Birthplace: {birthplace} 5 | - Residence: {residence} 6 | 7 | Existing image descriptions in {name}'s mobile device: {mobile_device} 8 | 9 | The topic of the conversation between the AI assistant and {name} on {date} today is as follows. 10 | - Topic on {date}: {event} 11 | 12 | Generate a long, in-depth conversation with multiple turns based on the given {name}'s profile information and the current topic of conversation. 
13 | -------------------------------------------------------------------------------- /prepare_image_db/build_index.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | IMAGE_DATASETS=("cc12m""ai2d" "chartqa" "mathvision" "coco_train2017" "gqa" "ocr_vqa" "redcaps12m" "textvqa" "vg") 4 | 5 | for IMAGE_DATASET in "${IMAGE_DATASETS[@]}" 6 | do 7 | autofaiss build_index \ 8 | --embeddings="embeddings_folder/${IMAGE_DATASET}/img_emb" \ 9 | --index_path="index_folder/${IMAGE_DATASET}/knn.index" \ 10 | --index_infos_path="index_folder/${IMAGE_DATASET}/infos.json" \ 11 | --metric_type="ip" \ 12 | --max_index_query_time_ms=10 \ 13 | --max_index_memory_usage="16GB" 14 | done -------------------------------------------------------------------------------- /templates/next_stark_dialogue.txt: -------------------------------------------------------------------------------- 1 | {name}'s Profile Information: 2 | - Age: {age} 3 | - Gender: {gender} 4 | - Birthplace: {birthplace} 5 | - Residence: {residence} 6 | 7 | Existing image descriptions in {name}'s mobile device: {mobile_device} 8 | 9 | The topics of the conversation the user had with AI assistant by date are as follows: 10 | {history_event} 11 | 12 | {time_interval} later from the {last_date}, on {date} today, {name} has gone through a new experience, and based on this experience, {name} and the AI assistant engage in a conversation today. The new experience {name} went through and the topic of conversation with the AI assistant are as follows. 13 | - {name}'s Experience: {experience} 14 | - Topic on {date}: {event} 15 | 16 | Generate a long, in-depth conversation with multiple turns based on the given {name}'s profile information, the last topic of conversation, the experience and the current topic of conversation. 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Young-Jun Lee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /runner/alignment_runner.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from tqdm import tqdm 4 | import pyarrow.parquet as pq 5 | import pandas as pd 6 | from joblib import Parallel, delayed 7 | from joblib_progress import joblib_progress 8 | 9 | from .base_runner import BaseRunner, console 10 | 11 | 12 | def _rebuild_cc12m_dataset(filename): 13 | if '.jpg' not in filename: 14 | return None 15 | else: 16 | return filename 17 | 18 | class AlignmentRunner(BaseRunner): 19 | def __init__(self, args): 20 | super().__init__(args) 21 | 22 | self._load_image_file() 23 | 24 | def _flatten_dataset(self, dataset): 25 | return [ 26 | ele for ele in dataset if ele is not None 27 | ] 28 | 29 | def system_msg(self): 30 | return "You are a helpful assistant." 31 | 32 | def prompt_prefix(self): 33 | return "persona-attr" 34 | 35 | def _load_image_file(self): 36 | cc12m_path = '/home/yjlee/workspace/ICCV2023/pipeline/data_collection/image_files/cc12m' 37 | 38 | subdir = os.listdir(cc12m_path)[:100] 39 | cc12m_filenames = [] 40 | for _dir in tqdm(subdir, total=len(subdir)): 41 | if '.parquet' in _dir or '.json' in _dir: 42 | continue 43 | 44 | for ele in os.listdir(os.path.join(cc12m_path, _dir)): 45 | cc12m_filenames.append(os.path.join(cc12m_path, _dir, ele)) 46 | 47 | console.log('[{}] # of CC12M dataset: {}'.format(self.__class__.__name__, len(cc12m_filenames))) 48 | 49 | with joblib_progress("Loading CC12M dataset...", total=len(cc12m_filenames)): 50 | cc12m_dataset = Parallel(n_jobs=32)(delayed(_rebuild_cc12m_dataset)(filename) for filename in cc12m_filenames) 51 | 52 | cc12m_filenames = self._flatten_dataset(cc12m_dataset) 53 | 54 | console.log('[{}] # of CC12M dataset: {}'.format(self.__class__.__name__, len(cc12m_filenames))) 55 | 56 | 57 | def run(self): 58 | return None -------------------------------------------------------------------------------- /prepare_image_db/build_embedding.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | clip-retrieval inference \ 5 | --input_dataset "data/textvqa/train_images" \ 6 | --output_folder embeddings_folder/textvqa \ 7 | --clip_model ViT-L/14@336px 8 | 9 | clip-retrieval inference \ 10 | --input_dataset "data/vg/VG_100K" \ 11 | --output_folder embeddings_folder/vg \ 12 | --clip_model ViT-L/14@336px 13 | 14 | clip-retrieval inference \ 15 | --input_dataset "data/ocr_vqa/images" \ 16 | --output_folder embeddings_folder/ocr_vqa \ 17 | --clip_model ViT-L/14@336px 18 | 19 | clip-retrieval inference \ 20 | --input_dataset "data/gqa/images" \ 21 | --output_folder embeddings_folder/gqa \ 22 | --clip_model ViT-L/14@336px 23 | 24 | clip-retrieval inference \ 25 | --input_dataset "data/coco/train2017" \ 26 | --output_folder embeddings_folder/coco_train2017 \ 27 | --clip_model ViT-L/14@336px 28 | 29 | clip-retrieval inference \ 30 | --input_dataset "cc12m/{00000..01242}.tar" \ 31 | --output_folder embeddings_folder/cc12m \ 32 | --input_format webdataset \ 33 | --clip_model ViT-L/14@336px \ 34 | --enable_metadata True \ 35 | --output_partition_count 1243 36 | 37 | clip-retrieval inference \ 38 | --input_dataset "data/redcaps/redcaps12m_shards/{00000..00180}.tar" \ 39 | --output_folder embeddings_folder/redcaps12m \ 40 | --input_format webdataset \ 41 | --output_partition_count 181 \ 42 | --clip_model ViT-L/14@336px \ 43 | --enable_metadata True 44 | 45 | 46 | 
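# Note: the cc12m run above reads the tar shards produced by img2dataset in
# download_data.sh, and the redcaps12m shards are assumed to be prepared the
# same way; the brace patterns ({00000..01242}.tar, {00000..00180}.tar) and the
# matching --output_partition_count values should equal the number of shards
# actually present, so adjust them together if your crawl produced a different
# count. The remaining datasets below are plain image folders. Once all
# embeddings exist under embeddings_folder/, build_index.sh turns each img_emb
# directory into the FAISS knn.index that execute_retrieval.py queries.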
clip-retrieval inference \ 47 | --input_dataset "data/ai2d/images" \ 48 | --output_folder embeddings_folder/ai2d \ 49 | --clip_model ViT-L/14@336px 50 | 51 | clip-retrieval inference \ 52 | --input_dataset "data/ChartQA Dataset/train/png" \ 53 | --output_folder embeddings_folder/chartqa \ 54 | --clip_model ViT-L/14@336px 55 | 56 | clip-retrieval inference \ 57 | --input_dataset "data/mathvision/images" \ 58 | --output_folder embeddings_folder/mathvision \ 59 | --clip_model ViT-L/14@336px -------------------------------------------------------------------------------- /postprocess_final_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import copy 4 | import random 5 | from tqdm import tqdm 6 | 7 | 8 | def load_json(datadir): 9 | with open(datadir, 'r', encoding='utf-8') as f: 10 | return json.load(f) 11 | 12 | def load_jsonl(datadir: str): 13 | output = [] 14 | with open(datadir, 'r', encoding='utf-8') as f: 15 | for line in f.readlines(): 16 | output.append(json.loads(line)) 17 | return output 18 | 19 | def dump_json_output(outputs, file_name): 20 | with open(file_name, 'w', encoding='utf-8') as f: 21 | json.dump(outputs, f, ensure_ascii=False, indent='\t') 22 | 23 | def dump_jsonl_output(outputs, file_name=None): 24 | f = open(file_name, 'w', encoding='utf-8') 25 | for output in outputs: 26 | f.write(json.dumps(output) + '\n') 27 | f.close() 28 | 29 | 30 | def load_dataset(): 31 | all_stark = [] 32 | for persona_seed_num in range(0, 1): 33 | stark = load_json(os.path.join(f'./Stark/stark_{persona_seed_num}.json')) 34 | 35 | all_stark.extend(stark) 36 | 37 | return all_stark 38 | 39 | def process_dialog(dialog): 40 | dialogue = eval(dialog) 41 | cnt = 0 42 | new_cnt = 0 43 | redialog = [] 44 | for item in dialogue: 45 | utter_id = item['utter_id'] 46 | speaker = item['speaker'] 47 | utter = item['utter'] 48 | sharing_info = item['sharing_info'] 49 | 50 | cp_item = copy.deepcopy(item) 51 | if utter == '' and len(sharing_info) == 0: 52 | cp_item['utter'] = '' 53 | 54 | redialog.append(cp_item) 55 | 56 | return redialog 57 | 58 | def process_dataset(dataset): 59 | 60 | re_dataset = [] 61 | for instance in dataset: 62 | uuid = instance['unique_id'] 63 | name = instance['name'] 64 | 65 | episode = instance['episode'] 66 | 67 | cp_instance = copy.deepcopy(instance) 68 | 69 | re_epi = [] 70 | for idx, session in enumerate(episode): 71 | session_dialog = session[f'session{idx+1}:dialogue'] 72 | p_dialog = process_dialog(session_dialog) 73 | 74 | cp_session = copy.deepcopy(session) 75 | cp_session[f'session{idx+1}:dialogue'] = p_dialog 76 | re_epi.append(cp_session) 77 | 78 | cp_instance['episode'] = re_epi 79 | re_dataset.append(cp_instance) 80 | 81 | return re_dataset 82 | #return all_count 83 | 84 | if __name__ == '__main__': 85 | 86 | dataset = load_dataset() 87 | print(len(dataset)) 88 | save_dir = 'Stark/post-process' 89 | os.makedirs(save_dir, exist_ok=True) 90 | 91 | processed_dataset = process_dataset(dataset) 92 | print(len(processed_dataset)) 93 | dump_json_output(processed_dataset, os.path.join(save_dir, 'stark_0.json')) -------------------------------------------------------------------------------- /runner/summarizer_runner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import copy 4 | import random 5 | from collections import defaultdict 6 | 7 | import torch 8 | from tqdm import tqdm 9 | from glob import glob 10 | 11 | from .base_runner 
import BaseRunner, console 12 | from utils.etc_utils import load_jsonl, load_txt, load_json 13 | 14 | 15 | class SummarizerRunner(BaseRunner): 16 | def __init__(self, args): 17 | super().__init__(args) 18 | 19 | self.args = args 20 | 21 | self.save_dir = os.path.join(self.output_base_dir, 'dialogue-summary', f'persona_seed:{args.persona_seed_num}') 22 | 23 | os.makedirs(self.save_dir, exist_ok=True) 24 | self.last_save_chunk_idx_file = os.path.join(self.save_dir, 'last_save_chunk_idx.txt') 25 | 26 | self._load_prompt_template() 27 | 28 | def _load_prompt_template(self): 29 | self.template = load_txt('./templates/dialogue_summary.txt') 30 | 31 | @property 32 | def system_msg(self): 33 | return "Your job is to summarize the given conversation." 34 | 35 | @property 36 | def prompt_prefix(self): 37 | #if self.args.image_alignment_target == 'mobile-device-image': 38 | return "dialogue-summary" 39 | 40 | def convert_flatten_dialogue(self, dialogue): 41 | 42 | flatten_dialogue = [] 43 | for instance in dialogue: 44 | print(instance) 45 | spk = instance['speaker'] 46 | utter = instance['utterance'] 47 | 48 | if len(instance['sharing_info']) != 0: 49 | image_desc = instance['sharing_info']['image_description'] 50 | flatten_dialogue.append(f'{spk}: [Sharing Image of {image_desc}]') 51 | else: 52 | flatten_dialogue.append(f'{spk}: {utter}') 53 | 54 | return '\n'.join(flatten_dialogue) 55 | 56 | def prepare_prompt(self): 57 | try: 58 | results = load_jsonl(os.path.join(self.output_base_dir, 'dialogue', f'persona_seed:{self.args.persona_seed_num}', f'session_num:{self.args.target_session_num}', 'final_output.jsonl')) 59 | console.log('[{}] # of Total results: {}'.format(self.__class__.__name__, len(results))) 60 | except FileNotFoundError as e: 61 | return [] 62 | 63 | if self.args.debug: 64 | try: 65 | results = random.sample(results, self.args.debug_sample_num) 66 | except ValueError as e: 67 | results = results 68 | 69 | prompts = [] 70 | for instance in tqdm(results, total=len(results)): 71 | print(instance.keys()) 72 | print(instance['dialogue:date']) 73 | current_date = instance['dialogue:date'] 74 | name = instance['name'] 75 | 76 | print(instance['dialogue:last_date']) 77 | print(instance['dialogue:history_event']) 78 | print(instance['session_number']) 79 | flatten_dialogue = self.convert_flatten_dialogue(instance['parsed_dialogue_generation']) 80 | 81 | prompt = self.template.format(current_date=current_date, dialogue=flatten_dialogue, name=name) 82 | print(prompt) 83 | assert False 84 | 85 | def parse_and_filter(self): 86 | return None -------------------------------------------------------------------------------- /execute_web_search.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json 4 | import copy 5 | import time 6 | import torch 7 | import random 8 | import argparse 9 | import warnings 10 | from tqdm import tqdm 11 | import pandas as pd 12 | from pathlib import Path 13 | 14 | import uuid 15 | import requests 16 | import concurrent.futures 17 | from icrawler.builtin import BingImageCrawler 18 | 19 | 20 | # Suppress warnings 21 | warnings.filterwarnings("ignore") 22 | 23 | 24 | 25 | MODULE_MAPPER = { 26 | 't2i': 'sdxl-lightning', 27 | 'p-t2i': 'photomaker', 28 | 'web': 'bing', 29 | 'retrieval': 'image_db' 30 | } 31 | 32 | def load_json(path): 33 | with open(path, 'r', encoding='utf-8') as f: 34 | return json.load(f) 35 | 36 | def search_image(target_instance, SAVE_PATH): 37 | image_uuid = 
target_instance['image_uuid'] 38 | bing_crawler = BingImageCrawler(downloader_threads=8, storage={"root_dir": os.path.join(SAVE_PATH, image_uuid)}) 39 | 40 | image_desc = target_instance['image_description'] 41 | bing_crawler.crawl(keyword=image_desc, offset=0, max_num=10, filters=None) 42 | 43 | cp_instance = copy.deepcopy(target_instance) 44 | cp_instance['image_save_path'] = os.path.join(SAVE_PATH, image_uuid) 45 | return cp_instance 46 | 47 | def batch_images(dataset, SAVE_PATH): 48 | target_dataset, non_target_dataset = [], [] 49 | for instance in tqdm(dataset, total=len(dataset)): 50 | module = instance['image_alignment_module'] 51 | model_id = MODULE_MAPPER[module] 52 | 53 | image_uuid = instance['image_uuid'] 54 | 55 | 56 | if model_id == 'bing': 57 | if os.path.exists(os.path.join(SAVE_PATH, image_uuid)): 58 | non_target_dataset.append(instance) 59 | continue 60 | target_dataset.append(instance) 61 | else: 62 | non_target_dataset.append(instance) 63 | 64 | print('# of total dataset:', len(dataset)) 65 | print('# of target dataset:', len(target_dataset)) 66 | print('# of non-target dataset:', len(non_target_dataset)) 67 | 68 | final_dataset = [] 69 | with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor: 70 | futures = [] 71 | 72 | for instance in tqdm(target_dataset, total=len(target_dataset)): 73 | cp_instance = copy.deepcopy(instance) 74 | 75 | future = executor.submit(search_image, cp_instance, SAVE_PATH) 76 | futures.append(future) 77 | 78 | for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)): 79 | ret = future.result() 80 | final_dataset.append(ret) 81 | 82 | return final_dataset + non_target_dataset 83 | 84 | if __name__ == '__main__': 85 | parser = argparse.ArgumentParser() 86 | parser.add_argument('--start-idx', type=int) 87 | parser.add_argument('--end-idx', type=int) 88 | args = parser.parse_args() 89 | 90 | for persona_seed_num in range(args.start_idx, args.end_idx): 91 | 92 | dataset = load_json(f'curated_stark/planner-parsed-openai/stark_{persona_seed_num}.json') 93 | SAVE_PATH = f'generated_image/plan-and-execute/web_searcher/stark_{persona_seed_num}' 94 | os.makedirs(SAVE_PATH, exist_ok=True) 95 | 96 | generations = batch_images(dataset, SAVE_PATH) 97 | 98 | data_save_path = 'curated_stark/plan-and-execute/web_searcher' 99 | os.makedirs(data_save_path, exist_ok=True) 100 | 101 | with open(os.path.join(data_save_path, f'stark_{persona_seed_num}.json'), 'w', encoding='utf-8') as f: 102 | json.dump(generations, f, ensure_ascii=False, indent='\t') 103 | -------------------------------------------------------------------------------- /generate_stark_dialogue.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import argparse 4 | 5 | from runner import ( 6 | PersonaRunner, 7 | CommonsenseRunner, 8 | NarrativeRunner, 9 | AlignmentRunner, 10 | EventRunner, 11 | DialogueRunner, 12 | #ImageRunner, 13 | AlbumRunner, 14 | AlbumImageRunner, 15 | FaceRunner, 16 | FaceImageRunner 17 | ) 18 | 19 | 20 | RUNNER_MAP = { 21 | 'persona-attr': PersonaRunner, 22 | 'commonsense': CommonsenseRunner, 23 | 'narrative': NarrativeRunner, 24 | 'event': EventRunner, 25 | 'dialogue': DialogueRunner, 26 | 'album': AlbumRunner, 27 | 'face': FaceRunner, 28 | } 29 | 30 | def main(args): 31 | random.seed(42) 32 | 33 | runner = RUNNER_MAP[args.runner_name](args) 34 | runner.run() 35 | 36 | if __name__ == '__main__': 37 | parser = argparse.ArgumentParser(description='arguments for 
generating multi-modal dialogues using LLM') 38 | parser.add_argument('--run-id', 39 | type=str, 40 | default='vanilla', 41 | help='the name of the directory where the output will be dumped') 42 | parser.add_argument('--model', 43 | type=str, 44 | default='gpt-3.5-turbo-1106', 45 | help='which LLM to use') 46 | parser.add_argument('--temperature', 47 | type=float, 48 | default=0.9, 49 | help="control randomness: lowering results in less random completion") 50 | parser.add_argument('--top-p', 51 | type=float, 52 | default=0.95, 53 | help="nucleus sampling") 54 | parser.add_argument('--frequency-penalty', 55 | type=float, 56 | default=1.0, 57 | help="decreases the model's likelihood to repeat the same line verbatim") 58 | parser.add_argument('--presence-penalty', 59 | type=float, 60 | default=0.6, 61 | help="increases the model's likelihood to talk about new topics") 62 | parser.add_argument('--max-tokens', 63 | type=int, 64 | default=1024, 65 | help='maximum number of tokens to generate') 66 | parser.add_argument('--split', 67 | type=str, 68 | default=None, 69 | help='Specify the dataset split (i.e., train, validation, test).') 70 | parser.add_argument('--runner-name', 71 | type=str, 72 | default=None, 73 | help='Specify the runner name (e.g., persona-attribute)') 74 | parser.add_argument('--do-parse-filter', 75 | action='store_true', 76 | help='do parsing and filtering based on llm-generated results') 77 | parser.add_argument('--diffusion-model-id', 78 | type=str, 79 | default=None, 80 | help='Specify the diffusion model.') 81 | parser.add_argument('--cache-dir', 82 | type=str, 83 | default=None, 84 | help='Cache dir for downloading pre-trained diffusion model.') 85 | parser.add_argument('--debug', 86 | action='store_true', 87 | help='do debugging for generating small number of sampels.') 88 | parser.add_argument('--debug-sample-num', 89 | type=int, 90 | default=None, 91 | help="Number of sample for debug.") 92 | parser.add_argument('--shard-num', 93 | type=int, 94 | default=200, 95 | help='Number of sharded files.') 96 | parser.add_argument('--persona-seed-num', 97 | type=int, 98 | default=None, 99 | help="Persona seed number.") 100 | parser.add_argument('--target-session-num', 101 | type=int, 102 | default=None, 103 | help="Target dialogue session number.") 104 | args = parser.parse_args() 105 | main(args) -------------------------------------------------------------------------------- /execute_sdxl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json 4 | import copy 5 | import time 6 | import torch 7 | import random 8 | import argparse 9 | import warnings 10 | from tqdm import tqdm 11 | import pandas as pd 12 | from pathlib import Path 13 | import concurrent.futures 14 | 15 | import torch 16 | from accelerate import PartialState 17 | from diffusers import ( 18 | StableDiffusionXLPipeline, 19 | StableDiffusionPipeline, 20 | UNet2DConditionModel, 21 | EulerDiscreteScheduler, 22 | PixArtAlphaPipeline, 23 | DiffusionPipeline, 24 | DDIMScheduler 25 | ) 26 | from diffusers.utils import load_image 27 | from safetensors.torch import load_file 28 | from huggingface_hub import hf_hub_download 29 | from accelerate.utils import gather_object 30 | from photomaker import PhotoMakerStableDiffusionXLPipeline 31 | 32 | # Suppress warnings 33 | warnings.filterwarnings("ignore") 34 | 35 | MODULE_MAPPER = { 36 | 't2i': 'sdxl-lightning', 37 | 'p-t2i': 'photomaker', 38 | 'web': 'bing', 39 | 'retrieval': 'image_db' 40 | } 41 | 42 | 
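# Each planner-parsed instance carries an 'image_alignment_module' key
# ('t2i', 'p-t2i', 'web', or 'retrieval') that MODULE_MAPPER resolves to the
# executor responsible for it. This script only generates images for instances
# routed to 'sdxl-lightning'; every other instance is passed through unchanged,
# so the sibling executors (execute_web_search.py, execute_retrieval.py, and
# presumably execute_photomaker.py) can be run separately over the same
# curated_stark/planner-parsed-openai/stark_{n}.json files.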
def load_json(path): 43 | with open(path, 'r', encoding='utf-8') as f: 44 | return json.load(f) 45 | 46 | def load_sdxl_model(cache_dir: str, device: str): 47 | """Load the SDXL Lightning diffusion model.""" 48 | base = "stabilityai/stable-diffusion-xl-base-1.0" 49 | repo = "ByteDance/SDXL-Lightning" 50 | ckpt = "sdxl_lightning_8step_unet.safetensors" 51 | unet = UNet2DConditionModel.from_config(base, subfolder="unet").to(device, torch.float16) 52 | unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device)) 53 | pipe = StableDiffusionXLPipeline.from_pretrained( 54 | base, unet=unet, torch_dtype=torch.float16, cache_dir=cache_dir, variant="fp16" 55 | ) 56 | pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing") 57 | return pipe.to(device) 58 | 59 | 60 | cache_dir = './pretrained_diffusion_model' 61 | 62 | parser = argparse.ArgumentParser() 63 | parser.add_argument('--start-idx', type=int) 64 | parser.add_argument('--end-idx', type=int) 65 | args = parser.parse_args() 66 | 67 | model = load_sdxl_model(cache_dir, 'cuda:2') 68 | 69 | @torch.inference_mode() 70 | def generate_image(target_instance, SAVE_PATH): 71 | 72 | target_image_uuid = target_instance['image_uuid'] 73 | 74 | images = model( 75 | target_instance['image_description'], 76 | num_inference_steps=8, 77 | guidance_scale=0, 78 | num_images_per_prompt=1 79 | ).images 80 | 81 | save_paths = [] 82 | for idx, image in enumerate(images): 83 | save_paths.append(os.path.join(SAVE_PATH, f'{idx}:{target_image_uuid}.png')) 84 | image.save(os.path.join(SAVE_PATH, f'{idx}:{target_image_uuid}.png')) 85 | 86 | cp_instance = copy.deepcopy(target_instance) 87 | cp_instance['image_save_paths'] = save_paths 88 | return cp_instance 89 | 90 | 91 | def batch_images(dataset, SAVE_PATH): 92 | 93 | target_dataset, non_target_dataset = [], [] 94 | for instance in tqdm(dataset, total=len(dataset)): 95 | module = instance['image_alignment_module'] 96 | model_id = MODULE_MAPPER[module] 97 | 98 | if model_id == 'sdxl-lightning': 99 | if os.path.exists(os.path.join(SAVE_PATH, '0:{}.png'.format(instance['image_uuid']))): 100 | non_target_dataset.append(instance) 101 | continue 102 | target_dataset.append(instance) 103 | else: 104 | non_target_dataset.append(instance) 105 | 106 | print('# of total dataset:', len(dataset)) 107 | print('# of target dataset:', len(target_dataset)) 108 | print('# of non-target dataset:', len(non_target_dataset)) 109 | 110 | completions_per_process = [] 111 | for batch in tqdm(target_dataset, total=len(target_dataset)): 112 | 113 | result = generate_image(batch, SAVE_PATH) 114 | completions_per_process.append(result) 115 | 116 | print('# of final dataset:', len(completions_per_process) + len(non_target_dataset)) 117 | return completions_per_process + non_target_dataset 118 | 119 | if __name__ == '__main__': 120 | 121 | for persona_seed_num in range(args.start_idx, args.end_idx): 122 | dataset = load_json(f'curated_stark/planner-parsed-openai/stark_{persona_seed_num}.json') 123 | SAVE_PATH = f'generated_image/plan-and-execute/sdxl/stark_{persona_seed_num}' 124 | os.makedirs(SAVE_PATH, exist_ok=True) 125 | 126 | generations = batch_images(dataset, SAVE_PATH) 127 | 128 | data_save_path = 'curated_stark/plan-and-execute/sdxl' 129 | os.makedirs(data_save_path, exist_ok=True) 130 | 131 | with open(os.path.join(data_save_path, f'stark_{persona_seed_num}.json'), 'w', encoding='utf-8') as f: 132 | json.dump(generations, f, ensure_ascii=False, indent='\t') 133 | 
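A minimal invocation sketch for the plan-and-execute stage implemented by the script above and its siblings, assuming the planner-parsed files already exist under curated_stark/planner-parsed-openai/ and that execute_photomaker.py accepts the same --start-idx/--end-idx range as the other executors (an assumption; its contents are not shown here). The CUDA device strings are hard-coded inside the scripts (e.g. 'cuda:2' in execute_sdxl.py, 'cuda:0' in execute_retrieval.py), so edit them to match the local machine before running:

# Run each executor over persona seeds 0..4; every script reads
# curated_stark/planner-parsed-openai/stark_{n}.json, processes only the
# instances routed to its own module, and writes its results under
# curated_stark/plan-and-execute/<module>/stark_{n}.json.
python execute_sdxl.py --start-idx 0 --end-idx 5
python execute_web_search.py --start-idx 0 --end-idx 5
python execute_retrieval.py --start-idx 0 --end-idx 5
python execute_photomaker.py --start-idx 0 --end-idx 5   # assumed to take the same flags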
-------------------------------------------------------------------------------- /photomaker/model.py: -------------------------------------------------------------------------------- 1 | # Merge image encoder and fuse module to create an ID Encoder 2 | # send multiple ID images, we can directly obtain the updated text encoder containing a stacked ID embedding 3 | 4 | import torch 5 | import torch.nn as nn 6 | from transformers.models.clip.modeling_clip import CLIPVisionModelWithProjection 7 | from transformers.models.clip.configuration_clip import CLIPVisionConfig 8 | from transformers import PretrainedConfig 9 | 10 | VISION_CONFIG_DICT = { 11 | "hidden_size": 1024, 12 | "intermediate_size": 4096, 13 | "num_attention_heads": 16, 14 | "num_hidden_layers": 24, 15 | "patch_size": 14, 16 | "projection_dim": 768 17 | } 18 | 19 | class MLP(nn.Module): 20 | def __init__(self, in_dim, out_dim, hidden_dim, use_residual=True): 21 | super().__init__() 22 | if use_residual: 23 | assert in_dim == out_dim 24 | self.layernorm = nn.LayerNorm(in_dim) 25 | self.fc1 = nn.Linear(in_dim, hidden_dim) 26 | self.fc2 = nn.Linear(hidden_dim, out_dim) 27 | self.use_residual = use_residual 28 | self.act_fn = nn.GELU() 29 | 30 | def forward(self, x): 31 | residual = x 32 | x = self.layernorm(x) 33 | x = self.fc1(x) 34 | x = self.act_fn(x) 35 | x = self.fc2(x) 36 | if self.use_residual: 37 | x = x + residual 38 | return x 39 | 40 | 41 | class FuseModule(nn.Module): 42 | def __init__(self, embed_dim): 43 | super().__init__() 44 | self.mlp1 = MLP(embed_dim * 2, embed_dim, embed_dim, use_residual=False) 45 | self.mlp2 = MLP(embed_dim, embed_dim, embed_dim, use_residual=True) 46 | self.layer_norm = nn.LayerNorm(embed_dim) 47 | 48 | def fuse_fn(self, prompt_embeds, id_embeds): 49 | stacked_id_embeds = torch.cat([prompt_embeds, id_embeds], dim=-1) 50 | stacked_id_embeds = self.mlp1(stacked_id_embeds) + prompt_embeds 51 | stacked_id_embeds = self.mlp2(stacked_id_embeds) 52 | stacked_id_embeds = self.layer_norm(stacked_id_embeds) 53 | return stacked_id_embeds 54 | 55 | def forward( 56 | self, 57 | prompt_embeds, 58 | id_embeds, 59 | class_tokens_mask, 60 | ) -> torch.Tensor: 61 | # id_embeds shape: [b, max_num_inputs, 1, 2048] 62 | id_embeds = id_embeds.to(prompt_embeds.dtype) 63 | num_inputs = class_tokens_mask.sum().unsqueeze(0) # TODO: check for training case 64 | batch_size, max_num_inputs = id_embeds.shape[:2] 65 | # seq_length: 77 66 | seq_length = prompt_embeds.shape[1] 67 | # flat_id_embeds shape: [b*max_num_inputs, 1, 2048] 68 | flat_id_embeds = id_embeds.view( 69 | -1, id_embeds.shape[-2], id_embeds.shape[-1] 70 | ) 71 | # valid_id_mask [b*max_num_inputs] 72 | valid_id_mask = ( 73 | torch.arange(max_num_inputs, device=flat_id_embeds.device)[None, :] 74 | < num_inputs[:, None] 75 | ) 76 | valid_id_embeds = flat_id_embeds[valid_id_mask.flatten()] 77 | 78 | prompt_embeds = prompt_embeds.view(-1, prompt_embeds.shape[-1]) 79 | class_tokens_mask = class_tokens_mask.view(-1) 80 | valid_id_embeds = valid_id_embeds.view(-1, valid_id_embeds.shape[-1]) 81 | # slice out the image token embeddings 82 | image_token_embeds = prompt_embeds[class_tokens_mask] 83 | stacked_id_embeds = self.fuse_fn(image_token_embeds, valid_id_embeds) 84 | assert class_tokens_mask.sum() == stacked_id_embeds.shape[0], f"{class_tokens_mask.sum()} != {stacked_id_embeds.shape[0]}" 85 | prompt_embeds.masked_scatter_(class_tokens_mask[:, None], stacked_id_embeds.to(prompt_embeds.dtype)) 86 | updated_prompt_embeds = prompt_embeds.view(batch_size, seq_length, 
-1) 87 | return updated_prompt_embeds 88 | 89 | class PhotoMakerIDEncoder(CLIPVisionModelWithProjection): 90 | def __init__(self): 91 | super().__init__(CLIPVisionConfig(**VISION_CONFIG_DICT)) 92 | self.visual_projection_2 = nn.Linear(1024, 1280, bias=False) 93 | self.fuse_module = FuseModule(2048) 94 | 95 | def forward(self, id_pixel_values, prompt_embeds, class_tokens_mask): 96 | b, num_inputs, c, h, w = id_pixel_values.shape 97 | id_pixel_values = id_pixel_values.view(b * num_inputs, c, h, w) 98 | 99 | shared_id_embeds = self.vision_model(id_pixel_values)[1] 100 | id_embeds = self.visual_projection(shared_id_embeds) 101 | id_embeds_2 = self.visual_projection_2(shared_id_embeds) 102 | 103 | id_embeds = id_embeds.view(b, num_inputs, 1, -1) 104 | id_embeds_2 = id_embeds_2.view(b, num_inputs, 1, -1) 105 | 106 | id_embeds = torch.cat((id_embeds, id_embeds_2), dim=-1) 107 | updated_prompt_embeds = self.fuse_module(prompt_embeds, id_embeds, class_tokens_mask) 108 | 109 | return updated_prompt_embeds 110 | 111 | 112 | if __name__ == "__main__": 113 | PhotoMakerIDEncoder() -------------------------------------------------------------------------------- /runner/narrative_runner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import random 4 | from tqdm import tqdm 5 | from glob import glob 6 | from collections import defaultdict 7 | 8 | from names_dataset import NameDataset 9 | 10 | from .base_runner import BaseRunner, console 11 | from utils.etc_utils import load_jsonl, load_txt 12 | from utils.persona_utils import COUNTRY_ALPHA_LIST 13 | 14 | 15 | class NarrativeRunner(BaseRunner): 16 | def __init__(self, args): 17 | super().__init__(args) 18 | 19 | self.save_dir = os.path.join(self.output_base_dir, 'narrative', f'persona_seed:{args.persona_seed_num}') 20 | os.makedirs(self.save_dir, exist_ok=True) 21 | 22 | self.nd = NameDataset() 23 | 24 | self._load_prompt_template() 25 | self._load_universe_name_list() 26 | self.last_save_chunk_idx_file = os.path.join(self.save_dir, 'last_save_chunk_idx.txt') 27 | 28 | @property 29 | def system_msg(self): 30 | return "You are a helpful assistant." 31 | 32 | @property 33 | def prompt_prefix(self): 34 | return "narrative" 35 | 36 | def _load_persona_commonsense_knowledge(self): 37 | our_peacok = load_jsonl(os.path.join(self.output_base_dir, 'commonsense-knowledge', 'final_output_0.jsonl')) 38 | 39 | self.persona_CK = our_peacok 40 | console.log('[{}] Done Loading Persona Commonsense Knowledge..'.format(self.__class__.__name__)) 41 | 42 | def _load_universe_name_list(self): 43 | all_names = defaultdict(dict) 44 | for country_alpha2_code in COUNTRY_ALPHA_LIST: 45 | top_names = self.nd.get_top_names(n=1000, country_alpha2=country_alpha2_code) 46 | 47 | male_names = top_names[country_alpha2_code]['M'] 48 | female_names = top_names[country_alpha2_code]['F'] 49 | 50 | all_names[country_alpha2_code] = { 51 | 'Male': male_names, 52 | 'Female': female_names, 53 | 'Non-binary': male_names + female_names 54 | } 55 | 56 | self.name_group = all_names 57 | 58 | def _load_prompt_template(self): 59 | self.sentence_form_template = { 60 | 'routine': 'My name is {name}. {demo_sent} {persona_attr} I regularly {commonsense}.', 61 | 'characteristic': 'My name is {name}. {demo_sent} {persona_attr} I {commonsense}.', 62 | 'experience': 'My name is {name}. I {commonsense}. Now, {demo_sent} {persona_attr}', 63 | 'goal': 'My name is {name}. 
{demo_sent} {persona_attr} I plan {commonsense}.', 64 | 'relationship': 'My name is {name}. {demo_sent} {persona_attr} So, I {commonsense}.' 65 | } 66 | 67 | self.sentence_to_narrative_template = load_txt('./templates/narrative.txt') 68 | 69 | def prepare_prompt(self): 70 | 71 | persona_CK = load_jsonl(os.path.join(self.output_base_dir, 'commonsense', f'persona_seed:{self.args.persona_seed_num}', 'final_output.jsonl')) 72 | console.log('[{}] # of Total persona commonsense: {}'.format(self.__class__.__name__, len(persona_CK))) 73 | 74 | if self.args.debug: 75 | persona_CK = random.sample(persona_CK, self.args.debug_sample_num) 76 | 77 | prompts = [] 78 | for ck in tqdm(persona_CK, total=len(persona_CK)): 79 | 80 | persona_attr = ck['persona-attr:sent'] 81 | commonsense = ck['parsed_commonsense_generation'] 82 | relation = ck['commonsense_relation'] 83 | 84 | age = ck['age'] 85 | gender = ck['gender'] 86 | #nationality = ck['nationality'] 87 | birthplace = ck['birthplace'] 88 | residence = ck['residence'] 89 | 90 | birthplace_alpha2_code = ck['birthplace_alpha2_code'] 91 | sampled_name = random.sample(self.name_group[birthplace_alpha2_code][gender], 1)[0] 92 | 93 | demo_sent = "I am a {}-year-old {}. I was born in {}, I currently reside in {}.".format( 94 | #sampled_name, 95 | age, 96 | gender.lower(), 97 | birthplace, residence 98 | ) 99 | 100 | sentence_form = self.sentence_form_template[relation].format( 101 | demo_sent=demo_sent, 102 | persona_attr=persona_attr, 103 | commonsense=commonsense, 104 | name=sampled_name 105 | ) 106 | 107 | prompt = self.sentence_to_narrative_template.format( 108 | sentence=f'{sentence_form}', 109 | ) 110 | 111 | cp_instance = copy.deepcopy(ck) 112 | cp_instance[f'{self.prompt_prefix}_sentence_form'] = sentence_form 113 | cp_instance[f'{self.prompt_prefix}_prompt'] = prompt 114 | cp_instance['name'] = sampled_name 115 | 116 | prompts.append(cp_instance) 117 | 118 | return prompts 119 | 120 | def parse_and_filter(self, generations): 121 | self.dump_output(generations, os.path.join(self.save_dir, 'final_output.jsonl')) 122 | 123 | def _generate_initial_narrative(self, prompts, prompt_prefix=None): 124 | return self.interact(prompts, prompt_prefix=prompt_prefix) -------------------------------------------------------------------------------- /execute_retrieval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json 4 | import copy 5 | import time 6 | import torch 7 | import random 8 | import argparse 9 | import warnings 10 | from tqdm import tqdm 11 | import pandas as pd 12 | from pathlib import Path 13 | 14 | import clip 15 | import faiss 16 | import yaml 17 | import uuid 18 | import requests 19 | import concurrent.futures 20 | 21 | # Suppress warnings 22 | warnings.filterwarnings("ignore") 23 | 24 | 25 | 26 | IMAGE_DB_SOURCE_NAMES = ["cc12m", "redcaps12m", "mathvision", "chartqa", "ai2d"] #, "gqa", "ocr_vqa", "textvqa", "vg"] 27 | IMAGE_SEARCH_NUM = 5 28 | 29 | def load_json(path): 30 | with open(path, 'r', encoding='utf-8') as f: 31 | return json.load(f) 32 | 33 | class ImageDBLoader: 34 | """ 35 | Load image list and image indices. 
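    load_image_db() returns a dict with two entries, each keyed by the dataset
    names in IMAGE_DB_SOURCE_NAMES: 'image_list' maps a dataset to the list of
    image paths read from its metadata parquet files, and 'image_indices' maps
    a dataset to the FAISS index loaded from index_folder/<dataset>/knn.index.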
36 | """ 37 | @staticmethod 38 | def load_image_db(): 39 | image_list = dict() 40 | image_indices = dict() 41 | for image_dataset in IMAGE_DB_SOURCE_NAMES: 42 | data_dir = Path(f'../Sonny-PM/prepare_image_db/embeddings_folder/{image_dataset}/metadata') 43 | df = pd.concat( 44 | pd.read_parquet(parquet_file) 45 | for parquet_file in data_dir.glob('*.parquet') 46 | ) 47 | 48 | ind = faiss.read_index(f'../Sonny-PM/prepare_image_db/index_folder/{image_dataset}/knn.index') 49 | image_list[image_dataset] = df['image_path'].tolist() 50 | image_indices[image_dataset] = ind 51 | 52 | return { 53 | 'image_list': image_list, 54 | 'image_indices': image_indices 55 | } 56 | 57 | image_db = ImageDBLoader.load_image_db() 58 | print(f'Load Image DB Done!') 59 | image_mapper = { 60 | 'cc12m': load_json('../Sonny-PM/prepare_image_db/image_mapper/cc12m.json'), 61 | 'redcaps12m': load_json('../Sonny-PM/prepare_image_db/image_mapper/redcaps12m.json') 62 | } 63 | print(f'Load image mapper Done!') 64 | 65 | MODULE_MAPPER = { 66 | 't2i': 'sdxl-lightning', 67 | 'p-t2i': 'photomaker', 68 | 'web': 'bing', 69 | 'retrieval': 'image_db' 70 | } 71 | 72 | def load_openai_clip_model(device: str): 73 | model, _ = clip.load('ViT-L/14@336px', device=device, jit=False) 74 | return model 75 | 76 | model = load_openai_clip_model('cuda:0') 77 | 78 | @torch.no_grad 79 | def retrieve_image(instance, persona_seed_num): 80 | image_desc = instance['image_description'] 81 | image_uuid = '{}:{}'.format(persona_seed_num, instance['image_uuid']) 82 | 83 | desc_tokens = clip.tokenize(image_desc, truncate=True) 84 | desc_feats = model.encode_text(desc_tokens.to('cuda:0')) 85 | desc_feats /= desc_feats.norm(dim=-1, keepdim=True) 86 | desc_embeds = desc_feats.cpu().detach().numpy().astype('float32') 87 | #print("Done get embedding") 88 | 89 | image_search_result = dict() 90 | for src_name in IMAGE_DB_SOURCE_NAMES: 91 | D, I = image_db['image_indices'][src_name].search(desc_embeds, IMAGE_SEARCH_NUM) 92 | 93 | tmp_result = [] 94 | for item_D, item_I in zip(D[0], I[0]): 95 | if src_name in ['redcaps12m', 'cc12m']: 96 | target_mapper = image_mapper[src_name] 97 | 98 | tmp_result.append({ 99 | 'image_path_from_db': image_db['image_list'][src_name][item_I], #target_mapper[image_db['image_list'][src_name][item_I]], 100 | 'clip_score': str(item_D) 101 | }) 102 | else: 103 | tmp_result.append({ 104 | 'image_path_from_db': image_db['image_list'][src_name][item_I], 105 | 'clip_score': str(item_D) 106 | }) 107 | image_search_result[src_name] = tmp_result 108 | 109 | return image_search_result 110 | 111 | def batch_images(dataset, persona_seed_num): 112 | 113 | target_dataset, non_target_dataset = [], [] 114 | for instance in tqdm(dataset, total=len(dataset)): 115 | module = instance['image_alignment_module'] 116 | model_id = MODULE_MAPPER[module] 117 | if model_id == 'image_db': 118 | 119 | target_dataset.append(instance) 120 | else: 121 | non_target_dataset.append(instance) 122 | 123 | final_dataset = [] 124 | for target_instance in tqdm(target_dataset, total=len(target_dataset)): 125 | retrieved_results = retrieve_image(target_instance, persona_seed_num) 126 | cp_instance = copy.deepcopy(target_instance) 127 | cp_instance['db_searched_results'] = retrieved_results 128 | final_dataset.append(cp_instance) 129 | 130 | return final_dataset + non_target_dataset 131 | 132 | if __name__ == '__main__': 133 | parser = argparse.ArgumentParser() 134 | parser.add_argument('--start-idx', type=int) 135 | parser.add_argument('--end-idx', type=int) 136 | args = 
parser.parse_args() 137 | 138 | for persona_seed_num in range(args.start_idx, args.end_idx): 139 | 140 | dataset = load_json(f'curated_stark/planner-parsed-openai/stark_{persona_seed_num}.json') 141 | generations = batch_images(dataset, persona_seed_num) 142 | 143 | data_save_path = 'curated_stark/plan-and-execute/image_db' 144 | os.makedirs(data_save_path, exist_ok=True) 145 | 146 | with open(os.path.join(data_save_path, f'stark_{persona_seed_num}.json'), 'w', encoding='utf-8') as f: 147 | json.dump(generations, f, ensure_ascii=False, indent='\t') 148 | 149 | -------------------------------------------------------------------------------- /runner/face_runner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import copy 4 | import random 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .base_runner import BaseRunner, console 9 | from utils.etc_utils import load_jsonl, load_txt 10 | 11 | 12 | SYSTEM_MESSAGE = """Given the profile information, your job is to generate a detailed description of a human that includes specific details such as background, tops, bottoms, hair, and shoes. 13 | 14 | For example, 15 | 16 | Profile Information: 17 | - Age: 32 18 | - Gender: Woman 19 | - Nationality: South Korea 20 | 21 | Human Description: A full-body shot, an Asian adult female, fit, small road with trees, straight red above-chest hair, normal-length, white and long sleeve cotton shirt, short plaid skirt in pleated shape, cotton backpack, socks, black leather oxford shoes.""" 22 | 23 | STYLE_TYPES = [ 24 | "full-body", "upper body", "portrait", "headshot", "nearly full-body" 25 | ] 26 | 27 | HUMAN_ATTR_KEY_ORDER = { 28 | 'body shape': ['body shape'], 29 | 'background': ['background'], 30 | 'hair': ['hair style', 'hair color', 'hair length'], 31 | 'special clothings': ['sleeve length', 'type'], 32 | 'one-piece outfits': ['shoulder exposure level', 'length', 'collar shape', 'sleeve length', 'material', 'pattern', 'type'], 33 | 'tops': ['graphic', 'color', 'collar shape', 'top length', 'sleeve length', 'material', 'pattern', 'type'], 34 | 'coats': ['graphic', 'color', 'collar shape', 'coat length', 'material', 'pattern', 'type'], 35 | 'bottoms': ['graphic', 'color', 'bottom shape', 'length', 'material', 'pattern', 'type'], 36 | 'shoes': ['color', 'boots length', 'material', 'pattern', 'type'], 37 | 'bags': ['material', 'type'], 38 | 'hats': ['material', 'type'], 39 | 'belts': ['material'], 40 | 'scarf': ['material', 'pattern'], 41 | 'headband': ['material', 'pattern'], 42 | 'headscarf': ['material', 'pattern'], 43 | 'veil': ['material', 'pattern'], 44 | 'socks': ['material', 'pattern'], 45 | 'ties': ['material', 'pattern'] 46 | } 47 | 48 | class FaceRunner(BaseRunner): 49 | def __init__(self, args): 50 | super().__init__(args) 51 | 52 | self.save_dir = os.path.join(self.output_base_dir, 'face', f'persona_seed:{args.persona_seed_num}') 53 | os.makedirs(self.save_dir, exist_ok=True) 54 | 55 | self._load_human_attribute_pool() 56 | self._load_prompt_template() 57 | self.last_save_chunk_idx_file = os.path.join(self.save_dir, 'last_save_chunk_idx.txt') 58 | 59 | @property 60 | def system_msg(self): 61 | return SYSTEM_MESSAGE 62 | 63 | @property 64 | def prompt_prefix(self): 65 | return "face" 66 | 67 | def _load_prompt_template(self): 68 | self.face_template = load_txt('./templates/face.txt') 69 | 70 | def _load_human_attribute_pool(self): 71 | self.human_attribute_pool = 
load_jsonl(os.path.join('./datasets/cosmic/human_attribute_pool.jsonl')) 72 | 73 | def make_human_attribute_sentence(self, human_attr, age, gender, birthplace): 74 | style_type = random.sample(STYLE_TYPES, 1)[0] 75 | gender = gender.lower() 76 | 77 | template = f'A {style_type} shot, a {age}-years-old {gender} from {birthplace},' #-years-old 78 | for category, attribute in human_attr.items(): 79 | if category == 'face': 80 | continue 81 | if category == 'overall-style': 82 | continue 83 | 84 | if category == 'hair': 85 | assert 'wears' not in attribute.keys() 86 | order_keys = HUMAN_ATTR_KEY_ORDER[category] 87 | for order_key in order_keys: 88 | try: 89 | template += ' {}'.format(attribute[order_key]) 90 | except KeyError as e: 91 | continue 92 | #for k, v in attribute.items(): 93 | # template += f' {v}' 94 | 95 | if category == 'hair': 96 | template += ' hair' 97 | 98 | if category == 'belts': 99 | template += ' belt' 100 | 101 | if category == 'scarf': 102 | template += ' scarf' 103 | 104 | if category == 'headband': 105 | template += ' headband' 106 | 107 | if category == 'headscarf': 108 | template += ' headscarf' 109 | if category == 'veil': 110 | template += ' veil' 111 | if category == 'socks': 112 | template += ' socks' 113 | if category == 'ties': 114 | template += ' tie' 115 | 116 | template += ',' 117 | 118 | template = template[:-1] + '.' 119 | 120 | return template 121 | 122 | def prepare_prompt(self): 123 | persona_sentence = load_jsonl(os.path.join(self.output_base_dir, 'persona-attr', f'final_output_{self.args.persona_seed_num}.jsonl')) 124 | console.log('[{}] # of Total persona sentence: {}'.format(self.__class__.__name__, len(persona_sentence))) 125 | 126 | if self.args.debug: 127 | persona_sentence = random.sample(persona_sentence, self.args.debug_sample_num) 128 | 129 | sample_num = len(persona_sentence) 130 | sampled_human_attribute = random.sample(self.human_attribute_pool, sample_num) 131 | assert len(persona_sentence) == len(sampled_human_attribute) 132 | 133 | prompts = [] 134 | for idx, instance in enumerate(tqdm(persona_sentence, total=len(persona_sentence))): 135 | human_attr = sampled_human_attribute[idx] 136 | 137 | human_attr_sent = self.make_human_attribute_sentence( 138 | human_attr, 139 | instance['age'], instance['gender'], instance['birthplace'] 140 | ) 141 | 142 | cp_instance = copy.deepcopy(instance) 143 | cp_instance[f'{self.prompt_prefix}_prompt'] = human_attr_sent 144 | 145 | prompts.append(cp_instance) 146 | 147 | return prompts 148 | 149 | def parse_and_filter(self, generations): 150 | self.dump_output(generations, os.path.join(self.save_dir, 'final_output.jsonl')) 151 | 152 | def _generate_album(self, prompts, prompt_prefix=None): 153 | return self.interact(prompts, prompt_prefix=prompt_prefix) -------------------------------------------------------------------------------- /runner/album_runner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import copy 4 | import random 5 | from tqdm import tqdm 6 | from glob import glob 7 | from collections import defaultdict 8 | 9 | from .base_runner import BaseRunner, console 10 | from utils.etc_utils import load_jsonl, load_txt 11 | 12 | 13 | SYSTEM_MESSAGE = """Given the sentence related to a person's daily life, your task is to generate five image descriptions that could be stored on the person's mobile device, along with corresponding image categories. You should use the format " (Category: )". 
The image category may include selfies, past memories, screenshots, landmarks, animals, art, celebrities, nature, and food. 14 | 15 | For example, 16 | 17 | My name is Tom. I am a 32-year-old man. I was born in the USA and currently reside there. I have a strong interest in basketball. I played basketball in middle school, but now I work as a chatbot developer at a startup. I enjoy watching the NBA because I love basketball. 18 | 19 | Image descriptions stored on Tom's mobile device: 20 | 1. A photo of a young Tom playing basketball in a middle school gymnasium (Category: Past Memory, Sport) 21 | 2. A selfie of Tom smiling at the Golden State Warriors' arena during a game (Category: Selfie, Sport) 22 | 3. A screenshot of chatbot development code using Python (Category: Screenshot, Computer, Software) 23 | 4. A picture of Tom enjoying a night out with coworkers at a local pub (Category: Social Networking, Food, Drink) 24 | 5. A photo of Tom meeting a famous NBA player at a basketball event (Category: Celebrity, Sport)""" 25 | 26 | class AlbumRunner(BaseRunner): 27 | def __init__(self, args): 28 | super().__init__(args) 29 | 30 | self.save_dir = os.path.join(self.output_base_dir, 'mobile-device', f'persona_seed:{args.persona_seed_num}') 31 | os.makedirs(self.save_dir, exist_ok=True) 32 | 33 | self._load_prompt_template() 34 | self.last_save_chunk_idx_file = os.path.join(self.save_dir, 'last_save_chunk_idx.txt') 35 | 36 | @property 37 | def system_msg(self): 38 | return SYSTEM_MESSAGE 39 | 40 | @property 41 | def prompt_prefix(self): 42 | return "mobile-device" 43 | 44 | def _load_prompt_template(self): 45 | self.album_template = load_txt('./templates/mobile_device.txt') 46 | 47 | def prepare_prompt(self): 48 | 49 | event_graph = load_jsonl(os.path.join(self.output_base_dir, 'event-graph', f'persona_seed:{self.args.persona_seed_num}', 'final_output.jsonl')) 50 | console.log('[{}] # of Total event graph: {}'.format(self.__class__.__name__, len(event_graph))) 51 | 52 | prompts = [] 53 | for instance in tqdm(event_graph, total=len(event_graph)): 54 | narrative = instance['narrative_generation'] 55 | 56 | _prompt = self.album_template.format( 57 | name=instance['name'], 58 | #age=instance['demographic:age'], 59 | #gender=instance['demographic:gender'], 60 | #nationality=instance['demographic:nationality'], 61 | sentence=instance['narrative_generation'] 62 | ) 63 | 64 | cp_instance = copy.deepcopy(instance) 65 | cp_instance[f'{self.prompt_prefix}_prompt'] = _prompt 66 | prompts.append(cp_instance) 67 | 68 | return prompts 69 | 70 | def parse_and_filter(self, generations): 71 | 72 | stat = defaultdict(int) 73 | stat['total_num'] = len(generations) * 5 74 | 75 | results = [] 76 | regex_parsed_results, regex_discard_results = [], [] 77 | for generation in tqdm(generations, total=len(generations)): 78 | parsed_results, discard_results = self._parse_mobile_device_generation(generation[f'{self.prompt_prefix}_generation']) 79 | if len(parsed_results) == 0: 80 | for discard_result in discard_results: 81 | cp_generation = copy.deepcopy(generation) 82 | cp_generation['regex:discard_result'] = discard_result 83 | regex_discard_results.append(cp_generation) 84 | continue 85 | 86 | cp_instance = copy.deepcopy(generation) 87 | cp_instance[f'parsed_{self.prompt_prefix}_generation'] = parsed_results 88 | results.append(cp_instance) 89 | 90 | for parsed_result in parsed_results: 91 | cp_generation = copy.deepcopy(generation) 92 | for k, v in parsed_result.items(): 93 | cp_generation[f'{self.prompt_prefix}:{k}'] = 
parsed_result[k] 94 | regex_parsed_results.append(cp_generation) 95 | 96 | for discard_result in discard_results: 97 | cp_generation = copy.deepcopy(generation) 98 | cp_generation['regex:discard_result'] = discard_result 99 | regex_discard_results.append(cp_generation) 100 | 101 | stat['regex:parsed_result'] = len(regex_parsed_results) 102 | stat['regex:discard_result'] = len(regex_discard_results) 103 | 104 | self.dump_output(results, os.path.join(self.save_dir, 'final_output.jsonl')) 105 | self.dump_output(regex_parsed_results, os.path.join(self.save_dir, 'regex_parsed_output.jsonl')) 106 | self.dump_output(regex_discard_results, os.path.join(self.save_dir, 'regex_discard_output.jsonl')) 107 | self.dump_report(stat, os.path.join(self.save_dir, 'report_output.txt')) 108 | 109 | def _parse_mobile_device_generation(self, generation): 110 | # First, split the generation based on the number prefix (e.g., 1., 2.) 111 | delims = [f'\n{i}. ' for i in range(1, 6)] + [f'\n{i}.' for i in range(1, 6)] 112 | splitted_generation = re.split('|'.join(delims), generation) 113 | 114 | # Second, extract the persona-related information using the regex pattern 115 | pattern = '(?P.*) [\(|\[]Category: (?P.*)[\)|\]]' # [] case should be possible 116 | compiled_regex = re.compile(pattern) 117 | 118 | parsed_results = [] 119 | discard = [] 120 | for generation in splitted_generation: 121 | matched = compiled_regex.match(generation) 122 | 123 | if matched: 124 | parsed_results.append(matched.groupdict()) 125 | else: 126 | discard.append(generation) 127 | 128 | return parsed_results, discard -------------------------------------------------------------------------------- /generate_face_image.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import copy 4 | import random 5 | import argparse 6 | from tqdm import tqdm 7 | 8 | import torch 9 | from diffusers import ( 10 | StableDiffusionXLPipeline, 11 | StableDiffusionPipeline, 12 | UNet2DConditionModel, 13 | EulerDiscreteScheduler, 14 | PixArtAlphaPipeline, 15 | DiffusionPipeline, 16 | DDIMScheduler 17 | ) 18 | from safetensors.torch import load_file 19 | from huggingface_hub import hf_hub_download 20 | 21 | 22 | IMAGE_DB_SOURCE_NAMES = ["coco_train2017", "gqa", "ocr_vqa", "textvqa", "vg"] 23 | IMAGE_SEARCH_NUM = 5 24 | 25 | STYLE_TYPES = [ 26 | "full-body", "upper body", "portrait", "headshot", "nearly full-body" 27 | ] 28 | 29 | HUMAN_ATTR_KEY_ORDER = { 30 | 'body shape': ['body shape'], 31 | 'background': ['background'], 32 | 'hair': ['hair style', 'hair color', 'hair length'], 33 | 'special clothings': ['sleeve length', 'type'], 34 | 'one-piece outfits': ['shoulder exposure level', 'length', 'collar shape', 'sleeve length', 'material', 'pattern', 'type'], 35 | 'tops': ['graphic', 'color', 'collar shape', 'top length', 'sleeve length', 'material', 'pattern', 'type'], 36 | 'coats': ['graphic', 'color', 'collar shape', 'coat length', 'material', 'pattern', 'type'], 37 | 'bottoms': ['graphic', 'color', 'bottom shape', 'length', 'material', 'pattern', 'type'], 38 | 'shoes': ['color', 'boots length', 'material', 'pattern', 'type'], 39 | 'bags': ['material', 'type'], 40 | 'hats': ['material', 'type'], 41 | 'belts': ['material'], 42 | 'scarf': ['material', 'pattern'], 43 | 'headband': ['material', 'pattern'], 44 | 'headscarf': ['material', 'pattern'], 45 | 'veil': ['material', 'pattern'], 46 | 'socks': ['material', 'pattern'], 47 | 'ties': ['material', 'pattern'] 48 | } 49 | 50 | 51 | def 
load_json(datadir): 52 | with open(datadir, 'r', encoding='utf-8') as f: 53 | return json.load(f) 54 | 55 | def load_jsonl(datadir: str): 56 | output = [] 57 | with open(datadir, 'r', encoding='utf-8') as f: 58 | for line in f.readlines(): 59 | output.append(json.loads(line)) 60 | return output 61 | 62 | def dump_json_output(outputs, file_name): 63 | with open(file_name, 'w', encoding='utf-8') as f: 64 | json.dump(outputs, f, ensure_ascii=False, indent='\t') 65 | 66 | def dump_jsonl_output(outputs, file_name=None): 67 | f = open(file_name, 'w', encoding='utf-8') 68 | for output in outputs: 69 | f.write(json.dumps(output) + '\n') 70 | f.close() 71 | 72 | def make_human_attribute_sentence(human_attr, age, gender, birthplace): 73 | style_type = random.choice(STYLE_TYPES) 74 | gender = gender.lower() 75 | template_parts = [f'A {style_type} shot, a {age}-years-old {gender} from {birthplace},'] 76 | 77 | excluded_categories = {'face', 'overall-style'} 78 | suffix_map = { 79 | 'hair': ' hair', 80 | 'belts': ' belt', 81 | 'scarf': ' scarf', 82 | 'headband': ' headband', 83 | 'headscarf': ' headscarf', 84 | 'veil': ' veil', 85 | 'socks': ' socks', 86 | 'ties': ' tie' 87 | } 88 | 89 | for category, attribute in human_attr.items(): 90 | if category in excluded_categories: 91 | continue 92 | 93 | if category == 'hair' and 'wears' in attribute: 94 | continue 95 | 96 | order_keys = HUMAN_ATTR_KEY_ORDER.get(category, []) 97 | for order_key in order_keys: 98 | attr_value = attribute.get(order_key) 99 | if attr_value: 100 | template_parts.append(f' {attr_value}') 101 | 102 | suffix = suffix_map.get(category) 103 | if suffix: 104 | template_parts[-1] += suffix 105 | 106 | template_parts[-1] += ',' 107 | 108 | template = ''.join(template_parts).rstrip(',') + '.' 109 | 110 | return template 111 | 112 | def load_human_attribute_pool(): 113 | return load_jsonl(os.path.join('./datasets/cosmic/human_attribute_pool.jsonl')) 114 | 115 | def load_sdxl_diffusion_model(cache_dir: str, device: str): 116 | base = "stabilityai/stable-diffusion-xl-base-1.0" 117 | repo = "ByteDance/SDXL-Lightning" 118 | ckpt = "sdxl_lightning_8step_unet.safetensors" 119 | 120 | unet = UNet2DConditionModel.from_config(base, subfolder="unet").to(device, torch.float16) 121 | unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device)) 122 | pipe = StableDiffusionXLPipeline.from_pretrained( 123 | base, unet=unet, torch_dtype=torch.float16, cache_dir=cache_dir, variant="fp16" 124 | ).to(device) 125 | 126 | pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing") 127 | return pipe 128 | 129 | def process_face(stark, human_attribute_pool, pipeline, persona_seed_num, save_dir): 130 | total_id_keys = list(set([ele['unique_id'] for ele in stark])) 131 | total_id_num = len(total_id_keys) 132 | sampled_human_attribute = random.sample(human_attribute_pool, total_id_num) 133 | id2human_attr = {key: sampled_human_attribute[i] for i, key in enumerate(total_id_keys)} 134 | 135 | final_results = [] 136 | for idx, instance in enumerate(tqdm(stark, total=len(stark))): 137 | uuid = instance['unique_id'] 138 | age = instance['age'] 139 | gender = instance['gender'] 140 | birthplace = instance['birthplace'] 141 | 142 | human_attr = id2human_attr[uuid] 143 | human_attr_sent = make_human_attribute_sentence(human_attr, age, gender, birthplace) 144 | 145 | face_image = pipeline(human_attr_sent, num_inference_steps=8, guidance_scale=0).images[0] 146 | face_image.save(os.path.join(save_dir, f'{uuid}.png')) 
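# NOTE: the attribute sentence above serves as the text prompt for this persona's face image.
# SDXL-Lightning is a step-distilled SDXL variant (the 8-step UNet checkpoint loaded in
# load_sdxl_diffusion_model), so the pipeline is sampled with only 8 inference steps and
# guidance_scale=0 (classifier-free guidance disabled), as the distilled checkpoint expects.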
147 | 148 | cp_instance = copy.deepcopy(instance) 149 | cp_instance['face_description'] = human_attr_sent 150 | cp_instance['face_image_path'] = os.path.join(save_dir, f'{uuid}.png') 151 | final_results.append(cp_instance) 152 | 153 | return final_results 154 | 155 | if __name__ == '__main__': 156 | 157 | parser = argparse.ArgumentParser() 158 | parser.add_argument('--start-idx', type=int) 159 | parser.add_argument('--end-idx', type=int) 160 | parser.add_argument('--device', type=str) 161 | args = parser.parse_args() 162 | 163 | human_attribute_pool = load_human_attribute_pool() 164 | pipeline = load_sdxl_diffusion_model('./pretrained_diffusion_model', args.device) 165 | 166 | for persona_seed_num in range(args.start_idx, args.end_idx): 167 | stark = load_json(os.path.join(f'./Stark/post-process/stark_{persona_seed_num}.json')) 168 | 169 | save_dir = f'generated_image/human-face/stark_{persona_seed_num}' 170 | os.makedirs(save_dir, exist_ok=True) 171 | 172 | processed_results = process_face(stark, human_attribute_pool, pipeline, persona_seed_num, save_dir) 173 | 174 | curated_save_dir = f'curated_stark/human-face' 175 | os.makedirs(curated_save_dir, exist_ok=True) 176 | 177 | dump_json_output(processed_results, os.path.join(curated_save_dir, f'stark_{persona_seed_num}.json')) -------------------------------------------------------------------------------- /execute_photomaker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gc 3 | import re 4 | import json 5 | import copy 6 | import time 7 | import torch 8 | import random 9 | import argparse 10 | import warnings 11 | from tqdm import tqdm 12 | import pandas as pd 13 | from pathlib import Path 14 | import concurrent.futures 15 | 16 | import torch 17 | from accelerate import PartialState 18 | from diffusers import ( 19 | StableDiffusionXLPipeline, 20 | StableDiffusionPipeline, 21 | UNet2DConditionModel, 22 | EulerDiscreteScheduler, 23 | PixArtAlphaPipeline, 24 | DiffusionPipeline, 25 | DDIMScheduler 26 | ) 27 | from diffusers.utils import load_image 28 | from safetensors.torch import load_file 29 | from huggingface_hub import hf_hub_download 30 | from accelerate.utils import gather_object 31 | from photomaker import PhotoMakerStableDiffusionXLPipeline 32 | 33 | 34 | 35 | # Suppress warnings 36 | warnings.filterwarnings("ignore") 37 | 38 | MODULE_MAPPER = { 39 | 't2i': 'sdxl-lightning', 40 | 'p-t2i': 'photomaker', 41 | 'web': 'bing', 42 | 'retrieval': 'image_db' 43 | } 44 | 45 | def load_json(path): 46 | with open(path, 'r', encoding='utf-8') as f: 47 | return json.load(f) 48 | 49 | def load_sdxl_model(cache_dir: str, device: str): 50 | """Load the SDXL Lightning diffusion model.""" 51 | base = "stabilityai/stable-diffusion-xl-base-1.0" 52 | repo = "ByteDance/SDXL-Lightning" 53 | ckpt = "sdxl_lightning_8step_unet.safetensors" 54 | unet = UNet2DConditionModel.from_config(base, subfolder="unet").to(device, torch.float16) 55 | unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device)) 56 | pipe = StableDiffusionXLPipeline.from_pretrained( 57 | base, unet=unet, torch_dtype=torch.float16, cache_dir=cache_dir, variant="fp16" 58 | ) 59 | pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing") 60 | return pipe.to(device) 61 | 62 | distributed_state = PartialState() 63 | 64 | def memory_optimization(): 65 | # memory deallocation 66 | gc.collect() 67 | 68 | # removing cache 69 | torch.cuda.empty_cache() 70 | 71 | def 
load_photomaker_model(cache_dir: str): 72 | base_model_path = 'SG161222/RealVisXL_V3.0' 73 | 74 | photomaker_ckpt = hf_hub_download(repo_id="TencentARC/PhotoMaker", cache_dir=cache_dir, filename="photomaker-v1.bin", repo_type="model") 75 | 76 | pipe = PhotoMakerStableDiffusionXLPipeline.from_pretrained( 77 | base_model_path, 78 | torch_dtype=torch.bfloat16, 79 | use_safetensors=True, 80 | variant="fp16", 81 | ).to(distributed_state.device) 82 | 83 | pipe.load_photomaker_adapter( 84 | os.path.dirname(photomaker_ckpt), 85 | subfolder="", 86 | weight_name=os.path.basename(photomaker_ckpt), 87 | trigger_word="img" 88 | ) 89 | pipe.id_encoder.to(distributed_state.device) #device) 90 | 91 | pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config) 92 | pipe.fuse_lora() 93 | 94 | return pipe 95 | 96 | cache_dir = './pretrained_diffusion_model' 97 | 98 | parser = argparse.ArgumentParser() 99 | parser.add_argument('--start-idx', type=int) 100 | parser.add_argument('--end-idx', type=int) 101 | args = parser.parse_args() 102 | 103 | model = load_photomaker_model(cache_dir) 104 | 105 | negative_prompt = "nsfw, lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry" 106 | generator = torch.Generator(device=distributed_state.device).manual_seed(42) 107 | 108 | @torch.inference_mode() 109 | def generate_image(target_instance, SAVE_PATH): 110 | 111 | target_image_uuid = target_instance['image_uuid'] 112 | 113 | 114 | num_steps = 50 115 | style_strength_ratio = 20 116 | start_merge_step = int(float(style_strength_ratio) / 100 * num_steps) 117 | if start_merge_step > 30: 118 | start_merge_step = 30 119 | 120 | input_id_path = target_instance['face_image_path'] 121 | input_id_images = [load_image(input_id_path)] 122 | images = model( 123 | prompt=target_instance['modified_image_description'], 124 | input_id_images=input_id_images, 125 | negative_prompt=negative_prompt, 126 | num_images_per_prompt=1, 127 | num_inference_steps=num_steps, 128 | start_merge_step=start_merge_step, 129 | generator=generator, 130 | #guidance_scale=5, 131 | ).images 132 | 133 | save_paths = [] 134 | for idx, image in enumerate(images): 135 | save_paths.append(os.path.join(SAVE_PATH, f'{idx}:{target_image_uuid}.png')) 136 | image.save(os.path.join(SAVE_PATH, f'{idx}:{target_image_uuid}.png')) 137 | 138 | cp_instance = copy.deepcopy(target_instance) 139 | cp_instance['image_save_paths'] = save_paths 140 | return cp_instance 141 | 142 | 143 | def batch_images(dataset, SAVE_PATH): 144 | 145 | target_dataset, non_target_dataset = [], [] 146 | for instance in tqdm(dataset, total=len(dataset)): 147 | module = instance['image_alignment_module'] 148 | model_id = MODULE_MAPPER[module] 149 | 150 | if model_id == 'photomaker': 151 | if os.path.exists(os.path.join(SAVE_PATH, '0:{}.png'.format(instance['image_uuid']))): 152 | non_target_dataset.append(instance) 153 | continue 154 | target_dataset.append(instance) 155 | else: 156 | non_target_dataset.append(instance) 157 | 158 | print('# of total dataset:', len(dataset)) 159 | print('# of target dataset:', len(target_dataset)) 160 | print('# of non-target dataset:', len(non_target_dataset)) 161 | 162 | completions_per_process = [] 163 | with distributed_state.split_between_processes(target_dataset) as batched_prompts: 164 | for batch in tqdm(batched_prompts, total=len(batched_prompts)): 165 | 166 | memory_optimization() 167 | result = 
generate_image(batch, SAVE_PATH) 168 | completions_per_process.append(result) 169 | 170 | completions_gather = gather_object(completions_per_process) 171 | completions = completions_gather[: len(target_dataset)] 172 | 173 | memory_optimization() 174 | 175 | print('# of final dataset:', len(completions) + len(non_target_dataset)) 176 | return completions + non_target_dataset 177 | 178 | if __name__ == '__main__': 179 | 180 | for persona_seed_num in range(args.start_idx, args.end_idx): 181 | dataset = load_json(f'curated_stark/planner-parsed-openai/stark_{persona_seed_num}.json') 182 | SAVE_PATH = f'generated_image/plan-and-execute/generator/stark_{persona_seed_num}' 183 | os.makedirs(SAVE_PATH, exist_ok=True) 184 | 185 | generations = batch_images(dataset, SAVE_PATH) 186 | 187 | data_save_path = 'curated_stark/plan-and-execute/generator' 188 | os.makedirs(data_save_path, exist_ok=True) 189 | 190 | with open(os.path.join(data_save_path, f'stark_{persona_seed_num}.json'), 'w', encoding='utf-8') as f: 191 | json.dump(generations, f, ensure_ascii=False, indent='\t') 192 | -------------------------------------------------------------------------------- /scripts/run_mcu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | python generate_sonny_dataset.py --run-id sonny_v1 \ 5 | --diffusion-model-id jialuliluka/selma-xl \ 6 | --runner-name image \ 7 | --cache-dir pretrained_diffusion_model \ 8 | --model gpt-3.5-turbo-0125 \ 9 | --persona-seed-num demo 10 | 11 | stabilityai/stable-diffusion-xl-base-1.0 \ 12 | 13 | python generate_sonny_dataset.py --run-id sonny_v4 \ 14 | --model gpt-3.5-turbo-0125 \ 15 | --temperature 0.9 \ 16 | --top-p 1.0 \ 17 | --frequency-penalty .0 \ 18 | --presence-penalty 0. 
\ 19 | --max-tokens 4096 \ 20 | --runner-name dialogue \ 21 | --persona-seed-num 0 \ 22 | --do-parse-filter 23 | 24 | --debug \ 25 | --debug-sample-num 10 \ 26 | --shard-num 1 \ 27 | 28 | python generate_sonny_dataset.py --run-id sonny_v4 \ 29 | --model gpt-3.5-turbo-0125 \ 30 | --temperature 0.9 \ 31 | --top-p 1.0 \ 32 | --frequency-penalty 0.4 \ 33 | --presence-penalty 0.4 \ 34 | --max-tokens 1024 \ 35 | --runner-name face-image \ 36 | --debug \ 37 | --debug-sample-num 100 \ 38 | --shard-num 1 \ 39 | --persona-seed-num 10 \ 40 | --do-parse-filter 41 | 42 | python generate_sonny_dataset.py --run-id sonny_v2 \ 43 | --diffusion-model-id jialuliluka/selma-xl \ 44 | --runner-name album-image \ 45 | --cache-dir pretrained_diffusion_model \ 46 | --model gpt-3.5-turbo-0125 \ 47 | --do-parse-filter 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | python generate_stark_dialogue.py --run-id stark_v1 \ 58 | --model gpt-3.5-turbo-0125 \ 59 | --temperature 0.9 \ 60 | --top-p 1.0 \ 61 | --frequency-penalty 0.0 \ 62 | --presence-penalty 0.0 \ 63 | --max-tokens 2048 \ 64 | --runner-name persona-attr \ 65 | --shard-num 1 \ 66 | --debug \ 67 | --debug-sample-num 10 68 | 69 | python generate_stark_dialogue.py --run-id stark_v1 \ 70 | --model gpt-3.5-turbo-0125 \ 71 | --temperature 0.9 \ 72 | --top-p 1.0 \ 73 | --frequency-penalty 0.0 \ 74 | --presence-penalty 0.0 \ 75 | --max-tokens 2048 \ 76 | --runner-name persona-attr \ 77 | --shard-num 1 \ 78 | --do-parse-filter 79 | 80 | 81 | python generate_stark_dialogue.py --run-id stark_v1 \ 82 | --model gpt-3.5-turbo-0125 \ 83 | --temperature 0.9 \ 84 | --top-p 1.0 \ 85 | --frequency-penalty 0.0 \ 86 | --presence-penalty 0.0 \ 87 | --max-tokens 2048 \ 88 | --runner-name face \ 89 | --persona-seed-num 0 \ 90 | --shard-num 1 91 | 92 | python generate_stark_dialogue.py --run-id stark_v1 \ 93 | --model gpt-3.5-turbo-0125 \ 94 | --temperature 0.9 \ 95 | --top-p 1.0 \ 96 | --frequency-penalty 0.0 \ 97 | --presence-penalty 0.0 \ 98 | --max-tokens 2048 \ 99 | --runner-name face \ 100 | --persona-seed-num 0 \ 101 | --shard-num 1 \ 102 | --do-parse-filter 103 | 104 | 105 | 106 | python generate_stark_dialogue.py --run-id stark_v1 \ 107 | --model gpt-3.5-turbo-0125 \ 108 | --temperature 0.9 \ 109 | --top-p 1.0 \ 110 | --frequency-penalty .0 \ 111 | --presence-penalty .0 \ 112 | --max-tokens 1024 \ 113 | --runner-name commonsense \ 114 | --persona-seed-num 0 \ 115 | --debug \ 116 | --debug-sample-num 20 117 | 118 | python generate_stark_dialogue.py --run-id stark_v1 \ 119 | --model gpt-3.5-turbo-0125 \ 120 | --temperature 0.9 \ 121 | --top-p 1.0 \ 122 | --frequency-penalty .0 \ 123 | --presence-penalty .0 \ 124 | --max-tokens 1024 \ 125 | --runner-name commonsense \ 126 | --persona-seed-num 0 \ 127 | --do-parse-filter 128 | 129 | 130 | 131 | 132 | python generate_stark_dialogue.py --run-id stark_v1 \ 133 | --model gpt-3.5-turbo-0125 \ 134 | --temperature 0.9 \ 135 | --top-p 0.95 \ 136 | --frequency-penalty 1.0 \ 137 | --presence-penalty 0.6 \ 138 | --max-tokens 2048 \ 139 | --runner-name narrative \ 140 | --debug \ 141 | --debug-sample-num 20 \ 142 | --persona-seed-num 0 143 | 144 | python generate_stark_dialogue.py --run-id stark_v1 \ 145 | --model gpt-3.5-turbo-0125 \ 146 | --temperature 0.9 \ 147 | --top-p 0.95 \ 148 | --frequency-penalty 1.0 \ 149 | --presence-penalty 0.6 \ 150 | --max-tokens 2048 \ 151 | --runner-name narrative \ 152 | --debug \ 153 | --debug-sample-num 20 \ 154 | --persona-seed-num 0 \ 155 | --do-parse-filter 156 | 157 | 158 | 159 | python 
generate_stark_dialogue.py --run-id stark_v1 \ 160 | --model gpt-3.5-turbo-0125 \ 161 | --temperature 0.9 \ 162 | --top-p 1.0 \ 163 | --frequency-penalty 0. \ 164 | --presence-penalty 0. \ 165 | --max-tokens 4096 \ 166 | --runner-name event \ 167 | --debug \ 168 | --debug-sample-num 20 \ 169 | --persona-seed-num 0 170 | 171 | python generate_stark_dialogue.py --run-id stark_v1 \ 172 | --model gpt-3.5-turbo-0125 \ 173 | --temperature 0.9 \ 174 | --top-p 1.0 \ 175 | --frequency-penalty 0. \ 176 | --presence-penalty 0. \ 177 | --max-tokens 4096 \ 178 | --runner-name event \ 179 | --debug \ 180 | --debug-sample-num 20 \ 181 | --persona-seed-num 0 \ 182 | --do-parse-filter 183 | 184 | 185 | 186 | 187 | python generate_stark_dialogue.py --run-id stark_v1 \ 188 | --model gpt-3.5-turbo-0125 \ 189 | --temperature 0.9 \ 190 | --top-p 1.0 \ 191 | --frequency-penalty 0.0 \ 192 | --presence-penalty 0.0 \ 193 | --max-tokens 1024 \ 194 | --runner-name album \ 195 | --debug \ 196 | --debug-sample-num 20 \ 197 | --persona-seed-num 0 198 | 199 | python generate_stark_dialogue.py --run-id stark_v1 \ 200 | --model gpt-3.5-turbo-0125 \ 201 | --temperature 0.9 \ 202 | --top-p 1.0 \ 203 | --frequency-penalty 0.0 \ 204 | --presence-penalty 0.0 \ 205 | --max-tokens 1024 \ 206 | --runner-name album \ 207 | --debug \ 208 | --debug-sample-num 20 \ 209 | --persona-seed-num 0 \ 210 | --do-parse-filter 211 | 212 | 213 | for session_num in {1..6}; do 214 | python generate_stark_dialogue.py --run-id stark_v1 \ 215 | --model gpt-3.5-turbo-0125 \ 216 | --temperature 0.9 \ 217 | --top-p 1.0 \ 218 | --frequency-penalty 0.0 \ 219 | --presence-penalty 0.0 \ 220 | --max-tokens 4096 \ 221 | --runner-name dialogue \ 222 | --debug \ 223 | --debug-sample-num 20 \ 224 | --persona-seed-num 0 \ 225 | --target-session-num "$session_num" \ 226 | 227 | python generate_stark_dialogue.py --run-id stark_v1 \ 228 | --model gpt-3.5-turbo-0125 \ 229 | --temperature 0.9 \ 230 | --top-p 1.0 \ 231 | --frequency-penalty 0.0 \ 232 | --presence-penalty 0.0 \ 233 | --max-tokens 4096 \ 234 | --runner-name dialogue \ 235 | --debug \ 236 | --debug-sample-num 20 \ 237 | --persona-seed-num 0 \ 238 | --target-session-num "$session_num" \ 239 | --do-parse-filter 240 | done 241 | 242 | python make_final_dataset.py 243 | 244 | python postprocess_final_dataset.py 245 | 246 | python generate_face_image.py \ 247 | --start-idx 0 \ 248 | --end-idx 1 \ 249 | --device cuda:0 250 | 251 | python plan_runner.py \ 252 | --start-idx 0 \ 253 | --end-idx 1 254 | 255 | python plan_runner.py \ 256 | --start-idx 0 \ 257 | --end-idx 1 \ 258 | --do-planner 259 | 260 | python execute_photomaker.py \ 261 | --start-idx 0 \ 262 | --end-idx 1 263 | 264 | python execute_sdxl.py \ 265 | --start-idx 0 \ 266 | --end-idx 1 267 | 268 | python execute_retrieval.py \ 269 | --start-idx 0 \ 270 | --end-idx 1 271 | 272 | python execute_web_search.py \ 273 | --start-idx 0 \ 274 | --end-idx 1 -------------------------------------------------------------------------------- /runner/event_runner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import random 4 | from tqdm import tqdm 5 | from collections import defaultdict 6 | from glob import glob 7 | 8 | from .base_runner import BaseRunner, console 9 | from utils.etc_utils import load_jsonl, load_txt 10 | 11 | 12 | SYSTEM_MESSAGE = """You should generate a temporal event graph composed of daily events occuring in a person's life. The temporal event graph contains nodes and edges. 
Each node represents a daily event which is written in two or three sentences. Each edge represents the causal relationship between two nodes (events), i.e., a past event -> current event. The current event is determined by how much time has passed since the past event and what personal experiences the person had during that period. You must generate the temporal event graph following the guidelines below. 13 | 14 | [Guideline] 15 | - The graph is represented in the form of a json list. 16 | - Each entry is a python dictionary containing the following keys: "id", "event", "date", "caused_by". 17 | - The "id" field contains a unique identifier for the current event. 18 | - The "event" field contains a description of the current event. 19 | - The "date" field contains a specific date of the current event and is represented in the form of "%Y.%m.%d". 20 | - The "caused_by" field represents the edge (i.e., a past event) and is represented in the form of a python dictionary containing the following keys: "caused_by:id", "caused_by:time_interval", "caused_by:experience_op", "caused_by:experience". 21 | - The "caused_by:id" field contains an "id" of the past event that has caused the current event. 22 | - The "caused_by:time_interval" field contains a time interval between the past event and the current event. 23 | - The "caused_by:experience_op" field contains an episodic experience operation. 24 | - The "caused_by:experience" field contains a short description of the added or updated episodic experience. 25 | - The unit of time interval is ["hour", "day", "week", "month", "year"]. 26 | - The selected time interval should be formatted as "