├── data ├── split │ ├── gpt2-output │ │ └── README.md │ ├── open-gpt-text │ │ └── README.md │ ├── open-llama-text │ │ └── README.md │ ├── open-palm-text │ │ └── README.md │ └── open-web-text │ │ └── README.md ├── checkpoint │ └── README.md ├── baselines │ ├── zerogpt_classifier_output │ │ └── README.md │ └── openai_classifier_output │ │ └── README.md └── download.py ├── memoizer ├── __init__.py └── memoizer.py ├── pipeline ├── utils │ ├── __init__.py │ └── reduce_funcs.py ├── __init__.py ├── component │ ├── __init__.py │ ├── data_types.py │ ├── web_component.py │ ├── io_component.py │ ├── misc_component.py │ └── text_component.py ├── lib │ ├── report_entry_count.py │ ├── sanitize_dataset.py │ ├── build_abalation.py │ ├── import_gpt2_original.py │ ├── import_zerogpt_result.py │ └── import_openai_result.py ├── pipeline_base.py └── pipeline_executor.py ├── evaluator ├── toolkit │ ├── __init__.py │ ├── curves.py │ ├── loader.py │ └── statistics.py ├── models │ ├── t5_hidden │ │ ├── t5_get_hidden_states.py │ │ └── t5_pipeline.py │ └── t5_sentinel │ │ ├── t5_get_hidden_states.py │ │ └── t5_pipeline.py ├── plot │ ├── plot_det.py │ ├── plot_pca.py │ ├── plot_tsne.py │ ├── plot_confusion_mat.py │ └── plot_roc.py ├── data_analysis │ ├── plot_adjative_distribution.py │ ├── plot_token_distribution.py │ ├── plot_length_distribution.py │ └── plot_character_distribution.py └── interpret │ ├── sample_pca.py │ └── integrated_gradient.py ├── result └── data │ ├── dataset_adj_count.pdf │ ├── dataset_punc_count.pdf │ ├── dataset_length_token.pdf │ ├── dataset_token_count.pdf │ └── dataset_length_token_cut.pdf ├── cache └── readme.md ├── detector ├── t5_hidden │ ├── __init__.py │ ├── settings.yaml │ ├── dataset.py │ ├── utilities.py │ ├── types.py │ ├── __main__.py │ └── model.py ├── t5_sentinel │ ├── __init__.py │ ├── settings.yaml │ ├── settings_0613_full.yaml │ ├── dataset.py │ ├── utilities.py │ ├── types.py │ ├── __main__.py │ └── model.py ├── zerogpt_classifier │ ├── zerogpt_classifier_client.yaml │ └── zerogpt_classifier_client.py ├── openai_classifier │ ├── openai_classifier_client.yaml │ └── openai_classifier_client.py └── solaiman_classifier │ ├── solaiman_requirements.txt │ └── evaluate.ipynb ├── requirements.txt ├── .gitignore ├── generator ├── gpt4 │ ├── gpt4_client.yaml │ └── gpt4_client.py ├── palm │ ├── palm_client.yaml │ ├── google_cloud_init.py │ └── palm_pipe.py ├── chatgpt │ ├── chatgpt_client.yaml │ └── chatgpt_client.py └── bard │ ├── generator-bard.yaml │ └── client.py ├── LICENSE └── README.md /data/split/gpt2-output/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/split/open-gpt-text/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/split/open-llama-text/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/split/open-palm-text/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/split/open-web-text/README.md: -------------------------------------------------------------------------------- 1 | 
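Human-written split of the OpenLLMText data (sourced from OpenWebText). After running `data/download.py`, this folder is expected to contain the `train-dirty.jsonl`, `valid-dirty.jsonl`, and `test-dirty.jsonl` subsets consumed by the training and evaluation scripts.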
-------------------------------------------------------------------------------- /memoizer/__init__.py: -------------------------------------------------------------------------------- 1 | from memoizer.memoizer import memoize 2 | -------------------------------------------------------------------------------- /pipeline/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .reduce_funcs import * 2 | -------------------------------------------------------------------------------- /data/checkpoint/README.md: -------------------------------------------------------------------------------- 1 | Where the checkpoints are stored. 2 | -------------------------------------------------------------------------------- /evaluator/toolkit/__init__.py: -------------------------------------------------------------------------------- 1 | from .statistics import * 2 | from .curves import * 3 | from .loader import * 4 | -------------------------------------------------------------------------------- /pipeline/utils/reduce_funcs.py: -------------------------------------------------------------------------------- 1 | 2 | def reduce_list(l1: list, l2: list) -> list: 3 | l1.extend(l2) 4 | return l1 5 | -------------------------------------------------------------------------------- /pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | from . import component as P 2 | from .pipeline_executor import PipelineExecutor 3 | from . import utils 4 | 5 | -------------------------------------------------------------------------------- /result/data/dataset_adj_count.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarkChenYutian/T5-Sentinel-public/HEAD/result/data/dataset_adj_count.pdf -------------------------------------------------------------------------------- /result/data/dataset_punc_count.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarkChenYutian/T5-Sentinel-public/HEAD/result/data/dataset_punc_count.pdf -------------------------------------------------------------------------------- /result/data/dataset_length_token.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarkChenYutian/T5-Sentinel-public/HEAD/result/data/dataset_length_token.pdf -------------------------------------------------------------------------------- /result/data/dataset_token_count.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarkChenYutian/T5-Sentinel-public/HEAD/result/data/dataset_token_count.pdf -------------------------------------------------------------------------------- /result/data/dataset_length_token_cut.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarkChenYutian/T5-Sentinel-public/HEAD/result/data/dataset_length_token_cut.pdf -------------------------------------------------------------------------------- /cache/readme.md: -------------------------------------------------------------------------------- 1 | # Cache Directory 2 | 3 | This directory contains intermediate calculation results from other files / function calls so that they can be memoized to accelerate later calculations.
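A minimal sketch of how results end up in this directory (the function name and cache file below are illustrative only; see `pipeline/lib/import_openai_result.py` for a real use of the decorator):

```python
from pathlib import Path
from memoizer import memoize

def argeq(a, b):
    # Treat two calls as "the same" when their first positional arguments match
    return a[0] == b[0]

@memoize(Path("cache", "expensive_step.pt"), arg_eq=argeq)
def expensive_step(path: Path):
    return path.stat().st_size  # stand-in for some heavy computation

# The first call computes the result and saves it into cache/expensive_step.pt;
# later calls with a matching argument reload the stored result instead of recomputing.
```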
-------------------------------------------------------------------------------- /pipeline/component/__init__.py: -------------------------------------------------------------------------------- 1 | from .misc_component import * 2 | from .text_component import * 3 | from .web_component import * 4 | from .io_component import * 5 | 6 | from .data_types import * -------------------------------------------------------------------------------- /pipeline/component/data_types.py: -------------------------------------------------------------------------------- 1 | import typing as Tp 2 | import numpy as np 3 | 4 | class ArrayEntry(Tp.TypedDict): 5 | uid: str 6 | data: np.array 7 | extra: Tp.Optional[dict] 8 | -------------------------------------------------------------------------------- /detector/t5_hidden/__init__.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from detector.t5_hidden.types import Config 3 | 4 | 5 | with open("detector/t5_hidden/settings.yaml", "r") as f: 6 | config = Config(**yaml.safe_load(f)) 7 | -------------------------------------------------------------------------------- /detector/t5_sentinel/__init__.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from detector.t5_sentinel.types import Config 3 | 4 | 5 | with open("detector/t5_sentinel/settings.yaml", "r") as f: 6 | config = Config(**yaml.safe_load(f)) 7 | -------------------------------------------------------------------------------- /data/baselines/zerogpt_classifier_output/README.md: -------------------------------------------------------------------------------- 1 | # ZeroGPT Classifier Output 2 | 3 | This folder collects the classification result of ZeroGPT text classifier (GPT detector) 4 | 5 | https://www.zerogpt.com/ 6 | 7 | The results are collected automatically by async web client in `./src/baseline/zerogpt_client.py` 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.9.1 2 | bardapi==0.1.38 3 | click==8.1.7 4 | matplotlib==3.7.4 5 | nltk==3.8.1 6 | numpy==1.24.4 7 | openai==1.6.1 8 | protobuf==4.25.1 9 | pydantic==2.5.3 10 | PyYAML==6.0.1 11 | Requests==2.31.0 12 | scikit_learn==1.3.2 13 | seaborn==0.13.1 14 | torch==2.1.2+cu121 15 | tqdm==4.66.1 16 | transformers==4.36.2 17 | Unidecode==1.3.7 18 | vertexai==0.0.1 19 | wandb==0.16.1 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE Settings 2 | /.idea/ 3 | /.vscode/ 4 | 5 | # Cached Files 6 | *.DS_Store 7 | **/__pycache__/ 8 | **/wandb/ 9 | **/storage/ 10 | result/cache/*.json 11 | 12 | # Dataset 13 | *.jsonl 14 | 15 | # Weight Files 16 | *.pt 17 | *.pickle 18 | *.pkl 19 | 20 | # Sensitive Files 21 | **/secret.json 22 | **/client_state.json 23 | **/cookies.pkl 24 | **/secrets.yaml 25 | 26 | # Log Files 27 | *.log 28 | *.lock 29 | -------------------------------------------------------------------------------- /generator/gpt4/gpt4_client.yaml: -------------------------------------------------------------------------------- 1 | ClientName: "gpt4_client" 2 | ClientRoot: "./generator/gpt4/" 3 | 4 | MaxAsyncWorkerCnt: 20 5 | MaxRetryCnt: 3 6 | 7 | Config: 8 | MaxTokenCount: 10000000 9 | MaxLengthAllowed: 2000 10 | WaitTime: 60 # in seconds 11 | InputDirectory: 
"./data/split/open-web-text" 12 | OutputDirectory: "./data/split/open-gpt4-text" 13 | Sampling: 1 # No sampling 14 | InputSubsets: 15 | - "test-dirty" 16 | # We are using test-dirty since the text sent to model is not sanitized 17 | # for chatGPT 18 | -------------------------------------------------------------------------------- /generator/palm/palm_client.yaml: -------------------------------------------------------------------------------- 1 | ClientName: "palm_client" 2 | ClientRoot: "./generator/palm/" 3 | 4 | RateControl: 5 | entry_per_min: 45 6 | min_wait_time: 0.1 7 | 8 | Config: 9 | project: "llm-sentinel" 10 | ModelName: "text-bison@001" 11 | Temperature: 0.4 12 | # We will clip the text up to first 512 tokens, so we let PaLM generate a little bit more 13 | MaxDecodeSteps: 640 # Up to 1024, the number of tokens output 14 | top_p: 0.95 15 | top_k: 40 16 | retry: 3 17 | # setup vertex AI 18 | CredentialPath: "./generator/palm/secret.json" 19 | -------------------------------------------------------------------------------- /data/baselines/openai_classifier_output/README.md: -------------------------------------------------------------------------------- 1 | # OpenAI Classifier Output 2 | 3 | This folder collects the classification result of OpenAI text classifier (GPT detector) 4 | 5 | https://platform.openai.com/ai-text-classifier 6 | 7 | The results are collected automatically by async web client in `./src/baseline/openai_client.py` 8 | 9 | * The file `gpt2-output-gpt-openai.jsonl` is the classification result of dataset `xl-1542M.test.jsonl` in `GPT2-output` dataset. 10 | 11 | * The file `gpt2-output-web-openai.jsonl` is the classification result of dataset `webtext.test.jsonl` in `GPT2-output` dataset. 12 | -------------------------------------------------------------------------------- /generator/chatgpt/chatgpt_client.yaml: -------------------------------------------------------------------------------- 1 | ClientName: "chatgpt_client" 2 | ClientRoot: "./generator/chatgpt/" 3 | 4 | MaxAsyncWorkerCnt: 20 5 | MaxRetryCnt: 3 6 | 7 | Config: 8 | MaxTokenCount: 50000000 9 | MaxLengthAllowed: 2000 10 | WaitTime: 60 # in seconds 11 | InputDirectory: "./data/original/open-web-text" 12 | OutputDirectory: "./data/original/open-gpt-text" 13 | Sampling: 0.02 14 | InputSubsets: 15 | - "urlsf_subset00" 16 | - "urlsf_subset01" 17 | - "urlsf_subset02" 18 | - "urlsf_subset03" 19 | - "urlsf_subset04" 20 | - "urlsf_subset05" 21 | - "urlsf_subset06" 22 | - "urlsf_subset09" 23 | -------------------------------------------------------------------------------- /pipeline/lib/report_entry_count.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | 4 | def report(source_path: str, file_name: str): 5 | with open(Path("data", "split", source_path, file_name), "r") as f: 6 | lines = f.read().strip().split("\n") 7 | print(f"From: {source_path}, \tSubset: {file_name}, \tCount: {len(lines)}") 8 | 9 | 10 | if __name__ == "__main__": 11 | sources = ["open-web-text", "open-gpt-text", "open-palm-text", "open-llama-text", "gpt2-output"] 12 | subsets = ["train-dirty.jsonl", "valid-dirty.jsonl", "test-dirty.jsonl"] 13 | 14 | for source in sources: 15 | for subset in subsets: 16 | report(source, subset) 17 | -------------------------------------------------------------------------------- /generator/bard/generator-bard.yaml: -------------------------------------------------------------------------------- 1 | # Amount of time to wait between 
requests. 2 | break-seconds: 5 3 | 4 | # Maximum number of requests to make before silence. 5 | requests-limit: 150 6 | 7 | # Minutes of silence before next request period. 8 | silence-minutes: 30 9 | 10 | # Seconds to wait for a response before timing out. 11 | timeout-seconds: 10 12 | 13 | # Path to the original dataset. 14 | import-path: data/split/open-web-text 15 | 16 | # Path to the file where generated text will be saved. 17 | export-path: data/split/open-bard-text 18 | 19 | # List of subsets from the original dataset to be used. 20 | required-files: 21 | - test.jsonl 22 | 23 | # Prompt to be used for generating text. 24 | question-prompt: "Rephrase the following without any additional details (i.e. no summarized title, no internet access):" 25 | -------------------------------------------------------------------------------- /pipeline/lib/sanitize_dataset.py: -------------------------------------------------------------------------------- 1 | from pipeline import P, PipelineExecutor 2 | from pathlib import Path 3 | 4 | 5 | def sanitize(from_subset, to_subset): 6 | clean_pipeline = P.FromJsonStr() \ 7 | >> P.WriteExtra({"variant": "sanitized"}) \ 8 | >> P.StripNewline() \ 9 | >> P.CastUnicode() \ 10 | >> P.RemovePunc() \ 11 | >> P.ToLower() \ 12 | >> P.RemoveContSpace() \ 13 | >> P.ToJsonStr() 14 | 15 | executor = PipelineExecutor(worker_num=None) 16 | executor.parallel_file_mapping( 17 | clean_pipeline, 18 | from_files=[Path("./data/split/", subset) for subset in from_subset], 19 | to_files=[Path("./data/split/", subset) for subset in to_subset], 20 | verbose=True 21 | ) 22 | 23 | -------------------------------------------------------------------------------- /detector/t5_hidden/settings.yaml: -------------------------------------------------------------------------------- 1 | id: t5-small.0621.a 2 | 3 | mode: training 4 | 5 | epochs: 15 6 | 7 | backbone: 8 | name: t5-small 9 | model_max_length: 512 10 | 11 | dataset: 12 | - label: Human 13 | token: 14 | token_id: 32099 15 | root: data/split/open-web-text 16 | - label: ChatGPT 17 | token: 18 | token_id: 32098 19 | root: data/split/open-gpt-text 20 | - label: PaLM 21 | token: 22 | token_id: 32097 23 | root: data/split/open-palm-text 24 | - label: LLaMA 25 | token: 26 | token_id: 32096 27 | root: data/split/open-llama-text 28 | - label: GPT2 29 | token: 30 | token_id: 32095 31 | root: data/split/gpt2-output 32 | 33 | dataloader: 34 | batch_size: 32 35 | num_workers: 4 36 | 37 | tokenizer: 38 | padding: true 39 | truncation: true 40 | return_tensors: pt 41 | 42 | optimizer: 43 | lr: 1.0e-4 44 | weight_decay: 5.0e-5 45 | batch_size: 32 46 | -------------------------------------------------------------------------------- /detector/t5_sentinel/settings.yaml: -------------------------------------------------------------------------------- 1 | id: t5-small.0613.b 2 | 3 | mode: training 4 | 5 | epochs: 15 6 | 7 | backbone: 8 | name: t5-small 9 | model_max_length: 512 10 | 11 | dataset: 12 | - label: Human 13 | token: 14 | token_id: 32099 15 | root: data/split/open-web-text 16 | - label: ChatGPT 17 | token: 18 | token_id: 32098 19 | root: data/split/open-gpt-text 20 | - label: PaLM 21 | token: 22 | token_id: 32097 23 | root: data/split/open-palm-text 24 | - label: LLaMA 25 | token: 26 | token_id: 32096 27 | root: data/split/open-llama-text 28 | - label: GPT2 29 | token: 30 | token_id: 32095 31 | root: data/split/gpt2-output 32 | 33 | dataloader: 34 | batch_size: 16 35 | num_workers: 4 36 | 37 | tokenizer: 38 | padding: true 39 | truncation: 
true 40 | return_tensors: pt 41 | 42 | optimizer: 43 | lr: 1.0e-4 44 | weight_decay: 5.0e-5 45 | batch_size: 128 46 | -------------------------------------------------------------------------------- /memoizer/memoizer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import typing as Tp 3 | from pathlib import Path 4 | 5 | I = Tp.TypeVar("I") 6 | 7 | def memoize(cache_path: Path, arg_eq): 8 | assert cache_path.parent.exists() 9 | 10 | def memoize_impl(func: Tp.Callable[[I], Tp.Any]): 11 | if not cache_path.exists(): 12 | torch.save(dict(), cache_path) 13 | 14 | def wrapper(*args: I): 15 | result_dict = torch.load(cache_path) 16 | for prev_args in result_dict: 17 | if arg_eq(args, prev_args): 18 | print(f"Reusing existing cache from {cache_path}") 19 | return result_dict[prev_args] 20 | 21 | print("Cache Miss / Eviction since argument does not match") 22 | result = func(*args) 23 | result_dict[args] = result 24 | torch.save(result_dict, cache_path) 25 | return result 26 | return wrapper 27 | return memoize_impl 28 | 29 | -------------------------------------------------------------------------------- /evaluator/toolkit/curves.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Sequence 3 | from pipeline import P 4 | 5 | from .statistics import quick_statistics_binary, fpr, tpr, fnr 6 | 7 | 8 | def get_roc_binary(predictions: Sequence[P.ArrayEntry], pos_label, steps=100) -> np.array: 9 | result = np.zeros((2, steps + 2)) 10 | for step in range(0, steps + 1): 11 | thresh = step / steps 12 | results = quick_statistics_binary(predictions, pos_label, thresh) 13 | result[0, step] = fpr(*results) 14 | result[1, step] = tpr(*results) 15 | return result 16 | 17 | 18 | def get_det_binary(predictions: Sequence[P.ArrayEntry], pos_label, steps=100) -> np.array: 19 | result = np.zeros((2, steps + 1)) 20 | for step in range(0, steps + 1): 21 | thresh = step / steps 22 | results = quick_statistics_binary(predictions, pos_label, thresh) 23 | result[0, step] = fpr(*results) 24 | result[1, step] = fnr(*results) 25 | return result 26 | -------------------------------------------------------------------------------- /detector/t5_sentinel/settings_0613_full.yaml: -------------------------------------------------------------------------------- 1 | id: t5-small.0613.t5_sentinel.b 2 | 3 | mode: training 4 | 5 | epochs: 15 6 | 7 | backbone: 8 | name: t5-small 9 | model_max_length: 512 10 | 11 | dataset: 12 | - label: Human 13 | token: 14 | token_id: 32099 15 | root: data/split/open-web-text 16 | - label: ChatGPT 17 | token: 18 | token_id: 32098 19 | root: data/split/open-gpt-text 20 | - label: PaLM 21 | token: 22 | token_id: 32097 23 | root: data/split/open-palm-text 24 | - label: LLaMA 25 | token: 26 | token_id: 32096 27 | root: data/split/open-llama-text 28 | - label: GPT2 29 | token: 30 | token_id: 32095 31 | root: data/split/gpt2-output 32 | 33 | dataloader: 34 | batch_size: 16 35 | num_workers: 8 36 | 37 | tokenizer: 38 | padding: true 39 | truncation: true 40 | return_tensors: pt 41 | 42 | optimizer: 43 | lr: 1.0e-4 44 | weight_decay: 5.0e-5 45 | batch_size: 128 46 | -------------------------------------------------------------------------------- /generator/palm/google_cloud_init.py: -------------------------------------------------------------------------------- 1 | # Please set up the Google cloud credentials to use VertexAI service 2 | # Link: 
https://cloud.google.com/vertex-ai/docs/tutorials/text-classification-automl 3 | 4 | import os 5 | from pathlib import Path 6 | from typing import TypedDict 7 | 8 | from google.cloud import aiplatform 9 | import vertexai 10 | 11 | 12 | class GCP_Config(TypedDict): 13 | project: str 14 | 15 | 16 | def setup_credential(cred_path: Path, config: GCP_Config): 17 | os.putenv("GOOGLE_APPLICATION_CREDENTIALS", str(cred_path)) 18 | vertexai.init(project=config["project"]) 19 | if not Path("./generator/palm/gcp_init.lock").exists(): 20 | aiplatform.init(project=config["project"]) 21 | with open("./generator/palm/gcp_init.lock", "w") as f: 22 | f.write("Existence of this file shows that the google cloud is already initialized.") 23 | 24 | 25 | if __name__ == "__main__": 26 | setup_credential(Path("./generator/palm/secret.json"), {"project": "llm-sentinel"}) 27 | -------------------------------------------------------------------------------- /pipeline/component/web_component.py: -------------------------------------------------------------------------------- 1 | import time 2 | import typing as Tp 3 | from ..pipeline_base import Pipeline 4 | 5 | 6 | I = Tp.TypeVar("I") 7 | 8 | 9 | class RateControl(Pipeline[I, I]): 10 | def __init__(self, entry_per_min=60, omit_none=True, min_wait_time=0.0): 11 | super().__init__() 12 | self.entry_per_min = entry_per_min 13 | self.min_wait_time = min_wait_time 14 | self.omit_none = omit_none 15 | self.controller = { 16 | "start_t": 0.0, 17 | "count": 0 18 | } 19 | 20 | def __call__(self, entry): 21 | if entry is None and self.omit_none: return None 22 | 23 | now = time.time() 24 | if now - self.controller["start_t"] >= 60: 25 | self.controller["start_t"] = now 26 | self.controller["count"] = 0 27 | 28 | if self.controller["count"] >= self.entry_per_min: 29 | time.sleep(60.0 - (now - self.controller["start_t"])) 30 | else: 31 | time.sleep(self.min_wait_time) 32 | 33 | self.controller["count"] += 1 34 | return entry 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Yutian Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /pipeline/component/io_component.py: -------------------------------------------------------------------------------- 1 | import typing as Tp 2 | 3 | from pathlib import Path 4 | from ..component import Pipeline 5 | 6 | I = Tp.TypeVar("I") 7 | 8 | 9 | class Print(Pipeline[I, I]): 10 | def __init__(self, prefix: Tp.Optional[str]=None, omit_none=False): 11 | super().__init__() 12 | self.prefix = "" if prefix is None else prefix 13 | self.omit_none = omit_none 14 | 15 | def __call__(self, obj): 16 | if self.omit_none: 17 | if obj is not None: print(self.prefix, obj) 18 | else: print(self.prefix, obj) 19 | return obj 20 | 21 | 22 | class WriteTo(Pipeline[Tp.Optional[str], None]): 23 | def __init__(self, destination: Path, write_mode="a"): 24 | super().__init__() 25 | assert destination.parent.exists(), f"Writing to {destination} but parent does not exist" 26 | self.destination = destination 27 | self.write_mode = "a" 28 | 29 | def __call__(self, string): 30 | if string is None: return 31 | with open(self.destination, self.write_mode) as f: 32 | f.write(string + "\n") 33 | 34 | -------------------------------------------------------------------------------- /detector/zerogpt_classifier/zerogpt_classifier_client.yaml: -------------------------------------------------------------------------------- 1 | ClientName: "zerogpt_classifier" 2 | ClientRoot: "./detector/zerogpt_classifier" 3 | 4 | MaxAsyncWorkerCnt: 180 5 | MaxRetryCnt: 3 6 | 7 | Config: 8 | InputDirectory: 9 | # - "./data/split/open-gpt-text/test-dirty.jsonl" 10 | # - "./data/split/open-web-text/test-dirty.jsonl" 11 | # - "./data/split/open-llama-text/test-dirty.jsonl" 12 | # - "./data/split/open-palm-text/test-dirty.jsonl" 13 | # - "./data/split/gpt2-output/test-dirty.jsonl" 14 | - "./data/split/hc3-test/hc3-human.jsonl" 15 | - "./data/split/hc3-test/hc3-chatgpt.jsonl" 16 | 17 | OutputDirectory: 18 | # - "./data/baselines/zerogpt_classifier_output/open-gpt-text.jsonl" 19 | # - "./data/baselines/zerogpt_classifier_output/open-web-text.jsonl" 20 | # - "./data/baselines/zerogpt_classifier_output/open-llama-text.jsonl" 21 | # - "./data/baselines/zerogpt_classifier_output/open-palm-text.jsonl" 22 | # - "./data/baselines/zerogpt_classifier_output/gpt2-output.jsonl" 23 | - "./data/baselines/zerogpt_classifier_output/hc3-human.jsonl" 24 | - "./data/baselines/zerogpt_classifier_output/hc3-chatgpt.jsonl" 25 | 26 | WaitTime: 60 27 | URL: https://api.zerogpt.com/api/detect/detectText 28 | 29 | Header: 30 | Content-Type: application/json 31 | Origin: https://www.zerogpt.com 32 | Referer: https://www.zerogpt.com/ 33 | User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 34 | -------------------------------------------------------------------------------- /pipeline/lib/build_abalation.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from pipeline import P 3 | from tqdm import tqdm 4 | 5 | 6 | def build_clean_variants(root_path: Path): 7 | clean_1 = P.StripNewline() >> P.WriteExtra({"variant": "clean-x-newline"}) >> P.ToJsonStr() >> P.WriteTo(Path(root_path, "test.variant1.jsonl"), write_mode="a") 8 | clean_2 = P.CastUnicode() >> P.WriteExtra({"variant": "clean-x-unicode"}) >> P.ToJsonStr() >> P.WriteTo(Path(root_path, "test.variant2.jsonl"), write_mode="a") 9 | clean_3 = P.RemovePunc() >> P.WriteExtra({"variant": 
"clean-x-punct"}) >> P.ToJsonStr() >> P.WriteTo(Path(root_path, "test.variant3.jsonl"), write_mode="a") 10 | clean_4 = P.ToLower() >> P.WriteExtra({"variant": "clean-x-lower"}) >> P.ToJsonStr() >> P.WriteTo(Path(root_path, "test.variant4.jsonl"), write_mode="a") 11 | 12 | process_pipeline = P.FromJsonStr() >> P.Tee(clean_1) >> P.Tee(clean_2) >> P.Tee(clean_3) >> clean_4 13 | 14 | with open(Path(root_path, "test-dirty.jsonl"), "r") as f: 15 | lines = f.read().strip().splitlines() 16 | 17 | for line in tqdm(lines, desc=str(root_path)): process_pipeline(line) 18 | 19 | 20 | if __name__ == "__main__": 21 | build_clean_variants(Path("data", "split", "open-palm-text")) 22 | build_clean_variants(Path("data", "split", "open-web-text")) 23 | build_clean_variants(Path("data", "split", "open-gpt-text")) 24 | build_clean_variants(Path("data", "split", "gpt2-output")) 25 | build_clean_variants(Path("data", "split", "open-llama-text")) 26 | -------------------------------------------------------------------------------- /pipeline/lib/import_gpt2_original.py: -------------------------------------------------------------------------------- 1 | import json 2 | import typing as T 3 | from pipeline import P, PipelineExecutor 4 | from pathlib import Path 5 | 6 | 7 | class rawGPT2Loader(P.Pipeline[str, T.Optional[P.TextEntry]]): 8 | def __call__(self, x: str) -> P.TextEntry: 9 | entry = json.loads(x) 10 | result = {"uid": f"gpt2_{entry['id']}", "text": entry["text"], "extra": {"variant": "original", "source": "gpt2_xl"}} 11 | return result 12 | 13 | 14 | def reduce_list(l1: T.List[P.TextEntry], l2: T.List[P.TextEntry]): 15 | l1.extend(l2) 16 | return l1 17 | 18 | 19 | def sample_gpt2(subset_names): 20 | sample_pipeline = rawGPT2Loader() \ 21 | >> P.RandomFilter(block_factor=0.75) \ 22 | >> P.ToJsonStr() \ 23 | >> P.ToSingletonList(input_type=T.Optional[str]) 24 | 25 | executor = PipelineExecutor(worker_num=3) 26 | sampled = executor.parallel_mapreduce( 27 | sample_pipeline, 28 | from_files=[Path("./data/original/gpt2-output", subset) for subset in subset_names], 29 | identity=[], 30 | reduce_fn=reduce_list, 31 | verbose=True 32 | ) 33 | 34 | print(f"Sampled {len(sampled)} lines") 35 | with open(Path("data", "original", "gpt2-output", "sampled_gpt2.jsonl"), "w") as f: 36 | for line in sampled: f.write(line + "\n") 37 | 38 | 39 | if __name__ == "__main__": 40 | subsets = ["xl-1542M.test.jsonl", "xl-1542M.valid.jsonl", "xl-1542M.train.jsonl"] 41 | sample_gpt2(subsets) 42 | 43 | -------------------------------------------------------------------------------- /detector/openai_classifier/openai_classifier_client.yaml: -------------------------------------------------------------------------------- 1 | ClientName: "openai_classifier" 2 | ClientRoot: "./detector/openai_classifier/" 3 | 4 | MaxAsyncWorkerCnt: 120 5 | MaxRetryCnt: 3 6 | 7 | Config: 8 | InputDirectory: 9 | # - "./data/split/open-gpt-text/test-dirty.jsonl" 10 | # - "./data/split/open-web-text/test-dirty.jsonl" 11 | # - "./data/split/open-palm-text/test-dirty.jsonl" 12 | # - "./data/split/open-llama-text/test-dirty.jsonl" 13 | # - "./data/split/gpt2-output/test-dirty.jsonl" 14 | - "./data/split/hc3-test/hc3-human.jsonl" 15 | - "./data/split/hc3-test/hc3-chatgpt.jsonl" 16 | 17 | OutputDirectory: 18 | # - "./data/baselines/openai_classifier_output/open-gpt-text.jsonl" 19 | # - "./data/baselines/openai_classifier_output/open-web-text.jsonl" 20 | # - "./data/baselines/openai_classifier_output/open-palm-text.jsonl" 21 | # - 
"./data/baselines/openai_classifier_output/open-llama-text.jsonl" 22 | # - "./data/baselines/openai_classifier_output/gpt2-output.jsonl" 23 | - "./data/baselines/openai_classifier_output/hc3-human.jsonl" 24 | - "./data/baselines/openai_classifier_output/hc3-chatgpt.jsonl" 25 | 26 | WaitTime: 60 27 | URL: https://api.openai.com/v1/completions 28 | 29 | Header: 30 | Content-Type: application/json 31 | Referer: https://platform.openai.com/ 32 | Origin: https://platform.openai.com 33 | User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 34 | OpenAI-Organization: [in secret.json] 35 | Authorization: [in secret.json] 36 | -------------------------------------------------------------------------------- /evaluator/toolkit/loader.py: -------------------------------------------------------------------------------- 1 | from pipeline import P, PipelineExecutor, utils 2 | from pathlib import Path 3 | import typing as Tp 4 | 5 | 6 | Human_Data = [ 7 | Path("data", "split", "open-web-text", "train-dirty.jsonl"), 8 | Path("data", "split", "open-web-text", "test-dirty.jsonl"), 9 | Path("data", "split", "open-web-text", "valid-dirty.jsonl") 10 | ] 11 | 12 | GPT3_Data = [ 13 | Path("data", "split", "open-gpt-text", "test-dirty.jsonl"), 14 | Path("data", "split", "open-gpt-text", "train-dirty.jsonl"), 15 | Path("data", "split", "open-gpt-text", "valid-dirty.jsonl") 16 | ] 17 | 18 | GPT2_Data = [ 19 | Path("data", "split", "gpt2-output", "train-dirty.jsonl"), 20 | Path("data", "split", "gpt2-output", "valid-dirty.jsonl"), 21 | Path("data", "split", "gpt2-output", "test-dirty.jsonl") 22 | ] 23 | 24 | PaLM_Data = [ 25 | Path("data", "split", "open-palm-text", "train-dirty.jsonl"), 26 | Path("data", "split", "open-palm-text", "valid-dirty.jsonl"), 27 | Path("data", "split", "open-palm-text", "test-dirty.jsonl"), 28 | ] 29 | 30 | LLaMA_Data = [ 31 | Path("data", "split", "open-llama-text", "train-dirty.jsonl"), 32 | Path("data", "split", "open-llama-text", "valid-dirty.jsonl"), 33 | Path("data", "split", "open-llama-text", "test-dirty.jsonl"), 34 | ] 35 | 36 | def load_data(files: Tp.List[Path]) -> Tp.Sequence[str]: 37 | executor = PipelineExecutor(worker_num=min(len(files), 8)) 38 | result = executor.sequential_mapreduce( 39 | map_fn=P.FromJsonStr() >> P.ToStr() >> P.ToSingletonList(input_type=Tp.Optional[str]), 40 | from_files=files, 41 | identity=[], 42 | reduce_fn=utils.reduce_list 43 | ) 44 | return result 45 | -------------------------------------------------------------------------------- /detector/t5_sentinel/dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import torch.utils as utils 3 | from transformers import T5TokenizerFast as Tokenizer 4 | from detector.t5_sentinel.__init__ import config 5 | from torch import Tensor 6 | from typing import Tuple 7 | 8 | 9 | class Dataset(utils.data.Dataset): 10 | ''' 11 | Dataset for loading text from different large language models. 12 | 13 | Attributes: 14 | corpus (list[str]): The corpus of the dataset. 15 | label (list[str]): The labels of the dataset. 16 | tokenizer (Tokenizer): The tokenizer used. 
17 | ''' 18 | def __init__(self, partition: str, selectedDataset: Tuple[str] = ('Human', 'ChatGPT', 'PaLM', 'LLaMA')): 19 | super().__init__() 20 | 21 | self.corpus, self.label = [], [] 22 | filteredDataset = [item for item in config.dataset if item.label in selectedDataset] 23 | for item in filteredDataset: 24 | with open(f'{item.root}/{partition}.jsonl', 'r') as f: 25 | for line in f: 26 | 27 | if item.label == 'LLaMA': 28 | words = json.loads(line)['text'].split() 29 | continuation = words[75:] 30 | if len(continuation) >= 42: 31 | self.corpus.append(' '.join(continuation[:256])) 32 | self.label.append(item.token) 33 | else: 34 | self.corpus.append(json.loads(line)['text']) 35 | self.label.append(item.token) 36 | 37 | self.tokenizer: Tokenizer = Tokenizer.from_pretrained(config.backbone.name, model_max_length=config.backbone.model_max_length) 38 | 39 | def __len__(self) -> int: 40 | return len(self.corpus) 41 | 42 | def __getitem__(self, idx: int) -> Tuple[str, str]: 43 | return self.corpus[idx], self.label[idx] 44 | 45 | def collate_fn(self, batch: Tuple[str, str]) -> Tuple[Tensor, Tensor, Tensor]: 46 | corpus, label = zip(*batch) 47 | corpus = self.tokenizer.batch_encode_plus(corpus, padding=config.tokenizer.padding, truncation=config.tokenizer.truncation, return_tensors=config.tokenizer.return_tensors) 48 | label = self.tokenizer.batch_encode_plus(label, padding=config.tokenizer.padding, truncation=config.tokenizer.truncation, return_tensors=config.tokenizer.return_tensors) 49 | return corpus.input_ids, corpus.attention_mask, label.input_ids 50 | -------------------------------------------------------------------------------- /detector/t5_hidden/dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import torch.utils as utils 3 | from transformers import T5TokenizerFast as Tokenizer 4 | from detector.t5_hidden.__init__ import config 5 | from torch import Tensor 6 | from typing import Tuple 7 | 8 | 9 | class Dataset(utils.data.Dataset): 10 | ''' 11 | Dataset for loading text from different large language models. 12 | 13 | Attributes: 14 | corpus (list[str]): The corpus of the dataset. 15 | label (list[str]): The labels of the dataset. 16 | tokenizer (Tokenizer): The tokenizer used. 
17 | ''' 18 | def __init__(self, partition: str, selectedDataset: Tuple[str] = ('Human', 'ChatGPT', 'PaLM', 'LLaMA', 'GPT2')): 19 | super().__init__() 20 | 21 | self.corpus, self.label = [], [] 22 | filteredDataset = [item for item in config.dataset if item.label in selectedDataset] 23 | for item in filteredDataset: 24 | with open(f'{item.root}/{partition}.jsonl', 'r') as f: 25 | for line in f: 26 | 27 | if item.label == 'LLaMA': 28 | words = json.loads(line)['text'].split() 29 | continuation = words[75:] 30 | if len(continuation) >= 42: 31 | self.corpus.append(' '.join(continuation[:256])) 32 | self.label.append(item.token) 33 | else: 34 | self.corpus.append(json.loads(line)['text']) 35 | self.label.append(item.token) 36 | 37 | self.tokenizer: Tokenizer = Tokenizer.from_pretrained(config.backbone.name, model_max_length=config.backbone.model_max_length) 38 | 39 | def __len__(self) -> int: 40 | return len(self.corpus) 41 | 42 | def __getitem__(self, idx: int) -> Tuple[str, str]: 43 | return self.corpus[idx], self.label[idx] 44 | 45 | def collate_fn(self, batch: Tuple[str, str]) -> Tuple[Tensor, Tensor, Tensor]: 46 | corpus, label = zip(*batch) 47 | corpus = self.tokenizer.batch_encode_plus(corpus, padding=config.tokenizer.padding, truncation=config.tokenizer.truncation, return_tensors=config.tokenizer.return_tensors) 48 | label = self.tokenizer.batch_encode_plus(label, padding=config.tokenizer.padding, truncation=config.tokenizer.truncation, return_tensors=config.tokenizer.return_tensors) 49 | return corpus.input_ids, corpus.attention_mask, label.input_ids 50 | -------------------------------------------------------------------------------- /pipeline/lib/import_zerogpt_result.py: -------------------------------------------------------------------------------- 1 | from pipeline import P, PipelineExecutor 2 | from pathlib import Path 3 | from memoizer import memoize 4 | import numpy as np 5 | import typing as Tp 6 | import json 7 | 8 | 9 | class LoadZeroGPTPrediction(P.Pipeline[Tp.Optional[str], Tp.Optional[P.ArrayEntry]]): 10 | def __call__(self, x: Tp.Optional[str]) -> Tp.Optional[P.ArrayEntry]: 11 | if x is None: return None 12 | 13 | entry = json.loads(x) 14 | uid, extra = entry["uid"], entry["extra"] 15 | res = entry["res"] 16 | if not res["success"]: return None 17 | 18 | prob_human = float(res["data"]["isHuman"]) / 100. 19 | prob_vec = np.array([prob_human, 1. 
- prob_human]) 20 | return {"uid": uid, "extra": extra, "data": prob_vec} 21 | 22 | 23 | def list_reduce(l1: Tp.List, l2: Tp.List) -> Tp.List: 24 | l1.extend(l2) 25 | return l1 26 | 27 | 28 | def argeq(a, b): return a[0] == b[0] 29 | 30 | 31 | @memoize(Path("cache", "zerogpt_prediction.pt"), arg_eq=argeq) 32 | def import_zerogpt_prediction_result_impl(path: Path): 33 | executor = PipelineExecutor(worker_num=4) 34 | predictions = executor.sequential_mapreduce( 35 | LoadZeroGPTPrediction() >> P.ToSingletonList(input_type=Tp.Optional[P.ArrayEntry]), 36 | from_files=[path], 37 | identity=[], 38 | reduce_fn=list_reduce, 39 | verbose=True 40 | ) 41 | return predictions 42 | 43 | def import_zerogpt_prediction_result(): 44 | openai_gpt = import_zerogpt_prediction_result_impl( 45 | Path("data", "baselines", "zerogpt_classifier_output", "open-gpt-text.jsonl") 46 | ) 47 | openai_llama = import_zerogpt_prediction_result_impl( 48 | Path("data", "baselines", "zerogpt_classifier_output", "open-llama-text.jsonl") 49 | ) 50 | openai_palm = import_zerogpt_prediction_result_impl( 51 | Path("data", "baselines", "zerogpt_classifier_output", "open-palm-text.jsonl") 52 | ) 53 | openai_web = import_zerogpt_prediction_result_impl( 54 | Path("data", "baselines", "zerogpt_classifier_output", "open-web-text.jsonl") 55 | ) 56 | openai_gpt2 = import_zerogpt_prediction_result_impl( 57 | Path("data", "baselines", "zerogpt_classifier_output", "gpt2-output.jsonl") 58 | ) 59 | result = openai_gpt + openai_llama + openai_palm + openai_web + openai_gpt2 60 | return result 61 | 62 | 63 | if __name__ == "__main__": 64 | import_zerogpt_prediction_result() 65 | -------------------------------------------------------------------------------- /evaluator/models/t5_hidden/t5_get_hidden_states.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append(".") 4 | 5 | import typing as T 6 | from pipeline import PipelineExecutor, P 7 | from evaluator.models.t5_hidden.t5_pipeline import ( 8 | ExecuteT5Hidden, 9 | T5HiddenPredictToLogits, 10 | T5PredictToHidden, 11 | ) 12 | from pathlib import Path 13 | from memoizer import memoize 14 | 15 | CHECKPOINT = "T5Hidden.0622.pt" 16 | 17 | 18 | def list_reduce(l1: T.List, l2: T.List) -> T.List: 19 | l1.extend(l2) 20 | return l1 21 | 22 | 23 | def argeq(arg1, arg2): 24 | return arg1[0] == arg2[0] 25 | 26 | 27 | def binary_eq(arg1, arg2): 28 | return arg1[0] == arg2[0] and arg1[1] == arg2[1] 29 | 30 | 31 | @memoize(cache_path=Path("./cache/t5_hidden_state_predicts.pt"), arg_eq=argeq) 32 | def evaluate_prediction_impl(from_file: Path) -> T.Sequence[P.ArrayEntry]: 33 | pipe = ( 34 | P.FromJsonStr() 35 | >> ExecuteT5Hidden(Path("./data/checkpoint", CHECKPOINT)) 36 | >> T5HiddenPredictToLogits() 37 | >> P.ToSingletonList(T.Optional[P.ArrayEntry]) 38 | ) 39 | 40 | executor = PipelineExecutor(worker_num=1) 41 | predicts = executor.sequential_mapreduce( 42 | map_fn=pipe, 43 | from_files=[from_file], 44 | identity=list(), 45 | reduce_fn=list_reduce, 46 | verbose=True, 47 | ) 48 | return predicts 49 | 50 | 51 | @memoize(cache_path=Path("./cache/t5_hidden_state_hiddens.pt"), arg_eq=argeq) 52 | def evaluate_hiddens_impl(from_file: Path) -> T.Sequence[P.ArrayEntry]: 53 | pipe = ( 54 | P.FromJsonStr() 55 | >> ExecuteT5Hidden(Path("./data/checkpoint", CHECKPOINT)) 56 | >> T5PredictToHidden() 57 | >> P.ToSingletonList(T.Optional[P.ArrayEntry]) 58 | ) 59 | 60 | executor = PipelineExecutor(worker_num=1) 61 | hiddens = executor.sequential_mapreduce( 62 | 
map_fn=pipe, 63 | from_files=[from_file], 64 | identity=list(), 65 | reduce_fn=list_reduce, 66 | verbose=True, 67 | ) 68 | return hiddens 69 | 70 | 71 | def evaluate_predictions(from_files: T.Sequence[Path]) -> T.Sequence[P.ArrayEntry]: 72 | result = [] 73 | for from_file in from_files: 74 | result.extend(evaluate_prediction_impl(from_file)) 75 | return result 76 | 77 | 78 | def evaluate_hidden_states(from_files: T.Sequence[Path]) -> T.Sequence[P.ArrayEntry]: 79 | result = [] 80 | for from_file in from_files: 81 | result.extend(evaluate_hiddens_impl(from_file)) 82 | return result 83 | -------------------------------------------------------------------------------- /pipeline/pipeline_base.py: -------------------------------------------------------------------------------- 1 | import typing as Tp 2 | 3 | 4 | I = Tp.TypeVar("I") 5 | O = Tp.TypeVar("O") 6 | P = Tp.TypeVar("P") 7 | 8 | 9 | class Pipeline(Tp.Generic[I, O]): 10 | VERBOSE = False 11 | CLS_I, CLS_O = None, None 12 | 13 | def __class_getitem__(cls, key): 14 | if cls.CLS_I is None or cls.CLS_I is I: cls.CLS_I, cls.CLS_O = key 15 | else: 16 | try: cls.CLS_I, cls.CLS_O = cls.CLS_I[key], cls.CLS_O[key] 17 | except TypeError: cls.CLS_I, cls.CLS_O = key 18 | return super().__class_getitem__(key) 19 | 20 | def __init_subclass__(cls): 21 | if Pipeline.CLS_I is not None: 22 | cls.CLS_I, Pipeline.CLS_I = Pipeline.CLS_I, None 23 | cls.CLS_O, Pipeline.CLS_O = Pipeline.CLS_O, None 24 | 25 | def __init__(self): 26 | self.execute_pair = None 27 | self.component = [self] 28 | self.IN_TYPE: Tp.Type[I] = self.CLS_I 29 | self.OUT_TYPE: Tp.Type[O] = self.CLS_O 30 | 31 | def __call__(self, x): 32 | if self.execute_pair is None: 33 | return x 34 | else: 35 | return self.execute_pair[1](self.execute_pair[0](x)) 36 | 37 | def __rshift__(self: "Pipeline[I, O]", other: "Pipeline[O, P]") -> "Pipeline[I, P]": 38 | if self.VERBOSE: print(f"\t{self.IN_TYPE}>>{self.OUT_TYPE} [>>] {other.IN_TYPE}>>{other.OUT_TYPE}") 39 | # 'a -> 'a case 40 | if isinstance(other.IN_TYPE, Tp.TypeVar) and other.IN_TYPE == other.OUT_TYPE: 41 | other.IN_TYPE = self.OUT_TYPE 42 | other.OUT_TYPE = self.OUT_TYPE 43 | 44 | # Manual runtime type-checking pipeline construction 45 | not_in_union = (other.IN_TYPE != Tp.Union) or (self.OUT_TYPE not in Tp.get_args(other.IN_TYPE)) 46 | not_in_wildcard = other.IN_TYPE != Tp.Any 47 | if self.OUT_TYPE != other.IN_TYPE and not_in_wildcard and not_in_union: 48 | raise Exception(f"Mismatched Pipeline: \n" 49 | f"\tTried to connect\n" 50 | f"\t{self.OUT_TYPE} [>>] {other.IN_TYPE}\n" 51 | f"\ti.e. 
{self} --[x]--> {other}") 52 | 53 | new_pipe = Pipeline[self.IN_TYPE, other.OUT_TYPE]() 54 | new_pipe.component = self.component + [other] 55 | new_pipe.execute_pair = (self.__call__, other.__call__) 56 | return new_pipe 57 | 58 | def __repr__(self): 59 | return self.__class__.__name__.split(".")[-1] 60 | 61 | def __str__(self): 62 | if len(self.component) == 1: return repr(self) 63 | return " >> ".join([str(c) for c in self.component]) 64 | -------------------------------------------------------------------------------- /evaluator/plot/plot_det.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | 4 | from evaluator.toolkit import * 5 | from pathlib import Path 6 | import evaluator.models.t5_sentinel.t5_get_hidden_states as T5_Full 7 | 8 | 9 | def get_t5_one_to_rest_full_det(prediction_idx: int, pos_label: str): 10 | predictions = T5_Full.evaluate_predictions([ 11 | Path("data", "split", "open-web-text", "test-dirty.jsonl"), 12 | Path("data", "split", "open-gpt-text", "test-dirty.jsonl"), 13 | Path("data", "split", "open-palm-text", "test-dirty.jsonl"), 14 | Path("./data/split/open-llama-text/test-dirty.jsonl"), 15 | Path("./data/split/gpt2-output/test-dirty.jsonl") 16 | ]) 17 | reformulated_predictions = [] 18 | for entry in predictions: 19 | p_selected = entry["data"][prediction_idx] 20 | new_entry = entry.copy() 21 | new_entry["data"] = np.array([p_selected, 1 - p_selected]) 22 | reformulated_predictions.append(new_entry) 23 | curve = get_det_binary(reformulated_predictions, pos_label) 24 | return curve 25 | 26 | def plot_t5_full_one_to_rest_dirty(): 27 | curve0 = get_t5_one_to_rest_full_det(0, "openweb") 28 | curve1 = get_t5_one_to_rest_full_det(1, "chatgpt") 29 | curve2 = get_t5_one_to_rest_full_det(2, "palm") 30 | curve3 = get_t5_one_to_rest_full_det(3, "llama") 31 | curve4 = get_t5_one_to_rest_full_det(4, "gpt2_xl") 32 | 33 | figure: plt.Figure = plt.figure(dpi=200) 34 | ax: plt.Axes = figure.add_subplot(1, 1, 1) 35 | # ax.set_prop_cycle('color', sns.color_palette("hls")) 36 | ax.set_yscale('log') 37 | ax.set_xscale('log') 38 | ticks_to_use = [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1] 39 | ax.set_xticks(ticks_to_use) 40 | ax.set_yticks(ticks_to_use) 41 | ax.set_xlabel("False Positive Rate") 42 | ax.set_ylabel("False Negative Rate") 43 | ax.plot(curve0[0], curve0[1], label="Human") 44 | ax.plot(curve1[0], curve1[1], label="GPT3.5") 45 | ax.plot(curve2[0], curve2[1], label="PaLM") 46 | ax.plot(curve3[0], curve3[1], label="LLaMA") 47 | ax.plot(curve4[0], curve4[1], label="GPT2-XL") 48 | ax.set_xlim(0.001, 1.01) 49 | ax.set_ylim(0.001, 1.01) 50 | # ax.set_title("DET Curves for T5-Sentinel for each \nclassification label on one-to-rest classification task") 51 | ax.grid(visible=True, linestyle="--") 52 | ax.legend() 53 | figure.tight_layout() 54 | figure.savefig(Path("./result/t5_sentinel/det_t5_full_dirty.pdf")) 55 | 56 | 57 | if __name__ == "__main__": 58 | TASKS = [ 59 | plot_t5_full_one_to_rest_dirty 60 | ] 61 | 62 | for task in TASKS: 63 | print("Executing task: ", task.__name__) 64 | task() 65 | -------------------------------------------------------------------------------- /pipeline/lib/import_openai_result.py: -------------------------------------------------------------------------------- 1 | from pipeline import P, PipelineExecutor 2 | from pathlib import Path 3 | from memoizer import memoize 4 | import numpy as np 5 | import typing as Tp 6 | import json 7 | import 
math 8 | 9 | 10 | class LoadOpenAIPredictionResult(P.Pipeline[Tp.Optional[str], Tp.Optional[P.ArrayEntry]]): 11 | def __call__(self, x: Tp.Optional[str]) -> Tp.Optional[P.ArrayEntry]: 12 | if x is None: return None 13 | 14 | entry = json.loads(x) 15 | uid, extra = entry["uid"], entry["extra"] 16 | pred_result = entry["res"] 17 | if len(pred_result["choices"][0]["logprobs"]["top_logprobs"]) == 0: 18 | prob_human = 0. 19 | elif "!" in pred_result["choices"][0]["logprobs"]["top_logprobs"][0]: 20 | prob_human = math.exp(pred_result["choices"][0]["logprobs"]["top_logprobs"][0]["!"]) 21 | else: 22 | prob_human = 1e-5 23 | prob_vec = np.array([prob_human, 1 - prob_human]) 24 | return {"uid": uid, "extra": extra, "data": prob_vec} 25 | 26 | 27 | def list_reduce(l1: Tp.List, l2: Tp.List) -> Tp.List: 28 | l1.extend(l2) 29 | return l1 30 | 31 | 32 | def argeq(a, b): return a[0] == b[0] 33 | 34 | 35 | @memoize(Path("cache", "openai_prediction.pt"), arg_eq=argeq) 36 | def import_openai_prediction_result_impl(path: Path): 37 | executor = PipelineExecutor(worker_num=4) 38 | predictions = executor.sequential_mapreduce( 39 | LoadOpenAIPredictionResult() >> P.ToSingletonList(input_type=Tp.Optional[P.ArrayEntry]), 40 | from_files=[path], 41 | identity=[], 42 | reduce_fn=list_reduce, 43 | verbose=True 44 | ) 45 | return predictions 46 | 47 | def import_openai_prediction_result(): 48 | openai_gpt = import_openai_prediction_result_impl( 49 | Path("data", "baselines", "openai_classifier_output", "open-gpt-text.jsonl") 50 | ) 51 | openai_llama = import_openai_prediction_result_impl( 52 | Path("data", "baselines", "openai_classifier_output", "open-llama-text.jsonl") 53 | ) 54 | openai_palm = import_openai_prediction_result_impl( 55 | Path("data", "baselines", "openai_classifier_output", "open-palm-text.jsonl") 56 | ) 57 | openai_web = import_openai_prediction_result_impl( 58 | Path("data", "baselines", "openai_classifier_output", "open-web-text.jsonl") 59 | ) 60 | openai_gpt2 = import_openai_prediction_result_impl( 61 | Path("data", "baselines", "openai_classifier_output", "gpt2-output.jsonl") 62 | ) 63 | result = openai_gpt + openai_llama + openai_palm + openai_web + openai_gpt2 64 | return result 65 | 66 | 67 | if __name__ == "__main__": 68 | import_openai_prediction_result() 69 | -------------------------------------------------------------------------------- /generator/bard/client.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import json 4 | import yaml 5 | from tqdm import tqdm 6 | from bardapi import Bard 7 | from pathlib import Path 8 | from typing import List 9 | 10 | 11 | def run_client(): 12 | """ 13 | Run agent with BardAPI. 
14 | - Read config and secrets 15 | - Cache work already done 16 | - Iterate over lines to get answers 17 | - Avoid rate limit if needed 18 | """ 19 | 20 | with open("./generator/bard/generator-bard.yaml") as f: 21 | config = yaml.safe_load(f) 22 | with open("./generator/bard/secrets.yaml") as f: 23 | secrets = yaml.safe_load(f) 24 | 25 | for filename in config['required-files']: 26 | chains = secrets["bard-agent"]["api-keys"] 27 | import_path = Path(config['import-path'], filename) 28 | export_path = Path(config['export-path'], filename) 29 | question_prompt = config['question-prompt'] 30 | 31 | # avoid duplicate work 32 | export_work = set() 33 | if export_path.exists(): 34 | with open(export_path, 'r') as df2: 35 | for line in df2: 36 | data = json.loads(line) 37 | export_work.add(data['uid']) 38 | 39 | requests_count = 0 40 | df1 = open(import_path, 'r') 41 | df2 = open(export_path, 'a') 42 | 43 | # create chains of agents 44 | agents: List[Bard] = [] 45 | for chain in chains: 46 | os.environ['_BARD_API_KEY'] = chain 47 | agents.append(Bard(config['timeout-seconds'])) 48 | 49 | # iterate over lines 50 | for i, line1 in tqdm(enumerate(df1), desc=f"Rephrasing"): 51 | try: 52 | # read from original dataset 53 | data1 = json.loads(line1) 54 | uid, text1 = data1['uid'], data1['text'] 55 | if uid not in export_work: 56 | # get answer from agent 57 | agent = agents[i % len(agents)] 58 | text2 = agent.get_answer(question_prompt + '\n' + text1)['content'] 59 | if text2.startswith('Response Error'): 60 | continue 61 | df2.write(json.dumps({'uid': uid, 'text': text2}) + '\n') 62 | # avoid rate limit 63 | requests_count += 1 64 | if requests_count == config['requests-limit']: 65 | time.sleep(config['silence-minutes'] * 60) 66 | requests_count = 0 67 | except: 68 | continue 69 | 70 | # release file descriptors 71 | df1.close() 72 | df2.close() 73 | 74 | 75 | if __name__ == "__main__": 76 | run_client() 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | image 2 | 3 | 4 | **Release repo for our work "Token Prediction as Implicit Classification to Identify LLM-Generated Text"** 5 | 6 | ## Abstract 7 | 8 | This paper introduces a novel approach for identifying the possible large language models (LLMs) involved in text generation. Instead of adding an additional classification layer to a base LM, we reframe the classification task as a next-token prediction task and directly fine-tune the base LM to perform it. We utilize the Text-to-Text Transfer Transformer (T5) model as the backbone for our experiments. We compared our approach to the more direct approach of utilizing hidden states for classification. Evaluation shows the exceptional performance of our method in the text classification task, highlighting its simplicity and efficiency. Furthermore, interpretability studies on the features extracted by our model reveal its ability to differentiate distinctive writing styles among various LLMs even in the absence of an explicit classifier. We also collected a dataset named OpenLLMText, containing approximately 340k text samples from human and LLMs, including GPT3.5, PaLM, LLaMA, and GPT2. 9 | 10 | ## Evaluation Result Overview 11 | 12 | image 13 | 14 | 15 | ## Requirement 16 | 17 | Run `pip install -r requirements.txt` to install dependencies. 18 | 19 | > Note that the baseline model proposed by Solaiman et al. 
requires a legacy version of the `transformers` library; the detailed environment requirements 20 | > for the baseline model are listed [here](https://github.com/MarkChenYutian/T5-Sentinel-public/blob/main/detector/solaiman_classifier/solaiman_requirements.txt) 21 | 22 | ## Evaluate 23 | 24 | 1. Run `./data/download.py` to automatically download the dataset & model checkpoints 25 | 2. Run the following files as needed 26 | 1. `./evaluator/calc/calc_accuracy.py` to calculate the accuracy under different settings for each module 27 | 2. `./evaluator/interpret/integrated_gradient.ipynb` to calculate the integrated gradient for samples 28 | 3. `./evaluator/interpret/sample_pca.py` to perform PCA analysis on the hidden states of the test subset 29 | 4. `./evaluator/plot/*.py` to generate plots of related metrics (confusion matrix, ROC, DET, etc.) 30 | 31 | **Note that the Python files are organized as modules**, so to run `./evaluator/calc/calc_accuracy.py`, you need to use `python3 -m evaluator.calc.calc_accuracy`. 32 | 33 | ## Train 34 | 35 | 1. Use `./detector/t5_sentinel/__main__.py` to train the T5-Sentinel Model 36 | 37 | (The detailed hyperparameter setup we used for training the T5-Sentinel model in the paper is presented in `settings_0613_full.yaml`) 38 | 39 | 2. Use `./detector/t5_hidden/__main__.py` to train the T5-Hidden Model 40 | 41 | -------------------------------------------------------------------------------- /evaluator/toolkit/statistics.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | import numpy as np 4 | 5 | from pipeline import P 6 | 7 | 8 | def quick_statistics_binary(prediction: Sequence[P.ArrayEntry], pos_label, threshold=0.5): 9 | """ 10 | :param prediction: Binary prediction in the form of [P(positive), P(negative)] 11 | :param pos_label: Label name (entry["extra"]["source"]) for a positive entry 12 | :param threshold: Threshold probability for being classified as the positive class 13 | :return: TP, TN, FP, FN 14 | """ 15 | TP, TN, FP, FN = 0, 0, 0, 0 16 | for entry in prediction: 17 | assert entry["data"].size == 2, "Non-binary data input received" 18 | 19 | p_positive, p_web = entry["data"][0], entry["data"][1] 20 | pred_positive = p_positive >= threshold 21 | 22 | label_positive = entry["extra"]["source"] == pos_label 23 | if pred_positive and label_positive: TP += 1 24 | elif pred_positive and (not label_positive): FP += 1 25 | elif (not pred_positive) and label_positive: FN += 1 26 | else: TN += 1 27 | return TP, TN, FP, FN 28 | 29 | 30 | def report_statistics(TP, TN, FP, FN): 31 | TPR = tpr(TP, TN, FP, FN) 32 | TNR = tnr(TP, TN, FP, FN) 33 | FPR = fpr(TP, TN, FP, FN) 34 | FNR = fnr(TP, TN, FP, FN) 35 | print(f"True Positive: {TP} \t| True Negative: {TN}") 36 | print(f"False Positive:{FP} \t| False Negative:{FN}") 37 | print(f"True Positive Rate: {round(TPR * 100, 2)}\%") 38 | print(f"True Negative Rate: {round(TNR * 100, 2)}\%") 39 | print(f"False Positive Rate: {round(FPR * 100, 2)}\%") 40 | print(f"False Negative Rate: {round(FNR * 100, 2)}\%") 41 | print(f"Accuracy: {round(((TP + TN) / (TP + TN + FP + FN)) * 100, 2)}\%") 42 | print(f"F1 Score: {round((TP) / (TP + 0.5 * (FP + FN)), 2)}") 43 | 44 | print("LaTeX Usable-version\n") 45 | 46 | print( 47 | f"{round(((TP + TN) / (TP + TN + FP + FN)) * 100, 2)}\%", "&", 48 | f"{round(TPR * 100, 2)}\%, ({TP})", "&", 49 | f"{round(TNR * 100, 2)}\%, ({TN})", "&", 50 | f"{round(FPR * 100, 2)}\%, ({FP})", "&", 51 | f"{round(FNR * 100, 2)}\%, ({FN})", "\\\\" 52 | ) 53 |
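# A quick worked example of how these statistics compose (hypothetical counts, not from the paper):
# with TP=90, TN=80, FP=20, FN=10,
#   tpr = 90 / (90 + 10) = 0.9, fpr = 20 / (20 + 80) = 0.2,
#   acc = (90 + 80) / 200 = 0.85, f1 = 90 / (90 + 0.5 * (20 + 10)) ≈ 0.857.
# Typical usage is to feed quick_statistics_binary(...) straight into report_statistics(...):
#   TP, TN, FP, FN = quick_statistics_binary(predictions, pos_label="chatgpt", threshold=0.5)
#   report_statistics(TP, TN, FP, FN)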
54 | def tpr(TP, TN, FP, FN): return TP / (TP + FN) 55 | def tnr(TP, TN, FP, FN): return TN / (TN + FP) 56 | def fpr(TP, TN, FP, FN): return FP / (FP + TN) 57 | def fnr(TP, TN, FP, FN): return FN / (FN + TP) 58 | 59 | recall = tpr 60 | 61 | def precision(TP, TN, FP, FN): return TP / (TP + FP) 62 | 63 | def acc(TP, TN, FP, FN): return (TP + TN) / (TP + TN + FP + FN) 64 | 65 | def f1(TP, TN, FP, FN): return TP / (TP + 0.5 * (FP + FN)) 66 | 67 | def calculate_confusion_matrix(predictions: Sequence[P.ArrayEntry], classes: Sequence[str]) -> np.ndarray: 68 | k = len(classes) 69 | confusion = np.zeros((k, k)) 70 | 71 | for entry in predictions: 72 | label = entry["extra"]["source"] 73 | label_idx = classes.index(label) 74 | pred_idx = np.argmax(entry["data"]) 75 | confusion[label_idx, pred_idx] += 1 76 | 77 | return confusion 78 | -------------------------------------------------------------------------------- /generator/palm/palm_pipe.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import typing as Tp 3 | 4 | from pipeline import P, PipelineExecutor 5 | from pathlib import Path 6 | from vertexai.preview.language_models import TextGenerationModel 7 | 8 | 9 | class PaLMRequest(P.Pipeline[Tp.Optional[P.TextEntry], Tp.Optional[P.TextEntry]]): 10 | def __init__(self, **config): 11 | super().__init__() 12 | print(f"Initializing {repr(self)} Pipeline Component with configuration:") 13 | print(config) 14 | 15 | self.config= config 16 | self.retry = config["retry"] 17 | self.model = TextGenerationModel.from_pretrained(self.config["ModelName"]) 18 | 19 | def __call__(self, entry: Tp.Optional[P.TextEntry]) -> Tp.Optional[P.TextEntry]: 20 | if entry is None: return None 21 | try: 22 | rephrased = "" 23 | for _ in range(self.retry): 24 | rephrased = self.model.predict( 25 | f"""Rephrase the following paragraph by paragraph:\n "{entry["text"]}" """, 26 | temperature=self.config["Temperature"], 27 | max_output_tokens=self.config["MaxDecodeSteps"], 28 | top_p=self.config["top_p"], 29 | top_k=self.config["top_k"] 30 | ) 31 | if rephrased.text != "": break 32 | if rephrased.text == "": return None 33 | result = {"uid": entry["uid"], "text": rephrased.text, "extra": entry["extra"]} 34 | return result 35 | except Exception as e: 36 | print(f"[x] - \t Exception caught: {entry['uid']} - {e}") 37 | return None 38 | 39 | 40 | if __name__ == "__main__": 41 | CFG_PATH = Path("./generator/palm/palm_client.yaml") 42 | subsets = ["urlsf_subset03.jsonl", "urlsf_subset00.jsonl"] 43 | 44 | with open(CFG_PATH, "r") as f: global_cfg = yaml.safe_load(f) 45 | # Print UID if something is successfully rephrased by PaLM 46 | print_uid_side_effect = P.ToUID() >> P.Print(prefix="[√] - \t", omit_none=True) 47 | duplicate_filters = [ 48 | P.FilterIf_UID_NotInFile(Path("data", "original", "open-palm-text", subset)) for subset in subsets 49 | ] 50 | dedupe_filter = duplicate_filters[0] 51 | for i in range(1, len(duplicate_filters)): dedupe_filter = dedupe_filter >> duplicate_filters[i] 52 | 53 | pipeline = P.FromJsonStr() \ 54 | >> P.NegateFilter(dedupe_filter) \ 55 | >> P.RandomFilter(block_factor=0.98) \ 56 | >> P.WriteExtra({"source": "palm", "variant": "original"}) \ 57 | >> P.RateControl(**global_cfg["RateControl"]) \ 58 | >> PaLMRequest(**global_cfg["Config"]) \ 59 | >> P.Tee(print_uid_side_effect) \ 60 | >> P.ToJsonStr() 61 | 62 | 63 | executor = PipelineExecutor(worker_num=1) 64 | executor.sequantial_file_mapping( 65 | pipeline, 66 | 
from_files=[Path("./data/original/open-web-text", subset) for subset in subsets], 67 | to_files=[Path("./data/original/open-palm-text", subset) for subset in subsets], 68 | write_mode="a" 69 | ) 70 | -------------------------------------------------------------------------------- /detector/t5_sentinel/utilities.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from tqdm import tqdm 3 | from torch.utils.data import DataLoader 4 | from detector.t5_sentinel.__init__ import config 5 | from detector.t5_sentinel.types import SentinelOutput 6 | from typing import Tuple 7 | 8 | 9 | def train( 10 | model: nn.Module, 11 | optimizer: nn.Module, 12 | dataloader: DataLoader, 13 | selectedDataset: Tuple[str] = ('Human', 'ChatGPT', 'PaLM', 'LLaMA') 14 | ) -> Tuple[float, float]: 15 | model.train() 16 | accumulatedLoss, accumulatedCorrect, accumulatedBatchSize = 0, 0, 0 17 | progress = tqdm(enumerate(dataloader), total=len(dataloader), desc='Training', ncols=120) 18 | 19 | filteredDataset = [item for item in config.dataset if item.label in selectedDataset] 20 | 21 | for i, (corpus_ids, corpus_mask, label_ids) in progress: 22 | 23 | output: SentinelOutput = model.forward(corpus_ids.cuda(), corpus_mask.cuda(), label_ids.cuda()) 24 | loss, probabilities, predictions = output.huggingface.loss, output.probabilities, [] 25 | for argmaxIndex in probabilities.argmax(dim=-1): 26 | predictions.append(filteredDataset[argmaxIndex].token_id) 27 | 28 | accumulatedLoss += loss.mean().item() 29 | accumulatedCorrect += sum([1 if prediction == label_id[0] else 0 for prediction, label_id in zip(predictions, label_ids.tolist())]) 30 | accumulatedBatchSize += config.dataloader.batch_size 31 | 32 | loss.mean().backward() 33 | if accumulatedBatchSize >= config.optimizer.batch_size or i == len(dataloader) - 1: 34 | optimizer.step() 35 | optimizer.zero_grad() 36 | accumulatedBatchSize = 0 37 | 38 | progress.set_postfix({ 39 | 'loss': '{:04f}'.format(accumulatedLoss / (i + 1)), 40 | 'accuracy': '{:04%}'.format(accumulatedCorrect / ((i + 1) * config.dataloader.batch_size)) 41 | }) 42 | 43 | progress.close() 44 | return accumulatedLoss / len(dataloader), accumulatedCorrect / (len(dataloader) * config.dataloader.batch_size) 45 | 46 | 47 | def validate( 48 | model: nn.Module, 49 | dataloader: DataLoader, 50 | selectedDataset: Tuple[str] = ('Human', 'ChatGPT', 'PaLM', 'LLaMA') 51 | ) -> float: 52 | model.eval() 53 | accumulatedCorrect = 0 54 | progress = tqdm(enumerate(dataloader), total=len(dataloader), desc='Validating', ncols=120) 55 | 56 | filteredDataset = [item for item in config.dataset if item.label in selectedDataset] 57 | 58 | for i, (corpus_ids, corpus_mask, label_ids) in progress: 59 | output: SentinelOutput = model.forward(corpus_ids.cuda(), corpus_mask.cuda(), label_ids.cuda()) 60 | probabilities, predictions = output.probabilities, [] 61 | for argmaxIndex in probabilities.argmax(dim=-1): 62 | predictions.append(filteredDataset[argmaxIndex].token_id) 63 | 64 | accumulatedCorrect += sum([1 if prediction == label_id[0] else 0 for prediction, label_id in zip(predictions, label_ids.tolist())]) 65 | progress.set_postfix({ 66 | 'accuracy': '{:04%}'.format(accumulatedCorrect / ((i + 1) * config.dataloader.batch_size)) 67 | }) 68 | 69 | progress.close() 70 | return accumulatedCorrect / (len(dataloader) * config.dataloader.batch_size) 71 | -------------------------------------------------------------------------------- /detector/t5_hidden/utilities.py: 
-------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from tqdm import tqdm 3 | from torch.utils.data import DataLoader 4 | from detector.t5_hidden.__init__ import config 5 | from detector.t5_hidden.types import SentinelOutput 6 | from typing import Tuple 7 | 8 | 9 | def train( 10 | model: nn.Module, 11 | optimizer: nn.Module, 12 | dataloader: DataLoader, 13 | selectedDataset: Tuple[str] = ('Human', 'ChatGPT', 'PaLM', 'LLaMA', 'GPT2') 14 | ) -> Tuple[float, float]: 15 | model.train() 16 | accumulatedLoss, accumulatedCorrect, accumulatedBatchSize = 0, 0, 0 17 | progress = tqdm(enumerate(dataloader), total=len(dataloader), desc='Training', ncols=120) 18 | 19 | filteredDataset = [item for item in config.dataset if item.label in selectedDataset] 20 | 21 | for i, (corpus_ids, corpus_mask, label_ids) in progress: 22 | 23 | output: SentinelOutput = model.forward(corpus_ids.cuda(), corpus_mask.cuda(), label_ids.cuda()) 24 | loss, probabilities, predictions = output.huggingface.loss, output.probabilities, [] 25 | for argmaxIndex in probabilities.argmax(dim=-1): 26 | predictions.append(filteredDataset[argmaxIndex].token_id) 27 | 28 | accumulatedLoss += loss.mean().item() 29 | accumulatedCorrect += sum([1 if prediction == label_id[0] else 0 for prediction, label_id in zip(predictions, label_ids.tolist())]) 30 | accumulatedBatchSize += config.dataloader.batch_size 31 | 32 | loss.mean().backward() 33 | if accumulatedBatchSize >= config.optimizer.batch_size or i == len(dataloader) - 1: 34 | optimizer.step() 35 | optimizer.zero_grad() 36 | accumulatedBatchSize = 0 37 | 38 | progress.set_postfix({ 39 | 'loss': '{:04f}'.format(accumulatedLoss / (i + 1)), 40 | 'accuracy': '{:04%}'.format(accumulatedCorrect / ((i + 1) * config.dataloader.batch_size)) 41 | }) 42 | 43 | progress.close() 44 | return accumulatedLoss / len(dataloader), accumulatedCorrect / (len(dataloader) * config.dataloader.batch_size) 45 | 46 | 47 | def validate( 48 | model: nn.Module, 49 | dataloader: DataLoader, 50 | selectedDataset: Tuple[str] = ('Human', 'ChatGPT', 'PaLM', 'LLaMA', 'GPT2') 51 | ) -> float: 52 | model.eval() 53 | accumulatedCorrect = 0 54 | progress = tqdm(enumerate(dataloader), total=len(dataloader), desc='Validating', ncols=120) 55 | 56 | filteredDataset = [item for item in config.dataset if item.label in selectedDataset] 57 | 58 | for i, (corpus_ids, corpus_mask, label_ids) in progress: 59 | output: SentinelOutput = model.forward(corpus_ids.cuda(), corpus_mask.cuda(), label_ids.cuda()) 60 | probabilities, predictions = output.probabilities, [] 61 | for argmaxIndex in probabilities.argmax(dim=-1): 62 | predictions.append(filteredDataset[argmaxIndex].token_id) 63 | 64 | accumulatedCorrect += sum([1 if prediction == label_id[0] else 0 for prediction, label_id in zip(predictions, label_ids.tolist())]) 65 | progress.set_postfix({ 66 | 'accuracy': '{:04%}'.format(accumulatedCorrect / ((i + 1) * config.dataloader.batch_size)) 67 | }) 68 | 69 | progress.close() 70 | return accumulatedCorrect / (len(dataloader) * config.dataloader.batch_size) 71 | -------------------------------------------------------------------------------- /detector/t5_hidden/types.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | from torch import Tensor 4 | from transformers.modeling_outputs import Seq2SeqLMOutput 5 | from pydantic import BaseModel, validator, ConfigDict 6 | 7 | 8 | class BackboneField(BaseModel): 9 | 
name: str 10 | model_max_length: int 11 | 12 | model_config = ConfigDict(protected_namespaces=()) 13 | 14 | @validator("name") 15 | def verify_name(cls, v: str): 16 | if v not in [ 17 | "t5-small", 18 | "t5-base", 19 | "t5-large", 20 | "t5-3b", 21 | "t5-11b", 22 | ]: 23 | raise ValueError(f"Backbone using {v} is not supported!") 24 | return v 25 | 26 | 27 | class DatasetItem(BaseModel): 28 | label: str 29 | token: str 30 | token_id: int 31 | root: str 32 | 33 | @validator("root") 34 | def verify_root(cls, v: str): 35 | if not os.path.exists(v): 36 | raise ValueError(f"Directory {v} does not exist!") 37 | return v 38 | 39 | 40 | class DataloaderField(BaseModel): 41 | batch_size: int 42 | num_workers: int 43 | 44 | @validator("num_workers") 45 | def verify_num_workers(cls, v: int): 46 | if v < 1 or v > os.cpu_count(): 47 | raise ValueError(f"Number of workers {v} is not supported!") 48 | return v 49 | 50 | 51 | class TokenizerField(BaseModel): 52 | padding: bool 53 | truncation: bool 54 | return_tensors: str 55 | 56 | @validator("return_tensors") 57 | def verify_return_tensors(cls, v: str): 58 | if v not in ["pt", "tf", "np"]: 59 | raise ValueError(f"Returning tensors with {v} is not supported!") 60 | return v 61 | 62 | 63 | class OptimizerField(BaseModel): 64 | lr: float 65 | weight_decay: float 66 | batch_size: int 67 | 68 | 69 | class Config(BaseModel): 70 | """ 71 | @note: 72 | - If mode is set to 'interpret', all hidden states and attention weights will be returned. 73 | """ 74 | 75 | id: str 76 | mode: str 77 | epochs: int 78 | backbone: BackboneField 79 | dataset: List[DatasetItem] 80 | dataloader: DataloaderField 81 | tokenizer: TokenizerField 82 | optimizer: OptimizerField 83 | 84 | @validator("mode") 85 | def verify_mode(cls, v: str): 86 | if v not in ["training", "interpret"]: 87 | raise ValueError(f"Mode {v} is not supported!") 88 | return v 89 | 90 | @validator("dataset") 91 | def verify_dataset(cls, v: List[DatasetItem]): 92 | labels, tokens, roots = set(), set(), set() 93 | for item in v: 94 | if item.label in labels: 95 | raise ValueError(f"Label {item.label} is not unique!") 96 | labels.add(item.label) 97 | if item.token in tokens: 98 | raise ValueError(f"Token {item.token} is not unique!") 99 | tokens.add(item.token) 100 | if item.root in roots: 101 | raise ValueError(f"Root {item.root} is not unique!") 102 | roots.add(item.root) 103 | return v 104 | 105 | 106 | class SentinelOutput(BaseModel): 107 | huggingface: Seq2SeqLMOutput 108 | probabilities: Tensor 109 | 110 | class Config: 111 | arbitrary_types_allowed = True 112 | -------------------------------------------------------------------------------- /detector/t5_sentinel/types.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | from torch import Tensor 4 | from transformers.modeling_outputs import Seq2SeqLMOutput 5 | from pydantic import BaseModel, validator, ConfigDict 6 | 7 | 8 | class BackboneField(BaseModel): 9 | name: str 10 | model_max_length: int 11 | 12 | model_config = ConfigDict(protected_namespaces=()) 13 | 14 | @validator("name") 15 | def verify_name(cls, v: str): 16 | if v not in [ 17 | "t5-small", 18 | "t5-base", 19 | "t5-large", 20 | "t5-3b", 21 | "t5-11b", 22 | ]: 23 | raise ValueError(f"Backbone using {v} is not supported!") 24 | return v 25 | 26 | 27 | class DatasetItem(BaseModel): 28 | label: str 29 | token: str 30 | token_id: int 31 | root: str 32 | 33 | @validator("root") 34 | def verify_root(cls, v: str): 35 | if not 
os.path.exists(v): 36 | raise ValueError(f"Directory {v} does not exist!") 37 | return v 38 | 39 | 40 | class DataloaderField(BaseModel): 41 | batch_size: int 42 | num_workers: int 43 | 44 | @validator("num_workers") 45 | def verify_num_workers(cls, v: int): 46 | if v < 1 or v > os.cpu_count(): 47 | raise ValueError(f"Number of workers {v} is not supported!") 48 | return v 49 | 50 | 51 | class TokenizerField(BaseModel): 52 | padding: bool 53 | truncation: bool 54 | return_tensors: str 55 | 56 | @validator("return_tensors") 57 | def verify_return_tensors(cls, v: str): 58 | if v not in ["pt", "tf", "np"]: 59 | raise ValueError(f"Returning tensors with {v} is not supported!") 60 | return v 61 | 62 | 63 | class OptimizerField(BaseModel): 64 | lr: float 65 | weight_decay: float 66 | batch_size: int 67 | 68 | 69 | class Config(BaseModel): 70 | """ 71 | @note: 72 | - If mode is set to 'interpret', all hidden states and attention weights will be returned. 73 | """ 74 | 75 | id: str 76 | mode: str 77 | epochs: int 78 | backbone: BackboneField 79 | dataset: List[DatasetItem] 80 | dataloader: DataloaderField 81 | tokenizer: TokenizerField 82 | optimizer: OptimizerField 83 | 84 | @validator("mode") 85 | def verify_mode(cls, v: str): 86 | if v not in ["training", "interpret"]: 87 | raise ValueError(f"Mode {v} is not supported!") 88 | return v 89 | 90 | @validator("dataset") 91 | def verify_dataset(cls, v: List[DatasetItem]): 92 | labels, tokens, roots = set(), set(), set() 93 | for item in v: 94 | if item.label in labels: 95 | raise ValueError(f"Label {item.label} is not unique!") 96 | labels.add(item.label) 97 | if item.token in tokens: 98 | raise ValueError(f"Token {item.token} is not unique!") 99 | tokens.add(item.token) 100 | if item.root in roots: 101 | raise ValueError(f"Root {item.root} is not unique!") 102 | roots.add(item.root) 103 | return v 104 | 105 | 106 | class SentinelOutput(BaseModel): 107 | huggingface: Seq2SeqLMOutput 108 | probabilities: Tensor 109 | 110 | class Config: 111 | arbitrary_types_allowed = True 112 | -------------------------------------------------------------------------------- /detector/solaiman_classifier/solaiman_requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.4 2 | aiosignal==1.3.1 3 | anyio==3.6.2 4 | argon2-cffi==21.3.0 5 | argon2-cffi-bindings==21.2.0 6 | arrow==1.2.3 7 | asttokens==2.2.1 8 | async-timeout==4.0.2 9 | attrs==22.2.0 10 | backcall==0.2.0 11 | beautifulsoup4==4.12.0 12 | bleach==6.0.0 13 | brotlipy==0.7.0 14 | certifi==2022.12.7 15 | cffi==1.15.1 16 | charset-normalizer==2.0.4 17 | click==8.1.3 18 | comm==0.1.3 19 | contourpy==1.0.5 20 | cryptography==39.0.1 21 | cycler==0.11.0 22 | daal4py==2023.0.2 23 | debugpy==1.6.6 24 | decorator==5.1.1 25 | defusedxml==0.7.1 26 | executing==1.2.0 27 | fastjsonschema==2.16.3 28 | filelock==3.10.7 29 | flit_core==3.8.0 30 | fonttools==4.25.0 31 | fqdn==1.5.1 32 | frozenlist==1.3.3 33 | gdown==4.6.6 34 | idna==3.4 35 | importlib-metadata==6.1.0 36 | importlib-resources==5.12.0 37 | ipykernel==6.22.0 38 | ipython==8.11.0 39 | ipython-genutils==0.2.0 40 | ipywidgets==8.0.6 41 | isoduration==20.11.0 42 | jedi==0.18.2 43 | Jinja2==3.1.2 44 | joblib==1.2.0 45 | jsonpointer==2.3 46 | jsonschema==4.17.3 47 | jupyter==1.0.0 48 | jupyter_client==8.1.0 49 | jupyter-console==6.6.3 50 | jupyter_core==5.3.0 51 | jupyter-events==0.6.3 52 | jupyter_server==2.5.0 53 | jupyter_server_terminals==0.4.4 54 | jupyterlab-pygments==0.2.2 55 | jupyterlab-widgets==3.0.7 
56 | kiwisolver==1.4.4 57 | MarkupSafe==2.1.2 58 | matplotlib==3.7.1 59 | matplotlib-inline==0.1.6 60 | mistune==2.0.5 61 | mkl-fft==1.3.1 62 | mkl-random==1.2.2 63 | mkl-service==2.4.0 64 | multidict==6.0.4 65 | munkres==1.1.4 66 | nbclassic==0.5.3 67 | nbclient==0.7.2 68 | nbconvert==7.2.10 69 | nbformat==5.8.0 70 | nest-asyncio==1.5.6 71 | notebook==6.5.3 72 | notebook_shim==0.2.2 73 | numpy==1.23.5 74 | openai==0.27.2 75 | packaging==23.0 76 | pandas==1.5.3 77 | pandocfilters==1.5.0 78 | parso==0.8.3 79 | pexpect==4.8.0 80 | pickleshare==0.7.5 81 | Pillow==9.4.0 82 | pip==23.0.1 83 | pkgutil_resolve_name==1.3.10 84 | platformdirs==3.2.0 85 | ply==3.11 86 | prometheus-client==0.16.0 87 | prompt-toolkit==3.0.38 88 | psutil==5.9.4 89 | ptyprocess==0.7.0 90 | pure-eval==0.2.2 91 | pycparser==2.21 92 | Pygments==2.14.0 93 | pyOpenSSL==23.0.0 94 | pyparsing==3.0.9 95 | PyQt5-sip==12.11.0 96 | pyrsistent==0.19.3 97 | PySocks==1.7.1 98 | python-dateutil==2.8.2 99 | python-json-logger==2.0.7 100 | pytz==2023.3 101 | PyYAML==6.0 102 | pyzmq==25.0.2 103 | qtconsole==5.4.1 104 | QtPy==2.3.1 105 | regex==2023.3.23 106 | requests==2.28.1 107 | rfc3339-validator==0.1.4 108 | rfc3986-validator==0.1.1 109 | sacremoses==0.0.53 110 | scikit-learn==1.2.0 111 | scikit-learn-intelex==20230228.214413 112 | scipy==1.9.3 113 | Send2Trash==1.8.0 114 | sentencepiece==0.1.97 115 | setuptools==65.6.3 116 | sip==6.6.2 117 | six==1.16.0 118 | sniffio==1.3.0 119 | soupsieve==2.4 120 | stack-data==0.6.2 121 | terminado==0.17.1 122 | threadpoolctl==2.2.0 123 | tinycss2==1.2.1 124 | tokenizers==0.7.0 125 | toml==0.10.2 126 | torch==1.13.1 127 | torchaudio==0.13.1 128 | torchvision==0.14.1 129 | tornado==6.2 130 | tqdm==4.65.0 131 | traitlets==5.9.0 132 | transformers==2.9.1 133 | typing_extensions==4.4.0 134 | tzdata==2023.3 135 | Unidecode==1.3.6 136 | uri-template==1.2.0 137 | urllib3==1.26.14 138 | wcwidth==0.2.6 139 | webcolors==1.13 140 | webencodings==0.5.1 141 | websocket-client==1.5.1 142 | wheel==0.38.4 143 | widgetsnbextension==4.0.7 144 | yarl==1.8.2 145 | zipp==3.15.0 -------------------------------------------------------------------------------- /evaluator/models/t5_sentinel/t5_get_hidden_states.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append(".") 4 | 5 | import typing as T 6 | from pipeline import PipelineExecutor, P 7 | from evaluator.models.t5_sentinel.t5_pipeline import ( 8 | ExecuteT5, 9 | T5PredictToLogits, 10 | T5PredictToHidden, 11 | ) 12 | from pathlib import Path 13 | from memoizer import memoize 14 | 15 | CHECKPOINT = "T5Sentinel.0613.pt" 16 | 17 | 18 | def list_reduce(l1: T.List, l2: T.List) -> T.List: 19 | l1.extend(l2) 20 | return l1 21 | 22 | 23 | def argeq(arg1, arg2): 24 | return arg1[0] == arg2[0] 25 | 26 | 27 | def binary_eq(arg1, arg2): 28 | return arg1[0] == arg2[0] and arg1[1] == arg2[1] 29 | 30 | 31 | @memoize(cache_path=Path("./cache/t5_full_predicts.pt"), arg_eq=argeq) 32 | def evaluate_prediction_impl(from_file: Path) -> T.Sequence[P.ArrayEntry]: 33 | pipe = ( 34 | P.FromJsonStr() 35 | >> ExecuteT5(Path("./data/checkpoint", CHECKPOINT)) 36 | >> T5PredictToLogits() 37 | >> P.ToSingletonList(T.Optional[P.ArrayEntry]) 38 | ) 39 | 40 | executor = PipelineExecutor(worker_num=1) 41 | predicts = executor.sequential_mapreduce( 42 | map_fn=pipe, 43 | from_files=[from_file], 44 | identity=list(), 45 | reduce_fn=list_reduce, 46 | verbose=True, 47 | ) 48 | return predicts 49 | 50 | 51 | 
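# Note on the memoization pattern used throughout this module: @memoize is
# assumed (see memoizer/memoizer.py) to persist each function's results to the
# given `cache_path` (a .pt file under ./cache) and to skip recomputation when
# a previous call matches under `arg_eq`. Here `argeq` compares only the first
# positional argument (the source file path), while `binary_eq` additionally
# compares the second argument (the singleton string being removed).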
@memoize(cache_path=Path("./cache/t5_full_hiddens.pt"), arg_eq=argeq) 52 | def evaluate_hiddens_impl(from_file: Path) -> T.Sequence[P.ArrayEntry]: 53 | pipe = ( 54 | P.FromJsonStr() 55 | >> ExecuteT5(Path("./data/checkpoint", CHECKPOINT)) 56 | >> T5PredictToHidden() 57 | >> P.ToSingletonList(T.Optional[P.ArrayEntry]) 58 | ) 59 | 60 | executor = PipelineExecutor(worker_num=1) 61 | hiddens = executor.sequential_mapreduce( 62 | map_fn=pipe, 63 | from_files=[from_file], 64 | identity=list(), 65 | reduce_fn=list_reduce, 66 | verbose=True, 67 | ) 68 | return hiddens 69 | 70 | 71 | @memoize(cache_path=Path("./cache/t5_punctuation_removal.pt"), arg_eq=binary_eq) 72 | def evaluate_removals_impl(from_file: Path, singleton: str) -> T.Sequence[P.ArrayEntry]: 73 | pipe = ( 74 | P.FromJsonStr() 75 | >> P.RemoveSingleton(singleton) 76 | >> ExecuteT5(Path("./data/checkpoint", CHECKPOINT)) 77 | >> T5PredictToLogits() 78 | >> P.ToSingletonList(T.Optional[P.ArrayEntry]) 79 | ) 80 | 81 | executor = PipelineExecutor(worker_num=1) 82 | predicts = executor.sequential_mapreduce( 83 | map_fn=pipe, 84 | from_files=[from_file], 85 | identity=list(), 86 | reduce_fn=list_reduce, 87 | verbose=True, 88 | ) 89 | return predicts 90 | 91 | 92 | def evaluate_predictions(from_files: T.Sequence[Path]) -> T.Sequence[P.ArrayEntry]: 93 | result = [] 94 | for from_file in from_files: 95 | result.extend(evaluate_prediction_impl(from_file)) 96 | return result 97 | 98 | 99 | def evaluate_hidden_states(from_files: T.Sequence[Path]) -> T.Sequence[P.ArrayEntry]: 100 | result = [] 101 | for from_file in from_files: 102 | result.extend(evaluate_hiddens_impl(from_file)) 103 | return result 104 | 105 | 106 | def evaluate_removals( 107 | from_files: T.Sequence[Path], singleton: str 108 | ) -> T.Sequence[P.ArrayEntry]: 109 | result = [] 110 | for from_file in from_files: 111 | result.extend(evaluate_removals_impl(from_file, singleton)) 112 | return result 113 | -------------------------------------------------------------------------------- /generator/gpt4/gpt4_client.py: -------------------------------------------------------------------------------- 1 | """ 2 | @brief: A GPT4 response generator using Async io 3 | @author: Yutian Chen 4 | @date: May 15, 2023 5 | """ 6 | 7 | import asyncio 8 | import openai 9 | import json 10 | import yaml 11 | import time 12 | 13 | from pathlib import Path 14 | from generator.chatgpt.chatgpt_client import \ 15 | chatgpt_pred_fn, chatgpt_task_generator, chatgpt_state_initializer, estimate_token_count, HANDLE_STRATEGY 16 | from generator.client_base import AsyncRequestClient, TaskResult 17 | import pipeline.component.text_component as P 18 | 19 | Converter = P.WriteExtra({"source": "gpt4"}) >> P.ToJsonStr() 20 | 21 | async def gpt4_request_fn(self: AsyncRequestClient, state, subset, uid, text) -> TaskResult: 22 | if state["token"] > self.config["MaxTokenCount"]: 23 | print("Abort due to budget limit.") 24 | raise Exception("Exceed the MaxTokenCount setting") 25 | 26 | await self.worker_lock.acquire() 27 | start_time = time.time() 28 | 29 | # Ready ... now Work! 
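    # Control flow for the request below:
    #   1. Requests whose estimated token count exceeds config["MaxLengthAllowed"]
    #      are cancelled before any API call is made (TaskResult.CANCEL).
    #   2. InvalidRequestError and transient failures (rate limit, API error,
    #      timeout) release the worker lock and return TaskResult.RETRY so the
    #      scheduler can re-queue the task.
    #   3. On success, the rephrased text is appended to
    #      <OutputDirectory>/<subset>.jsonl, the run's token usage is added to
    #      state["token"], and the coroutine sleeps out the remainder of
    #      config["WaitTime"] before releasing the worker lock.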
30 | 31 | estimatedNumTokens = estimate_token_count(text) 32 | if estimatedNumTokens > self.config["MaxLengthAllowed"]: 33 | print("[x]\t", uid, 34 | "failed since it exceeds the token limit (" + str(self.config["MaxLengthAllowed"]) + ")") 35 | self.worker_lock.release() 36 | return TaskResult.CANCEL 37 | 38 | try: 39 | response = await openai.ChatCompletion.acreate( 40 | model="gpt-4", 41 | messages=[ 42 | {"role": "user", "content": "Rephrase the following paragraph by paragraph:\n\n" + text} 43 | ] 44 | ) 45 | 46 | except openai.error.InvalidRequestError: 47 | # no need to wait, since the request is not sent for some reason 48 | await asyncio.sleep(1.0) # Avoid flushing the API 49 | self.worker_lock.release() 50 | return TaskResult.RETRY 51 | 52 | except (openai.error.RateLimitError, openai.error.APIError, openai.error.TryAgain, openai.error.Timeout): 53 | await asyncio.sleep(self.config["WaitTime"]) 54 | self.worker_lock.release() 55 | return TaskResult.RETRY 56 | 57 | finishReason = response["choices"][0]["finish_reason"] 58 | result = HANDLE_STRATEGY[finishReason] 59 | 60 | if result == TaskResult.FINISH: 61 | machineText = response["choices"][0]["message"]["content"].strip() 62 | 63 | await self.writer_lock.acquire() 64 | with open(Path(self.config["OutputDirectory"], subset + ".jsonl"), "a", encoding="utf-8") as f: 65 | f.write(Converter({"uid": uid, "text": machineText, "extra": dict()})) 66 | f.write("\n") 67 | self.writer_lock.release() 68 | self.state["processed"].add((subset, uid)) 69 | 70 | self.state["token"] += response["usage"]["total_tokens"] 71 | 72 | # Wait for 60 secs, then release the lock to spawn a new worker coroutine 73 | # (We won't be blocked out) 74 | end_time = time.time() 75 | await asyncio.sleep(self.config["WaitTime"] - (end_time - start_time)) 76 | self.worker_lock.release() 77 | 78 | return result 79 | 80 | if __name__ == "__main__": 81 | with open("./generator/gpt4/gpt4_client.yaml", "r") as f: 82 | chatgpt_config = yaml.safe_load(f) 83 | 84 | with open(Path(chatgpt_config["ClientRoot"], "secret.json"), "r") as f: 85 | API_KEY = json.load(f)["OPENAI_API_KEY"] 86 | openai.api_key = API_KEY 87 | 88 | ChatGPTClient = AsyncRequestClient( 89 | chatgpt_config, 90 | gpt4_request_fn, 91 | chatgpt_pred_fn, 92 | chatgpt_task_generator, 93 | chatgpt_state_initializer, 94 | display_args=lambda args: args[1] 95 | ) 96 | asyncio.run(ChatGPTClient.execute()) 97 | -------------------------------------------------------------------------------- /data/download.py: -------------------------------------------------------------------------------- 1 | import io 2 | import zipfile 3 | import requests 4 | 5 | from pathlib import Path 6 | 7 | from pipeline.lib.sanitize_dataset import sanitize 8 | from pipeline.lib.build_abalation import build_clean_variants 9 | from pipeline.lib.report_entry_count import report 10 | 11 | sources = ["gpt2-output", "open-gpt-text", "open-llama-text", "open-palm-text", "open-web-text"] 12 | from_subsets = ["test-dirty.jsonl", "train-dirty.jsonl", "valid-dirty.jsonl"] 13 | to_subsets = ["test.jsonl", "train.jsonl", "valid.jsonl"] 14 | 15 | def downloadAndExtractTo(url: str, to: Path): 16 | print(f"Downloading: {url} => {to}") 17 | file = zipfile.ZipFile(io.BytesIO(requests.get(url, stream=True).content)) 18 | file.extractall(to) 19 | 20 | def downloadAndSaveTo(url, filename): 21 | print(f"Downloading: {url} => {filename}") 22 | with requests.get(url, stream=True) as r: 23 | if r.status_code == 200: 24 | r.raise_for_status() 25 | with open(filename, 
'wb') as f: 26 | for chunk in r.iter_content(chunk_size=8192): f.write(chunk) 27 | else: 28 | print(f"Failed to download file, status code: {r.status_code}") 29 | 30 | if __name__ == "__main__": 31 | from_files = [Path(source, from_subset) 32 | for from_subset in from_subsets 33 | for source in sources] 34 | to_files = [Path(source, to_subset) 35 | for to_subset in to_subsets 36 | for source in sources] 37 | 38 | print("Download Checkpoints (models)") 39 | downloadAndSaveTo("https://github.com/MarkChenYutian/T5-Sentinel-public/releases/download/InitialCommit/solaiman-detector-base.pt", Path("data", "checkpoint", "solaiman-detector-base.pt")) 40 | downloadAndSaveTo("https://github.com/MarkChenYutian/T5-Sentinel-public/releases/download/InitialCommit/T5Hidden.0622.pt", Path("data", "checkpoint", "T5Hidden.0622.pt")) 41 | downloadAndSaveTo("https://github.com/MarkChenYutian/T5-Sentinel-public/releases/download/InitialCommit/T5Sentinel.0613.pt", Path("data", "checkpoint", "T5Sentinel.0613.pt")) 42 | 43 | print("Download Dataset") 44 | downloadAndExtractTo("https://zenodo.org/records/8285326/files/GPT2.zip?download=1", Path("data", "split", "gpt2-output")) 45 | downloadAndExtractTo("https://zenodo.org/records/8285326/files/ChatGPT.zip?download=1", Path("data", "split", "open-gpt-text")) 46 | downloadAndExtractTo("https://zenodo.org/records/8285326/files/LLaMA.zip?download=1", Path("data", "split", "open-llama-text")) 47 | downloadAndExtractTo("https://zenodo.org/records/8285326/files/PaLM.zip?download=1", Path("data", "split", "open-palm-text")) 48 | downloadAndExtractTo("https://zenodo.org/records/8285326/files/Human.zip?download=1", Path("data", "split", "open-web-text")) 49 | downloadAndExtractTo("https://zenodo.org/records/8285326/files/ZeroGPT-baseline-response.zip?download=1", Path("data", "baselines", "zerogpt_classifier_output")) 50 | downloadAndExtractTo("https://zenodo.org/records/8285326/files/OpenAI-baseline-response.zip?download=1", Path("data", "baselines", "openai_classifier_output")) 51 | 52 | # Report 53 | print("Download Finished!\n\nDataset Statistics:\n") 54 | for source in sources: 55 | for subset in from_subsets: 56 | report(source, subset) 57 | print("\n") 58 | 59 | # Build cleaned up dataset version 60 | sanitize(from_files, to_files) 61 | 62 | # Build clean variants for the large ablation table 63 | build_clean_variants(Path("data", "split", "open-palm-text")) 64 | build_clean_variants(Path("data", "split", "open-web-text")) 65 | build_clean_variants(Path("data", "split", "open-gpt-text")) 66 | build_clean_variants(Path("data", "split", "gpt2-output")) 67 | build_clean_variants(Path("data", "split", "open-llama-text")) 68 | -------------------------------------------------------------------------------- /evaluator/plot/plot_pca.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import seaborn as sns 3 | import matplotlib.pyplot as plt 4 | from sklearn.decomposition import PCA 5 | from pathlib import Path 6 | 7 | import evaluator.models.t5_sentinel.t5_get_hidden_states as T5_Full 8 | 9 | 10 | def pca_analysis(hidden_states, labels, task_id): 11 | hiddens = [ 12 | [entry["data"][-1] for entry in hidden_states if entry["extra"]["source"] == label] 13 | for label in labels 14 | ] 15 | Harrays = [np.concatenate(hidden, axis=0).reshape((len(hidden), -1)) for hidden in hiddens] 16 | pca_core = PCA(n_components=2) 17 | pca_core.fit(np.concatenate(Harrays, axis=0)) 18 | Tarrays = [pca_core.transform(hidden) for hidden 
in Harrays] 19 | return Tarrays 20 | 21 | 22 | def plot_t5_full_pca(): 23 | hiddens = T5_Full.evaluate_hidden_states([ 24 | Path("data", "split", "open-web-text", "test-dirty.jsonl"), 25 | Path("data", "split", "open-gpt-text", "test-dirty.jsonl"), 26 | Path("data", "split", "open-palm-text", "test-dirty.jsonl"), 27 | Path("./data/split/open-llama-text/test-dirty.jsonl"), 28 | Path("./data/split/gpt2-output/test-dirty.jsonl") 29 | ]) 30 | labels = ["openweb", "chatgpt", "palm", "llama", "gpt2_xl"] 31 | Tarrays = pca_analysis(hiddens, labels, "t5_full_pca") 32 | fig = plt.figure(dpi=200) 33 | ax: plt.Axes = fig.add_subplot(1, 1, 1) 34 | ax.set_prop_cycle('color', sns.color_palette("hls")) 35 | ax.set_title("PCA projection of decoder hidden state\nT5-Sentinel") 36 | for transformed, label in zip(Tarrays, labels): 37 | ax.scatter(transformed[:, 0], transformed[:, 1], label=label, s=1) 38 | ax.legend() 39 | fig.tight_layout() 40 | fig.savefig("result/t5_sentinel/" + "pca_projection.pdf") 41 | 42 | 43 | def plot_t5_full_pca_compare(): 44 | hiddens = T5_Full.evaluate_hidden_states([ 45 | Path("data", "split", "open-web-text", "test-dirty.jsonl"), 46 | Path("data", "split", "open-gpt-text", "test-dirty.jsonl"), 47 | Path("data", "split", "open-palm-text", "test-dirty.jsonl"), 48 | Path("./data/split/open-llama-text/test-dirty.jsonl"), 49 | Path("./data/split/gpt2-output/test-dirty.jsonl") 50 | ]) 51 | labels = ["openweb", "chatgpt", "palm", "llama", "gpt2_xl"] 52 | Tarrays = pca_analysis(hiddens, labels, "t5_full_pca") 53 | fig = plt.figure(dpi=200) 54 | ax: plt.Axes = fig.add_subplot(1, 1, 1) 55 | ax.set_prop_cycle('color', sns.color_palette("pastel")) 56 | ax.set_title("PCA projection of decoder hidden state\nT5-Sentinel") 57 | for transformed, label in zip(Tarrays, labels): 58 | random_mask = np.random.randn(*(transformed[:, 0].shape)) < 0.2 59 | ax.scatter(transformed[random_mask, 0], transformed[random_mask, 1], s=1) 60 | 61 | hiddens = T5_Full.evaluate_hidden_states([ 62 | Path("data", "split", "open-web-text", "test.variant3.jsonl"), 63 | Path("data", "split", "open-gpt-text", "test.variant3.jsonl"), 64 | Path("data", "split", "open-palm-text", "test.variant3.jsonl"), 65 | Path("data", "split", "open-llama-text", "test.variant3.jsonl"), 66 | Path("data", "split", "gpt2-output", "test.variant3.jsonl") 67 | ]) 68 | Tarrays = pca_analysis(hiddens, labels, "t5_full_pca_variant3") 69 | ax.set_prop_cycle('color', sns.color_palette("dark")) 70 | for transformed, label in zip(Tarrays, labels): 71 | random_mask = np.random.randn(*(transformed[:, 0].shape)) < 0.2 72 | ax.scatter(transformed[random_mask, 0], transformed[random_mask, 1], label=label, s=1) 73 | 74 | ax.legend() 75 | fig.tight_layout() 76 | fig.savefig("result/t5_sentinel/" + "pca_projection_compare.pdf") 77 | 78 | 79 | 80 | if __name__ == "__main__": 81 | TASKS = [ 82 | plot_t5_full_pca, 83 | plot_t5_full_pca_compare 84 | ] 85 | 86 | for task in TASKS: 87 | print("Executing task: ", task.__name__) 88 | task() 89 | -------------------------------------------------------------------------------- /pipeline/component/misc_component.py: -------------------------------------------------------------------------------- 1 | import random 2 | import copy 3 | import typing as Tp 4 | from ..pipeline_base import Pipeline 5 | 6 | from pathlib import Path 7 | import hashlib 8 | 9 | I = Tp.TypeVar("I") 10 | O = Tp.TypeVar("O") 11 | 12 | 13 | class GetFileMD5(Pipeline[Path, Tp.Optional[str]]): 14 | def __init__(self, must_exist=False): 15 | 
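        # Note: the assignment below stores the literal False, so the `must_exist`
        # argument is effectively ignored; as written, GetFileMD5(must_exist=True)
        # still returns None for a missing file instead of raising FileNotFoundError.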
super().__init__() 16 | self.must_exist = False 17 | 18 | def __call__(self, path: Path) -> Tp.Optional[str]: 19 | if not path.exists(): 20 | if self.must_exist: 21 | raise FileNotFoundError(f"Try to compute MD5 of {path}, but file not exist.") 22 | else: 23 | return None 24 | 25 | md5 = hashlib.md5() 26 | with open(path, "rb") as f: 27 | for chunk in iter(lambda: f.read(4096), b""): md5.update(chunk) 28 | return md5.hexdigest() 29 | 30 | 31 | class Tee(Pipeline[I, I]): 32 | def __init__(self, side_effect_pipe): 33 | super().__init__() 34 | self.pipe = side_effect_pipe 35 | 36 | def __call__(self, obj): 37 | self.pipe(copy.deepcopy(obj)) 38 | return obj 39 | 40 | 41 | class Apply(Pipeline[I, O]): 42 | def __init__(self, input_type: Tp.Type[I], output_type: Tp.Type[O], function: Tp.Callable[[I], O]): 43 | super().__init__() 44 | self.IN_TYPE = input_type 45 | self.OUT_TYPE = output_type 46 | self.function = function 47 | 48 | def __call__(self, obj): 49 | return self.function(obj) 50 | 51 | 52 | class RandomFilter(Pipeline[I, I]): 53 | def __init__(self, block_factor: float): 54 | super().__init__() 55 | assert (0 <= block_factor <= 1) 56 | 57 | self.block_factor = block_factor 58 | 59 | def __call__(self, x: I): 60 | if x is None: return None 61 | if random.random() < self.block_factor: return None 62 | return x 63 | 64 | 65 | class Batchify(Pipeline[I, Tp.Optional[Tp.List[I]]]): 66 | def __init__(self, input_type: Tp.Type, batch_size: int, omit_none=True): 67 | super().__init__() 68 | self.IN_TYPE = input_type 69 | self.OUT_TYPE = Tp.Optional[Tp.List[input_type]] 70 | self.batch_size = batch_size 71 | self.omit_none = omit_none 72 | self.buffer = [] 73 | 74 | def __call__(self, entry: I) -> Tp.Optional[Tp.List[I]]: 75 | if self.omit_none and entry is None: return None 76 | 77 | self.buffer.append(entry) 78 | if len(self.buffer) == self.batch_size: 79 | batch = self.buffer[:] 80 | self.buffer = [] 81 | return batch 82 | else: 83 | return None 84 | 85 | 86 | class AutoBatchify(Pipeline[Tp.Optional[Tp.List[I]], Tp.Optional[Tp.List[O]]]): 87 | def __init__(self, pipe: Pipeline[I, O]): 88 | super().__init__() 89 | self.IN_TYPE = Tp.Optional[Tp.List[pipe.IN_TYPE]] 90 | self.OUT_TYPE = Tp.Optional[Tp.List[pipe.OUT_TYPE]] 91 | self.internal_pipe = pipe 92 | 93 | def __call__(self, batch): 94 | if batch is None: return None 95 | return [self.internal_pipe(entry) for entry in batch] 96 | 97 | def __repr__(self): 98 | return f"AutoBatchify({str(self.internal_pipe)})" 99 | 100 | 101 | class ToSingletonSet(Pipeline[Tp.Any, Tp.Set[Tp.Any]]): 102 | def __init__(self, input_type): 103 | super().__init__() 104 | self.IN_TYPE = input_type 105 | self.OUT_TYPE = Tp.Set[input_type] 106 | 107 | def __call__(self, obj): 108 | if obj is None: return {} 109 | return {obj} 110 | 111 | 112 | class ToSingletonList(Pipeline[Tp.Any, Tp.List[Tp.Any]]): 113 | def __init__(self, input_type): 114 | super().__init__() 115 | self.IN_TYPE = input_type 116 | self.OUT_TYPE = Tp.List[input_type] 117 | 118 | def __call__(self, obj): 119 | if obj is None: return [] 120 | return [obj] 121 | -------------------------------------------------------------------------------- /evaluator/models/t5_sentinel/t5_pipeline.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import typing as Tp 3 | from pathlib import Path 4 | from pipeline import P 5 | 6 | from detector.t5_sentinel.model import Sentinel 7 | from detector.t5_sentinel.types import SentinelOutput 8 | from transformers import 
T5TokenizerFast as Tokenizer 9 | 10 | 11 | class T5PredictionOutput(Tp.TypedDict): 12 | hidden: Tp.List[torch.Tensor] 13 | predict: torch.Tensor 14 | input: P.TextEntry 15 | 16 | 17 | class ExecuteT5(P.Pipeline[Tp.Optional[P.TextEntry], Tp.Optional[T5PredictionOutput]]): 18 | def __init__(self, weight_path: Path, backbone_name: str = "t5-small"): 19 | assert weight_path.exists() 20 | super().__init__() 21 | 22 | checkpoint = torch.load(weight_path) 23 | model = Sentinel() 24 | model.load_state_dict(checkpoint["model"]) 25 | 26 | self.model = model.to("cuda") 27 | self.model.config.mode = "interpret" 28 | self.model.eval() 29 | self.tokenizer = Tokenizer.from_pretrained(backbone_name, model_max_length=512) 30 | 31 | def __call__( 32 | self, entry: Tp.Optional[P.TextEntry] 33 | ) -> Tp.Optional[T5PredictionOutput]: 34 | if entry is None: 35 | return None 36 | text_tokenized = self.tokenizer.batch_encode_plus( 37 | (entry["text"],), padding=True, truncation=True, return_tensors="pt" 38 | ) 39 | result: SentinelOutput = self.model.forward( 40 | text_tokenized.input_ids.cuda(), 41 | text_tokenized.attention_mask.cuda(), 42 | ) 43 | 44 | logits = result.probabilities.cpu() 45 | hiddens = [h.cpu() for h in result.huggingface.decoder_hidden_states[0]] 46 | 47 | return {"hidden": hiddens, "predict": logits, "input": entry} 48 | 49 | 50 | class T5PredictToLogits( 51 | P.Pipeline[Tp.Optional[T5PredictionOutput], Tp.Optional[P.ArrayEntry]] 52 | ): 53 | def __call__( 54 | self, prediction: Tp.Optional[T5PredictionOutput] 55 | ) -> Tp.Optional[P.ArrayEntry]: 56 | if prediction is None: 57 | return None 58 | 59 | uid: str = ( 60 | prediction["input"]["uid"] 61 | if prediction["input"]["uid"] is not None 62 | else "No-UID-placeholder" 63 | ) 64 | return { 65 | "uid": uid, 66 | "data": prediction["predict"][0].numpy(), 67 | "extra": prediction["input"]["extra"], 68 | } 69 | 70 | 71 | class T5PredictToHidden( 72 | P.Pipeline[Tp.Optional[T5PredictionOutput], Tp.Optional[P.ArrayEntry]] 73 | ): 74 | def __call__( 75 | self, prediction: Tp.Optional[T5PredictionOutput] 76 | ) -> Tp.Optional[P.ArrayEntry]: 77 | if prediction is None: 78 | return None 79 | 80 | uid: str = ( 81 | prediction["input"]["uid"] 82 | if prediction["input"]["uid"] is not None 83 | else "No-UID-placeholder" 84 | ) 85 | return { 86 | "uid": uid, 87 | "data": prediction["hidden"], 88 | "extra": prediction["input"]["extra"], 89 | } 90 | 91 | 92 | if __name__ == "__main__": 93 | pipe = ( 94 | ExecuteT5(Path("./data/checkpoint/T5Sentinel.0613.pt")) >> T5PredictToLogits() 95 | ) 96 | result = pipe( 97 | { 98 | "uid": "001", 99 | "text": """What is this?\n\nThis is a neat little plugin to give more options to the webp daemon. It's a vector graphic library when used together with a webscape window, and although smoothies aren't a thing yet. It uses dot-comparison for vector shapes, and is even able to do line spaces. The is a fork of dvipng (although, it won't require you to compile the files during the last step, just download the latest source code) which is already being developed and tested byDendi Samantasan . It's also opened source, so anyone else can add back and forth support to the program. This is the first preview of this file, so it will likely change over time. 
There's a mailing list for discussion and bug reporting at""", 100 | "extra": None, 101 | } 102 | ) 103 | print(result) 104 | -------------------------------------------------------------------------------- /detector/t5_hidden/__main__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import wandb 3 | import torch 4 | from tqdm import tqdm 5 | import torch.nn as nn 6 | import torch.cuda as cuda 7 | import torch.optim as optim 8 | from torch.utils.data import DataLoader 9 | from detector.t5_hidden.dataset import Dataset 10 | from detector.t5_hidden.model import Sentinel 11 | from detector.t5_hidden.utilities import train, validate 12 | from detector.t5_hidden.__init__ import config 13 | 14 | 15 | ############################################################################## 16 | # Dataset and Dataloader 17 | ############################################################################## 18 | 19 | train_loader = DataLoader( 20 | train_dataset := Dataset("train-dirty"), 21 | collate_fn=train_dataset.collate_fn, 22 | batch_size=config.dataloader.batch_size, 23 | num_workers=config.dataloader.num_workers, 24 | shuffle=True, 25 | ) 26 | 27 | 28 | valid_loader = DataLoader( 29 | valid_dataset := Dataset("valid-dirty"), 30 | collate_fn=valid_dataset.collate_fn, 31 | batch_size=config.dataloader.batch_size, 32 | num_workers=config.dataloader.num_workers, 33 | shuffle=False, 34 | ) 35 | 36 | 37 | ############################################################################## 38 | # Model, Optimizer, and Scheduler 39 | ############################################################################## 40 | model = Sentinel().cuda() 41 | 42 | if cuda.device_count() > 1: 43 | model = nn.DataParallel(model) 44 | 45 | optimizer = optim.AdamW( 46 | model.parameters(), 47 | lr=config.optimizer.lr, 48 | weight_decay=config.optimizer.weight_decay, 49 | ) 50 | 51 | ############################################################################## 52 | # Task and Cache 53 | ############################################################################## 54 | 55 | task = wandb.init( 56 | name=config.id, 57 | project="llm-sentinel", 58 | entity="deep-learner", 59 | ) 60 | 61 | wandb.save("detector/t5_sentinel/__init__.py") 62 | wandb.save("detector/t5_sentinel/__main__.py") 63 | wandb.save("detector/t5_sentinel/dataset.py") 64 | wandb.save("detector/t5_sentinel/model.py") 65 | wandb.save("detector/t5_sentinel/settings.yaml") 66 | wandb.save("detector/t5_sentinel/types.py") 67 | wandb.save("detector/t5_sentinel/utilities.py") 68 | 69 | cache = f"storage/{config.id}" 70 | os.path.exists(cache) or os.makedirs(cache) 71 | 72 | if os.path.exists(f"{cache}/state.pt"): 73 | state = torch.load(f"{cache}/state.pt") 74 | model.load_state_dict(state["model"]) 75 | optimizer.load_state_dict(state["optimizer"]) 76 | startEpoch = state["epochIter"] + 1 77 | bestValidationAccuracy = state["validAccuracy"] 78 | else: 79 | startEpoch = 0 80 | bestValidationAccuracy = float("-inf") 81 | 82 | 83 | ############################################################################## 84 | # Training and Validation 85 | ############################################################################## 86 | for epoch in range(startEpoch, config.epochs): 87 | tqdm.write("Epoch {}".format(epoch + 1)) 88 | learnRate = optimizer.param_groups[0]["lr"] 89 | trainLoss, trainAccuracy = train(model, optimizer, train_loader) 90 | validAccuracy = validate(model, valid_loader) 91 | 92 | wandb.log( 93 | { 94 | 
"Training Loss": trainLoss, 95 | "Training Accuracy": trainAccuracy * 100, 96 | "Validation Accuracy": validAccuracy * 100, 97 | "Learning Rate": learnRate, 98 | } 99 | ) 100 | 101 | tqdm.write("Training Accuracy {:.2%}".format(trainAccuracy)) 102 | tqdm.write("Training Loss {:.4f}".format(trainLoss)) 103 | tqdm.write("Validation Accuracy {:.2%}".format(validAccuracy)) 104 | tqdm.write("Learning Rate {:.4f}".format(learnRate)) 105 | 106 | checkpoint = { 107 | "epochIter": epoch, 108 | "model": model.module.state_dict() 109 | if isinstance(model, nn.DataParallel) 110 | else model.state_dict(), 111 | "optimizer": optimizer.state_dict(), 112 | "validAccuracy": validAccuracy, 113 | } 114 | 115 | if validAccuracy > bestValidationAccuracy: 116 | bestValidationAccuracy = validAccuracy 117 | torch.save(checkpoint, f"{cache}/state.pt") 118 | tqdm.write("Checkpoint Saved!") 119 | -------------------------------------------------------------------------------- /detector/t5_sentinel/__main__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import wandb 3 | import torch 4 | from tqdm import tqdm 5 | import torch.nn as nn 6 | import torch.cuda as cuda 7 | import torch.optim as optim 8 | from torch.utils.data import DataLoader 9 | from detector.t5_sentinel.dataset import Dataset 10 | from detector.t5_sentinel.model import Sentinel 11 | from detector.t5_sentinel.utilities import train, validate 12 | from detector.t5_sentinel.__init__ import config 13 | 14 | 15 | ############################################################################## 16 | # Dataset and Dataloader 17 | ############################################################################## 18 | train_loader = DataLoader( 19 | train_dataset := Dataset("train-dirty"), 20 | collate_fn=train_dataset.collate_fn, 21 | batch_size=config.dataloader.batch_size, 22 | num_workers=config.dataloader.num_workers, 23 | shuffle=True, 24 | ) 25 | 26 | 27 | valid_loader = DataLoader( 28 | valid_dataset := Dataset("valid-dirty"), 29 | collate_fn=valid_dataset.collate_fn, 30 | batch_size=config.dataloader.batch_size, 31 | num_workers=config.dataloader.num_workers, 32 | shuffle=False, 33 | ) 34 | 35 | 36 | ############################################################################## 37 | # Model, Optimizer, and Scheduler 38 | ############################################################################## 39 | model = Sentinel().cuda() 40 | 41 | if cuda.device_count() > 1: 42 | model = nn.DataParallel(model) 43 | 44 | optimizer = optim.AdamW( 45 | model.parameters(), 46 | lr=config.optimizer.lr, 47 | weight_decay=config.optimizer.weight_decay, 48 | ) 49 | 50 | ############################################################################## 51 | # Task and Cache 52 | ############################################################################## 53 | 54 | task = wandb.init( 55 | name=config.id, 56 | project="llm-sentinel", 57 | entity="deep-learner", 58 | id="5gk3khsd", 59 | resume="must", 60 | ) 61 | 62 | wandb.save("detector/t5_sentinel/__init__.py") 63 | wandb.save("detector/t5_sentinel/__main__.py") 64 | wandb.save("detector/t5_sentinel/dataset.py") 65 | wandb.save("detector/t5_sentinel/model.py") 66 | wandb.save("detector/t5_sentinel/settings.yaml") 67 | wandb.save("detector/t5_sentinel/types.py") 68 | wandb.save("detector/t5_sentinel/utilities.py") 69 | 70 | cache = f"storage/{config.id}" 71 | os.path.exists(cache) or os.makedirs(cache) 72 | 73 | if os.path.exists(f"{cache}/state.pt"): 74 | state = 
torch.load(f"{cache}/state.pt") 75 | model.load_state_dict(state["model"]) 76 | optimizer.load_state_dict(state["optimizer"]) 77 | startEpoch = state["epochIter"] + 1 78 | bestValidationAccuracy = state["validAccuracy"] 79 | else: 80 | startEpoch = 0 81 | bestValidationAccuracy = float("-inf") 82 | 83 | 84 | ############################################################################## 85 | # Training and Validation 86 | ############################################################################## 87 | for epoch in range(startEpoch, config.epochs): 88 | tqdm.write("Epoch {}".format(epoch + 1)) 89 | learnRate = optimizer.param_groups[0]["lr"] 90 | trainLoss, trainAccuracy = train(model, optimizer, train_loader) 91 | validAccuracy = validate(model, valid_loader) 92 | 93 | wandb.log( 94 | { 95 | "Training Loss": trainLoss, 96 | "Training Accuracy": trainAccuracy * 100, 97 | "Validation Accuracy": validAccuracy * 100, 98 | "Learning Rate": learnRate, 99 | } 100 | ) 101 | 102 | tqdm.write("Training Accuracy {:.2%}".format(trainAccuracy)) 103 | tqdm.write("Training Loss {:.4f}".format(trainLoss)) 104 | tqdm.write("Validation Accuracy {:.2%}".format(validAccuracy)) 105 | tqdm.write("Learning Rate {:.4f}".format(learnRate)) 106 | 107 | checkpoint = { 108 | "epochIter": epoch, 109 | "model": model.module.state_dict() 110 | if isinstance(model, nn.DataParallel) 111 | else model.state_dict(), 112 | "optimizer": optimizer.state_dict(), 113 | "validAccuracy": validAccuracy, 114 | } 115 | 116 | if validAccuracy > bestValidationAccuracy: 117 | bestValidationAccuracy = validAccuracy 118 | torch.save(checkpoint, f"{cache}/state.pt") 119 | tqdm.write("Checkpoint Saved!") 120 | -------------------------------------------------------------------------------- /evaluator/models/t5_hidden/t5_pipeline.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import typing as Tp 3 | from pathlib import Path 4 | from pipeline import P 5 | 6 | from detector.t5_hidden.model import Sentinel 7 | from detector.t5_hidden.types import SentinelOutput 8 | from transformers import T5TokenizerFast as Tokenizer 9 | 10 | 11 | class T5PredictionOutput(Tp.TypedDict): 12 | hidden: Tp.List[torch.Tensor] 13 | predict: torch.Tensor 14 | input: P.TextEntry 15 | 16 | 17 | class ExecuteT5Hidden( 18 | P.Pipeline[Tp.Optional[P.TextEntry], Tp.Optional[T5PredictionOutput]] 19 | ): 20 | def __init__(self, weight_path: Path, backbone_name: str = "t5-small"): 21 | assert weight_path.exists() 22 | super().__init__() 23 | 24 | checkpoint = torch.load(weight_path) 25 | model = Sentinel() 26 | model.load_state_dict(checkpoint["model"]) 27 | 28 | self.model = model.to("cuda") 29 | self.model.config.mode = "interpret" 30 | self.model.eval() 31 | self.tokenizer = Tokenizer.from_pretrained(backbone_name, model_max_length=512) 32 | 33 | def __call__( 34 | self, entry: Tp.Optional[P.TextEntry] 35 | ) -> Tp.Optional[T5PredictionOutput]: 36 | if entry is None: 37 | return None 38 | text_tokenized = self.tokenizer.batch_encode_plus( 39 | (entry["text"],), padding=True, truncation=True, return_tensors="pt" 40 | ) 41 | result: SentinelOutput = self.model.forward( 42 | text_tokenized.input_ids.cuda(), 43 | text_tokenized.attention_mask.cuda(), 44 | ) 45 | 46 | logits = result.probabilities.detach().cpu() 47 | hiddens = [ 48 | h.detach().cpu() for h in result.huggingface.decoder_hidden_states[0] 49 | ] 50 | 51 | return {"hidden": hiddens, "predict": logits, "input": entry} 52 | 53 | 54 | class 
T5HiddenPredictToLogits( 55 | P.Pipeline[Tp.Optional[T5PredictionOutput], Tp.Optional[P.ArrayEntry]] 56 | ): 57 | def __call__( 58 | self, prediction: Tp.Optional[T5PredictionOutput] 59 | ) -> Tp.Optional[P.ArrayEntry]: 60 | if prediction is None: 61 | return None 62 | 63 | uid: str = ( 64 | prediction["input"]["uid"] 65 | if prediction["input"]["uid"] is not None 66 | else "No-UID-placeholder" 67 | ) 68 | return { 69 | "uid": uid, 70 | "data": prediction["predict"].numpy(), 71 | "extra": prediction["input"]["extra"], 72 | } 73 | 74 | 75 | class T5PredictToHidden( 76 | P.Pipeline[Tp.Optional[T5PredictionOutput], Tp.Optional[P.ArrayEntry]] 77 | ): 78 | def __call__( 79 | self, prediction: Tp.Optional[T5PredictionOutput] 80 | ) -> Tp.Optional[P.ArrayEntry]: 81 | if prediction is None: 82 | return None 83 | 84 | uid: str = ( 85 | prediction["input"]["uid"] 86 | if prediction["input"]["uid"] is not None 87 | else "No-UID-placeholder" 88 | ) 89 | return { 90 | "uid": uid, 91 | "data": prediction["hidden"], 92 | "extra": prediction["input"]["extra"], 93 | } 94 | 95 | 96 | if __name__ == "__main__": 97 | pipe = ( 98 | ExecuteT5Hidden(Path("./data/checkpoint/t5-small.0621.hidden.pt")) 99 | >> T5HiddenPredictToLogits() 100 | ) 101 | result = pipe( 102 | { 103 | "uid": "001", 104 | "text": """What is this?\n\nThis is a neat little plugin to give more options to the webp daemon. It's a vector graphic library when used together with a webscape window, and although smoothies aren't a thing yet. It uses dot-comparison for vector shapes, and is even able to do line spaces. The is a fork of dvipng (although, it won't require you to compile the files during the last step, just download the latest source code) which is already being developed and tested byDendi Samantasan . It's also opened source, so anyone else can add back and forth support to the program. This is the first preview of this file, so it will likely change over time. There's a mailing list for discussion and bug reporting at""", 105 | "extra": None, 106 | } 107 | ) 108 | print(result) 109 | -------------------------------------------------------------------------------- /detector/t5_sentinel/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch import Tensor 4 | from typing import Optional, Tuple 5 | from transformers import T5ForConditionalGeneration as Backbone 6 | from detector.t5_sentinel.__init__ import config 7 | from detector.t5_sentinel.types import SentinelOutput 8 | 9 | 10 | class Sentinel(nn.Module): 11 | def __init__(self) -> None: 12 | super().__init__() 13 | self.backbone: Backbone = Backbone.from_pretrained(config.backbone.name) 14 | self.config = config 15 | 16 | def forward(self, corpus_ids: Tensor, corpus_mask: Tensor, label_ids: Optional[Tensor] = None, selectedDataset: Tuple[str] = ('Human', 'ChatGPT', 'PaLM', 'LLaMA', 'GPT2')) -> SentinelOutput: 17 | ''' 18 | Args: 19 | corpus_ids (Tensor): The input corpus ids. 20 | corpus_mask (Tensor): The input attention mask. 21 | label_ids (Tensor): The input label ids. 22 | 23 | Returns: 24 | output (SentinelOutput): The output of the model. 
25 | 26 | Example: 27 | >>> model = Sentinel() 28 | >>> model.eval() 29 | >>> with torch.no_grad(): 30 | >>> corpus_ids, corpus_mask, label_ids = next(iter(train_loader)) 31 | >>> model.forward(corpus_ids.cuda(), corpus_mask.cuda(), label_ids.cuda()) 32 | huggingface=Seq2SeqLMOutput( 33 | loss=..., 34 | logits=..., 35 | past_key_values=..., 36 | decoder_hidden_states=..., 37 | decoder_attentions=..., 38 | cross_attentions=..., 39 | encoder_last_hidden_state=..., 40 | encoder_hidden_states=..., 41 | encoder_attentions=... 42 | ), 43 | probabilities=tensor([ 44 | [1.0000e+00, 2.5421e-07, 1.8315e-07, 4.8886e-07], 45 | [1.0000e+00, 5.2608e-07, 1.0334e-06, 9.4020e-07], 46 | [9.9997e-01, 5.3097e-06, 8.8986e-06, 1.4712e-05], 47 | [9.9999e-01, 2.4895e-06, 1.7681e-06, 5.8721e-06], 48 | [9.9999e-01, 1.3558e-06, 1.1293e-06, 2.8045e-06], 49 | [1.0000e+00, 3.5004e-07, 3.6059e-07, 8.7667e-07], 50 | [9.9997e-01, 5.6359e-06, 7.8194e-06, 1.4346e-05], 51 | [9.9995e-01, 1.1463e-05, 1.2729e-05, 2.9505e-05] 52 | ], device='cuda:0') 53 | ''' 54 | 55 | filteredDataset = [item for item in config.dataset if item.label in selectedDataset] 56 | 57 | if self.training: 58 | outputs = self.backbone.forward( 59 | input_ids=corpus_ids, 60 | attention_mask=corpus_mask, 61 | labels=label_ids, 62 | output_hidden_states=(self.config.mode == 'interpret'), 63 | output_attentions=(self.config.mode == 'interpret') 64 | ) 65 | raw_scores = outputs.logits 66 | filtered_scores = raw_scores[:, 0, [item.token_id for item in filteredDataset]] 67 | probabilities = torch.softmax(filtered_scores, dim=-1) 68 | else: 69 | outputs = self.backbone.generate( 70 | input_ids=corpus_ids, 71 | max_length=2, # one for label token, one for eos token 72 | output_scores=True, 73 | return_dict_in_generate=True, 74 | output_hidden_states=True 75 | ) 76 | raw_scores = torch.stack(outputs.scores) 77 | filtered_scores = raw_scores[0, :, [item.token_id for item in filteredDataset]] 78 | probabilities = torch.softmax(filtered_scores, dim=-1) 79 | 80 | return SentinelOutput.construct(huggingface=outputs, probabilities=probabilities) 81 | 82 | def interpretability_study_entry(self, corpus_ids: Tensor, corpus_mask: Tensor, label_ids: Tensor, selectedDataset: Tuple[str] = ('Human', 'ChatGPT', 'PaLM', 'LLaMA', 'GPT2')): 83 | assert self.injected_embedder is not None, "Injected gradient collector did not found" 84 | 85 | filteredDataset = [item for item in config.dataset if item.label in selectedDataset] 86 | outputs = self.backbone( 87 | input_ids=corpus_ids, 88 | attention_mask=corpus_mask, 89 | labels=label_ids, 90 | output_hidden_states=False, 91 | output_attentions=False 92 | ) 93 | raw_scores = outputs.logits 94 | loss = outputs.loss 95 | loss.backward() 96 | 97 | filtered_scores = raw_scores[:, 0, [item.token_id for item in filteredDataset]] 98 | probabilities = torch.softmax(filtered_scores, dim=-1) 99 | return probabilities 100 | -------------------------------------------------------------------------------- /detector/zerogpt_classifier/zerogpt_classifier_client.py: -------------------------------------------------------------------------------- 1 | """ 2 | @brief: An async generator used to collect ZeroGPT's classifier's response on test dataset 3 | @author: Yutian Chen 4 | @date: June 10, 2023 5 | """ 6 | import asyncio 7 | import aiohttp 8 | import yaml 9 | import json 10 | import time 11 | 12 | from typing import TypedDict, List, Tuple 13 | from pathlib import Path 14 | from generator.client_base import AsyncRequestClient, TaskResult 15 | from 
pipeline.component.text_component import TextEntry 16 | import pipeline.component.text_component as P 17 | 18 | # Typing 19 | 20 | class ZeroGPTState(TypedDict): 21 | processed: set 22 | 23 | 24 | class ZeroGPTConfig(TypedDict): 25 | InputDirectory: List[str] 26 | OutputDirectory: List[str] 27 | WaitTime: float 28 | Header: dict 29 | URL: str 30 | 31 | ZeroGPTArgs = Tuple[TextEntry, Path] 32 | ZeroGPT_Client = AsyncRequestClient[ZeroGPTState, ZeroGPTArgs, ZeroGPTConfig] 33 | ### 34 | 35 | load_data_fn = P.FromJsonStr() >> P.WriteExtra({"pred_by": "zerogpt", "variant": "original"}) 36 | 37 | async def zerogpt_request_fn(self: ZeroGPT_Client, state: ZeroGPTState, *args: ZeroGPTArgs) -> TaskResult: 38 | entry: TextEntry 39 | destination: Path 40 | entry, destination = args 41 | 42 | submission = {"input_text": entry["text"]} 43 | 44 | async with self.worker_lock: 45 | start_time = time.time() 46 | try: 47 | async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=10)) as session: 48 | async with session.post(self.config["URL"], headers=self.config["Header"], json=submission) as response: 49 | status_code = response.status 50 | result = await response.json() 51 | 52 | duration = time.time() - start_time 53 | if status_code != 200: 54 | await asyncio.sleep(self.config["WaitTime"] - duration) 55 | return TaskResult.RETRY 56 | 57 | async with self.writer_lock: 58 | serializable = { 59 | "uid": entry["uid"], 60 | "extra": entry["extra"], 61 | "res": result 62 | } 63 | with open(destination, "a", encoding="utf-8") as f: f.write(json.dumps(serializable) + "\n") 64 | 65 | duration = time.time() - start_time 66 | await asyncio.sleep(self.config["WaitTime"] - duration) 67 | 68 | except (aiohttp.ClientError, aiohttp.ServerTimeoutError, aiohttp.ServerDisconnectedError): 69 | print("[x]\tClientError | ServerTimeoutError | ServerDisconnectedError: ") 70 | await asyncio.sleep(self.config["WaitTime"]) 71 | return TaskResult.RETRY 72 | 73 | except Exception as e: 74 | print("[x]\tUnexpected exception: ", e) 75 | await asyncio.sleep(self.config["WaitTime"]) 76 | return TaskResult.RETRY 77 | 78 | return TaskResult.FINISH 79 | 80 | 81 | def zerogpt_pred_fn(client: ZeroGPT_Client, state: ZeroGPTState, *args: ZeroGPTArgs) -> bool: 82 | entry: TextEntry 83 | entry, dest = args 84 | return entry["uid"] not in state["processed"] 85 | 86 | 87 | def zerogpt_task_generator(client: ZeroGPT_Client, state: ZeroGPTState) -> List[ZeroGPTArgs]: 88 | Tasks = [] 89 | for input_file, output_file in zip(client.config["InputDirectory"], client.config["OutputDirectory"]): 90 | counter = 0 91 | print(f"{input_file} --> {output_file}", end="\tCount:") 92 | assert Path(input_file).exists() 93 | with open(input_file, "r") as f: 94 | for line in f.read().strip().split("\n"): 95 | Tasks.append((load_data_fn(line), Path(output_file))) 96 | counter += 1 97 | print(counter) 98 | return Tasks 99 | 100 | 101 | def zerogpt_state_initializer(client: ZeroGPT_Client) -> ZeroGPTState: 102 | return {"processed": set()} 103 | 104 | 105 | if __name__ == "__main__": 106 | with open("./detector/zerogpt_classifier/zerogpt_classifier_client.yaml", "r") as f: 107 | openai_config = yaml.safe_load(f) 108 | 109 | ZeroGPTClient = ZeroGPT_Client( 110 | openai_config, 111 | zerogpt_request_fn, 112 | zerogpt_pred_fn, 113 | zerogpt_task_generator, 114 | zerogpt_state_initializer, 115 | display_args=lambda args: args[0]["uid"] 116 | ) 117 | asyncio.run(ZeroGPTClient.execute()) 118 | 
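[Editorial note, illustrative only] The ZeroGPT client above appends one JSON object per request to the file configured under OutputDirectory, with the fields `uid`, `extra` (the metadata written by WriteExtra, e.g. pred_by / variant), and `res` (the raw API response). A minimal sketch of reading such a JSONL output back for downstream analysis is given below; the path is a placeholder and nothing beyond `res` being a JSON object is assumed.

import json
from pathlib import Path

# Placeholder path; substitute the file listed under OutputDirectory in
# zerogpt_classifier_client.yaml.
records_path = Path("data/baselines/zerogpt_classifier_output/test.jsonl")

records = []
with open(records_path, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        obj = json.loads(line)
        # Each record carries the original sample id, the extra metadata added
        # by the loading pipeline, and the raw classifier response.
        records.append((obj["uid"], obj["extra"], obj["res"]))

print(f"Loaded {len(records)} classifier responses")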
-------------------------------------------------------------------------------- /evaluator/data_analysis/plot_adjative_distribution.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import matplotlib.ticker as mtick 3 | import matplotlib.patches as mpatches 4 | 5 | import nltk 6 | import memoizer 7 | from evaluator.toolkit import * 8 | from tqdm import tqdm 9 | import string 10 | 11 | 12 | def count_character(s: str, counter: dict) -> None: 13 | text = nltk.word_tokenize(s) 14 | pos_tagged = nltk.pos_tag(text, tagset='universal') 15 | for c in pos_tagged: 16 | if c in counter: counter[c] += 1 17 | else: counter[c] = 1 18 | 19 | 20 | def argeq(a, b): return a[0] == b[0] 21 | 22 | 23 | @memoizer.memoize(Path("cache", "dataset_adj_count.pt"), arg_eq=argeq) 24 | def count_dataset(dataset_name: Tp.Literal["human", "chatgpt", "palm", "llama", "gpt2"]) -> dict: 25 | selected_files = { 26 | "human": Human_Data, "chatgpt": GPT3_Data, "palm": PaLM_Data, "llama": LLaMA_Data, "gpt2": GPT2_Data 27 | }[dataset_name] 28 | 29 | counter = {c: 0 for c in string.printable} 30 | dataset = load_data(selected_files[1:]) 31 | for entry in tqdm(dataset): count_character(entry, counter) 32 | 33 | total_token = sum([counter[k] for k in counter]) 34 | result = {k: counter[k] / total_token for k in counter} 35 | return result 36 | 37 | 38 | 39 | def get_top_k_chars(counter: dict, k: int) -> list: 40 | kv_pair = [(counter[k], k) for k in counter] 41 | kv_pair.sort(key=lambda x: x[0], reverse=True) 42 | return [entry[1] for entry in kv_pair[:k]] 43 | 44 | 45 | def filter_dict(counter: dict, keys: list) -> dict: 46 | resulted_dict = {} 47 | for k in keys: 48 | resulted_dict[k] = counter[k] if k in counter else 0 49 | return resulted_dict 50 | 51 | 52 | def merge_keys(*arr_keys) -> list: 53 | set_keys = set() 54 | for keys in arr_keys: set_keys = set_keys.union(set(keys)) 55 | return list(set_keys) 56 | 57 | 58 | def sort_keys(counter, keys) -> list: 59 | key_arr = [(counter[key], key) for key in keys] 60 | key_arr.sort(key=lambda x: x[0], reverse=True) 61 | return [entry[1] for entry in key_arr] 62 | 63 | 64 | 65 | def plot_adj_distribution(): 66 | human_counter = count_dataset("human") 67 | gpt3_counter = count_dataset("chatgpt") 68 | palm_counter = count_dataset("palm") 69 | llama_counter = count_dataset("llama") 70 | gpt2_counter = count_dataset("gpt2") 71 | 72 | select_tok = get_top_k_chars(human_counter, k=40) 73 | 74 | human_counter = filter_dict(human_counter, select_tok) 75 | gpt3_counter = filter_dict(gpt3_counter, select_tok) 76 | palm_counter = filter_dict(palm_counter, select_tok) 77 | llama_counter = filter_dict(llama_counter, select_tok) 78 | gpt2_counter = filter_dict(gpt2_counter, select_tok) 79 | selected_keys = sort_keys(human_counter, select_tok) 80 | 81 | fig, axes = plt.subplots(ncols=1, nrows=5, dpi=200, sharey=True, sharex=True) 82 | colors = ["#2576b0", "#fc822e", "#349f3c", "#d32f2e", "#9368b9"] 83 | categories = ["Human", "GPT3.5", "PaLM", "LLaMA", "GPT2"] 84 | all_data = [human_counter, gpt3_counter, palm_counter, llama_counter, gpt2_counter] 85 | 86 | for idx, (category, counter, color, ax) in enumerate(zip(categories, all_data, colors, axes)): 87 | values = [counter[k] for k in selected_keys] 88 | keys = [k[0] for k in selected_keys] 89 | # print(selected_keys, values) 90 | # print(type(selected_keys[0]), type(values[0]), type(color)) 91 | ax.bar(keys, values, color=color) 92 | 93 | # for ax in axes[:-1]: 
ax.get_xaxis().set_visible(False) 94 | for ax in axes: ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0)) 95 | 96 | handles = [mpatches.Patch(color=c, label=label) for c, label in zip(colors, categories)] 97 | fig: plt.Figure 98 | axes: Tp.List[plt.Axes] 99 | axes[0].legend(loc='upper left', bbox_to_anchor=(1.05, 1.0), handles=handles) 100 | # axes[-1].set_xlabel(axes[-1].get_xlabel(), rotation=90) 101 | axes[-1].set_xticklabels(keys, rotation=45) 102 | 103 | fig.tight_layout() 104 | # axes[1].legend(handles=handles) 105 | fig.subplots_adjust(wspace=0, hspace=0.2) 106 | fig.text(0.00, 0.5, 'Frequency', va='center', rotation='vertical') 107 | fig.text(0.5, 0.01, 'Word classes distribution in OpenLLMText dataset', ha='center') 108 | 109 | fig.savefig("./result/data/dataset_adj_count.pdf") 110 | 111 | 112 | if __name__ == "__main__": 113 | TASKS = [ 114 | plot_adj_distribution 115 | ] 116 | for task in TASKS: 117 | print(f"Executing {task.__name__}") 118 | task() 119 | -------------------------------------------------------------------------------- /evaluator/plot/plot_tsne.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib.patches as mpatches 4 | from sklearn.decomposition import PCA 5 | from sklearn.manifold import TSNE 6 | from pathlib import Path 7 | 8 | import evaluator.models.t5_sentinel.t5_get_hidden_states as T5_Full 9 | import evaluator.models.t5_hidden.t5_get_hidden_states as T5_Hidden 10 | 11 | 12 | def tsne_analysis(hidden_states, labels, perplexity): 13 | hiddens = [ 14 | [entry["data"][-1] for entry in hidden_states if entry["extra"]["source"] == label] 15 | for label in labels 16 | ] 17 | Harrays = [np.concatenate(hidden, axis=0).reshape((len(hidden), -1)) for hidden in hiddens] 18 | X_original = np.concatenate(Harrays, axis=0) 19 | 20 | pca_core = PCA(n_components=50) 21 | tsne_core = TSNE(n_components=2, perplexity=perplexity, verbose=1) 22 | 23 | pca_core.fit(X_original) 24 | X_pca = pca_core.transform(X_original) 25 | X_tsne = tsne_core.fit_transform(X_pca) 26 | 27 | separate_tsne = [] 28 | accum = 0 29 | for h in hiddens: 30 | separate_tsne.append(X_tsne[accum:accum+len(h)]) 31 | accum += len(h) 32 | 33 | return separate_tsne 34 | 35 | 36 | def plot_t5_full_tsne(): 37 | hiddens = T5_Full.evaluate_hidden_states([ 38 | Path("data", "split", "open-web-text", "test-dirty.jsonl"), 39 | Path("data", "split", "open-gpt-text", "test-dirty.jsonl"), 40 | Path("data", "split", "open-palm-text", "test-dirty.jsonl"), 41 | Path("./data/split/open-llama-text/test-dirty.jsonl"), 42 | Path("./data/split/gpt2-output/test-dirty.jsonl") 43 | ]) 44 | labels = ["openweb", "chatgpt", "palm", "llama", "gpt2_xl"] 45 | display_labels = ["Human", "GPT3.5", "PaLM", "LLaMA", "GPT2-XL"] 46 | # perplexities = [5, 10, 25, 50, 75, 100] 47 | perplexities = [100] 48 | for perplexity in perplexities: 49 | separate_tsne = tsne_analysis(hiddens, labels, perplexity) 50 | 51 | fig = plt.figure(dpi=200) 52 | ax: plt.Axes = fig.add_subplot(1, 1, 1) 53 | # ax.set_prop_cycle('color', sns.color_palette("hls")) 54 | # ax.set_title(f"t-SNE Plot on Hidden State of T5-Sentinel\nwith perplexity {perplexity}") 55 | for transformed, label in zip(separate_tsne, display_labels): 56 | random_mask = np.random.randn(*(transformed[:, 0].shape)) < 0.2 57 | ax.scatter(transformed[random_mask, 0], transformed[random_mask, 1], s=1) 58 | 59 | colors = ["#2576b0", "#fc822e", "#349f3c", "#d32f2e", "#9368b9"] 60 | 
handles = [ 61 | mpatches.Patch(color=c, label=label) for c, label in zip(colors, display_labels) 62 | ] 63 | ax.legend(handles=handles) 64 | fig.tight_layout() 65 | fig.savefig("result/t5_sentinel/" + "tsne_" + str(perplexity) + ".pdf") 66 | 67 | 68 | def plot_t5_hidden_tsne(): 69 | hiddens = T5_Hidden.evaluate_hidden_states([ 70 | Path("data", "split", "open-web-text", "test-dirty.jsonl"), 71 | Path("data", "split", "open-gpt-text", "test-dirty.jsonl"), 72 | Path("data", "split", "open-palm-text", "test-dirty.jsonl"), 73 | Path("./data/split/open-llama-text/test-dirty.jsonl"), 74 | Path("./data/split/gpt2-output/test-dirty.jsonl") 75 | ]) 76 | labels = ["openweb", "chatgpt", "palm", "llama", "gpt2_xl"] 77 | display_labels = ["Human", "GPT3.5", "PaLM", "LLaMA", "GPT2-XL"] 78 | perplexities = [5, 10, 25, 50, 75, 100] 79 | # perplexities = [100] 80 | for perplexity in perplexities: 81 | separate_tsne = tsne_analysis(hiddens, labels, perplexity) 82 | 83 | fig = plt.figure(dpi=200) 84 | ax: plt.Axes = fig.add_subplot(1, 1, 1) 85 | # ax.set_prop_cycle('color', sns.color_palette("hls")) 86 | # ax.set_title(f"t-SNE Plot on Hidden State of T5-Sentinel\nwith perplexity {perplexity}") 87 | for transformed, label in zip(separate_tsne, display_labels): 88 | random_mask = np.random.randn(*(transformed[:, 0].shape)) < 0.2 89 | ax.scatter(transformed[random_mask, 0], transformed[random_mask, 1], s=1) 90 | 91 | colors = ["#2576b0", "#fc822e", "#349f3c", "#d32f2e", "#9368b9"] 92 | handles = [ 93 | mpatches.Patch(color=c, label=label) for c, label in zip(colors, display_labels) 94 | ] 95 | ax.legend(handles=handles) 96 | fig.tight_layout() 97 | fig.savefig("result/hidden/" + "tsne_" + str(perplexity) + ".pdf") 98 | 99 | if __name__ == "__main__": 100 | TASKS = [ 101 | plot_t5_full_tsne, 102 | plot_t5_hidden_tsne 103 | ] 104 | 105 | for task in TASKS: 106 | print("Executing task: ", task.__name__) 107 | task() 108 | 109 | -------------------------------------------------------------------------------- /evaluator/data_analysis/plot_token_distribution.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import matplotlib.ticker as mtick 3 | 4 | from transformers import T5TokenizerFast 5 | 6 | import memoizer 7 | from evaluator.toolkit import * 8 | from tqdm import tqdm 9 | import string 10 | 11 | Tokenizer = T5TokenizerFast.from_pretrained("t5-small") 12 | 13 | 14 | def count_character(s: str, counter: dict) -> None: 15 | tokens = Tokenizer.encode(s) 16 | for tok in tokens: 17 | if tok in counter: 18 | counter[tok] += 1 19 | else: 20 | counter[tok] = 1 21 | 22 | 23 | def argeq(a, b): 24 | return a[0] == b[0] 25 | 26 | 27 | @memoizer.memoize(Path("cache", "dataset_token_count_cache.pt"), arg_eq=argeq) 28 | def count_dataset( 29 | dataset: Tp.Literal["human", "chatgpt", "palm", "llama", "gpt2"] 30 | ) -> dict: 31 | selected_files = { 32 | "human": Human_Data, 33 | "chatgpt": GPT3_Data, 34 | "palm": PaLM_Data, 35 | "llama": LLaMA_Data, 36 | "gpt2": GPT2_Data, 37 | }[dataset] 38 | 39 | counter = {c: 0 for c in string.printable} 40 | dataset = load_data(selected_files) 41 | for entry in tqdm(dataset): 42 | count_character(entry, counter) 43 | 44 | total_token = sum([counter[k] for k in counter]) 45 | result = {k: counter[k] / total_token for k in counter} 46 | return result 47 | 48 | 49 | def get_top_k_chars(counter: dict, k: int) -> list: 50 | kv_pair = [(counter[k], k) for k in counter] 51 | kv_pair.sort(key=lambda x: x[0], reverse=True) 52 
| return [entry[1] for entry in kv_pair[:k]] 53 | 54 | 55 | def filter_dict(counter: dict, keys: list) -> dict: 56 | resulted_dict = {} 57 | for k in keys: 58 | resulted_dict[k] = counter[k] if k in counter else 0 59 | return resulted_dict 60 | 61 | 62 | def merge_keys(*arr_keys) -> list: 63 | set_keys = set() 64 | for keys in arr_keys: 65 | set_keys = set_keys.union(set(keys)) 66 | return list(set_keys) 67 | 68 | 69 | def sort_keys(counter, keys) -> list: 70 | key_arr = [(counter[key], key) for key in keys] 71 | key_arr.sort(key=lambda x: x[0], reverse=False) 72 | return [entry[1] for entry in key_arr] 73 | 74 | 75 | def plot_distribution(): 76 | human_counter = count_dataset("human") 77 | gpt3_counter = count_dataset("chatgpt") 78 | palm_counter = count_dataset("palm") 79 | llama_counter = count_dataset("llama") 80 | gpt2_counter = count_dataset("gpt2") 81 | 82 | # selected_keys = merge_keys( 83 | # get_top_k_chars(human_counter, 40), 84 | # get_top_k_chars(gpt3_counter, 40), 85 | # get_top_k_chars(palm_counter, 40), 86 | # get_top_k_chars(llama_counter, 40), 87 | # get_top_k_chars(gpt2_counter, 40) 88 | # ) 89 | selected_keys = get_top_k_chars(human_counter, 40) 90 | 91 | human_counter = filter_dict(human_counter, selected_keys) 92 | gpt3_counter = filter_dict(gpt3_counter, selected_keys) 93 | palm_counter = filter_dict(palm_counter, selected_keys) 94 | llama_counter = filter_dict(llama_counter, selected_keys) 95 | gpt2_counter = filter_dict(gpt2_counter, selected_keys) 96 | selected_keys = sort_keys(human_counter, selected_keys) 97 | display_keys = [Tokenizer.decode(k) for k in selected_keys] 98 | 99 | axes: Tp.List[plt.Axes] 100 | fig, axes = plt.subplots(ncols=5, nrows=1, dpi=200, sharey=True, sharex=True) 101 | colors = ["#2576b0", "#fc822e", "#349f3c", "#d32f2e", "#9368b9"] 102 | categoryies = ["Human", "GPT3.5", "PaLM", "LLaMA", "GPT2"] 103 | all_data = [human_counter, gpt3_counter, palm_counter, llama_counter, gpt2_counter] 104 | 105 | for idx, (category, counter, color, ax) in enumerate( 106 | zip(categoryies, all_data, colors, axes) 107 | ): 108 | values = [counter[k] for k in selected_keys] 109 | ax.barh(display_keys, values, color=color) 110 | 111 | for ax in axes[1:]: 112 | ax.get_yaxis().set_visible(False) 113 | for ax, cate in zip(axes, categoryies): 114 | ax.set_title(cate) 115 | ax.xaxis.set_major_formatter(mtick.PercentFormatter(1.0)) 116 | axes[0].set_yticklabels(display_keys, fontdict=dict(fontsize=7)) 117 | 118 | fig.text( 119 | 0.01, 120 | 0.5, 121 | "Most common tokens in OpenLLMText dataset", 122 | va="center", 123 | rotation="vertical", 124 | ) 125 | fig.text(0.5, 0.01, "Frequency", ha="center") 126 | fig.savefig("./result/data/dataset_token_count.pdf") 127 | 128 | 129 | if __name__ == "__main__": 130 | TASKS = [plot_distribution] 131 | for task in TASKS: 132 | print(f"Executing {task.__name__}") 133 | task() 134 | -------------------------------------------------------------------------------- /detector/openai_classifier/openai_classifier_client.py: -------------------------------------------------------------------------------- 1 | """ 2 | @brief: An async generator used to collect OpenAI's classifier's response on test dataset 3 | @author: Yutian Chen 4 | @date: May 16, 2023 5 | """ 6 | import asyncio 7 | import aiohttp 8 | import yaml 9 | import json 10 | import time 11 | 12 | from typing import TypedDict, List, Tuple 13 | from pathlib import Path 14 | from generator.client_base import AsyncRequestClient, TaskResult 15 | from pipeline.component.text_component 
import TextEntry 16 | import pipeline.component.text_component as P 17 | 18 | # Typing 19 | 20 | class OpenAIState(TypedDict): 21 | processed: set 22 | 23 | 24 | class OpenAIConfig(TypedDict): 25 | InputDirectory: List[str] 26 | OutputDirectory: List[str] 27 | WaitTime: float 28 | Header: dict 29 | URL: str 30 | 31 | OpenAIArgs = Tuple[TextEntry, Path] 32 | OpenAI_Type = AsyncRequestClient[OpenAIState, OpenAIArgs, OpenAIConfig] 33 | ### 34 | 35 | load_data_fn = P.FromJsonStr() >> P.WriteExtra({"pred_by": "openai", "variant": "original"}) 36 | 37 | async def openai_request_fn(self: OpenAI_Type, state: OpenAIState, *args: OpenAIArgs) -> TaskResult: 38 | entry: TextEntry 39 | destination: Path 40 | entry, destination = args 41 | 42 | submission = { 43 | "model": "model-detect-v2", 44 | "max_tokens": 1, "temperature": 1, "top_p": 1, "n": 1, "logprobs": 5, 45 | "stop": "\n", "stream": False, 46 | "prompt": entry["text"] + "<|disc_score|>" 47 | } 48 | 49 | async with self.worker_lock: 50 | start_time = time.time() 51 | try: 52 | async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=10)) as session: 53 | async with session.post(self.config["URL"], headers=self.config["Header"], json=submission) as response: 54 | status_code = response.status 55 | result = await response.json() 56 | 57 | duration = time.time() - start_time 58 | if status_code != 200: 59 | await asyncio.sleep(self.config["WaitTime"] - duration) 60 | return TaskResult.RETRY 61 | 62 | async with self.writer_lock: 63 | serializable = { 64 | "uid": entry["uid"], 65 | "extra": entry["extra"], 66 | "res": result 67 | } 68 | with open(destination, "a", encoding="utf-8") as f: f.write(json.dumps(serializable) + "\n") 69 | 70 | duration = time.time() - start_time 71 | await asyncio.sleep(self.config["WaitTime"] - duration) 72 | 73 | except (aiohttp.ClientError, aiohttp.ServerTimeoutError, aiohttp.ServerDisconnectedError): 74 | await asyncio.sleep(self.config["WaitTime"]) 75 | return TaskResult.RETRY 76 | 77 | except Exception as e: 78 | print("[x]\tUnexpected exception: ", e) 79 | return TaskResult.CANCEL 80 | 81 | return TaskResult.FINISH 82 | 83 | 84 | def openai_pred_fn(client: OpenAI_Type, state: OpenAIState, *args: OpenAIArgs) -> bool: 85 | entry: TextEntry 86 | entry, dest = args 87 | return entry["uid"] not in state["processed"] 88 | 89 | 90 | def openai_task_generator(client: OpenAI_Type, state: OpenAIState) -> List[OpenAIArgs]: 91 | Tasks = [] 92 | for input_file, output_file in zip(client.config["InputDirectory"], client.config["OutputDirectory"]): 93 | counter = 0 94 | print(f"{input_file} --> {output_file}", end="\tCount:") 95 | assert Path(input_file).exists() 96 | with open(input_file, "r") as f: 97 | for line in f.read().strip().split("\n"): 98 | Tasks.append((load_data_fn(line), Path(output_file))) 99 | counter += 1 100 | print(counter) 101 | return Tasks 102 | 103 | 104 | def openai_state_initializer(client: OpenAI_Type) -> OpenAIState: 105 | return {"processed": set()} 106 | 107 | 108 | if __name__ == "__main__": 109 | with open("./detector/openai_classifier/openai_classifier_client.yaml", "r") as f: 110 | openai_config = yaml.safe_load(f) 111 | 112 | with open("./detector/openai_classifier/secret.json", "r") as f: 113 | openai_secret = json.load(f) 114 | openai_config["Config"]["Header"].update(openai_secret) 115 | 116 | OpenAIClient = OpenAI_Type( 117 | openai_config, 118 | openai_request_fn, 119 | openai_pred_fn, 120 | openai_task_generator, 121 | openai_state_initializer, 122 | display_args=lambda args: 
args[0]["uid"] 123 | ) 124 | asyncio.run(OpenAIClient.execute()) 125 | -------------------------------------------------------------------------------- /evaluator/data_analysis/plot_length_distribution.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import matplotlib.ticker as mtick 3 | import matplotlib.patches as mpatches 4 | from transformers import T5TokenizerFast 5 | from tqdm import tqdm 6 | 7 | from evaluator.toolkit import * 8 | from memoizer import memoize 9 | 10 | Tokenizer = T5TokenizerFast.from_pretrained("t5-small") 11 | 12 | 13 | def get_string_length(s: str): 14 | return len(s) 15 | 16 | 17 | def get_token_length(s: str): 18 | return len(Tokenizer.encode(s)) 19 | 20 | 21 | def argeq(a, b): 22 | return a[0] == b[0] 23 | 24 | 25 | @memoize(Path("cache", "dataset_str_length_cache.pt"), arg_eq=argeq) 26 | def get_data_tok_length( 27 | dataset_name: Tp.Literal["human", "chatgpt", "palm", "llama", "gpt2"] 28 | ): 29 | selected_files = { 30 | "human": Human_Data, 31 | "chatgpt": GPT3_Data, 32 | "palm": PaLM_Data, 33 | "llama": LLaMA_Data, 34 | "gpt2": GPT2_Data, 35 | }[dataset_name] 36 | all_data = load_data(selected_files) 37 | return [get_token_length(s) for s in tqdm(all_data)] 38 | 39 | 40 | def plot_data_length_distribution(): 41 | human_len = get_data_tok_length("human") 42 | gpt3_len = get_data_tok_length("chatgpt") 43 | palm_len = get_data_tok_length("palm") 44 | llama_len = get_data_tok_length("llama") 45 | gpt2_len = get_data_tok_length("gpt2") 46 | 47 | colors = ["#2576b0", "#fc822e", "#349f3c", "#d32f2e", "#9368b9"] 48 | categories = ["Human", "GPT3.5", "PaLM", "LLaMA", "GPT2"] 49 | all_data = [human_len, gpt3_len, palm_len, llama_len, gpt2_len] 50 | 51 | fig, axes = plt.subplots(ncols=1, nrows=5, dpi=200, sharey=True) 52 | for idx, (category, data, color, ax) in enumerate( 53 | zip(categories, all_data, colors, axes) 54 | ): 55 | ax.hist(data, bins=100, range=(0, 2500), color=color, density=True) 56 | ax.grid(visible=True, linestyle="--") 57 | ax.set_ylim(0, 0.005) 58 | 59 | # axes[-1].set_xlabel("Sample Length (# Token)") 60 | for ax in axes[:-1]: 61 | ax.set_xticklabels([]) 62 | for ax in axes: 63 | ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0)) 64 | 65 | handles = [ 66 | mpatches.Patch(color=c, label=label) for c, label in zip(colors, categories) 67 | ] 68 | fig: plt.Figure 69 | axes[0].legend(loc="upper left", bbox_to_anchor=(1.05, 1.0), handles=handles) 70 | fig.tight_layout() 71 | 72 | fig.subplots_adjust(wspace=0, hspace=0.2) 73 | fig.text(0.01, 0.5, "Frequency", va="center", rotation="vertical") 74 | fig.text(0.5, 0.01, "Sample Length in OpenLLMText dataset (# Token)", ha="center") 75 | fig.savefig("./result/data/dataset_length_token.pdf") 76 | 77 | 78 | def plot_data_length_distribution_cut(): 79 | human_len = get_data_tok_length("human") 80 | gpt3_len = get_data_tok_length("chatgpt") 81 | palm_len = get_data_tok_length("palm") 82 | llama_len = get_data_tok_length("llama") 83 | gpt2_len = get_data_tok_length("gpt2") 84 | 85 | human_len = [min(l, 512) for l in human_len] 86 | gpt3_len = [min(l, 512) for l in gpt3_len] 87 | palm_len = [min(l, 512) for l in palm_len] 88 | llama_len = [min(l, 512) for l in llama_len] 89 | gpt2_len = [min(l, 512) for l in gpt2_len] 90 | 91 | colors = ["#2576b0", "#fc822e", "#349f3c", "#d32f2e", "#9368b9"] 92 | categories = ["Human", "GPT3.5", "PaLM", "LLaMA", "GPT2"] 93 | all_data = [human_len, gpt3_len, palm_len, llama_len, gpt2_len] 94 | 95 | 
fig, axes = plt.subplots(ncols=1, nrows=5, dpi=200, sharey=True) 96 | for idx, (category, data, color, ax) in enumerate( 97 | zip(categories, all_data, colors, axes) 98 | ): 99 | ax.hist(data, bins=100, range=(0, 512), color=color, density=True) 100 | ax.grid(visible=True, linestyle="--") 101 | ax.set_ylim(0, 0.15) 102 | 103 | # axes[-1].set_xlabel("Sample Length seen by T5-Sentinel (# Token)") 104 | for ax in axes[:-1]: 105 | ax.set_xticklabels([]) 106 | for ax in axes: 107 | ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0)) 108 | 109 | handles = [ 110 | mpatches.Patch(color=c, label=label) for c, label in zip(colors, categories) 111 | ] 112 | fig: plt.Figure 113 | axes[0].legend(loc="upper left", bbox_to_anchor=(1.05, 1.0), handles=handles) 114 | fig.tight_layout() 115 | 116 | fig.subplots_adjust(wspace=0, hspace=0.2) 117 | fig.text(0.01, 0.5, "Frequency", va="center", rotation="vertical") 118 | fig.text(0.5, 0.01, "Sample Length received by T5-Sentinel (# Token)", ha="center") 119 | fig.savefig("./result/data/dataset_length_token_cut.pdf") 120 | 121 | 122 | if __name__ == "__main__": 123 | TASKS = [plot_data_length_distribution, plot_data_length_distribution_cut] 124 | for task in TASKS: 125 | print(f"Executing {task.__name__}") 126 | task() 127 | -------------------------------------------------------------------------------- /pipeline/component/text_component.py: -------------------------------------------------------------------------------- 1 | import typing as Tp 2 | import json 3 | import re 4 | import string 5 | import unidecode 6 | from pathlib import Path 7 | 8 | from ..pipeline_base import Pipeline 9 | 10 | I = Tp.TypeVar("I") 11 | O = Tp.TypeVar("O") 12 | WHITELIST = string.whitespace + string.digits + string.ascii_letters 13 | 14 | 15 | class TextEntry(Tp.TypedDict): 16 | text: str 17 | uid: Tp.Optional[str] 18 | extra: Tp.Optional[dict] 19 | 20 | 21 | class FromJsonStr(Pipeline[Tp.Optional[str], Tp.Optional[TextEntry]]): 22 | def __call__(self, x): 23 | if x is None: return None 24 | obj = json.loads(x, strict=False) 25 | if "uid" not in obj: obj["uid"] = None 26 | if "extra" not in obj: obj["extra"] = None 27 | return obj 28 | 29 | 30 | class ToJsonStr(Pipeline[Tp.Optional[TextEntry], Tp.Optional[str]]): 31 | def __call__(self, x): 32 | if x is None: return None 33 | return json.dumps(x) 34 | 35 | 36 | class ToStr(Pipeline[Tp.Optional[TextEntry], Tp.Optional[str]]): 37 | def __call__(self, x): 38 | if x is None: return None 39 | return x["text"] 40 | 41 | 42 | class ToUID(Pipeline[Tp.Optional[TextEntry], Tp.Optional[str]]): 43 | def __call__(self, obj): 44 | if obj is None: return None 45 | return obj["uid"] 46 | 47 | 48 | class StripNewline(Pipeline[Tp.Optional[TextEntry], Tp.Optional[TextEntry]]): 49 | def __call__(self, obj): 50 | if obj is None: return obj 51 | obj["text"] = re.sub(r"\n+", r"\n", obj["text"]) 52 | return obj 53 | 54 | 55 | class CastUnicode(Pipeline[Tp.Optional[TextEntry], Tp.Optional[TextEntry]]): 56 | def __call__(self, obj): 57 | if obj is None: return obj 58 | obj["text"] = unidecode.unidecode(obj["text"]) 59 | return obj 60 | 61 | 62 | class WriteExtra(Pipeline[Tp.Optional[TextEntry], Tp.Optional[TextEntry]]): 63 | def __init__(self, extra_fields: dict): 64 | super().__init__() 65 | self.extra_fields = extra_fields 66 | 67 | def __call__(self, obj): 68 | if obj is None: return obj 69 | if obj["extra"] is None: obj["extra"] = dict() 70 | 71 | for key in self.extra_fields: 72 | obj["extra"][key] = self.extra_fields[key] 73 | return obj 74 | 
75 | 76 | class ToLower(Pipeline[Tp.Optional[TextEntry], Tp.Optional[TextEntry]]): 77 | def __call__(self, obj): 78 | if obj is None: return obj 79 | obj["text"] = obj["text"].lower() 80 | return obj 81 | 82 | 83 | class RemovePunc(Pipeline[Tp.Optional[TextEntry], Tp.Optional[TextEntry]]): 84 | def __call__(self, obj): 85 | if obj is None: return obj 86 | obj["text"] = "".join(filter(WHITELIST.__contains__, obj["text"])) 87 | return obj 88 | 89 | 90 | class RemoveSingleton(Pipeline[Tp.Optional[TextEntry], Tp.Optional[TextEntry]]): 91 | def __init__(self, singleToRemove: str): 92 | super().__init__() 93 | self.singleToRemove = singleToRemove 94 | 95 | def __call__(self, obj): 96 | if obj is None: return obj 97 | obj["text"] = obj["text"].replace(self.singleToRemove, "") 98 | return obj 99 | 100 | class RemoveContSpace(Pipeline[Tp.Optional[TextEntry], Tp.Optional[TextEntry]]): 101 | def __call__(self, obj: Tp.Optional[TextEntry]): 102 | if obj is None: return obj 103 | obj["text"] = re.sub(r'\s+', ' ', obj["text"]) 104 | obj["text"] = obj["text"].strip() 105 | return obj 106 | 107 | 108 | class FilterTextEntry(Pipeline[Tp.Optional[TextEntry], Tp.Optional[TextEntry]]): 109 | def __init__(self, rule: Tp.Callable[[TextEntry], bool]): 110 | super().__init__() 111 | self.filter_rule = rule 112 | 113 | def __call__(self, obj): 114 | return obj if (obj is not None) and (not self.filter_rule(obj)) else None 115 | 116 | 117 | class FilterIf_UID_NotInFile(Pipeline[Tp.Optional[TextEntry], Tp.Optional[TextEntry]]): 118 | def __init__(self, file_path: Path): 119 | super().__init__() 120 | assert (file_path.exists()) 121 | loader_pipeline = FromJsonStr() >> ToUID() 122 | 123 | self.uid_whitelist = set() 124 | with open(file_path, "r") as f: 125 | for line in f.read().strip().split("\n"): 126 | if line != "": self.uid_whitelist.add(loader_pipeline(line)) 127 | 128 | def __call__(self, obj: Tp.Optional[TextEntry]): 129 | if obj is None or obj["uid"] not in self.uid_whitelist: return None 130 | return obj 131 | 132 | 133 | class NegateFilter(Pipeline[Tp.Optional[TextEntry], Tp.Optional[TextEntry]]): 134 | def __init__(self, filter_pipe: Pipeline): 135 | super().__init__() 136 | self.filter = filter_pipe 137 | 138 | def __call__(self, obj: Tp.Optional[TextEntry]): 139 | if obj is None: return None 140 | result = self.filter(obj) 141 | return obj if result is None else None 142 | -------------------------------------------------------------------------------- /detector/t5_hidden/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch import Tensor 4 | from typing import Optional, Tuple 5 | from transformers import T5ForConditionalGeneration as Backbone 6 | from detector.t5_hidden.__init__ import config 7 | from detector.t5_hidden.types import SentinelOutput 8 | 9 | 10 | class Sentinel(nn.Module): 11 | def __init__(self) -> None: 12 | super().__init__() 13 | self.backbone: Backbone = Backbone.from_pretrained(config.backbone.name) 14 | self.sequential = nn.Sequential(nn.Linear(512, 512), nn.GELU(), nn.Linear(512, 5)) 15 | self.criterion = nn.CrossEntropyLoss() 16 | self.config = config 17 | 18 | def forward(self, corpus_ids: Tensor, corpus_mask: Tensor, label_ids: Optional[Tensor] = None, selectedDataset: Tuple[str] = ('Human', 'ChatGPT', 'PaLM', 'LLaMA', 'GPT2')) -> SentinelOutput: 19 | ''' 20 | Args: 21 | corpus_ids (Tensor): The input corpus ids. 22 | corpus_mask (Tensor): The input attention mask. 
23 | label_ids (Tensor): The input label ids. 24 | 25 | Returns: 26 | output (SentinelOutput): The output of the model. 27 | 28 | Example: 29 | >>> model = Sentinel() 30 | >>> model.eval() 31 | >>> with torch.no_grad(): 32 | >>> corpus_ids, corpus_mask, label_ids = next(iter(train_loader)) 33 | >>> model.forward(corpus_ids.cuda(), corpus_mask.cuda(), label_ids.cuda()) 34 | huggingface=Seq2SeqLMOutput( 35 | loss=..., 36 | logits=..., 37 | past_key_values=..., 38 | decoder_hidden_states=..., 39 | decoder_attentions=..., 40 | cross_attentions=..., 41 | encoder_last_hidden_state=..., 42 | encoder_hidden_states=..., 43 | encoder_attentions=... 44 | ), 45 | probabilities=tensor([ 46 | [1.0000e+00, 2.5421e-07, 1.8315e-07, 4.8886e-07], 47 | [1.0000e+00, 5.2608e-07, 1.0334e-06, 9.4020e-07], 48 | [9.9997e-01, 5.3097e-06, 8.8986e-06, 1.4712e-05], 49 | [9.9999e-01, 2.4895e-06, 1.7681e-06, 5.8721e-06], 50 | [9.9999e-01, 1.3558e-06, 1.1293e-06, 2.8045e-06], 51 | [1.0000e+00, 3.5004e-07, 3.6059e-07, 8.7667e-07], 52 | [9.9997e-01, 5.6359e-06, 7.8194e-06, 1.4346e-05], 53 | [9.9995e-01, 1.1463e-05, 1.2729e-05, 2.9505e-05] 54 | ], device='cuda:0') 55 | ''' 56 | 57 | if self.training: 58 | outputs = self.backbone.forward( 59 | input_ids=corpus_ids, 60 | attention_mask=corpus_mask, 61 | labels=label_ids, 62 | output_hidden_states=True, 63 | output_attentions=(self.config.mode == 'interpret') 64 | ) 65 | hiddens = outputs.decoder_hidden_states[-1][:, 0, :] 66 | logits = self.sequential(hiddens.squeeze()) 67 | 68 | labels = torch.zeros(label_ids.size(0), 5, dtype=torch.float32) 69 | labels[label_ids[:, 0] == 32099] = torch.tensor([1, 0, 0, 0, 0], dtype=torch.float32) 70 | labels[label_ids[:, 0] == 32098] = torch.tensor([0, 1, 0, 0, 0], dtype=torch.float32) 71 | labels[label_ids[:, 0] == 32097] = torch.tensor([0, 0, 1, 0, 0], dtype=torch.float32) 72 | labels[label_ids[:, 0] == 32096] = torch.tensor([0, 0, 0, 1, 0], dtype=torch.float32) 73 | labels[label_ids[:, 0] == 32095] = torch.tensor([0, 0, 0, 0, 1], dtype=torch.float32) 74 | labels = labels.cuda() 75 | 76 | outputs.loss = self.criterion(logits, labels) 77 | probabilities = torch.softmax(logits, dim=-1) 78 | 79 | else: 80 | outputs = self.backbone.forward( 81 | input_ids=corpus_ids, 82 | attention_mask=corpus_mask, 83 | labels=torch.tensor([[0, 1] for _ in range(corpus_ids.size(0))]).cuda(), # as dummy labels 84 | output_hidden_states=True, 85 | output_attentions=(self.config.mode == 'interpret') 86 | ) 87 | hiddens = outputs.decoder_hidden_states[-1][:, 0, :] 88 | logits = self.sequential(hiddens.squeeze()) 89 | probabilities = torch.softmax(logits, dim=-1) 90 | 91 | return SentinelOutput.construct(huggingface=outputs, probabilities=probabilities) 92 | 93 | def interpretability_study_entry(self, corpus_ids: Tensor, corpus_mask: Tensor, label_ids: Tensor, selectedDataset: Tuple[str] = ('Human', 'ChatGPT', 'PaLM', 'LLaMA', 'GPT2')): 94 | assert self.injected_embedder is not None, "Injected gradient collector did not found" 95 | 96 | filteredDataset = [item for item in config.dataset if item.label in selectedDataset] 97 | outputs = self.backbone( 98 | input_ids=corpus_ids, 99 | attention_mask=corpus_mask, 100 | labels=label_ids, 101 | output_hidden_states=False, 102 | output_attentions=False 103 | ) 104 | raw_scores = outputs.logits 105 | loss = outputs.loss 106 | loss.backward() 107 | 108 | filtered_scores = raw_scores[:, 0, [item.token_id for item in filteredDataset]] 109 | probabilities = torch.softmax(filtered_scores, dim=-1) 110 | return probabilities 111 
| -------------------------------------------------------------------------------- /pipeline/pipeline_executor.py: -------------------------------------------------------------------------------- 1 | """ 2 | @brief: A parallel data processor that executes data pipeline 3 | @author: Yutian Chen 4 | @date: May 16, 2023 5 | """ 6 | from typing import Callable, List, TypeVar 7 | from pathlib import Path 8 | from tqdm import tqdm 9 | 10 | import copy 11 | import multiprocessing as mp 12 | 13 | T = TypeVar("T") 14 | 15 | 16 | class PipelineExecutor: 17 | def __init__(self, worker_num): 18 | self.worker_num = mp.cpu_count() if (worker_num is None) else worker_num 19 | if self.worker_num > mp.cpu_count(): 20 | print(f"You are using more multiprocess worker than cpu count ({mp.cpu_count()})!") 21 | 22 | def parallel_file_mapping(self, pipeline: Callable, from_files: List[Path], to_files: List[Path], write_mode="w", verbose=False, encoding=None): 23 | tqdm.set_lock(mp.RLock()) 24 | assert len(from_files) == len(to_files) 25 | 26 | if verbose: 27 | print(f"PipelineExecutor: executing with {self.worker_num} workers") 28 | for from_file, to_file in zip(from_files, to_files): 29 | print(f"\t[{from_file}] => [{pipeline}]=> [{to_file}]") 30 | 31 | args = [(pipeline, from_file, to_file, write_mode, verbose, encoding) for from_file, to_file in zip(from_files, to_files)] 32 | 33 | with mp.Pool(self.worker_num, initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),)) as p: 34 | p.map(PipelineExecutor.file_mapping_worker, args) 35 | 36 | if verbose: print("Finish") 37 | 38 | @staticmethod 39 | def file_mapping_worker(args): 40 | pipeline, from_file, to_file, write_mode, verbose, encoding = args 41 | 42 | try: tqdm_position = mp.current_process()._identity[0] - 1 43 | except: tqdm_position = None 44 | 45 | with open(from_file, "r", encoding=encoding) as fin: 46 | with open(to_file, write_mode) as fout: 47 | try: 48 | pb = tqdm(fin.readlines(), position=tqdm_position, leave=False) if verbose else fin.readlines() 49 | for line in pb: 50 | result = pipeline(line) 51 | if result is not None: fout.write(result + "\n") 52 | finally: 53 | fout.flush() 54 | 55 | def sequantial_file_mapping(self, pipeline: Callable, from_files: List[Path], to_files: List[Path], write_mode="w", verbose=False, encoding=None): 56 | assert len(from_files) == len(to_files) 57 | if verbose: 58 | print(f"PipelineExecutor: executing sequentially") 59 | for from_file, to_file in zip(from_files, to_files): 60 | if verbose: print(f"\t[{from_file}] --[{write_mode}]--> [{to_file}]") 61 | self.file_mapping_worker((pipeline, from_file, to_file, write_mode, verbose, encoding)) 62 | 63 | def parallel_mapreduce(self, map_fn: Callable[[str], T], from_files: List[Path], identity: T, reduce_fn: Callable[[T, T], T], verbose=False, encoding=None) -> T: 64 | """ 65 | :param map_fn: str (line in input file) -> 'a 66 | :param from_files: input files 67 | :param identity: 'a 68 | :param reduce_fn: ('a * 'a) -> 'a **Need to be associative** 69 | :return: The reduced result. 
70 | """ 71 | tqdm.set_lock(mp.RLock()) 72 | if verbose: 73 | print(f"PipelineExecutor: mapreduce with {self.worker_num} workers") 74 | for from_file in from_files: print(f"\t[{from_file}] --[Map]--> [Reduce] -->") 75 | 76 | args = [(map_fn, from_file, identity, reduce_fn, verbose, encoding) for from_file in from_files] 77 | with mp.Pool(self.worker_num, initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),)) as p: 78 | reduce_results = p.map(PipelineExecutor.mapreduce_worker, args) 79 | 80 | final_reduction = identity 81 | for result in reduce_results: 82 | final_reduction = reduce_fn(final_reduction, result) 83 | 84 | return final_reduction 85 | 86 | @staticmethod 87 | def mapreduce_worker(args): 88 | map_fn, from_file, identity, reduce_fn, verbose = args 89 | 90 | try: tqdm_position = mp.current_process()._identity[0] - 1 91 | except: tqdm_position = None 92 | 93 | map_result = [] 94 | with open(from_file, "r") as fin: 95 | pb = tqdm(fin.readlines(), position=tqdm_position, desc="Mapping") if verbose else fin.readlines() 96 | for line in pb: map_result.append(map_fn(line)) 97 | 98 | reduce_result = identity 99 | pb = tqdm(map_result, position=tqdm_position, desc="Reducing") if verbose else map_result 100 | for item in pb: 101 | reduce_result = reduce_fn(reduce_result, item) 102 | 103 | return reduce_result 104 | 105 | def sequential_mapreduce(self, map_fn: Callable[[str], T], from_files: List[Path], identity: T, reduce_fn: Callable[[T, T], T], verbose=False) -> T: 106 | if verbose: 107 | print(f"PipelineExecutor: mapreduce sequentially") 108 | for from_file in from_files: print(f"\t[{from_file}] --[Map {map_fn}]--> [Reduce {reduce_fn}] -->") 109 | 110 | final_reduction = copy.deepcopy(identity) 111 | reduce_results = [self.mapreduce_worker((map_fn, from_file, copy.deepcopy(identity), reduce_fn, verbose)) 112 | for from_file in from_files] 113 | for result in reduce_results: final_reduction = reduce_fn(final_reduction, result) 114 | 115 | return final_reduction 116 | -------------------------------------------------------------------------------- /evaluator/plot/plot_confusion_mat.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | 4 | from evaluator.toolkit import * 5 | from pathlib import Path 6 | 7 | import evaluator.models.t5_sentinel.t5_get_hidden_states as T5_Full 8 | 9 | 10 | def plot_full_confusion_mat(): 11 | predictions = T5_Full.evaluate_predictions([ 12 | Path("data", "split", "open-web-text", "test-dirty.jsonl"), 13 | Path("data", "split", "open-gpt-text", "test-dirty.jsonl"), 14 | Path("data", "split", "open-palm-text", "test-dirty.jsonl"), 15 | Path("./data/split/open-llama-text/test-dirty.jsonl"), 16 | Path("./data/split/gpt2-output/test-dirty.jsonl") 17 | ]) 18 | display_labels = ['Human', 'GPT3.5', 'PaLM', 'LLaMA', 'GPT2'] 19 | 20 | matrix = calculate_confusion_matrix(predictions, ["openweb", "chatgpt", "palm", "llama", "gpt2_xl"]) 21 | fig: plt.Figure = plt.figure(dpi=200) 22 | ax: plt.Axes = fig.add_subplot(1, 1, 1) 23 | ax.imshow(matrix, cmap=sns.color_palette("crest_r", as_cmap=True), interpolation='nearest') 24 | ax.set_xticks([_ for _ in range(len(display_labels))], display_labels) 25 | ax.set_yticks([_ for _ in range(len(display_labels))], display_labels) 26 | ax.set_ylabel("Actual") 27 | ax.set_xlabel("Predicted") 28 | for i in range(len(display_labels)): 29 | for j in range(len(display_labels)): 30 | ax.text( 31 | j, i, format(int(matrix[i, j]), 'd'), 32 | 
horizontalalignment="center", 33 | color="white" if matrix[i, j] < np.sum(matrix) / (len(display_labels) + 1) else "black", 34 | fontsize="large" 35 | ) 36 | # ax.set_title("Confusion Matrix for T5-Sentinel") 37 | fig.tight_layout() 38 | fig.savefig(Path("./result/t5_sentinel/confusion_mat_t5_full_dirty.pdf")) 39 | 40 | 41 | def plot_full_confusion_mat_abalation(file_name: str, variant_level: int): 42 | def implement(): 43 | predictions = T5_Full.evaluate_predictions([ 44 | Path("data", "split", "open-web-text", file_name), 45 | Path("data", "split", "open-gpt-text", file_name), 46 | Path("data", "split", "open-palm-text", file_name), 47 | Path("./data/split/open-llama-text", file_name), 48 | Path("./data/split/gpt2-output", file_name) 49 | ]) 50 | display_labels = ['Human', 'GPT3.5', 'PaLM', 'LLaMA', 'GPT2'] 51 | 52 | matrix = calculate_confusion_matrix(predictions, ["openweb", "chatgpt", "palm", "llama", "gpt2_xl"]) 53 | fig: plt.Figure = plt.figure(dpi=200) 54 | ax: plt.Axes = fig.add_subplot(1, 1, 1) 55 | ax.imshow(matrix, cmap=sns.color_palette("crest_r", as_cmap=True), interpolation='nearest') 56 | ax.set_xticks([_ for _ in range(len(display_labels))], display_labels) 57 | ax.set_yticks([_ for _ in range(len(display_labels))], display_labels) 58 | ax.set_ylabel("Actual") 59 | ax.set_xlabel("Predicted") 60 | for i in range(len(display_labels)): 61 | for j in range(len(display_labels)): 62 | ax.text( 63 | j, i, format(int(matrix[i, j]), 'd'), 64 | horizontalalignment="center", 65 | color="white" if matrix[i, j] < np.sum(matrix) / (len(display_labels) + 1) else "black", 66 | fontsize="large" 67 | ) 68 | ax.set_title(f"Confusion Matrix for T5-Sentinel (Variant {variant_level})") 69 | fig.tight_layout() 70 | fig.savefig(Path(f"./result/t5_sentinel/confusion_variant{variant_level}.pdf")) 71 | 72 | return implement 73 | 74 | 75 | def plot_full_confusion_mat_compare(): 76 | fig: plt.Figure = plt.figure(dpi=200, figsize=(8, 8)) 77 | for idx, variant in enumerate(["test.variant1.jsonl", "test.variant2.jsonl", "test.variant3.jsonl", "test.variant4.jsonl"]): 78 | predictions = T5_Full.evaluate_predictions([ 79 | Path("data", "split", "open-web-text", variant), 80 | Path("data", "split", "open-gpt-text", variant), 81 | Path("data", "split", "open-palm-text", variant), 82 | Path("data", "split", "open-llama-text", variant), 83 | Path("data", "split", "gpt2-output", variant) 84 | ]) 85 | display_labels = ['Human', 'GPT3.5', 'PaLM', 'LLaMA', 'GPT2'] 86 | 87 | matrix = calculate_confusion_matrix(predictions, ["openweb", "chatgpt", "palm", "llama", "gpt2_xl"]) 88 | ax: plt.Axes = fig.add_subplot(2, 2, idx + 1) 89 | ax.imshow(matrix, cmap=sns.color_palette("crest_r", as_cmap=True), interpolation='nearest') 90 | ax.set_xticks([_ for _ in range(len(display_labels))], display_labels) 91 | ax.set_yticks([_ for _ in range(len(display_labels))], display_labels) 92 | ax.set_ylabel("Actual") 93 | ax.set_xlabel("Predicted") 94 | for i in range(len(display_labels)): 95 | for j in range(len(display_labels)): 96 | ax.text( 97 | j, i, format(int(matrix[i, j]), 'd'), 98 | horizontalalignment="center", 99 | color="white" if matrix[i, j] < np.sum(matrix) / (len(display_labels) + 1) else "black", 100 | ) 101 | fig.tight_layout() 102 | fig.savefig(Path(f"./result/t5_sentinel/confusion_compare_variants.pdf")) 103 | 104 | 105 | 106 | if __name__ == "__main__": 107 | TASKS = [ 108 | plot_full_confusion_mat, 109 | plot_full_confusion_mat_abalation("test.variant1.jsonl", 1), 110 | 
plot_full_confusion_mat_abalation("test.variant2.jsonl", 2), 111 | plot_full_confusion_mat_abalation("test.variant3.jsonl", 3), 112 | plot_full_confusion_mat_abalation("test.variant4.jsonl", 4), 113 | plot_full_confusion_mat_compare 114 | ] 115 | 116 | for task in TASKS: 117 | print("Executing task: ", task.__name__) 118 | task() 119 | -------------------------------------------------------------------------------- /evaluator/interpret/sample_pca.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import seaborn as sns 3 | import matplotlib.pyplot as plt 4 | from pathlib import Path 5 | 6 | from evaluator.plot.plot_pca import pca_analysis 7 | 8 | import evaluator.models.t5_sentinel.t5_get_hidden_states as T5_Full 9 | 10 | 11 | def retrieve_nearest_neighbor_center(points): 12 | # points - N x 2 ndarray 13 | center = np.mean(points, axis=0) 14 | nearest_neighbor_idx = np.argmin(np.linalg.norm(points - center, axis=1)) 15 | return points[nearest_neighbor_idx], nearest_neighbor_idx.item() 16 | 17 | 18 | def sample_pca_t5_full_center(): 19 | evaluate_paths = [ 20 | Path("data", "split", "open-web-text", "test-dirty.jsonl"), 21 | Path("data", "split", "open-gpt-text", "test-dirty.jsonl"), 22 | Path("data", "split", "open-palm-text", "test-dirty.jsonl"), 23 | Path("./data/split/open-llama-text/test-dirty.jsonl"), 24 | Path("./data/split/gpt2-output/test-dirty.jsonl"), 25 | ] 26 | 27 | hiddens = T5_Full.evaluate_hidden_states(evaluate_paths) 28 | labels = ["openweb", "chatgpt", "palm", "llama", "gpt2_xl"] 29 | Tarrays = pca_analysis(hiddens, labels, "t5_full_pca") 30 | 31 | fig = plt.figure(dpi=200) 32 | ax: plt.Axes = fig.add_subplot(1, 1, 1) 33 | ax.set_prop_cycle('color', sns.color_palette("pastel")) 34 | ax.set_title("PCA projection of decoder hidden state\nT5-Sentinel") 35 | for transformed, label in zip(Tarrays, labels): 36 | ax.scatter(transformed[:, 0], transformed[:, 1], label=label, s=1) 37 | 38 | ax.set_prop_cycle('color', sns.color_palette("dark")) 39 | f = open("./result/t5_sentinel/pca_sampling_result.txt", "w") 40 | f.writelines(["This file is dynamically generated from sample_pca.py, do not modify\n"]) 41 | 42 | for eval_path, transformed in zip(evaluate_paths, Tarrays): 43 | cluster_hiddens = T5_Full.evaluate_hiddens_impl(eval_path) 44 | center, center_idx = retrieve_nearest_neighbor_center(transformed) 45 | center_uid, center_extra = cluster_hiddens[center_idx]["uid"], cluster_hiddens[center_idx]["extra"] 46 | ax.scatter(center[0], center[1], marker="*", s=15) 47 | f.writelines([f"UID: {center_uid} | extra: {center_extra}\n"]) 48 | 49 | f.close() 50 | ax.legend() 51 | fig.savefig(Path("./result/t5_sentinel/pca_sampling.pdf")) 52 | 53 | 54 | def sample_pca_t5_full_edge(): 55 | evaluate_paths = [ 56 | Path("data", "split", "open-web-text", "test-dirty.jsonl"), 57 | Path("data", "split", "open-gpt-text", "test-dirty.jsonl"), 58 | Path("data", "split", "open-palm-text", "test-dirty.jsonl"), 59 | Path("./data/split/open-llama-text/test-dirty.jsonl"), 60 | Path("./data/split/gpt2-output/test-dirty.jsonl"), 61 | ] 62 | 63 | hiddens = T5_Full.evaluate_hidden_states(evaluate_paths) 64 | labels = ["openweb", "chatgpt", "palm", "llama", "gpt2_xl"] 65 | Tarrays = pca_analysis(hiddens, labels, "t5_full_pca") 66 | 67 | fig = plt.figure(dpi=200) 68 | ax: plt.Axes = fig.add_subplot(1, 1, 1) 69 | ax.set_prop_cycle('color', sns.color_palette("pastel")) 70 | ax.set_title("PCA projection of decoder hidden state\nT5-Sentinel") 71 | for 
transformed, label in zip(Tarrays, labels): 72 | random_mask = np.random.randn(*(transformed[:, 0].shape)) < 0.2 73 | ax.scatter(transformed[random_mask, 0], transformed[random_mask, 1], label=label, s=1) 74 | # ax.scatter(transformed[:, 0], transformed[:, 1], label=label, s=1) 75 | 76 | f = open("./result/t5_sentinel/pca_sampling_result_edges.txt", "w") 77 | f.write("This file is dynamically generated from sample_pca.py, do not modify\n") 78 | 79 | top_most = Tarrays[1] # ChatGPT 80 | right_most = Tarrays[2] # PaLMs 81 | bottom_most = Tarrays[3] # LLaMA 82 | left_most = Tarrays[4] # GPT2 83 | 84 | left_most_idx = np.argmin(left_most[:, 0], axis=0) 85 | right_most_idx = np.argmax(right_most[:, 0], axis=0) 86 | top_most_idx = np.argmax(top_most[:, 1], axis=0) 87 | bottom_most_idx = np.argmin(bottom_most[:, 1], axis=0) 88 | 89 | ax.set_prop_cycle('color', sns.color_palette("dark")) 90 | # The order of plotting here matters! 91 | ax.scatter([], [], marker="*", s=15) 92 | ax.scatter(top_most[top_most_idx, 0], top_most[top_most_idx, 1], marker="*", s=15) 93 | ax.scatter(right_most[right_most_idx, 0] , right_most[right_most_idx, 1] , marker="*", s=15) 94 | ax.scatter(bottom_most[bottom_most_idx, 0], bottom_most[bottom_most_idx, 1], marker="*", s=15) 95 | ax.scatter(left_most[left_most_idx, 0], left_most[left_most_idx, 1], marker="*", s=15) 96 | 97 | top_most_preds = T5_Full.evaluate_predictions([evaluate_paths[1]]) 98 | right_most_preds = T5_Full.evaluate_predictions([evaluate_paths[2]]) 99 | bottom_most_preds = T5_Full.evaluate_predictions([evaluate_paths[3]]) 100 | left_most_preds = T5_Full.evaluate_predictions([evaluate_paths[4]]) 101 | 102 | top_most_uid, top_most_extra = top_most_preds[top_most_idx.item()]["uid"], top_most_preds[top_most_idx.item()]["extra"] 103 | right_most_uid, right_most_extra = right_most_preds[right_most_idx.item()]["uid"], right_most_preds[right_most_idx.item()]["extra"] 104 | bottom_most_uid, bottom_most_extra = bottom_most_preds[bottom_most_idx.item()]["uid"], bottom_most_preds[bottom_most_idx.item()]["extra"] 105 | left_most_uid, left_most_extra = left_most_preds[left_most_idx.item()]["uid"], left_most_preds[left_most_idx.item()]["extra"] 106 | 107 | f.write(f"TOP: {top_most_uid}, {top_most_extra}\n") 108 | f.write(f"RIGHT: {right_most_uid}, {right_most_extra}\n") 109 | f.write(f"BOTTOM: {bottom_most_uid}, {bottom_most_extra}\n") 110 | f.write(f"LEFT: {left_most_uid}, {left_most_extra}\n") 111 | 112 | f.close() 113 | ax.legend() 114 | fig.savefig(Path("./result/t5_sentinel/pca_sampling_edge.pdf")) 115 | 116 | 117 | if __name__ == "__main__": 118 | TASKS = [ 119 | # sample_pca_t5_full_center, 120 | sample_pca_t5_full_edge 121 | ] 122 | 123 | for task in TASKS: 124 | print("Executing task: ", task.__name__) 125 | task() 126 | -------------------------------------------------------------------------------- /evaluator/data_analysis/plot_character_distribution.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import matplotlib.ticker as mtick 3 | import matplotlib.patches as mpatches 4 | 5 | import string 6 | import memoizer 7 | from evaluator.toolkit import * 8 | from tqdm import tqdm 9 | import string 10 | 11 | 12 | def count_character(s: str, counter: dict) -> None: 13 | for c in s: 14 | if c in counter: counter[c] += 1 15 | else: counter[c] = 1 16 | 17 | 18 | def argeq(a, b): return a[0] == b[0] 19 | 20 | 21 | @memoizer.memoize(Path("cache", "dataset_char_count_cache.pt"), arg_eq=argeq) 22 | 
def count_dataset(dataset: Tp.Literal["human", "chatgpt", "palm", "llama", "gpt2"]) -> dict: 23 | selected_files = { 24 | "human": Human_Data, "chatgpt": GPT3_Data, "palm": PaLM_Data, "llama": LLaMA_Data, "gpt2": GPT2_Data 25 | }[dataset] 26 | 27 | counter = {c: 0 for c in string.printable} 28 | dataset = load_data(selected_files) 29 | for entry in tqdm(dataset): count_character(entry, counter) 30 | 31 | total_token = sum([counter[k] for k in counter]) 32 | result = {k: counter[k] / total_token for k in counter} 33 | return result 34 | 35 | 36 | def get_top_k_chars(counter: dict, k: int) -> list: 37 | kv_pair = [(counter[k], k) for k in counter] 38 | kv_pair.sort(key=lambda x: x[0], reverse=True) 39 | return [entry[1] for entry in kv_pair[:k]] 40 | 41 | 42 | def filter_dict(counter: dict, keys: list) -> dict: 43 | resulted_dict = {} 44 | for k in keys: 45 | resulted_dict[k] = counter[k] if k in counter else 0 46 | return resulted_dict 47 | 48 | 49 | def merge_keys(*arr_keys) -> list: 50 | set_keys = set() 51 | for keys in arr_keys: set_keys = set_keys.union(set(keys)) 52 | return list(set_keys) 53 | 54 | 55 | def sort_keys(counter, keys) -> list: 56 | key_arr = [(counter[key], key) for key in keys] 57 | key_arr.sort(key=lambda x: x[0], reverse=True) 58 | return [entry[1] for entry in key_arr] 59 | 60 | 61 | def plot_distribution(): 62 | human_counter = count_dataset("human") 63 | gpt3_counter = count_dataset("chatgpt") 64 | palm_counter = count_dataset("palm") 65 | llama_counter = count_dataset("llama") 66 | gpt2_counter = count_dataset("gpt2") 67 | 68 | selected_keys = merge_keys( 69 | get_top_k_chars(human_counter, 40), 70 | get_top_k_chars(gpt3_counter, 40), 71 | get_top_k_chars(palm_counter, 40), 72 | get_top_k_chars(llama_counter, 40), 73 | get_top_k_chars(gpt2_counter, 40) 74 | ) 75 | 76 | human_counter = filter_dict(human_counter, selected_keys) 77 | gpt3_counter = filter_dict(gpt3_counter, selected_keys) 78 | palm_counter = filter_dict(palm_counter, selected_keys) 79 | llama_counter = filter_dict(llama_counter, selected_keys) 80 | gpt2_counter = filter_dict(gpt2_counter, selected_keys) 81 | selected_keys = sort_keys(human_counter, selected_keys) 82 | 83 | fig, axes = plt.subplots(ncols=1, nrows=5, dpi=200, sharey=True, sharex=True) 84 | colors = ["#2576b0", "#fc822e", "#349f3c", "#d32f2e", "#9368b9"] 85 | categories = ["Human", "GPT3.5", "PaLM", "LLaMA", "GPT2"] 86 | all_data = [human_counter, gpt3_counter, palm_counter, llama_counter, gpt2_counter] 87 | 88 | for idx, (category, counter, color, ax) in enumerate(zip(categories, all_data, colors, axes)): 89 | values = [counter[k] for k in selected_keys] 90 | ax.bar(selected_keys, values, color=color) 91 | 92 | for ax in axes[:-1]: ax.get_xaxis().set_visible(False) 93 | for ax in axes: ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0)) 94 | 95 | ax = axes[-1] 96 | handles = [ 97 | mpatches.Patch(color=c, label=label) for c, label in zip(colors, categories) 98 | ] 99 | ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1.0), ncol=1, handles=handles) 100 | 101 | fig.text(0.01, 0.5, 'Frequency', va='center', rotation='vertical') 102 | fig.text(0.5, 0.01, 'Most common character in OpenLLMText dataset', ha='center') 103 | fig.savefig("./result/data/dataset_char_count.pdf") 104 | 105 | 106 | def plot_punc_distribution(): 107 | human_counter = count_dataset("human") 108 | gpt3_counter = count_dataset("chatgpt") 109 | palm_counter = count_dataset("palm") 110 | llama_counter = count_dataset("llama") 111 | gpt2_counter = 
count_dataset("gpt2") 112 | 113 | punctuation_tok = [tok for tok in string.punctuation] 114 | 115 | human_counter = filter_dict(human_counter, punctuation_tok) 116 | gpt3_counter = filter_dict(gpt3_counter, punctuation_tok) 117 | palm_counter = filter_dict(palm_counter, punctuation_tok) 118 | llama_counter = filter_dict(llama_counter, punctuation_tok) 119 | gpt2_counter = filter_dict(gpt2_counter, punctuation_tok) 120 | selected_keys = sort_keys(human_counter, punctuation_tok) 121 | 122 | fig, axes = plt.subplots(ncols=1, nrows=5, dpi=200, sharey=True, sharex=True) 123 | colors = ["#2576b0", "#fc822e", "#349f3c", "#d32f2e", "#9368b9"] 124 | categories = ["Human", "GPT3.5", "PaLM", "LLaMA", "GPT2"] 125 | all_data = [human_counter, gpt3_counter, palm_counter, llama_counter, gpt2_counter] 126 | 127 | for idx, (category, counter, color, ax) in enumerate(zip(categories, all_data, colors, axes)): 128 | values = [counter[k] for k in selected_keys] 129 | ax.bar(selected_keys, values, color=color) 130 | 131 | # for ax in axes[:-1]: ax.get_xaxis().set_visible(False) 132 | for ax in axes: ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0)) 133 | 134 | handles = [mpatches.Patch(color=c, label=label) for c, label in zip(colors, categories)] 135 | fig: plt.Figure 136 | axes[0].legend(loc='upper left', bbox_to_anchor=(1.05, 1.0), handles=handles) 137 | fig.tight_layout() 138 | # axes[1].legend(handles=handles) 139 | fig.subplots_adjust(wspace=0, hspace=0.2) 140 | fig.text(0.00, 0.5, 'Frequency', va='center', rotation='vertical') 141 | fig.text(0.5, 0.01, 'Most common punctuation in OpenLLMText dataset', ha='center') 142 | 143 | fig.savefig("./result/data/dataset_punc_count.pdf") 144 | 145 | 146 | if __name__ == "__main__": 147 | TASKS = [ 148 | # plot_distribution, 149 | plot_punc_distribution 150 | ] 151 | for task in TASKS: 152 | print(f"Executing {task.__name__}") 153 | task() 154 | -------------------------------------------------------------------------------- /generator/chatgpt/chatgpt_client.py: -------------------------------------------------------------------------------- 1 | """ 2 | @brief: A Chat-GPT response generator using Async io 3 | @author: Yutian Chen 4 | @date: March 19, 2023 5 | """ 6 | 7 | import asyncio 8 | import random 9 | import string 10 | import openai 11 | import yaml 12 | import json 13 | import time 14 | 15 | import pipeline.component.text_component as P 16 | from typing import TypedDict, List, Tuple 17 | from pathlib import Path 18 | from generator.client_base import AsyncRequestClient, TaskResult 19 | 20 | 21 | # Typing 22 | 23 | class ChatGPTState(TypedDict): 24 | processed: set 25 | token: int 26 | 27 | 28 | class ChatGPTConfig(TypedDict): 29 | MaxTokenCount: int 30 | MaxLengthAllowed: int 31 | WaitTime: float 32 | InputDirectory: str 33 | OutputDirectory: str 34 | Sampling: float 35 | InputSubsets: List[str] 36 | 37 | 38 | ChatGPTArgs = Tuple[str, str, str] 39 | 40 | ChatGPTType = AsyncRequestClient[ChatGPTState, ChatGPTArgs, ChatGPTConfig] 41 | Converter = P.WriteExtra({"source": "chatgpt", "variant": "original"}) >> P.ToJsonStr() 42 | ### 43 | 44 | 45 | TOKEN_SPLITER = {char for char in string.punctuation + string.whitespace} 46 | HANDLE_STRATEGY = { 47 | "stop": TaskResult.FINISH, 48 | "length": TaskResult.FINISH, 49 | "content_filter": TaskResult.CANCEL, 50 | "null": TaskResult.RETRY 51 | } 52 | 53 | 54 | def estimate_token_count(sample: str) -> int: 55 | est_num = 0 56 | for char in sample: 57 | est_num += 1 if char in TOKEN_SPLITER else 0 58 | return 
est_num 59 | 60 | 61 | async def chatgpt_request_fn(self: ChatGPTType, state, subset, uid, text) -> TaskResult: 62 | if state["token"] > self.config["MaxTokenCount"]: 63 | print("Abort due to budget limit.") 64 | raise Exception("Exceed the MaxTokenCount setting") 65 | 66 | await self.worker_lock.acquire() 67 | start_time = time.time() 68 | 69 | # Ready ... now Work! 70 | 71 | estimatedNumTokens = estimate_token_count(text) 72 | if estimatedNumTokens > self.config["MaxLengthAllowed"]: 73 | print("[x]\t", uid, 74 | "failed since it exceeds the token limit (" + str(self.config["MaxLengthAllowed"]) + ")") 75 | self.worker_lock.release() 76 | return TaskResult.CANCEL 77 | 78 | try: 79 | response = await openai.ChatCompletion.acreate( 80 | model="gpt-3.5-turbo", 81 | messages=[ 82 | {"role": "user", "content": "Rephrase the following paragraph by paragraph:\n\n" + text} 83 | ] 84 | ) 85 | 86 | except openai.error.InvalidRequestError: 87 | # no need to wait, since the request is not sent for some reason 88 | await asyncio.sleep(1.0) # Avoid flushing the API 89 | self.worker_lock.release() 90 | return TaskResult.RETRY 91 | 92 | except (openai.error.RateLimitError, openai.error.APIError, openai.error.TryAgain, openai.error.Timeout): 93 | await asyncio.sleep(self.config["WaitTime"]) 94 | self.worker_lock.release() 95 | return TaskResult.RETRY 96 | 97 | finishReason = response["choices"][0]["finish_reason"] 98 | result = HANDLE_STRATEGY[finishReason] 99 | 100 | if result == TaskResult.FINISH: 101 | machineText = response["choices"][0]["message"]["content"].strip() 102 | 103 | await self.writer_lock.acquire() 104 | with open(Path(self.config["OutputDirectory"], subset + ".jsonl"), "a", encoding="utf-8") as f: 105 | f.write(Converter({"uid": uid, "text": machineText, "extra": dict()})) 106 | f.write("\n") 107 | self.writer_lock.release() 108 | self.state["processed"].add((subset, uid)) 109 | 110 | self.state["token"] += response["usage"]["total_tokens"] 111 | 112 | # Wait for 60 secs, then release the lock to spawn a new worker coroutine 113 | # (We won't be blocked out) 114 | end_time = time.time() 115 | await asyncio.sleep(self.config["WaitTime"] - (end_time - start_time)) 116 | self.worker_lock.release() 117 | 118 | return result 119 | 120 | 121 | def chatgpt_pred_fn(client: ChatGPTType, state: ChatGPTState, subset, uid, text) -> bool: 122 | return (subset, uid) not in state["processed"] 123 | 124 | 125 | def chatgpt_task_generator(client: ChatGPTType, state: ChatGPTState) -> List[ChatGPTArgs]: 126 | config = client.config 127 | task_args, subsets = [], config["InputSubsets"] 128 | 129 | for subset in subsets: 130 | print("Processing", subset) 131 | humanTextEntries = dict() 132 | 133 | with open(Path(config["InputDirectory"], subset + ".jsonl"), "r") as f: 134 | lines = f.read().strip().split("\n") 135 | for line in lines: 136 | entry = json.loads(line) 137 | humanTextEntries[entry["uid"]] = entry 138 | 139 | exist_count = len([uid for (_subset, uid) in state["processed"] if _subset == subset]) 140 | target_count = int(len(humanTextEntries) * config["Sampling"]) 141 | remain_cnt = max(target_count - exist_count, 0) 142 | remain_uids = random.choices(list(humanTextEntries.keys()), k=remain_cnt) 143 | 144 | for uid in remain_uids: 145 | task_args.append((subset, uid, humanTextEntries[uid]["text"])) 146 | 147 | return task_args 148 | 149 | 150 | def chatgpt_state_initializer(client: ChatGPTType) -> ChatGPTState: 151 | processed = set() 152 | for subset in client.config["InputSubsets"]: 153 | if not 
Path(client.config["OutputDirectory"], subset + ".jsonl").exists(): continue 154 | with open(Path(client.config["OutputDirectory"], subset + ".jsonl"), "r") as f: 155 | lines = f.read().strip().split("\n") 156 | for line in lines: 157 | processed.add((subset, json.loads(line)["uid"])) 158 | 159 | return {"processed": processed, "token": 0} 160 | 161 | 162 | def chatgpt_on_init_finish(client: ChatGPTType) -> None: 163 | ... 164 | 165 | 166 | if __name__ == "__main__": 167 | with open("./generator/chatgpt/chatgpt_client.yaml", "r") as f: 168 | chatgpt_config = yaml.safe_load(f) 169 | 170 | with open(Path(chatgpt_config["ClientRoot"], "secret.json"), "r") as f: 171 | API_KEY = json.load(f)["OPENAI_API_KEY"] 172 | openai.api_key = API_KEY 173 | 174 | ChatGPTClient = AsyncRequestClient[ChatGPTState, ChatGPTArgs, ChatGPTConfig]( 175 | chatgpt_config, 176 | chatgpt_request_fn, 177 | chatgpt_pred_fn, 178 | chatgpt_task_generator, 179 | chatgpt_state_initializer, 180 | on_init_finish=chatgpt_on_init_finish, 181 | display_args=lambda args: args[1] 182 | ) 183 | asyncio.run(ChatGPTClient.execute()) 184 | -------------------------------------------------------------------------------- /detector/solaiman_classifier/evaluate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "os.chdir(\"../..\")" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": { 17 | "ExecuteTime": { 18 | "end_time": "2023-08-27T14:53:58.017309Z", 19 | "start_time": "2023-08-27T14:53:54.568446700Z" 20 | }, 21 | "collapsed": true 22 | }, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/plain": [ 27 | "RobertaForSequenceClassification(\n", 28 | " (roberta): RobertaModel(\n", 29 | " (embeddings): RobertaEmbeddings(\n", 30 | " (word_embeddings): Embedding(50265, 768, padding_idx=1)\n", 31 | " (position_embeddings): Embedding(514, 768, padding_idx=1)\n", 32 | " (token_type_embeddings): Embedding(1, 768)\n", 33 | " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", 34 | " (dropout): Dropout(p=0.1, inplace=False)\n", 35 | " )\n", 36 | " (encoder): BertEncoder(\n", 37 | " (layer): ModuleList(\n", 38 | " (0-11): 12 x BertLayer(\n", 39 | " (attention): BertAttention(\n", 40 | " (self): BertSelfAttention(\n", 41 | " (query): Linear(in_features=768, out_features=768, bias=True)\n", 42 | " (key): Linear(in_features=768, out_features=768, bias=True)\n", 43 | " (value): Linear(in_features=768, out_features=768, bias=True)\n", 44 | " (dropout): Dropout(p=0.1, inplace=False)\n", 45 | " )\n", 46 | " (output): BertSelfOutput(\n", 47 | " (dense): Linear(in_features=768, out_features=768, bias=True)\n", 48 | " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", 49 | " (dropout): Dropout(p=0.1, inplace=False)\n", 50 | " )\n", 51 | " )\n", 52 | " (intermediate): BertIntermediate(\n", 53 | " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", 54 | " )\n", 55 | " (output): BertOutput(\n", 56 | " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", 57 | " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", 58 | " (dropout): Dropout(p=0.1, inplace=False)\n", 59 | " )\n", 60 | " )\n", 61 | " )\n", 62 | " )\n", 63 | " (pooler): BertPooler(\n", 64 | " (dense): Linear(in_features=768, out_features=768, bias=True)\n", 65 | " (activation): 
Tanh()\n", 66 | " )\n", 67 | " )\n", 68 | " (classifier): RobertaClassificationHead(\n", 69 | " (dense): Linear(in_features=768, out_features=768, bias=True)\n", 70 | " (dropout): Dropout(p=0.1, inplace=False)\n", 71 | " (out_proj): Linear(in_features=768, out_features=2, bias=True)\n", 72 | " )\n", 73 | ")" 74 | ] 75 | }, 76 | "execution_count": 2, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "# Use transformer previous version\n", 83 | "import torch\n", 84 | "import json\n", 85 | "from tqdm import tqdm\n", 86 | "from pathlib import Path\n", 87 | "from transformers import RobertaForSequenceClassification, RobertaTokenizer\n", 88 | "\n", 89 | "state_path = Path(\"data\", \"checkpoint\", \"solaiman-detector-base.pt\")\n", 90 | "model = RobertaForSequenceClassification.from_pretrained(\"roberta-base\")\n", 91 | "tokenizer = RobertaTokenizer.from_pretrained(\"roberta-base\", truncation=True, max_length=510)\n", 92 | "\n", 93 | "model.load_state_dict(torch.load(state_path)[\"model_state_dict\"])\n", 94 | "model.to(\"cuda\")\n" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 3, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "def load_line(content):\n", 104 | " entry = json.loads(content)\n", 105 | " return {\"text\": entry[\"text\"], \"source\": entry[\"extra\"][\"source\"]}\n", 106 | "\n", 107 | "\n", 108 | "def evaluate_entry(entry, model):\n", 109 | " text, src = entry[\"text\"], entry[\"source\"]\n", 110 | "\n", 111 | " # encode\n", 112 | " tokens = tokenizer.encode(text, max_length=512)\n", 113 | " tokens = tokens[:tokenizer.max_len - 2]\n", 114 | " tokens = torch.tensor([tokenizer.bos_token_id] + tokens + [tokenizer.eos_token_id]).unsqueeze(0)\n", 115 | " mask = torch.ones_like(tokens)\n", 116 | "\n", 117 | " # forward propagation\n", 118 | " with torch.no_grad():\n", 119 | " logits = model(tokens.to(\"cuda\"), attention_mask=mask.to(\"cuda\"))[0]\n", 120 | " probs = logits.softmax(dim=-1)\n", 121 | "\n", 122 | " # update statistics\n", 123 | " generated, human = probs.detach().cpu().flatten().numpy().tolist()\n", 124 | "\n", 125 | " return {\"pred\": [generated, human], \"source\": src}\n" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 4, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "def evaluate_files(from_files, to_files, model):\n", 135 | " for from_file, to_file in zip(from_files, to_files):\n", 136 | " with open(Path(\"data\", \"split\", from_file), \"r\") as f:\n", 137 | " content = f.read().strip().split(\"\\n\")\n", 138 | "\n", 139 | " with open(Path(\"data\", \"baselines\", \"solaiman\", to_file), \"w\") as f:\n", 140 | " for line in tqdm(content):\n", 141 | " entry = load_line(line)\n", 142 | " try:\n", 143 | " result = evaluate_entry(entry, model)\n", 144 | " f.write(json.dumps(result) + \"\\n\")\n", 145 | " except:\n", 146 | " print(\"Failed to process\", entry)\n", 147 | " pass\n" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 5, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stderr", 157 | "output_type": "stream", 158 | "text": [ 159 | " 35%|███▍ | 2583/7400 [00:51<01:20, 59.81it/s]" 160 | ] 161 | }, 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "Failed to process {'text': '', 'source': 'palm'}\n" 167 | ] 168 | }, 169 | { 170 | "name": "stderr", 171 | "output_type": "stream", 172 | "text": [ 173 | " 93%|█████████▎| 6877/7400 [02:12<00:08, 
60.75it/s]" 174 | ] 175 | }, 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "Failed to process {'text': '', 'source': 'palm'}\n" 181 | ] 182 | }, 183 | { 184 | "name": "stderr", 185 | "output_type": "stream", 186 | "text": [ 187 | "100%|██████████| 7400/7400 [02:22<00:00, 51.76it/s]\n", 188 | "100%|██████████| 6587/6587 [02:19<00:00, 47.36it/s]\n", 189 | "100%|██████████| 7385/7385 [03:03<00:00, 40.33it/s]\n" 190 | ] 191 | } 192 | ], 193 | "source": [ 194 | "evaluate_files(\n", 195 | " [\n", 196 | " # \"open-web-text/test.jsonl\", \"open-gpt-text/test.jsonl\", \n", 197 | " \"open-palm-text/test.jsonl\", \"open-llama-text/test.jsonl\", \"gpt2-output/test.jsonl\"\n", 198 | " ],\n", 199 | " [\n", 200 | " # \"solaiman-openweb.jsonl\", \"solaiman-opengpt.jsonl\",\n", 201 | " \"solaiman-openpalm.jsonl\", \"solaiman-openllama.jsonl\", \"solaiman-gpt2.jsonl\"\n", 202 | " ],\n", 203 | " model\n", 204 | ")" 205 | ] 206 | } 207 | ], 208 | "metadata": { 209 | "kernelspec": { 210 | "display_name": "Python 3", 211 | "language": "python", 212 | "name": "python3" 213 | }, 214 | "language_info": { 215 | "codemirror_mode": { 216 | "name": "ipython", 217 | "version": 3 218 | }, 219 | "file_extension": ".py", 220 | "mimetype": "text/x-python", 221 | "name": "python", 222 | "nbconvert_exporter": "python", 223 | "pygments_lexer": "ipython3", 224 | "version": "3.8.16" 225 | } 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 0 229 | } 230 | -------------------------------------------------------------------------------- /evaluator/interpret/integrated_gradient.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import click 3 | from pathlib import Path 4 | from transformers import T5TokenizerFast as Tokenizer 5 | from detector.t5_sentinel.model import Sentinel 6 | 7 | 8 | class SentinelGradientExtractor(torch.nn.Module): 9 | def __init__(self, embedder, interpolate_step=100) -> None: 10 | super().__init__() 11 | self.embedder = embedder 12 | self.encoder_mode = True 13 | 14 | self.max_step = interpolate_step 15 | self.step = 0 16 | self.tokens = None 17 | self.embed_result = None 18 | self.pad_result = None 19 | self.mid_results = [] 20 | 21 | def reset(self): 22 | self.encoder_mode = True 23 | self.step = 0 24 | self.embed_result = None 25 | self.pad_result = None 26 | self.mid_results = [] 27 | 28 | def pure_forward(self, *args, **kwargs): 29 | return self.embedder(*args, **kwargs) 30 | 31 | def grad_forward(self, *args, **kwargs): 32 | # print(f"Gradient integrating - Step {self.step} / {self.max_step}") 33 | if self.step == 0: 34 | embedding = self.embedder(*args, **kwargs) 35 | self.tokens = args[0] 36 | self.pad_result = self.embedder(torch.zeros_like(args[0])).detach() 37 | self.embed_result = embedding.detach() 38 | 39 | embed_result = self.embed_result.clone() 40 | embed_result.requires_grad_(True) 41 | embed_result.retain_grad() 42 | self.mid_results.append(embed_result) 43 | 44 | mix_percent = self.step / self.max_step 45 | mix_result: torch.Tensor = ( 46 | mix_percent * embed_result + (1 - mix_percent) * self.pad_result 47 | ) 48 | # mix_result.requires_grad_(True) 49 | # mix_result.retain_grad() 50 | # self.mid_results.append(mix_result) 51 | self.step += 1 52 | 53 | return mix_result 54 | 55 | def forward(self, *args, **kwargs): 56 | # Embedding layer will be called twice by T5, the first call is for encoder 57 | # second call is for decoder 58 | if self.encoder_mode: 59 | self.encoder_mode = False 60 | 
return self.grad_forward(*args, **kwargs) 61 | else: 62 | self.encoder_mode = True 63 | return self.pure_forward(*args, **kwargs) 64 | 65 | 66 | def injectSentinel(model: Sentinel): 67 | model.injected_embedder = None 68 | 69 | def auto_embedder_substitution(*args, **kwargs): 70 | embedder = model.backbone.get_input_embeddings() 71 | grad_embedder = SentinelGradientExtractor(embedder, 100) 72 | model.injected_embedder = grad_embedder 73 | model.backbone.set_input_embeddings(grad_embedder) 74 | print("Embedder substitution Complete.") 75 | 76 | model.register_load_state_dict_post_hook(auto_embedder_substitution) 77 | return model 78 | 79 | 80 | model = injectSentinel(Sentinel()) 81 | checkpoint = torch.load(Path("data", "checkpoint", "T5Sentinel.0613.pt")) 82 | model.load_state_dict(checkpoint["model"]) 83 | model = model.cuda().eval() 84 | 85 | 86 | def explain(text, label): 87 | tokenizer = Tokenizer.from_pretrained("t5-small", model_max_length=512) 88 | label_tokenizer = Tokenizer.from_pretrained("t5-small", model_max_length=2) 89 | 90 | text_tokenized = tokenizer.batch_encode_plus( 91 | (text,), padding=True, truncation=True, return_tensors="pt" 92 | ) 93 | lab_tokenized = label_tokenizer.batch_encode_plus( 94 | (label,), padding=True, truncation=True, return_tensors="pt" 95 | ) 96 | 97 | for i in range(100): 98 | prob = model.interpretability_study_entry( 99 | text_tokenized.input_ids.cuda(), 100 | text_tokenized.attention_mask.cuda(), 101 | lab_tokenized.input_ids.cuda(), 102 | ) 103 | 104 | all_gradient = [mid.grad for mid in model.injected_embedder.mid_results] 105 | avg_gradient = torch.zeros_like(all_gradient[0]) 106 | for i in range(len(all_gradient)): 107 | avg_gradient += all_gradient[i] 108 | avg_gradient = avg_gradient / len(all_gradient) 109 | diff = model.injected_embedder.embed_result - model.injected_embedder.pad_result 110 | integrated_gradient = torch.norm((avg_gradient * diff)[0], dim=1) 111 | 112 | pred_label = ["Human", "ChatGPT", "PaLM", "LLaMA", "GPT2"] 113 | pred_idx = torch.argmax(prob).item() 114 | print(f"Predicted as {pred_label[pred_idx]} with prob of {prob[0, pred_idx]}") 115 | 116 | tokens = [tokenizer.decode(tok) for tok in model.injected_embedder.tokens[0]] 117 | model.injected_embedder.reset() 118 | return integrated_gradient, tokens, prob 119 | 120 | 121 | def visualize_explain_simple(text, label): 122 | gradient, tokens, prob = explain(text, label) 123 | avg_grad = torch.mean(gradient).item() 124 | std_grad = torch.std(gradient).item() 125 | more_than_1std = gradient > avg_grad + 1 * std_grad 126 | more_than_0std = gradient > avg_grad 127 | mask_0std = torch.logical_xor(more_than_1std, more_than_0std) 128 | mask_1std = more_than_1std 129 | for idx in range(gradient.shape[0]): 130 | tok = tokens[idx] 131 | if mask_0std[idx].item(): 132 | print(click.style(tok, fg="yellow"), end=" ") 133 | elif mask_1std[idx].item(): 134 | print(click.style(tok, fg="red"), end=" ") 135 | else: 136 | print(tok, end=" ") 137 | print("") 138 | 139 | 140 | if __name__ == "__main__": 141 | visualize_explain_simple("Hello world!", "") 142 | visualize_explain_simple( 143 | 'Media playback is unsupported on your device Media caption Hungarian Prime Minister Viktor Orban: "It\'s a serious ecological catastrophe"\n\nToxic red sludge from a spill at an industrial plant in Hungary has reached the River Danube, officials say.\n\nThey said alkaline levels that killed all fish in one river were now greatly reduced, but were being monitored.\n\nPM Viktor Orban called the spill an 
"ecological tragedy". There are fears the mud, which burst out of a reservoir on Monday, could poison the Danube.\n\nCountries downstream from Hungary, including Croatia, Serbia and Romania, are drawing up emergency plans.\n\nA million cubic metres (35m cu ft) of the sludge spilled from a reservoir at an alumina plant in Ajka in western Hungary. Four people were killed and about 100 injured.\n\nThe mud also caused massive damage in nearby villages and towns, as well as a wide swathe of farmland.\n\nNo victory declaration\n\nDisaster official Tibor Dobson said all life in the Marcal river, which feeds the Danube, had been "extinguished".\n\nThe BBC\'s Nick Thorpe in western Hungary says news that the spill has now reached the Danube is worrying.\n\nTests are being carried out for two potential hazards - a powerful alkaline solution and heavy metals.\n\nOfficials say both are below toxic levels for humans in the Danube and its tributary, the Raba.\n\nBut Mr Dobson said this was "by no means a victory declaration".\n\nDead fish have been spotted in both rivers, Mr Dobson notes.\n\nTo save their eco-system, he adds, pH levels must be reduced to 8 from about 9 recently recorded at the confluence of the Raba with the Danube.\n\nThe authorities have been pouring a mixture of clay and acid to reduce alkalinity.\n\n"The main effort is now being concentrated on the Raba and the Danube," Mr Dobson said. "That\'s what has to be saved."\n\nPhilip Weller, executive director of the International Commission for the Protection of the Danube, told the BBC that that the best one could hope was for the Danube to dilute the toxic sludge.\n\n"It\'s a rather large amount of water in the Danube that the dilution effects will at least mean that there will not be immediate consequences," he said.\n\nAbandoning villages\n\nEnvironmental expert Paul Younger of Newcastle University says high alkaline concentrations are an irritant, but not life-threatening for people.\n\n"It\'s not like a big cyanide spill," he told the BBC.\n\nThe sludge itself is a hazardous mixture of water and mining waste containing heavy metals.\n\nThe victims are believed to have drowned, with the depth of the fast-moving flood reaching 2m (6.5ft) in places, but many of those injured suffered chemical burns.\n\nOn Thursday Mr Orban visited the village of Kolontar, the worst-affected settlement, and said some areas would have to be abandoned.\n\n"Hungary is strong enough to be able to combat the effects of such a catastrophe. But we\'re still open to any expertise which will help us combat the pollution effects," he added.\n\nAngry villagers confronted a company official in Kolontar on Wednesday evening. 
They say they plan to sue the firm for damages.\n\nHerwit Schuster, a spokesman for Greenpeace International, described the spill as "one of the top three environmental disasters in Europe in the last 20 or 30 years".\n\nLand had been "polluted and destroyed for a long time", he told AP.\n\n"If there are substances like arsenic and mercury, that would affect river systems and ground water on long-term basis," he added.', 144 | "", 145 | ) 146 | -------------------------------------------------------------------------------- /evaluator/plot/plot_roc.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | from sklearn.metrics import auc 4 | 5 | from evaluator.toolkit import * 6 | from pathlib import Path 7 | 8 | from pipeline.lib.import_openai_result import import_openai_prediction_result 9 | from pipeline.lib.import_zerogpt_result import import_zerogpt_prediction_result 10 | 11 | import evaluator.models.t5_sentinel.t5_get_hidden_states as T5_Full 12 | import evaluator.models.t5_hidden.t5_get_hidden_states as T5_Hidden 13 | 14 | 15 | def get_openai_baseline_curve(): 16 | predictions = import_openai_prediction_result() 17 | 18 | reformulated_predictions = [] 19 | for entry in predictions: 20 | p_selected = entry["data"][0] 21 | new_entry = entry.copy() 22 | new_entry["data"] = np.array([p_selected, 1 - p_selected]) 23 | reformulated_predictions.append(new_entry) 24 | 25 | curve = get_roc_binary(reformulated_predictions, "openweb") 26 | return curve 27 | 28 | 29 | def get_zerogpt_baseline_curve(): 30 | prediction = import_zerogpt_prediction_result() 31 | 32 | reformulated_predictions = [] 33 | for entry in prediction: 34 | p_selected = entry["data"][0] 35 | new_entry = entry.copy() 36 | new_entry["data"] = np.array([p_selected, 1 - p_selected]) 37 | reformulated_predictions.append(new_entry) 38 | 39 | curve = get_roc_binary(reformulated_predictions, "openweb") 40 | return curve 41 | 42 | 43 | 44 | def get_t5_one_to_rest_roc_full(file_name, prediction_idx: int, pos_label: str): 45 | predictions = T5_Full.evaluate_predictions([ 46 | Path("data", "split", "open-web-text", file_name), 47 | Path("data", "split", "open-gpt-text", file_name), 48 | Path("data", "split", "open-palm-text", file_name), 49 | Path("./data/split/open-llama-text/", file_name), 50 | Path("./data/split/gpt2-output/", file_name) 51 | ]) 52 | reformulated_predictions = [] 53 | for entry in predictions: 54 | p_selected = entry["data"][prediction_idx] 55 | new_entry = entry.copy() 56 | new_entry["data"] = np.array([p_selected, 1 - p_selected]) 57 | reformulated_predictions.append(new_entry) 58 | curve = get_roc_binary(reformulated_predictions, pos_label) 59 | return curve 60 | 61 | 62 | def get_t5_one_to_rest_roc_hidden(file_name, prediction_idx: int, pos_label: str): 63 | predictions = T5_Hidden.evaluate_predictions([ 64 | Path("data", "split", "open-web-text" , file_name), 65 | Path("data", "split", "open-gpt-text" , file_name), 66 | Path("data", "split", "open-palm-text" , file_name), 67 | Path("data", "split", "open-llama-text", file_name), 68 | Path("data", "split", "gpt2-output" , file_name) 69 | ]) 70 | reformulated_predictions = [] 71 | for entry in predictions: 72 | p_selected = entry["data"][prediction_idx] 73 | new_entry = entry.copy() 74 | new_entry["data"] = np.array([p_selected, 1 - p_selected]) 75 | reformulated_predictions.append(new_entry) 76 | curve = get_roc_binary(reformulated_predictions, pos_label) 77 | return curve 
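# --- Illustrative sketch, not part of the original repository --------------------------------
# The four loader functions above all repeat the same one-vs-rest reformulation: the model's
# probability vector stored in entry["data"] is collapsed into [p_selected, 1 - p_selected] so
# that get_roc_binary can score the selected source as the positive class against everything
# else. A hypothetical helper such as the one below could factor that pattern out; the name
# reformulate_one_vs_rest does not exist anywhere in this codebase and is shown only to make
# the transformation explicit. np is assumed to be re-exported by the
# `from evaluator.toolkit import *` at the top of this file.
def reformulate_one_vs_rest(predictions: list, prediction_idx: int) -> list:
    reformulated = []
    for entry in predictions:
        p_selected = entry["data"][prediction_idx]  # probability assigned to the positive class
        new_entry = entry.copy()
        new_entry["data"] = np.array([p_selected, 1.0 - p_selected])  # binary: positive vs. rest
        reformulated.append(new_entry)
    return reformulated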
78 | 79 | def plot_t5_full_one_to_rest(): 80 | curve0 = get_t5_one_to_rest_roc_full("test-dirty.jsonl", 0, "openweb") 81 | curve1 = get_t5_one_to_rest_roc_full("test-dirty.jsonl", 1, "chatgpt") 82 | curve2 = get_t5_one_to_rest_roc_full("test-dirty.jsonl", 2, "palm") 83 | curve3 = get_t5_one_to_rest_roc_full("test-dirty.jsonl", 3, "llama") 84 | curve4 = get_t5_one_to_rest_roc_full("test-dirty.jsonl", 4, "gpt2_xl") 85 | 86 | print(f"Human AUC: {auc(curve0[0], curve0[1])}") 87 | print(f"GPT3.5 AUC: {auc(curve1[0], curve1[1])}") 88 | print(f"PaLM AUC: {auc(curve2[0], curve2[1])}") 89 | print(f"LLaMA AUC: {auc(curve3[0], curve3[1])}") 90 | print(f"GPT2-XL AUC: {auc(curve4[0], curve4[1])}") 91 | 92 | figure: plt.Figure = plt.figure(dpi=200) 93 | ax: plt.Axes = figure.add_subplot(1, 1, 1) 94 | # ax.set_prop_cycle('color', sns.color_palette("hls")) 95 | ax.plot(curve0[0], curve0[1], label="Human") 96 | ax.plot(curve1[0], curve1[1], label="GPT3.5") 97 | ax.plot(curve2[0], curve2[1], label="PaLM") 98 | ax.plot(curve3[0], curve3[1], label="LLaMA") 99 | ax.plot(curve4[0], curve4[1], label="GPT2-XL") 100 | ax.set_xlim(-0.05, 1.05) 101 | ax.set_ylim(-0.05, 1.05) 102 | # ax.set_title("ROC Curves for T5-Sentinel for each \nclassification label on one-to-rest classification task") 103 | ax.set_xlabel("False Positive Rate") 104 | ax.set_ylabel("True Positive Rate") 105 | ax.grid(visible=True, linestyle="--") 106 | ax.legend() 107 | figure.tight_layout() 108 | figure.savefig(Path("./result/t5_sentinel/roc_t5_full.pdf")) 109 | 110 | 111 | def plot_t5_hidden_one_to_rest(): 112 | curve0 = get_t5_one_to_rest_roc_hidden("test-dirty.jsonl", 0, "openweb") 113 | curve1 = get_t5_one_to_rest_roc_hidden("test-dirty.jsonl", 1, "chatgpt") 114 | curve2 = get_t5_one_to_rest_roc_hidden("test-dirty.jsonl", 2, "palm") 115 | curve3 = get_t5_one_to_rest_roc_hidden("test-dirty.jsonl", 3, "llama") 116 | curve4 = get_t5_one_to_rest_roc_hidden("test-dirty.jsonl", 4, "gpt2_xl") 117 | 118 | print(f"Human AUC: {auc(curve0[0], curve0[1])}") 119 | print(f"GPT3.5 AUC: {auc(curve1[0], curve1[1])}") 120 | print(f"PaLM AUC: {auc(curve2[0], curve2[1])}") 121 | print(f"LLaMA AUC: {auc(curve3[0], curve3[1])}") 122 | print(f"GPT2-XL AUC: {auc(curve4[0], curve4[1])}") 123 | 124 | figure: plt.Figure = plt.figure(dpi=200) 125 | ax: plt.Axes = figure.add_subplot(1, 1, 1) 126 | # ax.set_prop_cycle('color', sns.color_palette("hls")) 127 | ax.plot(curve0[0], curve0[1], label="Human") 128 | ax.plot(curve1[0], curve1[1], label="GPT3.5") 129 | ax.plot(curve2[0], curve2[1], label="PaLM") 130 | ax.plot(curve3[0], curve3[1], label="LLaMA") 131 | ax.plot(curve4[0], curve4[1], label="GPT2-XL") 132 | ax.set_xlim(-0.05, 1.05) 133 | ax.set_ylim(-0.05, 1.05) 134 | # ax.set_title("ROC Curves for T5-Sentinel for each \nclassification label on one-to-rest classification task") 135 | ax.set_xlabel("False Positive Rate") 136 | ax.set_ylabel("True Positive Rate") 137 | ax.grid(visible=True, linestyle="--") 138 | ax.legend() 139 | figure.tight_layout() 140 | figure.savefig(Path("./result/t5_sentinel/roc_t5_hidden.pdf")) 141 | 142 | 143 | def plot_t5_full_ablation(pos: int, label: str): 144 | def implement() -> None: 145 | curve_0 = get_t5_one_to_rest_roc_full("test-dirty.jsonl", pos, label) 146 | curve_1 = get_t5_one_to_rest_roc_full("test.variant1.jsonl", pos, label) 147 | curve_2 = get_t5_one_to_rest_roc_full("test.variant2.jsonl", pos, label) 148 | curve_3 = get_t5_one_to_rest_roc_full("test.variant3.jsonl", pos, label) 149 | curve_4 = 
get_t5_one_to_rest_roc_full("test.variant4.jsonl", pos, label) 150 | 151 | figure: plt.Figure = plt.figure(dpi=200) 152 | ax: plt.Axes = figure.add_subplot(1, 1, 1) 153 | ax.set_prop_cycle('color', sns.color_palette("hls")) 154 | ax.plot(curve_0[0], curve_0[1], label="Original") 155 | ax.plot(curve_1[0], curve_1[1], label="Remove Newline") 156 | ax.plot(curve_2[0], curve_2[1], label="Unicode to ASCII") 157 | ax.plot(curve_3[0], curve_3[1], label="Remove Punctuations") 158 | ax.plot(curve_4[0], curve_4[1], label="To Lower") 159 | ax.set_xlim(-0.05, 1.05) 160 | ax.set_ylim(-0.05, 1.05) 161 | # ax.set_title( 162 | # f"ROC Curves for T5-Sentinel for each \ndifferent sanitization level on one-to-rest classification task ({label})") 163 | ax.set_xlabel("False Positive Rate") 164 | ax.set_ylabel("True Positive Rate") 165 | ax.grid(visible=True, linestyle="--") 166 | ax.legend() 167 | figure.savefig(Path(f"./result/t5_sentinel/ablation_{label}_roc.pdf")) 168 | 169 | return implement 170 | 171 | 172 | def plot_t5_compare_with_baseline(): 173 | curve_openai = get_openai_baseline_curve() 174 | curve_zerogpt = get_zerogpt_baseline_curve() 175 | curve_t5 = get_t5_one_to_rest_roc_full("test-dirty.jsonl", 0, "openweb") 176 | curve_hidden = get_t5_one_to_rest_roc_hidden("test-dirty.jsonl", 0, "openweb") 177 | 178 | print(f"OpenAI AUC: {auc(curve_openai[0], curve_openai[1])}") 179 | print(f"ZeroGPT AUC: {auc(curve_zerogpt[0], curve_zerogpt[1])}") 180 | print(f"T5-Sentinel AUC: {auc(curve_t5[0], curve_t5[1])}") 181 | print(f"T5-Hidden AUC: {auc(curve_hidden[0], curve_t5[1])}") 182 | 183 | figure: plt.Figure = plt.figure(dpi=200) 184 | ax: plt.Axes = figure.add_subplot(1, 1, 1) 185 | # ax.set_prop_cycle('color', sns.color_palette("hls")) 186 | ax.plot(curve_openai[0], curve_openai[1], label="OpenAI") 187 | ax.plot(curve_zerogpt[0], curve_zerogpt[1], label="ZeroGPT") 188 | ax.plot(curve_t5[0], curve_t5[1], label="T5-Sentinel") 189 | ax.plot(curve_hidden[0], curve_hidden[1], label="T5-Hidden") 190 | ax.set_xlim(-0.05, 1.05) 191 | ax.set_ylim(-0.05, 1.05) 192 | ax.set_xlabel("False Positive Rate") 193 | ax.set_ylabel("True Positive Rate") 194 | # ax.set_title(f"ROC Curves for T5-Sentinel on Identifying human") 195 | ax.grid(visible=True, linestyle="--") 196 | ax.legend() 197 | figure.tight_layout() 198 | figure.savefig(Path(f"./result/t5_sentinel/t5_compare_with_baseline.pdf")) 199 | 200 | 201 | if __name__ == "__main__": 202 | TASKS = [ 203 | plot_t5_full_one_to_rest, 204 | plot_t5_full_ablation(0, "openweb"), 205 | plot_t5_full_ablation(1, "chatgpt"), 206 | plot_t5_full_ablation(2, "palm"), 207 | plot_t5_full_ablation(3, "llama"), 208 | plot_t5_full_ablation(4, "gpt2_xl"), 209 | plot_t5_compare_with_baseline 210 | ] 211 | 212 | for task in TASKS: 213 | print("Executing task: ", task.__name__) 214 | task() 215 | --------------------------------------------------------------------------------