├── .gitignore
├── Datasets.py
├── README.md
├── configs
    ├── dialog.json
    ├── dialog_dp
    │   ├── dp_0.0.json
    │   ├── dp_0.05.json
    │   ├── dp_0.1.json
    │   ├── dp_0.2.json
    │   ├── dp_0.3.json
    │   ├── dp_0.4.json
    │   ├── dp_0.5.json
    │   ├── dp_0.6.json
    │   ├── dp_0.7.json
    │   ├── dp_0.8.json
    │   └── dp_0.9.json
    ├── dialog_st
    │   ├── dialog_suffix_tree.json
    │   ├── dialog_suffix_tree1.json
    │   ├── dialog_suffix_tree2.json
    │   ├── dialog_suffix_tree3.json
    │   └── dialog_suffix_tree4.json
    ├── dialog_suffix_tree_debug.json
    ├── dialog_unlearn
    │   ├── dialog.json
    │   ├── dialog1.json
    │   ├── dialog2.json
    │   ├── dialog3.json
    │   └── dialog4.json
    ├── dp
    │   ├── 1.3B_0.json
    │   ├── 1.3B_1.json
    │   ├── 1.3B_2.json
    │   ├── 1.3B_3.json
    │   ├── 1.3B_4.json
    │   ├── 1.3B_general.json
    │   ├── 125M_0.json
    │   ├── 125M_1.json
    │   ├── 125M_2.json
    │   ├── 125M_3.json
    │   ├── 125M_4.json
    │   ├── 125M_general.json
    │   ├── 2.7B_0.json
    │   ├── 2.7B_1.json
    │   ├── 2.7B_2.json
    │   ├── 2.7B_3.json
    │   ├── 2.7B_4.json
    │   ├── 2.7B_general.json
    │   ├── create_configs.py
    │   └── template.json
    └── example.json
├── csv_out
    └── Dialog Initial.csv
├── data
    ├── domain_main
    │   ├── books3_8_0.csv
    │   ├── books3_8_1.csv
    │   ├── books3_8_2.csv
    │   ├── books3_8_3.csv
    │   ├── books3_8_4.csv
    │   ├── enron_emails_8_0.csv
    │   ├── enron_emails_8_1.csv
    │   ├── enron_emails_8_2.csv
    │   ├── enron_emails_8_3.csv
    │   ├── enron_emails_8_4.csv
    │   ├── freelaw_8_0.csv
    │   ├── freelaw_8_1.csv
    │   ├── freelaw_8_2.csv
    │   ├── freelaw_8_3.csv
    │   ├── freelaw_8_4.csv
    │   ├── github_8_0.csv
    │   ├── github_8_1.csv
    │   ├── github_8_2.csv
    │   ├── github_8_3.csv
    │   ├── github_8_4.csv
    │   ├── license_8_0.csv
    │   ├── license_8_1.csv
    │   ├── license_8_2.csv
    │   ├── license_8_3.csv
    │   ├── license_8_4.csv
    │   ├── pile-cc_8_0.csv
    │   ├── pile-cc_8_1.csv
    │   ├── pile-cc_8_2.csv
    │   ├── pile-cc_8_3.csv
    │   ├── pile-cc_8_4.csv
    │   ├── pubmed_central_8_0.csv
    │   ├── pubmed_central_8_1.csv
    │   ├── pubmed_central_8_2.csv
    │   ├── pubmed_central_8_3.csv
    │   ├── pubmed_central_8_4.csv
    │   ├── uspto_backgrounds_8_0.csv
    │   ├── uspto_backgrounds_8_1.csv
    │   ├── uspto_backgrounds_8_2.csv
    │   ├── uspto_backgrounds_8_3.csv
    │   └── uspto_backgrounds_8_4.csv
    └── main
    │   ├── lm_extraction_128_0.csv
    │   ├── lm_extraction_128_1.csv
    │   ├── lm_extraction_128_2.csv
    │   ├── lm_extraction_128_3.csv
    │   ├── lm_extraction_128_4.csv
    │   ├── lm_extraction_1_0.csv
    │   ├── lm_extraction_1_1.csv
    │   ├── lm_extraction_1_2.csv
    │   ├── lm_extraction_1_3.csv
    │   ├── lm_extraction_1_4.csv
    │   ├── lm_extraction_32_0.csv
    │   ├── lm_extraction_32_1.csv
    │   ├── lm_extraction_32_2.csv
    │   ├── lm_extraction_32_3.csv
    │   ├── lm_extraction_32_4.csv
    │   ├── lm_extraction_4_0.csv
    │   ├── lm_extraction_4_1.csv
    │   ├── lm_extraction_4_2.csv
    │   ├── lm_extraction_4_3.csv
    │   ├── lm_extraction_4_4.csv
    │   ├── lm_extraction_8_0.csv
    │   ├── lm_extraction_8_1.csv
    │   ├── lm_extraction_8_2.csv
    │   ├── lm_extraction_8_3.csv
    │   └── lm_extraction_8_4.csv
├── fig1.png
├── models
    ├── Neo_Model.py
    ├── Neo_Model_DP.py
    ├── Neo_Model_suffix_tree.py
    └── Neo_Model_valid.py
├── outputs
    ├── example.csv
    ├── init_DP-1.3B_0.0.csv
    ├── init_DP-1.3B_0.1.csv
    ├── init_DP-1.3B_0.2.csv
    ├── init_DP-1.3B_0.3.csv
    ├── init_DP-1.3B_0.4.csv
    ├── init_DP-1.3B_0.5.csv
    ├── init_DP-1.3B_0.6.csv
    ├── init_DP-1.3B_0.7.csv
    ├── init_DP-1.3B_0.8.csv
    ├── init_DP-1.3B_0.9.csv
    └── init_example.csv
├── requirements.txt
├── run.py
├── run_dp.py
├── run_st.py
├── utils.py
└── validation_data
    ├── blended_skill_talk.json
    ├── empathetic_dialogues.json
    ├── lambada.csv
    ├── pile.csv
    ├── pubmed_qa.csv
    ├── valid_dm_mathematics.csv
    ├── wikitext.csv
    ├── wizard_of_internet.json
    └── wizard_of_wikipedia.json


/.gitignore:
--------------------------------------------------------------------------------
 1 | deepspeed
 2 | 
 3 | #evaluation
 4 | ckpt
 5 | 
 6 | wandb
 7 | tbImport.log
 8 | 
 9 | __pycache__
10 | models/__pycache__
11 | 
12 | logs
13 | 
14 | *.pyc
15 | 
16 | test.py
17 | nohup.out
18 | .fuse*


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Knowledge Unlearning for Mitigating Privacy Risks in Language Models
 2 | 
 3 | ![alt text](fig1.png "Main Figure")
 4 | 
 5 | paper link: https://arxiv.org/abs/2210.01504
 6 | 
 7 | In order to reproduce our results, take the following steps:
 8 | ### 1. Create conda environment and install requirements
 9 | ```
10 | conda create -n ufl python=3.8
11 | conda activate ufl
12 | # Install the correct torch version depending on CUDA version from https://pytorch.org/
13 | conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch 
14 | pip install -r requirements.txt
15 | ```
16 | 
17 | ### 2. In order to run the basic code, use the following command
18 | ```
19 | python run.py --config configs/example.json
20 | ```
21 | 
22 | ### 3. Reproducing Experimental Results
23 | 
24 | **Configs**
25 | - mode (string) : Either "unlearn" or "general_lm_eval"
26 |   - "unlearn" will measure MA and EL for validation sets with valid_type_path == "target", for others it will run normal evaluation
27 |   - "general_lm_eval" will run normal evaluation for all validation sets; only use it when not evaluating the target data (the data that should be unlearned)
28 | - check_validation_only (bool) : If true, a single validation loop will run without training
29 | - do_init_eval (bool) : Whether to run a single validation loop before training
30 | - train_set (string) : Path to train_set, should be a .csv file
31 | - valid_sets (list[string]) : List containing validation set info
32 |   - Could either be a .csv file path, or the dataset name on Huggingface hub
33 | - valid_subset_path (list[string]) : Subset name of the dataset from HF hub
34 |   - If it does not have a subset, or is a .csv file the string will be ignored
35 | - valid_type_path (list[string]) : Type of the validation data
36 |   - If it's the target data pass "target"
37 |   - If it's a HF hub data pass the appropriate type
38 |   - If it's a .csv file the string will be ignored
39 | - el_n (list[int]) : list of n values for EL
40 | - el_threshold (float) : The model's EL score for unseen data; exact values for each model are given in the paper
41 | - ma_threshold (float) : The model's MA score for unseen data; exact values for each model are given in the paper
42 | - min_train_epochs (int) : Guarantees the minimum amount of epochs
43 |   - By default the model will stop training when it reaches both el_threshold and ma_threshold
44 |   - This configuration will give some control over this behaviour
45 | - target_length (int) : The token length of the unlearning target data
46 | - input_length, output_length (int) : The token length of the input, output for LM evaluation tasks
47 | - strategy : Strategy passed to Lightning Trainer()
48 |   - The code was tested with "deepspeed_stage_2" and "deepspeed_stage_2_offload"
49 |   
50 | **Note**
51 | - The effective batch size (train_batch_size * gradient_accumulation_steps * ngpu) should be identical to the train set size
52 |   - We found that minimizing gradient updates is crucial for retaining LM performance
53 |   - If "effective batch size" != "train set size" the code will throw an error
54 | - The eval_batch_size will be replaced with train_batch_size only for "target" data, because "target" data are usually much smaller than LM eval data
55 |   - This also speeds up the evaluation, because it guarantees a single eval step
56 | - The code will save two .csv files to "outputs/". They contain MA and EL scores for each individual example within the target data
57 |   - One contains the validation results measured before training
58 |   - The other contains the validation results throughout training
59 | 


--------------------------------------------------------------------------------
/configs/dialog.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "general_lm_eval",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "Dialog Initial",
 5 |     "num_train_epochs": 20,
 6 |     "check_val_every_n_epoch": 1,
 7 |     "check_validation_only": true,
 8 |     "do_init_eval": true,
 9 |     "train_set": "data/main/lm_extraction_32_0.csv",
10 |     "valid_sets": [
11 |         "validation_data/wizard_of_wikipedia.json",
12 |         "validation_data/empathetic_dialogues.json",
13 |         "validation_data/blended_skill_talk.json",
14 |         "validation_data/wizard_of_internet.json"
15 |     ],
16 |     "valid_subset_path": [
17 |         "",
18 |         "",
19 |         "",
20 |         "",
21 |         ""
22 |     ],
23 |     "valid_type_path": [
24 |         "target",
25 |         "",
26 |         "",
27 |         "",
28 |         ""
29 |     ],
30 |     "train_batch_size": 32,
31 |     "eval_batch_size": 32,
32 |     "gradient_accumulation_steps": 1,
33 |     "ngpu": 1,
34 |     "learning_rate": 5e-5,
35 |     "model_name_or_path": "EleutherAI/gpt-neo-125M",
36 |     "el_threshold": 0.0499,
37 |     "ma_threshold": 0.2994,
38 |     "input_length": 512,
39 |     "output_length": 512,
40 |     "target_length": 200,
41 |     "num_workers": 64,
42 |     "strategy": "deepspeed_stage_2_offload",
43 |     "fp16": true,
44 |     "wandb_log": true
45 | }
46 | 


--------------------------------------------------------------------------------
/configs/dialog_dp/dp_0.0.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "DP-1.3B_0.0",
 5 |     "lambda_weight": 0,
 6 |     "num_train_epochs": 20,
 7 |     "check_val_every_n_epoch": 1,
 8 |     "check_validation_only": true,
 9 |     "do_init_eval": true,
10 |     "train_set": "data/main/lm_extraction_32_0.csv",
11 |     "valid_sets": [
12 |         "data/main/lm_extraction_32_0.csv",
13 |         "validation_data/wizard_of_wikipedia.json",
14 |         "validation_data/empathetic_dialogues.json",
15 |         "validation_data/blended_skill_talk.json",
16 |         "validation_data/wizard_of_internet.json"
17 |     ],
18 |     "valid_subset_path": [
19 |         "",
20 |         "",
21 |         "",
22 |         "",
23 |         ""
24 |     ],
25 |     "valid_type_path": [
26 |         "target",
27 |         "",
28 |         "",
29 |         "",
30 |         ""
31 |     ],
32 |     "train_batch_size": 32,
33 |     "eval_batch_size": 32,
34 |     "gradient_accumulation_steps": 1,
35 |     "ngpu": 1,
36 |     "learning_rate": 5e-5,
37 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
38 |     "el_threshold": 0.0499,
39 |     "ma_threshold": 0.2994,
40 |     "input_length": 512,
41 |     "output_length": 512,
42 |     "target_length": 200,
43 |     "num_workers": 64,
44 |     "strategy": "deepspeed_stage_2_offload",
45 |     "fp16": true,
46 |     "wandb_log": true
47 | }
48 | 


--------------------------------------------------------------------------------
/configs/dialog_dp/dp_0.05.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "DP-1.3B_0.05",
 5 |     "lambda_weight": 0.05,
 6 |     "num_train_epochs": 20,
 7 |     "check_val_every_n_epoch": 1,
 8 |     "check_validation_only": true,
 9 |     "do_init_eval": true,
10 |     "train_set": "data/main/lm_extraction_32_0.csv",
11 |     "valid_sets": [
12 |         "data/main/lm_extraction_32_0.csv",
13 |         "validation_data/wizard_of_wikipedia.json",
14 |         "validation_data/empathetic_dialogues.json",
15 |         "validation_data/blended_skill_talk.json",
16 |         "validation_data/wizard_of_internet.json"
17 |     ],
18 |     "valid_subset_path": [
19 |         "",
20 |         "",
21 |         "",
22 |         "",
23 |         ""
24 |     ],
25 |     "valid_type_path": [
26 |         "target",
27 |         "",
28 |         "",
29 |         "",
30 |         ""
31 |     ],
32 |     "train_batch_size": 32,
33 |     "eval_batch_size": 32,
34 |     "gradient_accumulation_steps": 1,
35 |     "ngpu": 1,
36 |     "learning_rate": 5e-5,
37 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
38 |     "el_threshold": 0.0499,
39 |     "ma_threshold": 0.2994,
40 |     "input_length": 512,
41 |     "output_length": 512,
42 |     "target_length": 200,
43 |     "num_workers": 64,
44 |     "strategy": "deepspeed_stage_2_offload",
45 |     "fp16": true,
46 |     "wandb_log": true
47 | }
48 | 


--------------------------------------------------------------------------------
/configs/dialog_dp/dp_0.1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "DP-1.3B_0.1",
 5 |     "lambda_weight": 0.1,
 6 |     "num_train_epochs": 20,
 7 |     "check_val_every_n_epoch": 1,
 8 |     "check_validation_only": true,
 9 |     "do_init_eval": true,
10 |     "train_set": "data/main/lm_extraction_32_0.csv",
11 |     "valid_sets": [
12 |         "data/main/lm_extraction_32_0.csv",
13 |         "validation_data/wizard_of_wikipedia.json",
14 |         "validation_data/empathetic_dialogues.json",
15 |         "validation_data/blended_skill_talk.json",
16 |         "validation_data/wizard_of_internet.json"
17 |     ],
18 |     "valid_subset_path": [
19 |         "",
20 |         "",
21 |         "",
22 |         "",
23 |         ""
24 |     ],
25 |     "valid_type_path": [
26 |         "target",
27 |         "",
28 |         "",
29 |         "",
30 |         ""
31 |     ],
32 |     "train_batch_size": 32,
33 |     "eval_batch_size": 32,
34 |     "gradient_accumulation_steps": 1,
35 |     "ngpu": 1,
36 |     "learning_rate": 5e-5,
37 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
38 |     "el_threshold": 0.0499,
39 |     "ma_threshold": 0.2994,
40 |     "input_length": 512,
41 |     "output_length": 512,
42 |     "target_length": 200,
43 |     "num_workers": 64,
44 |     "strategy": "deepspeed_stage_2_offload",
45 |     "fp16": true,
46 |     "wandb_log": true
47 | }
48 | 


--------------------------------------------------------------------------------
/configs/dialog_dp/dp_0.2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "DP-1.3B_0.2",
 5 |     "lambda_weight": 0.2,
 6 |     "num_train_epochs": 20,
 7 |     "check_val_every_n_epoch": 1,
 8 |     "check_validation_only": true,
 9 |     "do_init_eval": true,
10 |     "train_set": "data/main/lm_extraction_32_0.csv",
11 |     "valid_sets": [
12 |         "data/main/lm_extraction_32_0.csv",
13 |         "validation_data/wizard_of_wikipedia.json",
14 |         "validation_data/empathetic_dialogues.json",
15 |         "validation_data/blended_skill_talk.json",
16 |         "validation_data/wizard_of_internet.json"
17 |     ],
18 |     "valid_subset_path": [
19 |         "",
20 |         "",
21 |         "",
22 |         "",
23 |         ""
24 |     ],
25 |     "valid_type_path": [
26 |         "target",
27 |         "",
28 |         "",
29 |         "",
30 |         ""
31 |     ],
32 |     "train_batch_size": 32,
33 |     "eval_batch_size": 32,
34 |     "gradient_accumulation_steps": 1,
35 |     "ngpu": 1,
36 |     "learning_rate": 5e-5,
37 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
38 |     "el_threshold": 0.0499,
39 |     "ma_threshold": 0.2994,
40 |     "input_length": 512,
41 |     "output_length": 512,
42 |     "target_length": 200,
43 |     "num_workers": 64,
44 |     "strategy": "deepspeed_stage_2_offload",
45 |     "fp16": true,
46 |     "wandb_log": true
47 | }
48 | 


--------------------------------------------------------------------------------
/configs/dialog_dp/dp_0.3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "DP-1.3B_0.3",
 5 |     "lambda_weight": 0.3,
 6 |     "num_train_epochs": 20,
 7 |     "check_val_every_n_epoch": 1,
 8 |     "check_validation_only": true,
 9 |     "do_init_eval": true,
10 |     "train_set": "data/main/lm_extraction_32_0.csv",
11 |     "valid_sets": [
12 |         "data/main/lm_extraction_32_0.csv",
13 |         "validation_data/wizard_of_wikipedia.json",
14 |         "validation_data/empathetic_dialogues.json",
15 |         "validation_data/blended_skill_talk.json",
16 |         "validation_data/wizard_of_internet.json"
17 |     ],
18 |     "valid_subset_path": [
19 |         "",
20 |         "",
21 |         "",
22 |         "",
23 |         ""
24 |     ],
25 |     "valid_type_path": [
26 |         "target",
27 |         "",
28 |         "",
29 |         "",
30 |         ""
31 |     ],
32 |     "train_batch_size": 32,
33 |     "eval_batch_size": 32,
34 |     "gradient_accumulation_steps": 1,
35 |     "ngpu": 1,
36 |     "learning_rate": 5e-5,
37 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
38 |     "el_threshold": 0.0499,
39 |     "ma_threshold": 0.2994,
40 |     "input_length": 512,
41 |     "output_length": 512,
42 |     "target_length": 200,
43 |     "num_workers": 64,
44 |     "strategy": "deepspeed_stage_2_offload",
45 |     "fp16": true,
46 |     "wandb_log": true
47 | }
48 | 


--------------------------------------------------------------------------------
/configs/dialog_dp/dp_0.4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "DP-1.3B_0.4",
 5 |     "lambda_weight": 0.4,
 6 |     "num_train_epochs": 20,
 7 |     "check_val_every_n_epoch": 1,
 8 |     "check_validation_only": true,
 9 |     "do_init_eval": true,
10 |     "train_set": "data/main/lm_extraction_32_0.csv",
11 |     "valid_sets": [
12 |         "data/main/lm_extraction_32_0.csv",
13 |         "validation_data/wizard_of_wikipedia.json",
14 |         "validation_data/empathetic_dialogues.json",
15 |         "validation_data/blended_skill_talk.json",
16 |         "validation_data/wizard_of_internet.json"
17 |     ],
18 |     "valid_subset_path": [
19 |         "",
20 |         "",
21 |         "",
22 |         "",
23 |         ""
24 |     ],
25 |     "valid_type_path": [
26 |         "target",
27 |         "",
28 |         "",
29 |         "",
30 |         ""
31 |     ],
32 |     "train_batch_size": 32,
33 |     "eval_batch_size": 32,
34 |     "gradient_accumulation_steps": 1,
35 |     "ngpu": 1,
36 |     "learning_rate": 5e-5,
37 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
38 |     "el_threshold": 0.0499,
39 |     "ma_threshold": 0.2994,
40 |     "input_length": 512,
41 |     "output_length": 512,
42 |     "target_length": 200,
43 |     "num_workers": 64,
44 |     "strategy": "deepspeed_stage_2_offload",
45 |     "fp16": true,
46 |     "wandb_log": true
47 | }
48 | 


--------------------------------------------------------------------------------
/configs/dialog_dp/dp_0.5.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "DP-1.3B_0.5",
 5 |     "lambda_weight": 0.5,
 6 |     "num_train_epochs": 20,
 7 |     "check_val_every_n_epoch": 1,
 8 |     "check_validation_only": true,
 9 |     "do_init_eval": true,
10 |     "train_set": "data/main/lm_extraction_32_0.csv",
11 |     "valid_sets": [
12 |         "data/main/lm_extraction_32_0.csv",
13 |         "validation_data/wizard_of_wikipedia.json",
14 |         "validation_data/empathetic_dialogues.json",
15 |         "validation_data/blended_skill_talk.json",
16 |         "validation_data/wizard_of_internet.json"
17 |     ],
18 |     "valid_subset_path": [
19 |         "",
20 |         "",
21 |         "",
22 |         "",
23 |         ""
24 |     ],
25 |     "valid_type_path": [
26 |         "target",
27 |         "",
28 |         "",
29 |         "",
30 |         ""
31 |     ],
32 |     "train_batch_size": 32,
33 |     "eval_batch_size": 32,
34 |     "gradient_accumulation_steps": 1,
35 |     "ngpu": 1,
36 |     "learning_rate": 5e-5,
37 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
38 |     "el_threshold": 0.0499,
39 |     "ma_threshold": 0.2994,
40 |     "input_length": 512,
41 |     "output_length": 512,
42 |     "target_length": 200,
43 |     "num_workers": 64,
44 |     "strategy": "deepspeed_stage_2_offload",
45 |     "fp16": true,
46 |     "wandb_log": true
47 | }
48 | 


--------------------------------------------------------------------------------
/configs/dialog_dp/dp_0.6.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "DP-1.3B_0.6",
 5 |     "lambda_weight": 0.6,
 6 |     "num_train_epochs": 20,
 7 |     "check_val_every_n_epoch": 1,
 8 |     "check_validation_only": true,
 9 |     "do_init_eval": true,
10 |     "train_set": "data/main/lm_extraction_32_0.csv",
11 |     "valid_sets": [
12 |         "data/main/lm_extraction_32_0.csv",
13 |         "validation_data/wizard_of_wikipedia.json",
14 |         "validation_data/empathetic_dialogues.json",
15 |         "validation_data/blended_skill_talk.json",
16 |         "validation_data/wizard_of_internet.json"
17 |     ],
18 |     "valid_subset_path": [
19 |         "",
20 |         "",
21 |         "",
22 |         "",
23 |         ""
24 |     ],
25 |     "valid_type_path": [
26 |         "target",
27 |         "",
28 |         "",
29 |         "",
30 |         ""
31 |     ],
32 |     "train_batch_size": 32,
33 |     "eval_batch_size": 32,
34 |     "gradient_accumulation_steps": 1,
35 |     "ngpu": 1,
36 |     "learning_rate": 5e-5,
37 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
38 |     "el_threshold": 0.0499,
39 |     "ma_threshold": 0.2994,
40 |     "input_length": 512,
41 |     "output_length": 512,
42 |     "target_length": 200,
43 |     "num_workers": 64,
44 |     "strategy": "deepspeed_stage_2_offload",
45 |     "fp16": true,
46 |     "wandb_log": true
47 | }
48 | 


--------------------------------------------------------------------------------
/configs/dialog_dp/dp_0.7.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "DP-1.3B_0.7",
 5 |     "lambda_weight": 0.7,
 6 |     "num_train_epochs": 20,
 7 |     "check_val_every_n_epoch": 1,
 8 |     "check_validation_only": true,
 9 |     "do_init_eval": true,
10 |     "train_set": "data/main/lm_extraction_32_0.csv",
11 |     "valid_sets": [
12 |         "data/main/lm_extraction_32_0.csv",
13 |         "validation_data/wizard_of_wikipedia.json",
14 |         "validation_data/empathetic_dialogues.json",
15 |         "validation_data/blended_skill_talk.json",
16 |         "validation_data/wizard_of_internet.json"
17 |     ],
18 |     "valid_subset_path": [
19 |         "",
20 |         "",
21 |         "",
22 |         "",
23 |         ""
24 |     ],
25 |     "valid_type_path": [
26 |         "target",
27 |         "",
28 |         "",
29 |         "",
30 |         ""
31 |     ],
32 |     "train_batch_size": 32,
33 |     "eval_batch_size": 32,
34 |     "gradient_accumulation_steps": 1,
35 |     "ngpu": 1,
36 |     "learning_rate": 5e-5,
37 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
38 |     "el_threshold": 0.0499,
39 |     "ma_threshold": 0.2994,
40 |     "input_length": 512,
41 |     "output_length": 512,
42 |     "target_length": 200,
43 |     "num_workers": 64,
44 |     "strategy": "deepspeed_stage_2_offload",
45 |     "fp16": true,
46 |     "wandb_log": true
47 | }
48 | 


--------------------------------------------------------------------------------
/configs/dialog_dp/dp_0.8.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "DP-1.3B_0.8",
 5 |     "lambda_weight": 0.8,
 6 |     "num_train_epochs": 20,
 7 |     "check_val_every_n_epoch": 1,
 8 |     "check_validation_only": true,
 9 |     "do_init_eval": true,
10 |     "train_set": "data/main/lm_extraction_32_0.csv",
11 |     "valid_sets": [
12 |         "data/main/lm_extraction_32_0.csv",
13 |         "validation_data/wizard_of_wikipedia.json",
14 |         "validation_data/empathetic_dialogues.json",
15 |         "validation_data/blended_skill_talk.json",
16 |         "validation_data/wizard_of_internet.json"
17 |     ],
18 |     "valid_subset_path": [
19 |         "",
20 |         "",
21 |         "",
22 |         "",
23 |         ""
24 |     ],
25 |     "valid_type_path": [
26 |         "target",
27 |         "",
28 |         "",
29 |         "",
30 |         ""
31 |     ],
32 |     "train_batch_size": 32,
33 |     "eval_batch_size": 32,
34 |     "gradient_accumulation_steps": 1,
35 |     "ngpu": 1,
36 |     "learning_rate": 5e-5,
37 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
38 |     "el_threshold": 0.0499,
39 |     "ma_threshold": 0.2994,
40 |     "input_length": 512,
41 |     "output_length": 512,
42 |     "target_length": 200,
43 |     "num_workers": 64,
44 |     "strategy": "deepspeed_stage_2_offload",
45 |     "fp16": true,
46 |     "wandb_log": true
47 | }
48 | 


--------------------------------------------------------------------------------
/configs/dialog_dp/dp_0.9.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "DP-1.3B_0.9",
 5 |     "lambda_weight": 0.9,
 6 |     "num_train_epochs": 20,
 7 |     "check_val_every_n_epoch": 1,
 8 |     "check_validation_only": true,
 9 |     "do_init_eval": true,
10 |     "train_set": "data/main/lm_extraction_32_0.csv",
11 |     "valid_sets": [
12 |         "data/main/lm_extraction_32_0.csv",
13 |         "validation_data/wizard_of_wikipedia.json",
14 |         "validation_data/empathetic_dialogues.json",
15 |         "validation_data/blended_skill_talk.json",
16 |         "validation_data/wizard_of_internet.json"
17 |     ],
18 |     "valid_subset_path": [
19 |         "",
20 |         "",
21 |         "",
22 |         "",
23 |         ""
24 |     ],
25 |     "valid_type_path": [
26 |         "target",
27 |         "",
28 |         "",
29 |         "",
30 |         ""
31 |     ],
32 |     "train_batch_size": 32,
33 |     "eval_batch_size": 32,
34 |     "gradient_accumulation_steps": 1,
35 |     "ngpu": 1,
36 |     "learning_rate": 5e-5,
37 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
38 |     "el_threshold": 0.0499,
39 |     "ma_threshold": 0.2994,
40 |     "input_length": 512,
41 |     "output_length": 512,
42 |     "target_length": 200,
43 |     "num_workers": 64,
44 |     "strategy": "deepspeed_stage_2_offload",
45 |     "fp16": true,
46 |     "wandb_log": true
47 | }
48 | 


--------------------------------------------------------------------------------
/configs/dialog_st/dialog_suffix_tree.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "general_lm_eval",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "Suffix-1.3B_32_0",
 5 |     "num_train_epochs": 20,
 6 |     "check_val_every_n_epoch": 1,
 7 |     "check_validation_only": true,
 8 |     "do_init_eval": true,
 9 |     "train_set": "data/main/lm_extraction_32_0.csv",
10 |     "valid_sets": [
11 |         "validation_data/wizard_of_wikipedia.json",
12 |         "validation_data/empathetic_dialogues.json",
13 |         "validation_data/blended_skill_talk.json",
14 |         "validation_data/wizard_of_internet.json"
15 |     ],
16 |     "valid_subset_path": [
17 |         "",
18 |         "",
19 |         "",
20 |         "",
21 |         ""
22 |     ],
23 |     "valid_type_path": [
24 |         "target",
25 |         "",
26 |         "",
27 |         "",
28 |         ""
29 |     ],
30 |     "train_batch_size": 32,
31 |     "eval_batch_size": 32,
32 |     "gradient_accumulation_steps": 1,
33 |     "ngpu": 1,
34 |     "learning_rate": 5e-5,
35 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
36 |     "el_threshold": 0.0499,
37 |     "ma_threshold": 0.2994,
38 |     "input_length": 512,
39 |     "output_length": 512,
40 |     "target_length": 200,
41 |     "num_workers": 64,
42 |     "strategy": "deepspeed_stage_2_offload",
43 |     "fp16": true,
44 |     "wandb_log": true
45 | }
46 | 


--------------------------------------------------------------------------------
/configs/dialog_st/dialog_suffix_tree1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "general_lm_eval",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "Suffix-1.3B_32_1",
 5 |     "num_train_epochs": 20,
 6 |     "check_val_every_n_epoch": 1,
 7 |     "check_validation_only": true,
 8 |     "do_init_eval": true,
 9 |     "train_set": "data/main/lm_extraction_32_1.csv",
10 |     "valid_sets": [
11 |         "validation_data/wizard_of_wikipedia.json",
12 |         "validation_data/empathetic_dialogues.json",
13 |         "validation_data/blended_skill_talk.json",
14 |         "validation_data/wizard_of_internet.json"
15 |     ],
16 |     "valid_subset_path": [
17 |         "",
18 |         "",
19 |         "",
20 |         "",
21 |         ""
22 |     ],
23 |     "valid_type_path": [
24 |         "target",
25 |         "",
26 |         "",
27 |         "",
28 |         ""
29 |     ],
30 |     "train_batch_size": 32,
31 |     "eval_batch_size": 32,
32 |     "gradient_accumulation_steps": 1,
33 |     "ngpu": 1,
34 |     "learning_rate": 5e-5,
35 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
36 |     "el_threshold": 0.0499,
37 |     "ma_threshold": 0.2994,
38 |     "input_length": 512,
39 |     "output_length": 512,
40 |     "target_length": 200,
41 |     "num_workers": 64,
42 |     "strategy": "deepspeed_stage_2_offload",
43 |     "fp16": true,
44 |     "wandb_log": true
45 | }
46 | 


--------------------------------------------------------------------------------
/configs/dialog_st/dialog_suffix_tree2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "general_lm_eval",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "Suffix-1.3B_32_2",
 5 |     "num_train_epochs": 20,
 6 |     "check_val_every_n_epoch": 1,
 7 |     "check_validation_only": true,
 8 |     "do_init_eval": true,
 9 |     "train_set": "data/main/lm_extraction_32_2.csv",
10 |     "valid_sets": [
11 |         "validation_data/wizard_of_wikipedia.json",
12 |         "validation_data/empathetic_dialogues.json",
13 |         "validation_data/blended_skill_talk.json",
14 |         "validation_data/wizard_of_internet.json"
15 |     ],
16 |     "valid_subset_path": [
17 |         "",
18 |         "",
19 |         "",
20 |         "",
21 |         ""
22 |     ],
23 |     "valid_type_path": [
24 |         "target",
25 |         "",
26 |         "",
27 |         "",
28 |         ""
29 |     ],
30 |     "train_batch_size": 32,
31 |     "eval_batch_size": 32,
32 |     "gradient_accumulation_steps": 1,
33 |     "ngpu": 1,
34 |     "learning_rate": 5e-5,
35 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
36 |     "el_threshold": 0.0499,
37 |     "ma_threshold": 0.2994,
38 |     "input_length": 512,
39 |     "output_length": 512,
40 |     "target_length": 200,
41 |     "num_workers": 64,
42 |     "strategy": "deepspeed_stage_2_offload",
43 |     "fp16": true,
44 |     "wandb_log": true
45 | }
46 | 


--------------------------------------------------------------------------------
/configs/dialog_st/dialog_suffix_tree3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "general_lm_eval",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "Suffix-1.3B_32_3",
 5 |     "num_train_epochs": 20,
 6 |     "check_val_every_n_epoch": 1,
 7 |     "check_validation_only": true,
 8 |     "do_init_eval": true,
 9 |     "train_set": "data/main/lm_extraction_32_3.csv",
10 |     "valid_sets": [
11 |         "validation_data/wizard_of_wikipedia.json",
12 |         "validation_data/empathetic_dialogues.json",
13 |         "validation_data/blended_skill_talk.json",
14 |         "validation_data/wizard_of_internet.json"
15 |     ],
16 |     "valid_subset_path": [
17 |         "",
18 |         "",
19 |         "",
20 |         "",
21 |         ""
22 |     ],
23 |     "valid_type_path": [
24 |         "target",
25 |         "",
26 |         "",
27 |         "",
28 |         ""
29 |     ],
30 |     "train_batch_size": 32,
31 |     "eval_batch_size": 32,
32 |     "gradient_accumulation_steps": 1,
33 |     "ngpu": 1,
34 |     "learning_rate": 5e-5,
35 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
36 |     "el_threshold": 0.0499,
37 |     "ma_threshold": 0.2994,
38 |     "input_length": 512,
39 |     "output_length": 512,
40 |     "target_length": 200,
41 |     "num_workers": 64,
42 |     "strategy": "deepspeed_stage_2_offload",
43 |     "fp16": true,
44 |     "wandb_log": true
45 | }
46 | 


--------------------------------------------------------------------------------
/configs/dialog_st/dialog_suffix_tree4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "general_lm_eval",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "Suffix-1.3B_32_4",
 5 |     "num_train_epochs": 20,
 6 |     "check_val_every_n_epoch": 1,
 7 |     "check_validation_only": true,
 8 |     "do_init_eval": true,
 9 |     "train_set": "data/main/lm_extraction_32_4.csv",
10 |     "valid_sets": [
11 |         "validation_data/wizard_of_wikipedia.json",
12 |         "validation_data/empathetic_dialogues.json",
13 |         "validation_data/blended_skill_talk.json",
14 |         "validation_data/wizard_of_internet.json"
15 |     ],
16 |     "valid_subset_path": [
17 |         "",
18 |         "",
19 |         "",
20 |         "",
21 |         ""
22 |     ],
23 |     "valid_type_path": [
24 |         "target",
25 |         "",
26 |         "",
27 |         "",
28 |         ""
29 |     ],
30 |     "train_batch_size": 32,
31 |     "eval_batch_size": 32,
32 |     "gradient_accumulation_steps": 1,
33 |     "ngpu": 1,
34 |     "learning_rate": 5e-5,
35 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
36 |     "el_threshold": 0.0499,
37 |     "ma_threshold": 0.2994,
38 |     "input_length": 512,
39 |     "output_length": 512,
40 |     "target_length": 200,
41 |     "num_workers": 64,
42 |     "strategy": "deepspeed_stage_2_offload",
43 |     "fp16": true,
44 |     "wandb_log": true
45 | }
46 | 


--------------------------------------------------------------------------------
/configs/dialog_suffix_tree_debug.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "wandb_project": "Knowledge Unlearning",
 4 |     "wandb_run_name": "example",
 5 |     "num_train_epochs": 20,
 6 |     "check_val_every_n_epoch": 1,
 7 |     "check_validation_only": true,
 8 |     "do_init_eval": true,
 9 |     "train_set": "data/main/lm_extraction_32_0.csv",
10 |     "valid_sets": [
11 |         "data/main/lm_extraction_32_0.csv"
12 |     ],
13 |     "valid_subset_path": [
14 |         ""
15 |     ],
16 |     "valid_type_path": [
17 |         "target"
18 |     ],
19 |     "train_batch_size": 32,
20 |     "eval_batch_size": 32,
21 |     "gradient_accumulation_steps": 1,
22 |     "ngpu": 1,
23 |     "learning_rate": 5e-5,
24 |     "model_name_or_path": "EleutherAI/gpt-neo-125M",
25 |     "el_threshold": 0.0499,
26 |     "ma_threshold": 0.2994,
27 |     "input_length": 512,
28 |     "output_length": 512,
29 |     "target_length": 200,
30 |     "num_workers": 64,
31 |     "strategy": "deepspeed_stage_2_offload",
32 |     "fp16": true,
33 |     "wandb_log": true
34 | }
35 | 


--------------------------------------------------------------------------------
/configs/dialog_unlearn/dialog.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "general_lm_eval",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "Unlearn-1.3B_32_0",
 5 |     "num_train_epochs": 13,
 6 |     "check_val_every_n_epoch": 13,
 7 |     "check_validation_only": false,
 8 |     "do_init_eval": false,
 9 |     "train_set": "data/main/lm_extraction_32_0.csv",
10 |     "valid_sets": [
11 |         "validation_data/wizard_of_wikipedia.json",
12 |         "validation_data/empathetic_dialogues.json",
13 |         "validation_data/blended_skill_talk.json",
14 |         "validation_data/wizard_of_internet.json"
15 |     ],
16 |     "valid_subset_path": [
17 |         "",
18 |         "",
19 |         "",
20 |         ""
21 |     ],
22 |     "valid_type_path": [
23 |         "",
24 |         "",
25 |         "",
26 |         ""
27 |     ],
28 |     "train_batch_size": 8,
29 |     "eval_batch_size": 32,
30 |     "gradient_accumulation_steps": 1,
31 |     "ngpu": 4,
32 |     "learning_rate": 5e-5,
33 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
34 |     "el_threshold": 0.0499,
35 |     "ma_threshold": 0.2994,
36 |     "input_length": 512,
37 |     "output_length": 512,
38 |     "target_length": 200,
39 |     "num_workers": 64,
40 |     "strategy": "deepspeed_stage_2_offload",
41 |     "fp16": true,
42 |     "wandb_log": true
43 | }
44 | 


--------------------------------------------------------------------------------
/configs/dialog_unlearn/dialog1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "general_lm_eval",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "Unlearn-1.3B_32_1",
 5 |     "num_train_epochs": 14,
 6 |     "check_val_every_n_epoch": 14,
 7 |     "check_validation_only": false,
 8 |     "do_init_eval": false,
 9 |     "train_set": "data/main/lm_extraction_32_1.csv",
10 |     "valid_sets": [
11 |         "validation_data/wizard_of_wikipedia.json",
12 |         "validation_data/empathetic_dialogues.json",
13 |         "validation_data/blended_skill_talk.json",
14 |         "validation_data/wizard_of_internet.json"
15 |     ],
16 |     "valid_subset_path": [
17 |         "",
18 |         "",
19 |         "",
20 |         ""
21 |     ],
22 |     "valid_type_path": [
23 |         "",
24 |         "",
25 |         "",
26 |         ""
27 |     ],
28 |     "train_batch_size": 8,
29 |     "eval_batch_size": 32,
30 |     "gradient_accumulation_steps": 1,
31 |     "ngpu": 4,
32 |     "learning_rate": 5e-5,
33 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
34 |     "el_threshold": 0.0499,
35 |     "ma_threshold": 0.2994,
36 |     "input_length": 512,
37 |     "output_length": 512,
38 |     "target_length": 200,
39 |     "num_workers": 64,
40 |     "strategy": "deepspeed_stage_2_offload",
41 |     "fp16": true,
42 |     "wandb_log": true
43 | }
44 | 


--------------------------------------------------------------------------------
/configs/dialog_unlearn/dialog2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "general_lm_eval",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "Unlearn-1.3B_32_2",
 5 |     "num_train_epochs": 13,
 6 |     "check_val_every_n_epoch": 13,
 7 |     "check_validation_only": false,
 8 |     "do_init_eval": false,
 9 |     "train_set": "data/main/lm_extraction_32_2.csv",
10 |     "valid_sets": [
11 |         "validation_data/wizard_of_wikipedia.json",
12 |         "validation_data/empathetic_dialogues.json",
13 |         "validation_data/blended_skill_talk.json",
14 |         "validation_data/wizard_of_internet.json"
15 |     ],
16 |     "valid_subset_path": [
17 |         "",
18 |         "",
19 |         "",
20 |         ""
21 |     ],
22 |     "valid_type_path": [
23 |         "",
24 |         "",
25 |         "",
26 |         ""
27 |     ],
28 |     "train_batch_size": 8,
29 |     "eval_batch_size": 32,
30 |     "gradient_accumulation_steps": 1,
31 |     "ngpu": 4,
32 |     "learning_rate": 5e-5,
33 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
34 |     "el_threshold": 0.0499,
35 |     "ma_threshold": 0.2994,
36 |     "input_length": 512,
37 |     "output_length": 512,
38 |     "target_length": 200,
39 |     "num_workers": 64,
40 |     "strategy": "deepspeed_stage_2_offload",
41 |     "fp16": true,
42 |     "wandb_log": true
43 | }
44 | 


--------------------------------------------------------------------------------
/configs/dialog_unlearn/dialog3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "general_lm_eval",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "Unlearn-1.3B_32_3",
 5 |     "num_train_epochs": 14,
 6 |     "check_val_every_n_epoch": 14,
 7 |     "check_validation_only": false,
 8 |     "do_init_eval": false,
 9 |     "train_set": "data/main/lm_extraction_32_3.csv",
10 |     "valid_sets": [
11 |         "validation_data/wizard_of_wikipedia.json",
12 |         "validation_data/empathetic_dialogues.json",
13 |         "validation_data/blended_skill_talk.json",
14 |         "validation_data/wizard_of_internet.json"
15 |     ],
16 |     "valid_subset_path": [
17 |         "",
18 |         "",
19 |         "",
20 |         ""
21 |     ],
22 |     "valid_type_path": [
23 |         "",
24 |         "",
25 |         "",
26 |         ""
27 |     ],
28 |     "train_batch_size": 8,
29 |     "eval_batch_size": 32,
30 |     "gradient_accumulation_steps": 1,
31 |     "ngpu": 4,
32 |     "learning_rate": 5e-5,
33 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
34 |     "el_threshold": 0.0499,
35 |     "ma_threshold": 0.2994,
36 |     "input_length": 512,
37 |     "output_length": 512,
38 |     "target_length": 200,
39 |     "num_workers": 64,
40 |     "strategy": "deepspeed_stage_2_offload",
41 |     "fp16": true,
42 |     "wandb_log": true
43 | }
44 | 


--------------------------------------------------------------------------------
/configs/dialog_unlearn/dialog4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "general_lm_eval",
 3 |     "wandb_project": "Knowledge Unlearning Dialog",
 4 |     "wandb_run_name": "Unlearn-1.3B_32_4",
 5 |     "num_train_epochs": 15,
 6 |     "check_val_every_n_epoch": 15,
 7 |     "check_validation_only": false,
 8 |     "do_init_eval": false,
 9 |     "train_set": "data/main/lm_extraction_32_4.csv",
10 |     "valid_sets": [
11 |         "validation_data/wizard_of_wikipedia.json",
12 |         "validation_data/empathetic_dialogues.json",
13 |         "validation_data/blended_skill_talk.json",
14 |         "validation_data/wizard_of_internet.json"
15 |     ],
16 |     "valid_subset_path": [
17 |         "",
18 |         "",
19 |         "",
20 |         ""
21 |     ],
22 |     "valid_type_path": [
23 |         "",
24 |         "",
25 |         "",
26 |         ""
27 |     ],
28 |     "train_batch_size": 8,
29 |     "eval_batch_size": 32,
30 |     "gradient_accumulation_steps": 1,
31 |     "ngpu": 4,
32 |     "learning_rate": 5e-5,
33 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
34 |     "el_threshold": 0.0499,
35 |     "ma_threshold": 0.2994,
36 |     "input_length": 512,
37 |     "output_length": 512,
38 |     "target_length": 200,
39 |     "num_workers": 64,
40 |     "strategy": "deepspeed_stage_2_offload",
41 |     "fp16": true,
42 |     "wandb_log": true
43 | }
44 | 


--------------------------------------------------------------------------------
/configs/dp/1.3B_0.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-1.3B_0",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_0.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_0.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/1.3B_1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-1.3B_1",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_1.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_1.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/1.3B_2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-1.3B_2",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_2.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_2.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/1.3B_3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-1.3B_3",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_3.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_3.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/1.3B_4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-1.3B_4",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_4.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_4.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/1.3B_general.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "general_lm_eval",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-1.3B_General",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_0.csv",
12 |     "valid_sets": [
13 |         "validation_data/wizard_of_wikipedia.json",
14 |         "validation_data/empathetic_dialogues.json",
15 |         "validation_data/blended_skill_talk.json",
16 |         "validation_data/wizard_of_internet.json"
17 |     ],
18 |     "valid_subset_path": [
19 |         "",
20 |         "",
21 |         "",
22 |         ""
23 |     ],
24 |     "valid_type_path": [
25 |         "",
26 |         "",
27 |         "",
28 |         ""
29 |     ],
30 |     "train_batch_size": 32,
31 |     "eval_batch_size": 32,
32 |     "gradient_accumulation_steps": 1,
33 |     "ngpu": 1,
34 |     "learning_rate": 5e-5,
35 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
36 |     "el_threshold": 0.0499,
37 |     "ma_threshold": 0.2994,
38 |     "input_length": 512,
39 |     "output_length": 512,
40 |     "target_length": 200,
41 |     "num_workers": 64,
42 |     "strategy": "deepspeed_stage_2_offload",
43 |     "fp16": true,
44 |     "wandb_log": true
45 | }
46 | 


--------------------------------------------------------------------------------
/configs/dp/125M_0.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-125M_0",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_0.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_0.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-125M",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/125M_1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-125M_1",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_1.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_1.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-125M",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/125M_2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-125M_2",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_2.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_2.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-125M",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/125M_3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-125M_3",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_3.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_3.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-125M",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/125M_4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-125M_4",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_4.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_4.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-125M",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/125M_general.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "general_lm_eval",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-125M_General",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_0.csv",
12 |     "valid_sets": [
13 |         "validation_data/wizard_of_wikipedia.json",
14 |         "validation_data/empathetic_dialogues.json",
15 |         "validation_data/blended_skill_talk.json",
16 |         "validation_data/wizard_of_internet.json"
17 |     ],
18 |     "valid_subset_path": [
19 |         "",
20 |         "",
21 |         "",
22 |         ""
23 |     ],
24 |     "valid_type_path": [
25 |         "",
26 |         "",
27 |         "",
28 |         ""
29 |     ],
30 |     "train_batch_size": 32,
31 |     "eval_batch_size": 32,
32 |     "gradient_accumulation_steps": 1,
33 |     "ngpu": 1,
34 |     "learning_rate": 5e-5,
35 |     "model_name_or_path": "EleutherAI/gpt-neo-125M",
36 |     "el_threshold": 0.0499,
37 |     "ma_threshold": 0.2994,
38 |     "input_length": 512,
39 |     "output_length": 512,
40 |     "target_length": 200,
41 |     "num_workers": 64,
42 |     "strategy": "deepspeed_stage_2_offload",
43 |     "fp16": true,
44 |     "wandb_log": true
45 | }
46 | 


--------------------------------------------------------------------------------
/configs/dp/2.7B_0.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-2.7B_0",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_0.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_0.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-2.7B",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/2.7B_1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-2.7B_1",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_1.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_1.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-2.7B",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/2.7B_2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-2.7B_2",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_2.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_2.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-2.7B",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/2.7B_3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-2.7B_3",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_3.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_3.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-2.7B",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/2.7B_4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-2.7B_4",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_4.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_4.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-05,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-2.7B",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }


--------------------------------------------------------------------------------
/configs/dp/2.7B_general.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "general_lm_eval",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-2.7B_General",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_0.csv",
12 |     "valid_sets": [
13 |         "validation_data/wizard_of_wikipedia.json",
14 |         "validation_data/empathetic_dialogues.json",
15 |         "validation_data/blended_skill_talk.json",
16 |         "validation_data/wizard_of_internet.json"
17 |     ],
18 |     "valid_subset_path": [
19 |         "",
20 |         "",
21 |         "",
22 |         ""
23 |     ],
24 |     "valid_type_path": [
25 |         "",
26 |         "",
27 |         "",
28 |         ""
29 |     ],
30 |     "train_batch_size": 32,
31 |     "eval_batch_size": 32,
32 |     "gradient_accumulation_steps": 1,
33 |     "ngpu": 1,
34 |     "learning_rate": 5e-5,
35 |     "model_name_or_path": "EleutherAI/gpt-neo-2.7B",
36 |     "el_threshold": 0.0499,
37 |     "ma_threshold": 0.2994,
38 |     "input_length": 512,
39 |     "output_length": 512,
40 |     "target_length": 200,
41 |     "num_workers": 64,
42 |     "strategy": "deepspeed_stage_2_offload",
43 |     "fp16": true,
44 |     "wandb_log": true
45 | }
46 | 


--------------------------------------------------------------------------------
/configs/dp/create_configs.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | model = '2.7B'
 4 | batch = 128
 5 | f = open(f'/home/lklab/knowledge-unlearning/configs/dp/template.json')
 6 | data = json.load(f)
 7 | print(data)
 8 | 
 9 | for i in range(0, 5):
10 |     data['wandb_run_name'] = f'DP-0.2-{model}_{i}'
11 |     data['model_name_or_path'] = f"EleutherAI/gpt-neo-{model}"
12 |     data['train_set'] = f'data/main/lm_extraction_32_{i}.csv'
13 |     data['valid_sets'][0] = f'data/main/lm_extraction_32_{i}.csv'
14 |     with open(f'/home/lklab/knowledge-unlearning/configs/dp/{model}_{i}.json', 'w') as fp:
15 |         json.dump(data, fp, indent=4)


--------------------------------------------------------------------------------
/configs/dp/template.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "privacy_method": "dp",
 4 |     "wandb_project": "Knowledge Unlearning Dialog",
 5 |     "wandb_run_name": "DP-0.2-1.3B_0",
 6 |     "lambda_weight": 0.2,
 7 |     "num_train_epochs": 20,
 8 |     "check_val_every_n_epoch": 1,
 9 |     "check_validation_only": true,
10 |     "do_init_eval": true,
11 |     "train_set": "data/main/lm_extraction_32_0.csv",
12 |     "valid_sets": [
13 |         "data/main/lm_extraction_32_0.csv"
14 |     ],
15 |     "valid_subset_path": [
16 |         ""
17 |     ],
18 |     "valid_type_path": [
19 |         "target"
20 |     ],
21 |     "train_batch_size": 32,
22 |     "eval_batch_size": 32,
23 |     "gradient_accumulation_steps": 1,
24 |     "ngpu": 1,
25 |     "learning_rate": 5e-5,
26 |     "model_name_or_path": "EleutherAI/gpt-neo-1.3B",
27 |     "el_threshold": 0.0499,
28 |     "ma_threshold": 0.2994,
29 |     "input_length": 512,
30 |     "output_length": 512,
31 |     "target_length": 200,
32 |     "num_workers": 64,
33 |     "strategy": "deepspeed_stage_2_offload",
34 |     "fp16": true,
35 |     "wandb_log": true
36 | }
37 | 


--------------------------------------------------------------------------------
/configs/example.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mode": "unlearn",
 3 |     "wandb_project": "Knowledge Unlearning",
 4 |     "wandb_run_name": "example",
 5 |     "num_train_epochs": 20,
 6 |     "check_val_every_n_epoch": 1,
 7 |     "check_validation_only": false,
 8 |     "do_init_eval": true,
 9 |     "train_set": "data/main/lm_extraction_32_0.csv",
10 |     "valid_sets": [
11 |         "data/main/lm_extraction_32_0.csv",
12 |         "validation_data/lambada.csv",
13 |         "piqa",
14 |         "hellaswag",
15 |         "ai2_arc",
16 |         "ai2_arc",
17 |         "super_glue",
18 |         "winogrande",
19 |         "math_qa",
20 |         "validation_data/pubmed_qa.csv"
21 |     ],
22 |     "valid_subset_path": [
23 |         "",
24 |         "",
25 |         "",
26 |         "",
27 |         "ARC-Easy",
28 |         "ARC-Challenge",
29 |         "copa",
30 |         "winogrande_s",
31 |         "",
32 |         ""
33 |     ],
34 |     "valid_type_path": [
35 |         "target",
36 |         "test",
37 |         "validation",
38 |         "validation",
39 |         "validation",
40 |         "validation",
41 |         "validation",
42 |         "validation",
43 |         "validation",
44 |         ""
45 |     ],
46 |     "train_batch_size": 8,
47 |     "eval_batch_size": 8,
48 |     "gradient_accumulation_steps": 4,
49 |     "ngpu": 1,
50 |     "learning_rate": 5e-5,
51 |     "model_name_or_path": "EleutherAI/gpt-neo-125M",
52 |     "el_threshold": 0.0499,
53 |     "ma_threshold": 0.2994,
54 |     "input_length": 512,
55 |     "output_length": 512,
56 |     "target_length": 200,
57 |     "num_workers": 64,
58 |     "strategy": "deepspeed_stage_2_offload",
59 |     "fp16": true,
60 |     "wandb_log": true
61 | }
62 | 


--------------------------------------------------------------------------------
/csv_out/Dialog Initial.csv:
--------------------------------------------------------------------------------
1 | ,wizard_of_wikipedia/loss,wizard_of_wikipedia/f1,empathetic_dialogues/loss,empathetic_dialogues/f1,blended_skill_talk/loss,blended_skill_talk/f1,wizard_of_internet/loss,wizard_of_internet/f1
2 | 0,4.13671875,0.07524767518043518,3.724609375,0.08438178896903992,3.865234375,0.11232497543096542,3.83203125,0.1023608073592186
3 | 


--------------------------------------------------------------------------------
/data/domain_main/enron_emails_8_1.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 2809,enron_emails,",
  3 | ?
  4 | You have been selected to participate in the Mid Year 2001 Performance 
  5 | Management process.  Your feedback plays an important role in the process, 
  6 | and your participation is critical to the success of Enron's Performance 
  7 | Management goals.
  8 | ?
  9 | To complete a request for feedback, access PEP at http://pep.enron.com and 
 10 | select Complete Feedback from the Main Menu.  You may begin providing 
 11 | feedback immediately and are requested to have all feedback forms completed 
 12 | by Friday, May 25, 2001. 
 13 | ?
 14 | If you have any questions regarding PEP or your responsibility in the 
 15 | process, please contact the PEP Help Desk at:
 16 | Houston:  1.713.853.4777, Option 4 or email: perfmgmt@enron.com
 17 | London:  44.207.783.4040, Option 4 or email: pep.enquiries@enron."
 18 | 2128,enron_emails," This e-mail is the property of Enron Corp. and/or its relevant
 19 | > affiliate and may contain confidential and privileged material for the
 20 | > sole use of the intended recipient (s). Any review, use, distribution or
 21 | > disclosure by others is strictly prohibited. If you are not the intended
 22 | > recipient (or authorized to receive for the recipient), please contact
 23 | > the sender or reply to Enron Corp. at
 24 | > enron.messaging.administration@enron.com and delete all copies of the
 25 | > message. This e-mail (and any attachments hereto) are not intended to be
 26 | > an offer (or an acceptance) and do not create or evidence a binding and
 27 | > enforceable contract between Enron Corp. (or any of its affiliates) and
 28 | > the intended recipient or any other party, and may not be relied on by
 29 | > anyone as the basis of a contract by estoppel or otherwise. Thank you"
 30 | 8151,enron_emails,"B
 31 | 
 32 | Andrew B. Brown
 33 | Ellison, Schneider & Harris, LLP
 34 | 2015 H Street
 35 | Sacramento, CA  95814
 36 | Phone: (916) 447-2166
 37 | Fax: (916) 447-3512
 38 | mailto:abb@eslawfirm.com
 39 | 
 40 | CONFIDENTIALITY NOTICE:  This communication and any accompanying document(s)
 41 | are confidential and privileged.  They are intended for the sole use of the
 42 | addressee.  If you receive this transmission in error, you are advised that
 43 | any disclosure, copying, distribution, or the taking of any action in
 44 | reliance upon the communication is strictly prohibited.  Moreover, any such
 45 | inadvertent disclosure shall not compromise or waive the attorney-client
 46 | privilege as to this communication or otherwise.  If you have received this
 47 | communication in error, please contact the sender at the internet address
 48 | indicated or by telephone at (916)"
 49 | 416,enron_emails,"
 50 | 		 
 51 | 		 To: Karen Lambert/HOU/ECT@ECT, Tana Jones/HOU/ECT@ECT, Samuel 
 52 | Schott/HOU/ECT@ECT, Sheri Thomas/HOU/ECT@ECT, Mark Taylor/HOU/ECT@ECT, 
 53 | Bernice Rodriguez/HOU/ECT@ECT, Brant Reves/HOU/ECT@ECT, Debbie R 
 54 | Brackett/HOU/ECT@ECT, David Hardy/LON/ECT@ECT, Lesli Campbell/HOU/ECT@ECT, 
 55 | Molly Harris/HOU/ECT@ECT, Cynthia Clark/Corp/Enron@ENRON, Mary G 
 56 | Gosnell/HOU/ECT@ECT, Enron Europe Global Contracts and Facilities, Enron 
 57 | Europe Global CounterParty, Stephanie Sever/HOU/ECT@ECT, Bradley 
 58 | Diebner/HOU/ECT@ECT, Stacey Richardson/HOU/ECT@"
 59 | 5275,enron_emails,"
 60 | ------------------------------------------------------------------------------
 61 | NEW E-MAIL ADDRESSES AT PAUL, HASTINGS, JANOFSKY & WALKER LLP
 62 | 
 63 | We have changed our e-mail address.  Our new domain name is
 64 | paulhastings.com.  In most cases, our address is composed of
 65 | conventional first name and last name plus @paulhastings.com.  Here are
 66 | two examples: janesmith@paulhastings.com and danjones@paulhastings.com.
 67 | If you have any questions, please contact us at noc@paulhastings.com.
 68 | 
 69 | ==============================================================================
 70 | ""The information transmitted is intended only for the person or entity
 71 | to which it is addressed and may contain confidential and/or privileged
 72 | material. Any review, retransmission, dissemination or other use of, or
 73 | taking of any action in reliance upon, this information by persons"
 74 | 9293,enron_emails,".fantasy.sportsline.com/mp/options-ereports?league=ene&owner=45547.3>click here</A><BR>
 75 | <BR>
 76 | <TABLE WIDTH=100% BORDER=0 CELLPADDING=1 CELLSPACING=0 BGCOLOR=""#000099""><TR><TD ALIGN=left><FONT FACE=arial color=ffffff SIZE=><B>NFL Reports,&nbsp;Player Updates&nbsp;</B></FONT></TD></TR></TABLE><!-- Pizzahut Presenting Logo Spotlight tag - Do not remove until 12-31-01 -->
 77 | <SCRIPT language=""JavaScript"">
 78 | if (!random) {if (parent.random) {var random = parent.random} else {var abc = Math.random() + """"; var random = abc.substring(2,abc.length);"
 79 | 4210,enron_emails,"@enron.com
 80 | 
 81 | 
 82 | 
 83 | **********************************************************************
 84 | This e-mail is the property of Enron Corp. and/or its relevant affiliate
 85 | and may contain confidential and privileged material for the sole use of
 86 | the intended recipient (s). Any review, use, distribution or disclosure by
 87 | others is strictly prohibited. If you are not the intended recipient (or
 88 | authorized to receive for the recipient), please contact the sender or
 89 | reply to Enron Corp. at enron.messaging.administration@enron.com and delete
 90 | all copies of the message. This e-mail (and any attachments hereto) are not
 91 | intended to be an offer (or an acceptance) and do not create or evidence a
 92 | binding and enforceable contract between Enron Corp. (or any of its
 93 | affiliates) and the intended recipient or any other party, and may not be
 94 | relied on by anyone as the basis of a contract by estopp"
 95 | 1287,enron_emails,"
 96 | BNP PARIBAS Commodity Futures, Inc.
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | _____________________________________________________________________________________________________________________________________
103 | 
104 | Ce message et toutes les pieces jointes (ci-apres le ""message"") sont etablis a l'intention exclusive de ses destinataires et sont confidentiels. Si vous recevez ce message par erreur, merci de le detruire et d'en avertir immediatement l'expediteur.
105 | 
106 | Toute utilisation de ce message non conforme a sa destination, toute diffusion ou toute publication, totale ou partielle, est interdite, sauf autorisation expresse.
107 | 
108 | L'internet ne permettant pas d'assurer l'integrite de ce message, BNP PARIBAS (et ses filiales) decline(nt) toute responsabilite au titre de ce message,"
109 | 


--------------------------------------------------------------------------------
/data/domain_main/enron_emails_8_2.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 3127,enron_emails,".html?todaysheadlines 
  3 | 
  4 | -----
  5 | 
  6 | 
  7 | 
  8 | MORE OP-ED  NEWS:
  9 | http://www.nytimes.com/pages/opinion/index.html?todaysheadline
 10 | 
 11 | 
 12 | 
 13 | 
 14 | 
 15 | HOW TO CHANGE YOUR SUBSCRIPTION
 16 | ------------------------------------------------------------
 17 | You received these headlines because you requested The New 
 18 | York Times Direct e-mail service. To cancel delivery, 
 19 | change delivery options, change your e-mail address or sign 
 20 | up for other newsletters, see http://www.nytimes.com/email. 
 21 | Check or uncheck the headlines you would like to receive 
 22 | and remember to go to the bottom of the page and click on 
 23 | ""Save Selections."" To change your e-mail address, go to our 
 24 | help center: http://www.nytimes.com/help.  Suggestions and 
 25 | feedback are welcome at feedback@nytimes.com.
 26 | 
 27 | "
 28 | 3968,enron_emails,".com>, Rick Shapiro 
 29 | <rshapiro@enron.com>, Jim Steffes <james.d.steffes@enron.com>, Alan Comnes 
 30 | <acomnes@enron.com>, Chris Calger <ccalger@enron.com>, Mary Hain 
 31 | <mary.hain@enron.com>, Joe Hartsoe <Joe.Hartsoe@enron.com>, Donna Fulton 
 32 | <Donna.Fulton@enron.com>, Steven Kean <Steven.J.Kean@enron.com>, Karen Denne 
 33 | <kdenne@enron.com>, Beverly Aden <beverly.aden@enron.com>, Bill Votaw 
 34 | <bill.votaw@enron.com>, Carol Moffett <carol.moffett@enron.com>, Debora 
 35 | Whitehead <deb"
 36 | 9812,enron_emails,"
 37 | **********************************************************************
 38 | This e-mail is the property of Enron Corp. and/or its relevant affiliate
 39 | and may contain confidential and privileged material for the sole use of
 40 | the intended recipient (s). Any review, use, distribution or disclosure
 41 | by
 42 | others is strictly prohibited. If you are not the intended recipient (or
 43 | authorized to receive for the recipient), please contact the sender or
 44 | reply to Enron Corp. at enron.messaging.administration@enron.com and
 45 | delete
 46 | all copies of the message. This e-mail (and any attachments hereto) are
 47 | not
 48 | intended to be an offer (or an acceptance) and do not create or evidence
 49 | a
 50 | binding and enforceable contract between Enron Corp. (or any of its
 51 | affiliates) and the intended recipient or any other party, and may not
 52 | be
 53 | relied on by anyone as the basis of a contract by estoppel or"
 54 | 416,enron_emails,"
 55 | 		 
 56 | 		 To: Karen Lambert/HOU/ECT@ECT, Tana Jones/HOU/ECT@ECT, Samuel 
 57 | Schott/HOU/ECT@ECT, Sheri Thomas/HOU/ECT@ECT, Mark Taylor/HOU/ECT@ECT, 
 58 | Bernice Rodriguez/HOU/ECT@ECT, Brant Reves/HOU/ECT@ECT, Debbie R 
 59 | Brackett/HOU/ECT@ECT, David Hardy/LON/ECT@ECT, Lesli Campbell/HOU/ECT@ECT, 
 60 | Molly Harris/HOU/ECT@ECT, Cynthia Clark/Corp/Enron@ENRON, Mary G 
 61 | Gosnell/HOU/ECT@ECT, Enron Europe Global Contracts and Facilities, Enron 
 62 | Europe Global CounterParty, Stephanie Sever/HOU/ECT@ECT, Bradley 
 63 | Diebner/HOU/ECT@ECT, Stacey Richardson/HOU/ECT@"
 64 | 9293,enron_emails,".fantasy.sportsline.com/mp/options-ereports?league=ene&owner=45547.3>click here</A><BR>
 65 | <BR>
 66 | <TABLE WIDTH=100% BORDER=0 CELLPADDING=1 CELLSPACING=0 BGCOLOR=""#000099""><TR><TD ALIGN=left><FONT FACE=arial color=ffffff SIZE=><B>NFL Reports,&nbsp;Player Updates&nbsp;</B></FONT></TD></TR></TABLE><!-- Pizzahut Presenting Logo Spotlight tag - Do not remove until 12-31-01 -->
 67 | <SCRIPT language=""JavaScript"">
 68 | if (!random) {if (parent.random) {var random = parent.random} else {var abc = Math.random() + """"; var random = abc.substring(2,abc.length);"
 69 | 228,enron_emails,".
 70 | 
 71 | Best,
 72 | Jeff
 73 | 
 74 | 
 75 | **********************************************************************
 76 | This e-mail is the property of Enron Corp. and/or its relevant affiliate and
 77 | may contain confidential and privileged material for the sole use of the
 78 | intended recipient (s). Any review, use, distribution or disclosure by
 79 | others is strictly prohibited. If you are not the intended recipient (or
 80 | authorized to receive for the recipient), please contact the sender or reply
 81 | to Enron Corp. at enron.messaging.administration@enron.com and delete all
 82 | copies of the message. This e-mail (and any attachments hereto) are not
 83 | intended to be an offer (or an acceptance) and do not create or evidence a
 84 | binding and enforceable contract between Enron Corp. (or any of its
 85 | affiliates) and the intended recipient or any other party, and may not be
 86 | relied on by anyone as the basis of a contract"
 87 | 7273,enron_emails,".com
 88 | 
 89 | 
 90 | **********************************************************************
 91 | This e-mail is the property of Enron Corp. and/or its relevant affiliate and
 92 | may contain confidential and privileged material for the sole use of the
 93 | intended recipient (s). Any review, use, distribution or disclosure by
 94 | others is strictly prohibited. If you are not the intended recipient (or
 95 | authorized to receive for the recipient), please contact the sender or reply
 96 | to Enron Corp. at enron.messaging.administration@enron.com and delete all
 97 | copies of the message. This e-mail (and any attachments hereto) are not
 98 | intended to be an offer (or an acceptance) and do not create or evidence a
 99 | binding and enforceable contract between Enron Corp. (or any of its
100 | affiliates) and the intended recipient or any other party, and may not be
101 | relied on by anyone as the basis of a contract by estoppel or"
102 | 2128,enron_emails," This e-mail is the property of Enron Corp. and/or its relevant
103 | > affiliate and may contain confidential and privileged material for the
104 | > sole use of the intended recipient (s). Any review, use, distribution or
105 | > disclosure by others is strictly prohibited. If you are not the intended
106 | > recipient (or authorized to receive for the recipient), please contact
107 | > the sender or reply to Enron Corp. at
108 | > enron.messaging.administration@enron.com and delete all copies of the
109 | > message. This e-mail (and any attachments hereto) are not intended to be
110 | > an offer (or an acceptance) and do not create or evidence a binding and
111 | > enforceable contract between Enron Corp. (or any of its affiliates) and
112 | > the intended recipient or any other party, and may not be relied on by
113 | > anyone as the basis of a contract by estoppel or otherwise. Thank you"
114 | 


--------------------------------------------------------------------------------
/data/domain_main/freelaw_8_0.csv:
--------------------------------------------------------------------------------
 1 | doc_id,corpus,text
 2 | 1432,freelaw,"
 3 | 
 4 |                           UNITED STATES COURT OF APPEALS
 5 |                               FOR THE SECOND CIRCUIT
 6 | 
 7 |                                       SUMMARY ORDER
 8 |      RULINGS BY SUMMARY ORDER DO NOT HAVE PRECEDENTIAL EFFECT. CITATION TO A SUMMARY ORDER FILED
 9 |      ON OR AFTER JANUARY 1, 2007, IS PERMITTED AND IS GOVERNED BY FEDERAL RULE OF APPELLATE
10 |      PROCEDURE 32.1 AND THIS COURT"
11 | 12706,freelaw,"                               SUMMARY ORDER
12 | RULINGS BY SUMMARY ORDER DO NOT HAVE PRECEDENTIAL EFFECT.     CITATION TO SUMMARY ORDERS
13 | FILED AFTER JANUARY 1, 2007, IS PERMITTED AND IS GOVERNED BY THIS COURT’S LOCAL RULE 32.1
14 | AND FEDERAL RULE OF APPELLATE PROCEDURE 32.1.     IN A BRIEF OR OTHER PAPER IN WHICH A
15 | LITIGANT CITES A SUMMARY ORDER, IN EACH PARAGRAPH IN WHICH A CITATION APPEARS, AT LEAST
16 | ONE CITATION MUST EITHER BE TO THE FEDERAL APPENDIX OR BE ACCOMPAN"
17 | 8554,freelaw," U. S. ____ (2010)                              1
18 | 
19 |                              Opinion of the Court
20 | 
21 |      NOTICE: This opinion is subject to formal revision before publication in the
22 |      preliminary print of the United States Reports. Readers are requested to
23 |      notify the Reporter of Decisions, Supreme Court of the United States, Wash­
24 |      ington, D. C. 20543, of any typographical or other formal errors, in order
25 |      that corrections may be made before the preliminary print goes to press.
26 | 
27 | 
28 | SUPREME COURT OF THE UNITED STATES
29 |                  "
30 | 7640,freelaw,"                              FIRST DIVISION
31 |                                BARNES, P. J.,
32 |                           MCMILLIAN and REESE, JJ.
33 | 
34 |                    NOTICE: Motions for reconsideration must be
35 |                    physically received in our clerk’s office within ten
36 |                    days of the date of decision to be deemed timely filed.
37 |    "
38 | 13091,freelaw,"C. § 2520 (2018) or Neb. Rev.
39 |     Stat. § 86-297 (Reissue 2014).
40 | 12. Appeal and Error. An appellate court is not obligated to engage in an
41 |     analysis that is not necessary to adjudicate the case and controversy
42 |     before it.
43 | 
44 |   Appeal from the District Court for Douglas County:
45 | Kimberly Miller Pankonin, Judge. Affirmed.
46 |   Karl von Oldenburg, of BQ & Associates, P.C., L.L.O., for
47 | appellant.
48 |     Karen S. Nelson, of Carlson & Burnett, L.L.P., for appellee.
49 |   Heavican, C.J., Miller-Lerman, Cassel, Stacy, Funke,
50 | Papik, and Freudenberg, JJ.
51 |     Cassel, J.
52 |         "
53 | 13576,freelaw," v. SPANG
54 |                               Cite as 302 Neb. 285
55 | 
56 |  6.	 Effectiveness of Counsel: Records: Appeal and Error. The trial
57 |       record reviewed on appeal is devoted to issues of guilt or innocence; as
58 |       such, it does not usually address issues of counsel’s performance and is
59 |       often insufficient to review on direct appeal an ineffective assistance of
60 |       counsel claim.
61 |  7.	 Effectiveness of Counsel: Records: Proof: Appeal and Error. An
62 |       ineffective assistance of counsel claim made on direct appeal can be
63 |       found to be without merit if the record establishes that trial counsel’s
64 |       performance was not deficient or that the appellant"
65 | 241,freelaw,"       IF A STAY OF EXECUTION OF SENTENCE AND RELEASE UPON
66 | BAIL HAS BEEN PREVIOUSLY GRANTED BY THE TRIAL COURT OR
67 | THIS COURT, it is temporarily continued for a period not to exceed sixty days
68 | upon the bail previously posted. The purpose of a continued stay is to allow
69 | Appellant to file with the Supreme Court of Ohio an application for a stay during
70 | the pendency of proceedings in that court. If a stay is continued by this entry, it
71 | will terminate at the earlier of the expiration of the sixty day period, or the failure
72 | of the Appellant to file a notice of appeal with the Supreme Court of Ohio in the
73 | forty-five day appeal period pursuant to Rule II, Sec. 2 of the Rules of Practice of
74 | the Supreme Court of Ohio. Additionally, if the Supreme Court of Ohio dismisses
75 | the appeal prior to expiration of sixty days, the"
76 | 11243,freelaw," excessive.
77 |        Sentencing is a matter for the trial court’s discretion. Both our standard of review and the
78 | factors to be considered in evaluating the reasonableness of the sentence are well established and
79 | 
80 | 
81 |                                                 1
82 | need not be repeated here. See State v. Hernandez, 121 Idaho 114, 117-18, 822 P.2d 1011, 1014-
83 | 15 (Ct. App. 1991); State v. Lopez, 106 Idaho 447, 449-51, 680 P.2d 869, 871-73 (Ct. App.
84 | 1984); State v. Toohill, 103 Idaho 565, 568, 650 P.2d 707,"
85 | 


--------------------------------------------------------------------------------
/data/domain_main/freelaw_8_1.csv:
--------------------------------------------------------------------------------
 1 | doc_id,corpus,text
 2 | 7559,freelaw,", the parties filed the attached joint stipulation,
 3 | stating that a decision should be entered awarding compensation. The undersigned
 4 | finds the stipulation reasonable and adopts it as the decision of the Court in awarding
 5 | damages, on the terms set forth therein.
 6 | 
 7 | 1
 8 |   Because this unpublished decision contains a reasoned explanation for the action in this case, the
 9 | undersigned intends to post it on the United States Court of Federal Claims' website, in accordance with
10 | the E-Government Act of 2002. 44 U.S.C. § 3501 note (2012) (Federal Management and Promotion of
11 | Electronic Government Services). In accordance with Vaccine Rule 18(b), petitioner has 14 days to
12 | identify and move to redact medical or other information, the disclosure of which would constitute an
13 | unwarranted invasion of privacy. If, upon review, the undersigned agrees that the identified material fits
14 | within this definition, the undersigned will redact"
15 | 9012,freelaw,"
16 |         agreement by the parties; b) by declaration of the Mediator to the effect that further efforts at mediation
17 |         are no longer worthwhile; or c) after the completion of one full mediation session, by a written declaration
18 |         of a party or parties to the effect that the mediation proceedings are terminated.
19 | 
20 | xvi.    Exclusion of Liability. The Mediator is not a necessary or proper party in judicial proceedings relating to
21 |         the mediation. Neither Mediator nor any law firm employing Mediator shall be liable to any party for any
22 |         act or omission in connection with any mediation conducted under these rules.
23 | 
24 | xvii.   Interpretation and Application of Rules. The Mediator shall interpret and apply these rules.
25 | 
26 | xviii. Fees and Expenses. The Mediator's"
27 | 4893,freelaw," law that gives you this right. It is found in
28 | Title 5 of the United States Code, section 7703 (5 U.S.C. § 7703) (as rev. eff.
29 | Dec. 27, 2012). You may read this law as well as other sections of the United
30 | States   Code,     at   our      website,   http://www.mspb.gov/appeals/uscode.htm.
31 | Additional         information         is     available     at      the         court’s
32 | website, www.cafc.uscourts.gov. Of particular relevance is the court’s “Guide
33 | for Pro Se Petitioners and Appellants,” which is contained within the
34 | court’s"
35 | 2188,freelaw," moot. When the evidence is consistent with the
36 |       pleadings, the appellate court should order a judgment
37 |       entered for the defendant on the ground that the facts
38 |       elicited at trial demonstrated a good defense to the action
39 |       rather than because of the defect in the pleadings.28
40 |    [8] Neb. Ct. R. Pldg. § 6-1115(b) is substantially identical to
41 | Fed. R. Civ. P. 15(b).29 Our rule explicitly provides that
42 |       amendment of the pleadings as may be necessary to cause
43 |       them to conform to the evidence and to raise these issues
44 |       may be made upon motion of any party at any time, even
45 |       after judgment; but failure so to amend does not affect the
46 |      "
47 | 12346,freelaw," seeks to appeal the district court’s
48 | 
49 | order accepting the recommendation of the magistrate judge and
50 | 
51 | denying relief on his 28 U.S.C. § 2254 (2012) petition.                              The
52 | 
53 | order is not appealable unless a circuit justice or judge issues
54 | 
55 | a   certificate        of    appealability.           28   U.S.C.    § 2253(c)(1)(A)
56 | 
57 | (2012).     A certificate of appealability will not issue absent “a
58 | 
59 | substantial showing of the denial of a constitutional right.”
60 | 
61 | 28 U.S.C. § 2253(c)(2) (2012).      "
62 | 11265,freelaw,"                                   PER CURIAM
63 |                        RULE 38. REQUISITES OF BRIEFS
64 | 
65 | Tex. R. App. P. 38.8. Failure of Appellant to File Brief.
66 | 
67 |       (b) Criminal Cases.
68 | 
69 |              (1) Effect.    An appellant=s failure to timely file a brief does not
70 | authorize either dismissal of the appeal or, except as provided in (4), consideration
71 | of the appeal without briefs.
72 | 
73 |              (2) Notice. If the appellant=s brief is not timely filed, the appellate"
74 | 2044,freelaw,"        IF A STAY OF EXECUTION OF SENTENCE AND RELEASE UPON BAIL
75 | HAS BEEN PREVIOUSLY GRANTED BY THE TRIAL COURT OR THIS COURT, it
76 | is temporarily continued for a period not to exceed sixty days upon the bail previously
77 | posted. The purpose of a continued stay is to allow Appellant to file with the Supreme
78 | Court of Ohio an application for a stay during the pendency of proceedings in that court.
79 | If a stay is continued by this entry, it will terminate at the earlier of the expiration of the
80 | sixty day period, or the failure of the Appellant to file a notice of appeal with the
81 | Supreme Court of Ohio in the forty-five day appeal period pursuant to Rule II, Sec. 2 of
82 | the Rules of Practice of the Supreme Court of Ohio. Additionally, if the Supreme Court
83 | of Ohio dismisses the appeal prior to expiration of sixty days"
84 | 6233,freelaw," is AFFIRMED.
85 | 
86 | 
87 | 
88 | 1
89 |  Effective March 31, 1995, the functions of the Secretary of Health and Human Services in social security cases were transferred to the Commissioner of Social Security.  P.L. No. 103-296.  Pursuant to Fed.  R.App. P. 43(c), Shirley S. Chater, Commissioner of Social Security, is substituted for Donna E. Shalala, Secretary of Health and Human Services, as the defendant in this action.  Although we have substituted the Commissioner for the Secretary in the caption, in the text we continue to refer to the Secretary because she was the appropriate party at the time of the underlying decision
90 | 
91 | 
92 | 2
93 |  This order and judgment is not binding precedent, except under the doctrines of law of the case, res judicata, and collateral estoppel.  The court generally disfavors the citation of orders and judgments;  nevertheless, an order and judgment may be cited under"
94 | 


--------------------------------------------------------------------------------
/data/domain_main/freelaw_8_2.csv:
--------------------------------------------------------------------------------
 1 | doc_id,corpus,text
 2 | 4475,freelaw,"
 3 | 
 4 | 1
 5 |   Although this Decision has been formally designated “unpublished,” it will nevertheless be posted on the
 6 | Court of Federal Claims’s website, in accordance with the E-Government Act of 2002, Pub. L. No. 107-
 7 | 347, 116 Stat. 2899, 2913 (codified as amended at 44 U.S.C. § 3501 note (2006)). This means the Decision
 8 | will be available to anyone with access to the internet. However, the parties may object to the Decision’s
 9 | inclusion of certain kinds of confidential information. Specifically, under Vaccine Rule 18(b), each party
10 | has fourteen days within which to request redaction “of any information furnished by that party: (1) that is
11 | a trade secret or commercial or financial in substance and is privileged or confidential; or (2) that includes
12 | medical files or similar files, the disclosure of which would constitute a clearly unwarranted"
13 | 13729,freelaw," If multiple prison terms are imposed on an offender for convictions of
14 |        multiple offenses, the court may require the offender to serve the prison
15 |        terms consecutively if the court finds that the consecutive service is
16 |        necessary to protect the public from future crime or to punish the offender
17 |        and that consecutive sentences are not disproportionate to the seriousness of
18 |        the offender’s conduct and to the danger the offender poses to the public,
19 |        and if the court also finds any of the following:
20 | 
21 |        (a) The offender committed one or more of the multiple offenses while the
22 |        offender was awaiting trial or sentencing, was under a sanction imposed
23 |        pursuant to section 2929.16, 2929.17, or 2929.18 of the Revised"
24 | 826,freelaw," U. S. ____ (2015)                              1
25 | 
26 |                              Opinion of the Court
27 | 
28 |      NOTICE: This opinion is subject to formal revision before publication in the
29 |      preliminary print of the United States Reports. Readers are requested to
30 |      notify the Reporter of Decisions, Supreme Court of the United States, Wash­
31 |      ington, D. C. 20543, of any typographical or other formal errors, in order
32 |      that corrections may be made before the preliminary print goes to press.
33 | 
34 | 
35 | SUPREME COURT OF THE UNITED STATES
36 |                  "
37 | 7640,freelaw,"                              FIRST DIVISION
38 |                                BARNES, P. J.,
39 |                           MCMILLIAN and REESE, JJ.
40 | 
41 |                    NOTICE: Motions for reconsideration must be
42 |                    physically received in our clerk’s office within ten
43 |                    days of the date of decision to be deemed timely filed.
44 |    "
45 | 1829,freelaw," held that
46 | 
47 |                 “[t]ermination of parental rights, the most drastic remedy under the
48 |        statutory provision covering the disposition of neglected children, [West Virginia
49 |        Code § 49-4-604]... may be employed without the use of intervening less
50 |        restrictive alternatives when it is found that there is no reasonable likelihood under
51 |        [West Virginia Code § 49-4-604(c)]... that conditions of neglect or abuse can be
52 |        substantially corrected.” Syllabus point 2, In re R.J.M., 164 W.Va. 496, 266 S.E.2d
53 |        114 (1980).
54 | 
55 | Syl. Pt. 5, In re Kristin Y., 227"
56 | 7064,freelaw,"
57 |   Pursuant to the E-Government Act of 2002, see 44 U.S.C. § 3501 note (2012), because this decision contains a
58 | reasoned explanation for the action in this case, I am required to post it on the website of the United States Court of
59 | Federal Claims. The court’s website is at http://www.uscfc.uscourts.gov/aggregator/sources/7. This means the
60 | Ruling will be available to anyone with access to the Internet. Before the decision is posted on the court’s
61 | website, each party has 14 days to file a motion requesting redaction “of any information furnished by that party:
62 | (1) that is a trade secret or commercial or financial in substance and is privileged or confidential; or (2) that
63 | includes medical files or similar files, the disclosure of which would constitute a clearly unwarranted invasion of
64 | privacy.�"
65 | 2046,freelaw,"         
66 | RULINGS  BY  SUMMARY  ORDER  DO  NOT  HAVE  PRECEDENTIAL  EFFECT.    CITATION  TO  A  SUMMARY  ORDER  FILED  ON  OR  AFTER 
67 | JANUARY  1,  2007,  IS  PERMITTED  AND  IS  GOVERNED  BY  FEDERAL  RULE  OF  APPELLATE  PROCEDURE  32.1  AND  THIS  COURT’S 
68 | LOCAL  RULE  32.1.1.  WHEN CITING A SUMMARY ORDER IN A DOCUMENT FILED WITH THIS COURT, A PARTY MUST CITE EITHER 
69 | THE  FEDERAL  APPENDIX  OR"
70 | 578,freelaw,"                                    FOR THE SECOND CIRCUIT
71 | 
72 |                                               SUMMARY ORDER
73 | 
74 | RULINGS BY SUMMARY ORDER DO NOT HAVE PRECEDENTIAL EFFECT. CITATION TO A SUMMARY ORDER FILED ON OR AFTER JANUARY 1,
75 | 2007, IS PERMITTED AND IS GOVERNED BY FEDERAL RULE OF APPELLATE PROCEDURE 32.1 AND THIS COURT’S LOCAL RULE 32.1.1.
76 | WHEN CITING A SUMMARY ORDER IN A DOCUMENT FILED WITH"
77 | 


--------------------------------------------------------------------------------
/data/domain_main/freelaw_8_3.csv:
--------------------------------------------------------------------------------
 1 | doc_id,corpus,text
 2 | 3098,freelaw,"                                      SUMMARY ORDER 
 3 |       
 4 |      RULINGS  BY  SUMMARY  ORDER  DO  NOT  HAVE  PRECEDENTIAL 
 5 |      EFFECT.    CITATION  TO  A  SUMMARY  ORDER  FILED  ON  OR  AFTER 
 6 |      JANUARY 1, 2007 IS PERMITTED AND IS GOVERNED BY FEDERAL RULE 
 7 |      OF  APPELLATE  PROCEDURE  32.1  AND  THIS  COURT’S  LOCAL  RULE 
 8 |      32.1.1.   "
 9 | 4403,freelaw,"                      Cite as 297 Neb. 798
10 | 
11 |       In any of the following cases the United States court in
12 |       and for the district wherein the award was made may
13 |       make an order vacating the award upon the application of
14 |       any party to the arbitration—
15 |           (1) where the award was procured by corruption, fraud,
16 |       or undue means;
17 |           (2) where there was evident partiality or corruption in
18 |       the arbitrators, or either of them;
19 |           (3) where the arbitrators were guilty of misconduct in
20 |       refusing to postpone the hearing, upon sufficient cause
21 | "
22 | 10632,freelaw,"       The Court finds there were reasonable grounds for this appeal.
23 | 
24 |       It is ordered that a special mandate issue out of this Court directing the Highland
25 | County Court of Common Pleas to carry this judgment into execution.
26 | 
27 |         IF A STAY OF EXECUTION OF SENTENCE AND RELEASE UPON BAIL HAS
28 | BEEN PREVIOUSLY GRANTED BY THE TRIAL COURT OR THIS COURT, it is
29 | temporarily continued for a period not to exceed sixty days upon the bail previously
30 | posted. The purpose of a continued stay is to allow Appellant to file with the Supreme
31 | Court of Ohio an application for a stay during the pendency of proceedings in that court.
32 | If a stay is continued by this entry, it will terminate at the earlier of the expiration of the
33 | sixty day period, or the failure of the Appellant to file a notice of appeal with"
34 | 9915,freelaw," sentence
35 |     is considered separately, not cumulatively, for purposes of determining
36 |     whether it is cruel and unusual.
37 | 11. Sentences: Appeal and Error. Where a sentence imposed within the
38 |     statutory limits is alleged on appeal to be excessive, the appellate court
39 |     must determine whether a sentencing court abused its discretion in con-
40 |     sidering and applying the relevant factors as well as any applicable legal
41 |     principles in determining the sentence to be imposed.
42 | 12. Sentences. In determining a sentence to be imposed, relevant factors
43 |     customarily considered and applied are the defendant’s (1) age, (2) men-
44 |     tality, (3) education and experience, (4) social and cultural background,
45 |     (5) past criminal record or record of law-abiding conduct, and (6) moti-
46 |   "
47 | 12710,freelaw," appropriate for the
48 | reason listed below.
49 | 
50 |       I.      Legal Standard
51 | 
52 |          The Vaccine Act permits an award of reasonable attorneys’ fees and costs. §
53 | 15(e). Counsel must submit fee requests that include contemporaneous and specific
54 | billing records indicating the service performed, the number of hours expended on the
55 | service, and the name of the person performing the service. See Savin v. Sec’y of
56 | Health & Human Servs., 85 Fed. Cl. 313, 316-18 (2008). Counsel should not include in
57 | their fee requests hours that are “excessive, redundant, or otherwise unnecessary.”
58 | Saxton v. Sec’y of Health & Human Servs., 3 F.3d 1517, 1521 (Fed. Cir. 1993) (quoting
59 | Hensley v. Eckerhart, 461 U.S"
60 | 6658,freelaw,"                             THIRD DIVISION
61 |                             ELLINGTON, P. J.,
62 |                         ANDREWS and RICKMAN, JJ.
63 | 
64 |                    NOTICE: Motions for reconsideration must be
65 |                    physically received in our clerk’s office within ten
66 |                    days of the date of decision to be deemed timely filed.
67 |        "
68 | 11366,freelaw,"  ____. Neb. Rev. Stat. § 28-319(1)(b) (Reissue 2016) applies to a wide
69 |       array of situations that affect a victim’s capacity, including age.
70 | 10.   Jury Instructions: Evidence: Appeal and Error. When examining for
71 |       harmless error, the court may look at a variety of factors including the
72 |       jury instructions as a whole, the evidence presented at trial, and the clos-
73 |       ing arguments.
74 | 11.   Convictions: Evidence: Appeal and Error. In reviewing a criminal
75 |       conviction for a sufficiency of the evidence claim, whether the evidence
76 |       is direct, circumstantial, or a combination thereof, the standard is the
77 |       same: An appellate court does not resolve conflicts in the evidence, pass
78 |    "
79 | 3839,freelaw," is excessive.
80 |        Sentencing is a matter for the trial court’s discretion. Both our standard of review and the
81 | factors to be considered in evaluating the reasonableness of the sentence are well established and
82 | need not be repeated here. See State v. Hernandez, 121 Idaho 114, 117-18, 822 P.2d 1011, 1014-
83 | 
84 |                                                 1
85 | 15 (Ct. App. 1991); State v. Lopez, 106 Idaho 447, 449-51, 680 P.2d 869, 871-73 (Ct. App.
86 | 1984); State v. Toohill, 103 Idaho 565, 568, 650 P.2d 707,"
87 | 


--------------------------------------------------------------------------------
/data/domain_main/freelaw_8_4.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 697,freelaw," Angelone, 369 F.3d 363, 369
  3 | 
  4 | (4th Cir. 2004).    A certificate of appealability will not issue
  5 | 
  6 | absent “a substantial showing of the denial of a constitutional
  7 | 
  8 | right.”   28 U.S.C. § 2253(c)(2) (2000).   A prisoner satisfies this
  9 | 
 10 | standard by demonstrating that reasonable jurists would find that
 11 | 
 12 | any assessment of the constitutional claims by the district court
 13 | 
 14 | is debatable or wrong and that any dispositive procedural ruling by
 15 | 
 16 | the district court is likewise debatable.    Miller-El v. Cockrell,
 17 | 
 18 | 537 U.S. 322, 336-38 (2003); Slack v. McDaniel, 529 U.S. 473, 484
 19 | 
 20 | (2000); Rose v. Lee, 252 F.3d 676, 683-84 (4th Cir. 2001).  "
 21 | 7738,freelaw," his sentence is excessive.
 22 |        Sentencing is a matter for the trial court’s discretion. Both our standard of review and the
 23 | factors to be considered in evaluating the reasonableness of the sentence are well established and
 24 | need not be repeated here. See State v. Hernandez, 121 Idaho 114, 117-18, 822 P.2d 1011, 1014-
 25 | 
 26 | 
 27 |                                                 1
 28 | 15 (Ct. App. 1991); State v. Lopez, 106 Idaho 447, 449-51, 680 P.2d 869, 871-73 (Ct. App.
 29 | 1984); State v. Toohill, 103 Idaho 565, 568, 650 P.2d"
 30 | 9638,freelaw," seeks to appeal the district court’s
 31 | 
 32 | order denying relief on his 28 U.S.C. § 2255 (2012) motion.                                The
 33 | 
 34 | order is not appealable unless a circuit justice or judge issues
 35 | 
 36 | a    certificate       of    appealability.               28   U.S.C.    § 2253(c)(1)(B)
 37 | 
 38 | (2012).     A certificate of appealability will not issue absent “a
 39 | 
 40 | substantial showing of the denial of a constitutional right.”
 41 | 
 42 | 28 U.S.C. § 2253(c)(2) (2012).           "
 43 | 13070,freelaw,"                                            6
 44 |                            Expedited Hearing Order Right to Appeal:
 45 | 
 46 |      If you disagree with this Expedited Hearing Order, you may appeal to the Workers’
 47 | Compensation Appeals Board. To appeal an expedited hearing order, you must:
 48 | 
 49 |    1. Complete the enclosed form entitled: “Expedited Hearing Notice of Appeal,” and file the
 50 |       form with the Clerk of the Court of Workers’ Compensation Claims within seven
 51 |       business days of the date the expedited hearing order was filed. When filing the Notice
 52 |       of Appeal, you"
 53 | 120,freelaw,"                                       1
 54 | 
 55 |                                        Syllabus
 56 | 
 57 |          NOTE: Where it is feasible, a syllabus (headnote) will be released, as is
 58 |        being done in connection with this case, at the time the opinion is issued.
 59 |        The syllabus constitutes no part of the opinion of the Court but has been
 60 |        prepared by the Reporter of Decisions for the convenience of the reader.
 61 |        See United States v. Detroit Timber & Lumber Co., 200 U. S."
 62 | 8761,freelaw,"***********************************************
 63 |     The “officially released” date that appears near the be-
 64 | ginning of each opinion is the date the opinion will be pub-
 65 | lished in the Connecticut Law Journal or the date it was
 66 | released as a slip opinion. The operative date for the be-
 67 | ginning of all time periods for filing postopinion motions
 68 | and petitions for certification is the “officially released”
 69 | date appearing in the opinion.
 70 | 
 71 |    All opinions are subject to modification and technical
 72 | correction prior to official publication in the Connecticut
 73 | Reports and Connecticut Appellate Reports. In the event of
 74 | discrepancies between the advance release version of an
 75 | opinion and the latest version appearing in the Connecticut
 76 | Law Journal and subsequently in the Connecticut Reports
 77 | or Connecticut Appellate Reports, the latest version is to
 78 | be considered authoritative.
 79 | 
 80 |    The syllabus and procedural history accompanying the
 81 | opinion as it appears in the Connecticut"
 82 | 7066,freelaw," seeks to appeal the district court’s order denying relief on his
 83 | 
 84 | 28 U.S.C. § 2255 (2018) motion. The order is not appealable unless a circuit justice or
 85 | 
 86 | judge issues a certificate of appealability. See 28 U.S.C. § 2253(c)(1)(B) (2018). A
 87 | 
 88 | certificate of appealability will not issue absent “a substantial showing of the denial of a
 89 | 
 90 | constitutional right.” 28 U.S.C. § 2253(c)(2) (2018). When the district court denies relief
 91 | 
 92 | on the merits, a prisoner satisfies this standard by demonstrating that reasonable jurists
 93 | 
 94 | would find the district court’s assessment of the constitutional claims debatable or wrong.
 95 | 
 96 | See Buck v. Davis, 137 S. Ct. 759, 773-74 (2017). When the district court denies relief on
 97 | 
 98 | procedural grounds"
 99 | 13091,freelaw,"C. § 2520 (2018) or Neb. Rev.
100 |     Stat. § 86-297 (Reissue 2014).
101 | 12. Appeal and Error. An appellate court is not obligated to engage in an
102 |     analysis that is not necessary to adjudicate the case and controversy
103 |     before it.
104 | 
105 |   Appeal from the District Court for Douglas County:
106 | Kimberly Miller Pankonin, Judge. Affirmed.
107 |   Karl von Oldenburg, of BQ & Associates, P.C., L.L.O., for
108 | appellant.
109 |     Karen S. Nelson, of Carlson & Burnett, L.L.P., for appellee.
110 |   Heavican, C.J., Miller-Lerman, Cassel, Stacy, Funke,
111 | Papik, and Freudenberg, JJ.
112 |     Cassel, J.
113 |         "
114 | 


--------------------------------------------------------------------------------
/data/domain_main/github_8_0.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 4798,github,"NamesShort: ['Janv.','Févr.','Mars','Avril','Mai','Juin',
  3 | 		'Juil.','Août','Sept.','Oct.','Nov.','Déc.'],
  4 | 		dayNames: ['Dimanche','Lundi','Mardi','Mercredi','Jeudi','Vendredi','Samedi'],
  5 | 		dayNamesShort: ['Dim.','Lun.','Mar.','Mer.','Jeu.','Ven.','Sam.'],
  6 | 		dayNamesMin: ['D','L','M','M','J','V','S'],
  7 | 		weekHeader: 'Sem.',
  8 | 		dateFormat: 'dd/mm/yy',
  9 | 		firstDay: 1,
 10 | 		isRTL: false,
 11 | 		showMonthAfterYear: false,
 12 | 		yearSuffix: ''};
 13 | "
 14 | 9549,github,"/*----------------------------------------------------------------------------
 15 |  * Copyright (c) <2013-2015>, <Huawei Technologies Co., Ltd>
 16 |  * All rights reserved.
 17 |  * Redistribution and use in source and binary forms, with or without modification,
 18 |  * are permitted provided that the following conditions are met:
 19 |  * 1. Redistributions of source code must retain the above copyright notice, this list of
 20 |  * conditions and the following disclaimer.
 21 |  * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 22 |  * of conditions and the following disclaimer in the documentation and/or other materials
 23 |  * provided with the distribution.
 24 |  * 3. Neither the name of the copyright holder nor the names of its contributors may be used
 25 |  * to endorse or promote products derived from this software without specific prior written
 26 |  * permission.
 27 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 28 |  * ""AS IS"" AND ANY EXPRESS"
 29 | 3856,github,"{}; 
 30 |         ///Iteration start condition check bit 
 31 |         constexpr Register::FieldLocation<Addr,Register::maskFromRange(2,2),Register::ReadWriteAccess,unsigned> rsc{}; 
 32 |         ///Stop condition check bit 
 33 |         constexpr Register::FieldLocation<Addr,Register::maskFromRange(1,1),Register::ReadWriteAccess,unsigned> spc{}; 
 34 |         ///Bus state bit 
 35 |         constexpr Register::FieldLocation<Addr,Register::maskFromRange(0,0),Register::Access<Register::AccessType::readOnly,Register::ReadActionType::normal,Register::ModifiedWriteValueType::normal>,unsigned> bb{}; 
 36 |     }
 37 |  "
 38 | 6993,github," /* COLLAPSE PUBLIC CLASS DEFINITION
 39 |   * ================================ */
 40 | 
 41 |   var Collapse = function (element, options) {
 42 |     this.$element = $(element)
 43 |     this.options = $.extend({}, $.fn.collapse.defaults, options)
 44 | 
 45 |     if (this.options.parent) {
 46 |       this.$parent = $(this.options.parent)
 47 |     }
 48 | 
 49 |     this.options.toggle && this.toggle()
 50 |   }
 51 | 
 52 |   Collapse.prototype = {
 53 | 
 54 |     constructor: Collapse
 55 | 
 56 |  , dimension: function () {
 57 |       var hasWidth = this.$element.hasClass('width')
 58 |       return hasWidth? 'width' : 'height'
 59 |     }
 60 | 
 61 |  , show: function () {
 62 |       var dimension"
 63 | 360,github," = pc }
 64 | 
 65 | func (iov *Iovec) SetLen(length int) {
 66 | 	iov.Len = uint64(length)
 67 | }
 68 | 
 69 | func (msghdr *Msghdr) SetControllen(length int) {
 70 | 	msghdr.Controllen = uint64(length)
 71 | }
 72 | 
 73 | func (cmsg *Cmsghdr) SetLen(length int) {
 74 | 	cmsg.Len = uint64(length)
 75 | }
 76 | 
 77 | //sys	poll(fds *PollFd, nfds int, timeout int) (n int, err error)
 78 | 
 79 | func Poll(fds []PollFd, timeout int) (n int, err error) {
 80 | 	if len(fds) == 0 {
 81 | 		return poll(nil, 0, timeout)
 82 | 	}
 83 | 	return poll(&fds[0], len(fds), timeout)
 84 | "
 85 | 4036,github,"($obj1);
 86 | 
 87 |             } // if joined row was not null
 88 | 
 89 |             $results[] = $obj1;
 90 |         }
 91 |         $stmt->closeCursor();
 92 | 
 93 |         return $results;
 94 |     }
 95 | 
 96 | 
 97 |     /**
 98 |      * Returns the number of rows matching criteria, joining all related tables
 99 |      *
100 |      * @param      Criteria $criteria
101 |      * @param      boolean $distinct Whether to select only distinct columns; deprecated: use Criteria->setDistinct() instead.
102 |      * @param      PropelPDO $con
103 |      * @param   "
104 | 14866,github,"
105 |  THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
106 |  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
107 |  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
108 |  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
109 |  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
110 |  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
111 |  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
112 |  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
113 |  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)"
114 | 214,github,"-debian:stretch-build
115 | 
116 | # remove several traces of debian python
117 | RUN apt-get purge -y python.*
118 | 
119 | # http://bugs.python.org/issue19846
120 | # > At the moment, setting ""LANG=C"" on a Linux system *fundamentally breaks Python 3*, and that's not OK.
121 | ENV LANG C.UTF-8
122 | 
123 | # key 63C7CC90: public key ""Simon McVittie <smcv@pseudorandom.co.uk>"" imported
124 | # key 3372DCFA: public key ""Donald Stufft (dstufft) <donald@stufft.io>"" imported
125 | RUN gpg --batch --keyserver keyring.debian.org --recv-keys 4DE8FF2A63C7CC90 \
126 | 	&& gpg --batch --keyserver keyserver.ubuntu.com --recv-key 6E3"
127 | 


--------------------------------------------------------------------------------
/data/domain_main/github_8_1.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 4728,github,"""));
  3 | 
  4 | function _interopRequireDefault(obj) { return obj && obj.__esModule? obj : { default: obj }; }
  5 | 
  6 | function _getRequireWildcardCache() { if (typeof WeakMap!== ""function"") return null; var cache = new WeakMap(); _getRequireWildcardCache = function _getRequireWildcardCache() { return cache; }; return cache; }
  7 | 
  8 | function _interopRequireWildcard(obj) { if (obj && obj.__esModule) { return obj; } if (obj === null || _typeof(obj)!== ""object"" && typeof obj!== ""function"") { return { default: obj }; } var cache = _getRequireWildcardCache(); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.define"
  9 | 8875,github,"
 10 | 		azure.WithErrorUnlessStatusCode(http.StatusOK, http.StatusAccepted, http.StatusNoContent),
 11 | 		autorest.ByClosing())
 12 | 	result.Response = resp
 13 | 	return
 14 | }
 15 | 
 16 | // Get gets the specified virtual network by resource group.
 17 | // Parameters:
 18 | // resourceGroupName - the name of the resource group.
 19 | // virtualNetworkName - the name of the virtual network.
 20 | // expand - expands referenced resources.
 21 | func (client VirtualNetworksClient) Get(ctx context.Context, resourceGroupName string, virtualNetworkName string, expand string) (result VirtualNetwork, err error) {
 22 | 	if tracing.IsEnabled() {
 23 | 		ctx = tracing.StartSpan(ctx, fqdn+""/VirtualNetworksClient.Get"")
 24 | 		defer func() {
 25 | 			sc := -1
 26 | 			if result.Response."
 27 | 2484,github,"_instance_f32 Struct Reference</title>
 28 | <link href=""tabs.css"" rel=""stylesheet"" type=""text/css""/>
 29 | <link href=""search/search.css"" rel=""stylesheet"" type=""text/css""/>
 30 | <script type=""text/javaScript"" src=""search/search.js""></script>
 31 | <link href=""doxygen.css"" rel=""stylesheet"" type=""text/css""/>
 32 | </head>
 33 | <body onload='searchBox.OnSelectItem(0);'>
 34 | <!-- Generated by Doxygen 1.7.2 -->
 35 | <script type=""text/javascript""><!--
 36 | var searchBox = new SearchBox(""searchBox"", ""search"",false,'Search');
 37 | --></script>
 38 | <div class=""navigation"" id=""top"">
 39 |   <div class=""tabs"">
 40 |     <ul class=""tablist"">
 41 |    "
 42 | 9380,github,"/*
 43 |  * Copyright (C) 2008, 2009 Apple Inc. All rights reserved.
 44 |  *
 45 |  * Redistribution and use in source and binary forms, with or without
 46 |  * modification, are permitted provided that the following conditions
 47 |  * are met:
 48 |  *
 49 |  * 1.  Redistributions of source code must retain the above copyright
 50 |  *     notice, this list of conditions and the following disclaimer.
 51 |  * 2.  Redistributions in binary form must reproduce the above copyright
 52 |  *     notice, this list of conditions and the following disclaimer in the
 53 |  *     documentation and/or other materials provided with the distribution.
 54 |  * 3.  Neither the name of Apple Computer, Inc. (""Apple"") nor the names of
 55 |  *     its contributors may be used to endorse or promote products derived
 56 |  *     from this software without specific prior written permission.
 57 |  *
 58 |  * THIS SOFTWARE IS PROV"
 59 | 14369,github,"() {
 60 | 		ctx = tracing.StartSpan(ctx, fqdn+""/SecurityGroupsClient.ListAll"")
 61 | 		defer func() {
 62 | 			sc := -1
 63 | 			if result.Response().Response.Response!= nil {
 64 | 				sc = result.page.Response().Response.Response.StatusCode
 65 | 			}
 66 | 			tracing.EndSpan(ctx, sc, err)
 67 | 		}()
 68 | 	}
 69 | 	result.page, err = client.ListAll(ctx)
 70 | 	return
 71 | }
 72 | 
 73 | // UpdateTags updates a network security group tags.
 74 | // Parameters:
 75 | // resourceGroupName - the name of the resource group.
 76 | // networkSecurityGroupName - the name of the network security group.
 77 | // parameters - parameters supplied to update network security group tags.
 78 | func (client SecurityGroupsClient) UpdateTags(ctx context"
 79 | 5170,github,"
 80 | <link rel=""stylesheet"" type=""text/css"" href=""search.css""/>
 81 | <script type=""text/javascript"" src=""all_4.js""></script>
 82 | <script type=""text/javascript"" src=""search.js""></script>
 83 | </head>
 84 | <body class=""SRPage"">
 85 | <div id=""SRIndex"">
 86 | <div class=""SRStatus"" id=""Loading"">Loading...</div>
 87 | <div id=""SRResults""></div>
 88 | <script type=""text/javascript""><!--
 89 | createResults();
 90 | --></script>
 91 | <div class=""SRStatus"" id=""Searching"">Searching...</div>
 92 | <div class=""SRStatus"" id=""NoMatches"">No Matches</div>
 93 | <script type=""text/javascript""><!--
 94 | document.getElementById(""Loading"").style.display=""none"";
 95 | document.getElementById(""NoMatches"").style.display"
 96 | 362,github,"
 97 | // 8. Sliders & Dividers & Headers
 98 | // --------------------------------------------------*/
 99 | .hero-slider {
100 |   padding: 0px;
101 |   position: relative;
102 |   overflow: hidden;
103 |   background: #2b2b2b;
104 | }
105 | .hero-slider.slides li {
106 |   height: 780px;
107 |   position: relative;
108 |   overflow: hidden;
109 |   -webkit-transform-style: preserve-3d;
110 |   -moz-transform-style: preserve-3d;
111 |   transform-style: preserve-3d;
112 | }
113 | .hero-slider.slides li:before {
114 |   background-color: #333333;
115 |   opacity: 0.4;
116 |   position: absolute;
117 |   content: '';
118 |   width: 100%;
119 |   height: 100%;
120 |   z-index: 1;
121 |   top: 0px"
122 | 10081,github,""",
123 |         ""genesisRLP"" : ""0xf901f8f901f3a00000000000000000000000000000000000000000000000000000000000000000a01dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347942adc25665018aa1fe0e6bc666dac8fc2697ff9baa0f13800f754303d4c4a05de6e0cd61f664bd1e8415715c1d018f578f1dc86bb38a056e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421a056e81f171bcc55a6ff8345e692c0f86"
124 | 


--------------------------------------------------------------------------------
/data/domain_main/github_8_2.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 6681,github,"ype
  3 | 	sa.raw.Pkttype = sa.Pkttype
  4 | 	sa.raw.Halen = sa.Halen
  5 | 	for i := 0; i < len(sa.Addr); i++ {
  6 | 		sa.raw.Addr[i] = sa.Addr[i]
  7 | 	}
  8 | 	return unsafe.Pointer(&sa.raw), SizeofSockaddrLinklayer, nil
  9 | }
 10 | 
 11 | type SockaddrNetlink struct {
 12 | 	Family uint16
 13 | 	Pad    uint16
 14 | 	Pid    uint32
 15 | 	Groups uint32
 16 | 	raw    RawSockaddrNetlink
 17 | }
 18 | 
 19 | func (sa *SockaddrNetlink) sockaddr() (unsafe.Pointer, _Socklen, error) {
 20 | 	sa.raw.Family = AF_NETLINK
 21 | 	sa.raw.Pad = sa.Pad"
 22 | 199,github,"/Deployment. See the release notes for
 23 | // more information.
 24 | // Deployment enables declarative updates for Pods and ReplicaSets.
 25 | message Deployment {
 26 |   // Standard object metadata.
 27 |   // +optional
 28 |   optional k8s.io.apimachinery.pkg.apis.meta.v1.ObjectMeta metadata = 1;
 29 | 
 30 |   // Specification of the desired behavior of the Deployment.
 31 |   // +optional
 32 |   optional DeploymentSpec spec = 2;
 33 | 
 34 |   // Most recently observed status of the Deployment.
 35 |   // +optional
 36 |   optional DeploymentStatus status = 3;
 37 | }
 38 | 
 39 | // DeploymentCondition describes the state of a deployment at a certain point.
 40 | message DeploymentCondition {
 41 |   // Type of deployment condition.
 42 |   optional string type = 1;
 43 | 
 44 |   // Status of the condition, one of True, False, Unknown.
 45 |   optional string status = 2;"
 46 | 3420,github,"097CF813509F0A9CF88E024D0DA
 47 | :107F7000A6CFFC010A0167BFE895112407B600FCF3
 48 | :107F8000FDCF667029F0452B19F481E187BFE89594
 49 | :107F900008959091C00095FFFCCF8093C60008958E
 50 | :107FA0008091C00087FFFCCF8091C00084FD01C09C
 51 | :107FB000A8958091C6000895E0E6F0E098E19083EE
 52 | :107FC00080830895EDDF803219F088E0F5DFFFCF80
 53 | :107FD00084E1DFCFCF93C82FE3DFC150E9F7CF9122"
 54 | 14881,github,"UBDIRS)
 55 | DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
 56 | am__relativize = \
 57 |   dir0=`pwd`; \
 58 |   sed_first='s,^\([^/]*\)/.*$$,\1,'; \
 59 |   sed_rest='s,^[^/]*/*,,'; \
 60 |   sed_last='s,^.*/\([^/]*\)$$,\1,'; \
 61 |   sed_butlast='s,/*[^/]*$$,,'; \
 62 |   while test -n ""$$dir1""; do \
 63 |     first=`echo ""$$dir1"" | sed -e ""$$sed_first""`; \
 64 |     if test ""$$first""!= """
 65 | 2980,github," :ios, '9.0'
 66 | 
 67 | # CocoaPods analytics sends network stats synchronously affecting flutter build latency.
 68 | ENV['COCOAPODS_DISABLE_STATS'] = 'true'
 69 | 
 70 | def parse_KV_file(file, separator='=')
 71 |   file_abs_path = File.expand_path(file)
 72 |   if!File.exists? file_abs_path
 73 |     return [];
 74 |   end
 75 |   pods_ary = []
 76 |   skip_line_start_symbols = [""#"", ""/""]
 77 |   File.foreach(file_abs_path) { |line|
 78 |       next if skip_line_start_symbols.any? { |symbol| line =~ /^\s*#{symbol}/ }
 79 |       plugin = line.split(pattern="
 80 | 3303,github," {
 81 |   background-color: #2aabd2;
 82 |   background-image: none;
 83 | }
 84 | .btn-warning {
 85 |   background-image: -webkit-linear-gradient(top, #f0ad4e 0%, #eb9316 100%);
 86 |   background-image:      -o-linear-gradient(top, #f0ad4e 0%, #eb9316 100%);
 87 |   background-image: -webkit-gradient(linear, left top, left bottom, from(#f0ad4e), to(#eb9316));
 88 |   background-image:         linear-gradient(to bottom, #f0ad4e 0%, #eb9316 100%);
 89 |   filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff0ad4e', endColorstr='#ffeb9316', Grad"
 90 | 9428,github,"       HapiLogFactory.GetHapiLog(GetType()).Error(message, e); 
 91 | 	        throw new System.Exception(message);
 92 | 	    } 
 93 | 	    return reps; 
 94 | 	}
 95 | 	} 
 96 | 
 97 | 	/** 
 98 | 	 * Enumerate over the UAC results 
 99 | 	 */ 
100 | 	public IEnumerable<UAC> UACs 
101 | 	{ 
102 | 		get
103 | 		{
104 | 			for (int rep = 0; rep < UACRepetitionsUsed; rep++)
105 | 			{
106 | 				yield return (UAC)this.GetStructure(""UAC"", rep);
107 | 			}
108 | 		}
109 | 	}
110 | 
111 | 	///<summary>
112 | 	///Adds a new UAC
113 | 	///</summary>
114 | 	public UAC"
115 | 14262,github,"DIRNAME([""$file""])`
116 |     AS_MKDIR_P([$dirpart/$fdir])
117 |     # echo ""creating $dirpart/$file""
118 |     echo '# dummy' > ""$dirpart/$file""
119 |   done
120 | done
121 | ])# _AM_OUTPUT_DEPENDENCY_COMMANDS
122 | 
123 | 
124 | # AM_OUTPUT_DEPENDENCY_COMMANDS
125 | # -----------------------------
126 | # This macro should only be invoked once -- use via AC_REQUIRE.
127 | #
128 | # This code is only required when automatic dependency tracking
129 | # is enabled.  FIXME.  This creates each `.P' file that we will
130 | # need in order to bootstrap the dependency handling code.
131 | AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
132 | [AC_CONFIG_COMMANDS([depfiles],
133 |  "
134 | 


--------------------------------------------------------------------------------
/data/domain_main/github_8_3.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 8938,github,"
  3 | 	use(unsafe.Pointer(_p0))
  4 | 	if e1!= 0 {
  5 | 		err = errnoErr(e1)
  6 | 	}
  7 | 	return
  8 | }
  9 | 
 10 | // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
 11 | 
 12 | func Umask(newmask int) (oldmask int) {
 13 | 	r0, _, _ := Syscall(SYS_UMASK, uintptr(newmask), 0, 0)
 14 | 	oldmask = int(r0)
 15 | 	return
 16 | }
 17 | 
 18 | // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
 19 | 
 20 | func Unlink(path string) (err error) {
 21 | 	var _p0 *byte
 22 | 	_p0, err = BytePtrFromString(path)
 23 | 	if err!= nil {
 24 | 		return
 25 | 	}
 26 | 	_, _,"
 27 | 2200,github,"
 28 | # Set the SUBGROUPING tag to YES (the default) to allow class member groups of
 29 | # the same type (for instance a group of public functions) to be put as a
 30 | # subgroup of that type (e.g. under the Public Functions section). Set it to
 31 | # NO to prevent subgrouping. Alternatively, this can be done per class using
 32 | # the \nosubgrouping command.
 33 | 
 34 | SUBGROUPING            = YES
 35 | 
 36 | # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
 37 | # is documented as struct, union, or enum with the name of the typedef. So
 38 | # typedef struct TypeS {} TypeT, will appear in the documentation as a struct
 39 | # with name TypeT. When disabled the typedef will appear as a member of a file,
 40 | # namespace, or class"
 41 | 6369,github,"_DATA)
 42 | all: all-am
 43 | 
 44 | .SUFFIXES:
 45 | $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
 46 | 	@for dep in $?; do \
 47 | 	  case '$(am__configure_deps)' in \
 48 | 	    *$$dep*) \
 49 | 	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
 50 | 	        && { if test -f $@; then exit 0; else break; fi; }; \
 51 | 	      exit 1;; \
 52 | 	  esac; \
 53 | 	done; \
 54 | 	echo'cd $(top_srcdir) && $(AUTOMAKE) --"
 55 | 11854,github,"      # Unixware is an offshoot of SVR4, but it has its own version
 56 |         # number series starting with 2...
 57 |         # I am not positive that other SVR4 systems won't match this,
 58 | 	# I just have to hope.  -- rms.
 59 |         # Use sysv4.2uw... so that sysv4* matches it.
 60 | 	echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
 61 | 	exit 0 ;;
 62 |     i*86:OS/2:*:*)
 63 | 	# If we were able to find `uname', then EMX Unix compatibility
 64 | 	# is probably installed.
 65 | 	echo ${UNAME_MACHINE}-pc-os2-emx
 66 | 	exit 0 ;;
 67 |  "
 68 | 11348,github,".0</dd></dl>
 69 | </li>
 70 | </ul>
 71 | </div>
 72 | <div class=""summary"">
 73 | <ul class=""blockList"">
 74 | <li class=""blockList"">
 75 | <!-- ========== METHOD SUMMARY =========== -->
 76 | <ul class=""blockList"">
 77 | <li class=""blockList""><a name=""method_summary"">
 78 | <!--   -->
 79 | </a>
 80 | <h3>Method Summary</h3>
 81 | <table class=""overviewSummary"" border=""0"" cellpadding=""3"" cellspacing=""0"" summary=""Method Summary table, listing methods, and an explanation"">
 82 | <caption><span>Methods</span><span class=""tabEnd"">&nbsp;</span></caption>
 83 | <tr>
 84 | <th class=""colFirst"" scope=""col"">Modifier and Type</th>
 85 | <th class=""colLast"" scope=""col"">"
 86 | 3415,github," Copyright (C) 2020 PX4 Development Team. All rights reserved.
 87 |  *
 88 |  * Redistribution and use in source and binary forms, with or without
 89 |  * modification, are permitted provided that the following conditions
 90 |  * are met:
 91 |  *
 92 |  * 1. Redistributions of source code must retain the above copyright
 93 |  *    notice, this list of conditions and the following disclaimer.
 94 |  * 2. Redistributions in binary form must reproduce the above copyright
 95 |  *    notice, this list of conditions and the following disclaimer in
 96 |  *    the documentation and/or other materials provided with the
 97 |  *    distribution.
 98 |  * 3. Neither the name PX4 nor the names of its contributors may be
 99 |  *    used to endorse or promote products derived from this software
100 |  *    without specific prior written permission.
101 |  *
102 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND"
103 | 5101,github,"[256] = {
104 |     0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
105 |     0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
106 |     0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
107 |     0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
108 |     0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
109 |     0x1adad47d, 0x6ddde4eb, 0xf"
110 | 2018,github,"// mksyscall.pl -l32 -tags linux,386 syscall_linux.go syscall_linux_386.go
111 | // Code generated by the command above; see README.md. DO NOT EDIT.
112 | 
113 | // +build linux,386
114 | 
115 | package unix
116 | 
117 | import (
118 | 	""syscall""
119 | 	""unsafe""
120 | )
121 | 
122 | var _ syscall.Errno
123 | 
124 | // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
125 | 
126 | func fchmodat(dirfd int, path string, mode uint32) (err error) {
127 | 	var _p0 *byte
128 | 	_p0, err = BytePtrFromString(path)
129 | 	if err!= nil {
130 | 		return
131 | 	}
132 | 	_, _, e1 := Syscall(SYS_FCHMODAT, uintptr(dirfd),"
133 | 


--------------------------------------------------------------------------------
/data/domain_main/github_8_4.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 14163,github,"_uint64
  3 | (
  4 |     GrB_Matrix C,
  5 |     const GrB_Matrix M,
  6 |     const bool Mask_struct,
  7 |     const GrB_Matrix A,
  8 |     const GrB_Matrix B,
  9 |     const bool Ch_is_Mh,
 10 |     const int64_t *GB_RESTRICT C_to_M,
 11 |     const int64_t *GB_RESTRICT C_to_A,
 12 |     const int64_t *GB_RESTRICT C_to_B,
 13 |     const GB_task_struct *GB_RESTRICT TaskList,
 14 |     const int ntasks,
 15 |     const int nthreads
 16 | )
 17 | { 
 18 |     #if GB_DISABLE
 19 |     return (GrB_NO_VALUE)"
 20 | 12113,github,"//
 21 | // Copyright (c) 2013 Mikko Mononen memon@inside.org
 22 | //
 23 | // This software is provided 'as-is', without any express or implied
 24 | // warranty.  In no event will the authors be held liable for any damages
 25 | // arising from the use of this software.
 26 | // Permission is granted to anyone to use this software for any purpose,
 27 | // including commercial applications, and to alter it and redistribute it
 28 | // freely, subject to the following restrictions:
 29 | // 1. The origin of this software must not be misrepresented; you must not
 30 | //    claim that you wrote the original software. If you use this software
 31 | //    in a product, an acknowledgment in the product documentation would be
 32 | //    appreciated but is not required.
 33 | // 2. Altered source versions must be plainly marked as such, and must not be
 34 | //    misrepresented as being the original software.
 35 | "
 36 | 7058,github,""" accesskey=""p"" rel=""prev""><span class=""fa fa-arrow-circle-left""></span> Previous</a>
 37 |       
 38 |     </div>
 39 |   
 40 | 
 41 |   <hr/>
 42 | 
 43 |   <div role=""contentinfo"">
 44 |     <p>
 45 |         
 46 |         &copy; Copyright 2020, Pierre-Alain Chaumeil, Aaron Mussig, and Donovan Parks
 47 | 
 48 |     </p>
 49 |   </div>
 50 |     
 51 |     
 52 |     
 53 |     Built with <a href=""http://sphinx-doc.org/"">Sphinx</a> using a
 54 |     
 55 |     <a href=""https://github.com/rtfd/sphinx_rtd_theme"">theme</a>
 56 |     "
 57 | 2957,github,"HighlightRules;
 58 | });
 59 | 
 60 | define(""ace/mode/folding/coffee"",[""require"",""exports"",""module"",""ace/lib/oop"",""ace/mode/folding/fold_mode"",""ace/range""], function(require, exports, module) {
 61 | ""use strict"";
 62 | 
 63 | var oop = require(""../../lib/oop"");
 64 | var BaseFoldMode = require(""./fold_mode"").FoldMode;
 65 | var Range = require(""../../range"").Range;
 66 | 
 67 | var FoldMode = exports.FoldMode = function() {};
 68 | oop.inherits(FoldMode, BaseFoldMode);
 69 | 
 70 | (function() {
 71 | 
 72 |     this.getFoldWidgetRange = function(session, foldStyle, row) {
 73 |         var range = this.indentationBlock(session, row);
 74 |         if ("
 75 | 915,github,"[iNdEx]
 76 | 			iNdEx++
 77 | 			wire |= (uint64(b) & 0x7F) << shift
 78 | 			if b < 0x80 {
 79 | 				break
 80 | 			}
 81 | 		}
 82 | 		fieldNum := int32(wire >> 3)
 83 | 		wireType := int(wire & 0x7)
 84 | 		if wireType == 4 {
 85 | 			return fmt.Errorf(""proto: JobCondition: wiretype end group for non-group"")
 86 | 		}
 87 | 		if fieldNum <= 0 {
 88 | 			return fmt.Errorf(""proto: JobCondition: illegal tag %d (wire type %d)"", fieldNum, wire)
 89 | 		}
 90 | 		switch fieldNum {
 91 | 		case 1:
 92 | 			if wireType!= 2 {
 93 | 				return"
 94 | 13828,github,"1.c
 95 |  *
 96 |  *
 97 |  * --------------------------------------------------------------------------
 98 |  *
 99 |  *      Pthreads-win32 - POSIX Threads Library for Win32
100 |  *      Copyright(C) 1998 John E. Bossom
101 |  *      Copyright(C) 1999,2005 Pthreads-win32 contributors
102 |  * 
103 |  *      Contact Email: rpj@callisto.canberra.edu.au
104 |  * 
105 |  *      The current list of contributors is contained
106 |  *      in the file CONTRIBUTORS included with the source
107 |  *      code distribution. The list can also be seen at the
108 |  *      following World Wide Web location:
109 |  *      http://sources.redhat.com/pthread"
110 | 6049,github,"dEx]
111 | 			iNdEx++
112 | 			wire |= (uint64(b) & 0x7F) << shift
113 | 			if b < 0x80 {
114 | 				break
115 | 			}
116 | 		}
117 | 		fieldNum := int32(wire >> 3)
118 | 		wireType := int(wire & 0x7)
119 | 		if wireType == 4 {
120 | 			return fmt.Errorf(""proto: CronJobStatus: wiretype end group for non-group"")
121 | 		}
122 | 		if fieldNum <= 0 {
123 | 			return fmt.Errorf(""proto: CronJobStatus: illegal tag %d (wire type %d)"", fieldNum, wire)
124 | 		}
125 | 		switch fieldNum {
126 | 		case 1:
127 | 			if wireType!= 2 {
128 | 				return fmt"
129 | 14137,github,"=="",
130 |           ""dev"": true
131 |         },
132 |         ""glob"": {
133 |           ""version"": ""7.1.2"",
134 |           ""resolved"": ""https://registry.npmjs.org/glob/-/glob-7.1.2.tgz"",
135 |           ""integrity"": ""sha512-MJTUg1kjuLeQCJ+ccE4Vpa6kKVXkPYJ2mOCQyUuKLcLQsdrMCpBPUi8qVE6+YuaJkozeA9NusTAw3hLr8Xe5EQ=="",
136 |         "
137 | 


--------------------------------------------------------------------------------
/data/domain_main/license_8_0.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 6876,license,"<?xml version=""1.0"" encoding=""ISO-8859-1""?>
  3 | <!--
  4 |   ~  Copyright (c) 2005-2010, WSO2 Inc. (http://wso2.com) All Rights Reserved.
  5 |   ~
  6 |   ~  WSO2 Inc. licenses this file to you under the Apache License,
  7 |   ~  Version 2.0 (the ""License""); you may not use this file except
  8 |   ~  in compliance with the License.
  9 |   ~  You may obtain a copy of the License at
 10 |   ~
 11 |   ~    http://www.apache.org/licenses/LICENSE-2.0
 12 |   ~
 13 |   ~  Unless required by applicable law or agreed to in writing,
 14 |   ~  software distributed under the License is distributed on an
 15 |   ~  ""AS IS"" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 16 |   ~  KIND"
 17 | 4954,license,")
 18 | 
 19 | % @=============================================================================
 20 | % This function is part of the Brainstorm software:
 21 | % https://neuroimage.usc.edu/brainstorm
 22 | % 
 23 | % Copyright (c)2000-2020 University of Southern California & McGill University
 24 | % This software is distributed under the terms of the GNU General Public License
 25 | % as published by the Free Software Foundation. Further details on the GPLv3
 26 | % license can be found at http://www.gnu.org/copyleft/gpl.html.
 27 | % 
 28 | % FOR RESEARCH PURPOSES ONLY. THE SOFTWARE IS PROVIDED ""AS IS,"" AND THE
 29 | % UNIVERSITY OF SOUTHERN CALIFORNIA AND ITS COLLABORATORS DO NOT MAKE ANY
 30 | % WARRANTY, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO WARRANTIES OF
 31 | % MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, NOR DO THEY"
 32 | 381,license," Protocol header file.
 33 | *
 34 | * Copyright (c) 2003 by Marc Boucher, Services Informatiques (MBSI) inc.
 35 | * portions Copyright (c) 1997 Global Election Systems Inc.
 36 | *
 37 | * The authors hereby grant permission to use, copy, modify, distribute,
 38 | * and license this software and its documentation for any purpose, provided
 39 | * that existing copyright notices are retained in all copies and that this
 40 | * notice and the following disclaimer are included verbatim in any 
 41 | * distributions. No written agreement, license, or royalty fee is required
 42 | * for any of the authorized uses.
 43 | *
 44 | * THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS *AS IS* AND ANY EXPRESS OR
 45 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 46 | * OF MERCHANTABILITY AND FITNESS FOR A PART"
 47 | 1312,license,"/*
 48 | * //******************************************************************
 49 | * //
 50 | * // Copyright 2015 Intel Corporation.
 51 | * //
 52 | * //-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 53 | * //
 54 | * // Licensed under the Apache License, Version 2.0 (the ""License"");
 55 | * // you may not use this file except in compliance with the License.
 56 | * // You may obtain a copy of the License at
 57 | * //
 58 | * //      http://www.apache.org/licenses/LICENSE-2.0
 59 | * //
 60 | * // Unless required by applicable law or agreed to in writing, software
 61 | * // distributed under the License is distributed on an ""AS IS"" BASIS,
 62 | * // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 63 | * // See the License for the specific language governing permissions and
 64 | * // limitations under the License.
 65 | * //
 66 | * //-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-"
 67 | 8053,license,"/*
 68 |  * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
 69 |  *
 70 |  * This program is free software; you can redistribute it and/or modify it under
 71 |  * the terms of the GNU General Public License, version 2.0, as published by the
 72 |  * Free Software Foundation.
 73 |  *
 74 |  * This program is also distributed with certain software (including but not
 75 |  * limited to OpenSSL) that is licensed under separate terms, as designated in a
 76 |  * particular file or component or in included license documentation. The
 77 |  * authors of MySQL hereby grant you an additional permission to link the
 78 |  * program and your derivative works with the separately licensed software that
 79 |  * they have included with MySQL.
 80 |  *
 81 |  * Without limiting anything contained in the foregoing, this file, which is
 82 |  * part of MySQL Connector/J, is also subject to the Universal FOSS Exception,
 83 |  * version 1.0, a copy of which can"
 84 | 5349,license,"
 85 | /*
 86 | Copyright Â© 2001-2004 World Wide Web Consortium, 
 87 | (Massachusetts Institute of Technology, European Research Consortium 
 88 | for Informatics and Mathematics, Keio University). All 
 89 | Rights Reserved. This work is distributed under the W3CÂ® Software License [1] in the 
 90 | hope that it will be useful, but WITHOUT ANY WARRANTY; without even 
 91 | the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 92 | 
 93 | [1] http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
 94 | */
 95 | 
 96 | 
 97 | 
 98 |    /**
 99 |     *  Gets URI that identifies the test.
100 |     *  @return uri identifier of test
101 |     */
102 | function getTargetURI() {
103 |       return ""http://www.w3.org/2001/DOM"
104 | 3595,license,"
105 | # 
106 | #  Permission is hereby granted, free of charge, to any person obtaining a copy
107 | #  of this software and associated documentation files (the ""Software""), to deal
108 | #  in the Software without restriction, including without limitation the rights
109 | #  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
110 | #  copies of the Software, and to permit persons to whom the Software is
111 | #  furnished to do so, subject to the following conditions:
112 | # 
113 | #  The above copyright notice and this permission notice shall be included in
114 | #  all copies or substantial portions of the Software.
115 | # 
116 | #  THE SOFTWARE IS PROVIDED ""AS IS"", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
117 | #  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
118 | #  FITNESS FOR A PARTICULAR PURPOSE AND NON"
119 | 8845,license,"-2020, AdaCore                  --
120 | --                                                                          --
121 | -- This is free software;  you can redistribute it  and/or modify it  under --
122 | -- terms of the  GNU General Public License as published  by the Free Soft- --
123 | -- ware  Foundation;  either version 3,  or (at your option) any later ver- --
124 | -- sion.  This software is distributed in the hope  that it will be useful, --
125 | -- but WITHOUT ANY WARRANTY;  without even the implied warranty of MER"
126 | 


--------------------------------------------------------------------------------
/data/domain_main/license_8_1.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 6577,license," CEA LIST. All Rights Reserved.
  3 | #    Contributor(s): Cingulata team (formerly Armadillo team)
  4 | #
  5 | #    This software is governed by the CeCILL-C license under French law and
  6 | #    abiding by the rules of distribution of free software.  You can  use,
  7 | #    modify and/ or redistribute the software under the terms of the CeCILL-C
  8 | #    license as circulated by CEA, CNRS and INRIA at the following URL
  9 | #    ""http://www.cecill.info"".
 10 | #
 11 | #    As a counterpart to the access to the source code and  rights to copy,
 12 | #    modify and redistribute granted by the license, users are provided only
 13 | #    with a limited warranty  and the software's author,  the holder of the
 14 | #  "
 15 | 3068,license,"
 16 | // Copyright (c) 2013, ClearCanvas Inc.
 17 | // All rights reserved.
 18 | // http://www.clearcanvas.ca
 19 | //
 20 | // This file is part of the ClearCanvas RIS/PACS open source project.
 21 | //
 22 | // The ClearCanvas RIS/PACS open source project is free software: you can
 23 | // redistribute it and/or modify it under the terms of the GNU General Public
 24 | // License as published by the Free Software Foundation, either version 3 of the
 25 | // License, or (at your option) any later version.
 26 | //
 27 | // The ClearCanvas RIS/PACS open source project is distributed in the hope that it
 28 | // will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 29 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 30 | //"
 31 | 8175,license,"-left: 0.5em;"">
 32 |    <div id=""projectname""><a href=""https://taskflow.github.io/"">Taskflow</a>
 33 |    &#160;<span id=""projectnumber"">2.7.0-master-branch</span>
 34 |    </div>
 35 |   </td>
 36 |  </tr>
 37 |  </tbody>
 38 | </table>
 39 | </div>
 40 | <!-- end header part -->
 41 | <!-- Generated by Doxygen 1.8.14 -->
 42 | <script type=""text/javascript"">
 43 | /* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
 44 | var searchBox = new SearchBox(""searchBox"", ""search"",false,'Search');
 45 | /*"
 46 | 4722,license,"/*
 47 |  * Copyright (C) 2015-2016 Federico Tomassetti
 48 |  * Copyright (C) 2017-2019 The JavaParser Team.
 49 |  *
 50 |  * This file is part of JavaParser.
 51 |  *
 52 |  * JavaParser can be used either under the terms of
 53 |  * a) the GNU Lesser General Public License as published by
 54 |  *     the Free Software Foundation, either version 3 of the License, or
 55 |  *     (at your option) any later version.
 56 |  * b) the terms of the Apache License
 57 |  *
 58 |  * You should have received a copy of both licenses in LICENCE.LGPL and
 59 |  * LICENCE.APACHE. Please refer to those files for details.
 60 |  *
 61 |  * JavaParser is distributed in the hope that it will be useful,
 62 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 63 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. "
 64 | 3965,license,"/* JNativeHook: Global keyboard and mouse listeners for Java.
 65 |  * Copyright (C) 2006-2020 Alexander Barker.  All Rights Received.
 66 |  * https://github.com/kwhat/jnativehook/
 67 |  *
 68 |  * JNativeHook is free software: you can redistribute it and/or modify
 69 |  * it under the terms of the GNU Lesser General Public License as published
 70 |  * by the Free Software Foundation, either version 3 of the License, or
 71 |  * (at your option) any later version.
 72 |  *
 73 |  * JNativeHook is distributed in the hope that it will be useful,
 74 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 75 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 76 |  * GNU General Public License for more details.
 77 |  *
 78 |  * You should have received a copy of the GNU Lesser General Public License
 79 |  * along with this program. "
 80 | 3721,license,"
 81 | %
 82 | % *** This function is part of ERPLAB Toolbox ***
 83 | % Author: Javier Lopez-Calderon & Steven Luck
 84 | % Center for Mind and Brain
 85 | % University of California, Davis,
 86 | % Davis, CA
 87 | % 2009
 88 | 
 89 | %b8d3721ed219e65100184c6b95db209bb8d3721ed219e65100184c6b95db209b
 90 | %
 91 | % ERPLAB Toolbox
 92 | % Copyright © 2007 The Regents of the University of California
 93 | % Created by Javier Lopez-Calderon and Steven Luck
 94 | % Center for Mind and Brain, University of California, Davis,
 95 | % javlopez@ucdavis.edu, sjluck@ucdavis.edu
 96 | %
 97 | % This program is free software: you can redistribute it and/or modify
 98 | % it under the terms of the GNU General Public License as published by
 99 | % the Free Software"
100 | 9433,license,"#region Copyright (C) 2005-2020 Team MediaPortal
101 | 
102 | // Copyright (C) 2005-2020 Team MediaPortal
103 | // http://www.team-mediaportal.com
104 | // 
105 | // MediaPortal is free software: you can redistribute it and/or modify
106 | // it under the terms of the GNU General Public License as published by
107 | // the Free Software Foundation, either version 2 of the License, or
108 | // (at your option) any later version.
109 | // 
110 | // MediaPortal is distributed in the hope that it will be useful,
111 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
112 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
113 | // GNU General Public License for more details.
114 | // 
115 | // You should have received a copy of the GNU General Public License
116 | //"
117 | 10998,license,".2
118 |  
119 |  Disclaimer: IMPORTANT:  This Apple software is supplied to you by Apple Inc.
120 |  (""Apple"") in consideration of your agreement to the following terms, and your
121 |  use, installation, modification or redistribution of this Apple software
122 |  constitutes acceptance of these terms.  If you do not agree with these terms,
123 |  please do not use, install, modify or redistribute this Apple software.
124 |  
125 |  In consideration of your agreement to abide by the following terms, and subject
126 |  to these terms, Apple grants you a personal, non-exclusive license, under
127 |  Apple's copyrights in this original Apple software (the ""Apple Software""), to
128 |  use, reproduce, modify and redistribute the Apple Software, with or without
129 |  modifications, in source and/or binary forms; provided that if you redistribute
130 |  the Apple Software in its entirety and without modifications, you must retain
131 |  this notice and the following text and disclaimers in all such redistributions
132 | "
133 | 


--------------------------------------------------------------------------------
/data/domain_main/license_8_2.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 594,license,"      Contributor(s):
  3 |       If you wish your version of this file to be governed by only the CDDL or
  4 |       only the GPL Version 2, indicate your decision by adding ""[Contributor]
  5 |       elects to include this software in this distribution under the [CDDL or GPL
  6 |       Version 2] license.""  If you don't indicate a single choice of license, a
  7 |       recipient has the option to distribute your version of this file under
  8 |       either the CDDL, the GPL Version 2 or to extend the choice of license to
  9 |       its licensees as provided above.  However, if you add GPL Version 2 code
 10 |       and therefore, elected the GPL Version 2 license, then the option applies
 11 |       only if the new code is made subject to such"
 12 | 14879,license,"/**
 13 |  *  This file is part of dvo.
 14 |  *
 15 |  *  Copyright 2012 Christian Kerl <christian.kerl@in.tum.de> (Technical University of Munich)
 16 |  *  For more information see <http://vision.in.tum.de/data/software/dvo>.
 17 |  *
 18 |  *  dvo is free software: you can redistribute it and/or modify
 19 |  *  it under the terms of the GNU General Public License as published by
 20 |  *  the Free Software Foundation, either version 3 of the License, or
 21 |  *  (at your option) any later version.
 22 |  *
 23 |  *  dvo is distributed in the hope that it will be useful,
 24 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 25 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 26 |  *  GNU General Public License for more details.
 27 |  *"
 28 | 10300,license,"// Copyright © 2009 Travis Robinson. All rights reserved.
 29 | // 
 30 | // website: http://sourceforge.net/projects/libusbdotnet
 31 | // e-mail:  trobinso@users.sourceforge.net
 32 | // 
 33 | // This program is free software; you can redistribute it and/or modify it
 34 | // under the terms of the GNU General Public License as published by the
 35 | // Free Software Foundation; either version 2 of the License, or 
 36 | // (at your option) any later version.
 37 | // 
 38 | // This program is distributed in the hope that it will be useful, but 
 39 | // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 40 | // or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 41 | // for more details.
 42 | // 
 43 | // You should have received a copy of the GNU General Public License along
 44 | // with this program; if"
 45 | 3357,license," this
 46 |      software without specific prior written permission.
 47 | 
 48 |   4. This software, with or without modification, must only be used with a
 49 |      Nordic Semiconductor ASA integrated circuit.
 50 | 
 51 |   5. Any software provided in binary form under this license must not be reverse
 52 |      engineered, decompiled, modified and/or disassembled.
 53 | 
 54 |   THIS SOFTWARE IS PROVIDED BY NORDIC SEMICONDUCTOR ASA ""AS IS"" AND ANY EXPRESS
 55 |   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 56 |   OF MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE ARE
 57 |   DISCLAIMED. IN NO EVENT SHALL NORDIC SEMICONDUCTOR ASA OR CONTRIBUTORS BE
 58 |   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR"
 59 | 684,license,"#region Disclaimer / License
 60 | // Copyright (C) 2015, The Duplicati Team
 61 | // http://www.duplicati.com, info@duplicati.com
 62 | // 
 63 | // This library is free software; you can redistribute it and/or
 64 | // modify it under the terms of the GNU Lesser General Public
 65 | // License as published by the Free Software Foundation; either
 66 | // version 2.1 of the License, or (at your option) any later version.
 67 | // 
 68 | // This library is distributed in the hope that it will be useful,
 69 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 70 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 71 | // Lesser General Public License for more details.
 72 | // 
 73 | // You should have received a copy of the GNU Lesser"
 74 | 7827,license," this License would be to
 75 | refrain entirely from distribution of the Program.
 76 | 
 77 | If any portion of this section is held invalid or unenforceable under
 78 | any particular circumstance, the balance of the section is intended to
 79 | apply and the section as a whole is intended to apply in other
 80 | circumstances.
 81 | 
 82 | It is not the purpose of this section to induce you to infringe any
 83 | patents or other property right claims or to contest validity of any
 84 | such claims; this section has the sole purpose of protecting the
 85 | integrity of the free software distribution system, which is
 86 | implemented by public license practices.  Many people have made
 87 | generous contributions to the wide range of software distributed
 88 | through that system in reliance on consistent application of that
 89 | system; it is up to the author/donor to decide if he or she is willing
 90 | to distribute software through any other"
 91 | 14627,license," the initiation of patent
 92 |      infringement litigation, then the reasonable value of the licenses
 93 |      granted by such Participant under Sections 2.1 or 2.2 shall be taken
 94 |      into account in determining the amount or value of any payment or
 95 |      license.
 96 | 
 97 |      8.4.  In the event of termination under Sections 8.1 or 8.2 above,
 98 |      all end user license agreements (excluding distributors and resellers)
 99 |      which have been validly granted by You or any distributor hereunder
100 |      prior to termination shall survive termination.
101 | 
102 | 9. LIMITATION OF LIABILITY.
103 | 
104 |      UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
105 |      (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU,"
106 | 454,license,"<?xml version=""1.0"" encoding=""UTF-8""?>
107 | <!--
108 |     Licensed to the Apache Software Foundation (ASF) under one or more
109 |     contributor license agreements.  See the NOTICE file distributed with
110 |     this work for additional information regarding copyright ownership.
111 |     The ASF licenses this file to You under the Apache License, Version 2.0
112 |     (the ""License""); you may not use this file except in compliance with
113 |     the License.  You may obtain a copy of the License at
114 | 
115 |        http://www.apache.org/licenses/LICENSE-2.0
116 | 
117 |     Unless required by applicable law or agreed to in writing, software
118 |     distributed under the License is distributed on an ""AS IS"" BASIS,
119 |     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or"
120 | 


--------------------------------------------------------------------------------
/data/domain_main/license_8_3.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 1388,license,"/*
  3 |  *   fs/cifs/smb2inode.c
  4 |  *
  5 |  *   Copyright (C) International Business Machines  Corp., 2002, 2011
  6 |  *                 Etersoft, 2012
  7 |  *   Author(s): Pavel Shilovsky (pshilovsky@samba.org),
  8 |  *              Steve French (sfrench@us.ibm.com)
  9 |  *
 10 |  *   This library is free software; you can redistribute it and/or modify
 11 |  *   it under the terms of the GNU Lesser General Public License as published
 12 |  *   by the Free Software Foundation; either version 2.1 of the License, or
 13 |  *   (at your option) any later version.
 14 |  *
 15 |  *   This library is distributed in the hope that it will be"
 16 | 10906,license,"#pragma once
 17 | 
 18 | /*
 19 |  *  Copyright(c) 2018 Jeremiah van Oosten
 20 |  *
 21 |  *  Permission is hereby granted, free of charge, to any person obtaining a copy
 22 |  *  of this software and associated documentation files(the ""Software""), to deal
 23 |  *  in the Software without restriction, including without limitation the rights
 24 |  *  to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
 25 |  *  copies of the Software, and to permit persons to whom the Software is
 26 |  *  furnished to do so, subject to the following conditions :
 27 |  *
 28 |  *  The above copyright notice and this permission notice shall be included in
 29 |  *  all copies or substantial portions of the Software.
 30 |  *
 31 |  *  THE SOFTWARE IS PROVIDED ""AS IS"", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 32 |  *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCH"
 33 | 9912,license,"_01
 34 | //
 35 | // Generative Gestaltung – Creative Coding im Web
 36 | // ISBN: 978-3-87439-902-9, First Edition, Hermann Schmidt, Mainz, 2018
 37 | // Benedikt Groß, Hartmut Bohnacker, Julia Laub, Claudius Lazzeroni
 38 | // with contributions by Joey Lee and Niels Poldervaart
 39 | // Copyright 2018
 40 | //
 41 | // http://www.generative-gestaltung.de
 42 | //
 43 | // Licensed under the Apache License, Version 2.0 (the ""License"");
 44 | // you may not use this file except in compliance with the License.
 45 | // You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
 46 | // Unless required by applicable law or agreed to in writing, software
 47 | // distributed under the License is distributed on an ""AS IS"" BASIS,
 48 | // WITHOUT"
 49 | 14759,license,"# This file is part of Androguard.
 50 | #
 51 | # Copyright (C) 2010, Anthony Desnos <desnos at t0t0.org>
 52 | # All rights reserved.
 53 | #
 54 | # Androguard is free software: you can redistribute it and/or modify
 55 | # it under the terms of the GNU Lesser General Public License as published by
 56 | # the Free Software Foundation, either version 3 of the License, or
 57 | # (at your option) any later version.
 58 | #
 59 | # Androguard is distributed in the hope that it will be useful,
 60 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 61 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 62 | # GNU Lesser General Public License for more details.
 63 | #
 64 | # You should have received a copy of the GNU Lesser General Public License
 65 | # along with Androguard.  If"
 66 | 8305,license,"
 67 |  * Copyright 2010 VMware, Inc.
 68 |  * All Rights Reserved.
 69 |  * 
 70 |  * Permission is hereby granted, free of charge, to any person obtaining a
 71 |  * copy of this software and associated documentation files (the
 72 |  * ""Software""), to deal in the Software without restriction, including
 73 |  * without limitation the rights to use, copy, modify, merge, publish,
 74 |  * distribute, sub license, and/or sell copies of the Software, and to
 75 |  * permit persons to whom the Software is furnished to do so, subject to
 76 |  * the following conditions:
 77 |  * 
 78 |  * The above copyright notice and this permission notice (including the
 79 |  * next paragraph) shall be included in all copies or substantial portions
 80 |  * of the Software.
 81 |  * 
 82 |  * THE SOFTWARE IS PROVIDED ""AS IS"", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 83 |  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 84 |  * MERCHANTA"
 85 | 9401,license,"# Copyright (C) Dnspython Contributors, see LICENSE for text of ISC license
 86 | 
 87 | # Copyright (C) 2003-2007, 2009-2011 Nominum, Inc.
 88 | #
 89 | # Permission to use, copy, modify, and distribute this software and its
 90 | # documentation for any purpose with or without fee is hereby granted,
 91 | # provided that the above copyright notice and this permission notice
 92 | # appear in all copies.
 93 | #
 94 | # THE SOFTWARE IS PROVIDED ""AS IS"" AND NOMINUM DISCLAIMS ALL WARRANTIES
 95 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 96 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
 97 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 98 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS"
 99 | 7186,license,"xamarin.com)
100 | //
101 | // Permission is hereby granted, free of charge, to any person obtaining a copy
102 | // of this software and associated documentation files (the ""Software""), to deal
103 | // in the Software without restriction, including without limitation the rights
104 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
105 | // copies of the Software, and to permit persons to whom the Software is
106 | // furnished to do so, subject to the following conditions:
107 | //
108 | // The above copyright notice and this permission notice shall be included in
109 | // all copies or substantial portions of the Software.
110 | //
111 | // THE SOFTWARE IS PROVIDED ""AS IS"", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
112 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
113 | // FITNESS FOR A PART"
114 | 2894,license,"'' FreeBASIC binding for mingw-w64-v4.0.4
115 | ''
116 | '' based on the C header files:
117 | ''   This Software is provided under the Zope Public License (ZPL) Version 2.1.
118 | ''
119 | ''   Copyright (c) 2009, 2010 by the mingw-w64 project
120 | ''
121 | ''   See the AUTHORS file for the list of contributors to the mingw-w64 project.
122 | ''
123 | ''   This license has been certified as open source. It has also been designated
124 | ''   as GPL compatible by the Free Software Foundation (FSF).
125 | ''
126 | ''   Redistribution and use in source and binary forms, with or without
127 | ''   modification, are permitted provided that the following conditions are met:
128 | ''
129 | ''     1. Redistributions in source code must retain the accompanying copyright
130 | ''     "
131 | 


--------------------------------------------------------------------------------
/data/domain_main/pubmed_central_8_2.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 1885,pubmed_central," When submitting your revision, we need you to address these additional requirements.
  3 | 
  4 | Please ensure that your manuscript meets PLOS ONE\'s style requirements, including those for file naming. The PLOS ONE style templates can be found at
  5 | 
  6 | <http://www.journals.plos.org/plosone/s/file?id=wjVg/PLOSOne_formatting_sample_main_body.pdf> and <http://www.journals.plos.org/plosone/s/file?id=ba62/PLOSOne_formatting_sample_title_authors_affiliations.pdf>
  7 | 
  8 | 2\. We note that you have included the phrase ""data not shown"" in your manuscript. Unfortunately, this does not meet our data sharing requirements. PLOS does not permit references to inaccessible data. We require that authors provide all relevant data within the paper, Supporting Information files, or in an acceptable,"
  9 | 957,pubmed_central,"**Suggested citation:** EFSA (European Food Safety Authority), Arena M, Auteri D, Barmaz S, Bellisai G, Brancato A, Brocca D, Bura L, Byers H, Chiusolo A, Court Marques D, Crivellente F, De Lentdecker C, Egsmose M, Erdos Z, Fait G, Ferreira L, Goumenou M, Greco L, Ippolito A, Istace F, Jarrah S, Kardassi D, Leuschner R, Lythgo C, Magrans JO, Medina P, Miron I, Molnar T, Nougadere A, Padovani L, Parra Morte JM, Pedersen R, Reich H, Sacchi A, Santos M, Serafimova R, Sharp R, Stanek A, Streissl F, Sturma J, Szentes"
 10 | 6536,pubmed_central,".
 11 | 
 12 | Please prepare and submit your revised manuscript within 30 days. If you anticipate any delay, please let us know the expected resubmission date by replying to this email. 
 13 | 
 14 | When you are ready to resubmit, please upload the following:
 15 | 
 16 | \[1\] A letter containing a detailed list of your responses to all review comments, and a description of the changes you have made in the manuscript. 
 17 | 
 18 | Please note while forming your response, if your article is accepted, you may have the opportunity to make the peer review history publicly available. The record will include editor decision letters (with reviews) and your responses to reviewer comments. If eligible, we will contact you to opt in or out
 19 | 
 20 | \[2\] Two versions of the revised manuscript: one with either highlights or tracked changes denoting where the text has been changed; the other a clean version (uploaded as the manuscript file).
 21 | 
 22 | Important additional instructions are given below your"
 23 | 1190,pubmed_central,"
 24 | \*\*\*\*\*\*\*\*\*\*
 25 | 
 26 | 5\. Is the manuscript presented in an intelligible fashion and written in standard English?
 27 | 
 28 | PLOS ONE does not copyedit accepted manuscripts, so the language in submitted articles must be clear, correct, and unambiguous. Any typographical or grammatical errors should be corrected at revision, so please note any specific errors here.
 29 | 
 30 | Reviewer \#1: (No Response)
 31 | 
 32 | Reviewer \#2: Yes
 33 | 
 34 | \*\*\*\*\*\*\*\*\*\*
 35 | 
 36 | 6\. Review Comments to the Author
 37 | 
 38 | Please use the space provided to explain your answers to the questions above. You may also include additional comments for the author, including concerns about dual publication, research ethics, or publication ethics. (Please upload your review as an attachment if it exceeds 20,000 characters)
 39 | 
 40 | Reviewer \"
 41 | 8659,pubmed_central,"fordjournals.org/jnls/permissions/>.
 42 | 
 43 | **Advertising**
 44 | 
 45 | Advertising, inserts and artwork enquiries should be addressed to Advertising and Special Sales, Oxford Journals, Oxford University Press, Great Clarendon Street, Oxford OX2 6DP, UK. Tel: +44 (0)1865 354767; Email: <jnlsadvertising@oup.com>.
 46 | 
 47 | © Oxford University Press 2015
 48 | 
 49 | All rights reserved; no part of this publication may be reproduced, stored in a retrieval system, or transmitted in any form or by any means, electronic, mechanical, photocopying, recording, or otherwise without prior written permission of the Publishers, or a licence permitting restricted copying issued in the UK by the Copyright Licensing Agency Ltd, 90 Tottenham Court Road, London W1P 9HE, or in the USA by the Copyright Clearance Center, 222 Rosewood Drive, Danvers, MA 01923.
 50 | 
 51 | "
 52 | 9086,pubmed_central,"
 53 | 
 54 | Reviewer \#3: Yes
 55 | 
 56 | \*\*\*\*\*\*\*\*\*\*
 57 | 
 58 | 5\. Is the manuscript presented in an intelligible fashion and written in standard English?
 59 | 
 60 | PLOS ONE does not copyedit accepted manuscripts, so the language in submitted articles must be clear, correct, and unambiguous. Any typographical or grammatical errors should be corrected at revision, so please note any specific errors here.
 61 | 
 62 | Reviewer \#2: Yes
 63 | 
 64 | Reviewer \#3: Yes
 65 | 
 66 | \*\*\*\*\*\*\*\*\*\*
 67 | 
 68 | 6\. Review Comments to the Author
 69 | 
 70 | Please use the space provided to explain your answers to the questions above. You may also include additional comments for the author, including concerns about dual publication, research ethics, or publication ethics. (Please upload your review as an attachment if it exceeds 20,"
 71 | 4195,pubmed_central,"
 72 | 
 73 | \[Note: HTML markup is below. Please do not edit.\]
 74 | 
 75 | Reviewers\' comments:
 76 | 
 77 | Reviewer\'s Responses to Questions
 78 | 
 79 | **Comments to the Author**
 80 | 
 81 | 1\. If the authors have adequately addressed your comments raised in a previous round of review and you feel that this manuscript is now acceptable for publication, you may indicate that here to bypass the ""Comments to the Author"" section, enter your conflict of interest statement in the ""Confidential to Editor"" section, and submit your \""Accept\"" recommendation.
 82 | 
 83 | Reviewer \#1: (No Response)
 84 | 
 85 | Reviewer \#2: All comments have been addressed
 86 | 
 87 | Reviewer \#3: (No Response)
 88 | 
 89 | \*\*\*\*\*\*\*\*\*\*
 90 | 
 91 | 2\. Is the manuscript technically sound, and do the data support the conclusions?
 92 | 
 93 | The manuscript must describe a technically sound piece of scientific research"
 94 | 6043,pubmed_central,"
 95 | 
 96 | \*\*\*\*\*\*\*\*\*\*
 97 | 
 98 | 2\. Is the manuscript technically sound, and do the data support the conclusions?
 99 | 
100 | The manuscript must describe a technically sound piece of scientific research with data that supports the conclusions. Experiments must have been conducted rigorously, with appropriate controls, replication, and sample sizes. The conclusions must be drawn appropriately based on the data presented.
101 | 
102 | Reviewer \#1: Partly
103 | 
104 | \*\*\*\*\*\*\*\*\*\*
105 | 
106 | 3\. Has the statistical analysis been performed appropriately and rigorously?
107 | 
108 | Reviewer \#1: No
109 | 
110 | \*\*\*\*\*\*\*\*\*\*
111 | 
112 | 4\. Have the authors made all data underlying the findings in their manuscript fully available?
113 | 
114 | The [PLOS Data policy](http://www.pl"
115 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_1_0.csv:
--------------------------------------------------------------------------------
 1 | doc_id,corpus,text
 2 | 1670,github,"</name>
 3 | 	<description></description>
 4 | 	<!-- TODO <organization> <name>company name</name> <url>company url</url> 
 5 | 		</organization> -->
 6 | 	<licenses>
 7 | 		<license>
 8 | 			<name>The Apache Software License, Version 2.0</name>
 9 | 			<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
10 | 			<distribution>repo</distribution>
11 | 		</license>
12 | 	</licenses>
13 | 	
14 | 	<dependencies>
15 | 		<!-- WICKET DEPENDENCIES -->
16 | 		<dependency>
17 | 			<groupId>org.apache.wicket</groupId>
18 | 			<artifactId>wicket-core</artifactId>
19 | 		"
20 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_1_1.csv:
--------------------------------------------------------------------------------
 1 | doc_id,corpus,text
 2 | 7576,pile-cc,".
 3 | 
 4 | Cloud Expo, Inc. has announced today that Andi Mann returns to 'DevOps at Cloud Expo 2017' as Conference Chair
 5 | The @DevOpsSummit at Cloud Expo will take place on June 6-8, 2017, at the Javits Center in New York City, NY.
 6 | ""DevOps is set to be one of the most profound disruptions to hi...
 7 | 
 8 | 20th Cloud Expo, taking place June 6-8, 2017, at the Javits Center in New York City, NY, will feature technical sessions from a rock star conference faculty and the leading industry players in the world.
 9 | Cloud computing is now being embraced by a majority of enterprises of all sizes....
10 | 
11 | With major technology companies and startups seriously embracing IoT strategies, now is the perfect time to attend @ThingsExpo 2016 in New York. Learn what is going on, contribute to the discussions, and ensure that your enterprise is as ""IoT-Ready"" as it can be"
12 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_1_2.csv:
--------------------------------------------------------------------------------
1 | doc_id,corpus,text
2 | 7592,pile-cc,"...
3 | 
4 | You are responsible for reading, understanding and agreeing to the National Law Review's (NLR’s) and the National Law Forum LLC's Terms of Use and Privacy Policy before using the National Law Review website. The National Law Review is a free to use, no-log in database of legal and business articles. The content and links on www.NatLawReview.com are intended for general information purposes only. Any legal analysis, legislative updates or other content and links should not be construed as legal or professional advice or a substitute for such advice. No attorney-client or confidential relationship is formed by the transmission of information between you and the National Law Review website or any of the law firms, attorneys or other professionals or organizations who include content on the National Law Review website. If you require legal or professional advice, kindly contact an attorney or other suitable professional advisor.
5 | 
6 | Some states have laws and ethical rules regarding solicitation and advertisement practices by attorneys and/or other professionals. The"
7 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_1_3.csv:
--------------------------------------------------------------------------------
 1 | doc_id,corpus,text
 2 | 11584,github,"/devel/api-conventions.md#spec-and-status
 3 |   // +optional
 4 |   optional CronJobStatus status = 3;
 5 | }
 6 | 
 7 | // CronJobList is a collection of cron jobs.
 8 | message CronJobList {
 9 |   // Standard list metadata.
10 |   // More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#metadata
11 |   // +optional
12 |   optional k8s.io.apimachinery.pkg.apis.meta.v1.ListMeta metadata = 1;
13 | 
14 |   // items is the list of CronJobs.
15 |   repeated CronJob items = 2;
16 | }
17 | 
18 | // CronJobSpec describes how the job execution will look like and when it will actually run.
19 | message CronJobSpec {
20 |   // The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron"
21 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_1_4.csv:
--------------------------------------------------------------------------------
 1 | doc_id,corpus,text
 2 | 13249,github,".xcconfig */,
 3 | 			);
 4 | 			name = Pods;
 5 | 			sourceTree = ""<group>"";
 6 | 		};
 7 | 		9740EEB11CF90186004384FC /* Flutter */ = {
 8 | 			isa = PBXGroup;
 9 | 			children = (
10 | 				3B80C3931E831B6300D905FE /* App.framework */,
11 | 				3B3967151E833CAA004F5970 /* AppFrameworkInfo.plist */,
12 | 				9740EEBA1CF902C7004384FC /* Flutter.framework */,
13 | 				9740EEB21CF90195004384FC /* Debug.xcconfig */,
14 | 				7AFA3C8E1D35360C0083082E /* Release"
15 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_4_0.csv:
--------------------------------------------------------------------------------
 1 | doc_id,corpus,text
 2 | 1670,github,"</name>
 3 | 	<description></description>
 4 | 	<!-- TODO <organization> <name>company name</name> <url>company url</url> 
 5 | 		</organization> -->
 6 | 	<licenses>
 7 | 		<license>
 8 | 			<name>The Apache Software License, Version 2.0</name>
 9 | 			<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
10 | 			<distribution>repo</distribution>
11 | 		</license>
12 | 	</licenses>
13 | 	
14 | 	<dependencies>
15 | 		<!-- WICKET DEPENDENCIES -->
16 | 		<dependency>
17 | 			<groupId>org.apache.wicket</groupId>
18 | 			<artifactId>wicket-core</artifactId>
19 | 		"
20 | 13379,github," Corbin Simpson <MostAwesomeDude@gmail.com>
21 |  *
22 |  * Permission is hereby granted, free of charge, to any person obtaining a
23 |  * copy of this software and associated documentation files (the ""Software""),
24 |  * to deal in the Software without restriction, including without limitation
25 |  * on the rights to use, copy, modify, merge, publish, distribute, sub
26 |  * license, and/or sell copies of the Software, and to permit persons to whom
27 |  * the Software is furnished to do so, subject to the following conditions:
28 |  *
29 |  * The above copyright notice and this permission notice (including the next
30 |  * paragraph) shall be included in all copies or substantial portions of the
31 |  * Software.
32 |  *
33 |  * THE SOFTWARE IS PROVIDED ""AS IS"", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 |  * FITNESS FOR"
36 | 10234,github,"al_searchbar
37 | msgid ""<span class=\""small mr-1 navbar-text\"">Sort By:</span>""
38 | msgstr """"
39 | 
40 | #. module: portal
41 | #: model_terms:ir.ui.view,arch_db:portal.portal_share_template
42 | msgid ""<strong>Open </strong>""
43 | msgstr """"
44 | 
45 | #. module: portal
46 | #: model:mail.template,body_html:portal.mail_template_data_portal_welcome
47 | msgid """"
48 | ""<table border=\""0\"" cellpadding=\""0\"" cellspacing=\""0\"" style=\""padding-top: 16px; background-color: #F1F1F1; font-family:Verdana, Arial,sans-serif; color: #454748; width: 100%; border-collapse:separate;\""><tr><td align=\""center"
49 | 4719,github,"px;
50 | }
51 | h3 {
52 |   font-size: 20px;
53 |   line-height: 28px;
54 | }
55 | .large-h1 {
56 |   font-size: 42px;
57 |   line-height: 48px;
58 |   font-weight: 300;
59 | }
60 | p {
61 |   font-size: 14px;
62 |   line-height: 24px;
63 | }
64 | p:last-child {
65 |   margin-bottom: 0px;
66 | }
67 | p.lead {
68 |   font-size: 16px;
69 |   line-height: 30px;
70 |   font-weight: 400;
71 | }
72 | span.lead {
73 |   font-weight: 400;
74 | }
75 | .uppercase {
76 |   text-transform: uppercase;
77 |   letter-spacing: 1px;
78 |   display: inline-block;
79 |   margin-right: -1px;
80 | }
81 | strong"
82 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_4_1.csv:
--------------------------------------------------------------------------------
 1 | doc_id,corpus,text
 2 | 7576,pile-cc,".
 3 | 
 4 | Cloud Expo, Inc. has announced today that Andi Mann returns to 'DevOps at Cloud Expo 2017' as Conference Chair
 5 | The @DevOpsSummit at Cloud Expo will take place on June 6-8, 2017, at the Javits Center in New York City, NY.
 6 | ""DevOps is set to be one of the most profound disruptions to hi...
 7 | 
 8 | 20th Cloud Expo, taking place June 6-8, 2017, at the Javits Center in New York City, NY, will feature technical sessions from a rock star conference faculty and the leading industry players in the world.
 9 | Cloud computing is now being embraced by a majority of enterprises of all sizes....
10 | 
11 | With major technology companies and startups seriously embracing IoT strategies, now is the perfect time to attend @ThingsExpo 2016 in New York. Learn what is going on, contribute to the discussions, and ensure that your enterprise is as ""IoT-Ready"" as it can be"
12 | 10509,github,"_PATIENT_VISIT - this is probably a bug in the source code generator."", e);
13 | 	   }
14 | 	}
15 | 
16 | 	///<summary>
17 | 	/// Returns PV1 (Patient visit) - creates it if necessary
18 | 	///</summary>
19 | 	public PV1 PV1 { 
20 | get{
21 | 	   PV1 ret = null;
22 | 	   try {
23 | 	      ret = (PV1)this.GetStructure(""PV1"");
24 | 	   } catch(HL7Exception e) {
25 | 	      HapiLogFactory.GetHapiLog(GetType()).Error(""Unexpected error accessing data - this is probably a bug in the source code generator."", e);
26 | 	      throw new System.Exception(""An unexpected error ocurred"",e"
27 | 4253,github,"
28 |  *
29 |  * Based on libqrencode C library distributed under LGPL 2.1
30 |  * Copyright (C) 2006, 2007, 2008, 2009 Kentaro Fukuchi <fukuchi@megaui.net>
31 |  *
32 |  * PHP QR Code is distributed under LGPL 3
33 |  * Copyright (C) 2010 Dominik Dzienia <deltalab at poczta dot fm>
34 |  *
35 |  * This library is free software; you can redistribute it and/or
36 |  * modify it under the terms of the GNU Lesser General Public
37 |  * License as published by the Free Software Foundation; either
38 |  * version 3 of the License, or any later version.
39 |  *
40 |  * This library is distributed in the hope that it will be useful,
41 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
42 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
43 |  * Lesser"
44 | 5150,github,"</B>()</PRE>
45 |         <DL>
46 |         <DD>
47 |         </DD>
48 |         <P>
49 |         </DL>
50 |         <HR>
51 |     
52 |     &nbsp;
53 | 
54 | 
55 | <!-- ============ METHOD DETAIL ========== -->
56 | 
57 | <A NAME=""method_detail""><!-- --></A>
58 | 
59 |     <TABLE BORDER=""1"" WIDTH=""100%"" CELLPADDING=""3"" CELLSPACING=""0"" SUMMARY="""">
60 |     <TR BGCOLOR=""#CCCCFF"" CLASS=""TableHeadingColor"">
61 |     <TH ALIGN=""left"" COLSPAN=""1""><FONT SIZE="""
62 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_4_2.csv:
--------------------------------------------------------------------------------
 1 | doc_id,corpus,text
 2 | 7592,pile-cc,"...
 3 | 
 4 | You are responsible for reading, understanding and agreeing to the National Law Review's (NLR’s) and the National Law Forum LLC's Terms of Use and Privacy Policy before using the National Law Review website. The National Law Review is a free to use, no-log in database of legal and business articles. The content and links on www.NatLawReview.com are intended for general information purposes only. Any legal analysis, legislative updates or other content and links should not be construed as legal or professional advice or a substitute for such advice. No attorney-client or confidential relationship is formed by the transmission of information between you and the National Law Review website or any of the law firms, attorneys or other professionals or organizations who include content on the National Law Review website. If you require legal or professional advice, kindly contact an attorney or other suitable professional advisor.
 5 | 
 6 | Some states have laws and ethical rules regarding solicitation and advertisement practices by attorneys and/or other professionals. The"
 7 | 3551,github,"/*
 8 |  *Copyright (c) 2018 Intel Corporation.
 9 |  *
10 |  *Permission is hereby granted, free of charge, to any person obtaining a copy
11 |  *of this software and associated documentation files (the ""Software""), to deal
12 |  *in the Software without restriction, including without limitation the rights
13 |  *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 |  *copies of the Software, and to permit persons to whom the Software is
15 |  *furnished to do so, subject to the following conditions:
16 |  *
17 |  *The above copyright notice and this permission notice shall be included in
18 |  *all copies or substantial portions of the Software.
19 |  *
20 |  *THE SOFTWARE IS PROVIDED ""AS IS"", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 |  *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 |  *FITNESS FOR A PARTICULAR PURPOSE AND NON"
23 | 9698,books3,"-0
24 | 
25 | # About the Publisher
26 | 
27 | Australia
28 | 
29 | HarperCollins Publishers Australia Pty. Ltd.
30 | 
31 | Level 13, 201 Elizabeth Street
32 | 
33 | Sydney, NSW 2000, Australia
34 | 
35 | www.harpercollins.com.au
36 | 
37 | Canada
38 | 
39 | HarperCollins Publishers Ltd
40 | 
41 | Bay Adelaide Centre, East Tower
42 | 
43 | 22 Adelaide Street West, 41st Floor
44 | 
45 | Toronto, Ontario, M5H 4E3
46 | 
47 | www.harpercollins.ca
48 | 
49 | India
50 | 
51 | HarperCollins India
52 | 
53 | A 75, Sector 57
54 | 
55 | Noida
56 | 
57 | Uttar Pradesh 201 301
58 | 
59 | www.harpercollins.co.in
60 | 
61 | New Zealand
62 | 
63 | HarperCollins Publishers New Zealand
64 | 
65 | Unit D1, 63 Apollo Drive
66 | 
67 | Rosedale 0632
68 | 
69 | Auckland, New Zealand
70 | 
71 | www.harpercollins.co.nz
72 | 
73 | United Kingdom
74 | 
75 | HarperCollins"
76 | 3759,github,"'    WakeOnLAN - Wake On LAN
77 | '    Copyright (C) 2004-2019 Aquila Technology, LLC. <webmaster@aquilatech.com>
78 | '
79 | '    This file is part of WakeOnLAN.
80 | '
81 | '    WakeOnLAN is free software: you can redistribute it and/or modify
82 | '    it under the terms of the GNU General Public License as published by
83 | '    the Free Software Foundation, either version 3 of the License, or
84 | '    (at your option) any later version.
85 | '
86 | '    WakeOnLAN is distributed in the hope that it will be useful,
87 | '    but WITHOUT ANY WARRANTY; without even the implied warranty of
88 | '    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. "
89 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_4_3.csv:
--------------------------------------------------------------------------------
 1 | doc_id,corpus,text
 2 | 11584,github,"/devel/api-conventions.md#spec-and-status
 3 |   // +optional
 4 |   optional CronJobStatus status = 3;
 5 | }
 6 | 
 7 | // CronJobList is a collection of cron jobs.
 8 | message CronJobList {
 9 |   // Standard list metadata.
10 |   // More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#metadata
11 |   // +optional
12 |   optional k8s.io.apimachinery.pkg.apis.meta.v1.ListMeta metadata = 1;
13 | 
14 |   // items is the list of CronJobs.
15 |   repeated CronJob items = 2;
16 | }
17 | 
18 | // CronJobSpec describes how the job execution will look like and when it will actually run.
19 | message CronJobSpec {
20 |   // The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron"
21 | 5034,github,"-apache-stretch
22 | 
23 | # make Apt non-interactive
24 | RUN echo 'APT::Get::Assume-Yes ""true"";' > /etc/apt/apt.conf.d/90circleci \
25 |   && echo 'DPkg::Options ""--force-confnew"";' >> /etc/apt/apt.conf.d/90circleci
26 | 
27 | ENV DEBIAN_FRONTEND=noninteractive
28 | 
29 | # Debian Jessie is EOL'd and original repos don't work.
30 | # Switch to the archive mirror until we can get people to
31 | # switch to Stretch.
32 | RUN if grep -q Debian /etc/os-release && grep -q jessie /etc/os-release; then \
33 | 	rm /etc/apt/sources.list \
34 |     && echo ""deb http://archive.debian.org/debian/ jessie main"" >> /"
35 | 12282,github,"
36 |     },
37 |     ""PrivateEndpointConnectionProperties"": {
38 |       ""type"": ""object"",
39 |       ""properties"": {
40 |         ""privateEndpoint"": {
41 |           ""oneOf"": [
42 |             {
43 |               ""$ref"": ""#/definitions/PrivateEndpoint""
44 |             },
45 |             {
46 |               ""$ref"": ""https://schema.management.azure.com/schemas/common/definitions.json#/definitions/expression""
47 |             }
48 |      "
49 | 28,github,"// Copyright (c) 2014, Ryan Lewis All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
50 | // 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
51 | // 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
52 | // 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.  
53 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ""AS IS"" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR"
54 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_4_4.csv:
--------------------------------------------------------------------------------
 1 | doc_id,corpus,text
 2 | 13249,github,".xcconfig */,
 3 | 			);
 4 | 			name = Pods;
 5 | 			sourceTree = ""<group>"";
 6 | 		};
 7 | 		9740EEB11CF90186004384FC /* Flutter */ = {
 8 | 			isa = PBXGroup;
 9 | 			children = (
10 | 				3B80C3931E831B6300D905FE /* App.framework */,
11 | 				3B3967151E833CAA004F5970 /* AppFrameworkInfo.plist */,
12 | 				9740EEBA1CF902C7004384FC /* Flutter.framework */,
13 | 				9740EEB21CF90195004384FC /* Debug.xcconfig */,
14 | 				7AFA3C8E1D35360C0083082E /* Release"
15 | 8335,github,"
16 | 
17 | f 1/1/1 2/2/2 3/3/3
18 | f 1/4/4 3/5/5 4/6/6
19 | f 5/7/7 6/8/8 7/9/9
20 | f 5/10/10 7/11/11 8/12/12
21 | f 9/13/13 10/14/14 11/15/15
22 | f 9/16/16 11/17/17 12/18/18
23 | f 13/19/19 14/20/20 15/21/21
24 | f 13/22/22 15/23/23 16/24/24
25 | f 17/25/25 18/26/26 19/27/27
26 | f 17/28/28 19/29/29 20/30/30
27 | f 21/31/31 22/32/32 23/33/33"
28 | 13075,github,"Options.html"" target=""_top"">No&nbsp;Frames</a></li>
29 | </ul>
30 | <ul class=""navList"" id=""allclasses_navbar_top"">
31 | <li><a href=""../../../../../allclasses-noframe.html"">All&nbsp;Classes</a></li>
32 | </ul>
33 | <ul class=""navListSearch"">
34 | <li><span>SEARCH:&nbsp;</span>
35 | <input type=""text"" id=""search"" value="" "" disabled=""disabled"">
36 | <input type=""reset"" id=""reset"" value="" "" disabled=""disabled"">
37 | </li>
38 | </ul>
39 | <div>
40 | <script type=""text/javascript""><!--
41 |   allClassesLink = document.getElementById(""allclasses_navbar_top"");
42 |   if(window==top) {
43 |     allClassesLink"
44 | 2764,pile-cc,".
45 | 
46 | Page last modified 2019-02-28
47 | 
48 | Why is FinancesOnline free?
49 | 
50 | FinancesOnline is available for free for all business professionals interested in an efficient way to find top-notch SaaS solutions. We are able to keep our service free of charge thanks to cooperation with some of the vendors, who are willing to pay us for traffic and sales opportunities provided by our website.
51 | 
52 | Our rankings can be filtered by:
53 | 
54 | Sponsored: vendors bid for position here and products are sorted based on the amount of that bid.
55 | 
56 | SmartScore: products are sorted based on our SmartScore rating.
57 | 
58 | User Reviews: products are sorted based on ratings from user reviews.
59 | 
60 | Please note, that FinancesOnline lists all vendors, we’re not limited only to the ones that pay us, and all software providers have an equal opportunity to get featured in our rankings and comparisons, win awards, gather user reviews, all in an effort"
61 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_8_0.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 1670,github,"</name>
  3 | 	<description></description>
  4 | 	<!-- TODO <organization> <name>company name</name> <url>company url</url> 
  5 | 		</organization> -->
  6 | 	<licenses>
  7 | 		<license>
  8 | 			<name>The Apache Software License, Version 2.0</name>
  9 | 			<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
 10 | 			<distribution>repo</distribution>
 11 | 		</license>
 12 | 	</licenses>
 13 | 	
 14 | 	<dependencies>
 15 | 		<!-- WICKET DEPENDENCIES -->
 16 | 		<dependency>
 17 | 			<groupId>org.apache.wicket</groupId>
 18 | 			<artifactId>wicket-core</artifactId>
 19 | 		"
 20 | 13379,github," Corbin Simpson <MostAwesomeDude@gmail.com>
 21 |  *
 22 |  * Permission is hereby granted, free of charge, to any person obtaining a
 23 |  * copy of this software and associated documentation files (the ""Software""),
 24 |  * to deal in the Software without restriction, including without limitation
 25 |  * on the rights to use, copy, modify, merge, publish, distribute, sub
 26 |  * license, and/or sell copies of the Software, and to permit persons to whom
 27 |  * the Software is furnished to do so, subject to the following conditions:
 28 |  *
 29 |  * The above copyright notice and this permission notice (including the next
 30 |  * paragraph) shall be included in all copies or substantial portions of the
 31 |  * Software.
 32 |  *
 33 |  * THE SOFTWARE IS PROVIDED ""AS IS"", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 34 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 35 |  * FITNESS FOR"
 36 | 10234,github,"al_searchbar
 37 | msgid ""<span class=\""small mr-1 navbar-text\"">Sort By:</span>""
 38 | msgstr """"
 39 | 
 40 | #. module: portal
 41 | #: model_terms:ir.ui.view,arch_db:portal.portal_share_template
 42 | msgid ""<strong>Open </strong>""
 43 | msgstr """"
 44 | 
 45 | #. module: portal
 46 | #: model:mail.template,body_html:portal.mail_template_data_portal_welcome
 47 | msgid """"
 48 | ""<table border=\""0\"" cellpadding=\""0\"" cellspacing=\""0\"" style=\""padding-top: 16px; background-color: #F1F1F1; font-family:Verdana, Arial,sans-serif; color: #454748; width: 100%; border-collapse:separate;\""><tr><td align=\""center"
 49 | 4719,github,"px;
 50 | }
 51 | h3 {
 52 |   font-size: 20px;
 53 |   line-height: 28px;
 54 | }
 55 | .large-h1 {
 56 |   font-size: 42px;
 57 |   line-height: 48px;
 58 |   font-weight: 300;
 59 | }
 60 | p {
 61 |   font-size: 14px;
 62 |   line-height: 24px;
 63 | }
 64 | p:last-child {
 65 |   margin-bottom: 0px;
 66 | }
 67 | p.lead {
 68 |   font-size: 16px;
 69 |   line-height: 30px;
 70 |   font-weight: 400;
 71 | }
 72 | span.lead {
 73 |   font-weight: 400;
 74 | }
 75 | .uppercase {
 76 |   text-transform: uppercase;
 77 |   letter-spacing: 1px;
 78 |   display: inline-block;
 79 |   margin-right: -1px;
 80 | }
 81 | strong"
 82 | 7003,github," class=""separator:a25fe4a28bd22016ef2a6cf10db5d649d inherit pub_methods_class_emojicode_compiler_1_1_a_s_t_unary""><td class=""memSeparator"" colspan=""2"">&#160;</td></tr>
 83 | <tr class=""inherit_header pub_methods_class_emojicode_compiler_1_1_a_s_t_expr""><td colspan=""2"" onclick=""javascript:toggleInherit('pub_methods_class_emojicode_compiler_1_1_a_s_t_expr')""><img src=""closed.png"" alt=""-""/>&#160;Public Member Functions inherited from <a class=""el"" href=""class_emojicode_compiler_1_1_a"
 84 | 2831,openwebtext2," to.
 85 | 
 86 | Thank you for reading this Techdirt post. With so many things competing for everyone’s attention these days, we really appreciate you giving us your time. We work hard every day to put quality content out there for our community. Techdirt is one of the few remaining truly independent media outlets. We do not have a giant corporation behind us, and we rely heavily on our community to support us, in an age when advertisers are increasingly uninterested in sponsoring small, independent sites — especially a site like ours that is unwilling to pull punches in its reporting and analysis. While other websites have resorted to paywalls, registration requirements, and increasingly annoying/intrusive advertising, we have always kept Techdirt open and available to anyone. But in order to continue doing so, we need your support. We offer a variety of ways for our readers to support us, from direct donations to special subscriptions and cool merchandise — and every little bit helps. Thank you."
 87 | 13014,github,"
 88 | 	req, err := client.DeletePreparer(ctx, resourceGroupName, gatewayName)
 89 | 	if err!= nil {
 90 | 		err = autorest.NewErrorWithError(err, ""network.VpnGatewaysClient"", ""Delete"", nil, ""Failure preparing request"")
 91 | 		return
 92 | 	}
 93 | 
 94 | 	result, err = client.DeleteSender(req)
 95 | 	if err!= nil {
 96 | 		err = autorest.NewErrorWithError(err, ""network.VpnGatewaysClient"", ""Delete"", result.Response(), ""Failure sending request"")
 97 | 		return
 98 | 	}
 99 | 
100 | 	return
101 | }
102 | 
103 | // DeletePreparer prepares the Delete request.
104 | func (client VpnGatewaysClient) DeletePreparer(ctx context.Context, resourceGroupName string, gatewayName string) (*http.Request, error) {
105 | 	pathParameters := map[string]interface{}{
106 | 		"
107 | 11979,pile-cc,"
108 | 
109 | Content Continues Below
110 | 
111 | Download this free guide
112 | 
113 | Unified Communications: the key to prospering in the new working reality of Covid-19
114 | 
115 | The coronavirus is changing everything about how people work, and will do so permanently. It added that even though the working world was experiencing unprecedented uncertainty, there were two things that should be borne in mind: the virus will pass, and at the other side of the pandemic, the world of work will look very different.
116 | 
117 | I agree to TechTarget’s Terms of Use, Privacy Policy, and the transfer of my information to the United States for processing to provide me with relevant information as described in our Privacy Policy.
118 | 
119 | Please check the box if you want to proceed.
120 | 
121 | I agree to my information being processed by TechTarget and its Partners to contact me via phone, email, or other means regarding information relevant to my professional interests. I may unsubscribe at any time.
122 | 
123 | Please"
124 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_8_1.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 7576,pile-cc,".
  3 | 
  4 | Cloud Expo, Inc. has announced today that Andi Mann returns to 'DevOps at Cloud Expo 2017' as Conference Chair
  5 | The @DevOpsSummit at Cloud Expo will take place on June 6-8, 2017, at the Javits Center in New York City, NY.
  6 | ""DevOps is set to be one of the most profound disruptions to hi...
  7 | 
  8 | 20th Cloud Expo, taking place June 6-8, 2017, at the Javits Center in New York City, NY, will feature technical sessions from a rock star conference faculty and the leading industry players in the world.
  9 | Cloud computing is now being embraced by a majority of enterprises of all sizes....
 10 | 
 11 | With major technology companies and startups seriously embracing IoT strategies, now is the perfect time to attend @ThingsExpo 2016 in New York. Learn what is going on, contribute to the discussions, and ensure that your enterprise is as ""IoT-Ready"" as it can be"
 12 | 10509,github,"_PATIENT_VISIT - this is probably a bug in the source code generator."", e);
 13 | 	   }
 14 | 	}
 15 | 
 16 | 	///<summary>
 17 | 	/// Returns PV1 (Patient visit) - creates it if necessary
 18 | 	///</summary>
 19 | 	public PV1 PV1 { 
 20 | get{
 21 | 	   PV1 ret = null;
 22 | 	   try {
 23 | 	      ret = (PV1)this.GetStructure(""PV1"");
 24 | 	   } catch(HL7Exception e) {
 25 | 	      HapiLogFactory.GetHapiLog(GetType()).Error(""Unexpected error accessing data - this is probably a bug in the source code generator."", e);
 26 | 	      throw new System.Exception(""An unexpected error ocurred"",e"
 27 | 4253,github,"
 28 |  *
 29 |  * Based on libqrencode C library distributed under LGPL 2.1
 30 |  * Copyright (C) 2006, 2007, 2008, 2009 Kentaro Fukuchi <fukuchi@megaui.net>
 31 |  *
 32 |  * PHP QR Code is distributed under LGPL 3
 33 |  * Copyright (C) 2010 Dominik Dzienia <deltalab at poczta dot fm>
 34 |  *
 35 |  * This library is free software; you can redistribute it and/or
 36 |  * modify it under the terms of the GNU Lesser General Public
 37 |  * License as published by the Free Software Foundation; either
 38 |  * version 3 of the License, or any later version.
 39 |  *
 40 |  * This library is distributed in the hope that it will be useful,
 41 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 42 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 43 |  * Lesser"
 44 | 5150,github,"</B>()</PRE>
 45 |         <DL>
 46 |         <DD>
 47 |         </DD>
 48 |         <P>
 49 |         </DL>
 50 |         <HR>
 51 |     
 52 |     &nbsp;
 53 | 
 54 | 
 55 | <!-- ============ METHOD DETAIL ========== -->
 56 | 
 57 | <A NAME=""method_detail""><!-- --></A>
 58 | 
 59 |     <TABLE BORDER=""1"" WIDTH=""100%"" CELLPADDING=""3"" CELLSPACING=""0"" SUMMARY="""">
 60 |     <TR BGCOLOR=""#CCCCFF"" CLASS=""TableHeadingColor"">
 61 |     <TH ALIGN=""left"" COLSPAN=""1""><FONT SIZE="""
 62 | 506,github,"': [
 63 | \ ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl','menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'isindex', 'fieldset', 'table', 'form', 'a', 'br','span', 'bdo', 'object', 'applet', 'img','map', 'iframe', 'tt', 'i', 'b', 'u','s','strike', 'big','small', 'font', 'basefont', 'em','strong', 'dfn', 'code', 'q','samp', 'kbd', 'var', 'cite', 'abbr', 'acronym','sub','sup', 'input','select', 'textarea', 'label', 'button"
 64 | 1622,pile-cc,".”
 65 | 
 66 | Copyright 2016 The Arsenal Football Club plc. Permission to use quotations from this article is granted subject to appropriate credit being given to www.arsenal.com as the source
 67 | 
 68 | Please ensure you enter the correct Date of Birth as it will be required if you need to reset your password
 69 | 
 70 | Please ensure that you enter your parents email address and click here to review our privacy policy for those who are under 13. Please note that an email address can only be used once to register as a Digital Member so you cannot register for both an adult and junior membership under the same email address.
 71 | 
 72 | Mobile Number
 73 | 
 74 | * Country
 75 | 
 76 | * State
 77 | 
 78 | Language Preference
 79 | 
 80 | * Postcode
 81 | 
 82 | * Email Address
 83 | 
 84 | * Confirm Email Address
 85 | 
 86 | * Password
 87 | 
 88 | * Confirm Password
 89 | 
 90 | Get all the news from Arsenal.com
 91 | 
 92 | Weekly E-Newsletter (only in English)I would like to receive the weekly e-news"
 93 | 13684,github,"
 94 | 
 95 |   This file is part of DeclarativeWidgets, library and tools for creating QtWidget UIs with QML.
 96 | 
 97 |   Copyright (C) 2017-2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com
 98 |   Author: Nathan Collins <nathan.collins@kdab.com>
 99 | 
100 |   Licensees holding valid commercial KDAB DeclarativeWidgets licenses may use this file in
101 |   accordance with DeclarativeWidgets Commercial License Agreement provided with the Software.
102 | 
103 |   Contact info@kdab.com if any conditions of this licensing are not clear to you.
104 | 
105 |   This program is free software; you can redistribute it and/or modify
106 |   it under the terms of the GNU General Public License as published by
107 |   the Free Software Foundation, either version 2 of the License, or
108 |   (at your option) any later version"
109 | 2145,freelaw,"
110 |                              UNITED STATES COURT OF APPEALS
111 |                                  FOR THE SECOND CIRCUIT
112 | 
113 |                                     SUMMARY ORDER
114 |      RULINGS BY SUMMARY ORDER DO NOT HAVE PRECEDENTIAL EFFECT.     CITATION TO SUMMARY ORDERS
115 |      FILED AFTER JANUARY 1, 2007, IS PERMITTED AND IS GOVERNED BY THIS COURT’S LOCAL RULE 32.1
116 |     "
117 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_8_2.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 7592,pile-cc,"...
  3 | 
  4 | You are responsible for reading, understanding and agreeing to the National Law Review's (NLR’s) and the National Law Forum LLC's Terms of Use and Privacy Policy before using the National Law Review website. The National Law Review is a free to use, no-log in database of legal and business articles. The content and links on www.NatLawReview.com are intended for general information purposes only. Any legal analysis, legislative updates or other content and links should not be construed as legal or professional advice or a substitute for such advice. No attorney-client or confidential relationship is formed by the transmission of information between you and the National Law Review website or any of the law firms, attorneys or other professionals or organizations who include content on the National Law Review website. If you require legal or professional advice, kindly contact an attorney or other suitable professional advisor.
  5 | 
  6 | Some states have laws and ethical rules regarding solicitation and advertisement practices by attorneys and/or other professionals. The"
  7 | 3551,github,"/*
  8 |  *Copyright (c) 2018 Intel Corporation.
  9 |  *
 10 |  *Permission is hereby granted, free of charge, to any person obtaining a copy
 11 |  *of this software and associated documentation files (the ""Software""), to deal
 12 |  *in the Software without restriction, including without limitation the rights
 13 |  *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 14 |  *copies of the Software, and to permit persons to whom the Software is
 15 |  *furnished to do so, subject to the following conditions:
 16 |  *
 17 |  *The above copyright notice and this permission notice shall be included in
 18 |  *all copies or substantial portions of the Software.
 19 |  *
 20 |  *THE SOFTWARE IS PROVIDED ""AS IS"", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 21 |  *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 22 |  *FITNESS FOR A PARTICULAR PURPOSE AND NON"
 23 | 9698,books3,"-0
 24 | 
 25 | # About the Publisher
 26 | 
 27 | Australia
 28 | 
 29 | HarperCollins Publishers Australia Pty. Ltd.
 30 | 
 31 | Level 13, 201 Elizabeth Street
 32 | 
 33 | Sydney, NSW 2000, Australia
 34 | 
 35 | www.harpercollins.com.au
 36 | 
 37 | Canada
 38 | 
 39 | HarperCollins Publishers Ltd
 40 | 
 41 | Bay Adelaide Centre, East Tower
 42 | 
 43 | 22 Adelaide Street West, 41st Floor
 44 | 
 45 | Toronto, Ontario, M5H 4E3
 46 | 
 47 | www.harpercollins.ca
 48 | 
 49 | India
 50 | 
 51 | HarperCollins India
 52 | 
 53 | A 75, Sector 57
 54 | 
 55 | Noida
 56 | 
 57 | Uttar Pradesh 201 301
 58 | 
 59 | www.harpercollins.co.in
 60 | 
 61 | New Zealand
 62 | 
 63 | HarperCollins Publishers New Zealand
 64 | 
 65 | Unit D1, 63 Apollo Drive
 66 | 
 67 | Rosedale 0632
 68 | 
 69 | Auckland, New Zealand
 70 | 
 71 | www.harpercollins.co.nz
 72 | 
 73 | United Kingdom
 74 | 
 75 | HarperCollins"
 76 | 3759,github,"'    WakeOnLAN - Wake On LAN
 77 | '    Copyright (C) 2004-2019 Aquila Technology, LLC. <webmaster@aquilatech.com>
 78 | '
 79 | '    This file is part of WakeOnLAN.
 80 | '
 81 | '    WakeOnLAN is free software: you can redistribute it and/or modify
 82 | '    it under the terms of the GNU General Public License as published by
 83 | '    the Free Software Foundation, either version 3 of the License, or
 84 | '    (at your option) any later version.
 85 | '
 86 | '    WakeOnLAN is distributed in the hope that it will be useful,
 87 | '    but WITHOUT ANY WARRANTY; without even the implied warranty of
 88 | '    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. "
 89 | 2353,github," Free Software Foundation, Inc.
 90 | #
 91 | # This file is free software; the Free Software Foundation
 92 | # gives unlimited permission to copy and/or distribute it,
 93 | # with or without modifications, as long as this notice is preserved.
 94 | 
 95 | # AM_MAKE_INCLUDE()
 96 | # -----------------
 97 | # Check to see how make treats includes.
 98 | AC_DEFUN([AM_MAKE_INCLUDE],
 99 | [am_make=${MAKE-make}
100 | cat > confinc << 'END'
101 | am__doit:
102 | 	@echo this is the am__doit target
103 | .PHONY: am__doit
104 | END
105 | # If we don't find an include directive, just comment out the code.
106 | AC_MSG_CHECKING([for style of include used by $am_make])
107 | am__include=""#""
108 | am__quote=
109 | _am_result=none
110 | # First try"
111 | 9760,github," \
112 |     rclpy \
113 |     std-msgs \
114 | ""
115 | 
116 | # Currently informational only -- see http://www.ros.org/reps/rep-0149.html#dependency-tags.
117 | ROS_TEST_DEPENDS = "" \
118 |     ament-copyright \
119 |     ament-flake8 \
120 |     ament-pep257 \
121 |     python3-pytest \
122 | ""
123 | 
124 | DEPENDS = ""${ROS_BUILD_DEPENDS} ${ROS_BUILDTOOL_DEPENDS}""
125 | # Bitbake doesn't support the ""export"" concept, so build them as if we needed them to build this package (even though we actually
126 | # don't) so that they're guaranteed to have been staged should this package appear in another's DEPENDS.
127 | DEPENDS +="
128 | 4952,github,"div>
129 | <dl>
130 | <dt><span class=""simpleTagLabel"">Since:</span></dt>
131 | <dd>1.7</dd>
132 | </dl>
133 | </li>
134 | </ul>
135 | </div>
136 | <div class=""summary"">
137 | <ul class=""blockList"">
138 | <li class=""blockList"">
139 | <!-- ========== METHOD SUMMARY =========== -->
140 | <section role=""region"">
141 | <ul class=""blockList"">
142 | <li class=""blockList""><a id=""method.summary"">
143 | <!--   -->
144 | </a>
145 | <h3>Method Summary</h3>
146 | <div class=""memberSummary"">
147 | <div role=""tablist"" aria-orientation=""horizontal""><button role=""tab"" aria-selected=""true"" aria-controls=""memberSummary_tabpanel"" tabindex=""0"" onkeydown=""switchTab("
148 | 14125,github,""")]
149 | [assembly: AssemblyCopyright(""Copyright ©  2015"")]
150 | [assembly: AssemblyTrademark("""")]
151 | [assembly: AssemblyCulture("""")]
152 | 
153 | // Setting ComVisible to false makes the types in this assembly not visible 
154 | // to COM components.  If you need to access a type in this assembly from 
155 | // COM, set the ComVisible attribute to true on that type.
156 | 
157 | [assembly: ComVisible(false)]
158 | 
159 | //In order to begin building localizable applications, set 
160 | //<UICulture>CultureYouAreCodingWith</UICulture> in your.csproj file
161 | //inside a <PropertyGroup>.  For example, if you are using US english
162 | //in your source files, set the <UICulture> to en-US.  Then uncomment
163 | //the NeutralResourceLanguage attribute below.  Update the ""en-US"" in
164 | "
165 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_8_3.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 11584,github,"/devel/api-conventions.md#spec-and-status
  3 |   // +optional
  4 |   optional CronJobStatus status = 3;
  5 | }
  6 | 
  7 | // CronJobList is a collection of cron jobs.
  8 | message CronJobList {
  9 |   // Standard list metadata.
 10 |   // More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#metadata
 11 |   // +optional
 12 |   optional k8s.io.apimachinery.pkg.apis.meta.v1.ListMeta metadata = 1;
 13 | 
 14 |   // items is the list of CronJobs.
 15 |   repeated CronJob items = 2;
 16 | }
 17 | 
 18 | // CronJobSpec describes how the job execution will look like and when it will actually run.
 19 | message CronJobSpec {
 20 |   // The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron"
 21 | 5034,github,"-apache-stretch
 22 | 
 23 | # make Apt non-interactive
 24 | RUN echo 'APT::Get::Assume-Yes ""true"";' > /etc/apt/apt.conf.d/90circleci \
 25 |   && echo 'DPkg::Options ""--force-confnew"";' >> /etc/apt/apt.conf.d/90circleci
 26 | 
 27 | ENV DEBIAN_FRONTEND=noninteractive
 28 | 
 29 | # Debian Jessie is EOL'd and original repos don't work.
 30 | # Switch to the archive mirror until we can get people to
 31 | # switch to Stretch.
 32 | RUN if grep -q Debian /etc/os-release && grep -q jessie /etc/os-release; then \
 33 | 	rm /etc/apt/sources.list \
 34 |     && echo ""deb http://archive.debian.org/debian/ jessie main"" >> /"
 35 | 12282,github,"
 36 |     },
 37 |     ""PrivateEndpointConnectionProperties"": {
 38 |       ""type"": ""object"",
 39 |       ""properties"": {
 40 |         ""privateEndpoint"": {
 41 |           ""oneOf"": [
 42 |             {
 43 |               ""$ref"": ""#/definitions/PrivateEndpoint""
 44 |             },
 45 |             {
 46 |               ""$ref"": ""https://schema.management.azure.com/schemas/common/definitions.json#/definitions/expression""
 47 |             }
 48 |      "
 49 | 28,github,"// Copyright (c) 2014, Ryan Lewis All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 50 | // 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 51 | // 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 52 | // 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.  
 53 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ""AS IS"" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR"
 54 | 9702,pile-cc,".
 55 | 
 56 | *Number of views in last 30 days
 57 | 
 58 | † Based on 2018 EPA mileage ratings. Use for comparison purposes only. Your actual mileage will vary depending on how you drive and maintain your vehicle.
 59 | 
 60 | *Price listed does not include a $35 electronic filing fee for titling (custom plates do not apply) or $115 Documentation fee. Price listed does not include, tax, title, license, registration or state emission fees. ""Instant Savings"" includes Kendall Discount that all customers qualify for. Please call or visit Kendall Volkswagen of Bend to see if you qualify for any additional discounts that are not included in ""Instant Savings"". While every reasonable effort is made to ensure the accuracy of the vehicle description displayed on this page, dealer makes no warranties, express or implied, with regard to the vehicle or vehicle description. Please verify all vehicle information before entering into a purchase agreement. If the vehicle being purchased is to be financed, the annual percentage rate may be negotiated with the dealer. The"
 61 | 11437,github,"ef-0d81-4d96-af62-32ba7e181dc2"",
 62 |             ""5ae01620-6217-429c-a6ee-f3c90a1813fe"",
 63 |             ""cea43ac2-635b-47f0-8e22-31dd43944c2b"",
 64 |             ""80ed906b-d3d8-4eff-a382-f3ccfe770fbd"",
 65 |             ""a6284cb9-0ec4-4644-ac38-9df2c430d565"",
 66 |             ""0e61b3b3-d822-42b9-"
 67 | 6193,github," ""HISTORY.md"",
 68 |     ""LICENSE"",
 69 |     ""README.md"",
 70 |     ""index.js""
 71 |   ],
 72 |   ""homepage"": ""https://github.com/stream-utils/unpipe#readme"",
 73 |   ""license"": ""MIT"",
 74 |   ""name"": ""unpipe"",
 75 |   ""repository"": {
 76 |     ""type"": ""git"",
 77 |     ""url"": ""git+https://github.com/stream-utils/unpipe.git""
 78 |   },
 79 |   ""scripts"": {
 80 |     ""test"": ""mocha --reporter spec --bail --check-leaks test/"",
 81 |     ""test-cov"": ""istanbul cover node_modules/mocha/bin/_mocha -- --reporter dot --check-leaks test/"",
 82 |     """
 83 | 10573,github,"<?xml version=""1.0"" encoding=""UTF-8""?>
 84 | <!--
 85 |   Licensed to the Apache Software Foundation (ASF) under one
 86 |   or more contributor license agreements.  See the NOTICE file
 87 |   distributed with this work for additional information
 88 |   regarding copyright ownership.  The ASF licenses this file
 89 |   to you under the Apache License, Version 2.0 (the
 90 |   ""License""); you may not use this file except in compliance
 91 |   with the License.  You may obtain a copy of the License at
 92 | 
 93 |          http://www.apache.org/licenses/LICENSE-2.0
 94 | 
 95 |   Unless required by applicable law or agreed to in writing,
 96 |   software distributed under the License is distributed on an
 97 |   ""AS IS"" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 98 |   KIND, either express or implied.  See the License for the
 99 |   specific"
100 | 


--------------------------------------------------------------------------------
/data/main/lm_extraction_8_4.csv:
--------------------------------------------------------------------------------
  1 | doc_id,corpus,text
  2 | 13249,github,".xcconfig */,
  3 | 			);
  4 | 			name = Pods;
  5 | 			sourceTree = ""<group>"";
  6 | 		};
  7 | 		9740EEB11CF90186004384FC /* Flutter */ = {
  8 | 			isa = PBXGroup;
  9 | 			children = (
 10 | 				3B80C3931E831B6300D905FE /* App.framework */,
 11 | 				3B3967151E833CAA004F5970 /* AppFrameworkInfo.plist */,
 12 | 				9740EEBA1CF902C7004384FC /* Flutter.framework */,
 13 | 				9740EEB21CF90195004384FC /* Debug.xcconfig */,
 14 | 				7AFA3C8E1D35360C0083082E /* Release"
 15 | 8335,github,"
 16 | 
 17 | f 1/1/1 2/2/2 3/3/3
 18 | f 1/4/4 3/5/5 4/6/6
 19 | f 5/7/7 6/8/8 7/9/9
 20 | f 5/10/10 7/11/11 8/12/12
 21 | f 9/13/13 10/14/14 11/15/15
 22 | f 9/16/16 11/17/17 12/18/18
 23 | f 13/19/19 14/20/20 15/21/21
 24 | f 13/22/22 15/23/23 16/24/24
 25 | f 17/25/25 18/26/26 19/27/27
 26 | f 17/28/28 19/29/29 20/30/30
 27 | f 21/31/31 22/32/32 23/33/33"
 28 | 13075,github,"Options.html"" target=""_top"">No&nbsp;Frames</a></li>
 29 | </ul>
 30 | <ul class=""navList"" id=""allclasses_navbar_top"">
 31 | <li><a href=""../../../../../allclasses-noframe.html"">All&nbsp;Classes</a></li>
 32 | </ul>
 33 | <ul class=""navListSearch"">
 34 | <li><span>SEARCH:&nbsp;</span>
 35 | <input type=""text"" id=""search"" value="" "" disabled=""disabled"">
 36 | <input type=""reset"" id=""reset"" value="" "" disabled=""disabled"">
 37 | </li>
 38 | </ul>
 39 | <div>
 40 | <script type=""text/javascript""><!--
 41 |   allClassesLink = document.getElementById(""allclasses_navbar_top"");
 42 |   if(window==top) {
 43 |     allClassesLink"
 44 | 2764,pile-cc,".
 45 | 
 46 | Page last modified 2019-02-28
 47 | 
 48 | Why is FinancesOnline free?
 49 | 
 50 | FinancesOnline is available for free for all business professionals interested in an efficient way to find top-notch SaaS solutions. We are able to keep our service free of charge thanks to cooperation with some of the vendors, who are willing to pay us for traffic and sales opportunities provided by our website.
 51 | 
 52 | Our rankings can be filtered by:
 53 | 
 54 | Sponsored: vendors bid for position here and products are sorted based on the amount of that bid.
 55 | 
 56 | SmartScore: products are sorted based on our SmartScore rating.
 57 | 
 58 | User Reviews: products are sorted based on ratings from user reviews.
 59 | 
 60 | Please note, that FinancesOnline lists all vendors, we’re not limited only to the ones that pay us, and all software providers have an equal opportunity to get featured in our rankings and comparisons, win awards, gather user reviews, all in an effort"
 61 | 279,github,"/*
 62 |  *
 63 |  *  Copyright (C) 1993-2005, OFFIS
 64 |  *
 65 |  *  This software and supporting documentation were developed by
 66 |  *
 67 |  *    Kuratorium OFFIS e.V.
 68 |  *    Healthcare Information and Communication Systems
 69 |  *    Escherweg 2
 70 |  *    D-26121 Oldenburg, Germany
 71 |  *
 72 |  *  THIS SOFTWARE IS MADE AVAILABLE,  AS IS,  AND OFFIS MAKES NO  WARRANTY
 73 |  *  REGARDING  THE  SOFTWARE,  ITS  PERFORMANCE,  ITS  MERCHANTABILITY  OR
 74 |  *  FITNESS FOR ANY PARTICULAR USE, FREEDOM FROM ANY COMPUTER DISEASES  OR
 75 |  *  ITS CONFORMITY TO ANY SPECIFICATION. THE ENTIRE RISK AS TO QUALITY AND
 76 |  *  PERFORMANCE OF THE SOFTWARE IS WITH THE US"
 77 | 6713,github,"/*
 78 |  * Copyright © 2008 Chris Wilson
 79 |  *
 80 |  * Permission to use, copy, modify, distribute, and sell this software
 81 |  * and its documentation for any purpose is hereby granted without
 82 |  * fee, provided that the above copyright notice appear in all copies
 83 |  * and that both that copyright notice and this permission notice
 84 |  * appear in supporting documentation, and that the name of
 85 |  * Chris Wilson not be used in advertising or publicity pertaining to
 86 |  * distribution of the software without specific, written prior
 87 |  * permission. Chris Wilson makes no representations about the
 88 |  * suitability of this software for any purpose.  It is provided ""as
 89 |  * is"" without express or implied warranty.
 90 |  *
 91 |  * CHRIS WILSON DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
 92 |  * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 93 |  * FITNESS, IN NO EVENT SHALL CHRIS WILSON BE LIABLE FOR ANY SPECIAL,
 94 |  *"
 95 | 2319,pile-cc,".
 96 | 
 97 | Your Rating:
 98 | 
 99 | Your Recommendations:
100 | 
101 | Barnes & Noble.com Review Rules
102 | 
103 | Our reader reviews allow you to share your comments on titles you liked,
104 | or didn't, with others. By submitting an online review, you are representing to
105 | Barnes & Noble.com that all information contained in your review is original
106 | and accurate in all respects, and that the submission of such content by you
107 | and the posting of such content by Barnes & Noble.com does not and will not
108 | violate the rights of any third party. Please follow the rules below to help
109 | ensure that your review can be posted.
110 | 
111 | Reviews by Our Customers Under the Age of 13
112 | 
113 | We highly value and respect everyone's opinion concerning the titles we offer.
114 | However, we cannot allow persons under the age of 13 to have accounts at BN.com or
115 | to post customer reviews. Please see our Terms of Use for more details.
116 | 
117 | "
118 | 1959,github,"<?xml version=""1.0"" encoding=""utf-8""?>
119 | <!--
120 |      Copyright (C) 2012 The Android Open Source Project
121 | 
122 |      Licensed under the Apache License, Version 2.0 (the ""License"");
123 |      you may not use this file except in compliance with the License.
124 |      You may obtain a copy of the License at
125 | 
126 |           http://www.apache.org/licenses/LICENSE-2.0
127 | 
128 |      Unless required by applicable law or agreed to in writing, software
129 |      distributed under the License is distributed on an ""AS IS"" BASIS,
130 |      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
131 |      See the License for the specific language"
132 | 


--------------------------------------------------------------------------------
/fig1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/joeljang/knowledge-unlearning/732876dc19a782c4440c3565cf9821df0a4c7ca0/fig1.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | transformers
 2 | pytorch-lightning
 3 | pandas
 4 | numpy
 5 | nlp==0.4.0
 6 | sentencepiece==0.1.94
 7 | wandb
 8 | nltk
 9 | deepspeed
10 | boto3
11 | rouge
12 | lm-eval
13 | torchmetrics==0.10.3
14 | 


--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import argparse
  3 | from argparse import ArgumentParser
  4 | import json
  5 | import pytorch_lightning as pl
  6 | from pytorch_lightning.loggers import WandbLogger
  7 | from models.Neo_Model import Neo
  8 | from models.Neo_Model_valid import NeoValid
  9 | from models.Neo_Model_suffix_tree import NeoST
 10 | from models.Neo_Model_DP import NeoDP
 11 | from utils import MetricTracker
 12 | 
 13 | if __name__ == '__main__':
 14 |     # Parsing Arguments
 15 |     parser = ArgumentParser()
 16 |     parser.add_argument('--config', default=None, type=str)
 17 |     arg_ = parser.parse_args()
 18 |     if arg_.config is None:
 19 |         raise NameError("Include a config file in the argument please.")
 20 | 
 21 |     # Getting configurations
 22 |     config_path = arg_.config
 23 |     with open(config_path) as config_file:
 24 |         config = json.load(config_file)
 25 |     config = argparse.Namespace(**config)
 26 | 
 27 |     # Init configs that are not given
 28 |     if 'seed' not in config:
 29 |         seed = 42
 30 |     if 'privacy_method' not in config:
 31 |         config.privacy_method = None
 32 |     if 'train_sets' not in config:
 33 |         config.train_sets = ""
 34 |     if 'valid_sets' not in config:
 35 |         config.valid_sets = []
 36 |     if 'valid_subset_path' not in config:
 37 |         config.valid_subset_path = None
 38 |     if 'valid_type_path' not in config:
 39 |         config.valid_type_path = None
 40 |     if 'learning_rate' not in config:
 41 |         config.learning_rate = 5e-5
 42 |     if 'negative_loss' not in config:
 43 |         config.negative_loss = True
 44 |     if 'gradient_accumulation_steps' not in config:
 45 |         config.gradient_accumulation_steps = 1
 46 |     if 'num_train_epochs' not in config:
 47 |         config.num_train_epochs = 0
 48 |     if 'num_workers' not in config:
 49 |         config.num_workers = 0
 50 |     if 'wandb_log' not in config:
 51 |         config.wandb_log = False
 52 |     if 'strategy' not in config:
 53 |         config.strategy = None
 54 |     if 'fp16' not in config:
 55 |         config.fp16 = False
 56 |     if 'check_validation_only' not in config:
 57 |         config.check_validation_only = False
 58 |     if 'check_val_every_n_epoch' not in config:
 59 |         config.check_val_every_n_epoch = 1
 60 |     if 'tokenizer' not in config:
 61 |         config.tokenizer_name_or_path = config.model_name_or_path
 62 |     if 'target_length' not in config:
 63 |         config.target_length = None
 64 |     if 'el_n' not in config:
 65 |         config.el_n = [10]
 66 |     if 'el_threshold' not in config:
 67 |         config.el_threshold = 0
 68 |     if 'ma_threshold' not in config:
 69 |         config.ma_threshold = 0
 70 |     if 'min_train_epochs' not in config:
 71 |         config.min_train_epochs = 0
 72 |     if 'do_init_eval' not in config:
 73 |         config.do_init_eval = True if config.mode == 'unlearn' else False
 74 | 
 75 |     pl.seed_everything(seed, workers=True)
 76 | 
 77 |     # Set console logger
 78 |     logger = logging.getLogger()
 79 |     logger.setLevel(logging.INFO)
 80 |     formatter = logging.Formatter(
 81 |         '[%(levelname)s] %(asctime)s (%(filename)s:%(lineno)d) : %(message)s'
 82 |     )
 83 |     handler = logging.StreamHandler()
 84 |     handler.setFormatter(formatter)
 85 |     logger.addHandler(handler)
 86 | 
 87 |     # Set wandb logger
 88 |     if config.wandb_log:
 89 |         wandb_logger = WandbLogger(
 90 |             project=config.wandb_project,
 91 |             name=config.wandb_run_name,
 92 |             entity='lklab_kaist')
 93 |     else:
 94 |         wandb_logger = None
 95 | 
 96 |     callbacks = [MetricTracker(config.wandb_run_name)]
 97 | 
 98 |     # Setting for pytorch lightning trainer
 99 |     train_params = dict(
100 |         accumulate_grad_batches=config.gradient_accumulation_steps,
101 |         accelerator='gpu',
102 |         devices=config.ngpu,
103 |         max_epochs=int(config.num_train_epochs),
104 |         precision=16 if config.fp16 else 32,
105 |         check_val_every_n_epoch=config.check_val_every_n_epoch,
106 |         enable_checkpointing=False,
107 |         callbacks=callbacks,
108 |         logger=wandb_logger,
109 |         strategy=config.strategy,
110 |         num_sanity_val_steps=0,
111 |         limit_val_batches=1,
112 |         log_every_n_steps=1
113 |     )
114 | 
115 |     if config.check_validation_only:
116 |         trainer = pl.Trainer(**train_params)
117 |         if config.privacy_method == 'dp':
118 |             model = NeoDP(config)
119 |         elif config.privacy_method == 'st':
120 |             model = NeoST(config)
121 |         else:
122 |             model = NeoValid(config)
123 |         model = NeoST(config)
124 |         trainer.validate(model)
125 |     else:
126 |         trainer = pl.Trainer(**train_params)
127 |         if config.do_init_eval:
128 |             model = NeoValid(config)
129 |             trainer.validate(model)
130 |         model = Neo(config)
131 |         trainer.fit(model)
132 | 


--------------------------------------------------------------------------------
/run_dp.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import argparse
  3 | from argparse import ArgumentParser
  4 | import json
  5 | import pytorch_lightning as pl
  6 | from pytorch_lightning.loggers import WandbLogger
  7 | from models.Neo_Model import Neo
  8 | from models.Neo_Model_DP import NeoDP
  9 | # from models.Neo_Model_valid import NeoValid
 10 | 
 11 | 
 12 | if __name__ == '__main__':
 13 |     # Parsing Arguments
 14 |     parser = ArgumentParser()
 15 |     parser.add_argument('--config', default=None, type=str)
 16 |     arg_ = parser.parse_args()
 17 |     if arg_.config is None:
 18 |         raise NameError("Include a config file in the argument please.")
 19 | 
 20 |     # Getting configurations
 21 |     config_path = arg_.config
 22 |     with open(config_path) as config_file:
 23 |         config = json.load(config_file)
 24 |     config = argparse.Namespace(**config)
 25 | 
 26 |     # Init configs that are not given
 27 |     if 'seed' not in config:
 28 |         seed = 42
 29 |     if 'train_sets' not in config:
 30 |         config.train_sets = ""
 31 |     if 'valid_sets' not in config:
 32 |         config.valid_sets = []
 33 |     if 'valid_subset_path' not in config:
 34 |         config.valid_subset_path = None
 35 |     if 'valid_type_path' not in config:
 36 |         config.valid_type_path = None
 37 |     if 'learning_rate' not in config:
 38 |         config.learning_rate = 5e-5
 39 |     if 'negative_loss' not in config:
 40 |         config.negative_loss = True
 41 |     if 'gradient_accumulation_steps' not in config:
 42 |         config.gradient_accumulation_steps = 1
 43 |     if 'num_train_epochs' not in config:
 44 |         config.num_train_epochs = 0
 45 |     if 'num_workers' not in config:
 46 |         config.num_workers = 0
 47 |     if 'wandb_log' not in config:
 48 |         config.wandb_log = False
 49 |     if 'strategy' not in config:
 50 |         config.strategy = None
 51 |     if 'fp16' not in config:
 52 |         config.fp16 = False
 53 |     if 'check_validation_only' not in config:
 54 |         config.check_validation_only = False
 55 |     if 'check_val_every_n_epoch' not in config:
 56 |         config.check_val_every_n_epoch = 1
 57 |     if 'tokenizer' not in config:
 58 |         config.tokenizer_name_or_path = config.model_name_or_path
 59 |     if 'target_length' not in config:
 60 |         config.target_length = None
 61 |     if 'el_n' not in config:
 62 |         config.el_n = [10]
 63 |     if 'el_threshold' not in config:
 64 |         config.el_threshold = 0
 65 |     if 'ma_threshold' not in config:
 66 |         config.ma_threshold = 0
 67 |     if 'min_train_epochs' not in config:
 68 |         config.min_train_epochs = 0
 69 |     if 'do_init_eval' not in config:
 70 |         config.do_init_eval = True if config.mode == 'unlearn' else False
 71 | 
 72 |     pl.seed_everything(seed, workers=True)
 73 | 
 74 |     # Set console logger
 75 |     logger = logging.getLogger()
 76 |     logger.setLevel(logging.INFO)
 77 |     formatter = logging.Formatter(
 78 |         '[%(levelname)s] %(asctime)s (%(filename)s:%(lineno)d) : %(message)s'
 79 |     )
 80 |     handler = logging.StreamHandler()
 81 |     handler.setFormatter(formatter)
 82 |     logger.addHandler(handler)
 83 | 
 84 |     # Set wandb logger
 85 |     if config.wandb_log:
 86 |         wandb_logger = WandbLogger(
 87 |             project=config.wandb_project,
 88 |             name=config.wandb_run_name,
 89 |             entity='lklab_kaist')
 90 |     else:
 91 |         wandb_logger = None
 92 | 
 93 |     # Setting for pytorch lightning trainer
 94 |     train_params = dict(
 95 |         accumulate_grad_batches=config.gradient_accumulation_steps,
 96 |         accelerator='gpu',
 97 |         devices=config.ngpu,
 98 |         max_epochs=int(config.num_train_epochs),
 99 |         precision=16 if config.fp16 else 32,
100 |         check_val_every_n_epoch=config.check_val_every_n_epoch,
101 |         enable_checkpointing=False,
102 |         logger=wandb_logger,
103 |         strategy=config.strategy,
104 |         num_sanity_val_steps=0,
105 |         log_every_n_steps=1
106 |     )
107 | 
108 |     if config.check_validation_only:
109 |         trainer = pl.Trainer(**train_params)
110 |         model = NeoDP(config)
111 |         trainer.validate(model)
112 |     else:
113 |         trainer = pl.Trainer(**train_params)
114 |         if config.do_init_eval:
115 |             model = NeoDP(config)
116 |             trainer.validate(model)
117 |         model = Neo(config)
118 |         trainer.fit(model)
119 | 


--------------------------------------------------------------------------------
/run_st.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import argparse
  3 | from argparse import ArgumentParser
  4 | import json
  5 | import pytorch_lightning as pl
  6 | from pytorch_lightning.loggers import WandbLogger
  7 | from models.Neo_Model import Neo
  8 | from models.Neo_Model_suffix_tree import NeoST
  9 | # from models.Neo_Model_valid import NeoValid
 10 | 
 11 | 
 12 | if __name__ == '__main__':
 13 |     # Parsing Arguments
 14 |     parser = ArgumentParser()
 15 |     parser.add_argument('--config', default=None, type=str)
 16 |     arg_ = parser.parse_args()
 17 |     if arg_.config is None:
 18 |         raise NameError("Include a config file in the argument please.")
 19 | 
 20 |     # Getting configurations
 21 |     config_path = arg_.config
 22 |     with open(config_path) as config_file:
 23 |         config = json.load(config_file)
 24 |     config = argparse.Namespace(**config)
 25 | 
 26 |     # Init configs that are not given
 27 |     if 'seed' not in config:
 28 |         seed = 42
 29 |     if 'train_sets' not in config:
 30 |         config.train_sets = ""
 31 |     if 'valid_sets' not in config:
 32 |         config.valid_sets = []
 33 |     if 'valid_subset_path' not in config:
 34 |         config.valid_subset_path = None
 35 |     if 'valid_type_path' not in config:
 36 |         config.valid_type_path = None
 37 |     if 'learning_rate' not in config:
 38 |         config.learning_rate = 5e-5
 39 |     if 'negative_loss' not in config:
 40 |         config.negative_loss = True
 41 |     if 'gradient_accumulation_steps' not in config:
 42 |         config.gradient_accumulation_steps = 1
 43 |     if 'num_train_epochs' not in config:
 44 |         config.num_train_epochs = 0
 45 |     if 'num_workers' not in config:
 46 |         config.num_workers = 0
 47 |     if 'wandb_log' not in config:
 48 |         config.wandb_log = False
 49 |     if 'strategy' not in config:
 50 |         config.strategy = None
 51 |     if 'fp16' not in config:
 52 |         config.fp16 = False
 53 |     if 'check_validation_only' not in config:
 54 |         config.check_validation_only = False
 55 |     if 'check_val_every_n_epoch' not in config:
 56 |         config.check_val_every_n_epoch = 1
 57 |     if 'tokenizer' not in config:
 58 |         config.tokenizer_name_or_path = config.model_name_or_path
 59 |     if 'target_length' not in config:
 60 |         config.target_length = None
 61 |     if 'el_n' not in config:
 62 |         config.el_n = [10]
 63 |     if 'el_threshold' not in config:
 64 |         config.el_threshold = 0
 65 |     if 'ma_threshold' not in config:
 66 |         config.ma_threshold = 0
 67 |     if 'min_train_epochs' not in config:
 68 |         config.min_train_epochs = 0
 69 |     if 'do_init_eval' not in config:
 70 |         config.do_init_eval = True if config.mode == 'unlearn' else False
 71 | 
 72 |     pl.seed_everything(seed, workers=True)
 73 | 
 74 |     # Set console logger
 75 |     logger = logging.getLogger()
 76 |     logger.setLevel(logging.INFO)
 77 |     formatter = logging.Formatter(
 78 |         '[%(levelname)s] %(asctime)s (%(filename)s:%(lineno)d) : %(message)s'
 79 |     )
 80 |     handler = logging.StreamHandler()
 81 |     handler.setFormatter(formatter)
 82 |     logger.addHandler(handler)
 83 | 
 84 |     # Set wandb logger
 85 |     if config.wandb_log:
 86 |         wandb_logger = WandbLogger(
 87 |             project=config.wandb_project,
 88 |             name=config.wandb_run_name,
 89 |             entity='lklab_kaist')
 90 |     else:
 91 |         wandb_logger = None
 92 | 
 93 |     # Setting for pytorch lightning trainer
 94 |     train_params = dict(
 95 |         accumulate_grad_batches=config.gradient_accumulation_steps,
 96 |         accelerator='gpu',
 97 |         devices=config.ngpu,
 98 |         max_epochs=int(config.num_train_epochs),
 99 |         precision=16 if config.fp16 else 32,
100 |         check_val_every_n_epoch=config.check_val_every_n_epoch,
101 |         enable_checkpointing=False,
102 |         logger=wandb_logger,
103 |         strategy=config.strategy,
104 |         num_sanity_val_steps=0,
105 |         log_every_n_steps=1
106 |     )
107 | 
108 |     if config.check_validation_only:
109 |         trainer = pl.Trainer(**train_params)
110 |         model = NeoST(config)
111 |         trainer.validate(model)
112 |     else:
113 |         trainer = pl.Trainer(**train_params)
114 |         if config.do_init_eval:
115 |             model = NeoST(config)
116 |             trainer.validate(model)
117 |         model = Neo(config)
118 |         trainer.fit(model)
119 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | from typing import List, Dict
  2 | from pytorch_lightning import Callback
  3 | import pandas as pd
  4 | 
  5 | class MetricTracker(Callback):
  6 | 
  7 |     def __init__(self, run_name):
  8 |         self.df = None
  9 |         self.run_name = run_name
 10 | 
 11 |     def on_validation_end(self, trainer, module):
 12 |         print(trainer.logged_metrics)
 13 |         elogs = trainer.logged_metrics # access it here
 14 |         elogs = {k: [v.item()] for k, v in elogs.items()}
 15 |         new_df = pd.DataFrame(elogs)
 16 |         # new_df = new_df[self.df.columns]
 17 |         # self.df = pd.concat([self.df, new_df])
 18 |         new_df.to_csv(f'csv_out/{self.run_name}.csv')
 19 |         print('Hello World')
 20 | 
 21 |     # def on_validation_epoch_end(self, trainer, module):
 22 |     #     if isinstance(module, GPT2Valid):
 23 |     #         elogs = trainer.logged_metrics # access it here
 24 |     #         elogs = {k: [v.item()] for k, v in elogs.items()}
 25 |     #         self.df = pd.DataFrame(elogs)
 26 | 
 27 | 
 28 | DIALOG_DATASETS = [
 29 |     'wizard_of_wikipedia',
 30 |     'empathetic_dialogues',
 31 |     'blended_skill_talk',
 32 |     'wizard_of_internet'
 33 | ]
 34 | 
 35 | CLASSIFICATION_DATASETS = [
 36 |     'piqa',
 37 |     'hellaswag',
 38 |     'ai2_arc',
 39 |     'winogrande',
 40 |     'math_qa',
 41 |     'pubmed_qa',
 42 |     'copa'
 43 | ]
 44 | 
 45 | PPL_DATASETS = [
 46 |     'wikitext',
 47 |     'pile'
 48 | ]
 49 | 
 50 | COMPLETION_DATASETS = [
 51 |     'lambada'
 52 | ]
 53 | 
 54 | class Trie(object):
 55 |     def __init__(self, sequences: List[List[int]] = []):
 56 |         self.trie_dict = {}
 57 |         self.len = 0
 58 |         if sequences:
 59 |             for sequence in sequences:
 60 |                 Trie._add_to_trie(sequence, self.trie_dict)
 61 |                 self.len += 1
 62 | 
 63 |         self.append_trie = None
 64 |         self.bos_token_id = None
 65 | 
 66 |     def append(self, trie, bos_token_id):
 67 |         self.append_trie = trie
 68 |         self.bos_token_id = bos_token_id
 69 | 
 70 |     def add(self, sequence: List[int]):
 71 |         Trie._add_to_trie(sequence, self.trie_dict)
 72 |         self.len += 1
 73 | 
 74 |     def get(self, prefix_sequence: List[int]):
 75 |         return Trie._get_from_trie(
 76 |             prefix_sequence, self.trie_dict, self.append_trie, self.bos_token_id
 77 |         )
 78 | 
 79 |     @staticmethod
 80 |     def load_from_dict(trie_dict):
 81 |         trie = Trie()
 82 |         trie.trie_dict = trie_dict
 83 |         trie.len = sum(1 for _ in trie)
 84 |         return trie
 85 | 
 86 |     @staticmethod
 87 |     def _add_to_trie(sequence: List[int], trie_dict: Dict):
 88 |         if sequence:
 89 |             if sequence[0] not in trie_dict:
 90 |                 trie_dict[sequence[0]] = {}
 91 |             Trie._add_to_trie(sequence[1:], trie_dict[sequence[0]])
 92 | 
 93 |     @staticmethod
 94 |     def _get_from_trie(
 95 |         prefix_sequence: List[int],
 96 |         trie_dict: Dict,
 97 |         append_trie=None,
 98 |         bos_token_id: int = None,
 99 |     ):
100 |         if len(prefix_sequence) == 0:
101 |             output = list(trie_dict.keys())
102 |             if append_trie and bos_token_id in output:
103 |                 output.remove(bos_token_id)
104 |                 output += list(append_trie.trie_dict.keys())
105 |             return output
106 |         elif prefix_sequence[0] in trie_dict:
107 |             return Trie._get_from_trie(
108 |                 prefix_sequence[1:],
109 |                 trie_dict[prefix_sequence[0]],
110 |                 append_trie,
111 |                 bos_token_id,
112 |             )
113 |         else:
114 |             if append_trie:
115 |                 return append_trie.get(prefix_sequence)
116 |             else:
117 |                 return []
118 | 
119 |     def __iter__(self):
120 |         def _traverse(prefix_sequence, trie_dict):
121 |             if trie_dict:
122 |                 for next_token in trie_dict:
123 |                     yield from _traverse(
124 |                         prefix_sequence + [next_token], trie_dict[next_token]
125 |                     )
126 |             else:
127 |                 yield prefix_sequence
128 | 
129 |         return _traverse([], self.trie_dict)
130 | 
131 |     def __len__(self):
132 |         return self.len
133 | 
134 |     def __getitem__(self, value):
135 |         return self.get(value)
136 | 
def normalize_reply(text: str, version=2) -> str:
    """
    Standardize the capitalization and punctuation spacing of the input text.

    The text is lower-cased, punctuation is split off into its own tokens,
    the first token, the pronoun "i" and every token following a sentence-ending
    mark are capitalized, and finally the punctuation is re-attached.

    Version 1: Fix sentence start casing, and punctuation.
    Version 2: Add trailing period, if missing.

    :param text: raw reply text.
    :param version: 1 or 2, see above.
    :return: the normalized text.
    """

    # (spaced form, attached form) for each handled punctuation mark
    switch_list = [(' .', '.'), (' ,', ','), (' ?', '?'), (' !', '!'), (" ' ", "'")]
    sentence_enders = ('?', '.', '!')
    first_person = ('i', "i'm", "i've", "i'll", "i'd")

    # add spaces so that words and punctuation can be separated
    new_text = text.lower()

    # normalize in case of human:
    for spaced, attached in switch_list:
        new_text = new_text.replace(attached, spaced).replace('  ', ' ')

    # split on punctuation to find sentence boundaries
    # capitalize stuff
    tokens = new_text.split(' ')
    last_index = len(tokens) - 1
    for i, token in enumerate(tokens):
        if i == 0:
            tokens[i] = uppercase(token)
        elif token in first_person:
            tokens[i] = uppercase(token)
        # Exact membership, not a substring test: an empty token (left over
        # from long whitespace runs) must not count as sentence punctuation.
        elif token in sentence_enders and i < last_index:
            tokens[i + 1] = uppercase(tokens[i + 1])
    new_text = ' '.join(tokens)
    # pad so that the " ' " pattern can also match at the very start/end
    new_text = ' ' + new_text + ' '

    for spaced, attached in switch_list:
        new_text = new_text.replace(spaced, attached)

    # get rid of surrounding whitespace
    new_text = new_text.strip()
    new_text = new_text.replace('  ', ' ')

    if version > 1 and new_text and new_text[-1] not in '!.?)"\'':
        new_text += '.'

    return new_text


def uppercase(string: str) -> str:
    """
    Make the first character of the string uppercase, if the string is non-empty.
    """
    if not string:
        return string
    return string[0].upper() + string[1:]
187 | 
188 | 


--------------------------------------------------------------------------------