├── .gitignore
├── notebooks
    ├── raw_text.txt
    ├── tokenized_data.bin
    └── pre_process_data.ipynb
├── misc
    ├── loss.png
    └── lilLM_architecture.png
├── model
    ├── config.py
    ├── tokenizer
    │   ├── tokenizer_config.json
    │   └── vocab.json
    ├── model_lora.py
    ├── utils.py
    ├── dataset.py
    └── model.py
├── download_model.py
├── requirements.txt
├── inference.py
├── data
    └── pretraining
    │   └── process.py
├── train_custom_tokenizer.py
├── inference_gradio.py
├── pretrain.py
├── sft_train.py
├── sft_lora_train.py
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | openwebtext_800k.jsonl
2 | 


--------------------------------------------------------------------------------
/notebooks/raw_text.txt:
--------------------------------------------------------------------------------
1 | Hello, world! This is an example of tokenization.


--------------------------------------------------------------------------------
/misc/loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CohleM/lilLM/HEAD/misc/loss.png


--------------------------------------------------------------------------------
/misc/lilLM_architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CohleM/lilLM/HEAD/misc/lilLM_architecture.png


--------------------------------------------------------------------------------
/notebooks/tokenized_data.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CohleM/lilLM/HEAD/notebooks/tokenized_data.bin


--------------------------------------------------------------------------------
/model/config.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass 
 2 | 
 3 | @dataclass
 4 | class Config:
 5 |     vocab_size: int = 2**13
 6 |     d_model: int = 512
 7 |     n_layers: int = 12
 8 |     max_seq_len: int = 512
 9 |     q_heads: int = 16
10 |     kv_heads: int = 8
11 |     dropout: float = 0.0
12 |     max_batch_size: int = 32
13 |     hidden_dim: int = None
14 |     multiple_of: int = 128
15 |     eps: float = 1e-6
16 |     flash: bool = True
17 | 


--------------------------------------------------------------------------------
/download_model.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import argparse
 3 | 
 4 | from huggingface_hub import hf_hub_download
 5 | 
 6 | DEFAULT_REPO_ID = "jonwondo/lilLM_40M_param_10B_tok"
 7 | DEFAULT_FILENAME = "lilLM_40M_params_10B_tok.pt"
 8 | 
 9 | 
10 | 
11 | if __name__=='__main__':
12 | 
13 | 
14 |     parser = argparse.ArgumentParser(description='Download model for inference/sft training')
15 |     parser.add_argument("--repo_id", type=str, default=DEFAULT_REPO_ID, help="Huggingface repo id exp: jonwondo/lilLM_40M_param_10B_tok")
16 |     parser.add_argument("--filename", type=str, default=DEFAULT_FILENAME, help="file name inside that repo id exp: lilLM_40M_params_10B_tok.pt")
17 |     args = parser.parse_args()
18 | # Define the repository ID and file name
19 |     current_dir = os.path.dirname(os.path.abspath(__file__))
20 | 
21 |     model_path = hf_hub_download(repo_id=args.repo_id, filename=args.filename, local_dir=current_dir)
22 | 
23 |     print(f"Model downloaded to: {model_path}")
24 |     os.rename(args.filename, 'best_model.pt')
25 | 
26 |     print(f"File renamed from '{args.filename}' to best_model.pt")
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/model/tokenizer/tokenizer_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "add_bos_token": false,
 3 |     "add_eos_token": false,
 4 |     "add_prefix_space": true,
 5 |     "added_tokens_decoder": {
 6 |         "0": {
 7 |             "content": "<unk>",
 8 |             "lstrip": false,
 9 |             "normalized": false,
10 |             "rstrip": false,
11 |             "single_word": false,
12 |             "special": true
13 |         },
14 |         "1": {
15 |             "content": "<s>",
16 |             "lstrip": false,
17 |             "normalized": false,
18 |             "rstrip": false,
19 |             "single_word": false,
20 |             "special": true
21 |         },
22 |         "2": {
23 |             "content": "</s>",
24 |             "lstrip": false,
25 |             "normalized": false,
26 |             "rstrip": false,
27 |             "single_word": false,
28 |             "special": true
29 |         }
30 |     },
31 |     "additional_special_tokens": [],
32 |     "bos_token": "<s>",
33 |     "clean_up_tokenization_spaces": false,
34 |     "eos_token": "</s>",
35 |     "legacy": true,
36 |     "model_max_length": 1000000000000000019884624838656,
37 |     "pad_token": null,
38 |     "sp_model_kwargs": {},
39 |     "spaces_between_special_tokens": false,
40 |     "tokenizer_class": "PreTrainedTokenizerFast",
41 |     "unk_token": "<unk>",
42 |     "use_default_system_prompt": false
43 | }


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | aiofiles==23.2.1
 2 | aiohappyeyeballs==2.4.6
 3 | aiohttp==3.11.12
 4 | aiosignal==1.3.2
 5 | annotated-types==0.7.0
 6 | anyio==4.8.0
 7 | attrs==25.1.0
 8 | certifi==2025.1.31
 9 | charset-normalizer==3.4.1
10 | click==8.1.8
11 | datasets==3.2.0
12 | dill==0.3.8
13 | docker-pycreds==0.4.0
14 | fastapi==0.115.8
15 | ffmpy==0.5.0
16 | filelock==3.17.0
17 | frozenlist==1.5.0
18 | fsspec==2024.9.0
19 | gitdb==4.0.12
20 | GitPython==3.1.44
21 | gradio==5.18.0
22 | gradio_client==1.7.2
23 | h11==0.14.0
24 | httpcore==1.0.7
25 | httpx==0.28.1
26 | huggingface-hub==0.28.1
27 | idna==3.10
28 | Jinja2==3.1.5
29 | markdown-it-py==3.0.0
30 | MarkupSafe==2.1.5
31 | mdurl==0.1.2
32 | mpmath==1.3.0
33 | multidict==6.1.0
34 | multiprocess==0.70.16
35 | networkx==3.4.2
36 | numpy==2.2.2
37 | orjson==3.10.15
38 | packaging==24.2
39 | pandas==2.2.3
40 | pillow==11.1.0
41 | platformdirs==4.3.6
42 | propcache==0.2.1
43 | protobuf==5.29.3
44 | psutil==6.1.1
45 | pyarrow==19.0.0
46 | pydantic==2.10.6
47 | pydantic_core==2.27.2
48 | pydub==0.25.1
49 | Pygments==2.19.1
50 | python-dateutil==2.9.0.post0
51 | python-multipart==0.0.20
52 | pytz==2025.1
53 | PyYAML==6.0.2
54 | regex==2024.11.6
55 | requests==2.32.3
56 | rich==13.9.4
57 | ruff==0.9.7
58 | safehttpx==0.1.6
59 | safetensors==0.5.2
60 | semantic-version==2.10.0
61 | sentry-sdk==2.20.0
62 | setproctitle==1.3.4
63 | setuptools==75.8.0
64 | shellingham==1.5.4
65 | six==1.17.0
66 | smmap==5.0.2
67 | sniffio==1.3.1
68 | starlette==0.45.3
69 | sympy==1.13.1
70 | tokenizers==0.21.0
71 | tomlkit==0.13.2
72 | torch==2.6.0
73 | tqdm==4.67.1
74 | transformers==4.48.3
75 | typer==0.15.1
76 | typing_extensions==4.12.2
77 | tzdata==2025.1
78 | urllib3==2.3.0
79 | uvicorn==0.34.0
80 | wandb==0.19.6
81 | websockets==15.0
82 | wheel==0.45.1
83 | xxhash==3.5.0
84 | yarl==1.18.3
85 | 


--------------------------------------------------------------------------------
/model/model_lora.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import random
 3 | import time
 4 | import torch
 5 | from torch import nn
 6 | import torch.nn.functional as F
 7 | import numpy as np
 8 | import math
 9 | 
10 | 
11 | class LoRA(nn.Module):
12 |     def __init__(self,d, k, r):
13 |         super().__init__()
14 |         self.B = nn.Parameter(torch.zeros((d,r)))
15 |         self.A = nn.Parameter((torch.randn((r,k)))) 
16 |         torch.nn.init.normal_(self.A, mean= 0.0, std= 0.02) # weight initialization i.e divide by sqrt(in_features)
17 |         
18 |         
19 |     def forward(self,x):
20 |         return torch.matmul(x,torch.matmul(self.B, self.A))
21 | 
22 | 
23 | def apply_lora(model,r):
24 |     for name, module in model.named_modules():
25 |         
26 |         if isinstance(module, nn.Linear) and module.weight.shape[0] == module.weight.shape[1]:
27 | 
28 |             lora = LoRA(module.weight.shape[0], module.weight.shape[1], 8)
29 |             original_forward = module.forward
30 |             
31 |             def lora_forward(x, layer1=original_forward, layer2=lora):
32 |                 return layer1(x) + layer2(x)
33 |         
34 | #             module.forward = lambda x: original_forward(x) + lora(x)
35 |             module.forward = lora_forward
36 |     
37 |             setattr(module, 'lora', lora)
38 | 
39 | 
40 | def save_lora(model, output_path):
41 |     lora_parameters = {}
42 |     for (k,v) in model.state_dict().items():
43 |         if 'lora' in k:
44 |             lora_parameters[k] = v
45 |             print(k, v)
46 |             
47 |     torch.save(lora_parameters, output_path)
48 | 
49 | def load_lora(model, model_path):
50 |     lora_parameters = torch.load(model_path, map_location=model.device)
51 |     for k,v in model.state_dict().items():
52 |         for lk, lv in lora_parameters.items():
53 |             if k == lk:
54 |                 v = lv
55 | 
56 | 


--------------------------------------------------------------------------------
/model/utils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # see https://arxiv.org/pdf/2203.15556 Appendix F
 3 | def calculate_transformer_flops(
 4 |     seq_len: int,
 5 |     vocab_size: int,
 6 |     d_model: int,
 7 |     key_size: int,
 8 |     num_heads: int,
 9 |     ffw_size: int,
10 |     num_layers: int,
11 | ) -> dict:
12 |     """
13 |     Calculates flops required for one step with one batch size
14 |     Args:
15 |         seq_len: Sequence length
16 |         vocab_size: Vocabulary size
17 |         d_model: Model dimension
18 |         key_size: Key dimension
19 |         num_heads: Number of attention heads
20 |         ffw_size: Feed-forward layer size
21 |         num_layers: Number of transformer layers
22 |     """
23 | 
24 |     # Embeddings
25 |     embedding_flops = 2 * seq_len * vocab_size * d_model
26 | 
27 |     # Single Attention Layer
28 |     key_query_value_proj = 2 * 3 * seq_len * d_model * (key_size * num_heads)
29 |     key_query_logits = 2 * seq_len * seq_len * (key_size * num_heads)
30 |     softmax_ops = 3 * num_heads * seq_len * seq_len
31 |     softmax_query_reduction = 2 * seq_len * seq_len * (key_size * num_heads)
32 |     final_linear = 2 * seq_len * (key_size * num_heads) * d_model
33 | 
34 |     total_attention_flops = (
35 |         key_query_value_proj
36 |         + key_query_logits
37 |         + softmax_ops
38 |         + softmax_query_reduction
39 |         + final_linear
40 |     )
41 | 
42 |     # Single Dense Block
43 |     dense_block_flops = 2 * seq_len * (d_model * ffw_size + d_model * ffw_size)
44 | 
45 |     # Final Logits
46 |     final_logits_flops = 2 * seq_len * d_model * vocab_size
47 | 
48 |     # Total forward pass
49 |     total_forward_pass = (
50 |         embedding_flops
51 |         + num_layers * (total_attention_flops + dense_block_flops)
52 |         + final_logits_flops
53 |     )
54 | 
55 |     # Backward pass is approximately 2x forward pass
56 |     total_backward_pass = 2 * total_forward_pass
57 | 
58 |     # Total forward + backward
59 |     total_flops = total_forward_pass + total_backward_pass
60 | 
61 |     return total_flops
62 | 


--------------------------------------------------------------------------------
/inference.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import torch
 3 | import argparse
 4 | import os
 5 | 
 6 | from transformers import AutoTokenizer
 7 | 
 8 | from model.model import LilLM
 9 | from model.config import Config
10 | 
11 | DEFAULT_TOKENIZER_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'model/tokenizer')
12 | DEFAULT_MODEL_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'best_model_790_sft.pt')
13 | DEFAULT_TEXT = 'What is the capital of France?'
14 | DEFAULT_MODEL_TYPE = 'sft'
15 | 
16 | def add_chat_format(text):
17 |     template = f"<r0>user<r1>" + f"{text}</r2><r0>assistant<r1>"
18 |     return template
19 | 
20 | 
21 | if __name__=='__main__':
22 |     torch.serialization.add_safe_globals([Config])
23 |     parser = argparse.ArgumentParser(description='Sample text')
24 |     parser.add_argument("--tokenizer_path", type=str, default=DEFAULT_TOKENIZER_PATH, help="Tokenizer path")
25 |     parser.add_argument("--model_path", type=str, default=DEFAULT_MODEL_PATH, help="Model path")
26 |     parser.add_argument("--text", type=str, default=DEFAULT_TEXT, help="Input to the model")
27 |     parser.add_argument("--model_type", type=str, default=DEFAULT_MODEL_TYPE, help="sft model or pretrained")
28 |     
29 |     
30 | 
31 |     args = parser.parse_args()
32 | 
33 |     #model_path = 'best_model_790_sft.pt' 
34 |     model = LilLM(Config(flash=False))
35 | 
36 |     tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_path)
37 |     device = 'cuda' if torch.cuda.is_available() else 'cpu'
38 |     model.eval()
39 |     model.to(device)
40 |     #checkpoint = torch.load('/Users/cohlem/Projects/Experimentation/lillm/best_model_790_sft.pt', map_location=device)
41 |     checkpoint = torch.load(args.model_path, map_location=device)
42 | 
43 | 
44 |     state_dict = checkpoint['model'] 
45 |     unwanted_prefix = '_orig_mod.'
46 | 
47 |     for k in list(state_dict.keys()):
48 |         if k.startswith(unwanted_prefix):
49 |             state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
50 | 
51 |     model.load_state_dict(checkpoint['model'])
52 | 
53 |     template_text = add_chat_format(args.text) if args.model_type == "sft" else args.text
54 |     
55 |     t0 = time.time()
56 |     start_prompt = torch.tensor(tokenizer.encode(template_text)).unsqueeze(dim=0).to(device)
57 |     eos = torch.tensor([[2]]).to(device)
58 |     print(tokenizer.decode(model.generate(start_prompt, eos).squeeze()))
59 |     t1 = time.time()
60 | 
61 |     print(f'\n Completed in {t1-t0} seconds')
62 | 
63 | 


--------------------------------------------------------------------------------
/data/pretraining/process.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import os
 3 | import argparse
 4 | 
 5 | from transformers import AutoTokenizer
 6 | from datasets import load_dataset # huggingface dataset
 7 | from tqdm import tqdm
 8 | 
 9 | if __name__=='__main__':
10 |     #file_path = '/Users/cohlem/Projects/Experimentation/lillm/model/tokenizer/'
11 |     parser = argparse.ArgumentParser(description='Loading the tokenizer')
12 |     parser.add_argument('--tokenizer_path', required=True, type=str, help='Path to tokenizer')
13 |     args = parser.parse_args()
14 | 
15 |     tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_path)
16 | #    tokenizer = AutoTokenizer.from_pretrained('gpt2')
17 | 
18 |     num_proc = 8 
19 | 
20 |     #dataset = load_dataset('text', num_proc=num_proc,data_files = 'input.txt') #for loading custom data
21 |     dataset = load_dataset("Skylion007/openwebtext", num_proc=num_proc) #for loading custom data
22 |     split_dataset = dataset['train'].train_test_split(test_size = 0.0010, shuffle=True, seed=43)
23 |     split_dataset['val'] = split_dataset.pop('test')
24 | 
25 |     def tokenize(item):
26 |         ids = tokenizer.encode(item['text'] + '</s>')
27 |         return {'ids': ids, 'len': len(ids)}
28 | 
29 |     #under the hood, data is broken down into shards/batches, accessed using Memory mapping, and only processing batches in the RAM. 
30 |     #See https://huggingface.co/docs/datasets/v2.1.0/en/about_arrow#:~:text=Memory%2Dmapping,with%20relatively%20small%20device%20memory.
31 |     # essentially not loaded entirely in RAM but via memory mapping loads only what needs to be processed
32 |     tokenized_data = split_dataset.map(tokenize, remove_columns='text', num_proc=num_proc)
33 | 
34 | 
35 |         # Writing .bin file but processing in batches
36 |     for split, dset in tokenized_data.items():
37 |         batch_size = 1024
38 |         idx = 0
39 |         arr_sz = np.sum(dset['len'])
40 | 
41 |         # Memory mapping of file to our array
42 |         filename = os.path.join(os.path.dirname(__file__), f'{split}.bin')
43 |         arr = np.memmap(filename, dtype=np.uint16, mode='w+', shape=(arr_sz,)) 
44 | 
45 |         for b in tqdm(range(batch_size), desc=f'processing {filename}'):
46 |             shard = dset.shard(num_shards=batch_size, index=b, contiguous=True).with_format('numpy')
47 |             shard = np.concatenate(shard['ids'])
48 | 
49 |             arr[idx: idx + len(shard)] = shard # write the shard to virtual memory page cache (in RAM), OS writes to the file whenever it feels necessary
50 |             idx += len(shard)
51 | 
52 |         arr.flush() # force OS to clear the page cache and write to the disk
53 | 
54 | 
55 | 
56 | 


--------------------------------------------------------------------------------
/model/dataset.py:
--------------------------------------------------------------------------------
 1 | from datasets import load_dataset
 2 | from transformers import AutoTokenizer
 3 | import torch
 4 | 
 5 | class SFTDataset:
 6 |     def __init__(self,tokenizer_path, max_seq_len, data_path = 'CohleM/lillm-sft-dataset-v1'):
 7 |         self.data = load_dataset(data_path)
 8 |         self.max_seq_len = max_seq_len
 9 |         self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
10 |         self.filtered_data = self.data.filter(self._filter_by_token_len, num_proc = 8)
11 |         self.tokenized_data = self.filtered_data.map(self._tokenize, num_proc = 8)
12 | 
13 |     def _filter_by_token_len(self, example):
14 |         template = self._add_chat_format(example)
15 |         return len(self.tokenizer.encode(template)) < self.max_seq_len
16 |     
17 |     def _add_chat_format(self, example):
18 |         items = example['conversation']
19 |         template = ""
20 |         
21 |         for item in items:
22 | 
23 |             if item['role'] == 'user':
24 |                 template += f"<r0>{item['role']}<r1>" + f"{item['content']}</r2>"
25 |             elif item['role'] =='assistant':
26 |                 template += f"<r0>{item['role']}<r1>" + f"{item['content']}</s>"
27 |         return template
28 | 
29 |     def _generate_loss_mask(self, tokenized_input):
30 |         assistant_token = self.tokenizer.encode('<r0>assistant<r1>')
31 |         end_token = self.tokenizer.encode('</s>')[0]
32 | 
33 |         assist_token_idx = [i+3 for i in range(len(tokenized_input)) if tokenized_input[i:i+3] == assistant_token]
34 |         end_token_idx = [i for i,v in enumerate(tokenized_input) if v == end_token]
35 | 
36 |         loss_mask = [0]*len(tokenized_input)
37 | 
38 |         for i in range(len(assist_token_idx)):
39 |             loss_mask[assist_token_idx[i]: end_token_idx[i] + 1] = [1]* (end_token_idx[i] - assist_token_idx[i] + 1)
40 | 
41 |         return loss_mask
42 | 
43 |         
44 |     def _tokenize(self,example):
45 |         template = self._add_chat_format(example)
46 | 
47 |         x = self.tokenizer.encode(template)
48 | 
49 |         x += (self.max_seq_len - len(x))* [0]
50 |         x = x[:self.max_seq_len]
51 |         
52 |         X = torch.tensor(x[:-1], dtype=torch.long)
53 |         Y = torch.tensor(x[1:], dtype=torch.long)
54 |         
55 |         loss_mask = self._generate_loss_mask(x[1:])
56 |         
57 |         loss_mask = torch.tensor(loss_mask, dtype=torch.long)
58 |         
59 |         return {'X': X, 'Y': Y, 'loss_mask': loss_mask}
60 |     
61 |     
62 |     def get_batch(self, split, batch_size):
63 |         batches = torch.randint(0, self.tokenized_data[split].num_rows, (batch_size,))
64 |         out = self.tokenized_data[split][batches]
65 |         
66 |         return torch.tensor(out['X'], dtype=torch.long), torch.tensor(out['Y'], dtype=torch.long) , torch.tensor(out['loss_mask'], dtype=torch.long)
67 | 
68 |     
69 | 


--------------------------------------------------------------------------------
/train_custom_tokenizer.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from tokenizers import Tokenizer, models, pre_tokenizers, trainers, decoders
  3 | import os 
  4 | import random
  5 | 
  6 | random.seed(42)
  7 | # Define the data loader function separately
  8 | def read_texts_from_jsonl(file_path):
  9 |     """Reads text data from a JSONL file."""
 10 |     with open(file_path, 'r', encoding='utf-8') as f:
 11 |         for line in f:
 12 |             data = json.loads(line)
 13 |             yield data['text']
 14 | 
 15 | 
 16 | 
 17 | def train_tokenizer(file_path):
 18 | 
 19 |     tokenizer = Tokenizer(models.BPE()) 
 20 |     tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=False) # convert character into bytes, and don't add space to the beginning of text
 21 | 
 22 |     # tokens: <unk> - for token it hasn't seen during training,
 23 |     # <s> - start of sentence
 24 |     # </s> - end of sentence
 25 |     special_tokens = ["<unk>", "<s>", "</s>"]
 26 |     special_tokens = special_tokens + [f'<r{i}>' for i in range(13)]  # total 16 special tokens, 13 reserved for later
 27 | 
 28 |     
 29 |     # set configs for BPE trainer, 
 30 |     trainer = trainers.BpeTrainer(
 31 |         vocab_size=8192, # 2^13
 32 |         special_tokens=special_tokens,  
 33 |         show_progress=True,
 34 |         initial_alphabet=pre_tokenizers.ByteLevel.alphabet()
 35 |     )
 36 | 
 37 |     tokenizer.decoder = decoders.ByteLevel()
 38 | 
 39 |     # Read dataset from jsonl 
 40 |     texts = read_texts_from_jsonl(file_path)
 41 |     
 42 |     #train using iterator
 43 |     tokenizer.train_from_iterator(texts, trainer=trainer)
 44 |     
 45 |     tokenizer_dir = "./model/tokenizer"
 46 |     os.makedirs(tokenizer_dir, exist_ok=True)
 47 |     tokenizer.save(os.path.join(tokenizer_dir, "tokenizer.json"))
 48 |     tokenizer.model.save("./model/tokenizer")
 49 |     
 50 |     config = {
 51 |         "add_bos_token": False,
 52 |         "add_eos_token": False,
 53 |         "add_prefix_space": True,
 54 |         "added_tokens_decoder": {
 55 |             "0": {
 56 |                 "content": "<unk>",
 57 |                 "lstrip": False,
 58 |                 "normalized": False,
 59 |                 "rstrip": False,
 60 |                 "single_word": False,
 61 |                 "special": True
 62 |             },
 63 |             "1": {
 64 |                 "content": "<s>",
 65 |                 "lstrip": False,
 66 |                 "normalized": False,
 67 |                 "rstrip": False,
 68 |                 "single_word": False,
 69 |                 "special": True
 70 |             },
 71 |             "2": {
 72 |                 "content": "</s>",
 73 |                 "lstrip": False,
 74 |                 "normalized": False,
 75 |                 "rstrip": False,
 76 |                 "single_word": False,
 77 |                 "special": True
 78 |             }
 79 |         },
 80 |         "additional_special_tokens": [],
 81 |         "bos_token": "<s>",
 82 |         "clean_up_tokenization_spaces": False,
 83 |         "eos_token": "</s>",
 84 |         "legacy": True,
 85 |         "model_max_length": 1000000000000000019884624838656,
 86 |         "pad_token": None,
 87 |         "sp_model_kwargs": {},
 88 |         "spaces_between_special_tokens": False,
 89 |         "tokenizer_class": "PreTrainedTokenizerFast",
 90 |         "unk_token": "<unk>",
 91 |         "use_default_system_prompt": False
 92 | #         "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<s>user\\n' + content + '</s>\\n<s>assistant\\n' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' + '\\n' }}{% endif %}{% endfor %}"
 93 |     }
 94 |     
 95 |     with open(os.path.join(tokenizer_dir, "tokenizer_config.json"), "w", encoding="utf-8") as config_file:
 96 |         json.dump(config, config_file, ensure_ascii=False, indent=4)
 97 | 
 98 |     print("Tokenizer training completed and saved.")
 99 |     
100 | 
101 | 
102 | if __name__ =='__main__':
103 |     file_path = 'openwebtext_800k.jsonl'
104 |     train_tokenizer(file_path)
105 | 


--------------------------------------------------------------------------------
/inference_gradio.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import torch
  3 | import argparse
  4 | import os
  5 | import gradio as gr
  6 | from transformers import AutoTokenizer
  7 | from model.model import LilLM
  8 | from model.config import Config
  9 | 
 10 | DEFAULT_TOKENIZER_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'model/tokenizer')
 11 | DEFAULT_MODEL_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'best_model_sft_4epoch_with_hard_coded.pt')
 12 | DEFAULT_MODEL_TYPE = 'sft'
 13 | 
 14 | class ChatInterface:
 15 |     def __init__(self, tokenizer_path, model_path, model_type):
 16 |         self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
 17 |         self.model_type = model_type
 18 |         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
 19 |         
 20 |         # Load model
 21 |         self.model = LilLM(Config(flash=False))
 22 |         self.model.eval()
 23 |         self.model.to(self.device)
 24 |         
 25 |         # Load checkpoint
 26 |         checkpoint = torch.load(model_path, map_location=self.device)
 27 |         state_dict = checkpoint['model']
 28 |         
 29 |         # Remove unwanted prefix if present
 30 |         unwanted_prefix = '_orig_mod.'
 31 |         for k in list(state_dict.keys()):
 32 |             if k.startswith(unwanted_prefix):
 33 |                 state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
 34 |         
 35 |         self.model.load_state_dict(checkpoint['model'])
 36 |         self.eos = torch.tensor([[2]]).to(self.device)
 37 |     
 38 |     def add_chat_format(self, conversation):
 39 |         formatted_chat = ""
 40 |         for message in conversation:
 41 |             role = message["role"]
 42 |             content = message["content"]
 43 |             if role == "user":
 44 |                 formatted_chat += f"<r0>user<r1>{content}</r2>"
 45 |             elif role == "assistant":
 46 |                 formatted_chat += f"<r0>assistant<r1>{content}</s>"
 47 |         # Add the final assistant prompt to get model to generate a response
 48 |         formatted_chat += "<r0>assistant<r1>"
 49 |         return formatted_chat
 50 |     
 51 |     def generate_response(self, message, chat_history):
 52 |         # Add the new user message to the chat history
 53 |         if chat_history is None:
 54 |             chat_history = []
 55 |         
 56 |         # Format conversation for model input
 57 |         conversation = []
 58 |         for user_msg, assistant_msg in chat_history:
 59 |             conversation.append({"role": "user", "content": user_msg})
 60 |             conversation.append({"role": "assistant", "content": assistant_msg})
 61 |         
 62 |         # Add the new message
 63 |         conversation.append({"role": "user", "content": message})
 64 |         
 65 |         # Format the conversation for the model
 66 |         template_text = self.add_chat_format(conversation) if self.model_type == "sft" else message
 67 |         
 68 |         # Generate response
 69 |         t0 = time.time()
 70 |         start_prompt = torch.tensor(self.tokenizer.encode(template_text)).unsqueeze(dim=0).to(self.device)
 71 |         generated_output = self.model.generate(start_prompt, self.eos).squeeze()
 72 |         response = self.tokenizer.decode(generated_output)
 73 |         # Extract just the assistant's response from the full output
 74 |         print('conversation', conversation)
 75 |         if self.model_type == "sft":
 76 |             # The response should be after the last assistant tag
 77 |             last_assistant_tag_pos = response.rfind("<r0> assistant<r1>") + len("<r0> assistant<r1>")
 78 |             response = response[last_assistant_tag_pos:].replace("</s>", "").strip()
 79 |             print('ggg response', response) 
 80 |         t1 = time.time()
 81 |         print(f'Generation completed in {t1-t0} seconds')
 82 |         
 83 |         # Update chat history
 84 |         chat_history.append((message, response))
 85 |         return "", chat_history
 86 | 
 87 | def create_chat_interface(tokenizer_path, model_path, model_type):
 88 |     chat_interface = ChatInterface(tokenizer_path, model_path, model_type)
 89 |     
 90 |     with gr.Blocks(css="footer {visibility: hidden}") as demo:
 91 |         gr.Markdown("# LilLM Chat Interface")
 92 |         
 93 |         chatbot = gr.Chatbot(height=600)
 94 |         msg = gr.Textbox(placeholder="Type your message here...", show_label=False)
 95 |         clear = gr.Button("Clear")
 96 |         
 97 |         msg.submit(
 98 |             chat_interface.generate_response, 
 99 |             [msg, chatbot], 
100 |             [msg, chatbot]
101 |         )
102 |         clear.click(lambda: None, None, chatbot, queue=False)
103 |         
104 |     return demo
105 | 
106 | if __name__ == '__main__':
107 | 
108 |     torch.serialization.add_safe_globals([Config])
109 |     parser = argparse.ArgumentParser(description='LilLM Chat Interface')
110 |     parser.add_argument("--tokenizer_path", type=str, default=DEFAULT_TOKENIZER_PATH, help="Tokenizer path")
111 |     parser.add_argument("--model_path", type=str, default=DEFAULT_MODEL_PATH, help="Model path")
112 |     parser.add_argument("--model_type", type=str, default=DEFAULT_MODEL_TYPE, help="sft model or pretrained")
113 |     args = parser.parse_args()
114 |     
115 |     # Update the ChatInterface class with command line arguments
116 | 
117 |     # Launch the interface
118 |     demo = create_chat_interface(args.tokenizer_path, args.model_path,args.model_type )
119 |     demo.launch(share=False)
120 | 


--------------------------------------------------------------------------------
/notebooks/pre_process_data.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "id": "02c4e02c",
  7 |    "metadata": {},
  8 |    "outputs": [
  9 |     {
 10 |      "name": "stderr",
 11 |      "output_type": "stream",
 12 |      "text": [
 13 |       "/Users/cohlem/anaconda3/envs/deep_learning/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
 14 |       "  from .autonotebook import tqdm as notebook_tqdm\n"
 15 |      ]
 16 |     },
 17 |     {
 18 |      "name": "stdout",
 19 |      "output_type": "stream",
 20 |      "text": [
 21 |       "Raw text size: 49 bytes\n",
 22 |       "Tokenized size: 24 bytes\n"
 23 |      ]
 24 |     }
 25 |    ],
 26 |    "source": [
 27 |     "from transformers import GPT2Tokenizer\n",
 28 |     "import numpy as np\n",
 29 |     "\n",
 30 |     "# Load tokenizer\n",
 31 |     "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
 32 |     "\n",
 33 |     "# Example text\n",
 34 |     "text = \"Hello, world! This is an example of tokenization.\"\n",
 35 |     "\n",
 36 |     "# Tokenize the text\n",
 37 |     "tokens = tokenizer.encode(text)\n",
 38 |     "\n",
 39 |     "# Save raw text to a file\n",
 40 |     "with open(\"raw_text.txt\", \"w\", encoding=\"utf-8\") as f:\n",
 41 |     "    f.write(text)\n",
 42 |     "\n",
 43 |     "# Save tokenized data to a binary file\n",
 44 |     "tokens_np = np.array(tokens, dtype=np.uint16)\n",
 45 |     "tokens_np.tofile(\"tokenized_data.bin\")\n",
 46 |     "\n",
 47 |     "# Compare file sizes\n",
 48 |     "import os\n",
 49 |     "raw_text_size = os.path.getsize(\"raw_text.txt\")\n",
 50 |     "tokenized_size = os.path.getsize(\"tokenized_data.bin\")\n",
 51 |     "\n",
 52 |     "print(f\"Raw text size: {raw_text_size} bytes\")\n",
 53 |     "print(f\"Tokenized size: {tokenized_size} bytes\")"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": 5,
 59 |    "id": "da6decfe",
 60 |    "metadata": {},
 61 |    "outputs": [
 62 |     {
 63 |      "data": {
 64 |       "text/plain": [
 65 |        "49"
 66 |       ]
 67 |      },
 68 |      "execution_count": 5,
 69 |      "metadata": {},
 70 |      "output_type": "execute_result"
 71 |     }
 72 |    ],
 73 |    "source": [
 74 |     "len(list(text.encode('utf-8')))"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 9,
 80 |    "id": "e6aa8db6",
 81 |    "metadata": {},
 82 |    "outputs": [
 83 |     {
 84 |      "data": {
 85 |       "text/plain": [
 86 |        "24"
 87 |       ]
 88 |      },
 89 |      "execution_count": 9,
 90 |      "metadata": {},
 91 |      "output_type": "execute_result"
 92 |     }
 93 |    ],
 94 |    "source": [
 95 |     "len(tokens_np)*2"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 10,
101 |    "id": "b347f08a",
102 |    "metadata": {},
103 |    "outputs": [
104 |     {
105 |      "data": {
106 |       "text/plain": [
107 |        "2.0416666666666665"
108 |       ]
109 |      },
110 |      "execution_count": 10,
111 |      "metadata": {},
112 |      "output_type": "execute_result"
113 |     }
114 |    ],
115 |    "source": [
116 |     "49/24"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 1,
122 |    "id": "901f396b",
123 |    "metadata": {},
124 |    "outputs": [],
125 |    "source": [
126 |     "from torch.utils.data import Dataset, DataLoader"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": null,
132 |    "id": "9a10d716",
133 |    "metadata": {},
134 |    "outputs": [],
135 |    "source": []
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": null,
140 |    "id": "c730062f",
141 |    "metadata": {},
142 |    "outputs": [],
143 |    "source": [
144 |     "class PretrainDataset(Dataset):\n",
145 |     "    def __init__(self, df, tokenizer, max_length=512):\n",
146 |     "        super().__init__()\n",
147 |     "        self.df = df\n",
148 |     "        self.tokenizer = tokenizer\n",
149 |     "        self.max_length = max_length\n",
150 |     "        self.padding = 0\n",
151 |     "\n",
152 |     "    def __len__(self):\n",
153 |     "        return self.df.shape[0]\n",
154 |     "\n",
155 |     "    def __getitem__(self, index: int):\n",
156 |     "        #\n",
157 |     "        sample = self.df.iloc[index]\n",
158 |     "        text = f\"{self.tokenizer.bos_token}{str(sample['text'])}{self.tokenizer.eos_token}\"\n",
159 |     "        input_id = self.tokenizer(text).data['input_ids'][:self.max_length]\n",
160 |     "        text_len = len(input_id)\n",
161 |     "        # 没满最大长度的剩余部分\n",
162 |     "        padding_len = self.max_length - text_len\n",
163 |     "        input_id = input_id + [self.padding] * padding_len\n",
164 |     "        # 0表示不计算损失\n",
165 |     "        loss_mask = [1] * text_len + [0] * padding_len\n",
166 |     "\n",
167 |     "        input_id = np.array(input_id)\n",
168 |     "        X = np.array(input_id[:-1]).astype(np.int64)\n",
169 |     "        Y = np.array(input_id[1:]).astype(np.int64)\n",
170 |     "        loss_mask = np.array(loss_mask[1:]).astype(np.int64)\n",
171 |     "        return torch.from_numpy(X), torch.from_numpy(Y), torch.from_numpy(loss_mask)"
172 |    ]
173 |   }
174 |  ],
175 |  "metadata": {
176 |   "kernelspec": {
177 |    "display_name": "deep_learning",
178 |    "language": "python",
179 |    "name": "deep_learning"
180 |   },
181 |   "language_info": {
182 |    "codemirror_mode": {
183 |     "name": "ipython",
184 |     "version": 3
185 |    },
186 |    "file_extension": ".py",
187 |    "mimetype": "text/x-python",
188 |    "name": "python",
189 |    "nbconvert_exporter": "python",
190 |    "pygments_lexer": "ipython3",
191 |    "version": "3.12.7"
192 |   }
193 |  },
194 |  "nbformat": 4,
195 |  "nbformat_minor": 5
196 | }
197 | 


--------------------------------------------------------------------------------
/model/model.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import random
  3 | import time
  4 | import torch
  5 | from torch import nn
  6 | import torch.nn.functional as F
  7 | import numpy as np
  8 | import math
  9 | 
 10 | 
 11 | def precompute_cis(dim: int, end: int, theta: float = 10000.0):
 12 |     freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim))
 13 |     t = torch.arange(end, device=freqs.device)
 14 |     freqs = torch.outer(t, freqs).float()
 15 |     pos_cis = torch.polar(torch.ones_like(freqs), freqs)  # complex64
 16 |     return pos_cis
 17 | 
 18 | def apply_rotary_pe(xq, xk, cis):
 19 | 
 20 |     # at this point cis will have shape (T,head_dim//2) and we transform it to (1,T,1,head_dim//2)
 21 |     #cause we need to broadcast it so we can multiply it with (B,T,n_heads, head_dim//2)
 22 |     # head_dim is div by 2 cause, it is complex form (ex. 1+2j)
 23 |     def reshape_for_broadcast(pos_cis, x):
 24 |         ndim = x.ndim
 25 |         assert 0 <= 1 < ndim
 26 |         assert pos_cis.shape == (x.shape[1], x.shape[-1]) # x.shape = (B,T,n_heads,head_dim), T and C should batch
 27 |         shape = [d if i == 1 or i == ndim - 1 else 1 for i, d in enumerate(x.shape)]
 28 |         return pos_cis.view(*shape)
 29 | 
 30 |     # (B,T,n_heads,head_dim) --> (B,T,n_heads, head_dim//2, 2), cause view_as_complex expects last dim to be 2. [1,2] --> [1+2j]
 31 | 
 32 |     xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1],-1,2))
 33 |     xk_ = torch.view_as_complex(xk.float().reshape(*xk.shape[:-1],-1,2))
 34 | 
 35 | #     print(cis.shape, xq_.shape, xk_.shape)
 36 | 
 37 |     cis = reshape_for_broadcast(cis,xq_)
 38 | 
 39 |     # convert to real i.e (B,T,n_heads, head_dim//2,2) --> (B,T,n_heads, head_dim)
 40 |     xq_out,xk_out = torch.view_as_real(xq_*cis).flatten(3), torch.view_as_real(xk_*cis).flatten(3)
 41 | 
 42 |     return xq_out.type_as(xq), xk_out.type_as(xk)
 43 | 
 44 | def repeat_kv(x, n_rep):
 45 |     B,T,kv_heads, head_dim = x.shape
 46 |     if n_rep==1:
 47 |         return x
 48 |     else:
 49 |         return (
 50 |                 x[:,:,:,None,:]
 51 |                 .expand(size=(B,T,kv_heads,n_rep, head_dim))
 52 |                 .reshape(B,T,kv_heads * n_rep, head_dim)
 53 |                )
 54 | 
 55 | 
 56 | class Attention(nn.Module):
 57 |     def __init__(self,config):
 58 |         super().__init__()
 59 |         self.cfg = config
 60 |         self.head_dim = config.d_model // config.q_heads
 61 | 
 62 |         self.wq = nn.Linear(config.d_model, config.q_heads*self.head_dim, bias=False)
 63 |         self.wk = nn.Linear(config.d_model, config.kv_heads*self.head_dim, bias=False)
 64 |         self.wv = nn.Linear(config.d_model, config.kv_heads*self.head_dim, bias=False)
 65 |         self.wo = nn.Linear(config.q_heads * self.head_dim, config.d_model, bias=False)
 66 | 
 67 | 
 68 |         self.cache_k = None
 69 |         self.cache_v = None
 70 | 
 71 |         self.attn_dropout = nn.Dropout(config.dropout)
 72 |         self.resid_dropout = nn.Dropout(config.dropout)
 73 | 
 74 |         self.flash = config.flash
 75 |         self.dropout = config.dropout
 76 | 
 77 |         mask = torch.full((1,1,config.max_seq_len, config.max_seq_len), float('-inf')) # fill all elements by -inf
 78 |         mask = torch.triu(mask, diagonal=1) # only keep the values of the upper triangular matrix, others to 0
 79 |         #self.register_buffer('mask', mask, persistent=False) # register buffer, i.e not trainable parameter, also don't save in state dict
 80 |         if not self.flash:
 81 |           print('Using slow attention')
 82 |           self.register_buffer('mask', mask, persistent=False) # register buffer, i.e not trainable parameter, also don't save in state dict
 83 | 
 84 | 
 85 | 
 86 |     def forward(self,x,start_pos, freq_cis):
 87 | 
 88 |         B,T,C = x.shape # (batch_size, seq_len, emb_dim)
 89 | 
 90 |         # project input(self) into Q,K,V (this is why it's called self-attention)
 91 |         xq,xk,xv = self.wq(x), self.wk(x), self.wv(x)
 92 | 
 93 |         # divide into their respective heads, (B,T,C) --> (B,T,q_heads or kv_heads,head_dim)
 94 |         xq = xq.view(B,T,self.cfg.q_heads, self.head_dim)
 95 |         xk = xk.view(B,T,self.cfg.kv_heads, self.head_dim)
 96 |         xv = xv.view(B,T,self.cfg.kv_heads, self.head_dim)
 97 | 
 98 | 
 99 |         xq,xk = apply_rotary_pe(xq,xk, freq_cis)
100 | 
101 |         # use kv cache
102 |         if not self.training:
103 |             if self.cache_k is None and self.cache_v is None:
104 |                 self.cache_k = torch.zeros(
105 |                     self.cfg.max_batch_size,
106 |                     self.cfg.max_seq_len,
107 |                     self.cfg.kv_heads,
108 |                     self.head_dim,
109 |                     device=x.device
110 |                 )
111 |                 self.cache_v = torch.zeros(
112 |                     self.cfg.max_batch_size,
113 |                     self.cfg.max_seq_len,
114 |                     self.cfg.kv_heads,
115 |                     self.head_dim,
116 |                     device=x.device
117 |                 )
118 | 
119 |             #update the cache, i.e append the new sequence to where we left of (start_pos) in the last iteration.
120 |             #we add :B and not only : because B != max_batch_size
121 | 
122 |             self.cache_k[:B, start_pos: start_pos + T] = xk # T is most probably 1, cause during inference we only process on last tokens
123 |             self.cache_v[:B, start_pos: start_pos + T] = xv
124 | 
125 |             xk = self.cache_k[:B, :start_pos + T]
126 |             xv = self.cache_v[:B, :start_pos + T]
127 | 
128 |         # repeat key and value matrices, for group query attention
129 |         xk = repeat_kv(xk, self.cfg.q_heads // self.cfg.kv_heads)
130 |         xv = repeat_kv(xv, self.cfg.q_heads // self.cfg.kv_heads)
131 | 
132 | 
133 |         xq,xk,xv = xq.transpose(1,2), xk.transpose(1,2), xv.transpose(1,2) #transpose T,n_head cause we perform matrix mul on last two dims
134 | 
135 |     
136 |         if self.flash and T !=1:
137 |             out = F.scaled_dot_product_attention(xq, xk, xv,attn_mask=None, dropout_p=self.dropout, is_causal=True)
138 |         else:
139 | 
140 |             attn_score = xq @ xk.transpose(-1,-2)/ math.sqrt(self.head_dim)
141 | 
142 |             attn_score = attn_score + self.mask[:,:,:T,:T] # cause its inputs don't always have max_seq_len
143 |             attn_score = torch.softmax(attn_score, dim=-1) #normalize and make -inf to 0
144 |             attn_score = self.attn_dropout(attn_score)
145 |             out = attn_score @ xv
146 | 
147 |         out = out.transpose(1,2).contiguous().view(B,T,-1)
148 |         out = self.resid_dropout(self.wo(out))
149 | 
150 |         return out
151 | 
152 | 
153 | 
154 | class FFN(nn.Module):
155 |     def __init__(self,d_model, hidden_dim, multiple_of, dropout):
156 |         super().__init__()
157 |         self.d_model = d_model
158 |         if hidden_dim is None:
159 |             hidden_dim = 4*d_model
160 |             hidden_dim = int(2/3 * hidden_dim)
161 |             hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of) # make hidden dim multiple of {multiple_of}
162 | 
163 |         self.w1 = nn.Linear(d_model, hidden_dim, bias=False) # W
164 |         self.w2 = nn.Linear(hidden_dim, d_model, bias=False) # W2
165 |         self.w3 = nn.Linear(d_model, hidden_dim, bias=False) # V
166 | 
167 |         self.dropout = nn.Dropout(dropout)
168 | 
169 |     def forward(self,x):
170 |         # Swish(x) = x * sigmoid(ßx)
171 |         # GLU(x) = sigmoid(xW+b)⊗(xV+c)
172 |         # SwiGLU(x) = Swish(Wx + b) ⊗ (Vx+c)
173 |         # Omit bias -> SwiGLU(x) = Swish(xW) ⊗ (xV)
174 |         # FFFNswiglu = SwiGLU(x)W2, with b parameter=1
175 |         # since there is additional parameter V when compared to what's used in transformers,
176 |         # we reduce the output dimension of W,V by 2/3 and input dimension of W2 by 2/3
177 | 
178 |         return self.dropout(self.w2(F.silu(self.w1(x))*self.w3(x)))
179 | 
180 | class RMSNorm(nn.Module):
181 |     def __init__(self,d_model,norm_eps=1e-6):
182 |         super().__init__()
183 |         self.gain = nn.Parameter(torch.ones(d_model))
184 |         self.eps = norm_eps
185 |     def _norm(self,x):
186 |         return x * torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + self.eps)
187 | 
188 |     def forward(self,x):
189 |         # convert to higher precision float32 for rms for accuracy, then back to their original type
190 |         out = self._norm(x.float()).type_as(x)
191 |         return self.gain * out
192 | 
193 | 
194 | 
195 | class TransformerBlock(nn.Module):
196 |     def __init__(self,layer_id, config):
197 |         super().__init__()
198 |         self.attn = Attention(config)
199 |         self.ffn = FFN(
200 |             config.d_model,
201 |             config.hidden_dim,
202 |             config.multiple_of,
203 |             config.dropout
204 |         )
205 |         self.layer_id = layer_id
206 |         self.attn_norm = RMSNorm(config.d_model, config.eps)
207 |         self.ffn_norm = RMSNorm(config.d_model, config.eps)
208 | 
209 |     def forward(self, x, start_pos, freq_cis):
210 |         x = x + self.attn(self.attn_norm(x),start_pos, freq_cis)
211 |         x = x + self.ffn(self.ffn_norm(x))
212 |         return x
213 | 
214 | 
215 | 
216 | 
217 | 
218 | class LilLM(nn.Module):
219 |     def __init__(self,config):
220 |         super().__init__()
221 |         self.cfg = config
222 |         self.tok_emb = nn.Embedding(self.cfg.vocab_size, self.cfg.d_model) # embedding layer
223 |         freq_cis = precompute_cis(config.d_model//config.q_heads, config.max_seq_len)
224 |         self.register_buffer('freq_cis', freq_cis, persistent = False)
225 |         self.transformer_blocks = nn.ModuleList([TransformerBlock(layer_id, config) for layer_id in range(config.n_layers)])
226 |         self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False) # project 512 dim -> vocab_size for classification
227 |         self.norm = RMSNorm(config.d_model, config.eps)
228 |         self.tok_emb.weight = self.lm_head.weight # use the same weight for token's index -> embedding, and  embedding -> token's index
229 |         self.apply(self._init_weights)
230 | 
231 |         # weight initialization for residual blocks, 2 comes from the fact that each layer contains
232 |         # 1. attention and 2. feedforward
233 |         for pn, p in self.named_parameters():
234 |             if pn.endswith('w2.weight') or pn.endswith('wo.weight'):
235 |                 # 0.02 comes from 1/sqrt(input_features) which balances variance introduced by input features
236 |                 # and 1/math.sqrt(2*n_layers) accounts for balancing the variance added by the layers
237 |                 torch.nn.init.normal_(p, mean=0.0, std=0.02 / math.sqrt(2 * config.n_layers))
238 | 
239 | 
240 |     def _init_weights(self, module):
241 |         if isinstance(module, nn.Linear):
242 | #             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
243 |             torch.nn.init.normal_(module.weight, mean=0.0, std= module.weight.shape[-1]**-0.5) # end dimension = input features
244 |             if module.bias is not None:
245 |                 torch.nn.init.zeros_(module.bias)
246 | 
247 |         if isinstance(module, nn.Embedding):
248 | 
249 |             torch.nn.init.normal_(module.weight, mean=0.0, std= module.weight.shape[-1]**-0.5)
250 | 
251 | 
252 | 
253 |     def forward(self,x, start_pos=0, targets=None):
254 |         B,T = x.shape
255 |         x = self.tok_emb(x) # (B,T,C)
256 | 
257 |         for block in self.transformer_blocks:
258 |             x = block(x,start_pos, self.freq_cis[start_pos: start_pos + T])
259 | 
260 |         x = self.norm(x)
261 | 
262 | 
263 |         if targets is not None:
264 |             logits = self.lm_head(x)
265 |             loss = F.cross_entropy(logits.view(-1, logits.shape[-1]), targets.view(-1), ignore_index=-1)
266 | 
267 |         else:
268 |             # only transform the last sequence, use of [] preserves the original shape
269 |             # alternatively, we can use logits = self.lm_head(x[:,-1:, :])
270 |             logits = self.lm_head(x[:,[-1], :])
271 |             loss = None
272 | 
273 | 
274 |         return logits,loss
275 | 
276 | 
277 |     @torch.no_grad()
278 |     def generate(self, x, eos, temperature=1.0, k=None):
279 |         # x is two dim, batch_size, seq_len
280 |         start_pos = 0
281 |         init_inference = True
282 |         for block in self.transformer_blocks:
283 |             block.attn.cache_k = None
284 |             block.attn.cache_v = None
285 | 
286 |         while x.shape[1] < self.cfg.max_seq_len:
287 | 
288 |             if init_inference:  # pass the first tokens
289 |                 logits, _ = self(x, start_pos=0, targets=None)
290 |                 init_inference = False
291 |                 start_pos = x.shape[-1]
292 |             else:  # Afterwards pass one token at a time
293 |                 logits, _ = self(x[:, -1:], start_pos=start_pos, targets=None)
294 |                 start_pos += 1
295 | 
296 |             logits = logits[:, -1, :]
297 | 
298 |             if k is not None:
299 |                 logits = logits / temperature
300 |                 v, _ = torch.topk(logits, k=min(k, logits.size(-1)))
301 |                 logits[logits < v[:, [-1]]] = -float('Inf')
302 | 
303 |             probs = F.softmax(logits, dim=-1)
304 |             pred_token = torch.multinomial(probs, num_samples=1)
305 | 
306 |             if pred_token == eos:
307 |                 break
308 | 
309 |             x = torch.cat((x, pred_token), dim=1)
310 | 
311 |         return x
312 | 


--------------------------------------------------------------------------------
/pretrain.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import os
  3 | import time
  4 | import math
  5 | import argparse
  6 | 
  7 | import wandb
  8 | import torch
  9 | import numpy as np
 10 | from torch import nn
 11 | import torch.nn.functional as F
 12 | from torch.nn.parallel import DistributedDataParallel as DDP
 13 | from torch.distributed import init_process_group, destroy_process_group
 14 | from contextlib import nullcontext
 15 | 
 16 | from model.config import Config
 17 | from model.model import LilLM
 18 | from model.utils import calculate_transformer_flops
 19 | 
 20 | DEFAULT_DATA_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data/pretraining")
 21 | DEFAULT_OUT_DIR = ""
 22 | DEFAULT_BATCH_SIZE = 128
 23 | DEFAULT_BLOCK_SIZE = 512
 24 | DEFAULT_MAX_ITERS = 20000
 25 | DEFAULT_GRAD_CLIP = 1.0
 26 | DEFAULT_EVAL_INTERVAL = 200  # do eval every 200 interval
 27 | DEFAULT_LOG_INTERVAL = 10
 28 | DEFAULT_EVAL_ITERS = 20  # for accumulate eval losses for 200 iters
 29 | DEFAULT_BEST_VAL_LOSS = 1e9
 30 | # learning rate decay settings
 31 | DEFAULT_DECAY_LR = True  # whether to decay the learning rate
 32 | DEFAULT_WARMUP_ITERS = 2000  # how many steps to warm up for
 33 | DEFAULT_LR_DECAY_ITERS = 600000  # should be ~= max_iters per Chinchilla
 34 | DEFAULT_MIN_LR = 6e-5  # minimum learning rate, should be ~= learning_rate/10 per Chinchilla
 35 | DEFAULT_LEARNING_RATE = 6e-4  # max learning rate
 36 | DEFAULT_RUNNING_MFU = -1
 37 | DEFAULT_DEVICE = (
 38 |     "cuda" if torch.cuda.is_available() else "cpu"
 39 | )  # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1' etc., or try 'mps' on macbooks
 40 | DEFAULT_DTYPE = (
 41 |     "bfloat16"
 42 |     if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
 43 |     else "float16"
 44 | )  # 'float32', 'bfloat16', or 'float16', the latter will auto implement a GradScaler
 45 | # we want to do gradient update per 0.5M tokens, but our GPU can't fit that size i.e lets say our we have block_size=1024, it would take
 46 | # 488 batch_size to do it in the single run, but our gpu can't fit it, so we divide further, i.e we accumulate gradient on smaller batch, once we
 47 | # have accumulated gradients for 0.5M tokens, we do the update, otherwise just accumulate the gradients. keeping batch_size=16, block_size=1024, we need
 48 | # divide into 0.5*1e6/(16*1024) steps, which we name gradient_accumulation_steps
 49 | DEFAULT_GRADIENT_ACCUMULATION_STEPS = 8
 50 | #init_from = "scratch"
 51 | DEFAULT_INIT_FROM = "scratch"
 52 | # wandb logging
 53 | DEFAULT_WANDB_PROJECT = "LilLM"
 54 | DEFAULT_WANDB_RUN_NAME = "GPU_RUN_NEW"
 55 | 
 56 | DEFAULT_COMPILE = True
 57 | 
 58 | 
 59 | # learning rate decay scheduler (cosine with warmup)
 60 | def get_lr(it, learning_rate, min_lr, warmup_iters, lr_decay_iters ):
 61 |     # 1) linear warmup for warmup_iters steps
 62 |     if it < warmup_iters:
 63 |         return learning_rate * (it + 1) / (warmup_iters + 1)
 64 |     # 2) if it > lr_decay_iters, return min learning rate
 65 |     if it > lr_decay_iters:
 66 |         return min_lr
 67 |     # 3) in between, use cosine decay down to min learning rate
 68 |     decay_ratio = (it - warmup_iters) / (lr_decay_iters - warmup_iters)
 69 |     assert 0 <= decay_ratio <= 1
 70 |     coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))  # coeff ranges 0..1
 71 |     return min_lr + coeff * (learning_rate - min_lr)
 72 | 
 73 | 
 74 | @torch.no_grad()
 75 | def estimate_losses(model, data_path, batch_size, block_size, device, eval_iters, device_type, ctx):
 76 |     out = {}
 77 |     model.eval()
 78 |     for split in ["train", "val"]:
 79 |         each_loss = 0
 80 |         for i in range(eval_iters):
 81 |             x, y = data_loader(data_path, split,batch_size, block_size, device, device_type)
 82 |             with ctx:
 83 |                 logits, loss = model(x, targets=y)
 84 |             each_loss += loss.item()
 85 |         out[split] = each_loss / eval_iters
 86 |     model.train()
 87 |     return out
 88 | 
 89 | 
 90 | # Data Loader
 91 | def data_loader(data_path, split, batch_size, block_size, device, device_type):
 92 |     filename = os.path.join(data_path, f"{split}.bin")
 93 |     data = np.memmap(
 94 |         filename, mode="r", dtype=np.uint16
 95 |     )  # please make sure to load the bin file with correct dtype, this costed be some $$
 96 |     ids = torch.randint((len(data) - block_size), (batch_size,))
 97 | 
 98 |     X = torch.stack(
 99 |         [torch.from_numpy(data[i : i + block_size].astype(np.int64)) for i in ids]
100 |     )
101 |     Y = torch.stack(
102 |         [
103 |             torch.from_numpy(data[i + 1 : i + 1 + block_size].astype(np.int64))
104 |             for i in ids
105 |         ]
106 |     )
107 | 
108 |     if device_type == "cuda":
109 |         # pin to (page-locked) memory in the host (CPU) memory, so the write to GPU is faster, also enable async data transfer(non_blocking=True)
110 |         return X.pin_memory().to(device, non_blocking=True), Y.pin_memory().to(
111 |             device, non_blocking=True
112 |         )
113 |     else:
114 |         return X.to(device), Y.to(device)
115 | 
116 | 
117 | def set_distributed():
118 |     ddp = int(os.environ.get("RANK", -1)) != -1
119 |     if ddp:
120 |         init_process_group(backend="nccl")
121 |         ddp_rank = int(os.environ["RANK"])
122 |         ddp_local_rank = int(os.environ["LOCAL_RANK"])
123 |         ddp_world_size = int(os.environ["WORLD_SIZE"])
124 |         torch.cuda.set_device(f"cuda:{ddp_local_rank}")
125 |         return ddp, ddp_rank, ddp_local_rank, ddp_world_size
126 |     return 0, 0, 0, 1
127 | 
128 | 
129 | def main(args):
130 |     ddp, ddp_rank, ddp_local_rank,ddp_world_size = set_distributed()
131 |     #device = f"cuda:{ddp_local_rank}" if ddp else 'cpu'
132 |     if ddp:
133 |         device = f"cuda:{ddp_local_rank}"
134 |     else:
135 |         device = args.device
136 | 
137 |     master_process = ddp_rank == 0
138 |     torch.manual_seed(1337 + ddp_rank)  # set different seed for differnt gpus
139 |     assert args.gradient_accumulation_steps % ddp_world_size == 0
140 |     args.gradient_accumulation_steps //= ddp_world_size
141 | 
142 |     torch.backends.cuda.matmul.allow_tf32 = True
143 |     torch.backends.cudnn.allow_tf32 = True  # allow tf32 on cudnn (eg. convolutions nn.Conv)
144 |     device_type = "cuda" if "cuda" in device else "cpu"
145 |     # note: float16 data type will automatically use a GradScaler
146 |     ptdtype = {
147 |         "float32": torch.float32,
148 |         "bfloat16": torch.bfloat16,
149 |         "float16": torch.float16,
150 |     }[args.dtype]
151 |     ctx = (
152 |         nullcontext()
153 |         if device_type == "cpu"
154 |         else torch.amp.autocast(device_type=device_type, dtype=ptdtype)
155 |     )
156 | 
157 |     print("tokens per iteration", args.gradient_accumulation_steps * ddp_world_size * args.batch_size * args.block_size)
158 |     model_config = Config(max_seq_len=args.block_size, max_batch_size=args.batch_size)
159 |     # Flops estimation
160 |     flops_per_model = calculate_transformer_flops(
161 |         seq_len=model_config.max_seq_len,
162 |         vocab_size=model_config.vocab_size,
163 |         d_model=model_config.d_model,
164 |         key_size=model_config.d_model/model_config.q_heads,
165 |         num_heads=model_config.q_heads,
166 |         ffw_size=model_config.hidden_dim if model_config.hidden_dim is not None else 4*model_config.d_model,
167 |         num_layers=model_config.n_layers
168 |     )
169 |     flops_per_step = flops_per_model * model_config.max_batch_size * args.gradient_accumulation_steps * ddp_world_size
170 |     running_mfu = -1.0
171 |     # Train from scratch or from a checkpoint
172 |     if args.init_from == "scratch":
173 |         model = LilLM(model_config)
174 |         num_iter = 0
175 |         best_val_loss = DEFAULT_BEST_VAL_LOSS
176 |     elif args.init_from == "resume":  # resume from a checkpoint
177 |         torch.serialization.add_safe_globals([Config])
178 |         checkpoint = torch.load(os.path.join(args.out_dir, 'best_model.pt'), map_location=device)
179 |         model_config = checkpoint['config'] 
180 |         model = LilLM(model_config)
181 |         # saved model keys contain some prefix, we need to rename them to our original name
182 |         state_dict = checkpoint['model'] 
183 |         unwanted_prefix = '_orig_mod.'
184 |         for k in list(state_dict.keys()):
185 |             if k.startswith(unwanted_prefix):
186 |                 state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
187 |         
188 |         model.load_state_dict(state_dict) 
189 |         num_iter = checkpoint['num_iter']
190 |         best_val_loss = checkpoint['best_val_loss']
191 | 
192 |     if args.init_from == "resume" and master_process:
193 |         wandb.init(
194 |             project=args.wandb_project,
195 |             name=args.wandb_run_name,
196 |             id=checkpoint.get('wandb_run_id'),  # You'll need to save this in checkpoint
197 |             resume="must"
198 |         )
199 |     elif wandb and master_process:
200 |         wandb.init(project=args.wandb_project, name=args.wandb_run_name, config=model_config)
201 | 
202 |     model.to(device)
203 |     # scaler is required if we use fp16, cause gradients and loss need to be scaled because of lower range i.e 5-bit exponent (gradients, may explode or vanish)
204 |     # not needed for bf16, cause it has 8-bit exponent
205 |     scaler = torch.cuda.amp.GradScaler(enabled=(args.dtype == "float16"))
206 |     optimizer = torch.optim.AdamW(model.parameters(), lr=args.learning_rate)
207 | 
208 | 
209 |     if DEFAULT_COMPILE and ddp:
210 |         print("torch compiling the model..")
211 |         unoptimized_model = model
212 |         model = torch.compile(model)
213 |     if ddp:
214 |         model = DDP(model, device_ids=[ddp_local_rank])
215 | 
216 |     raw_model = model.module if ddp else model
217 | 
218 | 
219 |     while True:
220 |         # pick learning rate
221 |         lr = get_lr(num_iter, args.learning_rate, args.min_lr, args.warmup_iters, args.lr_decay_iters) if DEFAULT_DECAY_LR else args.learning_rate
222 | 
223 |         for param_group in optimizer.param_groups:
224 |             param_group["lr"] = lr
225 | 
226 |         # evaluation and log losses to wandb
227 |         if num_iter % args.eval_interval == 0 and master_process:
228 |             losses = estimate_losses(model, args.data_path, args.batch_size, args.block_size, device, args.eval_iters, device_type, ctx)
229 |             print(
230 |                 f'Steps {num_iter} train loss:{losses["train"]} val loss: {losses["val"]} '
231 |             )
232 |             if wandb:
233 |                 wandb.log(
234 |                     {
235 |                         "iter": num_iter,
236 |                         "train_loss": losses["train"],
237 |                         "val_loss": losses["val"],
238 |                         "lr": lr,
239 |                     }
240 |                 )
241 | 
242 |             # Save checkpoint with best loss
243 |             if losses["val"] < best_val_loss:
244 |                 best_val_loss = losses["val"]
245 | 
246 |                 checkpoint = {
247 |                     "model": raw_model.state_dict(),
248 |                     "optimizer": optimizer.state_dict(),
249 |                     "best_val_loss": best_val_loss,
250 |                     "num_iter": num_iter,
251 |                     "config": model_config,
252 |                     "wandb_run_id": wandb.run.id
253 |                 }
254 |                 torch.save(checkpoint, os.path.join(args.out_dir, "best_model.pt"))
255 | 
256 |         t1 = time.time()
257 |         for micro_step in range(args.gradient_accumulation_steps):
258 |             x, y = data_loader(args.data_path, "train",args.batch_size, args.block_size,device, device_type)
259 |             # x,y = torch.randint(0,10,(10,256)).to(device), torch.randint(0,10,(10,256)).to(device)
260 | 
261 |             if ddp:
262 |                 # in DDP training we only need to sync gradients at the last micro step.
263 |                 # the official way to do this is with model.no_sync() context manager, but
264 |                 # I really dislike that this bloats the code and forces us to repeat code
265 |                 # looking at the source of that context manager, it just toggles this variable
266 |                 model.require_backward_grad_sync = (
267 |                     micro_step == args.gradient_accumulation_steps - 1
268 |                 )
269 |             with ctx:
270 |                 logits, loss = model(x, targets=y)
271 |                 loss = loss / args.gradient_accumulation_steps
272 |             # gradient sync happens here
273 |             # why scale??
274 |             # fp16 can only store 5-bit exponents, to preserve large numbers from getting zerod
275 |             # the scaler multiplies our numbers with scaler eg 1024
276 |             # exp: np.float16(0.0000000123423543) will result in 0, cause there are not enough
277 |             # exponent to store this number, so when we do this np.float16(0.0000000123423543*1024)
278 |             # result is np.float16(1.264e-05), its a scaled value.
279 |             scaler.scale(loss).backward()
280 | 
281 |         if args.grad_clip != 0.0:
282 |             # unscale the gradients, cause we need higher precision in AdamW optimzers, and we don't use mixed-precision
283 |             scaler.unscale_(optimizer)
284 |             # clip the gradients to prevent vanishing gradient problem
285 |             torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
286 | 
287 |         scaler.step(optimizer)
288 |         # adjusts the loss scaling factor dynamically, eg, if prev step caused overflow
289 |         # decrease scaling factor, else increase scaling factor
290 |         scaler.update()
291 |         optimizer.zero_grad(set_to_none=True)
292 |         num_iter += 1
293 |         t2 = time.time()
294 |         dt = t2 - t1
295 |         if num_iter % args.log_interval == 0 and master_process:
296 |             if num_iter >=5:
297 |                 flops_promised = 312e12 # flops that we can do in A100 for bfloat16
298 |                 mfu = flops_per_step / (flops_promised * (dt))
299 |                 running_mfu =  mfu if running_mfu == -1.0 else 0.9*running_mfu + 0.1*mfu
300 |                 print(
301 |                     f"iteration: {num_iter} loss: {(loss.item() * args.gradient_accumulation_steps):.4f} time_taken: {(dt):.2f}, mfu : {mfu*100:.2f}%"
302 |                 )
303 |         if num_iter > args.max_iters:
304 |             break
305 | 
306 |     if ddp:
307 |         destroy_process_group()
308 | 
309 | if __name__ == '__main__':
310 |     parser = argparse.ArgumentParser(description='Train LilLM')
311 |     parser.add_argument("--batch_size", type=int, default=DEFAULT_BATCH_SIZE, help="Batch size for training.")
312 |     parser.add_argument("--block_size", type=int, default=DEFAULT_BLOCK_SIZE, help="Block size for training.")
313 |     parser.add_argument("--learning_rate", type=float, default=DEFAULT_LEARNING_RATE, help="Maximum learning rate.")
314 |     parser.add_argument("--min_lr", type=float, default=DEFAULT_MIN_LR, help="Minimum learning rate.")
315 |     parser.add_argument("--max_iters", type=int, default=DEFAULT_MAX_ITERS, help="Maximum number of iterations.")
316 |     parser.add_argument("--grad_clip", type=float, default=DEFAULT_GRAD_CLIP, help="Gradient clipping value.")
317 |     parser.add_argument("--eval_interval", type=int, default=DEFAULT_EVAL_INTERVAL, help="Evaluation interval.")
318 |     parser.add_argument("--log_interval", type=int, default=DEFAULT_LOG_INTERVAL, help="Logging interval.")
319 |     parser.add_argument("--eval_iters", type=int, default=DEFAULT_EVAL_ITERS, help="Number of iterations for evaluation.")
320 |     parser.add_argument("--warmup_iters", type=int, default=DEFAULT_WARMUP_ITERS, help="Number of warmup iterations.")
321 |     parser.add_argument("--lr_decay_iters", type=int, default=DEFAULT_LR_DECAY_ITERS, help="Number of iterations for learning rate decay.")
322 |     parser.add_argument("--gradient_accumulation_steps", type=int, default=DEFAULT_GRADIENT_ACCUMULATION_STEPS, help="Gradient accumulation steps.")
323 |     parser.add_argument("--device", type=str, default=DEFAULT_DEVICE, help="Device to use for training (e.g., 'cuda' or 'cpu').")
324 |     parser.add_argument("--dtype", type=str, default=DEFAULT_DTYPE, help="Data type for training (e.g., 'float16', 'bfloat16').")
325 |     parser.add_argument("--wandb_project", type=str, default=DEFAULT_WANDB_PROJECT, help="Wandb project name.")
326 |     parser.add_argument("--wandb_run_name", type=str, default=DEFAULT_WANDB_RUN_NAME, help="Wandb run name.")
327 |     parser.add_argument("--out_dir", type=str, default=DEFAULT_OUT_DIR, help="Directory to save checkpoints.")
328 |     parser.add_argument("--data_path", type=str, default=DEFAULT_DATA_PATH, help="Path to the training data.")
329 |     parser.add_argument("--init_from", type=str, default=DEFAULT_INIT_FROM, help="resume or scratch")
330 |     args = parser.parse_args()
331 |     main(args)
332 | 
333 | 


--------------------------------------------------------------------------------
/sft_train.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import os
  3 | import time
  4 | import math
  5 | import argparse
  6 | 
  7 | import wandb
  8 | import torch
  9 | import numpy as np
 10 | from torch import nn
 11 | import torch.nn.functional as F
 12 | from torch.nn.parallel import DistributedDataParallel as DDP
 13 | from torch.distributed import init_process_group, destroy_process_group
 14 | from contextlib import nullcontext
 15 | 
 16 | from model.config import Config
 17 | from model.model import LilLM
 18 | from model.utils import calculate_transformer_flops
 19 | from model.dataset import SFTDataset
 20 | 
 21 | # Only these variables need changes
 22 | DEFAULT_DATA_PATH = "CohleM/lillm-sft-dataset-512-including-hard-coded-mixture"
 23 | DEFAULT_TOKENIZER_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "model/tokenizer")
 24 | DEFAULT_MODEL_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "best_model.pt")
 25 | 
 26 | 
 27 | DEFAULT_OUT_DIR = ""
 28 | #DEFAULT_BATCH_SIZE = 128
 29 | DEFAULT_BATCH_SIZE = 64 
 30 | DEFAULT_BLOCK_SIZE = 512
 31 | DEFAULT_MAX_ITERS = 2500 
 32 | DEFAULT_GRAD_CLIP = 1.0
 33 | DEFAULT_EVAL_INTERVAL = 20  # do eval every 200 interval
 34 | DEFAULT_LOG_INTERVAL = 5 
 35 | DEFAULT_EVAL_ITERS = 5  # for accumulate eval losses for 200 iters
 36 | DEFAULT_BEST_VAL_LOSS = 1e9
 37 | # learning rate decay settings
 38 | DEFAULT_DECAY_LR = True  # whether to decay the learning rate
 39 | DEFAULT_WARMUP_ITERS = 2000  # how many steps to warm up for
 40 | DEFAULT_LR_DECAY_ITERS = 600000  # should be ~= max_iters per Chinchilla
 41 | DEFAULT_MIN_LR = 6e-5  # minimum learning rate, should be ~= learning_rate/10 per Chinchilla
 42 | DEFAULT_LEARNING_RATE = 3e-4  # max learning rate
 43 | DEFAULT_RUNNING_MFU = -1
 44 | DEFAULT_DROPOUT_RATE = 0.2
 45 | # Check for available devices
 46 | DEFAULT_DEVICE = (
 47 |     "cuda" if torch.cuda.is_available() else  # Check for CUDA (NVIDIA GPU)
 48 |     "mps" if torch.backends.mps.is_available() else  # Check for MPS (Apple Silicon GPU)
 49 |     "cpu"  # Default to CPU if neither CUDA nor MPS is available
 50 | )
 51 | print('default device', DEFAULT_DEVICE)
 52 | # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1' etc., or try 'mps' on macbooks
 53 | DEFAULT_DTYPE = (
 54 |     "bfloat16"
 55 |     if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
 56 |     else "float16"
 57 | )  # 'float32', 'bfloat16', or 'float16', the latter will auto implement a GradScaler
 58 | # we want to do gradient update per 0.5M tokens, but our GPU can't fit that size i.e lets say our we have block_size=1024, it would take
 59 | # 488 batch_size to do it in the single run, but our gpu can't fit it, so we divide further, i.e we accumulate gradient on smaller batch, once we
 60 | # have accumulated gradients for 0.5M tokens, we do the update, otherwise just accumulate the gradients. keeping batch_size=16, block_size=1024, we need
 61 | # divide into 0.5*1e6/(16*1024) steps, which we name gradient_accumulation_steps
 62 | DEFAULT_GRADIENT_ACCUMULATION_STEPS = 8 
 63 | #init_from = "scratch"
 64 | DEFAULT_INIT_FROM = "scratch"
 65 | # wandb logging
 66 | DEFAULT_WANDB_PROJECT = "LilLM"
 67 | DEFAULT_WANDB_RUN_NAME = "GPU_RUN"
 68 | 
 69 | DEFAULT_COMPILE = True
 70 | 
 71 | 
 72 | # learning rate decay scheduler (cosine with warmup)
 73 | def get_lr(it, learning_rate, min_lr, warmup_iters, lr_decay_iters ):
 74 |     # 1) linear warmup for warmup_iters steps
 75 |     if it < warmup_iters:
 76 |         return learning_rate * (it + 1) / (warmup_iters + 1)
 77 |     # 2) if it > lr_decay_iters, return min learning rate
 78 |     if it > lr_decay_iters:
 79 |         return min_lr
 80 |     # 3) in between, use cosine decay down to min learning rate
 81 |     decay_ratio = (it - warmup_iters) / (lr_decay_iters - warmup_iters)
 82 |     assert 0 <= decay_ratio <= 1
 83 |     coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))  # coeff ranges 0..1
 84 |     return min_lr + coeff * (learning_rate - min_lr)
 85 | 
 86 | 
 87 | @torch.no_grad()
 88 | def estimate_losses(model, sft_dataset, batch_size, block_size, device, eval_iters, device_type, ctx):
 89 |     out = {}
 90 |     model.eval()
 91 |     for split in ["train", "val"]:
 92 |         each_loss = 0
 93 |         for i in range(eval_iters):
 94 | 
 95 |             x,y, loss_mask = sft_dataset.get_batch(split, batch_size)
 96 |             x = x.to(device)
 97 |             y = y.to(device)
 98 |             loss_mask = loss_mask.to(device)
 99 |             #x, y = data_loader(data_path, split,batch_size, block_size, device, device_type)
100 |             with ctx:
101 |                 logits, _ = model(x, targets=y)
102 |                 loss = F.cross_entropy(logits.view(-1, logits.shape[-1]), y.view(-1), reduction='none').view(y.size())
103 |                 loss = (loss*loss_mask).sum() / loss_mask.sum()
104 | 
105 |             each_loss += loss.item()
106 |         out[split] = each_loss / eval_iters
107 |     model.train()
108 |     return out
109 | 
110 | 
111 | def set_distributed():
112 |     ddp = int(os.environ.get("RANK", -1)) != -1
113 |     if ddp:
114 |         init_process_group(backend="nccl")
115 |         ddp_rank = int(os.environ["RANK"])
116 |         ddp_local_rank = int(os.environ["LOCAL_RANK"])
117 |         ddp_world_size = int(os.environ["WORLD_SIZE"])
118 |         torch.cuda.set_device(f"cuda:{ddp_local_rank}")
119 |         return ddp, ddp_rank, ddp_local_rank, ddp_world_size
120 |     return 0, 0, 0, 1
121 | 
122 | 
123 | def main(args):
124 |     ddp, ddp_rank, ddp_local_rank,ddp_world_size = set_distributed()
125 |     #device = f"cuda:{ddp_local_rank}" if ddp else 'cpu'
126 |     if ddp:
127 |         device = f"cuda:{ddp_local_rank}"
128 |     else:
129 |         device = args.device
130 | 
131 |     master_process = ddp_rank == 0
132 |     torch.manual_seed(1337 + ddp_rank)  # set different seed for differnt gpus
133 |     assert args.gradient_accumulation_steps % ddp_world_size == 0
134 |     args.gradient_accumulation_steps //= ddp_world_size
135 | 
136 |     torch.backends.cuda.matmul.allow_tf32 = True
137 |     torch.backends.cudnn.allow_tf32 = True  # allow tf32 on cudnn (eg. convolutions nn.Conv)
138 |     device_type = "cuda" if "cuda" in device else "cpu"
139 |     # note: float16 data type will automatically use a GradScaler
140 |     ptdtype = {
141 |         "float32": torch.float32,
142 |         "bfloat16": torch.bfloat16,
143 |         "float16": torch.float16,
144 |     }[args.dtype]
145 |     ctx = (
146 |         nullcontext()
147 |         if device_type == "cpu"
148 |         else torch.amp.autocast(device_type=device_type, dtype=ptdtype)
149 |     )
150 | 
151 |     print("tokens per iteration", args.gradient_accumulation_steps * ddp_world_size * args.batch_size * args.block_size)
152 |     model_config = Config(max_seq_len=args.block_size, max_batch_size=args.batch_size, dropout=args.dropout_rate)
153 |     # Flops estimation
154 |     flops_per_model = calculate_transformer_flops(
155 |         seq_len=model_config.max_seq_len,
156 |         vocab_size=model_config.vocab_size,
157 |         d_model=model_config.d_model,
158 |         key_size=model_config.d_model/model_config.q_heads,
159 |         num_heads=model_config.q_heads,
160 |         ffw_size=model_config.hidden_dim if model_config.hidden_dim is not None else 4*model_config.d_model,
161 |         num_layers=model_config.n_layers
162 |     )
163 |     flops_per_step = flops_per_model * model_config.max_batch_size * args.gradient_accumulation_steps * ddp_world_size
164 |     running_mfu = -1.0
165 |     # Train from scratch or from a checkpoint
166 |     if args.init_from == "scratch":
167 |         model = LilLM(model_config)
168 |         num_iter = 0
169 |         best_val_loss = DEFAULT_BEST_VAL_LOSS
170 |     elif args.init_from == "resume":  # resume from a checkpoint
171 |         torch.serialization.add_safe_globals([Config])
172 |         #checkpoint = torch.load(os.path.join(args.out_dir, 'best_model_15K.pt'), map_location=device)
173 |         checkpoint = torch.load(args.model_path, map_location=device)
174 |         model_config = checkpoint['config'] 
175 |         model = LilLM(model_config)
176 |         # saved model keys contain some prefix, we need to rename them to our original name
177 |         state_dict = checkpoint['model'] 
178 |         unwanted_prefix = '_orig_mod.'
179 |         for k in list(state_dict.keys()):
180 |             if k.startswith(unwanted_prefix):
181 |                 state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
182 |         
183 |         model.load_state_dict(state_dict) 
184 |         num_iter = 0 
185 |         best_val_loss = checkpoint['best_val_loss']
186 | 
187 |     if wandb and master_process:
188 |         wandb.init(project=args.wandb_project, name=args.wandb_run_name, config=model_config)
189 | 
190 |     model.to(device)
191 |     # scaler is required if we use fp16, cause gradients and loss need to be scaled because of lower range i.e 5-bit exponent (gradients, may explode or vanish)
192 |     # not needed for bf16, cause it has 8-bit exponent
193 |     scaler = torch.cuda.amp.GradScaler(enabled=(args.dtype == "float16"))
194 |     optimizer = torch.optim.AdamW(model.parameters(), lr=args.learning_rate)
195 |     
196 |     # pass data_path with your own huggingface dataset's url
197 |     sft_dataset = SFTDataset(tokenizer_path=args.tokenizer_path,max_seq_len=model_config.max_seq_len, data_path=args.data_path)
198 | 
199 |     if DEFAULT_COMPILE and ddp:
200 |         print("torch compiling the model..")
201 |         unoptimized_model = model
202 |         model = torch.compile(model)
203 |     if ddp:
204 |         model = DDP(model, device_ids=[ddp_local_rank])
205 | 
206 |     raw_model = model.module if ddp else model
207 | 
208 |     
209 |     while True:
210 |         # pick learning rate
211 |         #lr = get_lr(num_iter, args.learning_rate, args.min_lr, args.warmup_iters, args.lr_decay_iters) if DEFAULT_DECAY_LR else args.learning_rate
212 |         lr = args.learning_rate
213 | 
214 |         for param_group in optimizer.param_groups:
215 |             param_group["lr"] = lr
216 | 
217 |         # evaluation and log losses to wandb
218 |         if num_iter % args.eval_interval == 0 and master_process:
219 |             losses = estimate_losses(model,sft_dataset, args.batch_size, args.block_size, device, args.eval_iters, device_type, ctx)
220 |             print(
221 |                 f'Steps {num_iter} train loss:{losses["train"]} val loss: {losses["val"]} '
222 |             )
223 |             if wandb:
224 |                 wandb.log(
225 |                     {
226 |                         "iter": num_iter,
227 |                         "train_loss": losses["train"],
228 |                         "val_loss": losses["val"],
229 |                         "lr": lr,
230 |                     }
231 |                 )
232 | 
233 |             # Save checkpoint with best loss
234 |             if losses["val"] < best_val_loss:
235 |                 best_val_loss = losses["val"]
236 | 
237 |                 checkpoint = {
238 |                     "model": raw_model.state_dict(),
239 |                     "optimizer": optimizer.state_dict(),
240 |                     "best_val_loss": best_val_loss,
241 |                     "num_iter": num_iter,
242 |                     "config": model_config,
243 |                 }
244 |                 torch.save(checkpoint, os.path.join(args.out_dir, "best_model_sft.pt"))
245 |         
246 |         x,y, loss_mask = sft_dataset.get_batch('train', args.batch_size)
247 |         x = x.to(device)
248 |         y = y.to(device)
249 |         loss_mask = loss_mask.to(device)
250 | 
251 |         t1 = time.time()
252 |         for micro_step in range(args.gradient_accumulation_steps):
253 |             #x, y = data_loader(args.data_path, "train",args.batch_size, args.block_size,device, device_type)
254 |             # x,y = torch.randint(0,10,(10,256)).to(device), torch.randint(0,10,(10,256)).to(device)
255 | 
256 |             if ddp:
257 |                 # in DDP training we only need to sync gradients at the last micro step.
258 |                 # the official way to do this is with model.no_sync() context manager, but
259 |                 # I really dislike that this bloats the code and forces us to repeat code
260 |                 # looking at the source of that context manager, it just toggles this variable
261 |                 model.require_backward_grad_sync = (
262 |                     micro_step == args.gradient_accumulation_steps - 1
263 |                 )
264 |             with ctx:
265 |                 logits, _ = model(x, targets=y)
266 |                 loss = F.cross_entropy(logits.view(-1, logits.shape[-1]), y.view(-1), reduction='none').view(y.size())
267 |                 loss = (loss*loss_mask).sum() / loss_mask.sum() 
268 |                 loss = loss / args.gradient_accumulation_steps
269 |  
270 |             x,y, loss_mask = sft_dataset.get_batch('train', args.batch_size)
271 | 
272 |             x = x.to(device)
273 |             y = y.to(device)
274 |             loss_mask = loss_mask.to(device)
275 | 
276 | 
277 |             # gradient sync happens here
278 |             # why scale??
279 |             # fp16 can only store 5-bit exponents, to preserve large numbers from getting zerod
280 |             # the scaler multiplies our numbers with scaler eg 1024
281 |             # exp: np.float16(0.0000000123423543) will result in 0, cause there are not enough
282 |             # exponent to store this number, so when we do this np.float16(0.0000000123423543*1024)
283 |             # result is np.float16(1.264e-05), its a scaled value.
284 |             scaler.scale(loss).backward()
285 | 
286 |         if args.grad_clip != 0.0:
287 |             # unscale the gradients, cause we need higher precision in AdamW optimzers, and we don't use mixed-precision
288 |             scaler.unscale_(optimizer)
289 |             # clip the gradients to prevent vanishing gradient problem
290 |             torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
291 | 
292 |         scaler.step(optimizer)
293 |         # adjusts the loss scaling factor dynamically, eg, if prev step caused overflow
294 |         # decrease scaling factor, else increase scaling factor
295 |         scaler.update()
296 |         optimizer.zero_grad(set_to_none=True)
297 |         num_iter += 1
298 |         t2 = time.time()
299 |         dt = t2 - t1
300 |         if num_iter % args.log_interval == 0 and master_process:
301 |             if num_iter >=5:
302 |                 flops_promised = 312e12 # flops that we can do in A100 for bfloat16
303 |                 mfu = flops_per_step / (flops_promised * (dt))
304 |                 running_mfu =  mfu if running_mfu == -1.0 else 0.9*running_mfu + 0.1*mfu
305 |                 print(
306 |                     f"iteration: {num_iter} loss: {(loss.item() * args.gradient_accumulation_steps):.4f} time_taken: {(dt):.2f}, mfu : {mfu*100:.2f}%"
307 |                 )
308 |         if num_iter > args.max_iters:
309 |             break
310 | 
311 |     if ddp:
312 |         destroy_process_group()
313 | 
314 | if __name__ == '__main__':
315 |     parser = argparse.ArgumentParser(description='Fine Tune lilLM using SFT')
316 |     parser.add_argument("--batch_size", type=int, default=DEFAULT_BATCH_SIZE, help="Batch size for training.")
317 |     parser.add_argument("--block_size", type=int, default=DEFAULT_BLOCK_SIZE, help="Block size for training.")
318 |     parser.add_argument("--dropout_rate", type=int, default=DEFAULT_DROPOUT_RATE, help="Dropout rate mostly used in sft")
319 |     parser.add_argument("--learning_rate", type=float, default=DEFAULT_LEARNING_RATE, help="Maximum learning rate.")
320 |     parser.add_argument("--min_lr", type=float, default=DEFAULT_MIN_LR, help="Minimum learning rate.")
321 |     parser.add_argument("--max_iters", type=int, default=DEFAULT_MAX_ITERS, help="Maximum number of iterations.")
322 |     parser.add_argument("--grad_clip", type=float, default=DEFAULT_GRAD_CLIP, help="Gradient clipping value.")
323 |     parser.add_argument("--eval_interval", type=int, default=DEFAULT_EVAL_INTERVAL, help="Evaluation interval.")
324 |     parser.add_argument("--log_interval", type=int, default=DEFAULT_LOG_INTERVAL, help="Logging interval.")
325 |     parser.add_argument("--eval_iters", type=int, default=DEFAULT_EVAL_ITERS, help="Number of iterations for evaluation.")
326 |     parser.add_argument("--warmup_iters", type=int, default=DEFAULT_WARMUP_ITERS, help="Number of warmup iterations.")
327 |     parser.add_argument("--lr_decay_iters", type=int, default=DEFAULT_LR_DECAY_ITERS, help="Number of iterations for learning rate decay.")
328 |     parser.add_argument("--gradient_accumulation_steps", type=int, default=DEFAULT_GRADIENT_ACCUMULATION_STEPS, help="Gradient accumulation steps.")
329 |     parser.add_argument("--device", type=str, default=DEFAULT_DEVICE, help="Device to use for training (e.g., 'cuda' or 'cpu').")
330 |     parser.add_argument("--dtype", type=str, default=DEFAULT_DTYPE, help="Data type for training (e.g., 'float16', 'bfloat16').")
331 |     parser.add_argument("--wandb_project", type=str, default=DEFAULT_WANDB_PROJECT, help="Wandb project name.")
332 |     parser.add_argument("--wandb_run_name", type=str, default=DEFAULT_WANDB_RUN_NAME, help="Wandb run name.")
333 |     parser.add_argument("--out_dir", type=str, default=DEFAULT_OUT_DIR, help="Directory to save checkpoints.")
334 |     parser.add_argument("--data_path", type=str, default=DEFAULT_DATA_PATH, help="Huggingface's dataset url example: CohleM/lillm-sft-dataset-v1")
335 |     parser.add_argument("--init_from", type=str, default=DEFAULT_INIT_FROM, help="resume or scratch")
336 |     parser.add_argument("--tokenizer_path", type=str, default=DEFAULT_TOKENIZER_PATH, help="tokenizer path")
337 |     parser.add_argument("--model_path", type=str, default=DEFAULT_MODEL_PATH, help="path to the model you want to start training from")
338 |     args = parser.parse_args()
339 |     main(args)
340 | 
341 | 


--------------------------------------------------------------------------------
/sft_lora_train.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import os
  3 | import time
  4 | import math
  5 | import argparse
  6 | 
  7 | import wandb
  8 | import torch
  9 | import numpy as np
 10 | from torch import nn
 11 | import torch.nn.functional as F
 12 | from torch.nn.parallel import DistributedDataParallel as DDP
 13 | from torch.distributed import init_process_group, destroy_process_group
 14 | from contextlib import nullcontext
 15 | 
 16 | from model.config import Config
 17 | from model.model import LilLM
 18 | from model.utils import calculate_transformer_flops
 19 | from model.dataset import SFTDataset
 20 | from model.model_lora import *
 21 | # Only these variables need changes
 22 | DEFAULT_DATA_PATH = "CohleM/lillm-sft-dataset-512-including-hard-coded-mixture"
 23 | DEFAULT_TOKENIZER_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "model/tokenizer")
 24 | DEFAULT_MODEL_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "best_model.pt")
 25 | 
 26 | DEFAULT_LOW_RANK = 4
 27 | DEFAULT_OUT_DIR = ""
 28 | #DEFAULT_BATCH_SIZE = 128
 29 | DEFAULT_BATCH_SIZE = 64 
 30 | DEFAULT_BLOCK_SIZE = 512
 31 | DEFAULT_MAX_ITERS = 2500 
 32 | DEFAULT_GRAD_CLIP = 1.0
 33 | DEFAULT_EVAL_INTERVAL = 20  # do eval every 200 interval
 34 | DEFAULT_LOG_INTERVAL = 5 
 35 | DEFAULT_EVAL_ITERS = 5  # for accumulate eval losses for 200 iters
 36 | DEFAULT_BEST_VAL_LOSS = 1e9
 37 | # learning rate decay settings
 38 | DEFAULT_DECAY_LR = True  # whether to decay the learning rate
 39 | DEFAULT_WARMUP_ITERS = 2000  # how many steps to warm up for
 40 | DEFAULT_LR_DECAY_ITERS = 600000  # should be ~= max_iters per Chinchilla
 41 | DEFAULT_MIN_LR = 6e-5  # minimum learning rate, should be ~= learning_rate/10 per Chinchilla
 42 | DEFAULT_LEARNING_RATE = 3e-4  # max learning rate
 43 | DEFAULT_RUNNING_MFU = -1
 44 | DEFAULT_DROPOUT_RATE = 0.2
 45 | # Check for available devices
 46 | DEFAULT_DEVICE = (
 47 |     "cuda" if torch.cuda.is_available() else  # Check for CUDA (NVIDIA GPU)
 48 |     "mps" if torch.backends.mps.is_available() else  # Check for MPS (Apple Silicon GPU)
 49 |     "cpu"  # Default to CPU if neither CUDA nor MPS is available
 50 | )
 51 | print('default device', DEFAULT_DEVICE)
 52 | # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1' etc., or try 'mps' on macbooks
 53 | DEFAULT_DTYPE = (
 54 |     "bfloat16"
 55 |     if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
 56 |     else "float16"
 57 | )  # 'float32', 'bfloat16', or 'float16', the latter will auto implement a GradScaler
 58 | # we want to do gradient update per 0.5M tokens, but our GPU can't fit that size i.e lets say our we have block_size=1024, it would take
 59 | # 488 batch_size to do it in the single run, but our gpu can't fit it, so we divide further, i.e we accumulate gradient on smaller batch, once we
 60 | # have accumulated gradients for 0.5M tokens, we do the update, otherwise just accumulate the gradients. keeping batch_size=16, block_size=1024, we need
 61 | # divide into 0.5*1e6/(16*1024) steps, which we name gradient_accumulation_steps
 62 | DEFAULT_GRADIENT_ACCUMULATION_STEPS = 8 
 63 | #init_from = "scratch"
 64 | DEFAULT_INIT_FROM = "scratch"
 65 | # wandb logging
 66 | DEFAULT_WANDB_PROJECT = "LilLM"
 67 | DEFAULT_WANDB_RUN_NAME = "GPU_RUN"
 68 | 
 69 | DEFAULT_COMPILE = True
 70 | 
 71 | 
 72 | # learning rate decay scheduler (cosine with warmup)
 73 | def get_lr(it, learning_rate, min_lr, warmup_iters, lr_decay_iters ):
 74 |     # 1) linear warmup for warmup_iters steps
 75 |     if it < warmup_iters:
 76 |         return learning_rate * (it + 1) / (warmup_iters + 1)
 77 |     # 2) if it > lr_decay_iters, return min learning rate
 78 |     if it > lr_decay_iters:
 79 |         return min_lr
 80 |     # 3) in between, use cosine decay down to min learning rate
 81 |     decay_ratio = (it - warmup_iters) / (lr_decay_iters - warmup_iters)
 82 |     assert 0 <= decay_ratio <= 1
 83 |     coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))  # coeff ranges 0..1
 84 |     return min_lr + coeff * (learning_rate - min_lr)
 85 | 
 86 | 
 87 | @torch.no_grad()
 88 | def estimate_losses(model, sft_dataset, batch_size, block_size, device, eval_iters, device_type, ctx):
 89 |     out = {}
 90 |     model.eval()
 91 |     for split in ["train", "val"]:
 92 |         each_loss = 0
 93 |         for i in range(eval_iters):
 94 | 
 95 |             x,y, loss_mask = sft_dataset.get_batch(split, batch_size)
 96 |             x = x.to(device)
 97 |             y = y.to(device)
 98 |             loss_mask = loss_mask.to(device)
 99 |             #x, y = data_loader(data_path, split,batch_size, block_size, device, device_type)
100 |             with ctx:
101 |                 logits, _ = model(x, targets=y)
102 |                 loss = F.cross_entropy(logits.view(-1, logits.shape[-1]), y.view(-1), reduction='none').view(y.size())
103 |                 loss = (loss*loss_mask).sum() / loss_mask.sum()
104 | 
105 |             each_loss += loss.item()
106 |         out[split] = each_loss / eval_iters
107 |     model.train()
108 |     return out
109 | 
110 | 
111 | def set_distributed():
112 |     ddp = int(os.environ.get("RANK", -1)) != -1
113 |     if ddp:
114 |         init_process_group(backend="nccl")
115 |         ddp_rank = int(os.environ["RANK"])
116 |         ddp_local_rank = int(os.environ["LOCAL_RANK"])
117 |         ddp_world_size = int(os.environ["WORLD_SIZE"])
118 |         torch.cuda.set_device(f"cuda:{ddp_local_rank}")
119 |         return ddp, ddp_rank, ddp_local_rank, ddp_world_size
120 |     return 0, 0, 0, 1
121 | 
122 | 
123 | def main(args):
124 |     ddp, ddp_rank, ddp_local_rank,ddp_world_size = set_distributed()
125 |     #device = f"cuda:{ddp_local_rank}" if ddp else 'cpu'
126 |     if ddp:
127 |         device = f"cuda:{ddp_local_rank}"
128 |     else:
129 |         device = args.device
130 | 
131 |     master_process = ddp_rank == 0
132 |     torch.manual_seed(1337 + ddp_rank)  # set different seed for differnt gpus
133 |     assert args.gradient_accumulation_steps % ddp_world_size == 0
134 |     args.gradient_accumulation_steps //= ddp_world_size
135 | 
136 |     torch.backends.cuda.matmul.allow_tf32 = True
137 |     torch.backends.cudnn.allow_tf32 = True  # allow tf32 on cudnn (eg. convolutions nn.Conv)
138 |     device_type = "cuda" if "cuda" in device else "cpu"
139 |     # note: float16 data type will automatically use a GradScaler
140 |     ptdtype = {
141 |         "float32": torch.float32,
142 |         "bfloat16": torch.bfloat16,
143 |         "float16": torch.float16,
144 |     }[args.dtype]
145 |     ctx = (
146 |         nullcontext()
147 |         if device_type == "cpu"
148 |         else torch.amp.autocast(device_type=device_type, dtype=ptdtype)
149 |     )
150 | 
151 |     print("tokens per iteration", args.gradient_accumulation_steps * ddp_world_size * args.batch_size * args.block_size)
152 |     model_config = Config(max_seq_len=args.block_size, max_batch_size=args.batch_size, dropout=args.dropout_rate)
153 |     # Flops estimation
154 |     flops_per_model = calculate_transformer_flops(
155 |         seq_len=model_config.max_seq_len,
156 |         vocab_size=model_config.vocab_size,
157 |         d_model=model_config.d_model,
158 |         key_size=model_config.d_model/model_config.q_heads,
159 |         num_heads=model_config.q_heads,
160 |         ffw_size=model_config.hidden_dim if model_config.hidden_dim is not None else 4*model_config.d_model,
161 |         num_layers=model_config.n_layers
162 |     )
163 |     flops_per_step = flops_per_model * model_config.max_batch_size * args.gradient_accumulation_steps * ddp_world_size
164 |     running_mfu = -1.0
165 |     # Train from scratch or from a checkpoint
166 |     if args.init_from == "scratch":
167 |         model = LilLM(model_config)
168 |         # Apply lora
169 | 
170 |         num_iter = 0
171 |         best_val_loss = DEFAULT_BEST_VAL_LOSS
172 |     elif args.init_from == "resume":  # resume from a checkpoint
173 |         torch.serialization.add_safe_globals([Config])
174 |         #checkpoint = torch.load(os.path.join(args.out_dir, 'best_model_15K.pt'), map_location=device)
175 |         checkpoint = torch.load(args.model_path, map_location=device)
176 |         model_config = checkpoint['config'] 
177 |         model = LilLM(model_config)
178 |         # saved model keys contain some prefix, we need to rename them to our original name
179 |         state_dict = checkpoint['model'] 
180 |         unwanted_prefix = '_orig_mod.'
181 |         for k in list(state_dict.keys()):
182 |             if k.startswith(unwanted_prefix):
183 |                 state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
184 |         
185 |         model.load_state_dict(state_dict) 
186 |         apply_lora(model, r=args.low_rank)
187 |         optimizer.state_dict()
188 |         num_iter = 0 
189 |         best_val_loss = checkpoint['best_val_loss']
190 | 
191 |     if wandb and master_process:
192 |         wandb.init(project=args.wandb_project, name=args.wandb_run_name, config=model_config)
193 | 
194 |     model.to(device)
195 |     # scaler is required if we use fp16, cause gradients and loss need to be scaled because of lower range i.e 5-bit exponent (gradients, may explode or vanish)
196 |     # not needed for bf16, cause it has 8-bit exponent
197 |     scaler = torch.cuda.amp.GradScaler(enabled=(args.dtype == "float16"))
198 |     optimizer = torch.optim.AdamW(model.parameters(), lr=args.learning_rate)
199 |     
200 |     # pass data_path with your own huggingface dataset's url
201 |     sft_dataset = SFTDataset(tokenizer_path=args.tokenizer_path,max_seq_len=model_config.max_seq_len, data_path=args.data_path)
202 | 
203 |     if DEFAULT_COMPILE and ddp:
204 |         print("torch compiling the model..")
205 |         unoptimized_model = model
206 |         model = torch.compile(model)
207 |     if ddp:
208 |         model = DDP(model, device_ids=[ddp_local_rank])
209 | 
210 |     raw_model = model.module if ddp else model
211 | 
212 |     
213 |     while True:
214 |         # pick learning rate
215 |         #lr = get_lr(num_iter, args.learning_rate, args.min_lr, args.warmup_iters, args.lr_decay_iters) if DEFAULT_DECAY_LR else args.learning_rate
216 |         lr = args.learning_rate
217 | 
218 |         for param_group in optimizer.param_groups:
219 |             param_group["lr"] = lr
220 | 
221 |         # evaluation and log losses to wandb
222 |         if num_iter % args.eval_interval == 0 and master_process:
223 |             losses = estimate_losses(model,sft_dataset, args.batch_size, args.block_size, device, args.eval_iters, device_type, ctx)
224 |             print(
225 |                 f'Steps {num_iter} train loss:{losses["train"]} val loss: {losses["val"]} '
226 |             )
227 |             if wandb:
228 |                 wandb.log(
229 |                     {
230 |                         "iter": num_iter,
231 |                         "train_loss": losses["train"],
232 |                         "val_loss": losses["val"],
233 |                         "lr": lr,
234 |                     }
235 |                 )
236 | 
237 |             # Save checkpoint with best loss
238 |             if losses["val"] < best_val_loss:
239 |                 best_val_loss = losses["val"]
240 |                 # checkpoint = {
241 |                 #     "model": raw_model.state_dict(),
242 |                 #     "optimizer": optimizer.state_dict(),
243 |                 #     "best_val_loss": best_val_loss,
244 |                 #     "num_iter": num_iter,
245 |                 #     "config": model_config,
246 |                 # }
247 |                 save_lora(model, output_path='best_model_sft_lora.pt')
248 |                 # torch.save(checkpoint, os.path.join(args.out_dir, "best_model_sft.pt"))
249 |         
250 |         x,y, loss_mask = sft_dataset.get_batch('train', args.batch_size)
251 |         x = x.to(device)
252 |         y = y.to(device)
253 |         loss_mask = loss_mask.to(device)
254 | 
255 |         t1 = time.time()
256 |         for micro_step in range(args.gradient_accumulation_steps):
257 |             #x, y = data_loader(args.data_path, "train",args.batch_size, args.block_size,device, device_type)
258 |             # x,y = torch.randint(0,10,(10,256)).to(device), torch.randint(0,10,(10,256)).to(device)
259 | 
260 |             if ddp:
261 |                 # in DDP training we only need to sync gradients at the last micro step.
262 |                 # the official way to do this is with model.no_sync() context manager, but
263 |                 # I really dislike that this bloats the code and forces us to repeat code
264 |                 # looking at the source of that context manager, it just toggles this variable
265 |                 model.require_backward_grad_sync = (
266 |                     micro_step == args.gradient_accumulation_steps - 1
267 |                 )
268 |             with ctx:
269 |                 logits, _ = model(x, targets=y)
270 |                 loss = F.cross_entropy(logits.view(-1, logits.shape[-1]), y.view(-1), reduction='none').view(y.size())
271 |                 loss = (loss*loss_mask).sum() / loss_mask.sum() 
272 |                 loss = loss / args.gradient_accumulation_steps
273 |  
274 |             x,y, loss_mask = sft_dataset.get_batch('train', args.batch_size)
275 | 
276 |             x = x.to(device)
277 |             y = y.to(device)
278 |             loss_mask = loss_mask.to(device)
279 | 
280 | 
281 |             # gradient sync happens here
282 |             # why scale??
283 |             # fp16 can only store 5-bit exponents, to preserve large numbers from getting zerod
284 |             # the scaler multiplies our numbers with scaler eg 1024
285 |             # exp: np.float16(0.0000000123423543) will result in 0, cause there are not enough
286 |             # exponent to store this number, so when we do this np.float16(0.0000000123423543*1024)
287 |             # result is np.float16(1.264e-05), its a scaled value.
288 |             scaler.scale(loss).backward()
289 | 
290 |         if args.grad_clip != 0.0:
291 |             # unscale the gradients, cause we need higher precision in AdamW optimzers, and we don't use mixed-precision
292 |             scaler.unscale_(optimizer)
293 |             # clip the gradients to prevent vanishing gradient problem
294 |             torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
295 | 
296 |         scaler.step(optimizer)
297 |         # adjusts the loss scaling factor dynamically, eg, if prev step caused overflow
298 |         # decrease scaling factor, else increase scaling factor
299 |         scaler.update()
300 |         optimizer.zero_grad(set_to_none=True)
301 |         num_iter += 1
302 |         t2 = time.time()
303 |         dt = t2 - t1
304 |         if num_iter % args.log_interval == 0 and master_process:
305 |             if num_iter >=5:
306 |                 flops_promised = 312e12 # flops that we can do in A100 for bfloat16
307 |                 mfu = flops_per_step / (flops_promised * (dt))
308 |                 running_mfu =  mfu if running_mfu == -1.0 else 0.9*running_mfu + 0.1*mfu
309 |                 print(
310 |                     f"iteration: {num_iter} loss: {(loss.item() * args.gradient_accumulation_steps):.4f} time_taken: {(dt):.2f}, mfu : {mfu*100:.2f}%"
311 |                 )
312 |         if num_iter > args.max_iters:
313 |             break
314 | 
315 |     if ddp:
316 |         destroy_process_group()
317 | 
318 | if __name__ == '__main__':
319 |     parser = argparse.ArgumentParser(description='Fine Tune lilLM using SFT')
320 |     parser.add_argument("--batch_size", type=int, default=DEFAULT_BATCH_SIZE, help="Batch size for training.")
321 |     parser.add_argument("--low_rank", type=int, default=DEFAULT_LOW_RANK, help="rank value")
322 |     parser.add_argument("--block_size", type=int, default=DEFAULT_BLOCK_SIZE, help="Block size for training.")
323 |     parser.add_argument("--dropout_rate", type=int, default=DEFAULT_DROPOUT_RATE, help="Dropout rate mostly used in sft")
324 |     parser.add_argument("--learning_rate", type=float, default=DEFAULT_LEARNING_RATE, help="Maximum learning rate.")
325 |     parser.add_argument("--min_lr", type=float, default=DEFAULT_MIN_LR, help="Minimum learning rate.")
326 |     parser.add_argument("--max_iters", type=int, default=DEFAULT_MAX_ITERS, help="Maximum number of iterations.")
327 |     parser.add_argument("--grad_clip", type=float, default=DEFAULT_GRAD_CLIP, help="Gradient clipping value.")
328 |     parser.add_argument("--eval_interval", type=int, default=DEFAULT_EVAL_INTERVAL, help="Evaluation interval.")
329 |     parser.add_argument("--log_interval", type=int, default=DEFAULT_LOG_INTERVAL, help="Logging interval.")
330 |     parser.add_argument("--eval_iters", type=int, default=DEFAULT_EVAL_ITERS, help="Number of iterations for evaluation.")
331 |     parser.add_argument("--warmup_iters", type=int, default=DEFAULT_WARMUP_ITERS, help="Number of warmup iterations.")
332 |     parser.add_argument("--lr_decay_iters", type=int, default=DEFAULT_LR_DECAY_ITERS, help="Number of iterations for learning rate decay.")
333 |     parser.add_argument("--gradient_accumulation_steps", type=int, default=DEFAULT_GRADIENT_ACCUMULATION_STEPS, help="Gradient accumulation steps.")
334 |     parser.add_argument("--device", type=str, default=DEFAULT_DEVICE, help="Device to use for training (e.g., 'cuda' or 'cpu').")
335 |     parser.add_argument("--dtype", type=str, default=DEFAULT_DTYPE, help="Data type for training (e.g., 'float16', 'bfloat16').")
336 |     parser.add_argument("--wandb_project", type=str, default=DEFAULT_WANDB_PROJECT, help="Wandb project name.")
337 |     parser.add_argument("--wandb_run_name", type=str, default=DEFAULT_WANDB_RUN_NAME, help="Wandb run name.")
338 |     parser.add_argument("--out_dir", type=str, default=DEFAULT_OUT_DIR, help="Directory to save checkpoints.")
339 |     parser.add_argument("--data_path", type=str, default=DEFAULT_DATA_PATH, help="Huggingface's dataset url example: CohleM/lillm-sft-dataset-v1")
340 |     parser.add_argument("--init_from", type=str, default=DEFAULT_INIT_FROM, help="resume or scratch")
341 |     parser.add_argument("--tokenizer_path", type=str, default=DEFAULT_TOKENIZER_PATH, help="tokenizer path")
342 |     parser.add_argument("--model_path", type=str, default=DEFAULT_MODEL_PATH, help="path to the model you want to start training from")
343 |     args = parser.parse_args()
344 |     main(args)
345 | 
346 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # A little Language Model
  2 | 
  3 | A 39M (lil) parameter model trained on ~8B tokens, on 2xA100 for approximately 2 hours. More details below.
  4 | 
  5 | ## Introduction
  6 | 
  7 | > What I cannot create, I do not understand - Richard Feynman
  8 | 
  9 | Simply understanding the model architecture is not enough to fully grasp how these models are trained. This project is the outcome of this realization and the frustration on how abstractions limit our learning process (eg. huggingface transformers) at least when we are starting out. The best thing to do is to implement everything from scratch, within minimal abstraction. Well, this is what this project does. With this project, I plan to add everything(code + my notes) from training tokenizers to the post-tranining phases. One may consider it as a roadmap, but it might not be enough and at the end you will have your own roadmap, so just consider it as an outline or introduction to training Large Language Models.
 10 | 
 11 | ## How to approach this project
 12 | 
 13 | Before you approach this project. You should have basic understanding of how transformer model works. A great way to start is by watching and implementing yourself [Karpathy's zero to hero](https://www.youtube.com/watch?v=VMj-3S1tku0&list=PLAqhIrjkxbuWI23v9cThsA9GvCAUhRvKZ) series til part 5. Afterwards, you can take a look at Jay Alammar's [The Illustrated Transformer](https://jalammar.github.io/illustrated-transformer/), and then visit Karpathy's [Let's build GPT: from scratch, in code, spelled out.](https://youtu.be/kCc8FmEb1nY?si=ZyI_mMpGKGfUlkFV). This is just my recommendation, please make sure to visit them in any order as per your need.
 14 | 
 15 | Now that you have understanding of the transformers architecture, you're ready to dive deeper this project.
 16 | 
 17 | Before anything else, If you're a beginner please start by visualizing how different dimenstional matrix look. Go to this website
 18 | https://array-3d-viz.vercel.app and starting from 2D matrix visualize upto 4 dimensional matrix.
 19 | 
 20 | ### Tokenization
 21 | 
 22 | This is the first step in training LM. As LMs can't take text as an input we need to convert text to numbers. We build our own vocabulary to map tokens to numbers. A great way understand the whole concept is to watch karpathy's [Let's build the GPT Tokenizer](https://www.youtube.com/watch?v=zduSFxRajkE&t=3301s). You might need some knowledge about unicode and utf-8 to completely grasp the concept in detail for which you can look at my notes on [Tokenizers](https://cohlem.github.io/sub-notes/tokenization/). In this project, We train huggingface tokenizer (this is the only abstraction that we use) to train our tokenizer. Visit [train_custom_tokenizer.py](https://github.com/CohleM/lilLM/blob/master/train_custom_tokenizer.py) to get into more detail. Since we want our model size to be very small, choosing relatively small vocab_size is important because most of the parameters come from large vocab_sizes. The total vocab_size for our LilLM is 2\*\*13 which includes 16 special tokens.
 23 | 
 24 | ```
 25 | special_tokens = ["<unk>","<s>", "</s>"]
 26 | special_tokens = special_tokens + [f'<r{i}>' for i in range(13)]  # total 16 special tokens, 13 reserved for later
 27 | ```
 28 | 
 29 | We add one extra token </s> to each example. The reason behind adding that token is to teach our model when to end generating.
 30 | 
 31 | ```
 32 | you are beautiful
 33 | ```
 34 | 
 35 | ```
 36 | you are beautiful </s>
 37 | ```
 38 | 
 39 | ## Data preparation
 40 | 
 41 | The pretraining data will be large. In our case the untokenized data is around ~50GB. The idea is to tokenize all our raw data and save it in a binary file. The reason is tokenization is CPU bound task and we want to maximize our GPU utilization during training by reducing the number of CPU bound task such as the tokenization. We tokenize all our raw data using [process.py](https://github.com/CohleM/lilLM/blob/master/data/pretraining/process.py) save it in binary file and delete our raw dataset because we already know how to encode and decode the those tokenized data using our own tokenizer. The binary file is around 20GB.
 42 | 
 43 | ## Architecture Implementation
 44 | 
 45 | ### lilLM Architecture
 46 | 
 47 | The architecture differs from transformers architecture in that it uses.
 48 | 
 49 | - RMSNorm instead of LayerNorm
 50 | - Rotary Positional Embedding instead of Absolute Positional Embedding
 51 | - SwiGLU activations instead of ReLU
 52 | - Grouped Query Attention instead of Multi-head Attention
 53 | 
 54 | Finally, the architecture becomes similar to what is used in Llama 3 models. Find all the code for architecure in [model.py](https://github.com/CohleM/lilLM/blob/master/model/model.py)
 55 | 
 56 | ![architecture](/misc/lilLM_architecture.png)
 57 | 
 58 | | Model      | Download Link                                                         | `vocab_size` | `d_model` | `n_layers` | `max_seq_len` | `q_heads` | `kv_heads` | `max_batch_size` |
 59 | | ---------- | --------------------------------------------------------------------- | ------------ | --------- | ---------- | ------------- | --------- | ---------- | ---------------- |
 60 | | LilLM-39M  | [Download](https://huggingface.co/jonwondo/lilLM_40M_param_10B_tok)   | `2**13`      | `512`     | `12`       | `512`         | `16`      | `8`        | `32`             |
 61 | | LilLM-300M | [Download](https://huggingface.co/jonwondo/lilLM_300M_param_9_5B_tok) | `2**13`      | `1024`    | `24`       | `512`         | `16`      | `8`        | `32`             |
 62 | 
 63 | ### RMSNorm
 64 | 
 65 | Please read this paper [Root Mean Square Layer Normalization](https://arxiv.org/pdf/1910.07467), A simple conclusion from the paper is that we don't need to calculate the mean across layers while performing normalization as we do in Layer Normalization, just maintaining the variation((scaling)) is sufficient, then take a look at `RMSNorm` class.
 66 | 
 67 | ### Rotary Positional Embedding
 68 | 
 69 | Instead of adding extra positional embedding to our token embeddings, we simply rotate our token embeddings. I would first recommend watching this video [RoPE (Rotary positional embeddings) explained](https://www.youtube.com/watch?v=GQPOtyITy54), then read the paper [ROFORMER](https://arxiv.org/pdf/2104.09864) and finally look at my notes on [RoPE](https://cohlem.github.io/sub-notes/rope/) where I explain ROPE with respect to the code that we use in this project. Look at `apply_rotary_pe` function for the implementation
 70 | 
 71 | ### SwiGLU activations
 72 | 
 73 | Take a look at this simple and straightforward blog on [SwiGLU: GLU Variants Improve Transformer (2020)](https://kikaben.com/swiglu-2020/)
 74 | 
 75 | ### Grouped Query Attention
 76 | 
 77 | Instead of using multiple heads in our attention, we simply divide K and V to groups and repeat those K,V to q_heas/kv_heads times, and then perform attention. Why? since K and V are repeated, the data movement within GPU is minimized cause it is the most expensive task and is a bottleneck to our training. To understand better, take a look at this video [Variants of Multi-head attention](https://www.youtube.com/watch?v=pVP0bu8QA2w) and then read my notes on [Grouped Query Attention](https://cohlem.github.io/sub-notes/kv-cache-gqa/). The code is contained in the `Attention` class.
 78 | 
 79 | ### Weights Initialization
 80 | 
 81 | See this section of [weights initialization](https://youtu.be/l8pRSuU81PU?si=23AmsWizoFf7nfIp&t=4437) and see `LilLM` class initialization.
 82 | 
 83 | ### Distributed Training
 84 | 
 85 | Now that the architecture has been completed, you might want to focus on how to train them in distributed manner. First step would be to see Umar Jamil's video on [Distributed Training with PyTorch](https://youtu.be/toUSzwR0EV8?si=Vj7zWI5LuiAcUSNC) and implement what he recommends from scratch and then visit our [pretrain.py](https://github.com/CohleM/lilLM/blob/master/pretrain.py) code.
 86 | 
 87 | ### FLOPs utilization
 88 | 
 89 | After starting pretraining, you might want to look at how your GPUs are being utilized. Find the ratio of FLOPs utilized by FLOPs offered, and try to maximize that. To learn more about FLOPs utilization look at this paper [Training Compute-Optimal Large Language Models](https://arxiv.org/pdf/2203.15556) Appendix F. If you need more elaborated explanation take a look at my notes on [Flops calculation](https://cohlem.github.io/sub-notes/flops-calculation/)
 90 | 
 91 | ## Training Details
 92 | 
 93 | ### Tokenizer Training Data
 94 | 
 95 | It was trained on 0.1% of [OpenWebText](https://huggingface.co/datasets/Skylion007/openwebtext). Recommended way would be to train the tokenizer on diverse and large dataset to get the best compression rate. For simplicity, I wanted my model to just be able to converse well, I opted for this small subset of the dataset which you can find [here](https://huggingface.co/datasets/CohleM/openweb-800k)
 96 | 
 97 | ### Pretraining Data
 98 | 
 99 | The model was trained [OpenWebText](https://huggingface.co/datasets/Skylion007/openwebtext), which is close to 10 billion tokens according to our tokenizer, but the model was only trained on ~8B tokens (credits ran out :( ).
100 | 
101 | ### Compute
102 | 
103 | It was trained on 2XA100 for approximately 2.5 hours.
104 | 
105 | This is the specification of machine that I used. GPU was rented from [Tensordock](https://www.tensordock.com)
106 | 
107 | | **Category**      | **Details**            |
108 | | ----------------- | ---------------------- |
109 | | **Storage**       | 300 GB                 |
110 | | **vCPUs**         | 40 AMD EPYC 7513 vCPUs |
111 | | **RAM**           | 80 GB RAM              |
112 | | **GPU**           | 2x A100 SXM4 80 GB     |
113 | | **Compute Price** | $2.400000/hour         |
114 | | **Storage Price** | $0.075000/hour         |
115 | | **Total Price**   | $2.475000/hour         |
116 | 
117 | ### Train/Val loss
118 | 
119 | ![loss](misc/loss.png)
120 | 
121 | ### Supervised Fine-tuning
122 | 
123 | After pretraining, the model was trained on 300K examples of data. To construct SFT data mixture we take [magpie-ultra-v1.0](https://huggingface.co/datasets/argilla/magpie-ultra-v1.0) remove math, coding and debugging mixture, limit upto two-turn conversation, and filter out examples greater than 512 tokens using our tokenizer.
124 | 
125 | We take huggingface's [smoltalk](https://huggingface.co/datasets/HuggingFaceTB/smoltalk) SFT mixture and select these cateogies `'smol-constraints', 'smol-rewrite', 'smol-summarize', 'everyday-conversations', 'explore-instruct-rewriting','openhermes-100k'` and remove other categories. We again filter out examples greater than 512 tokens. Find the final dataset mixture [here](https://huggingface.co/datasets/CohleM/lillm-sft-dataset-512-including-hard-coded-mixture).
126 | 
127 | We use a different training objective than we did while pretraining. In this fine-tuning process, we only train our model on tokens from "assistant" role. Why? because the training objective is different in fine-tuning, we are doing next token prediction but only for the "assistant" because this is what we want for output so mask out (i.e make zero) tokens other than "assistant" role. Please take a look at 3.1 SupervisedFine-Tuning(SFT) in [Llama2](https://arxiv.org/pdf/2307.09288) paper which explains this procedure.
128 | 
129 | We use this template for SFT
130 | 
131 | ```
132 | '<r0> user<r1> What is the capital of France?</r2><r0> assistant<r1> The capital of France is Paris. It is the most populous city in the country and serves as the center of French politics, culture, and economy. Located in the north-central part of France, Paris is known for its stunning architecture, famous landmarks such as the Eiffel Tower, and world-class museums like the Louvre.</s>
133 | ```
134 | 
135 | The loss will be calculated only for these tokens
136 | 
137 | ```
138 | The capital of France is Paris. It is the most populous city in the country and serves as the center of French politics, culture, and economy. Located in the north-central part of France, Paris is known for its stunning architecture, famous landmarks such as the Eiffel Tower, and world-class museums like the Louvre.</s>
139 | ```
140 | 
141 | The special tokens don't carry much meaning, only the EOS token i.e </s> is considered for teaching model when to stop, and this is the only special token that takes part in loss calulation.
142 | 
143 | The model was fine-tuned for close to 4 epochs. We stop when we observe the model is overfitting.
144 | 
145 | ![sft_loss](misc/sft_loss.png)
146 | 
147 | The model was futher fine-tuned on [hard-coded](https://huggingface.co/datasets/CohleM/lillm-sft-hard-coded) examples for some(20-30) steps. Find the code for SFT in [sft_train.py](https://github.com/CohleM/lilLM/blob/master/sft_train.py).
148 | 
149 | Fine-tuning was done on 1xRTX 4090 for about 1.5 hours. Thanks to [Yuchen Jin](https://x.com/Yuchenj_UW) for providing GPU credits.
150 | 
151 | ## Sample outputs after pretraining
152 | 
153 | Input
154 | 
155 | ```
156 | Bernie Sanders of Vermont would seek a recount. The delay postponed a definitive answer to whether Clinton had made a clean sweep of five big primaries on
157 | ```
158 | 
159 | Output
160 | 
161 | ```
162 | Bernie Sanders of Vermont would seek a recount. The delay postponed a definitive answer to whether Clinton had made a clean sweep of five big primaries on opening day of the Democratic nominating process.\n\nIn response, Sanders theorized that it was possible for her to then-choice’s hand to escalate into a “unprecedented vote” to take the nomination. However, his standing to refrain from carrying coal companies in the wilderness at the time of her Nov. 8 pick-ing defeat surprised the race.\n\nTrump said Wednesday morning that he will back Clinton to replace the incumbent U.S. senator who ran against Bernie Sanders on Tuesday, in a 33-16 historic win over Hillary Clinton. Though, given that both Hillary Clinton and Bernie Sanders enjoyed a restricted number of fallen out of the race on the set of their Iowa primary
163 | ```
164 | 
165 | Input
166 | 
167 | ```
168 | The latest Marvel movie has shattered box office records, grossing over $1 billion worldwide in just two weeks. Fans have praised the
169 | ```
170 | 
171 | Output
172 | 
173 | ```
174 | The latest Marvel movie has shattered box office records, grossing over $1 billion worldwide in just two weeks. Fans have praised the icons like Roc Nation and Meet The Press for the budgetary ramifications Ain’t Not a breakout promotion.\n\nIn the second week of December, Marvel announced Monday that various Marvel games and Daredevil: The Desolation of holding off it would leave Friday to Evil Geniuses. The Daredevil announced Monday that The Clone Wars is now open and ready for release in late June.
175 | ```
176 | 
177 | ## Sample outputs after Supervised Fine-tuning
178 | 
179 | Input
180 | 
181 | ```
182 | Tell me about yourself.
183 | ```
184 | 
185 | Output
186 | 
187 | ```
188 | I'm LilLM, a little language model developed by open-source contributors at https://github.com/CohleM/lilLM open-source community. I can assist with a variety of tasks, such as answering questions and providing constructive feedback, helping with grammar, style, clarity, and more. Whether you're looking for information, advice, or just a chat, I'm here to help to do my best to provide the best possible guidance.
189 | ```
190 | 
191 | Input
192 | 
193 | ```
194 | Provide a concise, objective summary of the input text in up to three sentences
195 | """Dear Dr. Kapoor,
196 | 
197 | I hope this email finds you well. I am reaching out to introduce myself as your collaborator on the AI for Education project. As the lead software developer and UX designer for the project, I am eager to work with you and learn from your expertise in machine learning and adaptive learning systems.
198 | 
199 | I have reviewed your remarkable work on personalized education using AI and your innovative approaches to improving student engagement. I am confident that your contributions will be instrumental in creating a platform that transforms the educational experience for students globally.
200 | 
201 | I would love to schedule a meeting with you next week to discuss our respective responsibilities, project timeline, and initial design concepts. Please share your availability, and I will arrange the meeting accordingly.
202 | 
203 | Looking forward to working with you on this groundbreaking initiative.
204 | 
205 | Best regards,
206 | Jason Rivera
207 | """
208 | ```
209 | 
210 | Output
211 | 
212 | ```
213 | Jason Rivera, the lead software developer on the AI for Education project, is reaching out to collaborate on using AI for student engagement. Jason is enthusiastic about the positive impact of personalized education on students' learning. The meeting is scheduled to focus on the technical aspects of the project.
214 | ```
215 | 
216 | Input
217 | 
218 | ```
219 | Your answer must contain exactly 3 bullet points. Use the markdown bullet points such as: * This is point 1. Your response should contain less than 100 words.\n\nWhat are the main advantages of daily exercise?\n
220 | ```
221 | 
222 | Output
223 | 
224 | ```
225 | * Regular exercise helps improve cardiovascular health reducing the risk of heart disease.
226 | * It enhances mental well-being by reducing symptoms and promoting better sleep.
227 | * Regular physical activity boosts energy levels and energy levels.
228 | ```
229 | 
230 | ## Quick Start
231 | 
232 | ### Training from Scratch
233 | 
234 | #### Install dependencies
235 | 
236 | ```
237 | git clone https://github.com/CohleM/lilLM.git
238 | ```
239 | 
240 | ```
241 | pip install -r requirements.txt
242 | ```
243 | 
244 | #### Train Tokenizer
245 | 
246 | I plan to make this more startforward by adding commandline arguments, but for now please follow the steps described
247 | 
248 | Download the data from [here](https://huggingface.co/datasets/CohleM/openweb-800k) and convert it to jsonl format, open the `train_custom_tokenizer.py` file and replace the file_path with your path/to/your_jsonl_file and then
249 | 
250 | ```
251 | python train_custom_tokenizer.py
252 | ```
253 | 
254 | Tokenizer will be stored in `/model/tokenizer`.
255 | 
256 | #### Download and Tokenize pretraining data
257 | 
258 | ```
259 | python data/pretraining/process.py --tokenizer_path='/home/user/lilLM/model/tokenizer'
260 | ```
261 | 
262 | Make sure to replace the `tokenizer_path` with correct path
263 | 
264 | It will download the [OpenWebText](https://huggingface.co/datasets/Skylion007/openwebtext) dataset from huggingface and tokenize the whole dataset using our tokenizer saved in `/model/tokenizer` and save tokenized files as `train.bin` and `val.bin`.These are the binary files for our tokenized dataset. `train.bin` results in ~20GB. The reason for tokenizing it beforehand is because we want to maximize our GPU utilization. Since tokenization is a CPU bound task, we can do it before hand while allowing our GPU train more tokens during training.
265 | 
266 | #### Pretrain
267 | 
268 | If you have Nx GPU per node run.
269 | 
270 | ```
271 | torchrun --standalone --nproc_per_node=2 pretrain.py
272 | ```
273 | 
274 | If you only have one GPU run,
275 | 
276 | ```
277 | python pretrain.py
278 | ```
279 | 
280 | Please also take a look at default config parameters in `model/config.py` and in `pretrain.py`
281 | 
282 | #### Supervised Fine-tuning
283 | 
284 | Download the pretrained model.
285 | 
286 | ```
287 | python download_model.py --repo_id="jonwondo/lilLM_40M_param_10B_tok" --filename="lilLM_40M_params_10B_tok.pt"
288 | ```
289 | 
290 | Start the training run.
291 | 
292 | ```
293 | python sft_train.py --init_from="resume" --model_path="/home/user/lillm/best_model_15K.pt" --tokenizer_path="/home/user/lillm/model/tokenizer/" --data_path="CohleM/lillm-sft-dataset-v1"
294 | ```
295 | 
296 | #### Inference
297 | 
298 | Inference can be done using the same file for both supervised fine-tuned model or pretrained model, simply replace the mode_path and the mode_type i.e either "sft" or "pretrain".
299 | 
300 | ```
301 | python inference.py --model_type="sft" --text="What is the capital of Germany?" --model_path="/home/user/lillm/best_model_790_sft.pt"
302 | ```
303 | 
304 | If you want to use chat interface use
305 | 
306 | ```
307 | python inference_gradio.py --model_type="sft" --text="What is the capital of Germany?" --model_path="/home/user/lillm/best_model_790_sft.pt"
308 | ```
309 | 
310 | ## TODO
311 | 
312 | ### Post Training Stages
313 | 
314 | - Train lora, and add inference code for lora
315 | - Finetune using DPO
316 | 
317 | ### Architectural Changes
318 | 
319 | - Add Mixture of Experts (MoE)
320 | 
321 | ## References
322 | 
323 | - [Llama3](https://github.com/meta-llama/llama3/blob/main/llama/)
324 | - [nanoGPT](https://github.com/karpathy/nanoGPT)
325 | - [MiniMind](https://github.com/jingyaogong/minimind/)
326 | - [TinyLlama](https://github.com/jzhang38/TinyLlama)
327 | 


--------------------------------------------------------------------------------
/model/tokenizer/vocab.json:
--------------------------------------------------------------------------------
1 | {"<unk>":0,"<s>":1,"</s>":2,"<r0>":3,"<r1>":4,"<r2>":5,"<r3>":6,"<r4>":7,"<r5>":8,"<r6>":9,"<r7>":10,"<r8>":11,"<r9>":12,"<r10>":13,"<r11>":14,"<r12>":15,"!":16,"\"":17,"#":18,"$":19,"%":20,"&":21,"'":22,"(":23,")":24,"*":25,"+":26,",":27,"-":28,".":29,"/":30,"0":31,"1":32,"2":33,"3":34,"4":35,"5":36,"6":37,"7":38,"8":39,"9":40,":":41,";":42,"<":43,"=":44,">":45,"?":46,"@":47,"A":48,"B":49,"C":50,"D":51,"E":52,"F":53,"G":54,"H":55,"I":56,"J":57,"K":58,"L":59,"M":60,"N":61,"O":62,"P":63,"Q":64,"R":65,"S":66,"T":67,"U":68,"V":69,"W":70,"X":71,"Y":72,"Z":73,"[":74,"\\":75,"]":76,"^":77,"_":78,"`":79,"a":80,"b":81,"c":82,"d":83,"e":84,"f":85,"g":86,"h":87,"i":88,"j":89,"k":90,"l":91,"m":92,"n":93,"o":94,"p":95,"q":96,"r":97,"s":98,"t":99,"u":100,"v":101,"w":102,"x":103,"y":104,"z":105,"{":106,"|":107,"}":108,"~":109,"¡":110,"¢":111,"£":112,"¤":113,"¥":114,"¦":115,"§":116,"¨":117,"©":118,"ª":119,"«":120,"¬":121,"®":122,"¯":123,"°":124,"±":125,"²":126,"³":127,"´":128,"µ":129,"¶":130,"·":131,"¸":132,"¹":133,"º":134,"»":135,"¼":136,"½":137,"¾":138,"¿":139,"À":140,"Á":141,"Â":142,"Ã":143,"Ä":144,"Å":145,"Æ":146,"Ç":147,"È":148,"É":149,"Ê":150,"Ë":151,"Ì":152,"Í":153,"Î":154,"Ï":155,"Ð":156,"Ñ":157,"Ò":158,"Ó":159,"Ô":160,"Õ":161,"Ö":162,"×":163,"Ø":164,"Ù":165,"Ú":166,"Û":167,"Ü":168,"Ý":169,"Þ":170,"ß":171,"à":172,"á":173,"â":174,"ã":175,"ä":176,"å":177,"æ":178,"ç":179,"è":180,"é":181,"ê":182,"ë":183,"ì":184,"í":185,"î":186,"ï":187,"ð":188,"ñ":189,"ò":190,"ó":191,"ô":192,"õ":193,"ö":194,"÷":195,"ø":196,"ù":197,"ú":198,"û":199,"ü":200,"ý":201,"þ":202,"ÿ":203,"Ā":204,"ā":205,"Ă":206,"ă":207,"Ą":208,"ą":209,"Ć":210,"ć":211,"Ĉ":212,"ĉ":213,"Ċ":214,"ċ":215,"Č":216,"č":217,"Ď":218,"ď":219,"Đ":220,"đ":221,"Ē":222,"ē":223,"Ĕ":224,"ĕ":225,"Ė":226,"ė":227,"Ę":228,"ę":229,"Ě":230,"ě":231,"Ĝ":232,"ĝ":233,"Ğ":234,"ğ":235,"Ġ":236,"ġ":237,"Ģ":238,"ģ":239,"Ĥ":240,"ĥ":241,"Ħ":242,"ħ":243,"Ĩ":244,"ĩ":245,"Ī":246,"ī":247,"Ĭ":248,"ĭ":249,"Į":250,"į":251,"İ":252,"ı":253,"Ĳ":254,"ĳ":255,"Ĵ":256,"ĵ":257,"Ķ":258,"ķ":259,"ĸ":260,"Ĺ":261,"ĺ":262,"Ļ":263,"ļ":264,"Ľ":265,"ľ":266,"Ŀ":267,"ŀ":268,"Ł":269,"ł":270,"Ń":271,"Ġt":272,"Ġa":273,"he":274,"in":275,"re":276,"Ġthe":277,"on":278,"er":279,"Ġs":280,"Ġw":281,"at":282,"Ġo":283,"en":284,"Ġc":285,"it":286,"is":287,"an":288,"or":289,"Ġb":290,"es":291,"ed":292,"Ġf":293,"ing":294,"Ġp":295,"ou":296,"Ġan":297,"al":298,"Ġto":299,"ar":300,"Ġm":301,"Ġin":302,"Ġof":303,"Ġh":304,"Ġd":305,"âĢ":306,"as":307,"ic":308,"Ġand":309,"Ġth":310,"le":311,"om":312,"ion":313,"ll":314,"ent":315,"Ġn":316,"Ġl":317,"Ġre":318,"st":319,"ve":320,"Ġe":321,"ly":322,"ro":323,"Ġbe":324,"Ġg":325,"id":326,"ut":327,"ac":328,"ot":329,"ĠT":330,"ĠI":331,"Ġthat":332,"Ġon":333,"ay":334,"ĠS":335,"Ġis":336,"et":337,"im":338,"am":339,"ad":340,"ow":341,"ig":342,"se":343,"Ġfor":344,"âĢĻ":345,"ĠA":346,"ver":347,"ct":348,"ur":349,"ld":350,"ĠC":351,"Ġu":352,"Ġst":353,"Ġy":354,"Ġit":355,"Ġhe":356,"ir":357,"ation":358,"ith":359,"Ġwh":360,"ĠM":361,"ce":362,"ter":363,"ol":364,"Ġwe":365,"il":366,"Ġwith":367,"Ġas":368,"ĠB":369,"ch":370,"ill":371,"nd":372,"Ġwas":373,"ĠP":374,"ers":375,"Ġyou":376,"ke":377,"ĠâĢ":378,"Ġcon":379,"Ġde":380,"Ġr":381,"her":382,"if":383,"ate":384,"ag":385,"Ġat":386,"ĠW":387,"Ġha":388,"Ġse":389,"ĠH":390,"ore":391,"Ġpro":392,"ul":393,"pp":394,"ment":395,"ri":396,"ĠD":397,"Ġare":398,"est":399,"us":400,"Ġcom":401,"ist":402,"Ġ1":403,"and":404,"ĠR":405,"op":406,"ain":407,"ĠThe":408,"Ġ2":409,"igh":410,"rom":411,"th":412,"Ġ(":413,"od":414,"ĠF":415,"Ġne":416,"um":417,"Ġex":418,"un":419,"res":420,"ab":421,"âĢĿ":422,"Ġnot":423,"ĠN":424,"Ġv":425,"ess":426,"ity":427,"Ġhave":428,"em":429,"ould":430,"Ġor":431,"ort":432,"Ġby":433,"os":434,"ĠG":435,"ant":436,"ive":437,"Ġfrom":438,"Ġal":439,"el":440,"'s":441,"oc":442,"Ġsu":443,"art":444,"ĠL":445,"Ġsh":446,"nt":447,"Ġthis":448,"qu":449,"ight":450,"pe":451,"ust":452,"Ġle":453,"ies":454,"ĠE":455,"end":456,"Ġch":457,"Ġsa":458,"out":459,"Ġab":460,"The":461,"all":462,"ie":463,"ack":464,"00":465,"red":466,"ĠâĢľ":467,"ally":468,"Ġhas":469,"ĠJ":470,"Ġpl":471,"Ġgo":472,"ard":473,"iv":474,"Ġhis":475,"ome":476,"ap":477,"ost":478,"gh":479,"ĠO":480,"ear":481,"Ġtr":482,"our":483,"ast":484,"Ġj":485,"ak":486,"ud":487,"Ġwor":488,"Ġdo":489,"Ġthey":490,"Ġbut":491,"rou":492,"ge":493,"Ġcan":494,"Ġwill":495,"Ġsaid":496,"Ġwhe":497,"ĠU":498,"ich":499,"Ġk":500,"ok":501,"Ġall":502,"Ġ\"":503,"Ġwho":504,"ide":505,"Ġint":506,"ame":507,"ans":508,"ther":509,"ial":510,"Ġout":511,"Ġtheir":512,"ff":513,"ong":514,"ect":515,"ice":516,"pl":517,"ure":518,"pt":519,"age":520,"one":521,"Ġmore":522,"ions":523,"act":524,"ike":525,"Ġme":526,"Ġup":527,"ous":528,"ine":529,"Ġad":530,"Ġabout":531,"ra":532,"ated":533,"ber":534,"Ġwere":535,"Ġone":536,"Ġso":537,"Ġus":538,"og":539,"ime":540,"ep":541,"ĠK":542,"Ġhad":543,"are":544,"ia":545,"ub":546,"cc":547,"Ġwould":548,"Ġ20":549,"Ġcl":550,"Ġwhich":551,"Ġim":552,"ind":553,"ue":554,"ake":555,"iz":556,"Ġcont":557,"Ġag":558,"very":559,"Ġte":560,"Ġoff":561,"Ġcomp":562,"Ġun":563,"Ġbeen":564,"ĠSt":565,"Ġen":566,"so":567,"per":568,"ign":569,"au":570,"ĠTh":571,"ary":572,"ew":573,"ip":574,"ace":575,"Ġyear":576,"Ġlike":577,"ere":578,"ven":579,"ook":580,"Ġinc":581,"ays":582,"Ġpe":583,"Ġman":584,"ry":585,"ose":586,"ite":587,"Ġres":588,"able":589,"Ġper":590,"ass":591,"Ġsp":592,"ord":593,"ĠIn":594,"ire":595,"Ġother":596,"mer":597,"Ġsome":598,"ile":599,"ence":600,"ib":601,"ople":602,"Ġ201":603,"ks":604,"ations":605,"Ġover":606,"ical":607,"Ġthere":608,"ress":609,"ult":610,"Ġdis":611,"int":612,"Ġnew":613,"own":614,"Ġalso":615,"Ġthan":616,"Ġar":617,"Ġbec":618,"ru":619,"Ġthem":620,"ance":621,"ough":622,"ount":623,"Ġapp":624,"port":625,"Ġwhen":626,"ail":627,"Ġpart":628,"Ġyour":629,"ree":630,"Ġwhat":631,"ond":632,"Ġpeople":633,"orm":634,"Ġsc":635,"fter":636,"av":637,"ase":638,"Ġqu":639,"ĠCh":640,"Ġany":641,"Ġjust":642,"ĠY":643,"Ġits":644,"ach":645,"Ġtime":646,"ĠIt":647,"Ġkn":648,"Ġher":649,"Ġrec":650,"lic":651,"ens":652,"reat":653,"Ġget":654,"Ġif":655,"Ġmy":656,"Ġ3":657,"ĠV":658,".âĢĿ":659,"ian":660,"Ġinto":661,"Ġwork":662,"ound":663,"irst":664,"Ġtw":665,"ction":666,"vel":667,"Ġno":668,"du":669,"ings":670,"ory":671,"ident":672,"ors":673,"ition":674,"Ġour":675,"ick":676,"Ġ-":677,"ob":678,"Ġpre":679,"ang":680,"ove":681,"amp":682,"Ġplay":683,"wn":684,"ink":685,"ause":686,"te":687,"ade":688,"ates":689,"hing":690,"ark":691,"Ġro":692,"Ġpol":693,"Ġshe":694,"vers":695,"Ġco":696,",âĢĿ":697,"Ġafter":698,"Ġ19":699,"ĠHe":700,"Ġfirst":701,"Ġhow":702,"Ġcould":703,"ne":704,"ious":705,"nder":706,"Ġhim":707,"Ġacc":708,"ild":709,"aw":710,"'t":711,"ons":712,"Ġpo":713,"Ġfe":714,"Ġatt":715,"Ġonly":716,"ict":717,"hed":718,"Ġpr":719,"rit":720,"Ġtwo":721,"ov":722,"ish":723,"Ġcomm":724,"Ġspe":725,"âĢľ":726,"form":727,"Ġact":728,"we":729,"Ġbl":730,"erv":731,"ft":732,"Ġbet":733,".\"":734,"Ġdec":735,"ning":736,"Ġback":737,"Ġmost":738,"ces":739,"ood":740,"ck":741,"Ġam":742,"ĠâĢĵ":743,"Ġdid":744,"rough":745,"iff":746,"oy":747,"ater":748,"round":749,"Ġagain":750,"ual":751,"Ġdes":752,"Ġwant":753,"ise":754,"Ġunder":755,"day":756,"Ġ4":757,"sel":758,"ull":759,"Ġcons":760,"Ġnow":761,"Ġbecause":762,"ĠâĢĶ":763,"man":764,"lud":765,"Ġ$":766,"Ġyears":767,"tern":768,"ents":769,"ublic":770,"ec":771,"Ġeven":772,"iss":773,"Ġneed":774,"Ġinv":775,"Ġknow":776,"ĠWe":777,"..":778,"Ġsee":779,"ments":780,"ced":781,"Ġadd":782,"ting":783,"Ġind":784,"old":785,"Ġsec":786,",\"":787,"fore":788,"uch":789,"Ġevery":790,"urn":791,"Ġsupp":792,"Ġ5":793,"pect":794,"Ġmake":795,"ock":796,"ĠUn":797,"Ġthese":798,"Ġem":799,"oll":800,"ject":801,"Ġwhere":802,"In":803,"ational":804,"vern":805,"xt":806,"Ġbu":807,"ump":808,"Th":809,"Ġlook":810,"oss":811,"ĠAnd":812,"Ġdon":813,"Ġev":814,"ating":815,"Ġway":816,"Ġbeing":817,"Ġrem":818,"Ġright":819,"Ġ[":820,"ily":821,"row":822,"ng":823,"Ġmay":824,"lect":825,"Ġthen":826,"ĠTr":827,"Ġmany":828,"Ġcall":829,"Ġdiff":830,"iew":831,"Ġrel":832,"Ġdef":833,"ont":834,"ĠBut":835,"Ġinclud":836,"ased":837,"ason":838,"ied":839,"meric":840,"Ġthink":841,"aking":842,"les":843,"Ġreg":844,").":845,"Ġimp":846,"rib":847,"ited":848,"ath":849,"Ġthrough":850,"ell":851,"Ġsy":852,"Ġsay":853,"Ġshould":854,"Ġpres":855,"Ġfl":856,"uring":857,"ty":858,"Ġthose":859,"Ġlast":860,"other":861,"Ġshow":862,"Ġvery":863,"Ġmuch":864,"ific":865,"ities":866,"ower":867,"Ġstate":868,"tle":869,"ĠThis":870,"hen":871,"Ġbr":872,"ss":873,"ative":874,"Ġwell":875,"arg":876,"oth":877,"Ġ200":878,"Ġcount":879,"--":880,"und":881,"ĠWh":882,"Ġph":883,"Ġown":884,"Ġdoes":885,"arly":886,"ics":887,"Ġend":888,"als":889,"Ġdown":890,"Ġass":891,"Ġbel":892,"Ġuse":893,"ĠAmeric":894,"ts":895,"ange":896,"Ġlong":897,"ters":898,"ince":899,"gr":900,"Ġbefore":901,"omet":902,"Ġgovern":903,"ward":904,"Ġmon":905,"up":906,"ible":907,"ues":908,"ĠNew":909,"Ġstart":910,"Ġreport":911,"Ġke":912,"ife":913,"ble":914,"Ġfin":915,"Ġmade":916,"ener":917,"Ġteam":918,"olog":919,"),":920,"Ġsuch":921,"ix":922,"ren":923,"âĢĶ":924,"oun":925,"erson":926,"ility":927,"ism":928,"Ġworld":929,"Ġgoing":930,"Ġsm":931,"ful":932,"lp":933,"ually":934,"com":935,"ek":936,"ists":937,"Ġac":938,"Ġstud":939,"tain":940,"Ġgood":941,"It":942,"ork":943,"Ġeff":944,"Ġsub":945,"its":946,"Ġset":947,"Ġagainst":948,"Ġsur":949,"Ġgu":950,"Ġpoint":951,"Ġgame":952,"Ġent":953,"ars":954,"stem":955,"gan":956,"ys":957,"Ġ6":958,"li":959,"roup":960,"Ġprov":961,"Ġstill":962,"illion":963,"Ġwhile":964,"Ġhelp":965,"ax":966,"ween":967,"Ġlead":968,"ef":969,"Ġmin":970,"000":971,"imes":972,"alk":973,"ms":974,"Ġsays":975,"cy":976,"Ġob":977,"ank":978,"Ġprodu":979,"Ġgr":980,"Ġinter":981,"Ġsim":982,"Ġstr":983,"Ġinst":984,"self":985,"cess":986,"Ġconf":987,"Ġcol":988,"io":989,"Ġtake":990,"ures":991,"Ġsame":992,"Ġcr":993,"Ġthough":994,"Ġbetween":995,"Ġoffic":996,"Ġdiffere":997,"ution":998,"uss":999,"ever":1000,"ollow":1001,"land":1002,"Ġdisc":1003,"arch":1004,"Ġed":1005,"ool":1006,"air":1007,"Ġop":1008,"read":1009,"Ġhigh":1010,"Ġsign":1011,"Ġnum":1012,"Ġlaw":1013,"ah":1014,"Ġgovernment":1015,"ner":1016,"led":1017,"ĠRe":1018,"way":1019,"gg":1020,"Ġter":1021,"hip":1022,"any":1023,"hes":1024,"atch":1025,"cent":1026,"ames":1027,"Ġref":1028,"Ġsomet":1029,"ted":1030,"ten":1031,"Ġtrans":1032,"velop":1033,"We":1034,"Ġmem":1035,"Ġins":1036,"Ġser":1037,"ives":1038,"Ġrun":1039,"Ġaround":1040,"Ġfound":1041,"Ġthree":1042,"ave":1043,"ather":1044,"Ġmed":1045,"Ġhere":1046,"ouse":1047,"ĠAr":1048,"Ġreal":1049,"ved":1050,"Ġused":1051,"Ġexp":1052,"oot":1053,"Ġsystem":1054,"az":1055,"ci":1056,"ĠYou":1057,"cept":1058,"Ġreally":1059,"Ġgroup":1060,"Ġ10":1061,"ving":1062,"con":1063,"Ġsupport":1064,"ale":1065,"Ġmod":1066,"cer":1067,"Ġcour":1068,".[":1069,"ets":1070,"Ġret":1071,"Ġpublic":1072,"Ġ7":1073,"Ġspec":1074,"Ġhand":1075,"ĠLe":1076,"ins":1077,"pr":1078,"ying":1079,"Ġgreat":1080,"ĠTrump":1081,"Ġext":1082,"irect":1083,"Ġfam":1084,"ton":1085,"ĠIs":1086,"Ġcar":1087,"ense":1088,"Ġcur":1089,"Ġwom":1090,"ĠCom":1091,"alth":1092,"Ġexper":1093,"Ġfact":1094,"Ġhapp":1095,"ĠThey":1096,"Ġcap":1097,"ph":1098,"Ġboth":1099,"Ġfr":1100,"Ġread":1101,"Ġday":1102,"ĠAl":1103,"ices":1104,"ines":1105,"ular":1106,"Ġfind":1107,"Ġrep":1108,"Ġlit":1109,"uth":1110,"Ġdet":1111,"oh":1112,"Ġduring":1113,"Ġsl":1114,"ĠSh":1115,"ered":1116,"the":1117,"ĠAs":1118,"min":1119,"Ġcre":1120,"Ġserv":1121,"Ġmonth":1122,"Ġbig":1123,"Ġdevelop":1124,"Ġperson":1125,"ision":1126,"ĠCl":1127,"akes":1128,"Ġfollow":1129,"Ġtold":1130,"Ġiss":1131,"Ġsince":1132,"ically":1133,"ages":1134,"ared":1135,"med":1136,"ex":1137,"Ġwar":1138,"ivers":1139,"Ġcent":1140,"Ġweek":1141,"Ġ8":1142,"Ġfew":1143,"ording":1144,"aj":1145,"get":1146,"Ġpolit":1147,"ves":1148,"Ġleg":1149,"Ġexpl":1150,"ash":1151,"Ġgener":1152,"Ġlot":1153,"Ġbelie":1154,"der":1155,"Ġfil":1156,"Ġ0":1157,"ata":1158,"ĠSe":1159,"Ġrequ":1160,"ric":1161,"rent":1162,"Ġtoo":1163,"anc":1164,"iet":1165,"ized":1166,"bers":1167,"Ġcontin":1168,"Ġopp":1169,"Ġprot":1170,"Wh":1171,"Ġ9":1172,"ention":1173,"ness":1174,"Ġput":1175,"Ġinf":1176,"Ġcle":1177,"ĠIf":1178,"Ġanother":1179,"Ġcor":1180,"Ġdifferent":1181,"Ġresp":1182,"ĠDe":1183,"chool":1184,"Ġsomething":1185,"Ġloc":1186,"Ġsoc":1187,"Ġinvest":1188,"ute":1189,"20":1190,"Ġche":1191,"Ġcommun":1192,"ĠFr":1193,"Ġplan":1194,"Ġlife":1195,"Ġrele":1196,"uc":1197,"Ġpass":1198,"ox":1199,"ience":1200,"Ġfar":1201,"Ġpower":1202,"ned":1203,"Ġmet":1204,"Ġthings":1205,"Ġart":1206,"thing":1207,"Ġhead":1208,"Ġposs":1209,"erest":1210,"ody":1211,"ross":1212,"arge":1213,"Ġ'":1214,"Ġcamp":1215,"Ġvot":1216,"Ġdep":1217,"ize":1218,"Ġmight":1219,"This":1220,"ants":1221,"Ġask":1222,"Ġnext":1223,"Ġeach":1224,"iness":1225,"Ġ.":1226,"Ġnever":1227,"view":1228,"ier":1229,"Ġchild":1230,"ik":1231,"Ġsecond":1232,"ĠFor":1233,"Ġadv":1234,"Ġmark":1235,"Ġbus":1236,"Ġdem":1237,"Ġquest":1238,"Ġcome":1239,"ield":1240,"ĠâĢĺ":1241,"ĠAn":1242,"tt":1243,"Ġgot":1244,"ient":1245,"Ġbest":1246,"ital":1247,"ĠThat":1248,"Ġopen":1249,"Ġallow":1250,"Ġpost":1251,"nce":1252,"Ġseem":1253,"ug":1254,"rat":1255,"son":1256,"Ġfeel":1257,"ĠPro":1258,"ple":1259,"Ġpercent":1260,"ĠUS":1261,"gram":1262,"Ġpri":1263,"cl":1264,"Ġdirect":1265,"Ġhome":1266,"Ġincre":1267,"ired":1268,"Ġdist":1269,"orn":1270,"less":1271,"osed":1272,"ĠRep":1273,"Ġcontro":1274,"Ġresult":1275,"Ġhum":1276,"Ġcare":1277,"Ġcomple":1278,"Ġhard":1279,"Ġinform":1280,"Ġimport":1281,"Ġincluding":1282,"ately":1283,"Ġmillion":1284,"Ġplace":1285,"ama":1286,"Ġwithout":1287,"Ġav":1288,"conom":1289,"Ġide":1290,"Ġproble":1291,"chn":1292,"Ġbuild":1293,"gy":1294,"Ġnumber":1295,"oney":1296,"aim":1297,"Ġtop":1298,"Ġcountry":1299,"Ġel":1300,"ral":1301,"riend":1302,"Ġstand":1303,"ĠBl":1304,"uro":1305,"ret":1306,"ably":1307,"ished":1308,"Ġgl":1309,"Ġfour":1310,"Ġpartic":1311,"ways":1312,"co":1313,"Ġmov":1314,"Ġlet":1315,"Ġbetter":1316,"ration":1317,"Ġinterest":1318,"Ġrecent":1319,"Ġtri":1320,"But":1321,"ling":1322,"Ġappro":1323,"Ġlittle":1324,"Ġcalled":1325,"Ġless":1326,"Ġbeh":1327,"Ġturn":1328,"ead":1329,"line":1330,"Ġbro":1331,"Ġseason":1332,"10":1333,"Ġmil":1334,"ised":1335,"Ġcase":1336,"app":1337,"ract":1338,"Ġrest":1339,"__":1340,"ency":1341,"Ġattack":1342,"Ġpop":1343,"Ġann":1344,"Ġdr":1345,"ode":1346,"Ġleft":1347,"ĠInd":1348,"Ġfun":1349,"ĠZ":1350,"ĠThere":1351,"Ġkeep":1352,"ering":1353,"Ġmove":1354,"Ġfull":1355,"inal":1356,"Ġdidn":1357,"ajor":1358,"Ġmain":1359,"ina":1360,"me":1361,"be":1362,"Ġeffect":1363,"Ġusing":1364,"ature":1365,"Ġprev":1366,"Ġtest":1367,"Ġprogram":1368,"leg":1369,"Ġval":1370,"Ġpolice":1371,"Ġear":1372,"Ġpay":1373,"Ġwrit":1374,"to":1375,"Ġform":1376,"ĠAmerican":1377,"Ġeconom":1378,"Ġdata":1379,"Ġchar":1380,"Ġpos":1381,"Ġnews":1382,"Ġsit":1383,"Ġwin":1384,"ains":1385,"ined":1386,"He":1387,"iving":1388,"ready":1389,"Ġsmall":1390,"Ġlevel":1391,"ology":1392,"Ġlist":1393,"Ġschool":1394,"Ġwomen":1395,"uck":1396,"rap":1397,"mit":1398,"ided":1399,"Ġprocess":1400,"ger":1401,"'re":1402,"ird":1403,"reen":1404,"Ġeas":1405,"Ġchange":1406,"ploy":1407,"Ġprom":1408,"ians":1409,"Ġcurrent":1410,"Ġpresident":1411,"Ġhist":1412,"Ġrece":1413,"Ġcompany":1414,"orth":1415,"Ġmat":1416,"used":1417,"ĠSo":1418,"ruct":1419,"ĠUnited":1420,"Ġdri":1421,"Ġalready":1422,"ohn":1423,"ĠOb":1424,"Ġunt":1425,"Ġlater":1426,"Ġ=":1427,"Ġwhy":1428,"Ġty":1429,"Ġmoney":1430,"Ġsol":1431,"Ġbook":1432,"Ġbre":1433,"Ġalways":1434,"ron":1435,"ĠHow":1436,"Ġexpect":1437,"wit":1438,"Ġofficial":1439,"Ġbusiness":1440,"ĠQ":1441,"stand":1442,"Ġtechn":1443,"iversity":1444,"Ġproject":1445,"aign":1446,"Ġthing":1447,"Ġorgan":1448,"Ġinformation":1449,"ĠAd":1450,"Ġclaim":1451,"Ġtalk":1452,"ases":1453,"Ġdesign":1454,"ĠShe":1455,"Ġmajor":1456,"ĠPh":1457,"Ġbo":1458,"Ġclass":1459,"Ġ--":1460,"Ġprof":1461,"Ġchang":1462,"Ġjob":1463,"lection":1464,"ĠBe":1465,"Ġhealth":1466,"iven":1467,"aff":1468,"Ġable":1469,"urs":1470,"Ġstory":1471,"Ġvis":1472,"ertain":1473,"Ġpast":1474,"here":1475,"ards":1476,"outh":1477,"Ġrese":1478,"ĠCan":1479,"Ġoper":1480,"Ġfree":1481,"ption":1482,"Ġmust":1483,"Ġcity":1484,"ural":1485,"Ġaway":1486,"Ġhappen":1487,"ior":1488,"...":1489,"Ġimportant":1490,"Ġpat":1491,"uthor":1492,"ample":1493,"urity":1494,"ury":1495,"Ġkill":1496,"sc":1497,"Ġold":1498,"rist":1499,"ending":1500,"Ġest":1501,"Ġedit":1502,"ured":1503,"Ġsk":1504,"Ġgrow":1505,"acy":1506,"Ġdoesn":1507,"ission":1508,"Ġleast":1509,"Ġcost":1510,"ĠGo":1511,"Ġmen":1512,"Ġvar":1513,"Ġdays":1514,"St":1515,"Ġ18":1516,"ained":1517,"Ġhop":1518,"Ġconst":1519,"Ġtell":1520,"Ġcampaign":1521,"ides":1522,"resident":1523,"side":1524,"Ġstre":1525,"ider":1526,"Ġcame":1527,"ney":1528,"Ġaff":1529,"oad":1530,"ccess":1531,"ern":1532,"els":1533,"Ġtax":1534,"Ġkind":1535,"Ġmaking":1536,"Ġlarge":1537,"Ġenough":1538,"50":1539,"Ġfamily":1540,"ĠMar":1541,"Ġ12":1542,"Ġmar":1543,"ĠSp":1544,"Ġfriend":1545,"Ġrecord":1546,"Ġpolic":1547,"Ġview":1548,"ror":1549,"eral":1550,"ional":1551,"ator":1552,"Ġelect":1553,"ush":1554,"ived":1555,"ley":1556,"Ġuntil":1557,"ality":1558,"pped":1559,"As":1560,"ising":1561,"by":1562,"ified":1563,"Ġdeal":1564,"Ġvide":1565,"Ġpolitical":1566,"âĢ¦":1567,"ries":1568,"----":1569,"alf":1570,"augh":1571,"Ġworking":1572,"Ġtook":1573,"ee":1574,"ued":1575,"19":1576,"Ġfore":1577,"Ġve":1578,"Ġair":1579,"bs":1580,"ĠRepublic":1581,"If":1582,"arent":1583,"Ġintern":1584,"Ġdeb":1585,"ister":1586,"ĠSc":1587,"ĠJohn":1588,"Ġes":1589,"ane":1590,"aining":1591,"Ġline":1592,"iol":1593,"Ġmean":1594,"ocrat":1595,"Ġactually":1596,"Ġbecome":1597,"work":1598,"Ġfund":1599,"ott":1600,"ware":1601,"Ġformer":1602,"ony":1603,"ember":1604,"Ġoften":1605,"aps":1606,"Ġhuman":1607,"ĠEuro":1608,"Ġwon":1609,"Ġgive":1610,"ĠPl":1611,"ann":1612,"Ġaccording":1613,"most":1614,"Ġever":1615,"wh":1616,"Ġreturn":1617,"aring":1618,"11":1619,"ison":1620,"Ġword":1621,"Ġorder":1622,"eng":1623,"year":1624,"oint":1625,"ĠRuss":1626,"Ġname":1627,"Ġbit":1628,"Ġbas":1629,"Ġtimes":1630,"ization":1631,"Ġsever":1632,"gin":1633,"ĠDem":1634,"Ġpar":1635,"Ġdev":1636,"icle":1637,"Ġgames":1638,"Ġnight":1639,"iqu":1640,"ĠSy":1641,"Ġ199":1642,"uture":1643,"Ġcharac":1644,"ĠAll":1645,"ither":1646,"Ġperform":1647,"raft":1648,"ĠOr":1649,"Ġsing":1650,"for":1651,"Ġfac":1652,"ĠMay":1653,"Ġmarket":1654,"ĠMc":1655,"idd":1656,"Ġwr":1657,"ĠAt":1658,"Ġra":1659,"mber":1660,"rop":1661,"ĠCon":1662,"There":1663,"ER":1664,"Ġla":1665,"Ġdoing":1666,"ched":1667,"Ġappear":1668,"Ġlear":1669,"aken":1670,"ats":1671,"Ġearly":1672,"Ġevent":1673,"ĠPresident":1674,"Ġhaving":1675,"Ġseen":1676,"Ġtoday":1677,"Ġsocial":1678,"Ġplayers":1679,"Ġseveral":1680,"Ġcrit":1681,"idence":1682,"Ġmeet":1683,"Ġchildren":1684,"ĠYork":1685,"Ġ15":1686,"ands":1687,"Ġmedia":1688,"artment":1689,"Ġwhether":1690,"ots":1691,"Ġwent":1692,"Ġpress":1693,"Ġsure":1694,"ories":1695,"Ġmembers":1696,"Ġyoung":1697,"br":1698,"Ġcontrol":1699,"iously":1700,"ene":1701,"Ġprop":1702,"Ġfive":1703,"Ġocc":1704,"Ġequ":1705,"une":1706,"Ġwater":1707,"Ġmilit":1708,"ium":1709,"Ġeng":1710,"iver":1711,"Ġquestion":1712,"Ġpot":1713,"Ġlikely":1714,"Ġvers":1715,"sh":1716,"Ġcertain":1717,"ĠMr":1718,"Ġshort":1719,"enn":1720,"Ġexample":1721,"Ġarg":1722,"ĠStates":1723,"Ġinvol":1724,"//":1725,"lf":1726,"Ġdone":1727,"Ġsuccess":1728,"ended":1729,"vert":1730,"ĠEurope":1731,"Ġalong":1732,"Ġdesc":1733,"ĠObama":1734,"Ġmonths":1735,"And":1736,"Ġside":1737,"Ġhour":1738,"Ġ]":1739,"ially":1740,"ĠCol":1741,"ential":1742,"ball":1743,"Ġant":1744,"ague":1745,"Ġsex":1746,"empt":1747,"let":1748,"ĠBr":1749,"Ġwithin":1750,"ubl":1751,"Ġcourse":1752,"nect":1753,"urch":1754,"uff":1755,"ster":1756,"Ġ,":1757,"Ġonce":1758,"ulation":1759,"ĠOn":1760,"12":1761,"Ġfight":1762,"Ġunderstand":1763,"Ġactiv":1764,"Ġamong":1765,"imate":1766,"Ġinvestig":1767,"rew":1768,"Ġpossible":1769,"iting":1770,"Ġconcer":1771,"Ġcho":1772,"Ġfuture":1773,"ones":1774,"ortun":1775,"Ġauthor":1776,"itive":1777,"Ġthought":1778,"ggest":1779,"Ġproblem":1780,"Ġproduct":1781,"Ġmeans":1782,"ailable":1783,"Ġviol":1784,"my":1785,"Ġpriv":1786,"Ġknown":1787,"Ġnon":1788,"oci":1789,"âĢĵ":1790,"Ġcommunity":1791,"ivid":1792,"Ġacross":1793,"Ġhold":1794,"ĠWhen":1795,"ald":1796,"gress":1797,"ourn":1798,"Ġlocal":1799,"inc":1800,"Ġyet":1801,"Ġfail":1802,"Ġ&":1803,"Ġprotect":1804,"ĠCar":1805,"por":1806,"Ġorig":1807,"Ġreason":1808,"ler":1809,"ĠNo":1810,"Ġsuggest":1811,"Ġ11":1812,"Ġmus":1813,"orld":1814,"iber":1815,"not":1816,"book":1817,"Ġasked":1818,"Ġgiven":1819,"Ġmakes":1820,"ederal":1821,"Ġ2016":1822,"aged":1823,"ĠState":1824,"back":1825,"verage":1826,"Ġothers":1827,"You":1828,"Ġstrong":1829,"Ġreported":1830,"Ġi":1831,"Ġvideo":1832,"ĠTe":1833,"ober":1834,"Ġprob":1835,"Ġben":1836,"Ġbelieve":1837,"ances":1838,"Ġgetting":1839,"ĠHouse":1840,"ensive":1841,"Ġident":1842,"Ġca":1843,"iety":1844,"Ġannoun":1845,"ored":1846,"Ġlim":1847,"ĠUniversity":1848,"Ġ30":1849,"Ġlight":1850,"Ġsent":1851,"Ġcut":1852,"iron":1853,"Ġcond":1854,"itions":1855,"ĠChrist":1856,"urther":1857,"30":1858,"Ġnet":1859,"ashing":1860,"ĠCo":1861,"Ġsour":1862,"ams":1863,"rest":1864,"Ġnational":1865,"'ve":1866,"ament":1867,"Ġdeath":1868,".,":1869,"Ġcreat":1870,"Ġclose":1871,"Ġindust":1872,"For":1873,"ĠX":1874,"Ġland":1875,"Ġstarted":1876,"aster":1877,"use":1878,"Ġavailable":1879,"Ġlove":1880,"ĠDemocrat":1881,"Ġbat":1882,"Ġbased":1883,"ĠCent":1884,"Ġsaf":1885,"Ġlow":1886,"resent":1887,"Ġhig":1888,"ming":1889,"Ġaccount":1890,"Ġhistory":1891,"Ġ#":1892,"Ġfollowing":1893,"utes":1894,"of":1895,"Ġissue":1896,"Ġmom":1897,"ĠMin":1898,"Ġago":1899,"ability":1900,"selves":1901,"rael":1902,"Ġ2015":1903,"orts":1904,"ĠAm":1905,"AT":1906,"Ġcomes":1907,"Ġemploy":1908,"ument":1909,"Ġseries":1910,"Ġidea":1911,"Ġalmost":1912,"ination":1913,"eb":1914,"Ġparty":1915,"Ġtrying":1916,"ĠAmerica":1917,"gether":1918,"uly":1919,"Ġtogether":1920,"reed":1921,"ilar":1922,"Ġaccess":1923,"pro":1924,"Ġfoc":1925,"hers":1926,"ĠFl":1927,"ief":1928,"Ġcommit":1929,"itional":1930,"Ġtrad":1931,"Ġ16":1932,"ring":1933,"Ġpract":1934,"erman":1935,"Ġisn":1936,"though":1937,"ails":1938,"ĠWhat":1939,"erence":1940,"Ġimpro":1941,"Ġclear":1942,"Ġadded":1943,"af":1944,"ĠOne":1945,"Ġrul":1946,"ĠSen":1947,"Ġexperience":1948,"15":1949,"ged":1950,"ustom":1951,"Ġhalf":1952,"rict":1953,"ai":1954,"inton":1955,"Ġlive":1956,"ĠBro":1957,"ividual":1958,"minist":1959,"raw":1960,"Ġfav":1961,"Ġanything":1962,"Ġanal":1963,"Ġhit":1964,"unch":1965,"Ġsix":1966,"ote":1967,"witter":1968,"ograp":1969,"Ġlooking":1970,"IN":1971,"Ġarea":1972,"ĠComm":1973,"Ġsaying":1974,"ĠEx":1975,"Ġcomplete":1976,"Ġmiss":1977,"ĠCanad":1978,"Ġpresent":1979,"Ġ2014":1980,"Ġbill":1981,"aid":1982,"Ġris":1983,"ert":1984,"ators":1985,"cially":1986,"rug":1987,"Ġvict":1988,"uge":1989,"ĠPr":1990,"Ġcourt":1991,"Ġtaken":1992,"Ġbad":1993,"Ġmor":1994,"Ġfilm":1995,"nes":1996,"ien":1997,"ĠEng":1998,"ows":1999,"Ġaut":2000,"ĠWar":2001,"Ġer":2002,"Ġbehind":2003,"oto":2004,"Ġcompet":2005,"ev":2006,"Ġnear":2007,"Ġsecurity":2008,"Ġentire":2009,"osp":2010,"ĠTo":2011,"Ġtry":2012,"Ġproper":2013,"Ġmult":2014,"play":2015,"Ġmilitary":2016,"ĠPol":2017,"Ġtre":2018,"Ġz":2019,"Ġface":2020,"Ġdiscuss":2021,"wards":2022,"ĠDon":2023,"**":2024,"What":2025,"Ġrather":2026,"Ġtot":2027,"Ġindividual":2028,"'m":2029,"Ġmind":2030,"Ġsingle":2031,"sy":2032,"atic":2033,"Ġthreat":2034,"Ġstudy":2035,"Ġpubl":2036,"Ġwanted":2037,"amed":2038,"ĠMan":2039,"ĠAf":2040,"Ġpolicy":2041,"aper":2042,"Ġ14":2043,"irc":2044,"Ġfood":2045,"par":2046,"ĠIsrael":2047,"Ġfall":2048,"ales":2049,"Ġmess":2050,"ĠBrit":2051,"Ġsomeone":2052,"Ġblack":2053,"arm":2054,"ids":2055,"ĠSec":2056,"Ġquick":2057,"ĠNorth":2058,"ttp":2059,"off":2060,"lling":2061,"Ġspecial":2062,"Ġcult":2063,"ris":2064,"Ġsum":2065,"Ġcomb":2066,"On":2067,"Ġredu":2068,"Ġ/":2069,"lement":2070,"Ġcompan":2071,"Ġbring":2072,"ĠÂ":2073,"When":2074,"ply":2075,"olution":2076,"ĠDep":2077,"Ġfoot":2078,"An":2079,"Ġplayer":2080,"Ġattempt":2081,"ON":2082,"Ġtaking":2083,"ĠNational":2084,"dd":2085,"Ġkey":2086,"elt":2087,"ĠGr":2088,"Ġ17":2089,"ify":2090,"icult":2091,"Ġ13":2092,"Ġelse":2093,".)":2094,"ĠCal":2095,"rote":2096,"viron":2097,"ĠBar":2098,"Ġbody":2099,"ashington":2100,"ĠWorld":2101,"Ġ:":2102,"Ġnothing":2103,"ĠIm":2104,"Ġconnect":2105,"over":2106,"Ġfront":2107,"Ġcou":2108,"Ġspace":2109,"ĠClinton":2110,"Ġcoming":2111,"16":2112,"Ġprobably":2113,"ĠCity":2114,"Ġsw":2115,"Ġhowever":2116,"Ġsimilar":2117,"Ġpret":2118,"Ġdescrib":2119,"cing":2120,"IS":2121,"Ġresearch":2122,"Ġjo":2123,"Ġ|":2124,"ches":2125,"RE":2126,"Ġbenef":2127,"reme":2128,"yn":2129,"go":2130,"ĠHar":2131,"Ġexist":2132,"Ġsignific":2133,"Ġmass":2134,"Ġpoints":2135,"lier":2136,"ault":2137,"aces":2138,"ĠAust":2139,"://":2140,"Ġdecision":2141,"Ġconv":2142,"Ġsus":2143,"Ġrights":2144,"ĠSu":2145,"ww":2146,"ully":2147,"So":2148,"Ġenc":2149,"That":2150,"mb":2151,"Ġcoun":2152,"Ġeither":2153,"Ġfeat":2154,"Ġtarg":2155,"Ġhouse":2156,"14":2157,"Ġban":2158,"rodu":2159,"irl":2160,"Ġwrote":2161,"asing":2162,"Ġchall":2163,"ana":2164,"Ġimm":2165,"sw":2166,"ung":2167,"Ġservice":2168,"Ġdoc":2169,"Ġfurther":2170,"Ġthird":2171,"Ġfire":2172,"Ġcreate":2173,"Ġissues":2174,"Ġfig":2175,"Ġcompl":2176,"ively":2177,"Ġrepl":2178,"Ġbegin":2179,"Ġrelations":2180,"Ġwa":2181,"Ġwhole":2182,"ried":2183,"Ġsuper":2184,"Ġ2013":2185,"ape":2186,"ran":2187,"AN":2188,"ĠWith":2189,"Ġstop":2190,"Ġstep":2191,"Ġfield":2192,"Ġcand":2193,"Ġeverything":2194,"Ġexec":2195,"ĠApp":2196,"pping":2197,"ently":2198,"ĠJan":2199,"aul":2200,"Ġfederal":2201,"now":2202,"pecially":2203,"Ġtra":2204,"Ġroom":2205,"Ã©":2206,"Ġseems":2207,"ĠGe":2208,"ĠGu":2209,"Ġcompanies":2210,"Ġself":2211,"ĠWashington":2212,"pite":2213,"13":2214,"Ġinfl":2215,"Ġmeas":2216,"Ġdue":2217,"Ġreleased":2218,"Ġwatch":2219,"uation":2220,"Ġrole":2221,"ilt":2222,"Ġpick":2223,"Ġstates":2224,"ĠHowever":2225,"Ch":2226,"ibility":2227,"Ġstri":2228,"att":2229,"Ġcustom":2230,"Ġbegan":2231,"Ġofficials":2232,"Ġsound":2233,"Ġdam":2234,"iday":2235,"Ġey":2236,"Ġlab":2237,"amer":2238,"ivil":2239,"Ġleaders":2240,"ĠDav":2241,"Ġcharacter":2242,"ĠSouth":2243,"Ġstudents":2244,"Ġener":2245,"sec":2246,"Ġmatter":2247,"oon":2248,"head":2249,"ery":2250,"eal":2251,"Ġposition":2252,"Ġtreat":2253,"ades":2254,"obal":2255,"Ġopt":2256,"iod":2257,"Ġhimself":2258,"Ġstatement":2259,"Ġoutside":2260,"lim":2261,"Ġdevelopment":2262,"Ġfinal":2263,"ĠInst":2264,"Ġinit":2265,"Ġ2012":2266,"Ġ198":2267,"like":2268,"Ġstat":2269,"Ġbecame":2270,"Ġprovide":2271,"Ġdou":2272,"ender":2273,"vironment":2274,"Ġrepresent":2275,"Ġhours":2276,"Ġitself":2277,"Ġamount":2278,"Ġbreak":2279,"Ġoffice":2280,"ĠIntern":2281,"Ġgeneral":2282,"ĠPart":2283,"Ġcontinue":2284,"Ġabove":2285,"Ġcountries":2286,"Ġ2017":2287,"Ġthemselves":2288,"Ġspecific":2289,"Ġaction":2290,"aced":2291,"Ġ25":2292,"Ġaddress":2293,"reg":2294,"ogle":2295,"Ġlarg":2296,"lish":2297,"Ġgun":2298,"Ġneg":2299,"Ġinterview":2300,"Ġtrue":2301,"Ġ100":2302,"ask":2303,"Ġwasn":2304,"Ġsignificant":2305,"ĠAss":2306,"Ġsimply":2307,"Ġground":2308,"ogn":2309,"Ġ)":2310,"acebook":2311,"umb":2312,"ograph":2313,"'ll":2314,"angu":2315,"ĠGerman":2316,"Ġevidence":2317,"Ġadminist":2318,"Ġbar":2319,"Ġdi":2320,"Ġversion":2321,"ĠJust":2322,"Ġdifficult":2323,"Ġprofess":2324,"Ġagree":2325,"ĠEd":2326,"ream":2327,"ĠRes":2328,"ili":2329,"Ġrunning":2330,"Ġgroups":2331,"Ġbuilding":2332,"Ġdom":2333,"BC":2334,"Ġrad":2335,"eks":2336,"Ġtem":2337,"Ġtyp":2338,"oid":2339,"ĠMed":2340,"17":2341,"arter":2342,"ĠChina":2343,"iddle":2344,"Ġcop":2345,"Ġwoman":2346,"Ġneeded":2347,"Ġdrug":2348,"Ġmusic":2349,"Ġshows":2350,"Ġneeds":2351,"Ġcommon":2352,"Ġeveryone":2353,"Ġrelease":2354,"Ġdest":2355,"Ġinj":2356,"Ġlat":2357,"ift":2358,"Ġdeep":2359,"Ġsaw":2360,"light":2361,"Ġball":2362,"Ġexc":2363,"right":2364,"Ġrespons":2365,"ournal":2366,"Ġinclude":2367,"Ġbelow":2368,"lam":2369,"ribut":2370,"Ġindustry":2371,"Ġmag":2372,"Ġenergy":2373,"Ġpretty":2374,"ĠHis":2375,"Ġmot":2376,"Ġwhite":2377,"yle":2378,"Ġgen":2379,"Ġcandid":2380,"18":2381,"Ġplayed":2382,"Ġquite":2383,"Ġap":2384,"Ġphys":2385,"AR":2386,"ĠAfter":2387,"ĠWest":2388,"Ġfriends":2389,"body":2390,"Ġlate":2391,"ival":2392,"ĠAb":2393,"atur":2394,"isc":2395,"Ġbillion":2396,"Ġsense":2397,"Ġplaying":2398,"Ġcharg":2399,"Ġalleg":2400,"Ġlegal":2401,"How":2402,"Ġlost":2403,"Ġhom":2404,"AS":2405,"Ġ50":2406,"Ġespecially":2407,"where":2408,"Ġple":2409,"Ġabs":2410,"____":2411,"Ġvalue":2412,"Ġinstead":2413,"ĠMus":2414,"Ġred":2415,"time":2416,"Ġhost":2417,"Ġobject":2418,"ccording":2419,"epend":2420,"ager":2421,"oring":2422,"Ġperiod":2423,"osing":2424,"Ġchanges":2425,"ĠNews":2426,"Ġelection":2427,"lev":2428,"Ġanim":2429,"Ġrecently":2430,"Ġ{":2431,"Ġsuff":2432,"omm":2433,"irt":2434,"istic":2435,"Ġweeks":2436,"Ġjud":2437,"Ġsite":2438,"bo":2439,"lt":2440,"Ġforward":2441,"Ġtrack":2442,"ĠNot":2443,"ique":2444,"Ġlo":2445,"hod":2446,"ĠWhite":2447,"ĠTwitter":2448,"medi":2449,"Ġvote":2450,"ached":2451,"Ġdeter":2452,"Ġpain":2453,"ĠOct":2454,"Ġlonger":2455,"ĠCong":2456,"Ġopportun":2457,"eed":2458,"Ġaccept":2459,"oe":2460,"arc":2461,"Ġhope":2462,"oke":2463,"Ġblock":2464,"Ġmoment":2465,"Ġeconomic":2466,"ĠWill":2467,"Ġsoon":2468,"Ġep":2469,"Ġdecl":2470,"irm":2471,"Ġshoot":2472,"ript":2473,"oph":2474,"Ġtit":2475,"ĠMy":2476,"ĠDepartment":2477,"Ġpie":2478,"ij":2479,"Ġcred":2480,"Ġrelig":2481,"ancial":2482,"duct":2483,"Ġcirc":2484,"Ġwrong":2485,"Ġshare":2486,"ns":2487,"Ġworks":2488,"ners":2489,"Ġonline":2490,"Ġhigher":2491,"cial":2492,"Ġwebs":2493,"ĠMarch":2494,"Ġconsider":2495,"Ġpotential":2496,"Ġconsum":2497,"Ġfocus":2498,"Ġcheck":2499,"lear":2500,"ole":2501,"Ġeduc":2502,"key":2503,"alt":2504,"icro":2505,"Ġheld":2506,"overed":2507,"ĠSept":2508,"ĠDr":2509,"ularly":2510,"Ġ24":2511,"Ġwords":2512,"Ġpred":2513,"iding":2514,"Ġcode":2515,"Ġminutes":2516,"25":2517,"Ġfunction":2518,"onse":2519,"Ġled":2520,"Ġwalk":2521,"Ġservices":2522,"lege":2523,"Ġce":2524,"Ġstay":2525,"Ġapplic":2526,"Ġrev":2527,"Ġstru":2528,"Ġresults":2529,"ĠThese":2530,"Ġreve":2531,"They":2532,"ĠJune":2533,"Ġwind":2534,"Ġparent":2535,"Ġlaws":2536,"cil":2537,"ĠRepublican":2538,"uary":2539,"ĠAug":2540,"ĠSyri":2541,"Ġaud":2542,"urd":2543,"ĠMe":2544,"aut":2545,"Ġsold":2546,"ilities":2547,"Ġanyone":2548,"Ġfavor":2549,"Ġtotal":2550,"Ġarticle":2551,"ients":2552,"Ġage":2553,"Ġprim":2554,"put":2555,"ÂŃ":2556,"Ġ<":2557,"fect":2558,"Ad":2559,"ateg":2560,"Ġrisk":2561,"Ġcomput":2562,"ĠMich":2563,"Al":2564,"asons":2565,"Ġpersonal":2566,"fer":2567,"Ġreview":2568,"Ġcontract":2569,"ĠPe":2570,"itution":2571,"Ġupd":2572,"val":2573,"ES":2574,"ule":2575,"ham":2576,"anger":2577,"haps":2578,"Ġvol":2579,"Ġtarget":2580,"Ġpopular":2581,"Ġansw":2582,"Ġexpress":2583,"==":2584,"apan":2585,"pril":2586,"ray":2587,"Ġden":2588,"Ġhy":2589,"Ġnecess":2590,"Ġ2011":2591,"Ġearlier":2592,"aging":2593,"Ġgoal":2594,"ĠDo":2595,"Ġproblems":2596,"Ġrate":2597,"Ġrespect":2598,"Ġdead":2599,"Ph":2600,"uel":2601,"Ġreceived":2602,"che":2603,"ends":2604,"Ġport":2605,"Ġenvironment":2606,"ĠPar":2607,"Ġcreated":2608,"ĠSte":2609,"Ġexpected":2610,"tee":2611,"Ġbase":2612,"Ġtechnology":2613,"Ġincrease":2614,"Ġways":2615,"reet":2616,"Ġlangu":2617,"ĠCongress":2618,"Ġ+":2619,"ude":2620,"Ġcomment":2621,"Ġscient":2622,"Ġsurpr":2623,"iple":2624,"ĠGod":2625,"ĠJuly":2626,"isl":2627,"bit":2628,"Ġanti":2629,"Ġprivate":2630,"Ġpack":2631,"Ġforce":2632,"action":2633,"Ġshot":2634,"ĠSm":2635,"Ġquestions":2636,"Ġcannot":2637,"Ġannounced":2638,"hern":2639,"aving":2640,"ĠFacebook":2641,"Ġspeak":2642,"Ġpur":2643,"Ġeasy":2644,"ĠMond":2645,"Ġterror":2646,"onst":2647,"Ġimpact":2648,"atives":2649,"eter":2650,"na":2651,"Ġinvolved":2652,"aterial":2653,"hood":2654,"ples":2655,"Ġdiv":2656,"ctor":2657,"ills":2658,"lex":2659,"Ġexecut":2660,"ĠEn":2661,"ĠAustral":2662,"Ġet":2663,"itiz":2664,"Ġsen":2665,"Ġnearly":2666,"rem":2667,"Ġ197":2668,"ourt":2669,"Ġexact":2670,"Ġrelationship":2671,"ĠGoogle":2672,"60":2673,"ires":2674,"Ġcontent":2675,"Ġmodel":2676,"okes":2677,"omb":2678,"Ġcivil":2679,"ees":2680,"uesday":2681,"Ġstaff":2682,"ĠFriday":2683,"ech":2684,"ellig":2685,"Ġoriginal":2686,"Ġleave":2687,"Ġcases":2688,"Ġtype":2689,"Ġcapt":2690,"cast":2691,"Ġinside":2692,"Ġinternational":2693,"Ġ2010":2694,"ĠApril":2695,"To":2696,"rel":2697,"ĠPal":2698,"atural":2699,"Ġcell":2700,"Ġcoll":2701,"Ġbroad":2702,"Ġworked":2703,"Ġarrest":2704,"Ġfans":2705,"Ġreports":2706,"Ġphot":2707,"Ġkilled":2708,"Ġclub":2709,"bor":2710,"lled":2711,"EN":2712,"At":2713,"Ġaverage":2714,"Ġliving":2715,"ĠNe":2716,"Ġsoft":2717,"Ġbecom":2718,"ĠRed":2719,"Ġcal":2720,"Ġresponse":2721,"Ġheart":2722,"ĠMonday":2723,"ging":2724,"ĠAmericans":2725,"Ġrespond":2726,"Ġur":2727,"Ġusers":2728,"Ġnation":2729,"Ġchance":2730,"Ġimmedi":2731,"Ġsubject":2732,"Ġestab":2733,"ĠEm":2734,"ky":2735,"OR":2736,"Ġworth":2737,"Ġtakes":2738,"ĠCount":2739,"hel":2740,"Ġobs":2741,"joy":2742,"ips":2743,"ĠAct":2744,"ests":2745,"rought":2746,"Ġdirector":2747,"Ġglobal":2748,"ĠTuesday":2749,"ael":2750,"Ġterms":2751,"Ġassoci":2752,"ĠJapan":2753,"Ġcollect":2754,"ises":2755,"ĠPer":2756,"ops":2757,"Ġsource":2758,"Ġdel":2759,"Ġprice":2760,"ites":2761,"Ġfinancial":2762,"ovember":2763,"ãģ":2764,"rain":2765,"ological":2766,"Ġ@":2767,"Ġability":2768,"Ġmother":2769,"Ġdefin":2770,"izing":2771,"ĠRussia":2772,"Ġworkers":2773,"Ġpublished":2774,"Ġstore":2775,"\".":2776,"osition":2777,"ĠWhile":2778,"Ġdecided":2779,"Ġmid":2780,"Ġroad":2781,"Ġemer":2782,"Ġgave":2783,"mon":2784,"Ġsqu":2785,"Ġhappened":2786,"Ġinflu":2787,"Ġtou":2788,"ĠWed":2789,"Ġbuy":2790,"Ġweap":2791,"ĠSome":2792,"Ġphone":2793,"ĠNow":2794,"Ġlack":2795,"unt":2796,"Ġoil":2797,"ĠCalif":2798,"Ġforeign":2799,"Ġ40":2800,"vertise":2801,"Ġrequire":2802,"Ġmember":2803,"Ġcapital":2804,"clus":2805,"oved":2806,"rown":2807,"Ġlives":2808,"Ġwhose":2809,"Ġlog":2810,"ĠOff":2811,"Ġgirl":2812,"Ġcurrently":2813,"Ġaffect":2814,"Ġindic":2815,"ino":2816,"ĠGeor":2817,"Ġfem":2818,"based":2819,"rect":2820,"ought":2821,"Ġmethod":2822,"ĠJanuary":2823,"\",":2824,"Ġtal":2825,"Ġgoes":2826,"Ġsituation":2827,"Ġoffer":2828,"Ġmeeting":2829,"ruction":2830,"Ġnetwork":2831,"ĠFeb":2832,"ĠSan":2833,"Ġparticip":2834,"ĠSund":2835,"Ġplans":2836,"Ġbank":2837,"Ġarri":2838,"Ġmaterial":2839,"Ġcy":2840,"load":2841,"oud":2842,"ĠBy":2843,"Ġturned":2844,"ursday":2845,"Ġpict":2846,"One":2847,"ĠHill":2848,"40":2849,"apt":2850,"Ġrecogn":2851,"nesday":2852,"Ġteams":2853,"fully":2854,"Ġintrodu":2855,"lin":2856,"ĠAg":2857,"Ġregard":2858,"Ġsimple":2859,"ese":2860,"Ġnorm":2861,"Ġpage":2862,"ĠNovember":2863,"order":2864,"Ġevents":2865,"Ġhuge":2866,"aur":2867,"Ġill":2868,"Ġperformance":2869,"ĠCor":2870,"Ġareas":2871,"Ġadministration":2872,"Ġfelt":2873,"ĠSeptember":2874,"ĠAfric":2875,"Ġcouple":2876,"Ġapproach":2877,"ĠTV":2878,"--------":2879,"ety":2880,"Ġcareer":2881,"Ġord":2882,"Ġdeli":2883,"ĠIslam":2884,"ĠGl":2885,"Ġhon":2886,"Ġmis":2887,"Ġtool":2888,"bl":2889,"Ġemail":2890,"Ġvictim":2891,"ume":2892,"oor":2893,"Ġvia":2894,"iment":2895,"Ġimprove":2896,"Ġfre":2897,"Ġquickly":2898,"ructure":2899,"Ġ21":2900,"ĠPaul":2901,"Ġpull":2902,"Ġengine":2903,"ĠDavid":2904,"Ġnumbers":2905,"Ġfather":2906,"ĠEuropean":2907,"Ġbuilt":2908,"avy":2909,"Ġrace":2910,"ĠWednesday":2911,"chie":2912,"Ġround":2913,"ĠThursday":2914,"Ġdig":2915,"aily":2916,"Ġpers":2917,"ornia":2918,"ĠAng":2919,"ĠCourt":2920,"ened":2921,"Ġheard":2922,"Ġwebsite":2923,"Ġmatch":2924,"Ġthous":2925,"Ġsecret":2926,"Ġtown":2927,"âĢĿ.":2928,"Ġloss":2929,"Ġspent":2930,"Ġhttp":2931,"Ġboy":2932,"cember":2933,");":2934,"Ġeconomy":2935,"Ġneigh":2936,"Ġtext":2937,"ĠSch":2938,"Ġwants":2939,"Ġrepe":2940,"Ġlower":2941,"Ġenjoy":2942,"uses":2943,"Ġ22":2944,"ĠAugust":2945,"Ġproduction":2946,"Ġwide":2947,"Ġcover":2948,"ĠOctober":2949,"AL":2950,"istan":2951,"Ġpoll":2952,"Ġincluded":2953,"ights":2954,"ctions":2955,"Ġhands":2956,"][":2957,"With":2958,"Ġtravel":2959,"iction":2960,"face":2961,"Ġknew":2962,"Ġtrade":2963,"Ġlegisl":2964,"Ġallowed":2965,"Ġste":2966,"Sh":2967,"ĠCanada":2968,"atory":2969,"99":2970,"Ġreveal":2971,"Ġbehav":2972,"down":2973,"Ġvisit":2974,"Ġtried":2975,"Ġsepar":2976,"orters":2977,"ences":2978,"ĠEl":2979,"Ġboard":2980,"Ġcause":2981,"Ġbi":2982,"Ġgets":2983,"ze":2984,"?âĢĿ":2985,"ĠSunday":2986,"estern":2987,"Ġfear":2988,"gest":2989,"Ġattention":2990,"ĠBritish":2991,"ishing":2992,"After":2993,"Ġperfect":2994,"Ġvo":2995,"ession":2996,"ground":2997,"Ġveh":2998,"Ġgrowth":2999,"anks":3000,"Ġparticular":3001,"cle":3002,"200":3003,"Ġ}":3004,"Ġforces":3005,"Ġmovement":3006,"Ġbrought":3007,"force":3008,"ibly":3009,"Ġens":3010,"Ġlooks":3011,"uk":3012,"hold":3013,"Ġreach":3014,"Ġsize":3015,"agement":3016,"ĠUK":3017,"ĠKore":3018,"Ġlink":3019,"arily":3020,"ening":3021,"Re":3022,"ceed":3023,"Ġvarious":3024,"ĠCalifornia":3025,"Ġbud":3026,"idered":3027,"ĠRussian":3028,"Ġsong":3029,"iam":3030,"ĠSw":3031,"urning":3032,"idents":3033,"Ġpopulation":3034,"alled":3035,"Ġrac":3036,"ĠQu":3037,"'d":3038,"AC":3039,"Ġprevious":3040,"ma":3041,"ĠDecember":3042,"Ġcontact":3043,"ĠIS":3044,"Ġmention":3045,"aker":3046,"cies":3047,"undred":3048,"Ġmorning":3049,"Ġfair":3050,"Ġrules":3051,"ĠPost":3052,"Ġfell":3053,"Ġdemand":3054,"Ġprogress":3055,"sequ":3056,"yond":3057,"Ġliber":3058,"set":3059,"While":3060,"Ġ23":3061,"come":3062,"Ġhot":3063,"field":3064,"Ġtraining":3065,"ĠHer":3066,"ĠCont":3067,"raz":3068,"Ġhor":3069,"fl":3070,"Ġleader":3071,"ledge":3072,"Ġcitiz":3073,"omp":3074,"Ġindepend":3075,"24":3076,"elligence":3077,"oo":3078,"inks":3079,"ecause":3080,"ED":3081,"US":3082,"Ġimage":3083,"ĠTimes":3084,"Ġess":3085,"raq":3086,"Ġlatest":3087,"ĠStreet":3088,"ĠBlack":3089,"ĠPat":3090,"uments":3091,"ĠJew":3092,"ification":3093,"arth":3094,"Ġlanguage":3095,"IT":3096,"Ġconsidered":3097,"Ġtalking":3098,"ondon":3099,"Ġprep":3100,"Ġstrugg":3101,"Ġsearch":3102,"Ġmedical":3103,"Ġstar":3104,"de":3105,"iant":3106,"Ġcoach":3107,"Ġupon":3108,"aph":3109,"Tr":3110,"Ġfeatures":3111,"vent":3112,"ges":3113,"Ġaddition":3114,"ĠPark":3115,"Ġspeed":3116,"Ġfast":3117,"Ġwond":3118,"OS":3119,"Ġexam":3120,"ĠParty":3121,"bed":3122,"aries":3123,"icles":3124,"ha":3125,"Ġund":3126,"Ġ2008":3127,"imately":3128,"ĠKing":3129,"Ġinvestigation":3130,"Ġdoll":3131,"ĠCenter":3132,"ĠSenate":3133,"Ġsystems":3134,"Ġsort":3135,"pir":3136,"Ġreb":3137,"sych":3138,"aturday":3139,"eth":3140,"Ġslow":3141,"Ġconduct":3142,"Ġachie":3143,"Ġcritic":3144,"ille":3145,"Ġbox":3146,"imin":3147,"Ġmach":3148,"IC":3149,"Ġlevels":3150,"ps":3151,"ĠIran":3152,"Ġpaper":3153,"ope":3154,"uit":3155,"Ġsn":3156,"eared":3157,"Ġâ":3158,"ysis":3159,"Ġwritten":3160,"wo":3161,"Ġconserv":3162,"80":3163,"Ġprior":3164,"Ġdraw":3165,"ĠIndia":3166,"Ġ2009":3167,"idge":3168,"ĠCounty":3169,"Ġhear":3170,"het":3171,"king":3172,"Ġpen":3173,"Ġprevent":3174,"uf":3175,"asc":3176,"Ġincreasing":3177,"ĠIraq":3178,"igr":3179,"Ġextreme":3180,"ruary":3181,"Ġsexual":3182,"Ġson":3183,"Ġdocument":3184,"Ġjobs":3185,"Ġspokes":3186,"ĠMark":3187,"tr":3188,"Ġsens":3189,"ĠMuslim":3190,"ani":3191,"Ġparents":3192,"Ġscreen":3193,"Ġseven":3194,"Ġsafe":3195,"Ġcard":3196,"reedom":3197,"Ġeffort":3198,"atform":3199,"Ġlearn":3200,"room":3201,"Ġserious":3202,"Ġregul":3203,"ĠTex":3204,"Ġinn":3205,"ett":3206,"Ġcompletely":3207,"itch":3208,"rast":3209,"ĠDonald":3210,"ining":3211,"ico":3212,"ext":3213,"Ġsurve":3214,"Ġrange":3215,"Ġeight":3216,"Ġclaims":3217,"Ġinteresting":3218,"Ġsociety":3219,"ĠRom":3220,"Ġmajority":3221,"Ġdoub":3222,"eff":3223,"Ġviolence":3224,"Ġmessage":3225,"Ġconc":3226,"rench":3227,"OP":3228,"vertisement":3229,"Ġrequired":3230,"Ġattacks":3231,"ĠChar":3232,"Ġdespite":3233,"anced":3234,"Ġparticularly":3235,"Ġobv":3236,"She":3237,"Ġopportunity":3238,"ĠLondon":3239,"ources":3240,"unk":3241,"Ġnatural":3242,"Ġextra":3243,"Ġanswer":3244,"Ġexperien":3245,"Ġguy":3246,"aming":3247,"22":3248,"uclear":3249,"ĠKe":3250,"Ġofficers":3251,"Ġpaid":3252,"Ġscience":3253,"ension":3254,"mp":3255,"Ġjournal":3256,"abor":3257,"Ġbiggest":3258,"Ġcross":3259,"Ġmodern":3260,"inese":3261,"Ġexactly":3262,"illed":3263,"ĠEast":3264,"ibr":3265,"Ġcarry":3266,"erve":3267,"AM":3268,"inally":3269,"entially":3270,"ĠMore":3271,"Ġhundred":3272,"Ġdro":3273,"Ġclimate":3274,"Ġexcept":3275,"oses":3276,"Ġmovie":3277,"Ġaren":3278,"Ġstories":3279,"ĠNet":3280,"ny":3281,"Ġpractice":3282,"lying":3283,"pper":3284,"nces":3285,"ux":3286,"Ġremain":3287,"Ġsurv":3288,"ari":3289,"Ġmiddle":3290,"Ġcertainly":3291,"isions":3292,"ĠSaturday":3293,"Ġdefense":3294,"Ġdescribed":3295,"add":3296,"Ġgas":3297,"Ġproducts":3298,"Ġsometimes":3299,"Ġconvers":3300,"Ġlay":3301,"Ġcommunic":3302,"iers":3303,"Ġterm":3304,"Ġprison":3305,"Ġlooked":3306,"utions":3307,"Ġstuff":3308,"Ġ...":3309,"hib":3310,"Ġshowed":3311,"ospital":3312,"Ġaw":3313,"aves":3314,"Ġult":3315,"Ġuser":3316,"ously":3317,"Ġ27":3318,"ĠFebruary":3319,"Ġhistor":3320,"Ġquarter":3321,"Ġrout":3322,"Ġfinally":3323,"Ġoccur":3324,"ĠSand":3325,"Ġimag":3326,"Ġcosts":3327,"uing":3328,"Ġleading":3329,"Ġblood":3330,"ĠInt":3331,"ĠChe":3332,"Ġchanged":3333,"Ġones":3334,"Ġquality":3335,"Ġnamed":3336,"Ġsummer":3337,"Ġdetails":3338,"Ġremains":3339,"Ġtable":3340,"akers":3341,"aught":3342,"ĠSpe":3343,"Ġpath":3344,"Ġpan":3345,"Ġ*":3346,"Ġgiving":3347,"ĠSoc":3348,"ĠTH":3349,"Ġstarting":3350,"FL":3351,"Ġeducation":3352,"Ġprovided":3353,"enge":3354,"Ġpoor":3355,"erc":3356,"Ġregion":3357,"Ġdanger":3358,"coin":3359,"ida":3360,"Ġwriting":3361,"arr":3362,"Ġchief":3363,"Ġdemonst":3364,"inary":3365,"Ġdamage":3366,"Ġthinking":3367,"Ġcomments":3368,"Ġmultiple":3369,"Ġmob":3370,"Ġcontinued":3371,"ĠMcC":3372,"Ġpurp":3373,"icks":3374,"Ġfit":3375,"Ġparts":3376,"Ġgrowing":3377,"Ġmaintain":3378,"ĠTur":3379,"Ġment":3380,"Ġdied":3381,"ĠGener":3382,"Ġ>":3383,"Ġpurch":3384,"ĠDemocratic":3385,"ĠMor":3386,"ĠRepublicans":3387,"yl":3388,"Ġfix":3389,"23":3390,"ĠLeague":3391,"No":3392,"Ġassist":3393,"Ġrequest":3394,"iminal":3395,"pre":3396,"ĠEven":3397,"Ġreading":3398,"Ġwouldn":3399,"Ġmill":3400,"mitted":3401,"ban":3402,"Ġculture":3403,"Ġwrite":3404,"zz":3405,"Ġefforts":3406,"Ġcalls":3407,"Ġhous":3408,"oper":3409,"Ġready":3410,"Ġincreased":3411,"fort":3412,"ems":3413,"ĠCoun":3414,"ago":3415,"Ġimplement":3416,"Ġspot":3417,"Ġcele":3418,"Ġeffects":3419,"ĠBill":3420,"Ġperhaps":3421,"Ġvoters":3422,"Ġ26":3423,"Ġ28":3424,"ables":3425,"arian":3426,"Ġalthough":3427,"Ġfootball":3428,"Ġdirectly":3429,"oul":3430,"Im":3431,"Ġstandard":3432,"Ġreality":3433,"Ġcomplex":3434,"ĠFirst":3435,"icated":3436,"Ġspeech":3437,"Ġguys":3438,"using":3439,"aly":3440,"ĠChinese":3441,"Ġkids":3442,"ĠDes":3443,"Ġusually":3444,"Ġalone":3445,"Ġavoid":3446,"Ġtowards":3447,"Ġbeyond":3448,"BI":3449,"Ġver":3450,"Ġconsist":3451,"Ġplatform":3452,"Ġcenter":3453,"rowd":3454,"Ġincludes":3455,"Ġcamer":3456,"sp":3457,"Ġstudent":3458,"Ġfile":3459,"ara":3460,"Ġcouldn":3461,"Ġtro":3462,"itute":3463,"Ġchoice":3464,"Ġscene":3465,"ĠBo":3466,"Ġselect":3467,"urg":3468,"anch":3469,"egr":3470,"Ġthroughout":3471,"Ġchalleng":3472,"Ġsubst":3473,"Ġimmediately":3474,"ĠMichael":3475,"Ġunf":3476,"ĠJames":3477,"ĠTexas":3478,"Ġrecomm":3479,"Ġrob":3480,"ĠJack":3481,"ĠApple":3482,"Ġconcept":3483,"Ġadditional":3484,"Ġremember":3485,"Ġclos":3486,"Ġbrain":3487,"ĠSer":3488,"iles":3489,"Ġdesigned":3490,"Ġcat":3491,"agon":3492,"Ġwife":3493,"ĠCons":3494,"itor":3495,"Ġ(@":3496,"ston":3497,"wise":3498,"Ġagency":3499,"urt":3500,"Ġsle":3501,"Ġsoftware":3502,"Ġresearc":3503,"ford":3504,"icient":3505,"Ġil":3506,"All":3507,"Ġtoward":3508,"Ġten":3509,"Ġgone":3510,"Ġdebate":3511,"Ġsafety":3512,"Ġcorpor":3513,"imum":3514,"rupt":3515,"ĠâĢ¦":3516,"amb":3517,"Ġblog":3518,"ĠIndian":3519,"Ġepis":3520,"Ġmoved":3521,"ĠMinister":3522,"irth":3523,"Ġfigure":3524,"Ġrelative":3525,"hest":3526,"Ġopin":3527,"Ġofficer":3528,"Ġletter":3529,"aches":3530,"astic":3531,"omen":3532,"atever":3533,"Ġdog":3534,"Ġstage":3535,"Ġpressure":3536,"Ġpiece":3537,"ression":3538,"Ġpassed":3539,"ĠMon":3540,"Ġcast":3541,"ilies":3542,"Ġreceive":3543,"men":3544,"ampions":3545,"berg":3546,"Now":3547,"Ġ60":3548,"Ġoption":3549,"Ġthousands":3550,"ĠGreen":3551,"ĠII":3552,"Ġcounter":3553,"Ġsil":3554,"Ġswe":3555,"Ġahead":3556,"Ġbattle":3557,"Ġprec":3558,"Ġut":3559,"OT":3560,"new":3561,"airs":3562,"Ġmeaning":3563,"Ġproperty":3564,"Ġlaunch":3565,"roy":3566,"adem":3567,"Ġdisapp":3568,"Ġdrive":3569,"ĠSuper":3570,"itation":3571,"ĠServ":3572,"Ġcris":3573,"Ġclean":3574,"oes":3575,"Ġsuspect":3576,"olute":3577,"Ġnom":3578,"Ġalleged":3579,"Ġappears":3580,"Ġsell":3581,"Ġscore":3582,"ports":3583,"Ġdark":3584,"Ġhelped":3585,"ĠThen":3586,"Ġrock":3587,"ĠAssoci":3588,"Ġnuclear":3589,"Ġdate":3590,"Ġconditions":3591,"wood":3592,"just":3593,"Ġsat":3594,"Ġcharacters":3595,"Ġign":3596,"Ġreasons":3597,"Ġnature":3598,"Ġorganization":3599,"ĠRich":3600,"ĠDemocrats":3601,"Ġdeg":3602,"ĠSim":3603,"Ġbudget":3604,"Ġcomputer":3605,"Ġtour":3606,"Ġbeginning":3607,"27":3608,"lease":3609,"Ġdifference":3610,"aughter":3611,"rief":3612,"Ġfan":3613,"ĠPlay":3614,"Ġmoving":3615,"Ġdevice":3616,"Ġencour":3617,"Ġcentury":3618,"AD":3619,"Ġfailed":3620,"Ġmyself":3621,"ĠEnglish":3622,"Ġtwe":3623,"?\"":3624,"Ġrefer":3625,"ĠStar":3626,"more":3627,"Ġpark":3628,"Ġaim":3629,"Ġtouch":3630,"Ġbooks":3631,"Ġmaybe":3632,"Ġtitle":3633,"Ġx":3634,"Ġhappy":3635,"Ġcompared":3636,"ĠMart":3637,"....":3638,"chan":3639,"Ġcommand":3640,"ĠRober":3641,"cient":3642,"Ġshown":3643,"ocked":3644,"yan":3645,"Ġcopy":3646,"Ġestim":3647,"ĠFrench":3648,"Ġleague":3649,"ivity":3650,"Ġband":3651,"!!":3652,"Ġwall":3653,"Ġtrust":3654,"Ġstra":3655,"Ġcandidate":3656,"ĠTor":3657,"Ġsche":3658,"Ġspending":3659,"AP":3660,"etro":3661,"Ġbal":3662,"Ġeyes":3663,"Ġconference":3664,"Ġcontribut":3665,"45":3666,"Ġpositive":3667,"Ġnecessary":3668,"Ġcitizens":3669,"Ġpresidential":3670,"ĠHere":3671,"Ġeventually":3672,"venue":3673,"Ġcool":3674,"Ġpowerful":3675,"Ġcollege":3676,"ĠVal":3677,"Ġschools":3678,"igration":3679,"Ġanalysis":3680,"orks":3681,"Ġlimited":3682,"enty":3683,"Ġ196":3684,"Ġarch":3685,"Ġdisp":3686,"ĠBen":3687,"Ġnote":3688,"35":3689,"Ġexecutive":3690,"Ġcorrect":3691,"Ġlargest":3692,"orney":3693,"Ġmeant":3694,"My":3695,"Ġfollowed":3696,"ĠSyria":3697,"Ġyes":3698,"EC":3699,"Ġcrowd":3700,"Ġsal":3701,"Ġmix":3702,"Ġfeed":3703,"Ġforced":3704,"ource":3705,"ufact":3706,"ĠDay":3707,"Ġfeature":3708,"Ġaware":3709,"aint":3710,"Ġhar":3711,"Ġtreatment":3712,"ĠAir":3713,"Ġreligious":3714,"aud":3715,"Ġfirm":3716,"ressed":3717,"New":3718,"Ġallows":3719,"Ġstreng":3720,"Ġconfir":3721,"Ġautom":3722,"26":3723,"Ġbott":3724,"Ġfamilies":3725,"ĠÂ£":3726,"ares":3727,"ĠOf":3728,"Is":3729,"Ġilleg":3730,"Be":3731,"ken":3732,"Ġthrow":3733,"band":3734,"Ġappeared":3735,"Ġeasily":3736,"Ġesc":3737,"ero":3738,"Ġyourself":3739,"Ġpus":3740,"ĠWind":3741,"ID":3742,"encies":3743,"craft":3744,"Ġactions":3745,"Ġgrad":3746,"Ġgives":3747,"By":3748,"Ġphysical":3749,"Ġ29":3750,"Advertisement":3751,"Ġfrequ":3752,"ĠDis":3753,"iring":3754,"Ġdetail":3755,"ĠBel":3756,"ĠGovern":3757,"Ġpolitics":3758,"Ġang":3759,"rated":3760,"Ġcentral":3761,"cul":3762,"Ġincome":3763,"Ġdecades":3764,"ube":3765,"Ġsenior":3766,"Ġsusp":3767,"Ġideas":3768,"Ġcredit":3769,"Ġdraft":3770,"ĠBre":3771,"iny":3772,"Ġsmart":3773,"Ġunique":3774,"Ġoverall":3775,"Ġsigned":3776,"ĠMex":3777,"bum":3778,"Ġcrime":3779,"Ġpromot":3780,"Ġintelligence":3781,"ĠCap":3782,"Ġanaly":3783,"inn":3784,"Ġcrisis":3785,"IA":3786,"Ġuses":3787,"Ġdisplay":3788,"ĠGermany":3789,"ĠInternational":3790,"Ġdise":3791,"Ġrap":3792,"Ġbenefits":3793,"Ġdeveloped":3794,"According":3795,"board":3796,"Ġnice":3797,"Ġcharge":3798,"iable":3799,"Ġweapons":3800,"Ġlif":3801,"nown":3802,"Ġmist":3803,"ĠJer":3804,"itely":3805,"riage":3806,"urance":3807,"Ġdoor":3808,"ĠArab":3809,"Don":3810,"ĠEU":3811,"Ġtruth":3812,"Ġrule":3813,"net":3814,"ĠSchool":3815,"Ġaltern":3816,"uty":3817,"Ġindividuals":3818,"Ġnoted":3819,"ĠAustralia":3820,"Ġconflic":3821,"ĠAngel":3822,"Ġrates":3823,"ET":3824,"Ġpredict":3825,"Ar":3826,"resh":3827,"Ġcustomers":3828,"ĠPolice":3829,"Ġgoals":3830,"Ġfully":3831,"Ġsales":3832,"Ġchallenge":3833,"Ġpsych":3834,"part":3835,"Ġapparent":3836,"28":3837,"Ġprojects":3838,"OM":3839,"change":3840,"Ġcontinues":3841,"onents":3842,"mar":3843,"Ġdrop":3844,"Ġreform":3845,"):":3846,"ĠEarth":3847,"Ġpreviously":3848,"Ġlarger":3849,"ĠÐ":3850,"ĠGeorge":3851,"21":3852,"erved":3853,"Ġbeaut":3854,"Ġrelated":3855,"box":3856,"Ġadvant":3857,"ĠUnion":3858,"ser":3859,"Ġreached":3860,"Ġmanager":3861,"Ġseemed":3862,"Ġcriminal":3863,"oti":3864,"29":3865,"Ġsto":3866,"Ġbrother":3867,"Ġincident":3868,"Pro":3869,"Ġlic":3870,"Ġstraight":3871,"Ġspir":3872,"ĠFranc":3873,"owers":3874,"Ġprimary":3875,"Ġsuccessful":3876,"Some":3877,"iders":3878,"iance":3879,"Ġfavorite":3880,"Ġpeace":3881,"ĠChristian":3882,"secut":3883,"Ġemb":3884,"ĠTom":3885,"Ġexpand":3886,"orter":3887,"Ġlines":3888,"bon":3889,"Ġspread":3890,"Ġclearly":3891,"ĠComp":3892,"ĠCouncil":3893,"Ġprograms":3894,"Ġhighly":3895,"ĠHigh":3896,"Ġminor":3897,"olic":3898,"Ġattract":3899,"Ġmeasure":3900,"ategy":3901,"while":3902,"ĠHam":3903,"ĠInternet":3904,"attle":3905,"inated":3906,"Ġknowledge":3907,"Ġsave":3908,"Ġgreater":3909,"ounds":3910,"antly":3911,"Ġtraff":3912,"ĠCr":3913,"sey":3914,"ĠSmith":3915,"Ġemot":3916,"Ġkept":3917,"Un":3918,"Ġrare":3919,"icit":3920,"que":3921,"ĠAnt":3922,"usion":3923,"ĠUp":3924,"ĠRec":3925,"ean":3926,"dom":3927,"osh":3928,"ĠBer":3929,"Ġplaces":3930,"Ġrise":3931,"ĠMac":3932,"Ġresources":3933,"Ġweb":3934,"ki":3935,"Ġregular":3936,"Ġstudies":3937,"ĠBrown":3938,"ĠFlor":3939,"Ġvoice":3940,"ĠGeneral":3941,"ota":3942,"ocks":3943,"Ġarm":3944,"iced":3945,"Ġ2007":3946,"Ġactivity":3947,"vere":3948,"100":3949,"oday":3950,"ĠDef":3951,"Ġshooting":3952,"Ġsources":3953,"term":3954,"wide":3955,"Ġprem":3956,"Ġfine":3957,"Ġowners":3958,"Ġfeet":3959,"Ġknows":3960,"Ġcities":3961,"Ġfighting":3962,"rs":3963,"ĠLa":3964,"Ġsend":3965,"Ġnormal":3966,"Ġorganiz":3967,"Ġmanufact":3968,"Ġcalling":3969,"icago":3970,"Ġstatus":3971,"ville":3972,"Ġliter":3973,"Ġcaused":3974,"Ġspend":3975,"levision":3976,"Ġclaimed":3977,"ado":3978,"miss":3979,"Ġlaun":3980,"Ġintegr":3981,"Ġoptions":3982,"70":3983,"iled":3984,"Ġcolor":3985,"www":3986,"roll":3987,"Ġpolicies":3988,"itness":3989,"EO":3990,"Ġreflect":3991,"ING":3992,"onent":3993,"oles":3994,"Ġshut":3995,"ĠEngland":3996,".âĢĻ":3997,"Ġhaven":3998,"ĠEvery":3999,"ength":4000,"ĠRob":4001,"Ġadding":4002,"org":4003,"inger":4004,"Not":4005,"Ġpun":4006,"ĠFrom":4007,"Ġresearchers":4008,"mission":4009,"Ġprint":4010,"ifying":4011,"Ġpros":4012,"Ġenem":4013,"Ġfemale":4014,"Ġvirt":4015,"aith":4016,"Ġended":4017,"Ġpatients":4018,"ĠGold":4019,"mittee":4020,"ĠScott":4021,"Ġplant":4022,"Ġprices":4023,"Ġfreedom":4024,"Ġgrand":4025,"iverse":4026,"Ġele":4027,"Ġtrou":4028,"ribution":4029,"Ġhospital":4030,"Ġtraditional":4031,"Ġnames":4032,"Ġraised":4033,"Ġgold":4034,"Ġconcerns":4035,"Ġbasic":4036,"Ġsupposed":4037,"Ġhappens":4038,"Ġfunding":4039,"Ġbeat":4040,"Ġemployees":4041,"Ġcharges":4042,"Ġalbum":4043,"Ġstyle":4044,"Ġscen":4045,"Ġauthorities":4046,"Ġwilling":4047,"roid":4048,"Ġacqu":4049,"âĢĿ,":4050,"mark":4051,"33":4052,"Ġabsolute":4053,"Ġslight":4054,"Ġminister":4055,"ĠÃ":4056,"ya":4057,"alse":4058,"Ġeth":4059,"Ġcommer":4060,"ĠBay":4061,"Ġactual":4062,"Ġjump":4063,"see":4064,"ica":4065,"Ġbirth":4066,"Ġcreating":4067,"ĠMo":4068,"Ġlik":4069,"Ġwait":4070,"Ġimpl":4071,"Ġbehavior":4072,"Ġleaving":4073,"uman":4074,"Ġbeg":4075,"lick":4076,"rol":4077,"Ġagreement":4078,"ĠAP":4079,"Ġseeing":4080,"EL":4081,"Ġmechan":4082,"uana":4083,"Ġgreen":4084,"Ġstation":4085,"ĠMost":4086,"gen":4087,"aby":4088,"ona":4089,"Sc":4090,"ĠWell":4091,"Ġoffered":4092,"Here":4093,"Ġadop":4094,"SA":4095,"ĠHillary":4096,"Ġrank":4097,"ĠMag":4098,"cher":4099,"Ġpush":4100,"Ex":4101,"Ġir":4102,"pen":4103,"Ġvalues":4104,"Ġmer":4105,".'":4106,"Ġcry":4107,"Ġpropos":4108,"ĠAccording":4109,"Ġposted":4110,"OU":4111,"soft":4112,"acing":4113,"ĠFlorida":4114,"ram":4115,"Ġmostly":4116,"Ġapplication":4117,"Ġdetermin":4118,"ĠAlex":4119,"Ġensure":4120,"Ġestabl":4121,"Ġbelieved":4122,"ĠFrance":4123,"ĠTrans":4124,"Ġweekend":4125,"Ġweight":4126,"ĠLet":4127,"Ġtemper":4128,"itude":4129,"Ġvac":4130,"rian":4131,"earch":4132,"ties":4133,"Ġeye":4134,"Ġwhatever":4135,"Ġdevices":4136,"Ġmassive":4137,"Ġinstall":4138,"Ġrecords":4139,"Ġrevealed":4140,"istry":4141,"pri":4142,"Ġlawy":4143,"Ġbrief":4144,"ijuana":4145,"Ġpicture":4146,"igned":4147,"Ġstrategy":4148,"Ġenforce":4149,"Ġuns":4150,"ae":4151,"Ġparties":4152,"Ġadult":4153,"Ġqual":4154,"Ġchoose":4155,"Ġbenefit":4156,"Ġappl":4157,"Ġfat":4158,"Ġwonder":4159,"Ġarrested":4160,"ĠReg":4161,"uild":4162,"Ġstreet":4163,"500":4164,"Ġlearned":4165,"Ġwhom":4166,"Ġassault":4167,"Ġdire":4168,"ĠPeople":4169,"Ġmission":4170,"Ġjudge":4171,"Ġfeeling":4172,"IL":4173,"90":4174,"ĠPhil":4175,"ĠChicago":4176,"ags":4177,"Ġmemory":4178,"Ġcars":4179,"Ġmount":4180,"ĠLaw":4181,"Cl":4182,"ĠLou":4183,"Ġproduced":4184,"Ġinfluence":4185,"Ġenter":4186,"Ġpattern":4187,"ĠSun":4188,"Ġprovides":4189,"Ġtend":4190,"liament":4191,"estic":4192,"ĠBush":4193,"ste":4194,"icial":4195,"Ġresidents":4196,"Ġasking":4197,"Ġcritical":4198,"ĠPort":4199,"Ġcelebr":4200,"Ġconfirmed":4201,"Ġlose":4202,"Ġwat":4203,"Ġnegoti":4204,"Ġoperations":4205,"Ġproduce":4206,"ĠBit":4207,"Ġshowing":4208,"Ġwild":4209,"Ġborder":4210,"ĠStud":4211,"ingly":4212,"Ġimages":4213,"Ġcru":4214,"ection":4215,"ĠMod":4216,"ription":4217,"Ġsection":4218,"Ġopposition":4219,"Ġdistrict":4220,"cont":4221,"aked":4222,"ĠCommittee":4223,"Ġkick":4224,"ĠPalest":4225,"ĠMat":4226,"Ġdollars":4227,"no":4228,"Ġindependent":4229,"ĠPress":4230,"However":4231,"Ġruns":4232,"http":4233,"ĠWall":4234,"Ġadvoc":4235,"Ġetc":4236,"rav":4237,"cell":4238,"Ġder":4239,"Le":4240,"ĠHealth":4241,"________":4242,"orial":4243,"cience":4244,"ĠSecret":4245,"udd":4246,"ĠSam":4247,"Ġsolution":4248,"ented":4249,"Ġtick":4250,"etty":4251,"Ġinterested":4252,"Ġdigital":4253,"ST":4254,"Ġdebt":4255,"Ġdrink":4256,"ugg":4257,"Ġcore":4258,"Ġagreed":4259,"ĠCommission":4260,"ront":4261,"ĠCup":4262,"Ġvill":4263,"ĠLiber":4264,"Ġbrand":4265,"Ġteac":4266,"Ġtheory":4267,"Ġhero":4268,"ocr":4269,"Ġeffective":4270,"Ġlearning":4271,"ĠOh":4272,"iation":4273,"icians":4274,"Ġopening":4275,"rial":4276,"point":4277,"ĠFrancis":4278,"ĠWhy":4279,"ords":4280,"mine":4281,"ipe":4282,"Ġdefend":4283,"Ġinvestment":4284,"Ġchem":4285,"BA":4286,"Ġoccas":4287,"Ġrestrict":4288,"Ġoffers":4289,"Ġbur":4290,"Ġchair":4291,"Ġworse":4292,"Ġgenerally":4293,"ĠJustice":4294,"Ġhighest":4295,"ĠMal":4296,"Ġmarriage":4297,"Ġreplace":4298,"Ġprotest":4299,"ĠOther":4300,"ĠKorea":4301,"Ġresponsible":4302,"Ġdefe":4303,"Ġtrial":4304,"Ġmillions":4305,"Ġmur":4306,"Ġbasis":4307,"ĠMicro":4308,"Ġexplained":4309,"Ġtrain":4310,"ĠWestern":4311,"Ġdaily":4312,"Ġmiles":4313,"rig":4314,"oly":4315,"Ġtypes":4316,"Ġobtain":4317,"illing":4318,"Ġproposed":4319,"ĠBecause":4320,"ĠMike":4321,"Ġcondition":4322,"Ġtelevision":4323,"vest":4324,"Ġpartners":4325,"ĠNFL":4326,"Ġheavy":4327,"win":4328,"These":4329,"Ġopinion":4330,"Ġmale":4331,"ĠOur":4332,"Ġice":4333,"uries":4334,"ĠFound":4335,"ada":4336,"ĠOffice":4337,"Ġlimit":4338,"Ġspecies":4339,"Ġassociated":4340,"Ġcoal":4341,"Ġbecomes":4342,"Ġachieve":4343,"ĠMil":4344,"Ġreject":4345,"Ġassum":4346,"Con":4347,"Ġ80":4348,"outhern":4349,"Ġhundreds":4350,"Ġunderstanding":4351,"Ġdestroy":4352,"Ġadvantage":4353,"Ġload":4354,"rate":4355,"ĠAdd":4356,"orthern":4357,"ĠDel":4358,"Ġopened":4359,"aks":4360,"known":4361,"bert":4362,"44":4363,"Ġsurround":4364,"coming":4365,"Ġthus":4366,"ĠJohnson":4367,"ĠTer":4368,"Ġannounce":4369,"upp":4370,"ĠSecurity":4371,"Ġonto":4372,"Ġphoto":4373,"ĠMany":4374,"Ġprofessional":4375,"well":4376,"Ġinitial":4377,"icip":4378,"Mr":4379,"Ġborn":4380,"Ġbed":4381,"Ġsleep":4382,"Ġmicro":4383,"ander":4384,"Ġneighbor":4385,"Ġrat":4386,"Ġvehicle":4387,"Ġbecoming":4388,"Ġspl":4389,"olf":4390,"ĠGreat":4391,"ĠUk":4392,"la":4393,"36":4394,"Ġeasier":4395,"ading":4396,"Ġcontrovers":4397,"ĠAtt":4398,"ĠInc":4399,"Ġship":4400,"Ġaccused":4401,"ĠBig":4402,"CC":4403,"====":4404,"azing":4405,"Ġresist":4406,"Ġunlike":4407,"ĠWindows":4408,"Ġlegislation":4409,"ĠDist":4410,"Ġvictims":4411,"Ġillegal":4412,"Ġserve":4413,"Ġactive":4414,"Bl":4415,"odes":4416,"Ġshared":4417,"Ġserved":4418,"oom":4419,"eline":4420,"Ġbomb":4421,"Ġreduce":4422,"Ġ31":4423,"Ġscientists":4424,"Ġdocuments":4425,"Ġmap":4426,"NA":4427,"Ġjoin":4428,"PA":4429,"lant":4430,"house":4431,"32":4432,"ĠArt":4433,"Ġlocation":4434,"Ġpurpose":4435,"ĠCommun":4436,"Ġaudience":4437,"Ġreturned":4438,"Ġwinning":4439,"rey":4440,"met":4441,"ios":4442,"Ġinternet":4443,"Ġbelief":4444,"lands":4445,"BS":4446,"Ġdepartment":4447,"witch":4448,"Ġbottom":4449,"Ġmanagement":4450,"ĠCanadian":4451,"Ġvs":4452,"Ġdie":4453,"level":4454,"Ġamb":4455,"Ġincred":4456,"Ġextremely":4457,"Ġnine":4458,"za":4459,"37":4460,"ih":4461,"Ġexisting":4462,"Ġ2006":4463,"ĠSanders":4464,"Ġ35":4465,"Ġputting":4466,"Ġwealth":4467,"Ġseparate":4468,"48":4469,"Ġdouble":4470,"orrow":4471,"Ġchurch":4472,"Ġframe":4473,"ĠLab":4474,"ĠHall":4475,"irgin":4476,"Ġexplain":4477,"ears":4478,"omas":4479,"ortunately":4480,"Ġmachine":4481,"asy":4482,"Ġsolid":4483,"ĠDirect":4484,"Ġcomfort":4485,"Ġran":4486,"Ġnegative":4487,"ĠIslamic":4488,"ĠInstitute":4489,"iful":4490,"ipp":4491,"FC":4492,"atically":4493,"Ġargument":4494,"PS":4495,"Ġunc":4496,"acity":4497,"Ġcamera":4498,"works":4499,"Ġnotice":4500,"Ġpurs":4501,"rad":4502,"Ġcommittee":4503,"Ġepisode":4504,"ĠYour":4505,"Ġteach":4506,"Ġelectric":4507,"istics":4508,"Ġdoubt":4509,"Ġwatching":4510,"Ġcommunities":4511,"Ġ90":4512,"GB":4513,"Ġhearing":4514,"ĠIsraeli":4515,"Ġdefinitely":4516,"ĠMad":4517,"Ġinstance":4518,"âĢ¢":4519,"Ġconservative":4520,"Ġleadership":4521,"tered":4522,"gu":4523,"Ġstructure":4524,"Ġvictory":4525,"38":4526,"ipped":4527,"Ġnotes":4528,"iller":4529,"Ġbackground":4530,"which":4531,"âĢĶâĢĶ":4532,"Ġmobile":4533,"ĠBank":4534,"anted":4535,"Ġappeal":4536,"iliar":4537,"Ġrestaur":4538,"ography":4539,"ga":4540,"Ġsupporters":4541,"resp":4542,"ribe":4543,"Ġmarijuana":4544,"ĠChurch":4545,"ĠFin":4546,"hem":4547,"Ġfaith":4548,"Ġviews":4549,"itt":4550,"Ġtransfer":4551,"Ġmax":4552,"OL":4553,"Ġfresh":4554,"Ġlabor":4555,"ario":4556,"ĠSl":4557,"zen":4558,"ĠGame":4559,"oman":4560,"Ġlaunched":4561,"ghan":4562,"Ġspoke":4563,"Ġhack":4564,"Americ":4565,"Ġissued":4566,"ĠFBI":4567,"Ġsites":4568,"Ġdirection":4569,"Ġnor":4570,"Ġdangerous":4571,"Ġrepeated":4572,"Ġnovel":4573,"Ġcoverage":4574,"inct":4575,"Ġharm":4576,"Ġrefere":4577,"Ġsett":4578,"Ġcompetition":4579,"ctors":4580,"Ġitems":4581,"azon":4582,"Ġcommercial":4583,"Ġnorth":4584,"going":4585,"ampionship":4586,"Ġfloor":4587,"Ġexperiment":4588,"Ġreact":4589,"Ġtrend":4590,"Image":4591,"Ġded":4592,"Ġtraffic":4593,"Ġge":4594,"Ġsupported":4595,"Ġsuggested":4596,"Ġcharged":4597,"gency":4598,"Ġstated":4599,"Ġfalse":4600,"Ġhus":4601,"Ġdecade":4602,"ĠSince":4603,"Ġclosed":4604,"Ġdrugs":4605,"ĠFox":4606,"ĠOver":4607,"EM":4608,"ribute":4609,"Ġsun":4610,"Ġsmaller":4611,"ĠLike":4612,"Ġdiscovered":4613,"Ġworst":4614,"ĠEv":4615,"vention":4616,"Ġcateg":4617,"ĠJones":4618,"Ġcompar":4619,"ĠBritain":4620,"Ġtelling":4621,"Ġentirely":4622,"Ġaspect":4623,"Ġ;":4624,"ici":4625,"Ġdisease":4626,"Ġradio":4627,"Ð¾":4628,"ference":4629,"ĠLos":4630,"****":4631,"47":4632,"Ġdaughter":4633,"but":4634,"Ġolder":4635,"ĠWil":4636,"Ġfarm":4637,"Ġtask":4638,"ĠCarol":4639,"Ġcontain":4640,"ady":4641,"spe":4642,"Ġcash":4643,"Ġdriving":4644,"andon":4645,"do":4646,"Ġcontext":4647,"ulf":4648,"55":4649,"ĠPri":4650,"NN":4651,"Ġdriver":4652,"Ġfinding":4653,"andom":4654,"equ":4655,"Ġ70":4656,"Ġplease":4657,"Ġplays":4658,"Ġconcerned":4659,"ãĤ":4660,"ulated":4661,"Ġlargely":4662,"semb":4663,"annel":4664,"Ġcandidates":4665,"Ġtools":4666,"Ġgay":4667,"overy":4668,"ĠTre":4669,"ĠQue":4670,"bing":4671,"ava":4672,"Ġrequires":4673,"iling":4674,"Ġtesting":4675,"ĠUkrain":4676,"Ġannual":4677,"ĠUN":4678,"Ġcards":4679,"ius":4680,"ait":4681,"Ġfuel":4682,"Ġanimals":4683,"Ġexerc":4684,"Ġcaught":4685,"rick":4686,"izes":4687,"Ġsurface":4688,"Ġgeneration":4689,"night":4690,"ibrary":4691,"Ġwel":4692,"Ġraise":4693,"how":4694,"!âĢĿ":4695,"ĠRobert":4696,"Ġjustice":4697,"ception":4698,"Ġbusinesses":4699,"ĠJewish":4700,"ĠSup":4701,"Ġstrength":4702,"Ġwitness":4703,"Ġstrateg":4704,"ĠMiss":4705,"Ġtransport":4706,"Ġbelieves":4707,"Ġconstruction":4708,"ĠPC":4709,"Ġinsurance":4710,"ĠChris":4711,"Ġstock":4712,"ĠMer":4713,"Ġtruly":4714,"Ġphil":4715,"icy":4716,"Ġconcern":4717,"Ġmurder":4718,"Ġactivities":4719,"ĠSil":4720,"Ġallowing":4721,"ossible":4722,"Ġholding":4723,"arant":4724,"ĠGree":4725,"ĠPM":4726,"olly":4727,"phone":4728,"Ġconsequ":4729,"ĠAfrica":4730,"CH":4731,"ĠAM":4732,"oyal":4733,"ancy":4734,"Ġafford":4735,"ĠAre":4736,"osure":4737,"Ġscale":4738,"itable":4739,"ĠKen":4740,"Ġplanning":4741,"SP":4742,"ĠAc":4743,"Ġaccur":4744,"bre":4745,"âĢĺ":4746,"Ġattend":4747,"ĠJim":4748,"Ġbanks":4749,"anging":4750,"Ġlength":4751,"Ġrough":4752,"Ġunit":4753,"Ġjoined":4754,"Ġrefuge":4755,"Ġguess":4756,"tery":4757,"Ġsequ":4758,"49":4759,"Ġtells":4760,"rim":4761,"akistan":4762,"Ġcells":4763,"igrants":4764,"nel":4765,"Ġintroduced":4766,"pective":4767,"Ġsan":4768,"Ġdecisions":4769,"ĠAssociation":4770,"DA":4771,"Ġplaced":4772,"Ġestablish":4773,"Ġkid":4774,"Ġspokesman":4775,"cknow":4776,"Ġhighlight":4777,"Ġpresence":4778,"Ġinterests":4779,"39":4780,"ĠAfrican":4781,"bour":4782,"lor":4783,"ĠAngeles":4784,"Ġtim":4785,"ros":4786,"Ġwaiting":4787,"icon":4788,"Ġblue":4789,"ĠVer":4790,"Ġ2005":4791,"Ġupdate":4792,"Ġdownload":4793,"ĠJeff":4794,"Ġenforcement":4795,"Ġmentioned":4796,"Ġsubsc":4797,"More":4798,"Ġmissing":4799,"Ġrecommend":4800,"Ġdeliver":4801,"inter":4802,"Ġhomes":4803,"ĠTy":4804,"ocracy":4805,"Ġhasn":4806,"Ġestablished":4807,"ĠKh":4808,"ĠStep":4809,"Ġremoved":4810,"formation":4811,"kins":4812,"ĠPalestin":4813,"Ġiniti":4814,"Ġproviding":4815,"Ġoccup":4816,"Ġconflict":4817,"Ġabuse":4818,"ey":4819,"ĠGOP":4820,"Ġeat":4821,"Ġspirit":4822,"Ġconversation":4823,"Ġgraph":4824,"ĠJud":4825,"ĠPre":4826,"Ġinjury":4827,"ĠAfghan":4828,"Ġhousing":4829,"azine":4830,"ronic":4831,"Ġcook":4832,"oted":4833,"Ġdiscussion":4834,"Ġnewsp":4835,"ĠYes":4836,"itz":4837,"Ġcatch":4838,"ĠRock":4839,"ĠMicrosoft":4840,"Ġuseful":4841,"isation":4842,"what":4843,"ĠJapanese":4844,"ĠInstead":4845,"ĠGroup":4846,"Ġtransl":4847,"oms":4848,"TC":4849,"vant":4850,"Ġbul":4851,"Ġoperation":4852,"Ġstopped":4853,"Ġmodels":4854,"Ġcancer":4855,"##":4856,"Ġfant":4857,"ĠFore":4858,"Ġmeasures":4859,"ications":4860,"ĠFrank":4861,"cks":4862,"kin":4863,"Ġarrived":4864,"iter":4865,"ĠLouis":4866,"Ġ\\":4867,"ulate":4868,"lig":4869,"Ġfocused":4870,"ĠLeg":4871,"anda":4872,"ãĥ":4873,"ĠProf":4874,"Ġsouth":4875,"onto":4876,"mail":4877,"owl":4878,"Ġtherefore":4879,"Ġsetting":4880,"Ġappropri":4881,"Ġphotos":4882,"agn":4883,"Ġstick":4884,"Ġassess":4885,"Ġidentity":4886,"Ġobvious":4887,"season":4888,"Ġhair":4889,"Ġidentified":4890,"ĠConst":4891,"Ġsports":4892,"Ġoffensive":4893,"Pl":4894,"ĠTHE":4895,"Ġ2018":4896,"Our":4897,"Ġcommitted":4898,"ups":4899,"Ġmanaged":4900,"Ġconclud":4901,"igan":4902,"place":4903,"real":4904,"Ġfourth":4905,"Ġearth":4906,"Ġfunds":4907,"Ġskills":4908,"ĠVirgin":4909,"Ġprefer":4910,"Ġowner":4911,"gers":4912,"apped":4913,"ĠCEO":4914,"ication":4915,"Ġfigures":4916,"Ã¡":4917,"umn":4918,"Ġfiles":4919,"Ġkeeping":4920,"ĠSupreme":4921,"ighter":4922,"Ġotherwise":4923,"ĠMont":4924,"34":4925,"Ġ2000":4926,"Ġacknow":4927,"Ġgirls":4928,"verse":4929,"Ġexchange":4930,"itutional":4931,"ĠSal":4932,"Ġcollection":4933,"0000":4934,"ĠGovernment":4935,"Ġrelatively":4936,"ĠInter":4937,"outs":4938,"ĠArmy":4939,"ĠMexico":4940,"Ġfinished":4941,"Ġexclus":4942,"ocal":4943,"ula":4944,"ĠMet":4945,"Ġtough":4946,"ĠRyan":4947,"Ġconnection":4948,"Ġpartner":4949,"Ġsteps":4950,"Ġfeels":4951,"ĠNor":4952,"Ġsupply":4953,"Ġfle":4954,"ka":4955,"Ġslightly":4956,"Ġtaxes":4957,"ĠRichard":4958,"Des":4959,"ĠTim":4960,"Ġveter":4961,"isf":4962,"mas":4963,"Ġhusband":4964,"ĠIr":4965,"Ġsurvey":4966,"uct":4967,"ĠAndroid":4968,"Photo":4969,"Ġseek":4970,"wing":4971,"Ġdetect":4972,"Ġdepend":4973,"Ġgain":4974,"75":4975,"Ġkilling":4976,"Ġroll":4977,"Trump":4978,"makers":4979,"Ġsuggests":4980,"ĠMartin":4981,"dden":4982,"Ġcircum":4983,"gar":4984,"eah":4985,"Ġmere":4986,"ĠSub":4987,"Ġtrip":4988,"RA":4989,"Go":4990,"rd":4991,"Ġseconds":4992,"ils":4993,"hens":4994,"onym":4995,"ancing":4996,"Ġaward":4997,"ĠSur":4998,"Ġcommission":4999,"Ġspeaking":5000,"Ġweak":5001,"ï¿":5002,"Ġclick":5003,"gn":5004,"Ġcorner":5005,"Ġsust":5006,"rees":5007,"adium":5008,"Ġaggress":5009,"Ġtweet":5010,"odies":5011,"ĠAlthough":5012,"enses":5013,"range":5014,"Ġelements":5015,"ania":5016,"iser":5017,"Ġrevolution":5018,"prof":5019,"Ð°":5020,"Ġregist":5021,"icide":5022,"Ġmental":5023,"ĠDec":5024,"ĠGood":5025,"Ġplanned":5026,"Ġhappening":5027,"Why":5028,"ĠPakistan":5029,"olit":5030,"Ġstruct":5031,"agues":5032,"Ġplanet":5033,"Ġnone":5034,"Ġwindow":5035,"amm":5036,"Ġbigger":5037,"Ġtalks":5038,"Ġremind":5039,"Ġque":5040,"with":5041,"rency":5042,"AA":5043,"gypt":5044,"oston":5045,"uate":5046,"ucky":5047,"Ġprotection":5048,"Ġhate":5049,"Ġstrike":5050,"Ġstarts":5051,"].":5052,"Ġorganizations":5053,"Ġfailure":5054,"Ġappoint":5055,"idering":5056,"Ġspect":5057,"Ġdefensive":5058,"Ġwish":5059,"ĠTheir":5060,"Ġfamiliar":5061,"Ġdat":5062,"ï¿½":5063,"hedul":5064,"ynam":5065,"Ġguard":5066,"Ġcrew":5067,"Ġgender":5068,"Ġsides":5069,"ĠThomas":5070,"aded":5071,"Ġthanks":5072,"ĠUs":5073,"Ġstanding":5074,"Ġsudden":5075,"stream":5076,"Ġimmigration":5077,"HL":5078,"Ġcarried":5079,"Ġarmy":5080,"ĠMary":5081,"Ġamazing":5082,"ĠJoe":5083,"ym":5084,"Ġscientific":5085,"eless":5086,"65":5087,"Ġdomestic":5088,"ĠJose":5089,"Ġinteract":5090,"Ġpit":5091,"Ġdecide":5092,"uters":5093,"Ðµ":5094,"Ġphilos":5095,"ĠMP":5096,"kes":5097,"ĠImages":5098,"uke":5099,"awn":5100,"arsh":5101,"ĠFire":5102,"Last":5103,"ĠResearch":5104,"Ġmulti":5105,"Ġenvironmental":5106,"Ġdropped":5107,"imal":5108,"Ġmarkets":5109,"Ġstandards":5110,"Ġhelping":5111,"Ġscript":5112,"Ġprofessor":5113,"ano":5114,"Ġvul":5115,"erous":5116,"Ġvariety":5117,"obby":5118,"cious":5119,"Ġ32":5120,"ĠAlso":5121,"Ġfiled":5122,"hi":5123,"ĠOut":5124,"Ġlived":5125,"aren":5126,"ĠBern":5127,"lo":5128,"Ġaccounts":5129,"Ġoperating":5130,"acks":5131,"Ġhumans":5132,"Ġprinc":5133,"Ġneighborhood":5134,"Ġsettle":5135,"ĠCollege":5136,"Ġcuts":5137,"fe":5138,"ĠAtlant":5139,"!\"":5140,"Ġacadem":5141,"Ġterrit":5142,"aven":5143,"Ġimpossible":5144,"Ġvotes":5145,"ocol":5146,"icted":5147,"Ġpossibly":5148,"run":5149,"itect":5150,"Ġ45":5151,"ĠService":5152,"Read":5153,"Ġturns":5154,"ayer":5155,"ĠWeb":5156,"ĠPrime":5157,"Ġdegree":5158,"Ġunivers":5159,"ĠSecretary":5160,"Ġpair":5161,"Ġworry":5162,"ĠFoundation":5163,"Ġcolle":5164,"iams":5165,"ruit":5166,"Ġfellow":5167,"Ġincreasingly":5168,"Ġbeer":5169,"ĠDen":5170,"Ġunits":5171,"Ġbalance":5172,"illa":5173,"Ġrich":5174,"ola":5175,"stein":5176,"Ġultimately":5177,"ansion":5178,"Ġhealthy":5179,"Ġsch":5180,"Ġappre":5181,"ashed":5182,"Ġhonest":5183,"Ġalternative":5184,"Ġcold":5185,"ĠMos":5186,"den":5187,"ĠSyrian":5188,"Ġagencies":5189,"Ġflight":5190,"ĠLast":5191,"ortion":5192,"Ġinnov":5193,"ĠTwo":5194,"Ġapparently":5195,"From":5196,"Ġtests":5197,"rastructure":5198,"aine":5199,"Ġpaying":5200,"ĠTime":5201,"Ġreligion":5202,"Ġunion":5203,"vis":5204,"ores":5205,"Ġchanging":5206,"Ġheat":5207,"ĠCentral":5208,"Ġtwice":5209,"Ġdevelopers":5210,"Ġskin":5211,"Ġfundament":5212,"Ġapply":5213,"Ġabsolutely":5214,"Ġpossibility":5215,"ĠLat":5216,"roud":5217,"Ġcorporate":5218,"Ġvast":5219,"Ġcloser":5220,"ĠDistrict":5221,"arks":5222,"atures":5223,"Ġwriter":5224,"ĠUnder":5225,"iot":5226,"Ġschedul":5227,"ias":5228,"Ġexpensive":5229,"ader":5230,"Ġapplications":5231,"osen":5232,"Ġapart":5233,"ĠBur":5234,"Ġseeking":5235,"ĠST":5236,"ipment":5237,"apers":5238,"ĠPublic":5239,"Ġsoldiers":5240,"IM":5241,"Ġsea":5242,"Ġlosing":5243,"ĠTeam":5244,"Ġinternal":5245,"Ġauthority":5246,"ĠAmazon":5247,"Ġsounds":5248,"46":5249,"och":5250,"Ġmut":5251,"related":5252,"Ġdeveloping":5253,"Ġcapacity":5254,"under":5255,"Ġexperts":5256,"Ġregarding":5257,"isted":5258,"Ġtack":5259,"31":5260,"Ġbeautiful":5261,"Ġ2004":5262,"Ġsitting":5263,"ĠFrancisco":5264,"ĠJon":5265,"Ġimagine":5266,"ĠPeter":5267,"pm":5268,"ĠPut":5269,"ologies":5270,"IV":5271,"Ġshall":5272,"Ġvision":5273,"Ġdeploy":5274,"ĠBitcoin":5275,"Ġpil":5276,"idden":5277,"Ġfamous":5278,"ĠCarolina":5279,"Ġunless":5280,"ĠToronto":5281,"Ġarms":5282,"look":5283,"Ġserver":5284,"Ġerror":5285,"Ġpresented":5286,"enced":5287,"Ġtast":5288,"ĠJes":5289,"ĠPet":5290,"Ġattorney":5291,"gl":5292,"uzz":5293,"Ġintended":5294,"Ġtrail":5295,"Ġadjust":5296,"Ġflow":5297,"Ġcovered":5298,"Even":5299,"ini":5300,"Of":5301,"Ġgar":5302,"ĠEgypt":5303,"ĠAN":5304,"Ġsolar":5305,"pire":5306,"Ġforms":5307,"ĠBoston":5308,"ĠMiddle":5309,"war":5310,"Ġconvention":5311,"Ġconsult":5312,"Ġresponsibility":5313,"hand":5314,"Ġanimal":5315,"anche":5316,"ĠCamp":5317,"Ġelections":5318,"lets":5319,"Up":5320,"orage":5321,"Ġodd":5322,"state":5323,"Ġath":5324,"Ġoccurred":5325,"rible":5326,"oving":5327,"alls":5328,"coh":5329,"Ġfol":5330,"Ġfif":5331,"ĠCath":5332,"Ġrid":5333,"Ġsymb":5334,"Ġtight":5335,"uration":5336,"ĠMedic":5337,"Just":5338,"Ġestimated":5339,"01":5340,"Ġyards":5341,"Ġstreets":5342,"Ġsurprise":5343,"road":5344,"ĠGal":5345,"Ġsport":5346,"Ġexpert":5347,"ĠUSA":5348,"porary":5349,"Ġbroken":5350,"ĠAri":5351,"Ġuniversity":5352,"Ġfired":5353,"person":5354,"ĠJo":5355,"Ġscr":5356,"Ġearn":5357,"ĠPlan":5358,"Ġtroops":5359,"LE":5360,"Ġguarant":5361,"ION":5362,"Ġinstit":5363,"ĠSol":5364,"Ġhelps":5365,"----------------":5366,"Let":5367,"Ġrapid":5368,"()":5369,"eder":5370,"ĠVe":5371,"useum":5372,"ĠMur":5373,"aling":5374,"Ġlocated":5375,"Ġprosecut":5376,"Ġteen":5377,"Ġprove":5378,"Ġvehicles":5379,"Ġdomin":5380,"Ġdetermine":5381,"Ġspecifically":5382,"rie":5383,"uster":5384,"Ġvulner":5385,"ĠIN":5386,"ĠRiver":5387,"OW":5388,"Ġpieces":5389,"ĠChief":5390,"ressive":5391,"Ġdin":5392,"Ġfactors":5393,"ĠMass":5394,"Ġton":5395,"cean":5396,"Ġcombat":5397,"Ġbroke":5398,"Ġpet":5399,"ĠGuard":5400,"Ġnar":5401,"ĠThose":5402,"Ġbaby":5403,"inent":5404,"Ġhyp":5405,"Ġbringing":5406,"Ġera":5407,"Ġpotentially":5408,"Ġmethods":5409,"ĠOnce":5410,"oty":5411,"terday":5412,"ours":5413,"Ġequal":5414,"05":5415,"Ġcouncil":5416,"Ġconclus":5417,"Ġgovernments":5418,"Ġboost":5419,"Ġfaster":5420,"ĠParis":5421,"ĠSocial":5422,"endment":5423,"Ġprepared":5424,"Ġstars":5425,"yr":5426,"ĠVirginia":5427,"Ġheav":5428,"Ġoffering":5429,"Ġvoted":5430,"Ġchallenges":5431,"Ġproposal":5432,"Ġruling":5433,"Ġcarbon":5434,"lyn":5435,"Ġselling":5436,"Ġequipment":5437,"Ġconducted":5438,"isher":5439,"anish":5440,"\":":5441,"Ġpointed":5442,"ĠAny":5443,"Ġelected":5444,"Ġseasons":5445,"DP":5446,"phas":5447,"Ġjail":5448,"Ġsector":5449,"that":5450,"sen":5451,"Ġstress":5452,"Ġinfrastructure":5453,"Ġrefused":5454,"ĠJournal":5455,"class":5456,"Every":5457,"Ġsecure":5458,"IP":5459,"Ġplenty":5460,"ĠCur":5461,"Ġoriginally":5462,"Ġremained":5463,"ĠLee":5464,"Ġbreat":5465,"ĠFederal":5466,"ĠProt":5467,"Do":5468,"Ġrandom":5469,"ĠWho":5470,"ht":5471,"Ġbrow":5472,"Ġdram":5473,"edia":5474,"ĠAustralian":5475,"ĠFe":5476,"95":5477,"Ġdream":5478,"ĠPlease":5479,"ĠDuring":5480,"Ġclin":5481,"ĠIsland":5482,"Ġretail":5483,"Ġexpressed":5484,"Ġclot":5485,"Ġvoting":5486,"Ġgreatest":5487,"Ġclim":5488,"olved":5489,"ipping":5490,"TH":5491,"Ġshape":5492,"Ġaffected":5493,"onom":5494,"ĠRub":5495,"Ġweren":5496,"Ab":5497,"noon":5498,"ureau":5499,"Ġpercentage":5500,"Ġcriticism":5501,"ĠRem":5502,"Ġmaster":5503,"ĠOpen":5504,"Ġprime":5505,"pired":5506,"Ġbought":5507,"Ġsale":5508,"Ġcultural":5509,"ocking":5510,"Ġremove":5511,"Ġcas":5512,"Ġtrig":5513,"Ġsigns":5514,"ĠCru":5515,"ĠSpr":5516,"Ġherself":5517,"ENT":5518,"Ġtourn":5519,"Ġchemical":5520,"cel":5521,"Ġyouth":5522,"Ġappearance":5523,"Ġstret":5524,"rier":5525,"Ġdetermined":5526,"Ġhurt":5527,"Ġinvolve":5528,"Ġpositions":5529,"ĠForce":5530,"Ġsignificantly":5531,"standing":5532,"Ġgrew":5533,"ashion":5534,"reens":5535,"ĠWilliams":5536,"Since":5537,"Cont":5538,"=\"":5539,"ĠSant":5540,"Ġweapon":5541,"Ġthr":5542,"Ġfacing":5543,"Ġwood":5544,"Ġfilms":5545,"ighters":5546,"ĠMel":5547,"rog":5548,"bur":5549,"ĠBoth":5550,"vey":5551,",'":5552,"Ġremaining":5553,"First":5554,"Ġ1990":5555,"Ġobviously":5556,"Ġpurchase":5557,"IG":5558,"Ġregime":5559,"Ġ=>":5560,"Ġtele":5561,"Ġrent":5562,"ĠSing":5563,"Ġindeed":5564,"col":5565,"ĠFree":5566,"ĠSteve":5567,"Ġfinish":5568,"42":5569,"Ġexperienced":5570,"Ġ195":5571,"Ġpoliticians":5572,"pet":5573,"Ġvideos":5574,"Ġargued":5575,"Ġrevenue":5576,"ordin":5577,"etic":5578,"Ġsupporting":5579,"Ġconsidering":5580,"Ġshift":5581,"Ġaircraft":5582,"gment":5583,"Ġreduced":5584,"Ġprospect":5585,"lymp":5586,"Ġhandle":5587,"SS":5588,"Ġ500":5589,"Ġrecorded":5590,"Ġminimum":5591,"Ġcontrast":5592,"Ġexcited":5593,"Ġsurprising":5594,"Ġdepart":5595,"Ġwearing":5596,",[":5597,"Ġaccident":5598,"ensions":5599,"Ġpulled":5600,"Ġproced":5601,"ĠRad":5602,"ĠColor":5603,"Ġast":5604,"Ġevening":5605,"ĠJackson":5606,"odd":5607,"unction":5608,"essions":5609,"TV":5610,"Ġaid":5611,"ĠRo":5612,"Ġconcent":5613,"ĠRoad":5614,"life":5615,"CL":5616,"ownt":5617,"Ġplus":5618,"Ġbasically":5619,"ĠHaw":5620,"Pe":5621,"Ġleads":5622,"Ġmarg":5623,"Ġhun":5624,"Ġsubs":5625,"ĠPhoto":5626,"arp":5627,"ĠBook":5628,"Ġmotiv":5629,"Ġminute":5630,"Ġexplos":5631,"Ġmode":5632,"Ġvir":5633,"Ġappreci":5634,"Ġswitch":5635,"Ġopportunities":5636,"mm":5637,"Ġbound":5638,"Ġobserv":5639,"ruption":5640,"ternal":5641,"Ġconstruct":5642,"Sp":5643,"abilities":5644,"Ġdifferences":5645,"Ġemergency":5646,"uts":5647,"ĠPac":5648,"Ġglass":5649,"Ġidentify":5650,"oking":5651,"Ġbelong":5652,"Ġspring":5653,"Ġtact":5654,"64":5655,"Ġafternoon":5656,"Ġtreated":5657,"Ġlisten":5658,"anged":5659,"ĠMatt":5660,"ogue":5661,"hered":5662,"Ġdiss":5663,"hab":5664,"Ġconsumers":5665,"Ġhopes":5666,"Ġcompleted":5667,"Ġpanel":5668,"ller":5669,"ĠHol":5670,"Ġtalked":5671,"Ġseriously":5672,"ĠCharl":5673,"care":5674,"Ġwidely":5675,"Ġ2003":5676,"yth":5677,"PR":5678,"Ġagent":5679,"ĠConserv":5680,"apping":5681,"Ġvisual":5682,"Ġprivacy":5683,"Ġcombined":5684,"ĠPower":5685,"App":5686,"Ġseat":5687,"osite":5688,"Ġturning":5689,"Ġopposed":5690,"Ġends":5691,"Ġfactor":5692,"Ġliberal":5693,"apter":5694,"Ġpackage":5695,"Ġtur":5696,"Well":5697,"Ġapps":5698,"Ġdefault":5699,"Ind":5700,"ĠSpace":5701,"eds":5702,"His":5703,"Ġpowers":5704,"ĠDaily":5705,"Ġsuit":5706,"Ġtypically":5707,"rawn":5708,"Ġ//":5709,"Ġdiscl":5710,"asp":5711,"raud":5712,"ompl":5713,"Ġpages":5714,"oral":5715,"ache":5716,"Ġlots":5717,"Ġadvance":5718,"Ġcart":5719,"Ġmort":5720,"ĠSaud":5721,"Ġpassing":5722,"Ġdismiss":5723,"Ġtrouble":5724,"ĠProject":5725,"mosp":5726,"Ġreaction":5727,"idel":5728,"Ġ2001":5729,"ffee":5730,"Ġstruggle":5731,"Ġplants":5732,"Ġhol":5733,"ĠGet":5734,"ĠNov":5735,"Ġflat":5736,"Ġcalcul":5737,"Ġzone":5738,"Ġmal":5739,"uous":5740,"Phone":5741,"PD":5742,"obe":5743,"],":5744,"respond":5745,"Another":5746,"Se":5747,"Ġenemy":5748,"cohol":5749,"Ġbodies":5750,"Ġid":5751,"Ġelement":5752,"Ġburn":5753,"ivered":5754,"Ġinstruct":5755,"Ġproceed":5756,"oken":5757,"zer":5758,"Ġvalid":5759,"Ġquiet":5760,"ributed":5761,"ĠShow":5762,"04":5763,"Ġattempts":5764,"Ġthank":5765,"ĠSqu":5766,"ĠOlymp":5767,"De":5768,"esters":5769,"Ġstands":5770,"Ġdenied":5771,"Ġwrites":5772,"top":5773,"nell":5774,"ĠNAS":5775,"Ġentertain":5776,"plom":5777,"Ġfrust":5778,"Ġtun":5779,"Ġreference":5780,"Ġsets":5781,"erry":5782,"ket":5783,"ĠMem":5784,"Ġcollabor":5785,"ita":5786,"writ":5787,"Ġquant":5788,"edd":5789,"Ġmatters":5790,"lines":5791,"Ġapproved":5792,"Ġreporters":5793,"Ġsatisf":5794,"Am":5795,"Ġdisappoint":5796,"Ġbroadcast":5797,"Ġresponded":5798,"free":5799,"sts":5800,"Ġdistance":5801,"ĠToday":5802,"ato":5803,"Ġcopyright":5804,"Ġcreation":5805,"Ġtradition":5806,"Ġrenew":5807,"UT":5808,"ĠBBC":5809,"Ġtransition":5810,"Ġsem":5811,",âĢĻ":5812,"Ġexplo":5813,"Ġflag":5814,"Ġunable":5815,"Ġsac":5816,"Ġupgr":5817,"Ġelimin":5818,"ĠSystem":5819,"Ġprompt":5820,"Ġpitch":5821,"ĠRel":5822,"aped":5823,"aints":5824,"estival":5825,"mond":5826,"ucks":5827,"Ġconfident":5828,"Ð¸":5829,"Ġoptim":5830,"ĠYet":5831,"Ġstorm":5832,"Ġcapable":5833,"Ġapprox":5834,"ĠScience":5835,"Ġcloud":5836,"Ġhttps":5837,"ĠPenn":5838,"Ġexperiences":5839,"Ġinstitutions":5840,"isters":5841,"aniel":5842,"Ġcrimes":5843,"Ġpictures":5844,"razy":5845,"ĠDirector":5846,"Ġsplit":5847,"Ġreportedly":5848,"Ġnumerous":5849,"Ġperspective":5850,"resents":5851,"ilit":5852,"Ġbull":5853,"aled":5854,"ĠWilliam":5855,"ĠMedia":5856,"ilty":5857,"Ġdeclared":5858,"II":5859,"rant":5860,"Ġcaption":5861,"Ġgiant":5862,"rid":5863,"Ġcraft":5864,"usiness":5865,"ulations":5866,"Ġadvert":5867,"elly":5868,"abled":5869,"pass":5870,"astern":5871,"ĠFollow":5872,"Ġgrab":5873,"Ġtechnical":5874,"Ġcoast":5875,"Ġmovies":5876,"Ġfaces":5877,"Ġconfidence":5878,"Ġsevere":5879,"Ġgra":5880,"Ġmand":5881,"Ġforget":5882,"Ġproud":5883,"Ġsuffered":5884,"ĠTurkey":5885,"Ġmarried":5886,"tion":5887,"Ġforg":5888,"uations":5889,"ker":5890,"Ġongoing":5891,"Ġcooper":5892,"ishes":5893,"ampion":5894,"09":5895,"Ġnewspaper":5896,"Ġexplains":5897,"host":5898,"Ġfly":5899,"ĠWood":5900,"unte":5901,"Ġshouldn":5902,"Ġordered":5903,"Ġloved":5904,"Ġadvice":5905,"Ĥ¬":5906,"Ġupdates":5907,"Ġweather":5908,"ĠLabour":5909,"43":5910,"Ġinjured":5911,"Ġcircumst":5912,"Ġadmitted":5913,"ÃŃ":5914,"Ġrot":5915,"dule":5916,"employ":5917,"ĠAfghanistan":5918,"reland":5919,"78":5920,"Ġarchitect":5921,"âĪ":5922,"ĠTra":5923,"Ġmessages":5924,"ĠGrand":5925,"Ġreporting":5926,"Ġbuildings":5927,"Ġdelay":5928,"ĠRon":5929,"Over":5930,"ĠHuman":5931,"aza":5932,"adow":5933,"Ġaccompl":5934,"Ġappropriate":5935,"Ġfindings":5936,"Ġconnected":5937,"Ġthoughts":5938,"called":5939,"Ġeval":5940,"Ġsubstant":5941,"ounded":5942,"Ġroute":5943,"ĠChristmas":5944,"66":5945,"Ġdeclined":5946,"ĠAndrew":5947,"MA":5948,"ionally":5949,"Ġauth":5950,"Rep":5951,"abin":5952,"Other":5953,"Ġstatements":5954,"Ġourselves":5955,"Ġleaves":5956,"Ġedge":5957,"ĠScot":5958,"Ġviolent":5959,"ĠBet":5960,"Ġeditor":5961,"OD":5962,"During":5963,"Res":5964,"Ġconfirm":5965,"Ġtech":5966,"evin":5967,"ĠAv":5968,"razil":5969,"oves":5970,"nergy":5971,"Ġdelivered":5972,"Ġpublicly":5973,"ansas":5974,"Ġemails":5975,"enture":5976,"acter":5977,"ĠWal":5978,"ĠAT":5979,"Ġdial":5980,"Ġlawyer":5981,"Ġrail":5982,"sex":5983,"Ġemphas":5984,"Ġarmed":5985,"Ġ1980":5986,"ooth":5987,"Ġpractices":5988,"Ġfish":5989,"Ġterrorist":5990,"angers":5991,"Ġphilosoph":5992,"ĠSov":5993,"Ġfilled":5994,"ĠPhot":5995,"Ġgather":5996,"Ġcauses":5997,"Ġpicked":5998,"Ġprin":5999,"Ġguns":6000,"ellow":6001,"Ġexcell":6002,"Ġcentre":6003,"ĠMuslims":6004,"ĠSher":6005,"Ġreaders":6006,"rich":6007,"Ġmoral":6008,"Ġobjects":6009,"Ġpatient":6010,"ĠOhio":6011,"ĠRev":6012,"Ġpool":6013,"Ġperman":6014,"cons":6015,"LS":6016,"Ġtree":6017,"Ġvan":6018,"cos":6019,"uy":6020,"Ġwalking":6021,"Ġrealize":6022,"Ġcontains":6023,"Ġtransform":6024,"orders":6025,"Ġlegit":6026,"can":6027,"Ġdynam":6028,"essor":6029,"Ġargue":6030,"Ġexpansion":6031,"Many":6032,"Yes":6033,"ĠMichigan":6034,"orse":6035,"ĠSwed":6036,"ette":6037,"Ġagents":6038,"Ġsubsequ":6039,"Ġeffectively":6040,"::":6041,"arn":6042,"uls":6043,"ĠGen":6044,"ĠUkraine":6045,"41":6046,"Ġbond":6047,"Ġactivists":6048,"long":6049,"Ġrain":6050,"date":6051,"Ġtalent":6052,"Ġexercise":6053,"American":6054,"Ġinitially":6055,"Ġcann":6056,"illance":6057,"Ġartist":6058,"ĠBat":6059,"Ġdebut":6060,"ĠSign":6061,"ala":6062,"enda":6063,"Ġmissed":6064,"Ġsad":6065,"ĠBack":6066,"Ġthinks":6067,"Ġgovernor":6068,"arters":6069,"asure":6070,"def":6071,"gia":6072,"Ġcrack":6073,"Ġdesire":6074,"ourney":6075,"arl":6076,"ĠSee":6077,"mates":6078,"Ġexamples":6079,"88":6080,"ĠFer":6081,"Ġshop":6082,"Ġlinked":6083,"Ġsecretary":6084,"ĠID":6085,"ĠRead":6086,"iat":6087,"Ġwear":6088,"ĠMa":6089,"ĠStr":6090,"Ġincreases":6091,"Ġmagazine":6092,"Ġlabel":6093,"Ġmotor":6094,"ĠHy":6095,"Tube":6096,"ĠStan":6097,"Ġsession":6098,"Ġquarterback":6099,"ĠDou":6100,"ĠGM":6101,"Ġwarned":6102,"illy":6103,"Ġallegations":6104,"Ġinjuries":6105,"Ġexplan":6106,"ĠBoard":6107,"Although":6108,"ĠLin":6109,"Ġflood":6110,"agers":6111,"ĠVict":6112,"ĠLife":6113,"orry":6114,"ĠMount":6115,"Oh":6116,"ĠSum":6117,"Ġnecessarily":6118,"Ġplane":6119,"Ġfaced":6120,"Ġincor":6121,"rum":6122,"Follow":6123,"Ġdoctor":6124,"Ġradical":6125,"ĠTechn":6126,"Ã¤":6127,"Ġclient":6128,"ĠWork":6129,"Ġatmosp":6130,"Ġabandon":6131,"Add":6132,"Ġhell":6133,"ador":6134,"TA":6135,"Ġ36":6136,"ĠSecond":6137,"ĠIreland":6138,"Ġrepresents":6139,"ĠPrem":6140,"Ġpra":6141,"ĠRE":6142,"Ġfacts":6143,"Qu":6144,"Ġadvanced":6145,"enger":6146,"Ġessentially":6147,"ERS":6148,"ĠJul":6149,"Ġperformed":6150,"Ġneither":6151,"Ġnations":6152,"Ġannouncement":6153,"ĠOk":6154,"Ġmir":6155,"edy":6156,"Ġdistribution":6157,"Ġfranch":6158,"Ġpure":6159,"Spe":6160,"ouston":6161,"Ġdiplom":6162,"Ġfollows":6163,"amin":6164,"Ġvari":6165,"Ġaccepted":6166,"ĠRod":6167,"ĠSeattle":6168,"Ġmyster":6169,"Ġremark":6170,"Ġbare":6171,"icing":6172,"itors":6173,"Ġchart":6174,"Ġrestaurant":6175,"iest":6176,"Ġnearby":6177,"Ġske":6178,"Ġwithd":6179,"Ġscheduled":6180,"ĠDan":6181,"Ġdecre":6182,"ĠIll":6183,"Ġing":6184,"law":6185,"Ġaf":6186,"ĠCNN":6187,"Ġeverybody":6188,"ĠiPhone":6189,"Ġalt":6190,"ĠCub":6191,"gent":6192,"Ġmaterials":6193,"Ġstorage":6194,"Can":6195,"ĠKim":6196,"usal":6197,"Ġplayoff":6198,"Ġadds":6199,"ĠBarack":6200,"abb":6201,"Ġoffense":6202,"Ġphen":6203,"ĠISIS":6204,"ĠHen":6205,"Ġwarm":6206,"Ġdrivers":6207,"irty":6208,"otic":6209,"Ġguilty":6210,"Ġsuc":6211,"Ġsongs":6212,"Ġbegins":6213,"Ġlisted":6214,"Ġflo":6215,"Ġfort":6216,"Ġresolution":6217,"esome":6218,"TS":6219,"Ġtotally":6220,"Most":6221,"Ġexistence":6222,"ĠElect":6223,"Ġyesterday":6224,"Ġsav":6225,"word":6226,"rip":6227,"57":6228,"Ġstood":6229,"Ġsurprised":6230,"ĠJews":6231,"Ġsam":6232,"ĠKorean":6233,"ÑĤ":6234,"Ġcombination":6235,"Ġlawsuit":6236,"Ġnetworks":6237,"Ġtemperature":6238,"Then":6239,"Ġmoves":6240,"Ġspons":6241,"Ġshots":6242,"ĠVol":6243,"Ġinvestors":6244,"water":6245,"Ġimproved":6246,"Ġcorrespond":6247,"Ġsought":6248,"Ġ2002":6249,"Ġinput":6250,"Ġapplied":6251,"ĠFar":6252,"str":6253,"ĠOnly":6254,"irds":6255,"Ġ300":6256,"Ġdiscussed":6257,"Ġjoint":6258,"Ġdemocracy":6259,"ena":6260,"ĠTO":6261,"Ġfill":6262,"ĠAff":6263,"Ġpushed":6264,"Ġthreats":6265,"ĠWhe":6266,"Ġdatab":6267,"sembly":6268,"stood":6269,"Ob":6270,"Ġride":6271,"ĠDiv":6272,"ollywood":6273,"Ġroughly":6274,"Ġconsequences":6275,"whel":6276,"ĠBlue":6277,"ĠWeek":6278,"Ġoutput":6279,"ĠColumb":6280,"Ġcolleagues":6281,"Ġlibrary":6282,"ocratic":6283,"Ġ33":6284,"Ġrein":6285,"Ġevolution":6286,"ideo":6287,"Ġbrings":6288,"Ġsquare":6289,"Ġcreative":6290,"Ġboss":6291,"Ġheads":6292,"Ġenh":6293,"ĠClub":6294,"ĠLake":6295,"Ġfunctions":6296,"ĠDevelop":6297,"ĠRomney":6298,"Ġboys":6299,"onna":6300,"Ġentered":6301,"acle":6302,"Ġrising":6303,"igure":6304,"IF":6305,"ĠDev":6306,"Ġlie":6307,"Ġparliament":6308,"Ġisland":6309,"Ġmanufacture":6310,"Ġcontest":6311,"Ġresc":6312,"icious":6313,"dis":6314,"Ġmetal":6315,"Ġarticles":6316,"PU":6317,"born":6318,"ician":6319,"Ġrow":6320,"Ġhistorical":6321,"ĠIts":6322,"Ġlinks":6323,"Ġplot":6324,"Ġwarning":6325,"Ġfairly":6326,"Ġconsistent":6327,"greg":6328,"Ġentry":6329,"Ġbehavi":6330,"ĠAnother":6331,"Ġrally":6332,"Ġnav":6333,"Ġresident":6334,"Ġtow":6335,"Ġunlikely":6336,"athan":6337,"omin":6338,"FP":6339,"ĠBrazil":6340,"alty":6341,"Ġyounger":6342,"ĠBoy":6343,"Ġheavily":6344,"ĠHist":6345,"Ġbutton":6346,"duc":6347,"oked":6348,"fortunately":6349,"Ġstores":6350,"Ġclasses":6351,"ĠSaudi":6352,"ĠAsia":6353,"Ġnative":6354,"Ġanywhere":6355,"Ġathlet":6356,"Ġscored":6357,"etroit":6358,"mes":6359,"Ġreceiving":6360,"ĠRights":6361,"Ġkit":6362,"ĠWalk":6363,"Ġbright":6364,"Ġsucceed":6365,"86":6366,"Ġimportance":6367,"Ġversions":6368,"iques":6369,"OC":6370,"Ġexpression":6371,"Ġfacility":6372,"power":6373,"Ã³":6374,"agan":6375,"Ġbike":6376,"erences":6377,"ĠBob":6378,"IR":6379,"oma":6380,"ĠEach":6381,"Ġreturns":6382,"Ġclassic":6383,"Ġstring":6384,"ĠMaybe":6385,"Ġprohib":6386,"Ġbuying":6387,"olve":6388,"Ġdeaths":6389,"arden":6390,"Ġsymbol":6391,"ship":6392,"Ġdefined":6393,"Ġfraud":6394,"Ġdoctors":6395,"issions":6396,"west":6397,"zona":6398,"Ġreplaced":6399,"Ġallegedly":6400,"Ġprotests":6401,"Ġcere":6402,"esc":6403,"ĠCIA":6404,"ĠEr":6405,"Ġbon":6406,"Ġtargets":6407,"Ġmarketing":6408,"Ġbreaking":6409,"Ġhardware":6410,"See":6411,"Ġcurrency":6412,"Ġalcohol":6413,"ĠNBA":6414,"Ġsomewhat":6415,"Ġhat":6416,"Ġshif":6417,"Ġregulations":6418,"Ġhousehold":6419,"ittle":6420,"style":6421,"Ġlaugh":6422,"Ġmanage":6423,"alian":6424,"Ġhoping":6425,"Ġconsumer":6426,"Ġcampus":6427,"Ġholds":6428,"Ġbot":6429,"Ġwaste":6430,"po":6431,"Man":6432,"Ġessential":6433,"Ġvice":6434,"ĠDC":6435,"Ġfavour":6436,"ometimes":6437,"Ġblow":6438,"Ġadm":6439,"opy":6440,"Ġintention":6441,"Ġempt":6442,"Ġfewer":6443,"ua":6444,"Ġsym":6445,"pret":6446,"ican":6447,"press":6448,"Ġdownt":6449,"Ġlock":6450,"cho":6451,"77":6452,"Ġsees":6453,"Ġopposite":6454,"Ã¼":6455,"hetic":6456,"Ġcustomer":6457,"Ġmotion":6458,"En":6459,"Pol":6460,"ĠRam":6461,"Ġindustrial":6462,"Ġmoments":6463,"Ġgrown":6464,"ĠLib":6465,"Ġefficient":6466,"Ġpromise":6467,"Ġrum":6468,"Ġclosely":6469,"Get":6470,"Ġacts":6471,"Ġchain":6472,"Ġrelevant":6473,"Ġschol":6474,"Ġcert":6475,"kel":6476,"ii":6477,"ĠCard":6478,"ĠGames":6479,"ĠSports":6480,"ĠImage":6481,"print":6482,"Ġsuffering":6483,"ĠBrook":6484,"Ġsyn":6485,"twitter":6486,"ellect":6487,"Ġparticipants":6488,"andal":6489,"utch":6490,"Ġpregn":6491,"ml":6492,"Ġillust":6493,"erves":6494,"Ġsharing":6495,"ĠYear":6496,"Ġsick":6497,"ĠColorado":6498,"name":6499,"ĠSky":6500,"ĠMs":6501,"ulous":6502,"ĠDie":6503,"Ġbug":6504,"Ġcycle":6505,"Ġ38":6506,"Ġadults":6507,"Ġharder":6508,"Ġimmediate":6509,"ocket":6510,"ĠYouTube":6511,"Ġemotional":6512,"Ġtom":6513,"ĠAnn":6514,"tering":6515,"inem":6516,"ĠGar":6517,"Ġsquad":6518,"Ġcircumstances":6519,"otal":6520,"Ġhur":6521,"Ġacknowled":6522,"ockey":6523,"rome":6524,"Ã¶":6525,"ologist":6526,"Ġmechanism":6527,"gas":6528,"Ġwave":6529,"Ġmine":6530,"Ġkinds":6531,"Ġzero":6532,"Ġvolunte":6533,"Ġextent":6534,"Ġopponent":6535,"ĠRight":6536,"ĠWatch":6537,"06":6538,"ĠPalestinian":6539,"chen":6540,"Ġowned":6541,"Ġfashion":6542,"Ġreferred":6543,"ayers":6544,"Ġdepth":6545,"Ġancient":6546,"Ġtrick":6547,"Ġmanner":6548,"ĠCatholic":6549,"atell":6550,"ĠValley":6551,"ĠWild":6552,"ĠWik":6553,"Ġisol":6554,"Ġdecline":6555,"nic":6556,"Ġfeatured":6557,"Ġdetailed":6558,"Ġcheap":6559,"ĠDavis":6560,"08":6561,"nam":6562,"Ġacting":6563,"Ġauthors":6564,"ĠSoviet":6565,"obb":6566,"Ġguide":6567,"Ġtournament":6568,"ĠPutin":6569,"ĠEric":6570,"foot":6571,"Ġartists":6572,"Ġfundamental":6573,"ĠHouston":6574,"Ġtiny":6575,"Ġsnow":6576,"Ġformed":6577,"xy":6578,"Ġpip":6579,"Ġwins":6580,"Ġearned":6581,"Ġregional":6582,"ĠOld":6583,"certain":6584,"ĠOF":6585,"Ġdescribe":6586,"Ġcarrying":6587,"Ġunus":6588,"Ġrelationships":6589,"friend":6590,"Ġcompetitive":6591,"efully":6592,"Ġintent":6593,"chers":6594,"Ġtermin":6595,"Ġfrequently":6596,"07":6597,"ĠDi":6598,"ĠJesus":6599,"Ġliqu":6600,"Donald":6601,"Ġposts":6602,"ĠAut":6603,"ĠEll":6604,"Ġembr":6605,"Ġdivision":6606,"bar":6607,"Ġinstru":6608,"uated":6609,"Ġschedule":6610,"Com":6611,"ĠHel":6612,"Ġbatt":6613,"Ġalongside":6614,"Ġcrazy":6615,"va":6616,"Ġsurveillance":6617,"Ġbag":6618,"Ġmouth":6619,"Ġlies":6620,"ordan":6621,"Ġ49":6622,"ĠSupp":6623,"Ġcomfortable":6624,"ĠGra":6625,"Ġ37":6626,"Ġexcellent":6627,"Ġcoffee":6628,"ĠWay":6629,"Ġsentence":6630,"Ġmeat":6631,"Ġattempted":6632,"Ġsand":6633,"iger":6634,"Ġimmigrants":6635,"hus":6636,"Ġrepeatedly":6637,"Ġstrange":6638,"Ġupdated":6639,"Ġhotel":6640,"iami":6641,"',":6642,"awa":6643,"game":6644,"ĠLong":6645,"rison":6646,"Ġcontroversial":6647,"uable":6648,"Ġstruck":6649,"Ġupcoming":6650,"Ġsigning":6651,"Ġcommitment":6652,"Despite":6653,"ensus":6654,"Ġdedicated":6655,"luding":6656,"ora":6657,"Ġegg":6658,"anes":6659,"weight":6660,"Ġcolumn":6661,"Ġdeeply":6662,"Ġsister":6663,"Ġrealized":6664,"azi":6665,"ĠWars":6666,"ĠEnergy":6667,"ĠCy":6668,"Ġreput":6669,"DS":6670,"Per":6671,"ĠHome":6672,"adel":6673,"aze":6674,"Ġ48":6675,"bel":6676,"Ġtort":6677,"Ġinspired":6678,"Ġcausing":6679,"ocation":6680,"Ġwire":6681,"Ġpushing":6682,"ORE":6683,"Ġpled":6684,"ĠCharles":6685,"Ġwalked":6686,"izz":6687,"Two":6688,"Ġstudio":6689,"rive":6690,"Like":6691,"Ġtargeted":6692,"burg":6693,"Ġring":6694,"ĠEconom":6695,"Ġuniverse":6696,"ĠDown":6697,"ĠCle":6698,"Ġterrorism":6699,"03":6700,"ceived":6701,"allas":6702,"Ġserving":6703,"Ġchairman":6704,"Ġorders":6705,"iciency":6706,"boy":6707,"ko":6708,"amily":6709,"Ġproperties":6710,"People":6711,"ĠAgain":6712,"Ġmedic":6713,"02":6714,"you":6715,"Ġflu":6716,"Ġassets":6717,"Ġchannel":6718,"ĠMen":6719,"umber":6720,"vin":6721,"iveness":6722,"avor":6723,"Ġrape":6724,"Ġagenda":6725,"Ġcrash":6726,"Ġstream":6727,"Ġapproval":6728,"Ġliterally":6729,"Ġprotected":6730,"Ġmainstream":6731,"ĠArizona":6732,"Ġsuicide":6733,"ĠMot":6734,"pan":6735,"ographic":6736,"Ġnewslet":6737,"Ġjourney":6738,"ĠIP":6739,"ĠYoung":6740,"ĠLord":6741,"Ġrisks":6742,"ĠFort":6743,"Ġneut":6744,"Ġvirtual":6745,"ĠPR":6746,"ĠNY":6747,"Ġsimpl":6748,"Ġcongress":6749,"56":6750,"ĠCruz":6751,"Ġoverwhel":6752,"mun":6753,"Ġwake":6754,"Ġurban":6755,"ĠJava":6756,"rence":6757,"Ġdu":6758,"ĠDefense":6759,"oration":6760,"Ġsubsid":6761,"sole":6762,"Ġcommunication":6763,"Ġlatter":6764,"59":6765,"Ġwatched":6766,"Ġusual":6767,"Ġdealing":6768,"Ġvolume":6769,"Ġobserved":6770,"ether":6771,"ĠSar":6772,"ĠEnter":6773,"Ġsurrounding":6774,"Ġ34":6775,"ĠVan":6776,"Ġprecise":6777,"Ġknowing":6778,"Ġgod":6779,"Ġforth":6780,"Ġselection":6781,"ĠDetroit":6782,"rors":6783,"Ġsky":6784,"Ġpassion":6785,"Ġhall":6786,"ĠMax":6787,"Ġwinter":6788,"ĠFil":6789,"gor":6790,"ugh":6791,"Ġfasc":6792,"fire":6793,"Ġexhib":6794,"Ġabortion":6795,"Ġpromised":6796,"Ġaside":6797,"Ġ1970":6798,"Ġking":6799,"Ġrequirements":6800,"Ġalive":6801,"phy":6802,"stone":6803,"ulture":6804,"Ġexplore":6805,"Ġopponents":6806,"Ġdestroyed":6807,"ta":6808,"ĠTop":6809,"iments":6810,"Ġformat":6811,"ĠTal":6812,"ĠJr":6813,"Ġrefugees":6814,"Or":6815,"Ġfolks":6816,"Int":6817,"Ġsho":6818,"ĠStephen":6819,"ixt":6820,"Ġdesk":6821,"Ġremote":6822,"Ġdiscover":6823,"Ġskill":6824,"anges":6825,"Ġnoticed":6826,"Ġsending":6827,"ĠBest":6828,"Ġpal":6829,"ĠHollywood":6830,"May":6831,"Ġnorthern":6832,"Ġunemploy":6833,"Ġphenomen":6834,"ipl":6835,"Ġnarr":6836,"Ġbattery":6837,"Ġfemin":6838,"Ġblame":6839,"onymous":6840,"woman":6841,"Ġwelcome":6842,"ĠâĪ":6843,"xual":6844,"Ġtested":6845,"Ġexciting":6846,"ĠWat":6847,"Ġdiet":6848,"Ġnarrow":6849,"ĠTit":6850,"NC":6851,"Ġlocations":6852,"Ġteachers":6853,"iro":6854,"rations":6855,"Ġunknown":6856,"ĠCast":6857,"Ġapproximately":6858,"ĠPolit":6859,"Ġprotesters":6860,"Ġgap":6861,"Ġmatches":6862,"Ġwest":6863,"Ġmistake":6864,"LA":6865,"ingu":6866,"Ġmaximum":6867,"Fr":6868,"ĠWilson":6869,"ensity":6870,"Ġbasket":6871,"Ġnomine":6872,"Ġprote":6873,"ĠIns":6874,"Ġanyway":6875,"Ġdefeat":6876,"Ġtheme":6877,"ĠKevin":6878,"ao":6879,"ati":6880,"Ñģ":6881,"apse":6882,"post":6883,"Ġstim":6884,"Ġreward":6885,"Ġinvolving":6886,"Ġshel":6887,"ĠChild":6888,"abis":6889,"Ġterritory":6890,"lv":6891,"Ġwal":6892,"Ġmoder":6893,"Ġproof":6894,"Ġsustain":6895,"acc":6896,"Ġretire":6897,"alle":6898,"Ġdeals":6899,"Ġeating":6900,"Ġmerely":6901,"Ġheight":6902,"Ġoblig":6903,"Ġproperly":6904,"Ġextended":6905,"Ġfixed":6906,"ogan":6907,"68":6908,"Ġattacked":6909,"gery":6910,"ĠArch":6911,"ĠLu":6912,"ĠMic":6913,"Ġpromote":6914,"ĠDNA":6915,"Ġfuck":6916,"Rel":6917,"Ġimpressive":6918,"Ġslowly":6919,"ĠSociety":6920,"ĠDar":6921,"Ġsample":6922,"Ġconver":6923,"Ġitem":6924,"Ġrequests":6925,"ĠMiller":6926,"Ġsuddenly":6927,"Ð½":6928,"Ġchose":6929,"Ġlicense":6930,"Ġadapt":6931,"Ġdry":6932,"aylor":6933,"ĠBrad":6934,"inson":6935,"Ġobtained":6936,"Ġdisag":6937,"Ġputs":6938,"ĠAgency":6939,"itarian":6940,"Ġdiagn":6941,"dro":6942,"Ġteacher":6943,"lies":6944,"ĠNASA":6945,"Ġtopic":6946,"Ġtypical":6947,"Ġassume":6948,"Ġworried":6949,"ĠPan":6950,"Ġwage":6951,"Ġfees":6952,"Ġselected":6953,"Ġgang":6954,"asks":6955,"Ġrecognize":6956,"osis":6957,"CA":6958,"Ġfacilities":6959,"ĠFord":6960,"Ġconfront":6961,"Ġfranchise":6962,"Ġunderstood":6963,"owered":6964,"Ġregions":6965,"ums":6966,"Ġconfig":6967,"Ġhonor":6968,"Ġsweet":6969,"Ġstrongly":6970,"Ġcomparison":6971,"Ġsole":6972,"Ġsupports":6973,"aire":6974,"ĠPS":6975,"Ġcommunications":6976,"Ġarguments":6977,"Ġthread":6978,"celer":6979,"ĠAdminist":6980,"igation":6981,"ĠCamer":6982,"Ġwid":6983,"aug":6984,"verty":6985,"Ġphase":6986,"ĠWomen":6987,"ĠItaly":6988,"VER":6989,"raid":6990,"Ġconstant":6991,"redit":6992,"Ġlimits":6993,"Ġeast":6994,"acking":6995,"Ġrub":6996,"kay":6997,"ĠDam":6998,"Ġjournalists":6999,"Ġdrag":7000,"Ġpoverty":7001,"Ġresistance":7002,"Ġhits":7003,"alo":7004,"Ġscheme":7005,"ĠDun":7006,"%)":7007,"pool":7008,"ĠGeorgia":7009,"ĠIrish":7010,"ĠLand":7011,"Ġcondem":7012,"Ġped":7013,"aser":7014,"axy":7015,"Ġcritics":7016,"Ġlikes":7017,"Ġwinner":7018,"ĠFour":7019,"ĠNations":7020,"Ġtransp":7021,"Ġ75":7022,"ĠMiami":7023,"cers":7024,"Ġpersonally":7025,"Ġdress":7026,"Ġallies":7027,"Ġequival":7028,"Ġcontrolled":7029,"Ġbunch":7030,"uilding":7031,"Ġvillage":7032,"Ġstronger":7033,"ĠCarl":7034,"Ġfalling":7035,"owa":7036,"Ġchampion":7037,"Ġharass":7038,"ura":7039,"Ġexposed":7040,"Ġflex":7041,"Ġsomehow":7042,"ĠNight":7043,"âĢĻ.":7044,"Ġencourage":7045,"ĠClass":7046,"ÑĢ":7047,"ĠSpanish":7048,"Ġescape":7049,"ĠNetwork":7050,"wan":7051,"Ġdefinition":7052,"gender":7053,"obile":7054,"Ġbid":7055,"pected":7056,"Ġsoul":7057,"Te":7058,"Ġinqu":7059,"ĠNHL":7060,"Ġcutting":7061,"Ġtruck":7062,"Ġsouthern":7063,"GBT":7064,"</":7065,"cow":7066,"Ġfields":7067,"CS":7068,"Ġdirected":7069,"orted":7070,"Ġmeetings":7071,"53":7072,"ĠJersey":7073,"ĠCompany":7074,"asts":7075,"arity":7076,"aun":7077,"mo":7078,"John":7079,"Ġcomprom":7080,".\"[":7081,"igg":7082,"ĠAsian":7083,"Ġsomewhere":7084,"ĠStand":7085,"rimination":7086,"list":7087,"ĠDaniel":7088,"Ġruled":7089,"92":7090,"Ġpurposes":7091,"Ġevil":7092,"ciples":7093,"zens":7094,"Ġamounts":7095,"rary":7096,"Ġending":7097,"Ġparam":7098,"52":7099,"Ġicon":7100,"Ġstretch":7101,"Ġletters":7102,"Ġprofit":7103,"Ġties":7104,"sea":7105,"pes":7106,"Ġemissions":7107,"ĠDespite":7108,"vy":7109,"enders":7110,"Ġdeleg":7111,"Ġclaiming":7112,"Ġempty":7113,"Ġdecent":7114,"oric":7115,"Ġfinds":7116,"Ġpunish":7117,"ĠThank":7118,"Ġfold":7119,"Ġdogs":7120,"cript":7121,"Ġintellect":7122,"NE":7123,"Ġdesper":7124,"98":7125,"Ġretain":7126,"Ġtrees":7127,"Ġdriven":7128,"ĠLove":7129,"Ġanx":7130,"arry":7131,"CE":7132,"Ġdrawn":7133,"ĠKn":7134,"ĠAttorney":7135,"Ġideal":7136,"Ġofficially":7137,"Ġpolls":7138,"ads":7139,"Ġaspects":7140,"Ġfifth":7141,"nesota":7142,"iner":7143,"Ġcolon":7144,"Ġtrading":7145,"Ġemployee":7146,"otten":7147,"ĠCam":7148,"Ġfighters":7149,"Ġvacc":7150,"Ġprimarily":7151,"Ġchosen":7152,"ĠReal":7153,"han":7154,"ĠScotland":7155,"iform":7156,"Ġreasonable":7157,"lder":7158,"Ġgotten":7159,"ĠWater":7160,"ĠÎ":7161,"Ġbear":7162,"ras":7163,"Ġcaptured":7164,"Ġgoods":7165,"ĠThough":7166,"owed":7167,"intend":7168,"Ġactor":7169,"oming":7170,"Police":7171,"Ġdistinct":7172,"Ġhouses":7173,"ĠLabor":7174,"67":7175,"Ġpic":7176,"Ġdeveloper":7177,"Ġrejected":7178,"Ġsomebody":7179,"lls":7180,"ĠInf":7181,"Ġconstantly":7182,"Ġdeck":7183,"Ġexists":7184,"ĠTurk":7185,"sters":7186,"Ġble":7187,"Ġengage":7188,"Ġupper":7189,"Ġregularly":7190,"Ġpayment":7191,"Ġextensive":7192,"UR":7193,"Ġsignal":7194,"Ġthreatened":7195,"Ġchoices":7196,"ĠServices":7197,"Ġconstit":7198,"Ġsituations":7199,"58":7200,"Ġadmit":7201,"Ġencoun":7202,"unning":7203,"sm":7204,"Ġcontem":7205,"isan":7206,"Ġspin":7207,"ĠSea":7208,"Ġrival":7209,"Ġcourts":7210,"eping":7211,"Ġpayments":7212,"lock":7213,"ĠEnd":7214,"Ġflying":7215,"icer":7216,"Ġsolutions":7217,"Ġtechnologies":7218,"Ġanswers":7219,"ĠDark":7220,"Ġelectricity":7221,"ĠRog":7222,"Any":7223,"Ġperfectly":7224,"Ġmixed":7225,"Ġboot":7226,"ĠLew":7227,"Ġoutcome":7228,"Ġbench":7229,"Ġdatabase":7230,"Ġworldwide":7231,"irms":7232,"ĠBell":7233,"ĠCD":7234,"Ġresulting":7235,"anchester":7236,"ĠWhere":7237,"Ġincl":7238,"hips":7239,"ĠChampionship":7240,"Ġapartment":7241,"Ġcrucial":7242,"ĠMS":7243,"icken":7244,"Once":7245,"ĠJordan":7246,"Ġcomponents":7247,"Ġmurd":7248,"Ġseats":7249,"regon":7250,"Ġelsewhere":7251,"Ġcontributed":7252,"ĠDallas":7253,"Ġsight":7254,"ĠMoore":7255,"ĠGall":7256,"ĠHun":7257,"Ġkeeps":7258,"inding":7259,"Ġanymore":7260,"iana":7261,"Ġprofile":7262,"ĠView":7263,"chain":7264,"Ġpod":7265,"ĠItalian":7266,"earing":7267,"Ġfeedback":7268,"ĠSk":7269,"cr":7270,"Ġatmosphere":7271,"Ġdemands":7272,"Ġfounder":7273,"300":7274,"ĠBas":7275,"Ġconscious":7276,"Ġcategory":7277,"asters":7278,"Ġbaseball":7279,"apes":7280,"ĠOrgan":7281,"Ġestimates":7282,"Ġcoalition":7283,"star":7284,"Ġincredibly":7285,"ĠAcadem":7286,"ĠBrian":7287,"ĠAdam":7288,"ez":7289,"Ġnarrative":7290,"ĠWin":7291,"ĠCat":7292,"Ġmayor":7293,"inations":7294,"Off":7295,"Ġenemies":7296,"Ġrig":7297,"oster":7298,"Ġnobody":7299,"acked":7300,"ilarly":7301,"Ġbills":7302,"Ġtank":7303,"Ġmanip":7304,"Also":7305,"ĠSouthern":7306,"ĠOS":7307,"Ġ00":7308,"otes":7309,"Ġbrew":7310,"mod":7311,"Ġgro":7312,"ifer":7313,"Ġaccurate":7314,"Ġprovince":7315,"MP":7316,"ĠVictor":7317,"Ġregistered":7318,"emen":7319,"Ġawesome":7320,"Ġdollar":7321,"phia":7322,"Ġwriters":7323,"Ġincredible":7324,"imp":7325,"Ġreturning":7326,"Ġticket":7327,"ĠTest":7328,"Ġbitcoin":7329,"MS":7330,"Ġpaint":7331,"ĠBowl":7332,"Ġmachines":7333,"Ġterrible":7334,"ĠAuthor":7335,"Ġparking":7336,"Ġconcluded":7337,"ĠLight":7338,"fast":7339,"Ġrecovery":7340,"there":7341,"Ġemployment":7342,"Ġcontrols":7343,"lers":7344,"ĠSteven":7345,"Ġconven":7346,"Ġnut":7347,"ĠCivil":7348,"Ġcontinuing":7349,"Today":7350,"Ġunp":7351,"onic":7352,"Ġweird":7353,"Ġdozen":7354,"Ġnewly":7355,"Ġenthus":7356,"rates":7357,"ali":7358,"Ġgenerate":7359,"Ġrepresented":7360,"ĠEp":7361,"Ne":7362,"Ġraising":7363,"Ġstead":7364,"Ġangry":7365,"Ġexplanation":7366,"Ġexcess":7367,"ĠBru":7368,"ye":7369,"Ġroutine":7370,"Ġtied":7371,"essed":7372,"54":7373,"ĠDisney":7374,"Ġconclusion":7375,"adelphia":7376,"agram":7377,"rage":7378,"ĠHot":7379,"Ġgranted":7380,"Ġinformed":7381,"ĠSC":7382,"Ġroster":7383,"Ġplastic":7384,"Ġclients":7385,"Ġorganized":7386,"Ġcounty":7387,"Ġaggressive":7388,"Ġinnoc":7389,"Ġ39":7390,"Ġengaged":7391,"iscons":7392,"Ġloan":7393,"Dr":7394,"Ġstuck":7395,"Ġtall":7396,"ĠTw":7397,"era":7398,"Ġproved":7399,"isconsin":7400,"bal":7401,"Ġimper":7402,"ria":7403,"Ġ($":7404,"Ġcapture":7405,"itar":7406,"Ġhabit":7407,"Ġscenes":7408,"ĠConstitution":7409,"Ġconspir":7410,"Ġsuccessfully":7411,"ima":7412,"ĠProgram":7413,"ĠRoman":7414,"Ġdrinking":7415,"ĠGi":7416,"ĠKurd":7417,"ĠSn":7418,"ĠThanks":7419,"ĠConf":7420,"Ġassistant":7421,"Getty":7422,"ĠId":7423,"Sec":7424,"Ġ1999":7425,"reams":7426,"Ġgaming":7427,"Ġmyth":7428,"ĠAR":7429,"ifications":7430,"Because":7431,"Ġcomplicated":7432,"Ġpilot":7433,"inity":7434,"mont":7435,"aron":7436,"Ġinterpret":7437,"ĠUt":7438,"Ġcompens":7439,"Ġsatell":7440,"Ġaimed":7441,"Ġcomplaint":7442,"actions":7443,"Ġtherap":7444,"Ġiron":7445,"Ġhidden":7446,"Ġrating":7447,"Ġblocks":7448,"ĠâĢĻ":7449,"Ġmagic":7450,"ĠJoseph":7451,"ĠAvenue":7452,"raph":7453,"Ġpenalty":7454,"esy":7455,"ĠContin":7456,"Ġprogramming":7457,"Ġreporter":7458,"ked":7459,"Ġkil":7460,"Ġru":7461,"Ġsolve":7462,"Ġlawmakers":7463,"ĠBalt":7464,"Ġdegrees":7465,"Ġconsumption":7466,"ĠCare":7467,"Ġinstalled":7468,"ĠBusiness":7469,"rating":7470,"ĠMars":7471,"Ġtechniques":7472,"iscal":7473,"ĠCross":7474,"ĠSpain":7475,"anny":7476,"ĠPacific":7477,"Ġhel":7478,"Inst":7479,"gorith":7480,"Ġtaught":7481,"Ġpin":7482,"ules":7483,"Ed":7484,"ĠEduc":7485,"Ġchances":7486,"ba":7487,"Ġads":7488,"Ġaudio":7489,"%.":7490,"Ġrecording":7491,"ĠMinnesota":7492,"Ġcup":7493,"ĠSenator":7494,"Ġattended":7495,"uan":7496,"Ġoperate":7497,"uine":7498,"itals":7499,"Ġinfect":7500,"Ġautomatically":7501,"Ġassistance":7502,"Ġ42":7503,"Ġacademic":7504,"51":7505,"Ġphr":7506,"Ġindex":7507,"Ġorigin":7508,"Ġpermanent":7509,"Ġrus":7510,"hent":7511,"rete":7512,"Ġwestern":7513,"Ġveteran":7514,"Ġcompre":7515,"rep":7516,"Ġshares":7517,"Ġneuro":7518,"ĠZeal":7519,"Ġsurvive":7520,"Ġextract":7521,"ĠDead":7522,"Ġteaching":7523,"Ġbom":7524,"Ġvisited":7525,"urb":7526,"Ġestate":7527,"Ġreputation":7528,"ĠAh":7529,"Ġroot":7530,"Ġsufficient":7531,"Ġextrem":7532,">>":7533,"ĠTem":7534,"Ġregardless":7535,"Ġoverse":7536,"Ġgrant":7537,"Ġ150":7538,"Ġvaluable":7539,"ĠFre":7540,"Ġshit":7541,"Ġracial":7542,"Ġenable":7543,"Ġmonit":7544,"Ġanticip":7545,"========":7546,"Ġdict":7547,"Ġinspir":7548,"oz":7549,"Ġaccomp":7550,"Ġtitles":7551,"aed":7552,"Ġadopted":7553,"ĠForeign":7554,"ĠOper":7555,"everal":7556,"Ġelite":7557,"ĠNBC":7558,"Ġ64":7559,"Ġdowntown":7560,"Ġmarked":7561,"ĠViet":7562,"Ġchat":7563,"thew":7564,"ĠParliament":7565,"Ġmail":7566,"Ġlawyers":7567,"olt":7568,"ĠNation":7569,"Ġtrib":7570,"Ġfake":7571,"Ġfunny":7572,"Ġcollected":7573,"Ġdim":7574,"ibilities":7575,"LC":7576,"etary":7577,"Ġvulnerable":7578,"sell":7579,"Ġcomic":7580,"ĠStill":7581,"Ġdefence":7582,"month":7583,"ĠNick":7584,"ĠSeries":7585,"Ġmedium":7586,"world":7587,"Ġpatterns":7588,"Ġenjoyed":7589,"Ġprinciples":7590,"Ġwarming":7591,"Ġportion":7592,"Ġsympt":7593,"aturally":7594,"Ġprominent":7595,"ashes":7596,"Ġchampionship":7597,"ĠJosh":7598,"ĠTurn":7599,"Ġafraid":7600,"97":7601,"Ġuncertain":7602,"Ġbrut":7603,"Ġnegotiations":7604,"Ġpace":7605,"Ġextension":7606,"quarters":7607,"idency":7608,"HS":7609,"Ġbes":7610,"Ġtaste":7611,"da":7612,"rypt":7613,"Me":7614,"ĠSpecial":7615,"Ġmainly":7616,"Ġpounds":7617,"ĠOs":7618,"ĠInvest":7619,"ĠLook":7620,"Ġdepending":7621,"ĠColl":7622,"Ġadvis":7623,"cling":7624,"Ġplug":7625,"96":7626,"Ġpersonnel":7627,"Ġprivile":7628,"Ġgall":7629,"Ġequivalent":7630,"Ġcable":7631,"inder":7632,"ĠLGBT":7633,"ĠMoscow":7634,"rific":7635,"cles":7636,"bro":7637,"Ġspr":7638,"Ġsanctions":7639,"Ġdescribes":7640,"abama":7641,"Ġprocesses":7642,"azz":7643,"orne":7644,"Ġsup":7645,"Ð¾Ð":7646,"from":7647,"ĠEastern":7648,"Ġexplicit":7649,"Ġtracks":7650,"Ġdisaster":7651,"ĠTake":7652,"Ġtroub":7653,"ĠGreek":7654,"ĠIT":7655,"Ġelev":7656,"Ġworker":7657,"icate":7658,"Ġintense":7659,"Ġemerged":7660,"tes":7661,"Ġfought":7662,"Ġpossession":7663,"antic":7664,"Rec":7665,"ystem":7666,"ĠTony":7667,"Ġyield":7668,"Ġphones":7669,"Ġagric":7670,"Ġresulted":7671,"Ġcrypt":7672,"Ġapolog":7673,"ĠAL":7674,"âĸ":7675,"mal":7676,"car":7677,"Ġhadn":7678,"idelines":7679,"enth":7680,"Ġbusy":7681,"upid":7682,"Ġcited":7683,"Pr":7684,"Ġthin":7685,"Ġsurgery":7686,"Ġexposure":7687,"Ġcorruption":7688,"ĠIowa":7689,"ĠOregon":7690,"prise":7691,"iel":7692,"Ġunusual":7693,"Ġpasses":7694,"Ġinev":7695,"Ġminority":7696,"Ġtor":7697,"ref":7698,"Ġ400":7699,"Ġcompete":7700,"viously":7701,"ĠHas":7702,"âĢĻ,":7703,"Ġduty":7704,"Ġentit":7705,"she":7706,"fall":7707,"Ġrecip":7708,"Ġthousand":7709,"mann":7710,"Ġsynt":7711,"ĠBull":7712,"aa":7713,"Ġrecru":7714,"ĠRay":7715,"Ġrarely":7716,"ĠPolicy":7717,"Ġtaxp":7718,"rently":7719,"ĠBefore":7720,"ĠNavy":7721,"Ġengineering":7722,"istent":7723,"active":7724,"ĠNorthern":7725,"Ġimplementation":7726,"Ġgenerated":7727,"ĠKelly":7728,"obs":7729,"))":7730,"ĠFed":7731,"ĠCoast":7732,"Ġbrowser":7733,"Ġdiscipl":7734,"Ġvisible":7735,"Ġexpectations":7736,"making":7737,"ogen":7738,"Ġveget":7739,"Ġfinance":7740,"ĠKent":7741,"ĠZealand":7742,"igen":7743,"Ġpropag":7744,"ML":7745,"athy":7746,"Ġshortly":7747,"ĠPhiladelphia":7748,"ĠAli":7749,"EA":7750,"ĠAnth":7751,"ĠVis":7752,"Ġstrict":7753,"log":7754,"ĠTaylor":7755,"ĠWisconsin":7756,"Ġeverywhere":7757,"ĠGer":7758,"Ġfeelings":7759,"scape":7760,"Ġunw":7761,"ĠLinux":7762,"ĠRick":7763,"Ġscenario":7764,"Ġcameras":7765,"Ġfirms":7766,"orough":7767,"this":7768,"text":7769,"ĠManchester":7770,"Ġenorm":7771,"Ġcomputers":7772,"elect":7773,"Mean":7774,"ilton":7775,"ATE":7776,"front":7777,"Ġpermission":7778,"Ġcoordin":7779,"Ġparticipate":7780,"ogether":7781,"Ġacceler":7782,"Ġbehalf":7783,"Ġcontracts":7784,"Ġlosses":7785,"Ġblind":7786,"ifies":7787,"ja":7788,"etime":7789,"cus":7790,"ĠAmendment":7791,"Ġcyber":7792,"ĠFood":7793,"Ġcrop":7794,"Ġtag":7795,"gun":7796,"Ġho":7797,"Ġstations":7798,"________________":7799,"inking":7800,"ĠOl":7801,"ni":7802,"Ġairport":7803,"Ġloud":7804,"Ġclubs":7805,"ĠSaf":7806,"Ġfee":7807,"ĠMu":7808,"Ġaffili":7809,"Ġadvertising":7810,"Ġshock":7811,"found":7812,"who":7813,"Ġsaved":7814,"Ġpriority":7815,"Ġfounded":7816,"umin":7817,"Ġuncom":7818,"rell":7819,"ĠHenry":7820,"Ġthick":7821,"ĠCome":7822,"ĠAsh":7823,"ĠGlobal":7824,"Mar":7825,"BO":7826,"ca":7827,"ĠPay":7828,"amber":7829,"Ġven":7830,"Ġlights":7831,"Ġestablishment":7832,"Ġrelief":7833,"Ġpump":7834,"imore":7835,"Us":7836,"Ġgained":7837,"umer":7838,"Ġdozens":7839,"iler":7840,"Ġprinciple":7841,"ĠSP":7842,"Ġstadium":7843,"ĠHT":7844,"ĠFred":7845,"Ġgenuine":7846,"Ġlaid":7847,"Ġformal":7848,"ĠCall":7849,"Ġwing":7850,"uer":7851,"Col":7852,"ĠSnow":7853,"Ġobst":7854,"Ġcounsel":7855,"ami":7856,"athered":7857,"ĠFoot":7858,"profit":7859,"Ġcreates":7860,"ĠGreg":7861,"bell":7862,"Ġplate":7863,"ĠKingdom":7864,"Ġfootage":7865,"Ġexception":7866,"Ġgonna":7867,"Ġdoors":7868,"ĠRet":7869,"Ġwithdraw":7870,"Ġalgorith":7871,"Ġriver":7872,"izon":7873,"ĠMcG":7874,"Ġhole":7875,"Ġtoler":7876,"pread":7877,"uce":7878,"eland":7879,"aska":7880,"Ġhistoric":7881,"ounced":7882,"76":7883,"osa":7884,"ĠRand":7885,"ĠLive":7886,"Ġstrikes":7887,"ĠMajor":7888,"ãĢ":7889,"Ġinterface":7890,"etts":7891,"Ġphilosophy":7892,"burgh":7893,"Ġbath":7894,"Ġ%":7895,"ĠMoh":7896,"Ġjournalist":7897,"Before":7898,"Ġstab":7899,"ĠOak":7900,"opher":7901,"ĠLo":7902,"osexual":7903,"Ġ44":7904,"Ġwonderful":7905,"ĠAD":7906,"Ġindicate":7907,"ĠJac":7908,"Ġmel":7909,"ĠThus":7910,"oured":7911,"idespread":7912,"fr":7913,"Ġfault":7914,"Ġdrawing":7915,"asion":7916,"Ġlegend":7917,"Ġscoring":7918,"Ġlanguages":7919,"Ġarrange":7920,"Ġ1960":7921,"Ġborrow":7922,"Ġliked":7923,"Ġmagn":7924,"lan":7925,"ĠRoyal":7926,"Ġcontaining":7927,"Ġneighb":7928,"Ġbridge":7929,"ĠDevelopment":7930,"ĠArm":7931,"Ġedition":7932,"Ġfoundation":7933,"Ġfarmers":7934,"Ġbasketball":7935,"weet":7936,"Ġridic":7937,"rative":7938,"Ġarray":7939,"Ġstruggling":7940,"Ġstable":7941,"Ġnominee":7942,"Ġneutral":7943,"Ġdestruction":7944,"ĠAdministration":7945,"Ġphotograph":7946,"Ġrecognized":7947,"Ġproducer":7948,"Ġuniform":7949,"Ġheaded":7950,"ĠHom":7951,"Ġmanufacturing":7952,"ĠFund":7953,"RS":7954,"aning":7955,"++":7956,"ilipp":7957,"interest":7958,"ĠPremier":7959,"HA":7960,"Ġ->":7961,"Ġsharp":7962,"ĠHarry":7963,"ĠThree":7964,"Ġspark":7965,"ependent":7966,"Ġrely":7967,"Ġbanned":7968,"Ġentering":7969,"arlier":7970,"ĠAS":7971,"ĠMarket":7972,"Ġdinner":7973,"Ġbeliefs":7974,"Ġstructures":7975,"Ġdeliber":7976,"UL":7977,"uper":7978,"Ġroles":7979,"Ġhang":7980,"hy":7981,"jo":7982,"eman":7983,"Ġbacked":7984,"Ġju":7985,"ulating":7986,"ĠHard":7987,"Ġ55":7988,"ĠPoint":7989,"ĠNote":7990,"Ġfinger":7991,"rous":7992,"Ġhired":7993,"ĠAlabama":7994,"Ġmovements":7995,"sylv":7996,"Co":7997,"Ġincent":7998,"lete":7999,"olis":8000,"ĠKir":8001,"Meanwhile":8002,"ĠBry":8003,"ATO":8004,"Ġ1998":8005,"aus":8006,"Ġconvicted":8007,"Ġgift":8008,"Ġtox":8009,"Ġwheel":8010,"rators":8011,"Ġcomponent":8012,"Ġprofession":8013,"teen":8014,"Ġinterviews":8015,"ĠRose":8016,"Ġreduction":8017,"ĠKey":8018,"ĠHit":8019,"Ġcannabis":8020,"ĠVR":8021,"ĠCO":8022,"Ġdelivery":8023,"ĠNic":8024,"ĠNob":8025,"Ġcha":8026,"ĠDiego":8027,"ĠOtt":8028,"ĠCentre":8029,"Ġ2019":8030,"UN":8031,"Ġrestrictions":8032,"away":8033,"Ġ65":8034,"ĠAtlanta":8035,"page":8036,"Ġocean":8037,"then":8038,"Ġachieved":8039,"pat":8040,"ĠPack":8041,"Ġunex":8042,"Ġflaw":8043,"aching":8044,"Ġproven":8045,"amsung":8046,"mate":8047,"ĠReport":8048,"lad":8049,"Those":8050,"ĠAcademy":8051,"Ġeuro":8052,"Ġ47":8053,"Ġconvin":8054,"Ġships":8055,"Ġbranch":8056,"Ġupset":8057,"ĠNext":8058,"President":8059,"ĠJason":8060,"ĠTurkish":8061,"Ġbegun":8062,"Ġproport":8063,"Ġproducing":8064,"abs":8065,"Ġaver":8066,"ĠJudge":8067,"gener":8068,"ho":8069,"Ġforever":8070,"Ġlistening":8071,"âĢĶâĢĶâĢĶâĢĶ":8072,"cription":8073,"Ġhitting":8074,"Ġracist":8075,"Ref":8076,"Ġepisodes":8077,"ouver":8078,"ĠConference":8079,"sylvania":8080,"Ġprocessing":8081,"enny":8082,"400":8083,"ĠFair":8084,"Ġ?":8085,"Ġexternal":8086,"Ġren":8087,"Ġwalls":8088,"ĠField":8089,"achus":8090,"Ġbarely":8091,"Ġdiscrimination":8092,"ĠRadio":8093,"Ġintellectual":8094,"PC":8095,"Ġelectronic":8096,"Ġsuggesting":8097,"Ġutil":8098,"Ġrapidly":8099,"Ġrose":8100,"senal":8101,"ĠPerhaps":8102,"Ġchallenging":8103,"ione":8104,"Ġcontained":8105,"Ġindicated":8106,"Ġcorporations":8107,"ĠMir":8108,"Ġwebsites":8109,"ĠESP":8110,"Ġwidespread":8111,"ĠPen":8112,"ester":8113,"87":8114,"ologists":8115,"ĠMayor":8116,"ĠFC":8117,"ĠBattle":8118,"Star":8119,"ĠEducation":8120,"ĠMusic":8121,"ĠDivision":8122,"ĠDid":8123,"Ġtechnique":8124,"Ġnormally":8125,"bec":8126,"aying":8127,"Ġasks":8128,"riers":8129,"oen":8130,"ĠMid":8131,"ikes":8132,"ĠAbout":8133,"ĠReview":8134,"Ġrepresentative":8135,"ĠReuters":8136,"engers":8137,"Ġdescription":8138,"ĠLatin":8139,"Ġhide":8140,"usive":8141,"ĠLittle":8142,"ĠSquare":8143,"ĠMcCain":8144,"abinet":8145,"PG":8146,"Ġwelf":8147,"Ġbra":8148,"Ġsettlement":8149,"anna":8150,"Ġsmooth":8151,"Ġrecognition":8152,"Ġterrorists":8153,"Ġretired":8154,"Ġillness":8155,"Ġtackle":8156,"ls":8157,"85":8158,"Ġtickets":8159,"Post":8160,"olen":8161,"Mc":8162,"Ġgear":8163,"Ġreaching":8164,"ĠNC":8165,"Ġreducing":8166,"Dav":8167,"Ġequally":8168,"omething":8169,"Ġprogressive":8170,"iy":8171,"Ġjury":8172,"Ġfriendly":8173,"Ġcollapse":8174,"ecut":8175,"Ġwelfare":8176,"ĠFurther":8177,"Ġexclusive":8178,"ĠBrother":8179,"Ġcampaigns":8180,"Ġgrass":8181,"ancouver":8182,"ĠOK":8183,"ĠDig":8184,"Ġ46":8185,"type":8186,"Ġburd":8187,"ĠNSA":8188,"oding":8189,"Ġnotion":8190,"Ġminds":8191}


--------------------------------------------------------------------------------