├── .gitignore
├── requirements.txt
├── .gitattributes
├── Dockerfile.cpu
├── CONTRIBUTORS.md
├── Dockerfile.gpu
├── download_model.py
├── LICENSE
├── DEVELOPERS.md
├── README.md
├── src
    ├── generate_unconditional_samples.py
    ├── sample.py
    ├── interactive_conditional_samples.py
    ├── encoder.py
    └── model.py
├── model_card.md
└── domains.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .mypy_cache/
3 | models/
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | fire>=0.1.3
2 | regex==2017.4.5
3 | requests==2.21.0
4 | tqdm==4.31.1
5 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # convert to OS line endings on checkout, back to LF on commit
2 | * text=auto
3 | 
4 | # ensure anything copied to the container has unix style line endings
5 | *.sh text eol=lf
6 | requirements.txt text eol=lf


--------------------------------------------------------------------------------
/Dockerfile.cpu:
--------------------------------------------------------------------------------
 1 | FROM tensorflow/tensorflow:1.12.0-py3
 2 | 
 3 | ENV LANG=C.UTF-8
 4 | RUN mkdir /gpt-2
 5 | WORKDIR /gpt-2
 6 | ADD . /gpt-2
 7 | RUN pip3 install -r requirements.txt
 8 | RUN python3 download_model.py 124M
 9 | RUN python3 download_model.py 355M
10 | RUN python3 download_model.py 774M
11 | RUN python3 download_model.py 1558M
12 | 


--------------------------------------------------------------------------------
/CONTRIBUTORS.md:
--------------------------------------------------------------------------------
 1 | # Contributors (alphabetically)
 2 | 
 3 | * **[madisonmay](https://github.com/madisonmay)**
 4 | 
 5 |   Added Dockerfiles
 6 | 
 7 | * **[Margaret Mitchell et al](https://arxiv.org/abs/1810.03993)**
 8 | 
 9 |   Our [usage](./README.md#usage) writeup was loosely inspired by the paper
10 |   [Model Cards for Model Reporting](https://arxiv.org/abs/1810.03993)
11 |   and related conversations with some of the authors.
12 | 
13 | * **[webproduktion01](https://github.com/webproduktion01)**
14 | 
15 |   Ported download script to python.
16 | 
17 | **[Full code contributors list](https://github.com/openai/gpt-2/contributors).**
18 | 


--------------------------------------------------------------------------------
/Dockerfile.gpu:
--------------------------------------------------------------------------------
 1 | FROM tensorflow/tensorflow:1.12.0-gpu-py3
 2 | 
 3 | # nvidia-docker 1.0
 4 | LABEL com.nvidia.volumes.needed="nvidia_driver"
 5 | LABEL com.nvidia.cuda.version="${CUDA_VERSION}"
 6 | 
 7 | # nvidia-container-runtime
 8 | ENV NVIDIA_VISIBLE_DEVICES=all \
 9 |     NVIDIA_DRIVER_CAPABILITIES=compute,utility \
10 |     NVIDIA_REQUIRE_CUDA="cuda>=8.0" \
11 |     LANG=C.UTF-8
12 | 
13 | RUN mkdir /gpt-2
14 | WORKDIR /gpt-2
15 | ADD . /gpt-2
16 | RUN pip3 install -r requirements.txt
17 | RUN python3 download_model.py 124M
18 | RUN python3 download_model.py 355M
19 | RUN python3 download_model.py 774M
20 | RUN python3 download_model.py 1558M
21 | 


--------------------------------------------------------------------------------
/download_model.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import requests
 4 | from tqdm import tqdm
 5 | 
 6 | if len(sys.argv) != 2:
 7 |     print('You must enter the model name as a parameter, e.g.: download_model.py 124M')
 8 |     sys.exit(1)
 9 | 
10 | model = sys.argv[1]
11 | 
12 | subdir = os.path.join('models', model)
13 | if not os.path.exists(subdir):
14 |     os.makedirs(subdir)
15 | subdir = subdir.replace('\\','/') # needed for Windows
16 | 
17 | for filename in ['checkpoint','encoder.json','hparams.json','model.ckpt.data-00000-of-00001', 'model.ckpt.index', 'model.ckpt.meta', 'vocab.bpe']:
18 | 
19 |     r = requests.get("https://openaipublic.blob.core.windows.net/gpt-2/" + subdir + "/" + filename, stream=True)
20 | 
21 |     with open(os.path.join(subdir, filename), 'wb') as f:
22 |         file_size = int(r.headers["content-length"])
23 |         chunk_size = 1000
24 |         with tqdm(ncols=100, desc="Fetching " + filename, total=file_size, unit_scale=True) as pbar:
25 |             # 1k for chunk_size, since Ethernet packet size is around 1500 bytes
26 |             for chunk in r.iter_content(chunk_size=chunk_size):
27 |                 f.write(chunk)
28 |                 pbar.update(chunk_size)
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Modified MIT License
 2 | 
 3 | Software Copyright (c) 2019 OpenAI
 4 | 
 5 | We don’t claim ownership of the content you create with GPT-2, so it is yours to do with as you please.
 6 | We only ask that you use GPT-2 responsibly and clearly indicate your content was created using GPT-2.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
 9 | associated documentation files (the "Software"), to deal in the Software without restriction,
10 | including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
12 | subject to the following conditions:
13 | 
14 | The above copyright notice and this permission notice shall be included
15 | in all copies or substantial portions of the Software.
16 | The above copyright notice and this permission notice need not be included
17 | with content created by the Software.
18 | 
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
20 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
22 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
24 | OR OTHER DEALINGS IN THE SOFTWARE.
25 | 


--------------------------------------------------------------------------------
/DEVELOPERS.md:
--------------------------------------------------------------------------------
 1 | # Installation
 2 | 
 3 | Git clone this repository, and `cd` into directory for remaining commands
 4 | ```
 5 | git clone https://github.com/openai/gpt-2.git && cd gpt-2
 6 | ```
 7 | 
 8 | Then, follow instructions for either native or Docker installation.
 9 | 
10 | ## Native Installation
11 | 
12 | All steps can optionally be done in a virtual environment using tools such as `virtualenv` or `conda`.
13 | 
14 | Install tensorflow 1.12 (with GPU support, if you have a GPU and want everything to run faster)
15 | ```
16 | pip3 install tensorflow==1.12.0
17 | ```
18 | or
19 | ```
20 | pip3 install tensorflow-gpu==1.12.0
21 | ```
22 | 
23 | Install other python packages:
24 | ```
25 | pip3 install -r requirements.txt
26 | ```
27 | 
28 | Download the model data
29 | ```
30 | python3 download_model.py 124M
31 | python3 download_model.py 355M
32 | python3 download_model.py 774M
33 | python3 download_model.py 1558M
34 | ```
35 | 
36 | ## Docker Installation
37 | 
38 | Build the Dockerfile and tag the created image as `gpt-2`:
39 | ```
40 | docker build --tag gpt-2 -f Dockerfile.gpu . # or Dockerfile.cpu
41 | ```
42 | 
43 | Start an interactive bash session from the `gpt-2` docker image.
44 | 
45 | You can opt to use the `--runtime=nvidia` flag if you have access to a NVIDIA GPU
46 | and a valid install of [nvidia-docker 2.0](https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0)).
47 | ```
48 | docker run --runtime=nvidia -it gpt-2 bash
49 | ```
50 | 
51 | # Running
52 | 
53 | | WARNING: Samples are unfiltered and may contain offensive content. |
54 | | --- |
55 | 
56 | Some of the examples below may include Unicode text characters. Set the environment variable:
57 | ```
58 | export PYTHONIOENCODING=UTF-8
59 | ```
60 | to override the standard stream settings in UTF-8 mode.
61 | 
62 | ## Unconditional sample generation
63 | 
64 | To generate unconditional samples from the small model:
65 | ```
66 | python3 src/generate_unconditional_samples.py | tee /tmp/samples
67 | ```
68 | There are various flags for controlling the samples:
69 | ```
70 | python3 src/generate_unconditional_samples.py --top_k 40 --temperature 0.7 | tee /tmp/samples
71 | ```
72 | 
73 | To check flag descriptions, use:
74 | ```
75 | python3 src/generate_unconditional_samples.py -- --help
76 | ```
77 | 
78 | ## Conditional sample generation
79 | 
80 | To give the model custom prompts, you can use:
81 | ```
82 | python3 src/interactive_conditional_samples.py --top_k 40
83 | ```
84 | 
85 | To check flag descriptions, use:
86 | ```
87 | python3 src/interactive_conditional_samples.py -- --help
88 | ```
89 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | **Status:** Archive (code is provided as-is, no updates expected)
 2 | 
 3 | # gpt-2
 4 | 
 5 | Code and models from the paper ["Language Models are Unsupervised Multitask Learners"](https://d4mucfpksywv.cloudfront.net/better-language-models/language-models.pdf).
 6 | 
 7 | You can read about GPT-2 and its staged release in our [original blog post](https://openai.com/research/better-language-models/), [6 month follow-up post](https://openai.com/blog/gpt-2-6-month-follow-up/), and [final post](https://www.openai.com/blog/gpt-2-1-5b-release/).
 8 | 
 9 | We have also [released a dataset](https://github.com/openai/gpt-2-output-dataset) for researchers to study their behaviors.
10 | 
11 | <sup>*</sup> *Note that our original parameter counts were wrong due to an error (in our previous blog posts and paper).  Thus you may have seen small referred to as 117M and medium referred to as 345M.*
12 | 
13 | ## Usage
14 | 
15 | This repository is meant to be a starting point for researchers and engineers to experiment with GPT-2.
16 | 
17 | For basic information, see our [model card](./model_card.md).
18 | 
19 | ### Some caveats
20 | 
21 | - GPT-2 models' robustness and worst case behaviors are not well-understood.  As with any machine-learned model, carefully evaluate GPT-2 for your use case, especially if used without fine-tuning or in safety-critical applications where reliability is important.
22 | - The dataset our GPT-2 models were trained on contains many texts with [biases](https://twitter.com/TomerUllman/status/1101485289720242177) and factual inaccuracies, and thus GPT-2 models are likely to be biased and inaccurate as well.
23 | - To avoid having samples mistaken as human-written, we recommend clearly labeling samples as synthetic before wide dissemination.  Our models are often incoherent or inaccurate in subtle ways, which takes more than a quick read for a human to notice.
24 | 
25 | ### Work with us
26 | 
27 | Please [let us know](mailto:languagequestions@openai.com) if you’re doing interesting research with or working on applications of GPT-2!  We’re especially interested in hearing from and potentially working with those who are studying
28 | - Potential malicious use cases and defenses against them (e.g. the detectability of synthetic text)
29 | - The extent of problematic content (e.g. bias) being baked into the models and effective mitigations
30 | 
31 | ## Development
32 | 
33 | See [DEVELOPERS.md](./DEVELOPERS.md)
34 | 
35 | ## Contributors
36 | 
37 | See [CONTRIBUTORS.md](./CONTRIBUTORS.md)
38 | 
39 | ## Citation
40 | 
41 | Please use the following bibtex entry:
42 | ```
43 | @article{radford2019language,
44 |   title={Language Models are Unsupervised Multitask Learners},
45 |   author={Radford, Alec and Wu, Jeff and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya},
46 |   year={2019}
47 | }
48 | ```
49 | 
50 | ## Future work
51 | 
52 | We may release code for evaluating the models on various benchmarks.
53 | 
54 | We are still considering release of the larger models.
55 | 
56 | ## License
57 | 
58 | [Modified MIT](./LICENSE)
59 | 


--------------------------------------------------------------------------------
/src/generate_unconditional_samples.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import fire
 4 | import json
 5 | import os
 6 | import numpy as np
 7 | import tensorflow as tf
 8 | 
 9 | import model, sample, encoder
10 | 
11 | def sample_model(
12 |     model_name='124M',
13 |     seed=None,
14 |     nsamples=0,
15 |     batch_size=1,
16 |     length=None,
17 |     temperature=1,
18 |     top_k=0,
19 |     top_p=1,
20 |     models_dir='models',
21 | ):
22 |     """
23 |     Run the sample_model
24 |     :model_name=124M : String, which model to use
25 |     :seed=None : Integer seed for random number generators, fix seed to
26 |      reproduce results
27 |     :nsamples=0 : Number of samples to return, if 0, continues to
28 |      generate samples indefinately.
29 |     :batch_size=1 : Number of batches (only affects speed/memory).
30 |     :length=None : Number of tokens in generated text, if None (default), is
31 |      determined by model hyperparameters
32 |     :temperature=1 : Float value controlling randomness in boltzmann
33 |      distribution. Lower temperature results in less random completions. As the
34 |      temperature approaches zero, the model will become deterministic and
35 |      repetitive. Higher temperature results in more random completions.
36 |     :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
37 |      considered for each step (token), resulting in deterministic completions,
38 |      while 40 means 40 words are considered at each step. 0 (default) is a
39 |      special setting meaning no restrictions. 40 generally is a good value.
40 |      :models_dir : path to parent folder containing model subfolders
41 |      (i.e. contains the <model_name> folder)
42 |     """
43 |     models_dir = os.path.expanduser(os.path.expandvars(models_dir))
44 |     enc = encoder.get_encoder(model_name, models_dir)
45 |     hparams = model.default_hparams()
46 |     with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
47 |         hparams.override_from_dict(json.load(f))
48 | 
49 |     if length is None:
50 |         length = hparams.n_ctx
51 |     elif length > hparams.n_ctx:
52 |         raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)
53 | 
54 |     with tf.Session(graph=tf.Graph()) as sess:
55 |         np.random.seed(seed)
56 |         tf.set_random_seed(seed)
57 | 
58 |         output = sample.sample_sequence(
59 |             hparams=hparams, length=length,
60 |             start_token=enc.encoder['<|endoftext|>'],
61 |             batch_size=batch_size,
62 |             temperature=temperature, top_k=top_k, top_p=top_p
63 |         )[:, 1:]
64 | 
65 |         saver = tf.train.Saver()
66 |         ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
67 |         saver.restore(sess, ckpt)
68 | 
69 |         generated = 0
70 |         while nsamples == 0 or generated < nsamples:
71 |             out = sess.run(output)
72 |             for i in range(batch_size):
73 |                 generated += batch_size
74 |                 text = enc.decode(out[i])
75 |                 print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
76 |                 print(text)
77 | 
78 | if __name__ == '__main__':
79 |     fire.Fire(sample_model)
80 | 
81 | 


--------------------------------------------------------------------------------
/src/sample.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | import model
 4 | 
 5 | def top_k_logits(logits, k):
 6 |     if k == 0:
 7 |         # no truncation
 8 |         return logits
 9 | 
10 |     def _top_k():
11 |         values, _ = tf.nn.top_k(logits, k=k)
12 |         min_values = values[:, -1, tf.newaxis]
13 |         return tf.where(
14 |             logits < min_values,
15 |             tf.ones_like(logits, dtype=logits.dtype) * -1e10,
16 |             logits,
17 |         )
18 |     return tf.cond(
19 |        tf.equal(k, 0),
20 |        lambda: logits,
21 |        lambda: _top_k(),
22 |     )
23 | 
24 | 
25 | def top_p_logits(logits, p):
26 |     """Nucleus sampling"""
27 |     batch, _ = logits.shape.as_list()
28 |     sorted_logits = tf.sort(logits, direction='DESCENDING', axis=-1)
29 |     cumulative_probs = tf.cumsum(tf.nn.softmax(sorted_logits, axis=-1), axis=-1)
30 |     indices = tf.stack([
31 |         tf.range(0, batch),
32 |         # number of indices to include
33 |         tf.maximum(tf.reduce_sum(tf.cast(cumulative_probs <= p, tf.int32), axis=-1) - 1, 0),
34 |     ], axis=-1)
35 |     min_values = tf.gather_nd(sorted_logits, indices)
36 |     return tf.where(
37 |         logits < min_values,
38 |         tf.ones_like(logits) * -1e10,
39 |         logits,
40 |     )
41 | 
42 | 
43 | def sample_sequence(*, hparams, length, start_token=None, batch_size=None, context=None, temperature=1, top_k=0, top_p=1):
44 |     if start_token is None:
45 |         assert context is not None, 'Specify exactly one of start_token and context!'
46 |     else:
47 |         assert context is None, 'Specify exactly one of start_token and context!'
48 |         context = tf.fill([batch_size, 1], start_token)
49 | 
50 |     def step(hparams, tokens, past=None):
51 |         lm_output = model.model(hparams=hparams, X=tokens, past=past, reuse=tf.AUTO_REUSE)
52 | 
53 |         logits = lm_output['logits'][:, :, :hparams.n_vocab]
54 |         presents = lm_output['present']
55 |         presents.set_shape(model.past_shape(hparams=hparams, batch_size=batch_size))
56 |         return {
57 |             'logits': logits,
58 |             'presents': presents,
59 |         }
60 | 
61 |     with tf.name_scope('sample_sequence'):
62 |         def body(past, prev, output):
63 |             next_outputs = step(hparams, prev, past=past)
64 |             logits = next_outputs['logits'][:, -1, :]  / tf.to_float(temperature)
65 |             logits = top_k_logits(logits, k=top_k)
66 |             logits = top_p_logits(logits, p=top_p)
67 |             samples = tf.multinomial(logits, num_samples=1, output_dtype=tf.int32)
68 |             return [
69 |                 next_outputs['presents'] if past is None else tf.concat([past, next_outputs['presents']], axis=-2),
70 |                 samples,
71 |                 tf.concat([output, samples], axis=1)
72 |             ]
73 | 
74 |         past, prev, output = body(None, context, context)
75 | 
76 |         def cond(*args):
77 |             return True
78 | 
79 |         _, _, tokens = tf.while_loop(
80 |             cond=cond, body=body,
81 |             maximum_iterations=length - 1,
82 |             loop_vars=[
83 |                 past,
84 |                 prev,
85 |                 output
86 |             ],
87 |             shape_invariants=[
88 |                 tf.TensorShape(model.past_shape(hparams=hparams, batch_size=batch_size)),
89 |                 tf.TensorShape([batch_size, None]),
90 |                 tf.TensorShape([batch_size, None]),
91 |             ],
92 |             back_prop=False,
93 |         )
94 | 
95 |         return tokens
96 | 


--------------------------------------------------------------------------------
/src/interactive_conditional_samples.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import fire
 4 | import json
 5 | import os
 6 | import numpy as np
 7 | import tensorflow as tf
 8 | 
 9 | import model, sample, encoder
10 | 
11 | def interact_model(
12 |     model_name='124M',
13 |     seed=None,
14 |     nsamples=1,
15 |     batch_size=1,
16 |     length=None,
17 |     temperature=1,
18 |     top_k=0,
19 |     top_p=1,
20 |     models_dir='models',
21 | ):
22 |     """
23 |     Interactively run the model
24 |     :model_name=124M : String, which model to use
25 |     :seed=None : Integer seed for random number generators, fix seed to reproduce
26 |      results
27 |     :nsamples=1 : Number of samples to return total
28 |     :batch_size=1 : Number of batches (only affects speed/memory).  Must divide nsamples.
29 |     :length=None : Number of tokens in generated text, if None (default), is
30 |      determined by model hyperparameters
31 |     :temperature=1 : Float value controlling randomness in boltzmann
32 |      distribution. Lower temperature results in less random completions. As the
33 |      temperature approaches zero, the model will become deterministic and
34 |      repetitive. Higher temperature results in more random completions.
35 |     :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
36 |      considered for each step (token), resulting in deterministic completions,
37 |      while 40 means 40 words are considered at each step. 0 (default) is a
38 |      special setting meaning no restrictions. 40 generally is a good value.
39 |      :models_dir : path to parent folder containing model subfolders
40 |      (i.e. contains the <model_name> folder)
41 |     """
42 |     models_dir = os.path.expanduser(os.path.expandvars(models_dir))
43 |     if batch_size is None:
44 |         batch_size = 1
45 |     assert nsamples % batch_size == 0
46 | 
47 |     enc = encoder.get_encoder(model_name, models_dir)
48 |     hparams = model.default_hparams()
49 |     with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
50 |         hparams.override_from_dict(json.load(f))
51 | 
52 |     if length is None:
53 |         length = hparams.n_ctx // 2
54 |     elif length > hparams.n_ctx:
55 |         raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)
56 | 
57 |     with tf.Session(graph=tf.Graph()) as sess:
58 |         context = tf.placeholder(tf.int32, [batch_size, None])
59 |         np.random.seed(seed)
60 |         tf.set_random_seed(seed)
61 |         output = sample.sample_sequence(
62 |             hparams=hparams, length=length,
63 |             context=context,
64 |             batch_size=batch_size,
65 |             temperature=temperature, top_k=top_k, top_p=top_p
66 |         )
67 | 
68 |         saver = tf.train.Saver()
69 |         ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
70 |         saver.restore(sess, ckpt)
71 | 
72 |         while True:
73 |             raw_text = input("Model prompt >>> ")
74 |             while not raw_text:
75 |                 print('Prompt should not be empty!')
76 |                 raw_text = input("Model prompt >>> ")
77 |             context_tokens = enc.encode(raw_text)
78 |             generated = 0
79 |             for _ in range(nsamples // batch_size):
80 |                 out = sess.run(output, feed_dict={
81 |                     context: [context_tokens for _ in range(batch_size)]
82 |                 })[:, len(context_tokens):]
83 |                 for i in range(batch_size):
84 |                     generated += 1
85 |                     text = enc.decode(out[i])
86 |                     print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
87 |                     print(text)
88 |             print("=" * 80)
89 | 
90 | if __name__ == '__main__':
91 |     fire.Fire(interact_model)
92 | 
93 | 


--------------------------------------------------------------------------------
/src/encoder.py:
--------------------------------------------------------------------------------
  1 | """Byte pair encoding utilities"""
  2 | 
  3 | import os
  4 | import json
  5 | import regex as re
  6 | from functools import lru_cache
  7 | 
  8 | @lru_cache()
  9 | def bytes_to_unicode():
 10 |     """
 11 |     Returns list of utf-8 byte and a corresponding list of unicode strings.
 12 |     The reversible bpe codes work on unicode strings.
 13 |     This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
 14 |     When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
 15 |     This is a signficant percentage of your normal, say, 32K bpe vocab.
 16 |     To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
 17 |     And avoids mapping to whitespace/control characters the bpe code barfs on.
 18 |     """
 19 |     bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1))
 20 |     cs = bs[:]
 21 |     n = 0
 22 |     for b in range(2**8):
 23 |         if b not in bs:
 24 |             bs.append(b)
 25 |             cs.append(2**8+n)
 26 |             n += 1
 27 |     cs = [chr(n) for n in cs]
 28 |     return dict(zip(bs, cs))
 29 | 
 30 | def get_pairs(word):
 31 |     """Return set of symbol pairs in a word.
 32 | 
 33 |     Word is represented as tuple of symbols (symbols being variable-length strings).
 34 |     """
 35 |     pairs = set()
 36 |     prev_char = word[0]
 37 |     for char in word[1:]:
 38 |         pairs.add((prev_char, char))
 39 |         prev_char = char
 40 |     return pairs
 41 | 
 42 | class Encoder:
 43 |     def __init__(self, encoder, bpe_merges, errors='replace'):
 44 |         self.encoder = encoder
 45 |         self.decoder = {v:k for k,v in self.encoder.items()}
 46 |         self.errors = errors # how to handle errors in decoding
 47 |         self.byte_encoder = bytes_to_unicode()
 48 |         self.byte_decoder = {v:k for k, v in self.byte_encoder.items()}
 49 |         self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
 50 |         self.cache = {}
 51 | 
 52 |         # Should haved added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
 53 |         self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""")
 54 | 
 55 |     def bpe(self, token):
 56 |         if token in self.cache:
 57 |             return self.cache[token]
 58 |         word = tuple(token)
 59 |         pairs = get_pairs(word)
 60 | 
 61 |         if not pairs:
 62 |             return token
 63 | 
 64 |         while True:
 65 |             bigram = min(pairs, key = lambda pair: self.bpe_ranks.get(pair, float('inf')))
 66 |             if bigram not in self.bpe_ranks:
 67 |                 break
 68 |             first, second = bigram
 69 |             new_word = []
 70 |             i = 0
 71 |             while i < len(word):
 72 |                 try:
 73 |                     j = word.index(first, i)
 74 |                     new_word.extend(word[i:j])
 75 |                     i = j
 76 |                 except:
 77 |                     new_word.extend(word[i:])
 78 |                     break
 79 | 
 80 |                 if word[i] == first and i < len(word)-1 and word[i+1] == second:
 81 |                     new_word.append(first+second)
 82 |                     i += 2
 83 |                 else:
 84 |                     new_word.append(word[i])
 85 |                     i += 1
 86 |             new_word = tuple(new_word)
 87 |             word = new_word
 88 |             if len(word) == 1:
 89 |                 break
 90 |             else:
 91 |                 pairs = get_pairs(word)
 92 |         word = ' '.join(word)
 93 |         self.cache[token] = word
 94 |         return word
 95 | 
 96 |     def encode(self, text):
 97 |         bpe_tokens = []
 98 |         for token in re.findall(self.pat, text):
 99 |             token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8'))
100 |             bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' '))
101 |         return bpe_tokens
102 | 
103 |     def decode(self, tokens):
104 |         text = ''.join([self.decoder[token] for token in tokens])
105 |         text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors=self.errors)
106 |         return text
107 | 
108 | def get_encoder(model_name, models_dir):
109 |     with open(os.path.join(models_dir, model_name, 'encoder.json'), 'r') as f:
110 |         encoder = json.load(f)
111 |     with open(os.path.join(models_dir, model_name, 'vocab.bpe'), 'r', encoding="utf-8") as f:
112 |         bpe_data = f.read()
113 |     bpe_merges = [tuple(merge_str.split()) for merge_str in bpe_data.split('\n')[1:-1]]
114 |     return Encoder(
115 |         encoder=encoder,
116 |         bpe_merges=bpe_merges,
117 |     )
118 | 


--------------------------------------------------------------------------------
/model_card.md:
--------------------------------------------------------------------------------
 1 | # GPT-2 model card
 2 | 
 3 | Last updated: November 2019
 4 | 
 5 | Inspired by [Model Cards for Model Reporting (Mitchell et al.)](https://arxiv.org/abs/1810.03993), we’re providing some accompanying information about the GPT-2 family of models we're releasing.
 6 | 
 7 | ## Model Details.
 8 | 
 9 | This model was developed by researchers at OpenAI to help us understand how the capabilities of language model capabilities scale as a function of the size of the models (by parameter count) combined with very large internet-scale datasets (WebText).
10 | 
11 | ### Model date
12 | 
13 | February 2019, trained on data that cuts off at the end of 2017.
14 | 
15 | ### Model type
16 | 
17 | Language model
18 | 
19 | ### Model version
20 | 
21 | 1.5 billion parameters: the fourth and largest GPT-2 version. We have also released 124 million, 355 million, and 774 million parameter models.
22 | 
23 | ### Paper or other resource for more information
24 | [Blog post](https://openai.com/blog/better-language-models/) and [paper](https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)
25 | 
26 | ### Where to send questions or comments about the model
27 | Please use this [Google Form](https://forms.gle/A7WBSbTY2EkKdroPA)
28 | 
29 | ## Intended Uses:
30 | 
31 | ### Primary intended uses
32 | 
33 | The primary intended users of these models are *AI researchers and practitioners*.
34 | 
35 | We primarily imagine these language models will be used by researchers to better understand the behaviors, capabilities, biases, and constraints of large-scale generative language models.
36 | 
37 | ### Secondary uses
38 | 
39 | Here are some secondary use cases we believe are likely:
40 | 
41 | - **Writing assistance**: Grammar assistance, autocompletion (for normal prose or code)
42 | - **Creative writing and art**: exploring the generation of creative, fictional texts; aiding creation of poetry and other literary art.
43 | - **Entertainment**: Creation of games, chat bots, and amusing generations.
44 | 
45 | ### Out-of-scope use cases
46 | 
47 | Because large-scale language models like GPT-2 do not distinguish fact from fiction, we don’t support use-cases that require the generated text to be true.
48 | 
49 | Additionally, language models like GPT-2 reflect the biases inherent to the systems they were trained on, so we do not recommend that they be deployed into systems that interact with humans unless the deployers first carry out a study of biases relevant to the intended use-case. We found no statistically significant difference in gender, race, and religious bias probes between 774M and 1.5B, implying all versions of GPT-2 should be approached with similar levels of caution around use cases that are sensitive to biases around human attributes.
50 | 
51 | ## Evaluation Data
52 | 
53 | ### Datasets
54 | 
55 | This model was trained on (and evaluated against) WebText, a dataset consisting of the text contents of 45 million links posted by users of the ‘Reddit’ social network. WebText is made of data derived from outbound links from Reddit and does not consist of data taken directly from Reddit itself. Before generating the dataset we used a blocklist to ensure we didn’t sample from a variety of subreddits which contain sexually explicit or otherwise offensive content.
56 | 
57 | To get a sense of the data that went into GPT-2, we’ve [published a list](domains.txt) of the top 1,000 domains present in WebText and their frequency.  The top 15 domains by volume in WebText are: Google, Archive, Blogspot, GitHub, NYTimes, Wordpress, Washington Post, Wikia, BBC, The Guardian, eBay, Pastebin, CNN, Yahoo!, and the Huffington Post.
58 | 
59 | ### Motivation
60 | 
61 | The motivation behind WebText was to create an Internet-scale, heterogeneous dataset that we could use to test large-scale language models against. WebText was (and is) intended to be primarily for research purposes rather than production purposes.
62 | 
63 | ### Caveats and Recommendations
64 | 
65 | Because GPT-2 is an internet-scale language model, it’s currently difficult to know what disciplined testing procedures can be applied to it to fully understand its capabilities and how the data it is trained on influences its vast range of outputs. We recommend researchers investigate these aspects of the model and share their results.
66 | 
67 | Additionally, as indicated in our discussion of issues relating to potential misuse of the model, it remains unclear what the long-term dynamics are of detecting outputs from these models. We conducted [in-house automated ML-based detection research](https://github.com/openai/gpt-2-output-dataset/tree/master/detector) using simple classifiers, zero shot, and fine-tuning methods. Our fine-tuned detector model reached accuracy levels of approximately 95%. However, no one detection method is a panacea; automated ML-based detection, human detection, human-machine teaming, and metadata-based detection are all methods that can be combined for more confident classification. Developing better approaches to detection today will give us greater intuitions when thinking about future models and could help us understand ahead of time if detection methods will eventually become ineffective.
68 | 
69 | 
70 | 


--------------------------------------------------------------------------------
/src/model.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | from tensorflow.contrib.training import HParams
  4 | 
  5 | def default_hparams():
  6 |     return HParams(
  7 |         n_vocab=0,
  8 |         n_ctx=1024,
  9 |         n_embd=768,
 10 |         n_head=12,
 11 |         n_layer=12,
 12 |     )
 13 | 
 14 | def shape_list(x):
 15 |     """Deal with dynamic shape in tensorflow cleanly."""
 16 |     static = x.shape.as_list()
 17 |     dynamic = tf.shape(x)
 18 |     return [dynamic[i] if s is None else s for i, s in enumerate(static)]
 19 | 
 20 | def softmax(x, axis=-1):
 21 |     x = x - tf.reduce_max(x, axis=axis, keepdims=True)
 22 |     ex = tf.exp(x)
 23 |     return ex / tf.reduce_sum(ex, axis=axis, keepdims=True)
 24 | 
 25 | def gelu(x):
 26 |     return 0.5*x*(1+tf.tanh(np.sqrt(2/np.pi)*(x+0.044715*tf.pow(x, 3))))
 27 | 
 28 | def norm(x, scope, *, axis=-1, epsilon=1e-5):
 29 |     """Normalize to mean = 0, std = 1, then do a diagonal affine transform."""
 30 |     with tf.variable_scope(scope):
 31 |         n_state = x.shape[-1].value
 32 |         g = tf.get_variable('g', [n_state], initializer=tf.constant_initializer(1))
 33 |         b = tf.get_variable('b', [n_state], initializer=tf.constant_initializer(0))
 34 |         u = tf.reduce_mean(x, axis=axis, keepdims=True)
 35 |         s = tf.reduce_mean(tf.square(x-u), axis=axis, keepdims=True)
 36 |         x = (x - u) * tf.rsqrt(s + epsilon)
 37 |         x = x*g + b
 38 |         return x
 39 | 
 40 | def split_states(x, n):
 41 |     """Reshape the last dimension of x into [n, x.shape[-1]/n]."""
 42 |     *start, m = shape_list(x)
 43 |     return tf.reshape(x, start + [n, m//n])
 44 | 
 45 | def merge_states(x):
 46 |     """Smash the last two dimensions of x into a single dimension."""
 47 |     *start, a, b = shape_list(x)
 48 |     return tf.reshape(x, start + [a*b])
 49 | 
 50 | def conv1d(x, scope, nf, *, w_init_stdev=0.02):
 51 |     with tf.variable_scope(scope):
 52 |         *start, nx = shape_list(x)
 53 |         w = tf.get_variable('w', [1, nx, nf], initializer=tf.random_normal_initializer(stddev=w_init_stdev))
 54 |         b = tf.get_variable('b', [nf], initializer=tf.constant_initializer(0))
 55 |         c = tf.reshape(tf.matmul(tf.reshape(x, [-1, nx]), tf.reshape(w, [-1, nf]))+b, start+[nf])
 56 |         return c
 57 | 
 58 | def attention_mask(nd, ns, *, dtype):
 59 |     """1's in the lower triangle, counting from the lower right corner.
 60 | 
 61 |     Same as tf.matrix_band_part(tf.ones([nd, ns]), -1, ns-nd), but doesn't produce garbage on TPUs.
 62 |     """
 63 |     i = tf.range(nd)[:,None]
 64 |     j = tf.range(ns)
 65 |     m = i >= j - ns + nd
 66 |     return tf.cast(m, dtype)
 67 | 
 68 | 
 69 | def attn(x, scope, n_state, *, past, hparams):
 70 |     assert x.shape.ndims == 3  # Should be [batch, sequence, features]
 71 |     assert n_state % hparams.n_head == 0
 72 |     if past is not None:
 73 |         assert past.shape.ndims == 5  # Should be [batch, 2, heads, sequence, features], where 2 is [k, v]
 74 | 
 75 |     def split_heads(x):
 76 |         # From [batch, sequence, features] to [batch, heads, sequence, features]
 77 |         return tf.transpose(split_states(x, hparams.n_head), [0, 2, 1, 3])
 78 | 
 79 |     def merge_heads(x):
 80 |         # Reverse of split_heads
 81 |         return merge_states(tf.transpose(x, [0, 2, 1, 3]))
 82 | 
 83 |     def mask_attn_weights(w):
 84 |         # w has shape [batch, heads, dst_sequence, src_sequence], where information flows from src to dst.
 85 |         _, _, nd, ns = shape_list(w)
 86 |         b = attention_mask(nd, ns, dtype=w.dtype)
 87 |         b = tf.reshape(b, [1, 1, nd, ns])
 88 |         w = w*b - tf.cast(1e10, w.dtype)*(1-b)
 89 |         return w
 90 | 
 91 |     def multihead_attn(q, k, v):
 92 |         # q, k, v have shape [batch, heads, sequence, features]
 93 |         w = tf.matmul(q, k, transpose_b=True)
 94 |         w = w * tf.rsqrt(tf.cast(v.shape[-1].value, w.dtype))
 95 | 
 96 |         w = mask_attn_weights(w)
 97 |         w = softmax(w)
 98 |         a = tf.matmul(w, v)
 99 |         return a
100 | 
101 |     with tf.variable_scope(scope):
102 |         c = conv1d(x, 'c_attn', n_state*3)
103 |         q, k, v = map(split_heads, tf.split(c, 3, axis=2))
104 |         present = tf.stack([k, v], axis=1)
105 |         if past is not None:
106 |             pk, pv = tf.unstack(past, axis=1)
107 |             k = tf.concat([pk, k], axis=-2)
108 |             v = tf.concat([pv, v], axis=-2)
109 |         a = multihead_attn(q, k, v)
110 |         a = merge_heads(a)
111 |         a = conv1d(a, 'c_proj', n_state)
112 |         return a, present
113 | 
114 | 
115 | def mlp(x, scope, n_state, *, hparams):
116 |     with tf.variable_scope(scope):
117 |         nx = x.shape[-1].value
118 |         h = gelu(conv1d(x, 'c_fc', n_state))
119 |         h2 = conv1d(h, 'c_proj', nx)
120 |         return h2
121 | 
122 | 
123 | def block(x, scope, *, past, hparams):
124 |     with tf.variable_scope(scope):
125 |         nx = x.shape[-1].value
126 |         a, present = attn(norm(x, 'ln_1'), 'attn', nx, past=past, hparams=hparams)
127 |         x = x + a
128 |         m = mlp(norm(x, 'ln_2'), 'mlp', nx*4, hparams=hparams)
129 |         x = x + m
130 |         return x, present
131 | 
132 | def past_shape(*, hparams, batch_size=None, sequence=None):
133 |     return [batch_size, hparams.n_layer, 2, hparams.n_head, sequence, hparams.n_embd // hparams.n_head]
134 | 
135 | def expand_tile(value, size):
136 |     """Add a new axis of given size."""
137 |     value = tf.convert_to_tensor(value, name='value')
138 |     ndims = value.shape.ndims
139 |     return tf.tile(tf.expand_dims(value, axis=0), [size] + [1]*ndims)
140 | 
141 | def positions_for(tokens, past_length):
142 |     batch_size = tf.shape(tokens)[0]
143 |     nsteps = tf.shape(tokens)[1]
144 |     return expand_tile(past_length + tf.range(nsteps), batch_size)
145 | 
146 | 
147 | def model(hparams, X, past=None, scope='model', reuse=False):
148 |     with tf.variable_scope(scope, reuse=reuse):
149 |         results = {}
150 |         batch, sequence = shape_list(X)
151 | 
152 |         wpe = tf.get_variable('wpe', [hparams.n_ctx, hparams.n_embd],
153 |                              initializer=tf.random_normal_initializer(stddev=0.01))
154 |         wte = tf.get_variable('wte', [hparams.n_vocab, hparams.n_embd],
155 |                              initializer=tf.random_normal_initializer(stddev=0.02))
156 |         past_length = 0 if past is None else tf.shape(past)[-2]
157 |         h = tf.gather(wte, X) + tf.gather(wpe, positions_for(X, past_length))
158 | 
159 |         # Transformer
160 |         presents = []
161 |         pasts = tf.unstack(past, axis=1) if past is not None else [None] * hparams.n_layer
162 |         assert len(pasts) == hparams.n_layer
163 |         for layer, past in enumerate(pasts):
164 |             h, present = block(h, 'h%d' % layer, past=past, hparams=hparams)
165 |             presents.append(present)
166 |         results['present'] = tf.stack(presents, axis=1)
167 |         h = norm(h, 'ln_f')
168 | 
169 |         # Language model loss.  Do tokens <n predict token n?
170 |         h_flat = tf.reshape(h, [batch*sequence, hparams.n_embd])
171 |         logits = tf.matmul(h_flat, wte, transpose_b=True)
172 |         logits = tf.reshape(logits, [batch, sequence, hparams.n_vocab])
173 |         results['logits'] = logits
174 |         return results
175 | 


--------------------------------------------------------------------------------
/domains.txt:
--------------------------------------------------------------------------------
   1 | 1542261 google
   2 | 596207 archive
   3 | 456344 blogspot
   4 | 414695 github
   5 | 333160 nytimes
   6 | 321622 wordpress
   7 | 315368 washingtonpost
   8 | 313137 wikia
   9 | 311917 bbc
  10 | 246303 theguardian
  11 | 210714 ebay
  12 | 209416 pastebin
  13 | 199360 cnn
  14 | 196124 yahoo
  15 | 186668 huffingtonpost
  16 | 186137 go
  17 | 183592 reuters
  18 | 183080 imdb
  19 | 160553 goo
  20 | 139965 nih
  21 | 135562 cbc
  22 | 128011 apple
  23 | 125615 medium
  24 | 118676 dailymail
  25 | 108012 steampowered
  26 | 106417 independent
  27 | 105239 etsy
  28 | 98941 craigslist
  29 | 93048 businessinsider
  30 | 92712 telegraph
  31 | 90262 wizards
  32 | 83266 usatoday
  33 | 80384 thehill
  34 | 79655 nhl
  35 | 79494 foxnews
  36 | 79167 taobao
  37 | 78070 bloomberg
  38 | 77515 npr
  39 | 77407 mlb
  40 | 77172 latimes
  41 | 75676 megalodon
  42 | 72525 espn
  43 | 72523 kickstarter
  44 | 71743 breitbart
  45 | 69334 abc
  46 | 68009 newegg
  47 | 67008 wwe
  48 | 66278 myanimelist
  49 | 65520 microsoft
  50 | 64723 buzzfeed
  51 | 63162 vice
  52 | 62911 indiatimes
  53 | 61845 forbes
  54 | 61772 tappedout
  55 | 60889 wsj
  56 | 60240 vid
  57 | 60239 battle
  58 | 59996 adf
  59 | 58706 politico
  60 | 58345 redditgifts
  61 | 56769 nexusmods
  62 | 56469 goodreads
  63 | 54866 magiccards
  64 | 53973 nbcnews
  65 | 53060 gamepedia
  66 | 52110 mediafire
  67 | 50567 time
  68 | 50144 cbsnews
  69 | 49203 ppy
  70 | 48442 gstatic
  71 | 48042 nfl
  72 | 47460 steamusercontent
  73 | 47046 thestar
  74 | 46603 bugguide
  75 | 46340 fanfiction
  76 | 45505 mturk
  77 | 45458 cbslocal
  78 | 44729 theglobeandmail
  79 | 44134 nydailynews
  80 | 42992 theatlantic
  81 | 42941 netflix
  82 | 42328 theverge
  83 | 41952 smh
  84 | 40694 nbcsports
  85 | 40613 cnbc
  86 | 40469 slate
  87 | 40071 ign
  88 | 39655 dotabuff
  89 | 38968 wired
  90 | 38779 chicagotribune
  91 | 38590 urbandictionary
  92 | 38575 rt
  93 | 38092 wuxiaworld
  94 | 38065 wowhead
  95 | 37954 wolframalpha
  96 | 37749 guardian
  97 | 37594 xboxdvr
  98 | 36841 nypost
  99 | 36741 ravelry
 100 | 36321 thedailybeast
 101 | 36298 nba
 102 | 36188 yelp
 103 | 36008 arstechnica
 104 | 35485 csgo
 105 | 35365 flic
 106 | 35269 stackexchange
 107 | 35124 vidble
 108 | 35024 googleusercontent
 109 | 34311 msn
 110 | 34121 gizmodo
 111 | 34120 boardgamegeek
 112 | 33867 aljazeera
 113 | 33598 rawstory
 114 | 33516 scryfall
 115 | 33467 bleacherreport
 116 | 33419 bit
 117 | 33395 thinkprogress
 118 | 33170 dailycaller
 119 | 32843 ap
 120 | 32433 fangraphs
 121 | 31742 salon
 122 | 31728 mirror
 123 | 31496 nintendo
 124 | 31294 nationalpost
 125 | 31278 nasa
 126 | 31110 oddshot
 127 | 31057 hltv
 128 | 30952 amzn
 129 | 30877 quora
 130 | 30586 engadget
 131 | 30397 stackoverflow
 132 | 30201 aliexpress
 133 | 29710 cnet
 134 | 28850 leagueoflegends
 135 | 28822 surveymonkey
 136 | 28704 ctvnews
 137 | 28650 walmart
 138 | 28644 plays
 139 | 28536 sfgate
 140 | 28375 cbssports
 141 | 28210 globo
 142 | 27992 discogs
 143 | 27630 wiktionary
 144 | 27588 ibb
 145 | 27544 stuff
 146 | 27349 nature
 147 | 27112 news
 148 | 27020 biblegateway
 149 | 26801 subtletv
 150 | 26427 change
 151 | 26355 zippyshare
 152 | 26311 guildwars2
 153 | 26231 vox
 154 | 26205 zkillboard
 155 | 26174 techcrunch
 156 | 25993 economist
 157 | 25964 globalnews
 158 | 25621 washingtontimes
 159 | 25610 hollywoodreporter
 160 | 25351 archiveofourown
 161 | 25336 ibtimes
 162 | 25257 newsweek
 163 | 25139 zerohedge
 164 | 25074 fav
 165 | 25050 sciencedirect
 166 | 24894 bestbuy
 167 | 24870 spiegel
 168 | 24869 247sports
 169 | 24866 smmry
 170 | 24764 xda-developers
 171 | 24726 tvtropes
 172 | 24698 phys
 173 | 24663 teamliquid
 174 | 24619 state
 175 | 23953 gleam
 176 | 23676 sbnation
 177 | 23644 asahi
 178 | 23620 foxsports
 179 | 23240 ndtv
 180 | 23189 si
 181 | 23183 alternet
 182 | 23009 redbubble
 183 | 22846 metro
 184 | 22845 theonion
 185 | 22835 playstation
 186 | 22808 washingtonexaminer
 187 | 22682 thehindu
 188 | 22557 espncricinfo
 189 | 22482 mozilla
 190 | 22219 op
 191 | 22038 t
 192 | 21984 nj
 193 | 21921 indianexpress
 194 | 21707 apnews
 195 | 21603 dw
 196 | 21422 nationalgeographic
 197 | 21399 pinterest
 198 | 21368 ft
 199 | 21319 wiley
 200 | 21254 about
 201 | 21074 skysports
 202 | 21033 gamespot
 203 | 21014 dailykos
 204 | 21009 goal
 205 | 20858 patheos
 206 | 20842 irishtimes
 207 | 20664 variety
 208 | 20592 kotaku
 209 | 20584 mashable
 210 | 20575 scientificamerican
 211 | 20448 basketball-reference
 212 | 20262 yle
 213 | 20218 theage
 214 | 20176 usnews
 215 | 20133 animenewsnetwork
 216 | 20092 livejournal
 217 | 20068
 218 | 20024 pbs
 219 | 19802 nhk
 220 | 19741 newyorker
 221 | 19727 seattletimes
 222 | 19672 mlssoccer
 223 | 19619 meetup
 224 | 19543 nzherald
 225 | 19509 philly
 226 | 19496 uol
 227 | 19470 patreon
 228 | 19429 wikileaks
 229 | 19400 gravitytales
 230 | 19294 oregonlive
 231 | 19267 xbox
 232 | 19216 linkedin
 233 | 19202 crunchyroll
 234 | 19045 target
 235 | 19021 ew
 236 | 18922 redditpoll
 237 | 18875 homedepot
 238 | 18867 qz
 239 | 18865 donmai
 240 | 18653 baseball-reference
 241 | 18646 talkingpointsmemo
 242 | 18576 pathofexile
 243 | 18536 makeameme
 244 | 18489 postimg
 245 | 18308 clyp
 246 | 18175 scribd
 247 | 18120 thegatewaypundit
 248 | 18097 removeddit
 249 | 18063 deadspin
 250 | 18049 sciencedaily
 251 | 18019 huffpost
 252 | 17987 dallasnews
 253 | 17956 europa
 254 | 17878 merriam-webster
 255 | 17816 haaretz
 256 | 17746 deadline
 257 | 17637 msnbc
 258 | 17579 hindustantimes
 259 | 17531 nymag
 260 | 17429 gph
 261 | 17208 typepad
 262 | 17204 express
 263 | 17098 naver
 264 | 17085 bizjournals
 265 | 17084 mlive
 266 | 16834 rollingstone
 267 | 16793 motherjones
 268 | 16704 okcupid
 269 | 16441 tinyurl
 270 | 16410 espnfc
 271 | 16397 bostonglobe
 272 | 16374 thingiverse
 273 | 16351 denverpost
 274 | 16332 bitcointalk
 275 | 16256 timesofisrael
 276 | 16209 xnxx
 277 | 16202 wikihow
 278 | 16051 neopets
 279 | 16043 indiegogo
 280 | 16033 al
 281 | 16032 chron
 282 | 16004 avclub
 283 | 15970 marketwatch
 284 | 15933 mercurynews
 285 | 15675 startribune
 286 | 15646 pro-football-reference
 287 | 15568 d20pfsrd
 288 | 15545 pcgamer
 289 | 15451 reason
 290 | 15422 uesp
 291 | 15356 lds
 292 | 15152 polygon
 293 | 15132 humblebundle
 294 | 14962 tradingview
 295 | 14931 baltimoresun
 296 | 14914 strava
 297 | 14912 firstpost
 298 | 14856 commondreams
 299 | 14801 sky
 300 | 14739 eventbrite
 301 | 14722 nicovideo
 302 | 14697 fortune
 303 | 14693 knowyourmeme
 304 | 14666 robertsspaceindustries
 305 | 14471 pitchfork
 306 | 14466 psychologytoday
 307 | 14435 combodeck
 308 | 14392 mixcloud
 309 | 14372 lemonde
 310 | 14290 sciencemag
 311 | 14060 jpost
 312 | 13926 miamiherald
 313 | 13902 patch
 314 | 13850 nationalreview
 315 | 13849 gofundme
 316 | 13798 thelocal
 317 | 13763 derpibooru
 318 | 13726 techdirt
 319 | 13658 townhall
 320 | 13596 mtg
 321 | 13588 gettyimages
 322 | 13530 mit
 323 | 13436 challonge
 324 | 13369 mediaite
 325 | 13357 tsn
 326 | 13350 pokemonshowdown
 327 | 13176 neogaf
 328 | 13130 publico
 329 | 13126 snopes
 330 | 13092 scmp
 331 | 13082 cleveland
 332 | 13044 thesun
 333 | 13025 mtggoldfish
 334 | 12994 freep
 335 | 12984 grailed
 336 | 12948 standard
 337 | 12923 theconversation
 338 | 12913 upi
 339 | 12870 bing
 340 | 12778 blockchain
 341 | 12774 people
 342 | 12771 arxiv
 343 | 12760 hearthpwn
 344 | 12668 reference
 345 | 12626 edhrec
 346 | 12611 sputniknews
 347 | 12551 nordstrom
 348 | 12550 lapresse
 349 | 12496 metacritic
 350 | 12447 last
 351 | 12395 ajc
 352 | 12355 mangadex
 353 | 12349 ycombinator
 354 | 12345 csmonitor
 355 | 12240 sportsnet
 356 | 12229 cornell
 357 | 12205 smithsonianmag
 358 | 12201 sephora
 359 | 12194 bulbagarden
 360 | 12181 japantimes
 361 | 12171 zdnet
 362 | 12152 comicbook
 363 | 12139 whitehouse
 364 | 12109 theregister
 365 | 12089 libsyn
 366 | 12052 asos
 367 | 12016 neatclip
 368 | 12001 imirhil
 369 | 12000 boston
 370 | 11973 behance
 371 | 11966 eveonline
 372 | 11954 androidpolice
 373 | 11935 livescience
 374 | 11843 instructables
 375 | 11817 hs
 376 | 11788 infowars
 377 | 11712 ca
 378 | 11704 runescape
 379 | 11699 suntimes
 380 | 11697 eurogamer
 381 | 11654 roblox
 382 | 11622 genius
 383 | 11602 stltoday
 384 | 11499 elpais
 385 | 11494 motorsport
 386 | 11461 ceddit
 387 | 11426 france24
 388 | 11373 bungie
 389 | 11371 youtubedoubler
 390 | 11362 openload
 391 | 11348 jstor
 392 | 11328 thefreedictionary
 393 | 11307 inquisitr
 394 | 11215 nhentai
 395 | 11204 zeit
 396 | 11198 ikea
 397 | 11114 springer
 398 | 11108 tripadvisor
 399 | 11082 thescore
 400 | 11036 kerbalspaceprogram
 401 | 11007 cdc
 402 | 10995 dailywire
 403 | 10965 gawker
 404 | 10953 a
 405 | 10950 brooksbaseball
 406 | 10940 dn
 407 | 10927 sltrib
 408 | 10867 brickset
 409 | 10823 dictionary
 410 | 10821 squarespace
 411 | 10819 battlefield
 412 | 10807 harvard
 413 | 10786 afpbb
 414 | 10734 steemit
 415 | 10730 billboard
 416 | 10707 tampabay
 417 | 10654 nola
 418 | 10621 stanford
 419 | 10602 sbs
 420 | 10524 cc
 421 | 10520 dailydot
 422 | 10510 straitstimes
 423 | 10493 itch
 424 | 10490 foreignpolicy
 425 | 10465 vancouversun
 426 | 10440 rottentomatoes
 427 | 10419 dnainfo
 428 | 10389 digi24
 429 | 10348 dropboxusercontent
 430 | 10332 complex
 431 | 10330 scp-wiki
 432 | 10327 prnt
 433 | 10313 ottawacitizen
 434 | 10304 anandtech
 435 | 10269 thenation
 436 | 10253 fivethirtyeight
 437 | 10244 newscientist
 438 | 10240 svt
 439 | 10240 inquirer
 440 | 10236 coindesk
 441 | 10227 codepen
 442 | 10208 lichess
 443 | 10204 sankei
 444 | 10189 ted
 445 | 10181 roosterteeth
 446 | 10170 livemint
 447 | 10161 teamfortress
 448 | 10141 sourceforge
 449 | 10119 sapo
 450 | 10113 countle
 451 | 10086 mtv
 452 | 10075 sacbee
 453 | 10066 fimfiction
 454 | 10057 hentai-foundry
 455 | 10054 gamesplanet
 456 | 10044 io9
 457 | 10032 lifehacker
 458 | 10007 cracked
 459 | 9991 mainichi
 460 | 9984 itmedia
 461 | 9966 warthunder
 462 | 9936 nos
 463 | 9935 boingboing
 464 | 9925 vulture
 465 | 9904 lanacion
 466 | 9892 qualtrics
 467 | 9884 muthead
 468 | 9856 jcrew
 469 | 9814 jsonline
 470 | 9787 spacebattles
 471 | 9748 worldstarhiphop
 472 | 9734 jalopnik
 473 | 9721 welt
 474 | 9717 curbed
 475 | 9708 dbr
 476 | 9705 mmafighting
 477 | 9697 bigcartel
 478 | 9682 transfermarkt
 479 | 9680 vlive
 480 | 9659 vanityfair
 481 | 9658 dawn
 482 | 9621 dnaindia
 483 | 9601 theblaze
 484 | 9599 allrecipes
 485 | 9576 thejournal
 486 | 9572 dailystar
 487 | 9521 minecraftforum
 488 | 9505 theweek
 489 | 9502 kansascity
 490 | 9494 anilist
 491 | 9443 gog
 492 | 9420 bato
 493 | 9401 oxforddictionaries
 494 | 9400 soompi
 495 | 9394 sagepub
 496 | 9389 wikiwand
 497 | 9382 lolking
 498 | 9322 torontosun
 499 | 9319 mangapanda
 500 | 9316 politifact
 501 | 9306 realclearpolitics
 502 | 9278 tagpro
 503 | 9261 webmd
 504 | 9206 app
 505 | 9202 hotnews
 506 | 9184 9news
 507 | 9174 bhphotovideo
 508 | 9147 giantbomb
 509 | 9132 gamestop
 510 | 9073 azcentral
 511 | 9053 noaa
 512 | 9040 repubblica
 513 | 9021 mangaupdates
 514 | 8998 space
 515 | 8998 researchgate
 516 | 8971 bitcoin
 517 | 8957 sueddeutsche
 518 | 8898 rightwingwatch
 519 | 8892 mediacru
 520 | 8890 afl
 521 | 8862 fasttech
 522 | 8858 tmz
 523 | 8841 orlandosentinel
 524 | 8832 tomshardware
 525 | 8828 altomfotball
 526 | 8822 mtgprice
 527 | 8821 haskell
 528 | 8816 discovery
 529 | 8810 destinytracker
 530 | 8808 massdrop
 531 | 8800 csgolounge
 532 | 8791 weather
 533 | 8778 daddyleagues
 534 | 8720 govtrack
 535 | 8678 mentalfloss
 536 | 8678 justice
 537 | 8663 frontier
 538 | 8655 youporn
 539 | 8641 paradoxplaza
 540 | 8640 rockstargames
 541 | 8632 derstandard
 542 | 8622 pinknews
 543 | 8619 macrumors
 544 | 8598 gamefaqs
 545 | 8587 thepiratebay
 546 | 8586 4chan
 547 | 8582 post-gazette
 548 | 8573 faz
 549 | 8563 e-hentai
 550 | 8530 jiji
 551 | 8525 quoracdn
 552 | 8519 fullmatchesandshows
 553 | 8516 sun-sentinel
 554 | 8513 xboxclips
 555 | 8488 financialpost
 556 | 8476 audible
 557 | 8439 investopedia
 558 | 8425 loc
 559 | 8418 venturebeat
 560 | 8414 amazonaws
 561 | 8368 ubi
 562 | 8345 etymonline
 563 | 8326 wsws
 564 | 8316 jezebel
 565 | 8300 americanthinker
 566 | 8284 wikidot
 567 | 8269 digitaltrends
 568 | 8260 nrk
 569 | 8232 weebly
 570 | 8228 thenextweb
 571 | 8225 snahp
 572 | 8223 gematsu
 573 | 8210 daum
 574 | 8206 ea
 575 | 8189 liverpoolecho
 576 | 8186 freebeacon
 577 | 8178 thetimes
 578 | 8168 naturalcrit
 579 | 8153 warframe
 580 | 8150 1drv
 581 | 8143 gap
 582 | 8131 seriouseats
 583 | 8119 myfigurecollection
 584 | 8109 gov
 585 | 8086 eporner
 586 | 8080 hulu
 587 | 8077 senate
 588 | 8046 esquire
 589 | 8015 gosugamers
 590 | 8000 radionz
 591 | 7997 eater
 592 | 7982 politicususa
 593 | 7978 rte
 594 | 7956 marvel
 595 | 7942 metronews
 596 | 7917 starcitygames
 597 | 7917 hotair
 598 | 7914 marca
 599 | 7872 eurekalert
 600 | 7840 screenrant
 601 | 7834 dota2
 602 | 7797 truth-out
 603 | 7784 dell
 604 | 7783 eldiario
 605 | 7782 pcworld
 606 | 7782 doi
 607 | 7780 comicbookresources
 608 | 7765 dr
 609 | 7729 howstuffworks
 610 | 7727 gocomics
 611 | 7715 worldoftanks
 612 | 7707 tandfonline
 613 | 7690 examiner
 614 | 7688 newrepublic
 615 | 7682 curseforge
 616 | 7680 findlaw
 617 | 7673 nikkei
 618 | 7665 heraldsun
 619 | 7652 podbean
 620 | 7645 aftonbladet
 621 | 7638 duckduckgo
 622 | 7633 ynetnews
 623 | 7629 timesofindia
 624 | 7628 freshphase
 625 | 7591 westeros
 626 | 7576 youjizz
 627 | 7574 spectator
 628 | 7548 justia
 629 | 7537 antiwar
 630 | 7536 mmajunkie
 631 | 7516 yomiuri
 632 | 7485 newstatesman
 633 | 7481 greenmangaming
 634 | 7475 joystiq
 635 | 7444 jsfiddle
 636 | 7424 anime-planet
 637 | 7415 counterpunch
 638 | 7410 autosport
 639 | 7395 archlinux
 640 | 7384 berkeley
 641 | 7383 smbc-comics
 642 | 7374 rockpapershotgun
 643 | 7372 pjmedia
 644 | 7367 estadao
 645 | 7365 intoday
 646 | 7361 newsmax
 647 | 7346 newsbusters
 648 | 7337 grantland
 649 | 7329 voanews
 650 | 7292 myshopify
 651 | 7286 wnd
 652 | 7265 9to5mac
 653 | 7257 hurriyetdailynews
 654 | 7229 bleedingcool
 655 | 7225 indiewire
 656 | 7222 radio-canada
 657 | 7216 viewsync
 658 | 7211 cambridge
 659 | 7204 drsd
 660 | 7197 house
 661 | 7185 uproxx
 662 | 7152 mlbtraderumors
 663 | 7145 gamasutra
 664 | 7134 bricklink
 665 | 7122 foodnetwork
 666 | 7122 presstv
 667 | 7119 opensecrets
 668 | 7118 canada
 669 | 7116 bgr
 670 | 7097 democracynow
 671 | 7091 businessweek
 672 | 7085 smash
 673 | 7080 usda
 674 | 7078 cloudfront
 675 | 7044 psu
 676 | 7028 detroitnews
 677 | 7028 explosm
 678 | 7013 woobox
 679 | 7011 football-italia
 680 | 7005 academia
 681 | 6948 channelnewsasia
 682 | 6927 siliconera
 683 | 6923 rei
 684 | 6917 deseretnews
 685 | 6916 supload
 686 | 6914 mises
 687 | 6905 rotoworld
 688 | 6886 gsmarena
 689 | 6878 rappler
 690 | 6876 kijiji
 691 | 6866 metal-archives
 692 | 6826 theaustralian
 693 | 6823 mediamatters
 694 | 6823 wa
 695 | 6818 bodybuilding
 696 | 6811 memedad
 697 | 6803 ucsd
 698 | 6802 barnesandnoble
 699 | 6791 india
 700 | 6780 readability
 701 | 6777 today
 702 | 6726 indystar
 703 | 6720 scotsman
 704 | 6694 impress
 705 | 6689 torrentfreak
 706 | 6675 heise
 707 | 6668 sportingnews
 708 | 6658 pnas
 709 | 6650 chzbgr
 710 | 6650 milb
 711 | 6631 business-standard
 712 | 6630 bustle
 713 | 6623 square-enix
 714 | 6622 madison
 715 | 6615 moddb
 716 | 6613 uniqlo
 717 | 6599 zillow
 718 | 6577 tribune
 719 | 6556 airliners
 720 | 6552 svd
 721 | 6547 gameinformer
 722 | 6536 brisbanetimes
 723 | 6536 ocregister
 724 | 6533 swtor
 725 | 6526 calgaryherald
 726 | 6521 c-span
 727 | 6518 slashdot
 728 | 6505 belfasttelegraph
 729 | 6499 hiyo
 730 | 6494 news24
 731 | 6484 theintercept
 732 | 6479 technologyreview
 733 | 6455 gutenberg
 734 | 6449 cinemablend
 735 | 6438 dailytelegraph
 736 | 6424 globalresearch
 737 | 6411 lefigaro
 738 | 6405 tenor
 739 | 6381 redstate
 740 | 6374 aclu
 741 | 6361 bloodyelbow
 742 | 6357 axios
 743 | 6353 thewrap
 744 | 6349 redditmetrics
 745 | 6345 evike
 746 | 6339 aol
 747 | 6327 ulta
 748 | 6326 plos
 749 | 6324 periscope
 750 | 6312 drivethrurpg
 751 | 6308 infobae
 752 | 6300 debian
 753 | 6298 congress
 754 | 6289 warcraftlogs
 755 | 6284 gothamist
 756 | 6281 mangastream
 757 | 6276 newgrounds
 758 | 6275 berniesanders
 759 | 6263 lolesports
 760 | 6262 mayoclinic
 761 | 6242 sfchronicle
 762 | 6235 edmontonjournal
 763 | 6200 dhgate
 764 | 6194 cincinnati
 765 | 6180 history
 766 | 6176 xtube
 767 | 6169 nike
 768 | 6160 kiji
 769 | 6147 tube8
 770 | 6140 vdare
 771 | 6133 unity3d
 772 | 6130 twincities
 773 | 6127 escapistmagazine
 774 | 6126 komonews
 775 | 6104 openneo
 776 | 6090 oup
 777 | 6082 dispatch
 778 | 6079 newsobserver
 779 | 6060 ballotpedia
 780 | 6058 indiegala
 781 | 6054 index
 782 | 6050 charlotteobserver
 783 | 6048 androidcentral
 784 | 6032 webtoons
 785 | 6028 tcgplayer
 786 | 6018 zappos
 787 | 6004 intel
 788 | 5998 seattlepi
 789 | 5996 profootballfocus
 790 | 5990 ksl
 791 | 5989 macleans
 792 | 5984 atlasobscura
 793 | 5981 yugiohprices
 794 | 5980 ubuntu
 795 | 5964 gq
 796 | 5952 myvidster
 797 | 5941 tv2
 798 | 5930 paizo
 799 | 5926 montrealgazette
 800 | 5919 al-monitor
 801 | 5919 herokuapp
 802 | 5918 volarenovels
 803 | 5909 usgs
 804 | 5906 nme
 805 | 5906 society6
 806 | 5905 vg247
 807 | 5902 popsci
 808 | 5895 lowes
 809 | 5893 thefederalist
 810 | 5878 amiami
 811 | 5862 nyti
 812 | 5848 steamdb
 813 | 5841 crooksandliars
 814 | 5833 popularmechanics
 815 | 5832 slashfilm
 816 | 5826 woot
 817 | 5818 ev
 818 | 5807 illinois
 819 | 5792 nps
 820 | 5791 destructoid
 821 | 5790 mysanantonio
 822 | 5772 sbtl
 823 | 5742 smashboards
 824 | 5700 biblehub
 825 | 5696 euronews
 826 | 5694 urbanoutfitters
 827 | 5687 itv
 828 | 5685 fastcompany
 829 | 5684 techpowerup
 830 | 5674 hearthhead
 831 | 5656 mic
 832 | 5649 autoblog
 833 | 5646 futbin
 834 | 5638 voat
 835 | 5636 statesman
 836 | 5626 zap2it
 837 | 5623 userbenchmark
 838 | 5623 legaliq
 839 | 5622 mspaintadventures
 840 | 5622 familysearch
 841 | 5616 themoscowtimes
 842 | 5606 theprovince
 843 | 5604 allkpop
 844 | 5594 Omegle
 845 | 5570 activistpost
 846 | 5565 thefreethoughtproject
 847 | 5565 in
 848 | 5559 sandiegouniontribune
 849 | 5556 consumerist
 850 | 5554 eff
 851 | 5532 lego
 852 | 5520 translationnations
 853 | 5515 clickhole
 854 | 5498 etherscan
 855 | 5491 live
 856 | 5486 vndb
 857 | 5484 poll-maker
 858 | 5481 mtgsalvation
 859 | 5481 computerworld
 860 | 5475 comicvine
 861 | 5470 python
 862 | 5469 digitalspy
 863 | 5468 citylab
 864 | 5458 expressen
 865 | 5455 oxfordjournals
 866 | 5451 collider
 867 | 5447 statista
 868 | 5437 apa
 869 | 5434 g
 870 | 5430 thenational
 871 | 5430 eslgaming
 872 | 5425 politiken
 873 | 5421 ktla
 874 | 5420 webmshare
 875 | 5408 bostonherald
 876 | 5407 comixology
 877 | 5400 ustream
 878 | 5399 sony
 879 | 5396 tennessean
 880 | 5377 scout
 881 | 5374 drop
 882 | 5372 ieee
 883 | 5359 sverigesradio
 884 | 5356 sherdog
 885 | 5353 viooz
 886 | 5353 marxists
 887 | 5353 adobe
 888 | 5349 myfitnesspal
 889 | 5342 seahawks
 890 | 5339 rferl
 891 | 5338 thediplomat
 892 | 5335 storeparser
 893 | 5332 prnewswire
 894 | 5330 midwayusa
 895 | 5327 liverpoolfc
 896 | 5326 cisco
 897 | 5326 windowsphone
 898 | 5323 toysrus
 899 | 5321 archivesofnethys
 900 | 5317 eluniversal
 901 | 5309 gmanetwork
 902 | 5303 asus
 903 | 5297 android
 904 | 5297 finalfantasyxiv
 905 | 5296 cyclingnews
 906 | 5293 worldbank
 907 | 5288 boxingscene
 908 | 5285 ticketmaster
 909 | 5279 grooveshark
 910 | 5277 khl
 911 | 5276 gallup
 912 | 5268 britannica
 913 | 5263 abc7
 914 | 5260 penny-arcade
 915 | 5257 hsreplay
 916 | 5257 oculus
 917 | 5256 bt
 918 | 5250 theroot
 919 | 5246 makeagif
 920 | 5246 cnsnews
 921 | 5243 nbc
 922 | 5243 rbc
 923 | 5243 fextralife
 924 | 5234 legislation
 925 | 5225 sendvid
 926 | 5221 sciencealert
 927 | 5214 wbur
 928 | 5212 myfonts
 929 | 5207 picsarus
 930 | 5206 phoronix
 931 | 5204 nerdist
 932 | 5203 eonline
 933 | 5195 advocate
 934 | 5191 king5
 935 | 5189 xkcd
 936 | 5183 kitsu
 937 | 5182 weibo
 938 | 5181 mangareader
 939 | 5178 palmbeachpost
 940 | 5176 go1dfish
 941 | 5175 livestrong
 942 | 5174 truthdig
 943 | 5173 lgbtqnation
 944 | 5172 nikkansports
 945 | 5167 slickdeals
 946 | 5166 streamja
 947 | 5164 irs
 948 | 5158 readms
 949 | 5152 microcenter
 950 | 5137 telesurtv
 951 | 5135 lastwordonsports
 952 | 5129 alarabiya
 953 | 5117 cointelegraph
 954 | 5114 iltalehti
 955 | 5112 fc2
 956 | 5108 wral
 957 | 5108 thinkgeek
 958 | 5102 bitbucket
 959 | 5101 letterboxd
 960 | 5098 ehow
 961 | 5092 abc13
 962 | 5083 beeradvocate
 963 | 5077 umich
 964 | 5067 macys
 965 | 5064 factorio
 966 | 5063 comicbookmovie
 967 | 5042 telegram
 968 | 5039 scroll
 969 | 5034 setlist
 970 | 5028 dailyherald
 971 | 5019 games-workshop
 972 | 5015 irishexaminer
 973 | 5008 fbi
 974 | 5007 heraldscotland
 975 | 5001 jellyneo
 976 | 4999 yale
 977 | 4996 cbr
 978 | 4994 masslive
 979 | 4984 thestranger
 980 | 4982 bundlestars
 981 | 4981 alibaba
 982 | 4977 filedropper
 983 | 4974 monoprice
 984 | 4968 forward
 985 | 4964 parliament
 986 | 4960 theringer
 987 | 4950 hobbyking
 988 | 4950 manchestereveningnews
 989 | 4949 bmj
 990 | 4948 thewire
 991 | 4947 ff2ebook
 992 | 4938 ashemaletube
 993 | 4937 Twitch
 994 | 4933 sketchtoy
 995 | 4932 mcclatchydc
 996 | 4931 memory-alpha
 997 | 4925 newsok
 998 | 4911 desmoinesregister
 999 | 4901 puzzledragonx
1000 | 4889 memecrunch
1001 | 


--------------------------------------------------------------------------------