├── scripts ├── pdfToRawText.py ├── metadata.py ├── sqliteImport.sql ├── bodyText.py ├── emailsNoId.py └── outputCsvs.py ├── .gitignore ├── LICENSE ├── Makefile ├── README.md └── versionedInput └── alias_person.csv /scripts/pdfToRawText.py: -------------------------------------------------------------------------------- 1 | import os 2 | from subprocess import call 3 | 4 | for subdir, dirs, files in os.walk("working/pdfs"): 5 | if subdir=="working/pdfs": 6 | continue 7 | newdir = os.path.join("working/rawText", os.path.split(subdir)[1]) 8 | if not os.path.exists(newdir): 9 | call(["mkdir", newdir]) 10 | for filename in files: 11 | filepath = os.path.join(subdir, filename) 12 | if not filepath.endswith(".pdf"): 13 | raise Exception("Unexpected file path: %s" % os.path.join(subdir, filename)) 14 | call(["pdftotext", 15 | "-raw", 16 | os.path.join(subdir, filename), 17 | os.path.join(newdir, os.path.splitext(filename)[0]+".txt")]) 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore input data files 2 | input 3 | working 4 | output 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *,cover 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | 58 | # Sphinx documentation 59 | docs/_build/ 60 | 61 | # PyBuilder 62 | target/ 63 | -------------------------------------------------------------------------------- /scripts/metadata.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | import re 4 | import requests 5 | 6 | response = requests.get("https://foia.state.gov/searchapp/Search/SubmitSimpleQuery", 7 | params = {"searchText": "*", 8 | "beginDate": "false", 9 | "endDate": "false", 10 | "collectionMatch": "Clinton_Email", 11 | "postedBeginDate": "false", 12 | "postedEndDate": "false", 13 | "caseNumber": "false", 14 | "page": 1, 15 | "start": 0, 16 | "limit": 100000}, 17 | verify=False) 18 | 19 | return_json = re.sub(r'new Date\(([0-9]{1,})\)',r'\1',response.text) 20 | return_json = re.sub(r'new ?Date\((-[0-9]{1,})\)',r'null',return_json) 21 | 22 | data = json.loads(return_json) 23 | header = list(data["Results"][0].keys()) 24 | print(header) 25 | 26 | f = open("input/metadata.csv", "w") 27 | writer = csv.writer(f) 28 | writer.writerow(header) 29 | 30 | for row in data["Results"]: 31 | writer.writerow([row[col] for col in header]) 32 | 33 | f.close() 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This project is released under the MIT License (copied below). 2 | 3 | It includes data from the [WSJ's Clinton Email Cruncher](https://github.com/wsjdata/clinton-email-cruncher), which is also MIT Licensed. 4 | 5 | The MIT License (MIT) 6 | ===================== 7 | 8 | Copyright (c) 2015 Ben Hamner, Dow Jones, other contributors 9 | 10 | Permission is hereby granted, free of charge, to any person obtaining a copy 11 | of this software and associated documentation files (the "Software"), to deal 12 | in the Software without restriction, including without limitation the rights 13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | copies of the Software, and to permit persons to whom the Software is 15 | furnished to do so, subject to the following conditions: 16 | 17 | The above copyright notice and this permission notice shall be included in all 18 | copies or substantial portions of the Software. 19 | 20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 | SOFTWARE. 27 | 28 | -------------------------------------------------------------------------------- /scripts/sqliteImport.sql: -------------------------------------------------------------------------------- 1 | .separator "," 2 | 3 | CREATE TABLE Emails ( 4 | Id INTEGER PRIMARY KEY, 5 | DocNumber TEXT, 6 | MetadataSubject TEXT, 7 | MetadataTo TEXT, 8 | MetadataFrom TEXT, 9 | SenderPersonId INTEGER, 10 | MetadataDateSent TEXT, 11 | MetadataDateReleased TEXT, 12 | MetadataPdfLink TEXT, 13 | MetadataCaseNumber TEXT, 14 | MetadataDocumentClass TEXT, 15 | ExtractedSubject TEXT, 16 | ExtractedTo TEXT, 17 | ExtractedFrom TEXT, 18 | ExtractedCc TEXT, 19 | ExtractedDateSent TEXT, 20 | ExtractedCaseNumber TEXT, 21 | ExtractedDocNumber TEXT, 22 | ExtractedDateReleased TEXT, 23 | ExtractedReleaseInPartOrFull TEXT, 24 | ExtractedBodyText TEXT, 25 | RawText TEXT); 26 | 27 | CREATE TABLE Persons ( 28 | Id INTEGER PRIMARY KEY, 29 | Name TEXT); 30 | 31 | CREATE TABLE Aliases ( 32 | Id INTEGER PRIMARY KEY, 33 | Alias TEXT, 34 | PersonId INTEGER); 35 | 36 | CREATE TABLE EmailReceivers ( 37 | Id INTEGER PRIMARY KEY, 38 | EmailId INTEGER, 39 | PersonId INTEGER); 40 | 41 | .import "working/noHeader/Emails.csv" Emails 42 | .import "working/noHeader/Persons.csv" Persons 43 | .import "working/noHeader/Aliases.csv" Aliases 44 | .import "working/noHeader/EmailReceivers.csv" EmailReceivers 45 | 46 | CREATE INDEX emails_senderpersonid_ix ON Emails (SenderPersonId); 47 | CREATE INDEX emails_docnumber_ix ON Emails (DocNumber); 48 | 49 | CREATE INDEX aliases_personid_ix ON Aliases (PersonId); 50 | 51 | CREATE INDEX emailreceivers_emailid_ix ON EmailReceivers (EmailId); 52 | CREATE INDEX emailreceivers_personid_ix ON EmailReceivers (PersonId); 53 | -------------------------------------------------------------------------------- /scripts/bodyText.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from subprocess import call 4 | 5 | def filter_body(text): 6 | patterns = [r"\x0c", 7 | r"(\n|^)UNCLASSIFIED.*", 8 | r"(\n|^)CONFIDENTIAL.*", 9 | r"(\n|^)Classified by.*", 10 | r"(\n|^)Attachments:.*"] 11 | for repeat in range(3): 12 | for pattern in patterns: 13 | text = re.sub(pattern, "", text) 14 | return text.strip() 15 | 16 | def extract_body(raw_text): 17 | m = re.search(r"\nSubject.*?\n(.*?)(Original Message|From:)", raw_text, re.DOTALL) 18 | if m: 19 | return filter_body(m.groups()[0]) 20 | m = re.search(r"\nSubject.*?\n(.+)", raw_text, re.DOTALL) 21 | if m: 22 | return filter_body(m.groups()[0]) 23 | m = re.search(r"\nTo:.*?\n(.*?)(Original Message|From:)", raw_text, re.DOTALL) 24 | if m: 25 | return filter_body(m.groups()[0]) 26 | m = re.search(r"\nTo:.*?\n(.+)", raw_text, re.DOTALL) 27 | if m: 28 | return filter_body(m.groups()[0]) 29 | return "" 30 | 31 | for subdir, dirs, files in os.walk("working/rawText"): 32 | if subdir=="working/rawText": 33 | continue 34 | newdir = os.path.join("working/bodyText", os.path.split(subdir)[1]) 35 | if not os.path.exists(newdir): 36 | call(["mkdir", "-p", newdir]) 37 | for filename in files: 38 | input_file = os.path.join(subdir, filename) 39 | output_file = os.path.join(newdir, filename) 40 | if not input_file.endswith(".txt"): 41 | raise Exception("Unexpected file path: %s" % os.path.join(subdir, filename)) 42 | raw_text = open(input_file).read() 43 | f = open(output_file, "w") 44 | f.write(extract_body(raw_text)) 45 | f.close() 46 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | input/HRC_Email_296.zip: 3 | mkdir -p input 4 | curl http://graphics.wsj.com/hillary-clinton-email-documents/zips/HRC_Email_296.zip -o input/HRC_Email_296.zip 5 | input/HRCEmail_JuneWeb.zip: 6 | mkdir -p input 7 | curl http://graphics.wsj.com/hillary-clinton-email-documents/zips/HRCEmail_JuneWeb.zip -o input/HRCEmail_JuneWeb.zip 8 | input/HRCEmail_JulyWeb.zip: 9 | mkdir -p input 10 | curl http://graphics.wsj.com/hillary-clinton-email-documents/zips/HRCEmail_JulyWeb.zip -o input/HRCEmail_JulyWeb.zip 11 | input/Clinton_Email_August_Release.zip: 12 | mkdir -p input 13 | curl http://graphics.wsj.com/hillary-clinton-email-documents/zips/Clinton_Email_August_Release.zip -o input/Clinton_Email_August_Release.zip 14 | INPUT_FILES=input/HRC_Email_296.zip input/HRCEmail_JuneWeb.zip input/HRCEmail_JulyWeb.zip input/Clinton_Email_August_Release.zip 15 | input/metadata.csv: 16 | mkdir -p input 17 | python scripts/metadata.py 18 | input: $(INPUT_FILES) input/metadata.csv 19 | 20 | working/pdfs/.sentinel: $(INPUT_FILES) 21 | mkdir -p working/pdfs 22 | unzip input/HRC_Email_296.zip -d working/pdfs/may 23 | unzip input/HRCEmail_JuneWeb.zip -d working/pdfs/june 24 | unzip input/HRCEmail_JulyWeb.zip -d working/pdfs/july 25 | unzip input/Clinton_Email_August_Release.zip -d working/pdfs/august 26 | touch working/pdfs/.sentinel 27 | unzip: working/pdfs/.sentinel 28 | 29 | working/rawText/.sentinel: working/pdfs/.sentinel 30 | mkdir -p working/rawText 31 | python scripts/pdfToRawText.py 32 | touch working/rawText/.sentinel 33 | 34 | working/bodyText/.sentinel: working/rawText/.sentinel 35 | mkdir -p working/bodyText 36 | python scripts/bodyText.py 37 | touch working/bodyText/.sentinel 38 | text: working/bodyText/.sentinel 39 | 40 | input/emailsNoId.csv: working/rawText/.sentinel working/bodyText/.sentinel input/metadata.csv 41 | python scripts/emailsNoId.py 42 | 43 | output/Emails.csv: input/emailsNoId.csv 44 | mkdir -p output 45 | python scripts/outputCsvs.py 46 | output/Persons.csv: output/Emails.csv 47 | output/Aliases.csv: output/Emails.csv 48 | output/EmailReceivers.csv: output/Emails.csv 49 | csv: output/Emails.csv output/Persons.csv output/Aliases.csv output/EmailReceivers.csv 50 | 51 | working/noHeader/Emails.csv: output/Emails.csv 52 | mkdir -p working/noHeader 53 | tail +2 $^ > $@ 54 | 55 | working/noHeader/Persons.csv: output/Persons.csv 56 | mkdir -p working/noHeader 57 | tail +2 $^ > $@ 58 | 59 | working/noHeader/Aliases.csv: output/Aliases.csv 60 | mkdir -p working/noHeader 61 | tail +2 $^ > $@ 62 | 63 | working/noHeader/EmailReceivers.csv: output/EmailReceivers.csv 64 | mkdir -p working/noHeader 65 | tail +2 $^ > $@ 66 | 67 | output/database.sqlite: working/noHeader/Emails.csv working/noHeader/Persons.csv working/noHeader/Aliases.csv working/noHeader/EmailReceivers.csv 68 | -rm output/database.sqlite 69 | sqlite3 -echo $@ < scripts/sqliteImport.sql 70 | 71 | output/hashes.txt: output/database.sqlite 72 | -rm output/hashes.txt 73 | echo "Current git commit:" >> output/hashes.txt 74 | git rev-parse HEAD >> output/hashes.txt 75 | echo "\nCurrent ouput md5 hashes:" >> output/hashes.txt 76 | md5 output/*.csv >> output/hashes.txt 77 | md5 output/*.sqlite >> output/hashes.txt 78 | hashes: output/hashes.txt 79 | 80 | sqlite: output/database.sqlite 81 | 82 | release: output/database.sqlite output/hashes.txt 83 | zip -r -X output/release-`date -u +'%Y-%m-%d-%H-%M-%S'` output/* 84 | 85 | all: csv sqlite hashes 86 | 87 | clean: 88 | rm -rf working 89 | rm -rf output 90 | -------------------------------------------------------------------------------- /scripts/emailsNoId.py: -------------------------------------------------------------------------------- 1 | import arrow 2 | import csv 3 | import os 4 | import numpy as np 5 | import pandas as pd 6 | import re 7 | from subprocess import call 8 | 9 | def extract_release_type(raw_text): 10 | if re.search(r"RELEASE\s+IN\s+PART", raw_text): 11 | return "RELEASE IN PART" 12 | if re.search(r"RELEASE\s+IN\s+FULL", raw_text): 13 | return "RELEASE IN FULL" 14 | return "UNKNOWN" 15 | 16 | def extract_field(regex, raw_text): 17 | m=re.search(regex, raw_text) 18 | if m: 19 | return m.groups()[0].strip() 20 | return "" 21 | 22 | def metadata_timestamp_to_string(timestamp): 23 | if np.isnan(timestamp): 24 | return "" 25 | return str(arrow.get(timestamp/1000)) 26 | 27 | metadata = pd.read_csv("input/metadata.csv") 28 | metadata["DocNumber"] = [os.path.splitext(os.path.split(pdf_link)[1])[0] for pdf_link in metadata["pdfLink"]] 29 | 30 | f = open("input/emailsNoId.csv", "w") 31 | writer = csv.writer(f) 32 | writer.writerow(["DocNumber", 33 | "MetadataSubject", 34 | "MetadataTo", 35 | "MetadataFrom", 36 | "MetadataDateSent", 37 | "MetadataDateReleased", 38 | "MetadataPdfLink", 39 | "MetadataCaseNumber", 40 | "MetadataDocumentClass", 41 | "ExtractedSubject", 42 | "ExtractedTo", 43 | "ExtractedFrom", 44 | "ExtractedCc", 45 | "ExtractedDateSent", 46 | "ExtractedCaseNumber", 47 | "ExtractedDocNumber", 48 | "ExtractedDateReleased", 49 | "ExtractedReleaseInPartOrFull", 50 | "ExtractedBodyText", 51 | "RawText"]) 52 | 53 | for subdir, dirs, files in sorted(os.walk("working/rawText")): 54 | if subdir=="working/rawText": 55 | continue 56 | for filename in sorted(files): 57 | doc_number = os.path.splitext(filename)[0] 58 | locs = np.where(metadata["DocNumber"]==doc_number)[0] 59 | if len(locs) != 1: 60 | raise Exception("There isn't exactly one matching filename for %s: %s" % (filename, locs)) 61 | loc = locs[0] 62 | filepath = os.path.join(subdir, filename) 63 | raw_text = open(filepath).read() 64 | body_text = open(os.path.join("working/bodyText", os.path.split(subdir)[1], filename)).read() 65 | writer.writerow([doc_number, 66 | metadata["subject"][loc], 67 | metadata["to"][loc], 68 | metadata["from"][loc], 69 | metadata_timestamp_to_string(metadata["docDate"][loc]), 70 | metadata_timestamp_to_string(metadata["postedDate"][loc]), 71 | metadata["pdfLink"][loc], 72 | metadata["caseNumber"][loc], 73 | metadata["documentClass"][loc], 74 | extract_field(r"Subject:(.*?)\n", raw_text), 75 | extract_field(r"To:(.*?)\n", raw_text), 76 | extract_field(r"From:(.*?)\n", raw_text), 77 | extract_field(r"Cc:(.*?)\n", raw_text), 78 | extract_field(r"Sent:(.*?)\n", raw_text), 79 | extract_field(r"Case No. (.+?-\d+-\d+)", raw_text), 80 | extract_field(r"Doc No. (.\d+)", raw_text), 81 | extract_field(r"Date: (\d\d/\d\d/\d\d\d\d)", raw_text), 82 | extract_release_type(raw_text), 83 | body_text, 84 | raw_text]) 85 | 86 | f.close() 87 | -------------------------------------------------------------------------------- /scripts/outputCsvs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def normalize_address(raw_address): 5 | for c in ["'", ",", "°", "•", "`", '"', "‘", "-"]: 6 | raw_address = raw_address.replace(c, "") 7 | raw_address = raw_address.lower() 8 | if "<" in raw_address: 9 | prefix = raw_address[:raw_address.index("<")].strip() 10 | if prefix: 11 | return prefix 12 | return raw_address.strip() 13 | 14 | emails = pd.read_csv("input/emailsNoId.csv") 15 | emails["MetadataTo"].replace(np.nan, "", inplace=True) 16 | emails["ExtractedTo"].replace(np.nan, "", inplace=True) 17 | emails["MetadataFrom"].replace(np.nan, "", inplace=True) 18 | emails["ExtractedFrom"].replace(np.nan, "", inplace=True) 19 | emails.sort(columns=["DocNumber"], inplace=True) 20 | emails.insert(0, "Id", list(range(1, len(emails)+1))) 21 | emails.insert(5, "SenderPersonId", np.nan) 22 | 23 | alias_person = pd.read_csv("versionedInput/alias_person.csv") 24 | alias_person["AliasName"] = [normalize_address(alias) for alias in alias_person["AliasName"]] 25 | 26 | persons = pd.DataFrame(columns=["Id", "Name"]) 27 | aliases = pd.DataFrame(columns=["Id", "Alias", "PersonId"]) 28 | email_receivers = pd.DataFrame(columns=["Id", "EmailId", "PersonId"]).astype(int) 29 | 30 | def add_alias(aliases, persons, alias_name, person_name): 31 | if len(np.where(aliases["Alias"]==alias_name)[0])>0: 32 | return 33 | locs = np.where(persons["Name"]==person_name)[0] 34 | if len(locs)>0: 35 | person_id = persons["Id"][locs[0]] 36 | else: 37 | person_id = len(persons)+1 38 | persons.loc[person_id-1] = [person_id, person_name] 39 | alias_id = len(aliases)+1 40 | aliases.loc[alias_id-1] = [alias_id, alias_name.lower(), person_id] 41 | 42 | for (i, alias_person) in alias_person.iterrows(): 43 | add_alias(aliases, persons, alias_person["AliasName"], alias_person["PersonName"]) 44 | 45 | log = open("working/outputCsvsLog.txt", "w") 46 | 47 | for (i, email) in emails.iterrows(): 48 | from_person_id = None 49 | from_address = normalize_address(email["MetadataFrom"].split(";")[0]) 50 | if from_address != "": 51 | locs = np.where(aliases["Alias"]==from_address)[0] 52 | if len(locs)==0: 53 | add_alias(aliases, persons, from_address, from_address) 54 | log.write("Added From Person: %s\n" % from_address) 55 | loc = np.where(aliases["Alias"]==from_address)[0][0] 56 | from_person_id = aliases["PersonId"][loc] 57 | from_person_name = persons["Name"][from_person_id-1] 58 | emails.loc[i, "SenderPersonId"] = from_person_id 59 | if email["ExtractedFrom"] != "": 60 | add_alias(aliases, persons, normalize_address(email["ExtractedFrom"]), from_person_name) 61 | to_addresses = email["MetadataTo"].split(";") + email["ExtractedTo"].split(";") 62 | to_addresses = sorted(list(set([normalize_address(x) for x in to_addresses]))) 63 | if "" in to_addresses: 64 | to_addresses.remove("") 65 | for to_address in to_addresses: 66 | locs = np.where(aliases["Alias"]==to_address)[0] 67 | if len(locs)==0: 68 | add_alias(aliases, persons, to_address, to_address) 69 | log.write("Added To Person: %s\n" % to_address) 70 | loc = np.where(aliases["Alias"]==to_address)[0][0] 71 | # don't add a receiver if they were also the sender 72 | if from_person_id != aliases["PersonId"][loc]: 73 | email_receivers.loc[len(email_receivers)] = [len(email_receivers)+1, email["Id"], aliases["PersonId"][loc]] 74 | 75 | persons.to_csv("output/Persons.csv", index=False) 76 | aliases.to_csv("output/Aliases.csv", index=False) 77 | emails.to_csv("output/Emails.csv", index=False, float_format="%0.0f") 78 | email_receivers.to_csv("output/EmailReceivers.csv", index=False, float_format="%0.0f") 79 | 80 | log.close() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hillary-clinton-emails 2 | 3 | *This is a work in progress - any help normalizing and extracting this data's much appreciated!* 4 | 5 | This repo contains code to transform [Hillary Clinton's emails released through the FOIA request](https://foia.state.gov/Search/Results.aspx?collection=Clinton_Email) from raw PDF documents to CSV files and a SQLite database, making it easier to understand and analyze the documents. 6 | 7 | **[A zip of the extracted data is available for download on Kaggle](https://www.kaggle.com/c/hillary-clinton-emails/data)**. 8 | 9 | Check out some analytics on this data on **[Kaggle Scripts](https://www.kaggle.com/c/hillary-clinton-emails/scripts)**. 10 | 11 | Note that conversion is very imprecise: there's plenty of room to improve the PDF conversion, the sender/receiver extraction, and the body text extraction. 12 | 13 | # Extracted data 14 | 15 | There are five main output files this produces: four CSV files and one SQLite database. 16 | 17 | Note that each table contains a numeric `Id` column. This `Id` column is only meant to be used to join the tables: it is internally consistent, but each entity may have a different `Id` when the data's updated. 18 | 19 | ## Emails.csv 20 | 21 | This file currently contains the following fields: 22 | 23 | - **Id** - unique identifier for internal reference 24 | - **DocNumber** - FOIA document number 25 | - **MetadataSubject** - Email SUBJECT field (from the FOIA metadata) 26 | - **MetadataTo** - Email TO field (from the FOIA metadata) 27 | - **MetadataFrom** - Email FROM field (from the FOIA metadata) 28 | - **SenderPersonId** - PersonId of the email sender (linking to Persons table) 29 | - **MetadataDateSent** - Date the email was sent (from the FOIA metadata) 30 | - **MetadataDateReleased** - Date the email was released (from the FOIA metadata) 31 | - **MetadataPdfLink** - Link to the original PDF document (from the FOIA metadata) 32 | - **MetadataCaseNumber** - Case number (from the FOIA metadata) 33 | - **MetadataDocumentClass** - Document class (from the FOIA metadata) 34 | - **ExtractedSubject** - Email SUBJECT field (extracted from the PDF) 35 | - **ExtractedTo** - Email TO field (extracted from the PDF) 36 | - **ExtractedFrom** - Email FROM field (extracted from the PDF) 37 | - **ExtractedCc** - Email CC field (extracted from the PDF) 38 | - **ExtractedDateSent** - Date the email was sent (extracted from the PDF) 39 | - **ExtractedCaseNumber** - Case number (extracted from the PDF) 40 | - **ExtractedDocNumber** - Doc number (extracted from the PDF) 41 | - **ExtractedDateReleased** - Date the email was released (extracted from the PDF) 42 | - **ExtractedReleaseInPartOrFull** - Whether the email was partially censored (extracted from the PDF) 43 | - **ExtractedBodyText** - Attempt to only pull out the text in the body that the email sender wrote (extracted from the PDF) 44 | - **RawText** - Raw email text (extracted from the PDF) 45 | 46 | ## Persons.csv 47 | 48 | - **Id** - unique identifier for internal reference 49 | - **Name** - person's name 50 | 51 | ## Aliases.csv 52 | 53 | - **Id** - unique identifier for internal reference 54 | - **Alias** - text in the From/To email fields that refers to the person 55 | - **PersonId** - person that the alias refers to 56 | 57 | ## EmailReceivers.csv 58 | 59 | - **Id** - unique identifier for internal reference 60 | - **EmailId** - Id of the email 61 | - **PersonId** - Id of the person that received the email 62 | 63 | ## database.sqlite 64 | 65 | This SQLite database contains all of the above tables (Emails, Persons, Aliases, and EmailReceivers) with their corresponding fields. You can see the schema and ingest code under [scripts/sqlImport.sql](https://github.com/benhamner/hillary-clinton-emails/blob/master/scripts/sqliteImport.sql) 66 | 67 | # Contributing: next steps 68 | 69 | - Improve the From/To address extraction mechanisms 70 | - Normalize various email address representations to people 71 | - Improve the BodyText extraction 72 | 73 | # Running the download and extraction code 74 | 75 | Running `make all` in the root directory will download the data (~162mb total) and create the output files, assuming you have all the requirements installed. 76 | 77 | # Requirements 78 | 79 | *This has only been tested on OS X, it may or may not work on other operating systems.* 80 | 81 | - python3 82 | - pandas 83 | - arrow 84 | - numpy 85 | - pdftotext (utility to transform a PDF document to text) 86 | - GNU make 87 | - sqlite3 88 | 89 | # References 90 | 91 | The source PDF documents for this repo were downlaoded from the [WSJ Clinton Inbox search](http://graphics.wsj.com/hillary-clinton-email-documents/). 92 | 93 | I created this project before I realized the WSJ also open-sourced some code they used to create the Inbox Search. Subsequently, I've included some material from their open source project as well: I used their [HRCEMAIL_names.csv](https://raw.githubusercontent.com/wsjdata/clinton-email-cruncher/d8dc1916465b90e4147460f9e432cf9cafc8d3b5/HRCEMAIL_names.csv) to seed [alias_person.csv](https://github.com/benhamner/hillary-clinton-emails/blob/master/versionedInput/alias_person.csv). I also scraped metadata from foia.state.gov in a similar fashion as they did in [downloadMetadata.py](https://github.com/wsjdata/clinton-email-cruncher/blob/master/downloadMetadata.py). -------------------------------------------------------------------------------- /versionedInput/alias_person.csv: -------------------------------------------------------------------------------- 1 | AliasName,PersonName 2 | "111th Congress","111th Congress" 3 | "AGNA USEMB Kabul Afghanistan","AGNA USEMB Kabul Afghanistan" 4 | AP,AP 5 | ASUNCION,ASUNCION 6 | Alec,Alec 7 | "Dupuy, Alex","Alex Dupuy" 8 | "American Beverage Association","American Beverage Association" 9 | "Mayock, Andrew","Andrew Mayock" 10 | "Shapiro, Andrew J","Andrew Shapiro" 11 | shapiroa@state.gov,"Andrew Shapiro" 12 | "Slaughter, Ann-Marie","Anne-Marie Slaughter" 13 | "Slaughter, Anne- Marie","Anne-Marie Slaughter" 14 | "Slaughter, Anne-Marie","Anne-Marie Slaughter" 15 | slaughtera@state.gov,"Anne-Marie Slaughter" 16 | "Lake, Anthony","Anthony Lake" 17 | "Valenzuela, Arturo A","Arturo Valenzuela" 18 | valenzuelaaa@state.gov,"Arturo Valenzuela" 19 | "Ki-moon, Ban","Ban Ki-moon" 20 | "Obama, Barack","Barack Obama" 21 | President,"Barack Obama" 22 | BAM@Mikulski.senate.gov,"Barbara Mikulski" 23 | "Mikulski, BAM","Barbara Mikulski" 24 | "Mikulski, BAM (Mikulski)","Barbara Mikulski" 25 | "Mikulski, Bam (Mitkulski)","Barbara Mikulski" 26 | "Mikulski,BAM (Mikulski)","Barbara Mikulski" 27 | Betsy.Ebeling,"Betsy Ebeling" 28 | "Ebeling, Betsy","Betsy Ebeling" 29 | betsyebeling,"Betsy Ebeling" 30 | betsyebeling1050,"Betsy Ebeling" 31 | "Clinton, William J","Bill Clinton" 32 | Dad,"Bill Clinton" 33 | Biography,Biography 34 | "Klehr, Bonnie","Bonnie Klehr" 35 | brian,"Brian Greenspun" 36 | BStrider,"Burns Strider" 37 | "Strider, Burns","Burns Strider" 38 | "Capricia Marshall","Capricia Marshall" 39 | "Marshall, Capricia","Capricia Marshall" 40 | "Marshall, Capricia P","Capricia Marshall" 41 | capriciamarshall,"Capricia Marshall" 42 | capriciamarshall@,"Capricia Marshall" 43 | cmarshall,"Capricia Marshall" 44 | marshallcp@state.gov,"Capricia Marshall" 45 | "Pascual, Carlos","Carlos Pascual" 46 | "Adler, Caroline E","Caroline Adler" 47 | "Button, Case","Case Button" 48 | "Richards, Cecile","Cecile Richards" 49 | "EUR/RUS:Weson, Chad","Chad Weston" 50 | "NEA/PI/CE:Kiamie, Charles","Charles Kiamie" 51 | Chelsea,"Chelsea Clinton" 52 | "Blair Cherie","Cherie Blair" 53 | "Blair, Cherie","Cherie Blair" 54 | "CHERIE BLAIR","Cherie Blair" 55 | cb,"Cherie Blair" 56 | cherieblair,"Cherie Blair" 57 | Cheryl,Cheryl 58 | "C:Mills, Cheryl","Cheryl Mills" 59 | "Cheryl Mills","Cheryl Mills" 60 | "Cheryl Mills, COS","Cheryl Mills" 61 | "Mill, Cheryl","Cheryl Mills" 62 | "Mills, Cherlyl D","Cheryl Mills" 63 | "Mills, Chery D","Cheryl Mills" 64 | "Mills, Cheryl","Cheryl Mills" 65 | "Mills, Cheryl D","Cheryl Mills" 66 | "Mills,Cheryl D","Cheryl Mills" 67 | "Mills. Cherl D","Cheryl Mills" 68 | "Mills. Cheryl D","Cheryl Mills" 69 | MillsCD@state.gov,"Cheryl Mills" 70 | cheryl.mills,"Cheryl Mills" 71 | cheryl.mills@,"Cheryl Mills" 72 | "Crocker, Chester A","Chester Crocker" 73 | "Butzgy, Christopher H","Christopher Butzgy" 74 | "Edwards, Christopher","Christopher Edwards" 75 | "EAP/J:Green, Christopher","Christopher Green" 76 | "Hill, Christopher R (Baghdad)","Christopher Hill" 77 | hillcr@state.gov,"Christopher Hill" 78 | "Coleman, Claire L","Claire Coleman" 79 | "Colin Powell","Colin Powell" 80 | "Council on Foreign Relations","Council on Foreign Relations" 81 | "Beale, Courtney A Karamer","Courtney Beale" 82 | "Beale, Courtney A Kramer","Courtney Beale" 83 | bealeca@state.gov,"Courtney Beale" 84 | "Kelly, Craig A","Craig Kelly" 85 | "Daily Sun","Daily Sun" 86 | "Hyde, Dana","Dana Hyde" 87 | Daniel,Daniel 88 | "Baer, Daniel","Daniel Baer" 89 | "Baer, Daniel B","Daniel Baer" 90 | "Baer, Daniel D","Daniel Baer" 91 | baer.daniel,"Daniel Baer" 92 | daniel.baer,"Daniel Baer" 93 | "Inonye, Daniel","Daniel Inonye" 94 | "Schwerin, Daniel B","Daniel Schwerin" 95 | SchwerinDB@state.gov,"Daniel Schwerin" 96 | "Brian, Danielle","Danielle Brian" 97 | "Axelrod, David M","David Axelrod" 98 | Axelrod_D,"David Axelrod" 99 | "Brock, David","David Brock" 100 | David_Garten@lautenberg.senate.gov,"David Garten" 101 | "Garten, David (Lautenberg)","David Garten" 102 | "INL:Johnson, David T","David Johnson" 103 | d.gunners2010,"David Miliband" 104 | "Department of State","Department of State" 105 | "Chollet, Derek H","Derek Chollet" 106 | cholletdh@state.gov,"Derek Chollet" 107 | "Reynolds, Diane","Diane Reynolds" 108 | donald,Donald 109 | "Band, Doug","Doug Band" 110 | "Hattaway, Doug","Doug Hattaway" 111 | doug,"Doug Hattaway" 112 | "Pelton, E","E. Pelton" 113 | "Politico - Drew, Elizabeth","Elizabeth Drew" 114 | "Tauscher, Ellen O","Ellen Tauscher" 115 | tauschereo@state.gov,"Ellen Tauscher" 116 | "Faleomavaega, Eni F","Eni Faleomavaega" 117 | "Woodard, Eric W","Eric Woodard" 118 | "Brimmer, Esther D","Esther Brimmer" 119 | brimmere@state.gov,"Esther Brimmer" 120 | "FINCA International","FINCA International" 121 | "Foreign Affairs Magazine","Foreign Affairs Magazine" 122 | "PRM/MCE:Wills, G","G Wills" 123 | "Lou de Bac, G/TIP","G. Lou de Bac" 124 | "PRM/MCE:Wills,G","G. Wills" 125 | "Mitchell, George","George Mitchell" 126 | "Glantz, Gina","Gina Glantz" 127 | glantz.,"Gina Glantz" 128 | "Govenman Etazini","Govenman Etazini" 129 | Haiti,Haiti 130 | "Duk-soo, Han","Han Duk-soo" 131 | "Koh, Harold Hongju","Harold Hongju Koh" 132 | kohhh@state.gov,"Harold Hongju Koh" 133 | Heintz,Heintz 134 | Hill,Hill 135 | "Hillary Clinton","Hillary Clinton" 136 | Clinton,"Hillary Clinton" 137 | "Clinton Hillary R","Hillary Clinton" 138 | "Clinton, Hillary","Hillary Clinton" 139 | "Clinton, Hillary R","Hillary Clinton" 140 | "Clinton, Hillary Rodham","Hillary Clinton" 141 | H,"Hillary Clinton" 142 | H2,"Hillary Clinton" 143 | HRC,"Hillary Clinton" 144 | Hillary,"Hillary Clinton" 145 | "Hillary Rodham Clinton","Hillary Clinton" 146 | "Madam Secretary","Hillary Clinton" 147 | Secretary,"Hillary Clinton" 148 | "Secretary Clinton","Hillary Clinton" 149 | "Secretary of State","Hillary Clinton" 150 | hr15@mycingular.blackberry.net,"Hillary Clinton" 151 | hrod17@clintonemail.com,"Hillary Clinton" 152 | "the honorable hillary rodham clinton secretary of state","Hillary Clinton" 153 | hdr22@clintonemail.com,"Hillary Clinton" 154 | "Abedin, Huma","Huma Abedin" 155 | AbedinH@state.gov,"Huma Abedin" 156 | "Abein, Huma","Huma Abedin" 157 | "Abendin, Huma","Huma Abedin" 158 | "Adedin, Huma","Huma Abedin" 159 | "Huma Abedin","Huma Abedin" 160 | "Huma, Abedin","Huma Abedin" 161 | Huma@clintonemail.com,"Huma Abedin" 162 | abedin@state.gov,"Huma Abedin" 163 | abendinh@state.gov,"Huma Abedin" 164 | adedinh@state.gov,"Huma Abedin" 165 | "Kelly, Ian","Ian Kelly" 166 | "DS/PA:Finkle, J","J. Finkle" 167 | JAMA,JAMA 168 | "Newmyer, Jackie","Jackie Newmyer" 169 | newmyer,"Jackie Newmyer" 170 | Lew,"Jacob Lew" 171 | "Lew, Jacob","Jacob Lew" 172 | "Lew, Jacob J","Jacob Lew" 173 | jacobjlew,"Jacob Lew" 174 | lewjj@state.gov,"Jacob Lew" 175 | "Sullivan JJ@state.gov","Jake Sullivan" 176 | "Sullivan, Jacbo J","Jake Sullivan" 177 | "Sullivan, Jack","Jake Sullivan" 178 | "Sullivan, Jacob","Jake Sullivan" 179 | "Sullivan, Jacob H","Jake Sullivan" 180 | "Sullivan, Jacob J","Jake Sullivan" 181 | "Sullivan, Jake","Jake Sullivan" 182 | "Sullivan, Jake J","Jake Sullivan" 183 | SullivanJJ@state.gov,"Jake Sullivan" 184 | "jake. sullivan","Jake Sullivan" 185 | jake.sullivan,"Jake Sullivan" 186 | jake.sullivan@,"Jake Sullivan" 187 | sulllivanjj@state.gov,"Jake Sullivan" 188 | sullivanil@state.gov,"Jake Sullivan" 189 | sullivann@state.gov.,"Jake Sullivan" 190 | "McGovern, James P","James McGovern" 191 | "Smith, James E","James Smith" 192 | "Steinberg, James B","James Steinberg" 193 | SteinbergJB@state.gov,"James Steinberg" 194 | SteinbertJB@state.gov,"James Steinberg" 195 | jpiercy,"Jan Piercy" 196 | "Jacobs, Janice L","Janice Jacobs" 197 | "Farrow, Jeffrey","Jeffrey Farrow" 198 | "Farrow, Jeffrey L","Jeffrey Farrow" 199 | jfarrow,"Jeffrey Farrow" 200 | "Feltman, Jeffrey D","Jeffrey Feltman" 201 | feltmanjd@state.gov,"Jeffrey Feltman" 202 | "Robinson, Jennifer","Jennifer Robinson" 203 | "Hoagland, Jim","Jim Hoagland" 204 | "Kennedy, Jim","Jim Kennedy" 205 | "Laszczych, Joanne","Joanne Laszczych" 206 | "Olver, John","John Olver" 207 | "Podesta, John","John Podesta" 208 | jpodesta,"John Podesta" 209 | "Carson, Johnnie","Johnnie Carson" 210 | carsonj@state.gov,"Johnnie Carson" 211 | "Jonathan Prince","Jonathan Prince" 212 | "Daniel, Joshua J","Joshua Daniel" 213 | "Kieffer, Judith","Judith Kieffer" 214 | "McHale, Judith","Judith McHale" 215 | "McHale, Judith A","Judith McHale" 216 | mchaleja@state.gov,"Judith McHale" 217 | "Cooper, Justin","Justin Cooper" 218 | KPK,KPK 219 | "Kabul LGF Request","Kabul LGF Request" 220 | "Eikenberry, Karl W","Karl Eikenberry" 221 | KellyC@state.gov,KellyC@state.gov 222 | "Conrad, Kent","Kent Conrad" 223 | BaldersonKM@state.gov,"Kris Balderston" 224 | "Balderston, Kris M","Kris Balderston" 225 | "Balderstone, Kris M","Kris Balderston" 226 | balderstonkm@state.gov,"Kris Balderston" 227 | "Campbell, Kurt M","Kurt Campbell" 228 | CampbellKM@state.gov,"Kurt Campbell" 229 | "EAP/K:Rosenberger, L","L. Rosenberger" 230 | LGraham,LGraham 231 | "Jilloty, Lauren C","Lauren Jiloty" 232 | "Jiloty, Cheryl D","Lauren Jiloty" 233 | "Jiloty, Lauren","Lauren Jiloty" 234 | "Jiloty, Lauren C","Lauren Jiloty" 235 | "Jiloty, Lauren CD","Lauren Jiloty" 236 | "Jiloty. Lauren C","Lauren Jiloty" 237 | JilotyLC@state.gov,"Lauren Jiloty" 238 | "Jjiloty, Lauren C","Lauren Jiloty" 239 | JjilotyLC@state.gov,"Lauren Jiloty" 240 | "Lauren Jiloty","Lauren Jiloty" 241 | "Rubiner, Laurie","Laurie Rubiner" 242 | "Brown, Lee R","Lee Brown" 243 | "Feinstein, Lee","Lee Feinstein" 244 | "Dewan, Linda L","Linda Dewan" 245 | captuol,"Lisa Caputo" 246 | caputol,"Lisa Caputo" 247 | Imuscatine,"Lissa Muscatine" 248 | "Lissa, Muscatine","Lissa Muscatine" 249 | "Muscantine, LIssa","Lissa Muscatine" 250 | "Muscatine Lissa","Lissa Muscatine" 251 | "Muscatine, Lisa","Lissa Muscatine" 252 | "Muscatine, Lissa","Lissa Muscatine" 253 | "Muscatine,Lissa","Lissa Muscatine" 254 | "Muscatine. Lissa","Lissa Muscatine" 255 | MuscatineL@state.gov,"Lissa Muscatine" 256 | lmuscatine,"Lissa Muscatine" 257 | muscantinel@state.gov,"Lissa Muscatine" 258 | "Quam, Lois","Lois Quam" 259 | ValmmoroLJ@state.gov,"Lona Valmoro" 260 | "Valmoro, Lona","Lona Valmoro" 261 | "Valmoro, Lona J","Lona Valmoro" 262 | ValmoroLI@state.gov,"Lona Valmoro" 263 | ValmoroLJ@state.gov,"Lona Valmoro" 264 | "Walmoro, Lona J","Lona Valmoro" 265 | "Long Term Strategy Group","Long Term Strategy Group" 266 | "Diamond, Louise","Louise Diamond" 267 | "Cue, Lourdes C","Lourdes Cue" 268 | "G/TIP:CdeBaca, Luis","Luis CdeBaca" 269 | Luzzatto,Luzzatto 270 | lynn,"Lynn Forester de Rothschild" 271 | "Albright, M K","M. Albright" 272 | "Albright, Madeleine","Madeleine Albright" 273 | Williamsbarrett,"Maggie Williams" 274 | "Calivis, Maria","Maria Calivis" 275 | "Otero, Maria","Maria Otero" 276 | oterom2@state.gov,"Maria Otero" 277 | "AF/PDPA:Scott, Marianne","Marianne Scott" 278 | mark,"Mark Hyman" 279 | markjpenn,"Mark Penn" 280 | mtorrey,"Marty Torrey" 281 | "ECA:Pally, Maura","Maura Pally" 282 | "Pally, Maura M","Maura Pally" 283 | "Baucus, Max","Max Baucus" 284 | "Rooney, Megan","Megan Rooney" 285 | PVerveer,"Melanne Verveer" 286 | "Vereer, Melanne S","Melanne Verveer" 287 | "Verveer, Melanne E","Melanne Verveer" 288 | "Verveer, Melanne S","Melanne Verveer" 289 | verveerms@state.gov,"Melanne Verveer" 290 | "Fuchs, Michael H","Michael Fuchs" 291 | FuchsMH@state.gov,"Michael Fuchs" 292 | "Posner, Michael H","Michael Posner" 293 | PosnerMH@state.gov,"Michael Posner" 294 | "Bond, Michele T","Michele Bond" 295 | michele.flournoy,"Michele Flournoy" 296 | "Rodriguez, Miguel E","Miguel Rodriguez" 297 | Mike,Mike 298 | "Hanley, Monica R","Monica Hanley" 299 | hanleymr@state.gov,"Monica Hanley" 300 | NHLA,NHLA 301 | nancy,"Nancy Parrish" 302 | "Neera, Tanden","Neera Tanden" 303 | "Tandem, Neera","Neera Tanden" 304 | "Tanden, Neera","Neera Tanden" 305 | ntanden,"Neera Tanden" 306 | "New York Times","New York Times" 307 | "Norman, Nicholas","Nicholas Norman" 308 | "Toiv, Nora F","Nora Toiv" 309 | toivnf@state.gov,"Nora Toiv" 310 | "Tov, Nora F","Nora Tov" 311 | "Opinion Research","Opinion Research" 312 | "Sanchez, Oscar Arias","Oscar Arias Sanchez" 313 | "Flores, Oscar","Oscar Flores" 314 | "Lores, Oscar","Oscar Lores" 315 | PVervee,PVervee 316 | "Kennedy, Patrick F","Patrick Kennedy" 317 | "M:Kennedy, Patrick F","Patrick Kennedy" 318 | "Collier, Paul","Paul Collier" 319 | "Jones, Paul W","Paul Jones" 320 | "NEA/IPA:Knopf, Payton","Payton Knopf" 321 | "Robinson, Peter","Peter Robinson" 322 | "Crowley, Philip","Philip Crowley" 323 | "Crowley, Philip J","Philip Crowley" 324 | "Gordon, Philip H","Philip Gordon" 325 | "Gordon. Philip H","Philip Gordon" 326 | gordonph@state.gov,"Philip Gordon" 327 | PIR,"Philippe Reines" 328 | "Reines, Philipe I","Philippe Reines" 329 | "Reines, Philippe","Philippe Reines" 330 | "Reines, Philippe I","Philippe Reines" 331 | "Reines, Phillippe I","Philippe Reines" 332 | "Rines, Philippe I","Philippe Reines" 333 | preines,"Philippe Reines" 334 | reines@state.gov,"Philippe Reines" 335 | reinesp@state.gov,"Philippe Reines" 336 | "Crowley, Phillip J","Phillip Crowley" 337 | crowleypj@state.gov,"Phillip Crowley" 338 | "Campbell, Piper","Piper Campbell" 339 | "Prime Minister","Prime Minister" 340 | "Shah, Rajiv","Rajiv Shah" 341 | rshah,"Rajiv Shah" 342 | Recos,Recos 343 | "Philippe, Reines","Reines Philippe" 344 | "Preval, Rene","Rene Preval" 345 | "Lewis, Reta Jo","Reta Jo Lewis" 346 | HolbrookeRC@state.gov,"Richard Holbrooke" 347 | "Verma, Richard","Richard Verma" 348 | "Verma, Richard R","Richard Verma" 349 | vermarr@state.gov,"Richard Verma" 350 | rsloan,"Rick Sloan" 351 | "Blake, Robert O","Robert Blake" 352 | "Danford, Robert A","Robert Danford" 353 | "Hormats, Robert D","Robert Hormats" 354 | hormatsrd@state.gov,"Robert Hormats" 355 | "Russo, Robert V","Robert Russo" 356 | "Russo, Robert V","Robert Russo" 357 | Russorv@state.gov,"Robert Russo" 358 | rodriguezme@state.gov,"Rodriguez Miguel" 359 | "Howe, Rosemarie","Rosemarie Howe" 360 | "rosemarie.howe","Rosemarie Howe" 361 | "Zaidi, S Akbar","S. Akbar Zaidi" 362 | S/SRGIA,SRGIA 363 | STATE,STATE 364 | "Berger, Samuel R","Samuel (""Sandy"") Berger" 365 | SBerger,"Samuel (""Sandy"") Berger" 366 | Sandy,"Samuel (""Sandy"") Berger" 367 | "Berger, Samuel","Samuel Berger" 368 | sgration,"Scott Gration" 369 | "Blumenthal, Sidney","Sidney Blumenthal" 370 | Sid,"Sidney Blumenthal" 371 | sbwhoeop,"Sidney Blumenthal" 372 | sbwhoeop@,"Sidney Blumenthal" 373 | sbwhoop,"Sidney Blumenthal" 374 | Sir,Sir 375 | "Talbott, Strobe","Strobe Talbott" 376 | stalbott,"Strobe Talbott" 377 | stallbott,"Strobe Talbott" 378 | "Rice, Susan E","Susan Rice" 379 | "Rice, Susan E.","Susan Rice" 380 | "Grantham, Suzanne L","Suzanne Grantham" 381 | Terry.Duffy,"Terry Duffy" 382 | "Donilon, Thomas E","Thomas Donilon" 383 | "Nides, Thomas R","Thomas Nides" 384 | NidesTR@state.gov,"Thomas Nides" 385 | "Shannon, Thomas A","Thomas Shannon" 386 | "WHA: Shannon, Thomas A","Thomas Shannon" 387 | shannonta@state.gov,"Thomas Shannon" 388 | TFlourno,"Tina Flournoy" 389 | "Stern, Todd","Todd Stern" 390 | "Stern, Todd D","Todd Stern" 391 | "Stern, Todd D (S/SECC)","Todd Stern" 392 | sterntd@state.gov,"Todd Stern" 393 | "Tillemann, Tomicah","Tomicah Tillemann" 394 | "Tillemann, Tomicah S","Tomicah Tillemann" 395 | TillemannTS@state.gov,"Tomicah Tillemann" 396 | "Elbegdori, Tsakina","Tsakina Elbegdori" 397 | "U.S. Global Leadership Coalition","U.S. Global Leadership Coalition" 398 | "Estados Unidos Da America","United States of America" 399 | "Estados Unidos De NorteAmerica","United States of America" 400 | "Etat-Unis D'Amerique","United States of America" 401 | "Etats-Unis D'Amerique","United States of America" 402 | "United States of America","United States of America" 403 | "Nuland, Victoria J","Victoria Nuland" 404 | "Ebeling, Voda","Voda Ebeling" 405 | "Ebelling, Voda","Voda Ebeling" 406 | WHADP,WHADP 407 | "Washington Post","Washington Post" 408 | "Sherman, Wendy","Wendy Sherman" 409 | "Sherman, Wendy R","Wendy Sherman" 410 | wsherman,"Wendy Sherman" 411 | "Ilic, Werner X","Werner Ilic" 412 | "White House","White House" 413 | "Burns, William J","William Burns" 414 | Burnswj@state.gov,"William Burns" 415 | wburns,"William Burns" 416 | wburns66,"William Burns" 417 | wburns66@,"William Burns" 418 | "Hubbard, William","William Hubbard" 419 | "Iscol, Zachary","Zachary Iscol" 420 | aclb,aclb 421 | alcb,alcb 422 | l,l 423 | latimes.com,latimes.com 424 | mh.interiors,mh.interiors 425 | mhcaleja@state.gov,mhcaleja@state.gov 426 | postmaster@state.gov,postmaster@state.gov 427 | rooneym@state.gov,rooneym@state.gov 428 | rrh.interiors,rrh.interiors 429 | --------------------------------------------------------------------------------