├── .gitignore
├── figs
│   ├── worldwide_normalized_tweets.pdf
│   └── worldwide_normalized_tweets.png
├── tweets_hydrator
│   ├── package
│   │   └── tweets_hydrator.jar
│   ├── twitter.properties
│   ├── Readme.md
│   ├── pom.xml
│   ├── src
│   │   └── main
│   │       └── java
│   │           └── qa
│   │               └── qcri
│   │                   └── tweetsretrieval
│   │                       ├── TweetsRetrievalTool.java
│   │                       └── TwitterAPI.java
│   └── sample_tweet_ids.txt
├── LICENSE
├── parsers
│   ├── base_file_data_extractor.py
│   └── meta_file_parser.py
├── preprocessing
│   └── user_location_preprocessing.py
├── meta_data
│   └── meta_file_monthly_ids_range.tsv
└── README.md

/.gitignore:
--------------------------------------------------------------------------------
Tweets hydrator/.DS_Store
Preprocessing scripts/.DS_Store
.DS_Store
--------------------------------------------------------------------------------
/figs/worldwide_normalized_tweets.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrisisComputing/TBCOV/HEAD/figs/worldwide_normalized_tweets.pdf
--------------------------------------------------------------------------------
/figs/worldwide_normalized_tweets.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrisisComputing/TBCOV/HEAD/figs/worldwide_normalized_tweets.png
--------------------------------------------------------------------------------
/tweets_hydrator/package/tweets_hydrator.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrisisComputing/TBCOV/HEAD/tweets_hydrator/package/tweets_hydrator.jar
--------------------------------------------------------------------------------
/tweets_hydrator/twitter.properties:
--------------------------------------------------------------------------------
consumer.key=XXXXXXXXXXXXXXXX
consumer.secret=XXXXXXXXXXXXXXXX
access.token=XXXXXXXXXXXXXXXX
access.token.secret=XXXXXXXXXXXXXXXX
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2021 CrisisComputing

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/tweets_hydrator/Readme.md:
--------------------------------------------------------------------------------
# Tweets Hydrator Usage Guide

## Description
This Java-based program hydrates tweets through the Twitter API. The tool makes 180 API calls per 15-minute window, and each API call downloads up to 100 tweets, i.e., it can download up to 72,000 tweets per hour (a back-of-envelope throughput sketch follows this guide).

## How to use

1. Add tweet ids to a text file (one per line). A sample tweet-ids file is provided in the package.
2. Create a Twitter app (if you don't have one) to obtain the following four tokens. Once obtained, add them to the `twitter.properties` file.

`consumer.key=XXXX`

`consumer.secret=XXXX`

`access.token=XXXX`

`access.token.secret=XXXX`

3. Run the `tweets_hydrator.jar` file from the package folder as shown in the following command. The command expects two parameters: the first is the file containing tweet ids, and the second is the path and name of the output file where the tool should store the downloaded tweets.

`java -classpath TweetsRetrieval-1.2-jar-with-dependencies.jar qa.qcri.tweetsretrieval.TweetsRetrievalTool sample_tweet_ids.txt output.txt`

## Compilation
After making changes, the project can be recompiled with this command.
`mvn clean compile assembly:single`
--------------------------------------------------------------------------------
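For planning long-running jobs, the limits quoted above (180 calls per 15-minute window, up to 100 tweets per call) translate directly into a wall-clock estimate. A minimal sketch of that arithmetic in Python, assuming only the stated limits and nothing about the tool itself:

```python
import math

CALLS_PER_WINDOW = 180   # statuses/lookup calls allowed per 15-minute window
TWEETS_PER_CALL = 100    # maximum ids accepted by a single lookup call
WINDOW_MINUTES = 15

def estimated_hours(num_tweet_ids: int) -> float:
    """Rough lower bound on wall-clock hours needed to hydrate num_tweet_ids ids."""
    calls_needed = math.ceil(num_tweet_ids / TWEETS_PER_CALL)
    windows_needed = math.ceil(calls_needed / CALLS_PER_WINDOW)
    return windows_needed * WINDOW_MINUTES / 60

print(estimated_hours(2_000_000))  # 28.0 -> about a day and four hours for two million ids
```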
/tweets_hydrator/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>qa.qcri</groupId>
  <artifactId>TweetsRetrieval</artifactId>
  <version>1.2</version>
  <packaging>jar</packaging>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <dependency>
      <groupId>oauth.signpost</groupId>
      <artifactId>signpost-core</artifactId>
      <version>1.2.1.2</version>
    </dependency>
    <dependency>
      <groupId>javax.json</groupId>
      <artifactId>javax.json-api</artifactId>
      <version>1.0</version>
    </dependency>
    <dependency>
      <groupId>org.glassfish</groupId>
      <artifactId>javax.json</artifactId>
      <version>1.0.4</version>
      <scope>runtime</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <archive>
            <manifest>
              <mainClass>qa.qcri.tweetsretrieval.TweetsRetrievalTool</mainClass>
            </manifest>
          </archive>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
--------------------------------------------------------------------------------
/tweets_hydrator/src/main/java/qa/qcri/tweetsretrieval/TweetsRetrievalTool.java:
--------------------------------------------------------------------------------
package qa.qcri.tweetsretrieval;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOError;
import java.io.IOException;
import java.io.Writer;
import java.nio.file.Files;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.stream.Collectors;

import javax.json.JsonArray;

public class TweetsRetrievalTool {

    private static final String NEWLINE = System.getProperty("line.separator");

    public static void main(String[] args) throws IOException {
        if (args.length < 2) {
            System.err.println("This app needs two parameters: source and destination files.");
            return;
        }

        Properties p = new Properties();
        p.load(new FileInputStream("twitter.properties"));

        TwitterAPI twitter = new TwitterAPI(p);

        // Read all tweet ids and group them into batches of 100 (the API maximum).
        List<String> lines = Files.readAllLines(new File(args[0]).toPath());
        int[] r = new int[]{0};
        Map<Integer, List<String>> groups = lines.stream()
                .collect(Collectors.groupingBy(x -> r[0]++ / 100));

        try (Writer dest = new FileWriter(new File(args[1]))) {
            groups.forEach((key, value) -> {
                List<String> ids = value.stream()
                        .map(s -> s.replace("'", ""))
                        .collect(Collectors.toList());
                try {
                    JsonArray tweets =
                            (JsonArray) twitter.getStatusesLookup(String.join(",", ids));
                    tweets.forEach(t -> {
                        try {
                            dest.write(t.toString());
                            dest.write(NEWLINE);
                        } catch (IOException e) {
                            throw new IOError(e);
                        }
                    });
                } catch (Exception e) {
                    throw new IOError(e);
                }
            });
        }
    }

}
--------------------------------------------------------------------------------
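The `Collectors.groupingBy` trick above partitions the input ids into consecutive batches of at most 100, the maximum a single `statuses/lookup` call accepts. For readers more comfortable in Python, an equivalent batching sketch (not part of the repository's tooling):

```python
def batches(ids, size=100):
    """Yield consecutive batches of at most `size` ids, preserving input order."""
    for start in range(0, len(ids), size):
        yield ids[start:start + size]

with open("sample_tweet_ids.txt") as f:
    # Strip the surrounding quotes, exactly as the Java tool does
    ids = [line.strip().replace("'", "") for line in f if line.strip()]

for batch in batches(ids):
    print(",".join(batch))  # the comma-joined id list one lookup call expects
```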
/parsers/base_file_data_extractor.py:
--------------------------------------------------------------------------------
import sys

# Sample run:
# python base_file_data_extractor.py required_monthly_files.txt test_for_meta_parsing.txt '/some/path/'


# Pass the output of meta_file_parser.py to this script
meta_parser_output = open(sys.argv[1])

# Pass the IDs file for which you want to extract the tweets/full details
ids_file = open(sys.argv[2])

# Base release files path (expects '/' at the end)
base_release_file_path = sys.argv[3]

# Output file
output_file = open(sys.argv[2].replace('.txt', '') + "_detailed.tsv", "w+")


# Insert a key/value pair into a dictionary, skipping None keys
def insert_in_dict(dictionary, key, value):
    if key is not None:
        dictionary[key] = value
    return dictionary


ids_dict = {}

for line in ids_file:
    insert_in_dict(ids_dict, line.strip(), True)

ids_file.close()

header_columns = [
    "tweet_id", "date_time", "lang", "user_id", "retweeted_id", "quoted_id",
    "in_reply_to_id", "sentiment_conf", "sentiment_label", "user_type",
    "gender_label", "tweet_text_named_entities", "geo_coordinates_lat_lon",
    "geo_country_code", "geo_state", "geo_county", "geo_city",
    "place_bounding_box", "place_country_code", "place_state", "place_county",
    "place_city", "user_loc_toponyms", "user_loc_country_code", "user_loc_state",
    "user_loc_county", "user_loc_city", "user_profile_description_toponyms",
    "user_profile_description_country_code", "user_profile_description_state",
    "user_profile_description_county", "user_profile_description_city",
    "tweet_text_toponyms", "tweet_text_country_code", "tweet_text_state",
    "tweet_text_county", "tweet_text_city"
]
output_file.write("\t".join(header_columns))
output_file.write('\n')
print('\n')
print("Parsing started... wait for the data extraction....")

# Extract the required full records
for key in meta_parser_output:

    file_to_be_read = open(base_release_file_path + key.strip())

    for full_info_line in file_to_be_read:

        data = full_info_line.split('\t')

        # The first column of each base file row is the tweet_id
        if ids_dict.get(data[0]) is not None:
            output_file.write(full_info_line)

    file_to_be_read.close()

    print('\n')
    print(key.strip() + " file completely parsed *******")

output_file.close()

meta_parser_output.close()
--------------------------------------------------------------------------------
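Since the extractor writes one output row per matched id, a quick sanity check after a run is to compare the extracted ids against the requested ones. A minimal sketch, reusing the sample file names from the comment at the top of the script:

```python
# Ids that were requested
with open("test_for_meta_parsing.txt") as f:
    requested = {line.strip() for line in f if line.strip()}

# Ids that actually made it into the extractor's output
with open("test_for_meta_parsing_detailed.tsv") as f:
    next(f)  # skip the header row written by the extractor
    extracted = {line.split("\t", 1)[0] for line in f}

print(f"requested: {len(requested)}, extracted: {len(extracted)}")
print(f"not found in the parsed base files: {len(requested - extracted)}")
```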
/parsers/meta_file_parser.py:
--------------------------------------------------------------------------------
import sys
from tqdm import tqdm

# Sample run:
# python meta_file_parser.py test_for_meta_parsing.txt meta_data/meta_file_monthly_ids_range.tsv


# Pass the country/language IDs file that you want to extract
opened_file = open(sys.argv[1])

# Pass the meta-file as provided in the repo
meta_file_opened = open(sys.argv[2])

meta_file_dict = {}

output_file = open(str(sys.argv[1].split('.')[0]) + "_required_monthly_files.txt", "w+")


# Insert a key/value pair into a dictionary, skipping None keys
def insert_in_dict(dictionary, key, value):
    if key is not None:
        dictionary[key] = value
    return dictionary


# Start from the second line (skip the header)
next(meta_file_opened)

# Load the meta-file into a dictionary: file_name -> [start_id, end_id]
for meta_line in meta_file_opened:

    meta_array = meta_line.strip().split('\t')

    insert_in_dict(meta_file_dict, meta_array[0], [int(meta_array[1]), int(meta_array[2])])

meta_file_opened.close()

print("\n")
print("File '*_required_monthly_files.txt' is being generated, please wait...")
print("\n")

# Which base files need to be used
files_to_be_downloaded = set()

num_lines = sum(1 for line in open(sys.argv[1], 'r'))

# Check which monthly file's id range covers each tweet id
for line in tqdm(opened_file, total=num_lines):

    tweet_id = int(line.strip())

    for key in meta_file_dict:
        start_id = meta_file_dict[key][0]
        end_id = meta_file_dict[key][1]

        if start_id <= tweet_id <= end_id:
            files_to_be_downloaded.add(key)

# Write the sorted list of required files to the output text file
list_files = sorted(files_to_be_downloaded)

for file_name in list_files:
    output_file.write(file_name + '\n')

opened_file.close()
output_file.close()
--------------------------------------------------------------------------------
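The loop above checks every monthly range for every id, which is fine for modest id lists but costs O(ids × files). Because the id ranges in the shipped meta-file do not overlap, a binary search over sorted start ids is an optional speed-up; a sketch, not part of the original tooling, using two real rows from the meta-file:

```python
import bisect

# Built the same way as meta_file_dict above: file_name -> [start_id, end_id]
meta = {
    "february_2020_f1.tsv": [1223395535882768385, 1231201257739649025],
    "february_2020_f2.tsv": [1231201258092011523, 1233487061354762240],
}
ranges = sorted((v[0], v[1], name) for name, v in meta.items())
starts = [r[0] for r in ranges]

def file_for(tweet_id: int):
    """Return the base file whose [start, end] range contains tweet_id, else None."""
    i = bisect.bisect_right(starts, tweet_id) - 1
    if i >= 0 and tweet_id <= ranges[i][1]:
        return ranges[i][2]
    return None

print(file_for(1223395535882768400))  # february_2020_f1.tsv
```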
/tweets_hydrator/src/main/java/qa/qcri/tweetsretrieval/TwitterAPI.java:
--------------------------------------------------------------------------------
package qa.qcri.tweetsretrieval;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Properties;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;

import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonReader;

import oauth.signpost.OAuthConsumer;
import oauth.signpost.basic.DefaultOAuthConsumer;
import oauth.signpost.exception.OAuthException;

public class TwitterAPI {

    private static final Logger log = Logger.getLogger(TwitterAPI.class.getName());
    private static final String BASE = "https://api.twitter.com";

    private OAuthConsumer consumer;

    public TwitterAPI(Properties p) {
        consumer = new DefaultOAuthConsumer(p.getProperty("consumer.key"), p.getProperty("consumer.secret"));
        consumer.setTokenWithSecret(p.getProperty("access.token"), p.getProperty("access.token.secret"));
    }

    protected Object call(String url) throws IOException, InterruptedException, OAuthException {
        URL url2 = new URL(consumer.sign(url));
        HttpURLConnection conn = (HttpURLConnection) url2.openConnection();
        conn.setRequestProperty("Accept-Encoding", "gzip");
        int rc = conn.getResponseCode();
        InputStream stream = null;
        switch (rc) {
        case HttpURLConnection.HTTP_OK:
            stream = conn.getInputStream();
            break;
        case HttpURLConnection.HTTP_FORBIDDEN:
            stream = conn.getErrorStream();
            break;
        case 429: // https://dev.twitter.com/rest/public/rate-limiting
            long reset = 1000 * conn.getHeaderFieldLong("X-Rate-Limit-Reset", 0);
            long millis = reset - System.currentTimeMillis();
            if (millis < 60000) {
                log.info(String.format("waiting for %d sec (API call rate limit exceeded)", millis / 1_000));
            } else {
                log.info(String.format("waiting for %d min (API call rate limit exceeded)", millis / 60_000));
            }
            Thread.sleep(millis + 1000);
            return call(url);
        default:
            throw new IOException(conn.getResponseMessage());
        }
        if ("gzip".equals(conn.getHeaderField("Content-Encoding")))
            stream = new GZIPInputStream(stream);

        int limit = conn.getHeaderFieldInt("X-Rate-Limit-Limit", 0);
        int remaining = conn.getHeaderFieldInt("X-Rate-Limit-Remaining", 0);
        long reset = conn.getHeaderFieldLong("X-Rate-Limit-Reset", 0);
        log.info(String.format("%d/%d(%d)", limit, remaining, reset));

        JsonReader in = Json.createReader(stream);
        return in.read();
    }

    public JsonArray getStatusesLookup(String jobid) throws IOException, InterruptedException, OAuthException {
        String endpoint = BASE + "/1.1/statuses/lookup.json?id=" + jobid;
        return (JsonArray) call(endpoint);
    }

}
--------------------------------------------------------------------------------
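`TwitterAPI.call` handles HTTP 429 by sleeping until the epoch given in the `X-Rate-Limit-Reset` header and then retrying. For reference, a minimal Python equivalent of that backoff loop, assuming the third-party `requests` library and an already-authenticated OAuth session (neither is part of this repository):

```python
import time
import requests  # assumption: third-party HTTP library, not a repo dependency

def get_with_backoff(session: requests.Session, url: str) -> requests.Response:
    """GET url, sleeping until X-Rate-Limit-Reset whenever Twitter returns 429."""
    while True:
        resp = session.get(url)
        if resp.status_code != 429:
            return resp
        reset = int(resp.headers.get("x-rate-limit-reset", 0))
        wait = max(reset - time.time(), 0) + 1  # one second of slack, like the Java tool
        time.sleep(wait)
```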
/preprocessing/user_location_preprocessing.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3.7.0

# -*- coding: utf-8 -*-
from cleantext import clean
import re
import emoji
import unicodedata
import sys

# Map every Unicode punctuation character to a space
PUNCT_TRANSLATE_UNICODE = dict.fromkeys(
    (i for i in range(sys.maxunicode) if unicodedata.category(chr(i)).startswith("P")),
    " ",
)

def preprocess_user_location(text):

    if text is None:
        return None
    text = text.strip()
    # remove URLs
    text = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', ' ', text)
    text = re.sub(r'http\S+', ' ', text)
    # remove usernames
    text = re.sub(r'@[^\s]+', ' ', text)
    # remove the # in #hashtag
    text = re.sub(r'#([^\s]+)', r'\1', text)

    # remove emojis
    text = emoji.get_emoji_regexp().sub(u'', text)

    text = text.replace('\n', ' ')
    text = text.replace('\t', ' ')
    text = text.replace('\r', ' ')
    text = text.replace('"', ' ')
    text = text.replace('~', ' ')
    text = text.replace('|', ' ')
    # protect commas from the punctuation removal below
    text = text.replace(',', '<<<>>>')


    # note: the cleaned value is not used further below; the pipeline
    # continues from `text`, which keeps the <<<>>> comma placeholder intact
    preprocessed_text = clean(text,
        fix_unicode=True,        # fix various unicode errors
        to_ascii=True,           # transliterate to closest ASCII representation
        lower=False,             # lowercase text
        no_line_breaks=True,     # fully strip line breaks as opposed to only normalizing them
        no_urls=True,            # replace all URLs with a special token
        no_emails=True,          # replace all email addresses with a special token
        no_phone_numbers=True,   # replace all phone numbers with a special token
        no_numbers=False,        # replace all numbers with a special token
        no_digits=False,         # replace all digits with a special token
        no_currency_symbols=True,# replace all currency symbols with a special token
        no_punct=True,           # fully remove punctuation
        replace_with_url="",
        replace_with_email="",
        replace_with_phone_number="",
        replace_with_currency_symbol="",
        # replace_with_punct=" ",
        # lang="en"              # set to 'de' for German special handling
    )

    # remove remaining punctuation (including accents' punctuation marks)
    text = text.translate(PUNCT_TRANSLATE_UNICODE)

    # restore the protected commas
    text = text.replace('<<<>>>', ',')

    # remove extra spaces
    text = re.sub(' +', ' ', text)

    # reject degenerate results such as ', ', ' ', 'a', ',' etc.
    if(len(text.replace(',', '')) > 1 and len(text.replace(',', '').replace(' ', '')) > 1 and len(text.replace(' ', '')) > 1 and len(text) > 1 and text != '' and text != ' '):
        return text
    else:
        return None

# some tests
print(preprocess_user_location('Doha, Qatar ####'))
# Converts it to Doha, Qatar

print(preprocess_user_location(' USA '))
# Converts it to USA
--------------------------------------------------------------------------------
/meta_data/meta_file_monthly_ids_range.tsv:
--------------------------------------------------------------------------------
File_name	Start_id	End_id
february_2020_f1.tsv	1223395535882768385	1231201257739649025
february_2020_f2.tsv	1231201258092011523	1233487061354762240
february_2020_f3.tsv	1233487061400813568	1233904783833976833
march_2020_f1.tsv	1233904784010358785	1235614779965927425
march_2020_f10.tsv	1244321291236245511	1245075687599464448
march_2020_f11.tsv	1245075687767035905	1245138807797710849
march_2020_f2.tsv	1235614780003684352	1237088322662645760
march_2020_f3.tsv	1237088322696159232	1238201450951565313
march_2020_f4.tsv	1238201451123531778	1239190893497901056
march_2020_f5.tsv	1239190893506199552	1240091884468555776
march_2020_f6.tsv	1240091884472565761	1241643047554887681
march_2020_f7.tsv	1241643047559061506	1242485419352002573
march_2020_f8.tsv	1242485419457097734	1243478992633491457
march_2020_f9.tsv	1243478992641982467	1244321291227877382
april_2020_f1.tsv	1245138807957213188	1245885018100006913
april_2020_f10.tsv	1252230722540060672	1253025036719599616
april_2020_f11.tsv	1253025036736385030	1253833370259333120
april_2020_f12.tsv	1253833370284380161	1254690078980362241
april_2020_f13.tsv	1254690079030812672	1255591483429793802
april_2020_f14.tsv	1255591483450802176	1256010443749015554
april_2020_f2.tsv	1245885018150277120	1246767531500670977
april_2020_f3.tsv	1246767531504828416	1247560749041496065
april_2020_f4.tsv	1247560749066698753	1248282920172863488
april_2020_f5.tsv	1248282920185282560	1249049719042527232
april_2020_f6.tsv	1249049719046901760	1249852958428278787
april_2020_f7.tsv	1249852958508023809	1250639237420122113
april_2020_f8.tsv	1250639237441032192	1251425458081628163
april_2020_f9.tsv	1251425458094129152	1252230722485579778
may_2020_f1.tsv	1256010443790811137	1257060910776102915
may_2020_f10.tsv	1265592235187453959	1267121083511169024
may_2020_f11.tsv	1267121083553021952	1267244467603550212
may_2020_f2.tsv	1257060910792916992	1258065104886730752
may_2020_f3.tsv	1258065104941322240	1259114424977195008
may_2020_f4.tsv	1259114425010667520	1260172229393780736
may_2020_f5.tsv	1260172229452726272	1261204076987121666
may_2020_f6.tsv	1261204076991254528	1262280725505560577
may_2020_f7.tsv	1262280725518258176	1263351336470380546
may_2020_f8.tsv	1263351336516681728	1264481336284327937
may_2020_f9.tsv	1264481336401825798	1265592235166498816
june_2020_f2.tsv	1268678185518010368	1270188480736325632
june_2020_f3.tsv	1270188480770068480	1271734074185527296
june_2020_f4.tsv	1271734074269466624	1273249326085660675
june_2020_f5.tsv	1273249326135984133	1274813759215681536
june_2020_f6.tsv	1274813759488430086	1276368684643016705
june_2020_f7.tsv	1276368684680753152	1278116103579893760
july_2020_f1.tsv	1278116103714193408	1279672350317916162
july_2020_f2.tsv	1279672350322184193	1281127821038030852
july_2020_f3.tsv	1281127821105135617	1282607432242466816
july_2020_f4.tsv	1282607432259252224	1283761237512163329
july_2020_f5.tsv	1283761237596282880	1285121873857183745
july_2020_f6.tsv	1285121873886375936	1286598075998449664
july_2020_f7.tsv	1286598076128468994	1288118797161783304
july_2020_f8.tsv	1288118797325357061	1289350127384059906
june_2020_f1.tsv	1267244467695824897	1268678185484419073
august_2020_f1.tsv	1289350127459606528	1290914738906554368
august_2020_f2.tsv	1290914738944086016	1292487386212048896
august_2020_f3.tsv	1292487386375622656	1294236478365741056
august_2020_f4.tsv	1294236478394920960	1295945048639635457
august_2020_f5.tsv	1295945048782249985	1297582288671330310
august_2020_f6.tsv	1297582288792936448	1299160074728865792
august_2020_f7.tsv	1299160074741374977	1300584151045885956
september_2020_f1.tsv	1300584151356186629	1302269940070780928
september_2020_f2.tsv	1302269940104417285	1303958352255483904
september_2020_f3.tsv	1303958352289030149	1305661526779596800
september_2020_f4.tsv	1305661526867611648	1310597145033158658
september_2020_f5.tsv	1310597145075101697	1311455787064217603
october_2020_f1.tsv	1311455787303268354	1312899927576436737
october_2020_f2.tsv	1312899927580848134	1314318111148068866
october_2020_f3.tsv	1314318111202586624	1315998364996120577
october_2020_f4.tsv	1315998365000359936	1317864059375529984
october_2020_f5.tsv	1317864059409031170	1319666221038567425
october_2020_f6.tsv	1319666221055234048	1321479885110546433
october_2020_f7.tsv	1321479885139968000	1322689810998480898
november_2020_f1.tsv	1322689811350724611	1324795676618858501
november_2020_f2.tsv	1324795676811821057	1326866251117965317
november_2020_f3.tsv	1326866251222806528	1328700302992826368
november_2020_f4.tsv	1328700302997024771	1330567656244609024
november_2020_f5.tsv	1330567656299253762	1332444617116057600
november_2020_f6.tsv	1332444617225211907	1333561446932865024
december_2020_f1.tsv	1333561447054499842	1335345359833337856
december_2020_f2.tsv	1335345359959298055	1337051156506435586
december_2020_f3.tsv	1337051156598693898	1338884363908476929
december_2020_f4.tsv	1338884363967229955	1340433130780540928
december_2020_f5.tsv	1340433131057233921	1341804754494697474
december_2020_f6.tsv	1341804754561691650	1343612648198438913
december_2020_f7.tsv	1343612648227745805	1344795470808420352
january_2021_f1.tsv	1344795470900715522	1346349629365837824
january_2021_f2.tsv	1346349629437341696	1347920318686441472
january_2021_f3.tsv	1347920318761992193	1349513280482631683
january_2021_f4.tsv	1349513280688046081	1350963889895256064
january_2021_f5.tsv	1350963890079862786	1352429045670162433
january_2021_f6.tsv	1352429045716115457	1353877599127134209
january_2021_f7.tsv	1353877599165022209	1355348167848566790
january_2021_f8.tsv	1355348167861137410	1356029494600065025
february_2021_f1.tsv	1356029494696534021	1357596728745201664
february_2021_f2.tsv	1357596728778772480	1359330790413369355
february_2021_f3.tsv	1359330790547591169	1361176857119350784
february_2021_f4.tsv	1361176857123692545	1362859197721956353
february_2021_f5.tsv	1362859197784678400	1364685866174533632
february_2021_f6.tsv	1364685866191495177	1366176354841018368
march_2021_f1.tsv	1366176354941730818	1367814444961722368
march_2021_f2.tsv	1367814445112692745	1369646816979947523
march_2021_f3.tsv	1369646817038774273	1371280166681477121
march_2021_f4.tsv	1371280166853431304	1372929918892380169
march_2021_f5.tsv	1372929918976233485	1374584034257342464
march_2021_f6.tsv	1374584034316099587	1376276403436580864
march_2021_f7.tsv	1376276403457511431	1377410378527744003
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# TBCOV: Two Billion Multilingual COVID-19 Tweets with Sentiment, Entity, Geo, and Gender Labels
Welcome to the code repository for the TBCOV dataset. This repository offers several scripts that help hydrate and process the shared dataset.

The TBCOV dataset comprises more than two billion tweets from 218 countries worldwide. The following map shows worldwide tweets normalized by each country's total population, per 100,000 persons.

![Tweets normalized by countries population](figs/worldwide_normalized_tweets.png)

# Data descriptor for base release files
|Attribute|Type|Description|
|--- |--- |--- |
|tweet_id|Int64|The integer representation of the unique identifier of a tweet. This number exceeds 53 bits, and some programming languages may silently lose precision when interpreting it.|
|date_time|String|UTC time when the tweet was created.|
|lang|String|ISO-6391 Alpha-2 language code consisting of two characters.|
|user_id|String|Represents the id of the author of the tweet.|
|retweeted_id|Int64|If the tweet is a retweet, the retweeted_id represents the id of the parent tweet.|
|quoted_id|Int64|If the tweet is a quoted tweet, the quoted_id represents the id of the parent tweet.|
|in_reply_to_id|Int64|If the tweet is a reply to an existing tweet, the in_reply_to_id represents the id of the parent/original tweet.|
|sentiment_label|Int64|Represents the sentiment label values: -1 (negative), 0 (neutral), 1 (positive).|
|sentiment_conf|Float|Represents the confidence score of the sentiment classifier for the sentiment label given to a tweet.|
|user_type|String|The user type represents the identified type of the user, such as person, organization, location, etc.|
|gender_label|String|One-character code representing the identified gender of the user: F represents "female" and M represents "male".|
|tweet_text_named_entities|Dictionary array|Named entities (persons, organizations, locations, etc.) extracted from the tweet text, provided as an array of dictionaries.|
|geo_coordinates_lat_lon|Float|GPS coordinates in the latitude, longitude format retrieved from the user's GPS-enabled device.|
|geo_country_code|String|Two-character country code learned through resolving the GPS coordinates (latitude, longitude).|
|geo_state|String|The name of the state/province learned through resolving the GPS coordinates (latitude, longitude).|
|geo_county|String|The name of the county learned through resolving the GPS coordinates (latitude, longitude).|
|geo_city|String|The name of the city learned through resolving the GPS coordinates (latitude, longitude).|
|place_bounding_box|Float|Twitter-provided bounding boxes representing place tags.|
|place_country_code|String|Two-character country code learned through resolving the place bounding boxes.|
|place_state|String|The name of the state/province learned through resolving the place bounding boxes.|
|place_county|String|The name of the county learned through resolving the place bounding boxes.|
|place_city|String|The name of the city learned through resolving the place bounding boxes.|
|user_loc_toponyms|Dictionary array|Toponyms recognized and extracted from the user location field, provided as an array of dictionaries.|
|user_loc_country_code|String|Two-character country code learned through resolving the user location toponyms.|
|user_loc_state|String|The name of the state/province learned through resolving the user location toponyms.|
|user_loc_county|String|The name of the county learned through resolving the user location toponyms.|
|user_loc_city|String|The name of the city learned through resolving the user location toponyms.|
|user_profile_description_toponyms|Dictionary array|Toponyms recognized and extracted from the user profile description field, provided as an array of dictionaries.|
|user_profile_description_country_code|String|Two-character country code learned through resolving the recognized user profile description toponyms.|
|user_profile_description_state|String|The name of the state/province learned through resolving the recognized user profile description toponyms.|
|user_profile_description_county|String|The name of the county learned through resolving the recognized user profile description toponyms.|
|user_profile_description_city|String|The name of the city learned through resolving the recognized user profile description toponyms.|
|tweet_text_toponyms|Dictionary array|Toponyms recognized and extracted from the tweet full_text field, provided as an array of dictionaries.|
|tweet_text_country_code|String|Two-character country code learned through resolving the recognized tweet text toponyms.|
|tweet_text_state|String|The name of the state/province learned through resolving the recognized tweet text toponyms.|
|tweet_text_county|String|The name of the county learned through resolving the recognized tweet text toponyms.|
|tweet_text_city|String|The name of the city learned through resolving the recognized tweet text toponyms.|
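As the `tweet_id` note above warns, these ids exceed 53 bits and silently lose precision if coerced to double-precision floats by a JSON or CSV reader. One way to stay safe in Python, assuming pandas is installed (pandas is not a stated dependency of this repository), is to load id columns as strings:

```python
import pandas as pd  # assumption: pandas is available in your environment

# Read every column as a string first; ids such as 1223395535882768385
# would otherwise be rounded if parsed as float64.
# Pass header=None plus a names=[...] list if your file ships without a header row.
df = pd.read_csv("february_2020_f1.tsv", sep="\t", dtype="string")

# Convert selected columns back to numbers only where precision is safe.
df["sentiment_label"] = pd.to_numeric(df["sentiment_label"], errors="coerce")
```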

# Tweets hydration
The tweet hydration process fetches full tweet content from Twitter using tweet ids. To assist users with hydrating TBCOV tweets, this code repository provides a tool, written in Java, that takes tweet ids as input and retrieves the full tweet content from the Twitter API. More details and a usage guide for the tweets hydrator are available [here](https://github.com/CrisisComputing/TBCOV/tree/main/tweets_hydrator).

# Preprocessing
Different types of preprocessing were applied to different attributes before using them for any analysis. The preprocessing is important for replicating results. The code repository provides several scripts used to preprocess the different fields. The preprocessing scripts are available [here](https://github.com/CrisisComputing/TBCOV/tree/main/preprocessing).

# Meta-data file
The meta-data file provides a convenient and faster way to retrieve tweets from the base files. It holds the start and end tweet id of every base file. So, given a tweet-ids file (e.g., for a language or a country), the provided script determines which base files to parse to retrieve the matching tweets, instead of scanning all two billion tweets.

The [meta_file_monthly_ids_range.tsv](https://github.com/CrisisComputing/TBCOV/blob/main/meta_data/meta_file_monthly_ids_range.tsv) file lists the range of tweet IDs (between Start_id and End_id) contained in each monthly base file, as follows:
|File_name|Start_id|End_id|
|--- |--- |--- |
|february_2020_f1.tsv|1223395535882768385|1231201257739649025|
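To make the lookup concrete: a tweet id belongs in a monthly base file exactly when it lies between that file's Start_id and End_id, both inclusive, as in `meta_file_parser.py`. Using the sample row above:

```python
# Range for february_2020_f1.tsv, taken from the meta-file row above
start_id, end_id = 1223395535882768385, 1231201257739649025

def in_february_f1(tweet_id: int) -> bool:
    return start_id <= tweet_id <= end_id

print(in_february_f1(1224000000000000000))  # True  -> parse february_2020_f1.tsv
print(in_february_f1(1233904783833976833))  # False -> this id falls in february_2020_f3.tsv
```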

# Parsing using meta-file
The folder [parsers](https://github.com/CrisisComputing/TBCOV/tree/main/parsers) contains the two scripts needed to extract tweet details from the base release files, given a specific language or country IDs file.

* [meta_file_parser.py](https://github.com/CrisisComputing/TBCOV/blob/main/parsers/meta_file_parser.py) requires two arguments as input:
  1. Country/Language IDs file
  1. [meta_file_monthly_ids_range.tsv](https://github.com/CrisisComputing/TBCOV/blob/main/meta_data/meta_file_monthly_ids_range.tsv)

A sample run of the script looks as follows:

`python meta_file_parser.py [IDs file] meta_data/meta_file_monthly_ids_range.tsv`

It creates an output file named `<IDs file>_required_monthly_files.txt`, whose contents look something like this:
```bash
february_2020_f3.tsv
february_2020_f2.tsv
february_2020_f1.tsv
```

Download the listed files from [Crisis-NLP TBCOV](https://crisisnlp.qcri.org/tbcov). The output lists the monthly base files required to extract the full data for the given IDs. Make sure to download the full monthly base zip file to get the individual files named in the `*_required_monthly_files.txt` file.

Each line indicates a monthly base file that must be downloaded so that tweet details can be extracted from it with the help of the next script.

* [base_file_data_extractor.py](https://github.com/CrisisComputing/TBCOV/blob/main/parsers/base_file_data_extractor.py) requires three arguments as input:
  1. The `*_required_monthly_files.txt` file produced by the previous script
  1. Country/Language IDs file
  1. Base release files path (expects '/' at the end, for example /home/downloads/)

A sample run of the script looks as follows:
`python base_file_data_extractor.py required_monthly_files.txt test_for_meta_parsing.txt '/some/path/'`

The output is a .tsv file with the same format as the monthly base files.
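Putting the two scripts together, an end-to-end pass looks like the sketch below; `my_ids.txt` is a hypothetical Country/Language IDs file, not one shipped with the repository:

```bash
# 1. Determine which monthly base files cover the ids
python meta_file_parser.py my_ids.txt meta_data/meta_file_monthly_ids_range.tsv

# 2. Download the files listed in my_ids_required_monthly_files.txt
#    from https://crisisnlp.qcri.org/tbcov into /some/path/

# 3. Extract the matching rows into my_ids_detailed.tsv
python base_file_data_extractor.py my_ids_required_monthly_files.txt my_ids.txt '/some/path/'
```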
97 | -------------------------------------------------------------------------------- /tweets_hydrator/sample_tweet_ids.txt: -------------------------------------------------------------------------------- 1 | '503642976422096896' 2 | '503642976976113664' 3 | '503642977588494337' 4 | '503642979429388288' 5 | '503642980881022976' 6 | '503642982189252608' 7 | '503642983666032640' 8 | '503642984429395968' 9 | '503642985167609856' 10 | '503642985431830528' 11 | '503642988522647552' 12 | '503642992541171712' 13 | '503642993677443072' 14 | '503642995879849985' 15 | '503642996584513536' 16 | '503642998664884225' 17 | '503642998832242689' 18 | '503643000913006593' 19 | '503643001185636352' 20 | '503643001722118144' 21 | '503643002875969536' 22 | '503643003508883458' 23 | '503643004759195650' 24 | '503643009817128960' 25 | '503643010127499264' 26 | '503643011990183936' 27 | '503643013852463104' 28 | '503643016653860864' 29 | '503643018906570752' 30 | '503643019653181440' 31 | '503643027076694016' 32 | '503643028591218688' 33 | '503643029559734272' 34 | '503643031367467008' 35 | '503643033016233985' 36 | '503643033280462848' 37 | '503643033905410048' 38 | '503643033909202947' 39 | '503643036996227073' 40 | '503643039764860932' 41 | '503643040171700225' 42 | '503643042926977025' 43 | '503643044642848768' 44 | '503643044827394048' 45 | '503643045305516033' 46 | '503643045867188224' 47 | '503643050128592896' 48 | '503643051378483200' 49 | '503643054327484418' 50 | '503643056567255040' 51 | '503643056629768192' 52 | '503643062715703296' 53 | '503643062913212416' 54 | '503643063450083328' 55 | '503643063529398272' 56 | '503643073335685120' 57 | '503643074514272256' 58 | '503643077836546048' 59 | '503643080005013505' 60 | '503643081066184704' 61 | '503643081871089664' 62 | '503643093589958657' 63 | '503643095204782081' 64 | '503643096567926784' 65 | '503643096601858048' 66 | '503643097293914112' 67 | '503643098891575296' 68 | '503643100204773377' 69 | '503643104709066752' 70 | '503643105338216448' 71 | '503643105485004800' 72 | '503643109620588544' 73 | '503643111206027264' 74 | '503643112363671552' 75 | '503643113793933312' 76 | '503643117308764160' 77 | '503643117648486400' 78 | '503643124787609600' 79 | '503643126141947904' 80 | '503643126683009025' 81 | '503643127132221440' 82 | '503643127253450753' 83 | '503643128931565568' 84 | '503643128994086912' 85 | '503643130059829248' 86 | '503643130923466753' 87 | '503643131624292352' 88 | '503643136636506112' 89 | '503643137663725568' 90 | '503643137835675648' 91 | '503643138674528256' 92 | '503643138968133632' 93 | '503643143729082369' 94 | '503643144148099072' 95 | '503643144991567872' 96 | '503643146115223552' 97 | '503643149374205952' 98 | '503643149776875521' 99 | '503643151970869249' 100 | '503643154797854720' 101 | '503643156358115328' 102 | '503643156668096513' 103 | '503643156941111296' 104 | '503643158580690944' 105 | '503643158748880896' 106 | '503643159088218112' 107 | '503643159239622656' 108 | '503643162515349505' 109 | '503643163857526784' 110 | '503643164834803712' 111 | '503643166453825537' 112 | '503643168324071424' 113 | '503643168941015040' 114 | '503643169402003456' 115 | '503643169524047873' 116 | '503643171327602689' 117 | '503643171352739842' 118 | '503643171775983616' 119 | '503643175466983425' 120 | '503643175550861314' 121 | '503643176272265216' 122 | '503643180412436480' 123 | '503643180508909570' 124 | '503643180756393984' 125 | '503643181620015104' 126 | '503643183675219968' 127 | '503643187034853376' 128 | '503643187798228992' 
129 | '503643187802406912' 130 | '503643187903488000' 131 | '503643190054764544' 132 | '503643190675505153' 133 | '503643190864257024' 134 | '503643191061401601' 135 | '503643191531167744' 136 | '503643192734916608' 137 | '503643194832060417' 138 | '503643195897810944' 139 | '503643196111327232' 140 | '503643196661174272' 141 | '503643196945997825' 142 | '503643197294514176' 143 | '503643197919469568' 144 | '503643197969424384' 145 | '503643198116225024' 146 | '503643202125955073' 147 | '503643202403196929' 148 | '503643204156006400' 149 | '503643204865236992' 150 | '503643205280468992' 151 | '503643205879861250' 152 | '503643206219624450' 153 | '503643207486283776' 154 | '503643208169967616' 155 | '503643209151807490' 156 | '503643209671925761' 157 | '503643209894211587' 158 | '503643210288496643' 159 | '503643210330435584' 160 | '503643210892464128' 161 | '503643212108795904' 162 | '503643212112994305' 163 | '503643213979070465' 164 | '503643214180392961' 165 | '503643214834724864' 166 | '503643214943764480' 167 | '503643214998282241' 168 | '503643215950389249' 169 | '503643216084602880' 170 | '503643216424357888' 171 | '503643216952844288' 172 | '503643217514868736' 173 | '503643218278240256' 174 | '503643218714439680' 175 | '503643219885031424' 176 | '503643220949991424' 177 | '503643221218824192' 178 | '503643222598356992' 179 | '503643227074080768' 180 | '503643227858030593' 181 | '503643229460262912' 182 | '503643232266620929' 183 | '503643233834893313' 184 | '503643234498015232' 185 | '503643235265576960' 186 | '503643236217270274' 187 | '503643236267982850' 188 | '503643237022564352' 189 | '503643238574456832' 190 | '503643238712872961' 191 | '503643239782420480' 192 | '503643240000540672' 193 | '503643244337451008' 194 | '503643245361242112' 195 | '503643250842796032' 196 | '503643253875286016' 197 | '503643256409030657' 198 | '503643259571167232' 199 | '503643260590759936' 200 | '503643261315985408' 201 | '503643262541111296' 202 | '503643263018872832' 203 | '503643264348467200' 204 | '503643266764374016' 205 | '503643266907402240' 206 | '503643271743037441' 207 | '503643274196684800' 208 | '503643274926518272' 209 | '503643275266228224' 210 | '503643275643719680' 211 | '503643276335779840' 212 | '503643276499353600' 213 | '503643276654559233' 214 | '503643278441324544' 215 | '503643278697189378' 216 | '503643279083061248' 217 | '503643280400453633' 218 | '503643280475582464' 219 | '503643283294142464' 220 | '503643284174929920' 221 | '503643285572055042' 222 | '503643287706931201' 223 | '503643287954001920' 224 | '503643288612913153' 225 | '503643289317552128' 226 | '503643290307403776' 227 | '503643291150458880' 228 | '503643291167248384' 229 | '503643291758260224' 230 | '503643291859292161' 231 | '503643292505210880' 232 | '503643293176332289' 233 | '503643293406609409' 234 | '503643293880963072' 235 | '503643294573006849' 236 | '503643295218958338' 237 | '503643296577880064' 238 | '503643297685200896' 239 | '503643298310148097' 240 | '503643298657861632' 241 | '503643298918334465' 242 | '503643299329359872' 243 | '503643299996241920' 244 | '503643300877058049' 245 | '503643301535551490' 246 | '503643302130753536' 247 | '503643302147915777' 248 | '503643302915489795' 249 | '503643303490109440' 250 | '503643304039575552' 251 | '503643304790360065' 252 | '503643305352372224' 253 | '503643306291896320' 254 | '503643306971398144' 255 | '503643307071639552' 256 | '503643307684421632' 257 | '503643308460347392' 258 | '503643309190156288' 259 | '503643309622181889' 260 | 
'503643309903183873' 261 | '503643310557503488' 262 | '503643311337639936' 263 | '503643312063258625' 264 | '503643314424655872' 265 | '503643315548323840' 266 | '503643317330903040' 267 | '503643317704212480' 268 | '503643317855207424' 269 | '503643318560239616' 270 | '503643318937739265' 271 | '503643319713673217' 272 | '503643320405725184' 273 | '503643321248395264' 274 | '503643321412378624' 275 | '503643321823420416' 276 | '503643323173969920' 277 | '503643323236904961' 278 | '503643323844685825' 279 | '503643324017020928' 280 | '503643325715730432' 281 | '503643325761851392' 282 | '503643326789062657' 283 | '503643326990778368' 284 | '503643327632519168' 285 | '503643328320385025' 286 | '503643329922215937' 287 | '503643330543357952' 288 | '503643330551775232' 289 | '503643331080257537' 290 | '503643331281555456' 291 | '503643331696803840' 292 | '503643332015554560' 293 | '503643332229480448' 294 | '503643333470982144' 295 | '503643334280482816' 296 | '503643334938988544' 297 | '503643335857569792' 298 | '503643337199718400' 299 | '503643338332180482' 300 | '503643339187830784' 301 | '503643339392966657' 302 | '503643340228014081' 303 | '503643341205299200' 304 | '503643341880565760' 305 | '503643341905752064' 306 | '503643342195138563' 307 | '503643342585233408' 308 | '503643343151448065' 309 | '503643343197577216' 310 | '503643344644239360' 311 | '503643344653012992' 312 | '503643344791031808' 313 | '503643344946601984' 314 | '503643345496047616' 315 | '503643346938523649' 316 | '503643347807129602' 317 | '503643349119549440' 318 | '503643349224804353' 319 | '503643349954613248' 320 | '503643350701191168' 321 | '503643352106299392' 322 | '503643353146064896' 323 | '503643353863294976' 324 | '503643354677407744' 325 | '503643355310743552' 326 | '503643355990220800' 327 | '503643356715835392' 328 | '503643357495963648' 329 | '503643357688500225' 330 | '503643358284509184' 331 | '503643358934605824' 332 | '503643359668604928' 333 | '503643359794434049' 334 | '503643360381652992' 335 | '503643360469725184' 336 | '503643360557412352' 337 | '503643361254047744' 338 | '503643362030026754' 339 | '503643362075742208' 340 | '503643362658750464' 341 | '503643363632226304' 342 | '503643364265193472' 343 | '503643364286550017' 344 | '503643364827598848' 345 | '503643365217693697' 346 | '503643365771341825' 347 | '503643365850628097' 348 | '503643366446608384' 349 | '503643366580424704' 350 | '503643368237580288' 351 | '503643448092942337' 352 | '503643449279938561' 353 | '503643449988751361' 354 | '503643450034491392' 355 | '503643450680803328' 356 | '503643451465162752' 357 | '503643451490324480' 358 | '503643452140453889' 359 | '503643452324581376' 360 | '503643452748615681' 361 | '503643453419311104' 362 | '503643453633613825' 363 | '503643455734956032' 364 | '503643456133419008' 365 | '503643456489943040' 366 | '503643456900972544' 367 | '503643457186168832' 368 | '503643458033438720' 369 | '503643458708717568' 370 | '503643459379789825' 371 | '503643460101230594' 372 | '503643460726173696' 373 | '503643460797485056' 374 | '503643461405663232' 375 | '503643461602775040' 376 | '503643461690466304' 377 | '503643462370328576' 378 | '503643463125311488' 379 | '503643464047681536' 380 | '503643464899518464' 381 | '503643465667076096' 382 | '503643465729581056' 383 | '503643465792507904' 384 | '503643465826058240' 385 | '503643466073915393' 386 | '503643466531102720' 387 | '503643467134664704' 388 | '503643467160248320' 389 | '503643467869085696' 390 | '503643468510822400' 391 | 
'503643469228019712' 392 | '503643469982605312' 393 | '503643470628937728' 394 | '503643471262285824' 395 | '503643471304216577' 396 | '503643472080171008' 397 | '503643472445071361' 398 | '503643472654393344' 399 | '503643473178673152' 400 | '503643473434906624' 401 | '503643474122788864' 402 | '503643474768703489' 403 | '503643475544645632' 404 | '503643476207337472' 405 | '503643476899405824' 406 | '503643477683752961' 407 | '503643478166102017' 408 | '503643479025909761' 409 | '503643479734755329' 410 | '503643480372314112' 411 | '503643481244725248' 412 | '503643481328611328' 413 | '503643482029047808' 414 | '503643482456457216' 415 | '503643482733682688' 416 | '503643483442532353' 417 | '503643484646277121' 418 | '503643485035978752' 419 | '503643485418037249' 420 | '503643485493559299' 421 | '503643486076538881' 422 | '503643486516559872' 423 | '503643486835712000' 424 | '503643486923403264' 425 | '503643486956957697' 426 | '503643487531978752' 427 | '503643488219852800' 428 | '503643489037737984' 429 | '503643489742385152' 430 | '503643489956290560' 431 | '503643490316607488' 432 | '503643490967109632' 433 | '503643491143282688' 434 | '503643491852111872' 435 | '503643493286567936' 436 | '503643493961838592' 437 | '503643494725197824' 438 | '503643494846836736' 439 | '503643495815720960' 440 | '503643497426341888' 441 | '503643497426350081' 442 | '503643497703153664' 443 | '503643499137609728' 444 | '503643499808718850' 445 | '503643500576272384' 446 | '503643501255720961' 447 | '503643501943615488' 448 | '503643502606311426' 449 | '503643502723735553' 450 | '503643503726178305' 451 | '503643504493731840' 452 | '503643505026011136' 453 | '503643505336782848' 454 | '503643506100150274' 455 | '503643506141708288' 456 | '503643507215839235' 457 | '503643507865968641' 458 | '503643508537049091' 459 | '503643508549623808' 460 | '503643509052932097' 461 | '503643509292023808' 462 | '503643509929558016' 463 | '503643510911021056' 464 | '503643511355617280' 465 | '503643512160923649' 466 | '503643512467103744' 467 | '503643512932667393' 468 | '503643514321002498' 469 | '503643514505527297' 470 | '503643515738673152' 471 | '503643516174868480' 472 | '503643516883709952' 473 | '503643517512851456' 474 | '503643517907111936' 475 | '503643518276206593' 476 | '503643518922145792' 477 | '503643519719071746' 478 | '503643521140928512' 479 | '503643521807831041' 480 | '503643522986045440' 481 | '503643523296821248' 482 | '503643524487577600' 483 | '503643524840308737' 484 | '503643525712736256' 485 | '503643526375407616' 486 | '503643527101034497' 487 | '503643527746949120' 488 | '503643528447016960' 489 | '503643528455790592' 490 | '503643528732221440' 491 | '503643529324036097' 492 | '503643529382748163' 493 | '503643529621823488' 494 | '503643529697320960' 495 | '503643532541046784' 496 | '503643533647949825' 497 | '503643535044653056' 498 | '503643535057227776' 499 | '503643535112163328' 500 | '503643536106196992' --------------------------------------------------------------------------------