├── .gitignore
├── figs
│   ├── worldwide_normalized_tweets.pdf
│   └── worldwide_normalized_tweets.png
├── tweets_hydrator
│   ├── package
│   │   └── tweets_hydrator.jar
│   ├── twitter.properties
│   ├── Readme.md
│   ├── pom.xml
│   ├── src
│   │   └── main
│   │       └── java
│   │           └── qa
│   │               └── qcri
│   │                   └── tweetsretrieval
│   │                       ├── TweetsRetrievalTool.java
│   │                       └── TwitterAPI.java
│   └── sample_tweet_ids.txt
├── LICENSE
├── parsers
│   ├── base_file_data_extractor.py
│   └── meta_file_parser.py
├── preprocessing
│   └── user_location_preprocessing.py
├── meta_data
│   └── meta_file_monthly_ids_range.tsv
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | Tweets hydrator/.DS_Store
2 | Preprocessing scripts/.DS_Store
3 | .DS_Store
--------------------------------------------------------------------------------
/figs/worldwide_normalized_tweets.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrisisComputing/TBCOV/HEAD/figs/worldwide_normalized_tweets.pdf
--------------------------------------------------------------------------------
/figs/worldwide_normalized_tweets.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrisisComputing/TBCOV/HEAD/figs/worldwide_normalized_tweets.png
--------------------------------------------------------------------------------
/tweets_hydrator/package/tweets_hydrator.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CrisisComputing/TBCOV/HEAD/tweets_hydrator/package/tweets_hydrator.jar
--------------------------------------------------------------------------------
/tweets_hydrator/twitter.properties:
--------------------------------------------------------------------------------
1 | consumer.key=XXXXXXXXXXXXXXXX
2 | consumer.secret=XXXXXXXXXXXXXXXX
3 | access.token=XXXXXXXXXXXXXXXX
4 | access.token.secret=XXXXXXXXXXXXXXXX
5 |
--------------------------------------------------------------------------------
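
The four credentials above use simple `key=value` lines (Java properties format). If you need to consume them from Python instead of the bundled Java tool, for instance for the illustrative hydration sketch later in this document, a minimal parse could look like the following. This is a sketch, not part of the repository.

```python
# Minimal sketch: read twitter.properties into a dict (illustrative only).
def load_properties(path):
    props = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            # skip blank lines, comments, and anything without a key=value shape
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, value = line.split("=", 1)
            props[key.strip()] = value.strip()
    return props

# creds = load_properties("twitter.properties")
# print(creds["consumer.key"])
```
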
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 CrisisComputing
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/tweets_hydrator/Readme.md:
--------------------------------------------------------------------------------
1 | # Tweets Hydrator Usage Guide
2 |
3 | ## Description
4 | This Java-based program hydrates tweets using the Twitter APIs. The tool makes 180 API calls per 15 minutes, and each call downloads up to 100 tweets, i.e., up to 72,000 tweets per hour.
5 |
6 | ## How to use
7 |
8 | 1. Add tweet IDs to a text file (one per line). A sample tweet-IDs file is provided in the package.
9 | 2. Create a Twitter app (if you don't have one) to obtain the following four tokens. Once obtained, add them to the `twitter.properties` file.
10 |
11 | `consumer.key=XXXX`
12 |
13 | `consumer.secret=XXXX`
14 |
15 | `access.token=XXXX`
16 |
17 | `access.token.secret=XXXX`
18 |
19 | 3. Run the `tweets_hydrator.jar` file from the package folder as shown in the following command. The command expects two parameters: the first is the file containing tweet IDs, and the second is the path and name of the output file where the tool should store the downloaded tweets.
20 |
21 | `java -classpath tweets_hydrator.jar qa.qcri.tweetsretrieval.TweetsRetrievalTool sample_tweet_ids.txt output.txt`
22 |
23 | ## Compilation
24 | After making changes, recompile with the following command; Maven places the built jar (`TweetsRetrieval-1.2-jar-with-dependencies.jar`) under `target/`.
25 | `mvn clean compile assembly:single`
26 |
--------------------------------------------------------------------------------
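
For readers who prefer Python over the bundled Java tool, the same flow (batches of up to 100 IDs sent to the v1.1 `statuses/lookup` endpoint) can be sketched as follows. This is a minimal illustration rather than part of the repository; it assumes the third-party `requests` and `requests_oauthlib` packages and the same four credentials as `twitter.properties`.

```python
# Minimal hydration sketch (illustrative only, not part of the repo).
# Assumes: pip install requests requests_oauthlib
import json

import requests
from requests_oauthlib import OAuth1

LOOKUP_URL = "https://api.twitter.com/1.1/statuses/lookup.json"

def hydrate(ids_path, out_path, auth):
    with open(ids_path) as f:
        ids = [line.strip().replace("'", "") for line in f if line.strip()]
    with open(out_path, "w") as out:
        # statuses/lookup accepts at most 100 comma-separated IDs per call
        for i in range(0, len(ids), 100):
            batch = ids[i:i + 100]
            resp = requests.get(LOOKUP_URL, params={"id": ",".join(batch)}, auth=auth)
            resp.raise_for_status()
            for tweet in resp.json():
                out.write(json.dumps(tweet) + "\n")

# auth = OAuth1(consumer_key, consumer_secret, access_token, access_token_secret)
# hydrate("sample_tweet_ids.txt", "output.txt", auth)
```
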
/tweets_hydrator/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |     <groupId>qa.qcri</groupId>
7 |     <artifactId>TweetsRetrieval</artifactId>
8 |     <version>1.2</version>
9 |     <packaging>jar</packaging>
10 |     <properties>
11 |         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
12 |         <maven.compiler.source>1.8</maven.compiler.source>
13 |         <maven.compiler.target>1.8</maven.compiler.target>
14 |     </properties>
15 |     <dependencies>
16 |         <dependency>
17 |             <groupId>oauth.signpost</groupId>
18 |             <artifactId>signpost-core</artifactId>
19 |             <version>1.2.1.2</version>
20 |         </dependency>
21 |         <dependency>
22 |             <groupId>javax.json</groupId>
23 |             <artifactId>javax.json-api</artifactId>
24 |             <version>1.0</version>
25 |         </dependency>
26 |         <dependency>
27 |             <groupId>org.glassfish</groupId>
28 |             <artifactId>javax.json</artifactId>
29 |             <version>1.0.4</version>
30 |             <scope>runtime</scope>
31 |         </dependency>
32 |     </dependencies>
33 |     <build>
34 |         <plugins>
35 |             <plugin>
36 |                 <artifactId>maven-assembly-plugin</artifactId>
37 |                 <configuration>
38 |                     <archive>
39 |                         <manifest>
40 |                             <mainClass>qa.qcri.tweetsretrieval.TweetsRetrievalTool</mainClass>
41 |                         </manifest>
42 |                     </archive>
43 |                     <descriptorRefs>
44 |                         <descriptorRef>jar-with-dependencies</descriptorRef>
45 |                     </descriptorRefs>
46 |                 </configuration>
47 |             </plugin>
48 |         </plugins>
49 |     </build>
50 | </project>
--------------------------------------------------------------------------------
/tweets_hydrator/src/main/java/qa/qcri/tweetsretrieval/TweetsRetrievalTool.java:
--------------------------------------------------------------------------------
1 | package qa.qcri.tweetsretrieval;
2 | 
3 | import java.io.File;
4 | import java.io.FileInputStream;
5 | import java.io.FileWriter;
6 | import java.io.IOError;
7 | import java.io.IOException;
8 | import java.io.Writer;
9 | import java.nio.file.Files;
10 | import java.util.List;
11 | import java.util.Map;
12 | import java.util.Properties;
13 | import java.util.stream.Collectors;
14 | 
15 | import javax.json.JsonArray;
16 | 
17 | public class TweetsRetrievalTool {
18 | 
19 |     private static final String NEWLINE = System.getProperty("line.separator");
20 | 
21 |     public static void main(String[] args) throws IOException {
22 |         if (args.length < 2) {
23 |             System.err.println("This app needs two parameters: source and destination files.");
24 |             return;
25 |         }
26 | 
27 |         Properties p = new Properties();
28 |         p.load(new FileInputStream("twitter.properties"));
29 | 
30 |         TwitterAPI twitter = new TwitterAPI(p);
31 | 
32 |         // Group the input IDs into batches of 100, the statuses/lookup maximum
33 |         List<String> lines = Files.readAllLines(new File(args[0]).toPath());
34 |         int[] r = new int[]{0};
35 |         Map<Integer, List<String>> groups = lines.stream()
36 |                 .collect(Collectors.groupingBy(x -> r[0]++ / 100));
37 | 
38 |         try (Writer dest = new FileWriter(new File(args[1]))) {
39 |             groups.forEach((key, value) -> {
40 |                 // Strip the quotes used in the sample IDs file
41 |                 List<String> ids = value.stream()
42 |                         .map(s -> s.replace("'", ""))
43 |                         .collect(Collectors.toList());
44 |                 try {
45 |                     JsonArray tweets =
46 |                             (JsonArray) twitter.getStatusesLookup(String.join(",", ids));
47 |                     tweets.forEach(t -> {
48 |                         try {
49 |                             dest.write(t.toString());
50 |                             dest.write(NEWLINE);
51 |                         } catch (IOException e) {
52 |                             throw new IOError(e);
53 |                         }
54 |                     });
55 |                 } catch (Exception e) {
56 |                     throw new IOError(e);
57 |                 }
58 |             });
59 |         }
60 |     }
61 | }
--------------------------------------------------------------------------------
/parsers/base_file_data_extractor.py:
--------------------------------------------------------------------------------
1 | import sys
2 | 
3 | # Sample run:
4 | # python base_file_data_extractor.py required_monthly_files.txt test_for_meta_parsing.txt '/some/path/'
5 | 
6 | 
7 | # Pass the output of meta_file_parser.py (the required monthly files list) to this script
8 | meta_parser_output = open(sys.argv[1])
9 | 
10 | # Pass the IDs file for which you want to extract the tweets/full details
11 | ids_file = open(sys.argv[2])
12 | 
13 | # Base release files path (expects '/' at the end)
14 | base_release_file_path = sys.argv[3]
15 | 
16 | # Output file
17 | output_file = open(sys.argv[2].replace('.txt', '') + "_detailed.tsv", "w+")
18 | 
19 | 
20 | # Insert a value into a dictionary, skipping None keys
21 | def insert_in_dict(dictionary, key, value):
22 |     if key is not None:
23 |         dictionary[key] = value
24 |     return dictionary
25 | 
26 | 
27 | # Load the requested tweet IDs into a dictionary for O(1) membership checks
28 | ids_dict = {}
29 | 
30 | for line in ids_file:
31 |     insert_in_dict(ids_dict, line.strip(), True)
32 | 
33 | ids_file.close()
34 | 
35 | # Write the tab-separated header matching the base release format
36 | output_file.write("tweet_id\tdate_time\tlang\tuser_id\tretweeted_id\tquoted_id\tin_reply_to_id\tsentiment_conf\tsentiment_label\tuser_type\tgender_label\ttweet_text_named_entities\tgeo_coordinates_lat_lon\tgeo_country_code\tgeo_state\tgeo_county\tgeo_city\tplace_bounding_box\tplace_country_code\tplace_state\tplace_county\tplace_city\tuser_loc_toponyms\tuser_loc_country_code\tuser_loc_state\tuser_loc_county\tuser_loc_city\tuser_profile_description_toponyms\tuser_profile_description_country_code\tuser_profile_description_state\tuser_profile_description_county\tuser_profile_description_city\ttweet_text_toponyms\ttweet_text_country_code\ttweet_text_state\ttweet_text_county\ttweet_text_city")
37 | output_file.write('\n')
38 | print('\n')
39 | print("Parsing started... wait for the data extraction...")
40 | 
41 | # Extract the matching rows from each required base file
42 | for key in meta_parser_output:
43 | 
44 |     file_to_be_read = open(base_release_file_path + key.strip())
45 | 
46 |     for full_info_line in file_to_be_read:
47 | 
48 |         data = full_info_line.split('\t')
49 | 
50 |         if ids_dict.get(data[0]) is not None:
51 |             output_file.write(full_info_line)
52 | 
53 |     file_to_be_read.close()
54 | 
55 |     print('\n')
56 |     print(key.strip() + " file completely parsed *******")
57 | 
58 | output_file.close()
59 | meta_parser_output.close()
--------------------------------------------------------------------------------
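
A consumption note for the `_detailed.tsv` produced above: as the data descriptor in the top-level README warns, tweet IDs exceed 53 bits, so loading them as default floats silently corrupts them. A hedged sketch, assuming pandas is installed and using the example output name from the sample run:

```python
# Illustrative only: load the extracted TSV without corrupting 64-bit IDs.
import pandas as pd

df = pd.read_csv(
    "test_for_meta_parsing_detailed.tsv",  # example output name from the sample run
    sep="\t",
    dtype={"tweet_id": str, "retweeted_id": str, "quoted_id": str},  # avoid float coercion
    low_memory=False,
)
print(df["tweet_id"].head())
```
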
/parsers/meta_file_parser.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from tqdm import tqdm
3 | 
4 | # Sample run:
5 | # python meta_file_parser.py test_for_meta_parsing.txt meta_data/meta_file_monthly_ids_range.tsv
6 | 
7 | 
8 | # Pass the country/language IDs file that you want to extract
9 | opened_file = open(sys.argv[1])
10 | 
11 | # Pass the meta-file as provided in the repo
12 | meta_file_opened = open(sys.argv[2])
13 | 
14 | meta_file_dict = {}
15 | 
16 | output_file = open(str(sys.argv[1].split('.')[0]) + "_required_monthly_files.txt", "w+")
17 | 
18 | 
19 | # Insert a value into a dictionary, skipping None keys
20 | def insert_in_dict(dictionary, key, value):
21 |     if key is not None:
22 |         dictionary[key] = value
23 |     return dictionary
24 | 
25 | 
26 | # Skip the header line
27 | next(meta_file_opened)
28 | 
29 | # Load the meta-file into a dictionary: file_name -> [start_id, end_id]
30 | for meta_line in meta_file_opened:
31 |     meta_array = meta_line.strip().split('\t')
32 |     insert_in_dict(meta_file_dict, meta_array[0], [int(meta_array[1]), int(meta_array[2])])
33 | 
34 | meta_file_opened.close()
35 | 
36 | print("\n")
37 | print("File 'required_monthly_files.txt' is being generated, please wait...")
38 | print("\n")
39 | 
40 | # Which base files need to be downloaded
41 | files_to_be_downloaded = set()
42 | 
43 | # Count the input lines so tqdm can show overall progress
44 | with open(sys.argv[1], 'r') as f:
45 |     num_lines = sum(1 for line in f)
46 | 
47 | # Check each tweet ID against every monthly ID range
48 | for line in tqdm(opened_file, total=num_lines):
49 | 
50 |     tweet_id = int(line.strip())
51 | 
52 |     for key in meta_file_dict:
53 |         start_id = meta_file_dict[key][0]
54 |         end_id = meta_file_dict[key][1]
55 | 
56 |         if start_id <= tweet_id <= end_id:
57 |             files_to_be_downloaded.add(key)
58 | 
59 | # Write the required file names to the output file, sorted, one per line
60 | list_files = sorted(files_to_be_downloaded)
61 | 
62 | for name in list_files:
63 |     output_file.write(name + '\n')
64 | 
65 | opened_file.close()
66 | output_file.close()
--------------------------------------------------------------------------------
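
The loop above checks each tweet ID against every monthly range. Since the ranges are disjoint, the lookup can also be done with a binary search over the sorted start IDs; a sketch of that alternative (illustrative only, standard-library `bisect`, same meta-file format):

```python
# Illustrative O(log n) range lookup over the meta-file (not part of the repo).
import bisect

def load_ranges(meta_path):
    """Return (start_id, end_id, file_name) tuples sorted by start_id."""
    ranges = []
    with open(meta_path) as f:
        next(f)  # skip the header row
        for line in f:
            name, start, end = line.strip().split('\t')
            ranges.append((int(start), int(end), name))
    ranges.sort()
    return ranges

def find_file(ranges, starts, tweet_id):
    """Return the base file whose [start_id, end_id] range covers tweet_id, if any."""
    i = bisect.bisect_right(starts, tweet_id) - 1
    if i >= 0 and tweet_id <= ranges[i][1]:
        return ranges[i][2]
    return None

# ranges = load_ranges("meta_data/meta_file_monthly_ids_range.tsv")
# starts = [r[0] for r in ranges]
# find_file(ranges, starts, 1223395535882768385)  # -> 'february_2020_f1.tsv'
```
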
/tweets_hydrator/src/main/java/qa/qcri/tweetsretrieval/TwitterAPI.java:
--------------------------------------------------------------------------------
1 | package qa.qcri.tweetsretrieval;
2 | 
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 | import java.net.HttpURLConnection;
6 | import java.net.URL;
7 | import java.util.Properties;
8 | import java.util.logging.Logger;
9 | import java.util.zip.GZIPInputStream;
10 | 
11 | import javax.json.Json;
12 | import javax.json.JsonArray;
13 | import javax.json.JsonReader;
14 | 
15 | import oauth.signpost.OAuthConsumer;
16 | import oauth.signpost.basic.DefaultOAuthConsumer;
17 | import oauth.signpost.exception.OAuthException;
18 | 
19 | public class TwitterAPI {
20 | 
21 |     private static final Logger log = Logger.getLogger(TwitterAPI.class.getName());
22 |     private static final String BASE = "https://api.twitter.com";
23 | 
24 |     private OAuthConsumer consumer;
25 | 
26 |     public TwitterAPI(Properties p) {
27 |         consumer = new DefaultOAuthConsumer(p.getProperty("consumer.key"), p.getProperty("consumer.secret"));
28 |         consumer.setTokenWithSecret(p.getProperty("access.token"), p.getProperty("access.token.secret"));
29 |     }
30 | 
31 |     protected Object call(String url) throws IOException, InterruptedException, OAuthException {
32 |         URL url2 = new URL(consumer.sign(url));
33 |         HttpURLConnection conn = (HttpURLConnection) url2.openConnection();
34 |         conn.setRequestProperty("Accept-Encoding", "gzip");
35 |         int rc = conn.getResponseCode();
36 |         InputStream stream = null;
37 |         switch (rc) {
38 |             case HttpURLConnection.HTTP_OK:
39 |                 stream = conn.getInputStream();
40 |                 break;
41 |             case HttpURLConnection.HTTP_FORBIDDEN:
42 |                 stream = conn.getErrorStream();
43 |                 break;
44 |             case 429: // https://dev.twitter.com/rest/public/rate-limiting
45 |                 // Sleep until the rate-limit window resets, then retry the call;
46 |                 // guard against a reset timestamp that is already in the past.
47 |                 long reset = 1000 * conn.getHeaderFieldLong("X-Rate-Limit-Reset", 0);
48 |                 long millis = Math.max(0, reset - System.currentTimeMillis());
49 |                 if (millis < 60000) {
50 |                     log.info(String.format("waiting for %d sec (API call rate limit exceeded)", millis / 1_000));
51 |                 } else {
52 |                     log.info(String.format("waiting for %d min (API call rate limit exceeded)", millis / 60_000));
53 |                 }
54 |                 Thread.sleep(millis + 1000);
55 |                 return call(url);
56 |             default:
57 |                 throw new IOException(conn.getResponseMessage());
58 |         }
59 |         if ("gzip".equals(conn.getHeaderField("Content-Encoding")))
60 |             stream = new GZIPInputStream(stream);
61 | 
62 |         int limit = conn.getHeaderFieldInt("X-Rate-Limit-Limit", 0);
63 |         int remaining = conn.getHeaderFieldInt("X-Rate-Limit-Remaining", 0);
64 |         long resetAt = conn.getHeaderFieldLong("X-Rate-Limit-Reset", 0);
65 |         log.info(String.format("%d/%d(%d)", limit, remaining, resetAt));
66 | 
67 |         JsonReader in = Json.createReader(stream);
68 |         return in.read();
69 |     }
70 | 
71 |     public JsonArray getStatusesLookup(String ids) throws IOException, InterruptedException, OAuthException {
72 |         String endpoint = BASE + "/1.1/statuses/lookup.json?id=" + ids;
73 |         return (JsonArray) call(endpoint);
74 |     }
75 | }
--------------------------------------------------------------------------------
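
`TwitterAPI.call` above handles HTTP 429 by sleeping until the `X-Rate-Limit-Reset` timestamp and retrying. The same pattern in Python, as a hedged sketch (header names as returned by the v1.1 API; `auth` as in the earlier hydration sketch):

```python
# Illustrative sketch of the 429 sleep-and-retry loop mirrored from TwitterAPI.java.
import time

import requests

def call_with_rate_limit(url, auth, params=None):
    while True:
        resp = requests.get(url, params=params, auth=auth)
        if resp.status_code != 429:
            resp.raise_for_status()
            return resp.json()
        # Sleep until the rate-limit window resets, plus a small safety margin
        reset = int(resp.headers.get("x-rate-limit-reset", "0"))
        wait = max(0, reset - time.time()) + 1
        print(f"rate limited; waiting {wait:.0f}s")
        time.sleep(wait)
```
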
/preprocessing/user_location_preprocessing.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | from cleantext import clean
4 | import re
5 | import emoji
6 | import unicodedata
7 | import sys
8 | 
9 | # Translation table mapping every unicode punctuation character to a space
10 | PUNCT_TRANSLATE_UNICODE = dict.fromkeys(
11 |     (i for i in range(sys.maxunicode) if unicodedata.category(chr(i)).startswith("P")),
12 |     " ",
13 | )
14 | 
15 | 
16 | def preprocess_user_location(text):
17 | 
18 |     if text is None:
19 |         return None
20 |     text = text.strip()
21 |     # remove URLs
22 |     text = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', ' ', text)
23 |     text = re.sub(r'http\S+', ' ', text)
24 |     # remove usernames
25 |     text = re.sub(r'@[^\s]+', ' ', text)
26 |     # remove the # in #hashtag
27 |     text = re.sub(r'#([^\s]+)', r'\1', text)
28 | 
29 |     # remove emojis
30 |     text = emoji.get_emoji_regexp().sub(u'', text)
31 | 
32 |     text = text.replace('\n', ' ')
33 |     text = text.replace('\t', ' ')
34 |     text = text.replace('\r', ' ')
35 |     text = text.replace('"', ' ')
36 |     text = text.replace('~', ' ')
37 |     text = text.replace('|', ' ')
38 |     # protect commas from the punctuation removal below
39 |     text = text.replace(',', '<<<>>>')
40 | 
41 |     text = clean(text,
42 |         fix_unicode=True,          # fix various unicode errors
43 |         to_ascii=True,             # transliterate to closest ASCII representation
44 |         lower=False,               # do not lowercase text
45 |         no_line_breaks=True,       # fully strip line breaks as opposed to only normalizing them
46 |         no_urls=True,              # replace all URLs with a special token
47 |         no_emails=True,            # replace all email addresses with a special token
48 |         no_phone_numbers=True,     # replace all phone numbers with a special token
49 |         no_numbers=False,          # keep numbers
50 |         no_digits=False,           # keep digits
51 |         no_currency_symbols=True,  # replace all currency symbols with a special token
52 |         no_punct=True,             # fully remove punctuation
53 |         replace_with_url="",
54 |         replace_with_email="",
55 |         replace_with_phone_number="",
56 |         replace_with_currency_symbol="",
57 |     )
58 | 
59 |     # remove any remaining unicode punctuation
60 |     text = text.translate(PUNCT_TRANSLATE_UNICODE)
61 | 
62 |     # restore the protected commas
63 |     text = text.replace('<<<>>>', ',')
64 | 
65 |     # remove extra spaces
66 |     text = re.sub(' +', ' ', text)
67 | 
68 |     # discard degenerate results such as '', ' ', ', ', 'a', ' E'
69 |     if len(text.replace(',', '').replace(' ', '')) > 1:
70 |         return text
71 |     else:
72 |         return None
73 | 
74 | 
75 | # some tests
76 | print(preprocess_user_location('Doha, Qatar ####'))
77 | # Converts it to "Doha, Qatar"
78 | 
79 | print(preprocess_user_location(' USA '))
80 | # Converts it to "USA"
--------------------------------------------------------------------------------
/meta_data/meta_file_monthly_ids_range.tsv:
--------------------------------------------------------------------------------
1 | File_name Start_id End_id
2 | february_2020_f1.tsv 1223395535882768385 1231201257739649025
3 | february_2020_f2.tsv 1231201258092011523 1233487061354762240
4 | february_2020_f3.tsv 1233487061400813568 1233904783833976833
5 | march_2020_f1.tsv 1233904784010358785 1235614779965927425
6 | march_2020_f10.tsv 1244321291236245511 1245075687599464448
7 | march_2020_f11.tsv 1245075687767035905 1245138807797710849
8 | march_2020_f2.tsv 1235614780003684352 1237088322662645760
9 | march_2020_f3.tsv 1237088322696159232 1238201450951565313
10 | march_2020_f4.tsv 1238201451123531778 1239190893497901056
11 | march_2020_f5.tsv 1239190893506199552 1240091884468555776
12 | march_2020_f6.tsv 1240091884472565761 1241643047554887681
13 | march_2020_f7.tsv 1241643047559061506 1242485419352002573
14 | march_2020_f8.tsv 1242485419457097734 1243478992633491457
15 | march_2020_f9.tsv 1243478992641982467 1244321291227877382
16 | april_2020_f1.tsv 1245138807957213188 1245885018100006913
17 | april_2020_f10.tsv 1252230722540060672 1253025036719599616
18 | april_2020_f11.tsv 1253025036736385030 1253833370259333120
19 | april_2020_f12.tsv 1253833370284380161 1254690078980362241
20 | april_2020_f13.tsv 1254690079030812672 1255591483429793802
21 | april_2020_f14.tsv 1255591483450802176 1256010443749015554
22 | april_2020_f2.tsv 1245885018150277120 1246767531500670977
23 | april_2020_f3.tsv 1246767531504828416 1247560749041496065
24 | april_2020_f4.tsv 1247560749066698753 1248282920172863488
25 | april_2020_f5.tsv 1248282920185282560 1249049719042527232
26 | april_2020_f6.tsv 1249049719046901760 1249852958428278787
27 | april_2020_f7.tsv 1249852958508023809 1250639237420122113
28 | april_2020_f8.tsv 1250639237441032192 1251425458081628163
29 | april_2020_f9.tsv 1251425458094129152 1252230722485579778
30 | may_2020_f1.tsv 1256010443790811137 1257060910776102915
31 | may_2020_f10.tsv 1265592235187453959 1267121083511169024
32 | may_2020_f11.tsv 1267121083553021952 1267244467603550212
33 | may_2020_f2.tsv 1257060910792916992 1258065104886730752
34 | may_2020_f3.tsv 1258065104941322240 1259114424977195008
35 | may_2020_f4.tsv 1259114425010667520 1260172229393780736
36 | may_2020_f5.tsv 1260172229452726272 1261204076987121666
37 | may_2020_f6.tsv 1261204076991254528 1262280725505560577
38 | may_2020_f7.tsv 1262280725518258176 1263351336470380546
39 | may_2020_f8.tsv 1263351336516681728 1264481336284327937
40 | may_2020_f9.tsv 1264481336401825798 1265592235166498816
41 | june_2020_f2.tsv 1268678185518010368 1270188480736325632
42 | june_2020_f3.tsv 1270188480770068480 1271734074185527296
43 | june_2020_f4.tsv 1271734074269466624 1273249326085660675
44 | june_2020_f5.tsv 1273249326135984133 1274813759215681536
45 | june_2020_f6.tsv 1274813759488430086 1276368684643016705
46 | june_2020_f7.tsv 1276368684680753152 1278116103579893760
47 | july_2020_f1.tsv 1278116103714193408 1279672350317916162
48 | july_2020_f2.tsv 1279672350322184193 1281127821038030852
49 | july_2020_f3.tsv 1281127821105135617 1282607432242466816
50 | july_2020_f4.tsv 1282607432259252224 1283761237512163329
51 | july_2020_f5.tsv 1283761237596282880 1285121873857183745
52 | july_2020_f6.tsv 1285121873886375936 1286598075998449664
53 | july_2020_f7.tsv 1286598076128468994 1288118797161783304
54 | july_2020_f8.tsv 1288118797325357061 1289350127384059906
55 | june_2020_f1.tsv 1267244467695824897 1268678185484419073
56 | august_2020_f1.tsv 1289350127459606528 1290914738906554368
57 | august_2020_f2.tsv 1290914738944086016 1292487386212048896
58 | august_2020_f3.tsv 1292487386375622656 1294236478365741056
59 | august_2020_f4.tsv 1294236478394920960 1295945048639635457
60 | august_2020_f5.tsv 1295945048782249985 1297582288671330310
61 | august_2020_f6.tsv 1297582288792936448 1299160074728865792
62 | august_2020_f7.tsv 1299160074741374977 1300584151045885956
63 | september_2020_f1.tsv 1300584151356186629 1302269940070780928
64 | september_2020_f2.tsv 1302269940104417285 1303958352255483904
65 | september_2020_f3.tsv 1303958352289030149 1305661526779596800
66 | september_2020_f4.tsv 1305661526867611648 1310597145033158658
67 | september_2020_f5.tsv 1310597145075101697 1311455787064217603
68 | october_2020_f1.tsv 1311455787303268354 1312899927576436737
69 | october_2020_f2.tsv 1312899927580848134 1314318111148068866
70 | october_2020_f3.tsv 1314318111202586624 1315998364996120577
71 | october_2020_f4.tsv 1315998365000359936 1317864059375529984
72 | october_2020_f5.tsv 1317864059409031170 1319666221038567425
73 | october_2020_f6.tsv 1319666221055234048 1321479885110546433
74 | october_2020_f7.tsv 1321479885139968000 1322689810998480898
75 | november_2020_f1.tsv 1322689811350724611 1324795676618858501
76 | november_2020_f2.tsv 1324795676811821057 1326866251117965317
77 | november_2020_f3.tsv 1326866251222806528 1328700302992826368
78 | november_2020_f4.tsv 1328700302997024771 1330567656244609024
79 | november_2020_f5.tsv 1330567656299253762 1332444617116057600
80 | november_2020_f6.tsv 1332444617225211907 1333561446932865024
81 | december_2020_f1.tsv 1333561447054499842 1335345359833337856
82 | december_2020_f2.tsv 1335345359959298055 1337051156506435586
83 | december_2020_f3.tsv 1337051156598693898 1338884363908476929
84 | december_2020_f4.tsv 1338884363967229955 1340433130780540928
85 | december_2020_f5.tsv 1340433131057233921 1341804754494697474
86 | december_2020_f6.tsv 1341804754561691650 1343612648198438913
87 | december_2020_f7.tsv 1343612648227745805 1344795470808420352
88 | january_2021_f1.tsv 1344795470900715522 1346349629365837824
89 | january_2021_f2.tsv 1346349629437341696 1347920318686441472
90 | january_2021_f3.tsv 1347920318761992193 1349513280482631683
91 | january_2021_f4.tsv 1349513280688046081 1350963889895256064
92 | january_2021_f5.tsv 1350963890079862786 1352429045670162433
93 | january_2021_f6.tsv 1352429045716115457 1353877599127134209
94 | january_2021_f7.tsv 1353877599165022209 1355348167848566790
95 | january_2021_f8.tsv 1355348167861137410 1356029494600065025
96 | february_2021_f1.tsv 1356029494696534021 1357596728745201664
97 | february_2021_f2.tsv 1357596728778772480 1359330790413369355
98 | february_2021_f3.tsv 1359330790547591169 1361176857119350784
99 | february_2021_f4.tsv 1361176857123692545 1362859197721956353
100 | february_2021_f5.tsv 1362859197784678400 1364685866174533632
101 | february_2021_f6.tsv 1364685866191495177 1366176354841018368
102 | march_2021_f1.tsv 1366176354941730818 1367814444961722368
103 | march_2021_f2.tsv 1367814445112692745 1369646816979947523
104 | march_2021_f3.tsv 1369646817038774273 1371280166681477121
105 | march_2021_f4.tsv 1371280166853431304 1372929918892380169
106 | march_2021_f5.tsv 1372929918976233485 1374584034257342464
107 | march_2021_f6.tsv 1374584034316099587 1376276403436580864
108 | march_2021_f7.tsv 1376276403457511431 1377410378527744003
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TBCOV: Two Billion Multilingual COVID-19 Tweets with Sentiment, Entity, Geo, and Gender Labels
2 | Welcome to the code repository for the TBCOV dataset. This repository offers several scripts to help hydrate and process the shared dataset.
3 |
4 | The TBCOV dataset comprises more than two billion tweets from 218 countries worldwide. The following map shows worldwide tweets normalized by the total population from each country per 100,000 persons.
5 |
6 | 
7 |
8 | # Data descriptor for base release files
9 | |Attribute|Type|Description|
10 | |--- |--- |--- |
11 | |tweet_id|Int64|The integer representation of the unique identifier of a tweet. This number exceeds 53 bits, so some programming languages may interpret it incorrectly or with silent defects.|
12 | |date_time|String|UTC time when the tweet was created.|
13 | |lang|String|ISO 639-1 Alpha-2 language code consisting of two characters.|
14 | |user_id|String|Represents the id of the author of the tweet.|
15 | |retweeted_id|Int64|If the tweet is a retweet, the retweeted_id represents the id of the parent tweet.|
16 | |quoted_id|Int64|If the tweet is a quoted tweet, the quoted_id represents the id of the parent tweet.|
17 | |in_reply_to_id|Int64|If the tweet is a reply to an existing tweet, the in_reply_to_id represents the id of the parent/original tweet.|
18 | |sentiment_label|Int64|Represents the sentiment label values: -1 (negative), 0 (neutral), 1 (positive).|
19 | |sentiment_conf|Float|Represents the confidence score of the sentiment classifier for a given sentiment label to a tweet.|
20 | |user_type|String|Represents the identified type of the user, such as person, organization, location, etc.|
21 | |gender_label|String|One-character code representing the identified gender of the user: F represents "female" and M represents "male".|
22 | |tweet_text_named_entities|Dictionary array|Named entities (persons, organizations, locations, etc.) extracted from the tweet text, provided as an array of dictionaries.|
23 | |geo_coordinates_lat_lon|Float|GPS coordinates in the latitude, longitude format retrieved from the user's GPS-enabled device.|
24 | |geo_country_code|String|Two-character country code learned through resolving the GPS coordinates (latitude, longitude).|
25 | |geo_state|String|The name of the state/province learned through resolving the GPS coordinates (latitude, longitude).|
26 | |geo_county|String|The name of the county learned through resolving the GPS coordinates (latitude, longitude).|
27 | |geo_city|String|The name of the city learned through resolving the GPS coordinates (latitude, longitude).|
28 | |place_bounding_box|Float|Twitter provided bounding boxes representing place tags.|
29 | |place_country_code|String|Two-character country code learned through resolving the place bounding boxes.|
30 | |place_state|String|The name of the state/province learned through resolving the place bounding boxes.|
31 | |place_county|String|The name of the county learned through resolving the place bounding boxes.|
32 | |place_city|String|The name of the city learned through resolving the place bounding boxes.|
33 | |user_loc_toponyms|Dictionary array|Toponyms recognized and extracted from the user location field, provided as an array of dictionaries.|
34 | |user_loc_country_code|String|Two-character country code learned through resolving the user location toponyms.|
35 | |user_loc_state|String|The name of the state/province learned through resolving the user location toponyms.|
36 | |user_loc_county|String|The name of the county learned through resolving the user location toponyms.|
37 | |user_loc_city|String|The name of the city learned through resolving the user location toponyms.|
38 | |user_profile_description_toponyms|Dictionary array|Toponyms recognized and extracted from the user profile description field, provided as an array of dictionaries.|
39 | |user_profile_description_country_code|String|Two-character country code learned through resolving the recognized user profile description toponyms.|
40 | |user_profile_description_state|String|The name of the state/province learned through resolving the recognized user profile description toponyms.|
41 | |user_profile_description_county|String|The name of the county learned through resolving the recognized user profile description toponyms.|
42 | |user_profile_description_city|String|The name of the city learned through resolving the recognized user profile description toponyms.|
43 | |tweet_text_toponyms|Dictionary array|Toponyms recognized and extracted from the tweet full_text field, provided as an array of dictionaries.|
44 | |tweet_text_country_code|String|Two-character country code learned through resolving the recognized tweet text toponyms.|
45 | |tweet_text_state|String|The name of the state/province learned through resolving the recognized tweet text toponyms.|
46 | |tweet_text_county|String|The name of the county learned through resolving the recognized tweet text toponyms.|
47 | |tweet_text_city|String|The name of the city learned through resolving the recognized tweet text toponyms.|
48 |
49 | # Tweets hydration
50 | The tweets hydration process fetches full tweet content from Twitter using tweet-ids. To assist users with hydrating TBCOV tweets, this code repository
51 | provides a tool written in Java that takes tweet-ids as input and retrieves full tweet content from the Twitter APIs. More details and a usage guide for the tweets hydrator are available [here](https://github.com/CrisisComputing/TBCOV/tree/main/tweets_hydrator).
52 |
53 | # Preprocessing
54 | Different types of preprocessing were applied to different attributes before using them for any analysis. The preprocessing is important for replicating our results. The code repository provides several scripts used to preprocess different fields. The preprocessing scripts are available [here](https://github.com/CrisisComputing/TBCOV/tree/main/preprocessing).
55 |
56 | # Meta-data file
57 | The meta-data file provides a convenient and fast way to retrieve tweets from the base files. It holds the start and end tweet-id of every base file, so, given a tweet-id file (e.g., for a language or a country), the provided script determines which base files to parse to retrieve the matching tweets instead of parsing all two billion tweets.
58 |
59 | The [meta_file_monthly_ids_range.tsv](https://github.com/CrisisComputing/TBCOV/blob/main/meta_data/meta_file_monthly_ids_range.tsv) file lists the range of tweet IDs (between Start_id and End_id) contained in each monthly base file, as follows:
60 | |File_name|Start_id|End_id|
61 | |--- |--- |--- |
62 | |february_2020_f1.tsv|1223395535882768385|1231201257739649025|
63 |
64 | # Parsing using meta-file
65 | In the folder [parsers](https://github.com/CrisisComputing/TBCOV/tree/main/parsers), there are two scripts needed to extract tweet details from the base release files, given a specific language or country IDs file.
66 |
67 | * [meta_file_parser.py](https://github.com/CrisisComputing/TBCOV/blob/main/parsers/meta_file_parser.py) requires two arguments as input.
68 | 1. Country/Language IDs file
69 | 1. [meta_file_monthly_ids_range.tsv](https://github.com/CrisisComputing/TBCOV/blob/main/meta_data/meta_file_monthly_ids_range.tsv)
70 |
71 | A sample command to run the script is as follows:
72 |
73 | `python meta_file_parser.py [IDs file] meta_data/meta_file_monthly_ids_range.tsv`
74 |
75 | It creates an output file named `required_monthly_files.txt`, prefixed with the name of the IDs file.
76 | The contents of this file look something like the following:
77 | ```bash
78 | february_2020_f3.tsv
79 | february_2020_f2.tsv
80 | february_2020_f1.tsv
81 | ```
82 |
83 | Download the above files from [Crisis-NLP TBCOV](https://crisisnlp.qcri.org/tbcov).
84 | The output lists the monthly base files required to extract the full data for the given IDs. Make sure to download the full monthly base zip files to get the individual files listed in `required_monthly_files.txt`.
85 |
86 | Each line indicates which monthly base file is required for download so that it can be used to extract tweet details with the help of the next script.
87 |
88 | * [base_file_data_extractor.py](https://github.com/CrisisComputing/TBCOV/blob/main/parsers/base_file_data_extractor.py) requires three arguments as input.
89 | 1. `required_monthly_files.txt`, which is the output of the previous script
90 | 1. Country/Language IDs file
91 | 1. Base release files path (expects '/' at the end, e.g., `/home/downloads/`)
92 |
93 | A sample command to run the script is as follows:
94 | `python base_file_data_extractor.py required_monthly_files.txt test_for_meta_parsing.txt '/some/path/'`
95 |
96 | The output will be a .tsv file in the same format as the monthly base files.
97 |
--------------------------------------------------------------------------------
/tweets_hydrator/sample_tweet_ids.txt:
--------------------------------------------------------------------------------
1 | '503642976422096896'
2 | '503642976976113664'
3 | '503642977588494337'
4 | '503642979429388288'
5 | '503642980881022976'
6 | '503642982189252608'
7 | '503642983666032640'
8 | '503642984429395968'
9 | '503642985167609856'
10 | '503642985431830528'
11 | '503642988522647552'
12 | '503642992541171712'
13 | '503642993677443072'
14 | '503642995879849985'
15 | '503642996584513536'
16 | '503642998664884225'
17 | '503642998832242689'
18 | '503643000913006593'
19 | '503643001185636352'
20 | '503643001722118144'
21 | '503643002875969536'
22 | '503643003508883458'
23 | '503643004759195650'
24 | '503643009817128960'
25 | '503643010127499264'
26 | '503643011990183936'
27 | '503643013852463104'
28 | '503643016653860864'
29 | '503643018906570752'
30 | '503643019653181440'
31 | '503643027076694016'
32 | '503643028591218688'
33 | '503643029559734272'
34 | '503643031367467008'
35 | '503643033016233985'
36 | '503643033280462848'
37 | '503643033905410048'
38 | '503643033909202947'
39 | '503643036996227073'
40 | '503643039764860932'
41 | '503643040171700225'
42 | '503643042926977025'
43 | '503643044642848768'
44 | '503643044827394048'
45 | '503643045305516033'
46 | '503643045867188224'
47 | '503643050128592896'
48 | '503643051378483200'
49 | '503643054327484418'
50 | '503643056567255040'
51 | '503643056629768192'
52 | '503643062715703296'
53 | '503643062913212416'
54 | '503643063450083328'
55 | '503643063529398272'
56 | '503643073335685120'
57 | '503643074514272256'
58 | '503643077836546048'
59 | '503643080005013505'
60 | '503643081066184704'
61 | '503643081871089664'
62 | '503643093589958657'
63 | '503643095204782081'
64 | '503643096567926784'
65 | '503643096601858048'
66 | '503643097293914112'
67 | '503643098891575296'
68 | '503643100204773377'
69 | '503643104709066752'
70 | '503643105338216448'
71 | '503643105485004800'
72 | '503643109620588544'
73 | '503643111206027264'
74 | '503643112363671552'
75 | '503643113793933312'
76 | '503643117308764160'
77 | '503643117648486400'
78 | '503643124787609600'
79 | '503643126141947904'
80 | '503643126683009025'
81 | '503643127132221440'
82 | '503643127253450753'
83 | '503643128931565568'
84 | '503643128994086912'
85 | '503643130059829248'
86 | '503643130923466753'
87 | '503643131624292352'
88 | '503643136636506112'
89 | '503643137663725568'
90 | '503643137835675648'
91 | '503643138674528256'
92 | '503643138968133632'
93 | '503643143729082369'
94 | '503643144148099072'
95 | '503643144991567872'
96 | '503643146115223552'
97 | '503643149374205952'
98 | '503643149776875521'
99 | '503643151970869249'
100 | '503643154797854720'
101 | '503643156358115328'
102 | '503643156668096513'
103 | '503643156941111296'
104 | '503643158580690944'
105 | '503643158748880896'
106 | '503643159088218112'
107 | '503643159239622656'
108 | '503643162515349505'
109 | '503643163857526784'
110 | '503643164834803712'
111 | '503643166453825537'
112 | '503643168324071424'
113 | '503643168941015040'
114 | '503643169402003456'
115 | '503643169524047873'
116 | '503643171327602689'
117 | '503643171352739842'
118 | '503643171775983616'
119 | '503643175466983425'
120 | '503643175550861314'
121 | '503643176272265216'
122 | '503643180412436480'
123 | '503643180508909570'
124 | '503643180756393984'
125 | '503643181620015104'
126 | '503643183675219968'
127 | '503643187034853376'
128 | '503643187798228992'
129 | '503643187802406912'
130 | '503643187903488000'
131 | '503643190054764544'
132 | '503643190675505153'
133 | '503643190864257024'
134 | '503643191061401601'
135 | '503643191531167744'
136 | '503643192734916608'
137 | '503643194832060417'
138 | '503643195897810944'
139 | '503643196111327232'
140 | '503643196661174272'
141 | '503643196945997825'
142 | '503643197294514176'
143 | '503643197919469568'
144 | '503643197969424384'
145 | '503643198116225024'
146 | '503643202125955073'
147 | '503643202403196929'
148 | '503643204156006400'
149 | '503643204865236992'
150 | '503643205280468992'
151 | '503643205879861250'
152 | '503643206219624450'
153 | '503643207486283776'
154 | '503643208169967616'
155 | '503643209151807490'
156 | '503643209671925761'
157 | '503643209894211587'
158 | '503643210288496643'
159 | '503643210330435584'
160 | '503643210892464128'
161 | '503643212108795904'
162 | '503643212112994305'
163 | '503643213979070465'
164 | '503643214180392961'
165 | '503643214834724864'
166 | '503643214943764480'
167 | '503643214998282241'
168 | '503643215950389249'
169 | '503643216084602880'
170 | '503643216424357888'
171 | '503643216952844288'
172 | '503643217514868736'
173 | '503643218278240256'
174 | '503643218714439680'
175 | '503643219885031424'
176 | '503643220949991424'
177 | '503643221218824192'
178 | '503643222598356992'
179 | '503643227074080768'
180 | '503643227858030593'
181 | '503643229460262912'
182 | '503643232266620929'
183 | '503643233834893313'
184 | '503643234498015232'
185 | '503643235265576960'
186 | '503643236217270274'
187 | '503643236267982850'
188 | '503643237022564352'
189 | '503643238574456832'
190 | '503643238712872961'
191 | '503643239782420480'
192 | '503643240000540672'
193 | '503643244337451008'
194 | '503643245361242112'
195 | '503643250842796032'
196 | '503643253875286016'
197 | '503643256409030657'
198 | '503643259571167232'
199 | '503643260590759936'
200 | '503643261315985408'
201 | '503643262541111296'
202 | '503643263018872832'
203 | '503643264348467200'
204 | '503643266764374016'
205 | '503643266907402240'
206 | '503643271743037441'
207 | '503643274196684800'
208 | '503643274926518272'
209 | '503643275266228224'
210 | '503643275643719680'
211 | '503643276335779840'
212 | '503643276499353600'
213 | '503643276654559233'
214 | '503643278441324544'
215 | '503643278697189378'
216 | '503643279083061248'
217 | '503643280400453633'
218 | '503643280475582464'
219 | '503643283294142464'
220 | '503643284174929920'
221 | '503643285572055042'
222 | '503643287706931201'
223 | '503643287954001920'
224 | '503643288612913153'
225 | '503643289317552128'
226 | '503643290307403776'
227 | '503643291150458880'
228 | '503643291167248384'
229 | '503643291758260224'
230 | '503643291859292161'
231 | '503643292505210880'
232 | '503643293176332289'
233 | '503643293406609409'
234 | '503643293880963072'
235 | '503643294573006849'
236 | '503643295218958338'
237 | '503643296577880064'
238 | '503643297685200896'
239 | '503643298310148097'
240 | '503643298657861632'
241 | '503643298918334465'
242 | '503643299329359872'
243 | '503643299996241920'
244 | '503643300877058049'
245 | '503643301535551490'
246 | '503643302130753536'
247 | '503643302147915777'
248 | '503643302915489795'
249 | '503643303490109440'
250 | '503643304039575552'
251 | '503643304790360065'
252 | '503643305352372224'
253 | '503643306291896320'
254 | '503643306971398144'
255 | '503643307071639552'
256 | '503643307684421632'
257 | '503643308460347392'
258 | '503643309190156288'
259 | '503643309622181889'
260 | '503643309903183873'
261 | '503643310557503488'
262 | '503643311337639936'
263 | '503643312063258625'
264 | '503643314424655872'
265 | '503643315548323840'
266 | '503643317330903040'
267 | '503643317704212480'
268 | '503643317855207424'
269 | '503643318560239616'
270 | '503643318937739265'
271 | '503643319713673217'
272 | '503643320405725184'
273 | '503643321248395264'
274 | '503643321412378624'
275 | '503643321823420416'
276 | '503643323173969920'
277 | '503643323236904961'
278 | '503643323844685825'
279 | '503643324017020928'
280 | '503643325715730432'
281 | '503643325761851392'
282 | '503643326789062657'
283 | '503643326990778368'
284 | '503643327632519168'
285 | '503643328320385025'
286 | '503643329922215937'
287 | '503643330543357952'
288 | '503643330551775232'
289 | '503643331080257537'
290 | '503643331281555456'
291 | '503643331696803840'
292 | '503643332015554560'
293 | '503643332229480448'
294 | '503643333470982144'
295 | '503643334280482816'
296 | '503643334938988544'
297 | '503643335857569792'
298 | '503643337199718400'
299 | '503643338332180482'
300 | '503643339187830784'
301 | '503643339392966657'
302 | '503643340228014081'
303 | '503643341205299200'
304 | '503643341880565760'
305 | '503643341905752064'
306 | '503643342195138563'
307 | '503643342585233408'
308 | '503643343151448065'
309 | '503643343197577216'
310 | '503643344644239360'
311 | '503643344653012992'
312 | '503643344791031808'
313 | '503643344946601984'
314 | '503643345496047616'
315 | '503643346938523649'
316 | '503643347807129602'
317 | '503643349119549440'
318 | '503643349224804353'
319 | '503643349954613248'
320 | '503643350701191168'
321 | '503643352106299392'
322 | '503643353146064896'
323 | '503643353863294976'
324 | '503643354677407744'
325 | '503643355310743552'
326 | '503643355990220800'
327 | '503643356715835392'
328 | '503643357495963648'
329 | '503643357688500225'
330 | '503643358284509184'
331 | '503643358934605824'
332 | '503643359668604928'
333 | '503643359794434049'
334 | '503643360381652992'
335 | '503643360469725184'
336 | '503643360557412352'
337 | '503643361254047744'
338 | '503643362030026754'
339 | '503643362075742208'
340 | '503643362658750464'
341 | '503643363632226304'
342 | '503643364265193472'
343 | '503643364286550017'
344 | '503643364827598848'
345 | '503643365217693697'
346 | '503643365771341825'
347 | '503643365850628097'
348 | '503643366446608384'
349 | '503643366580424704'
350 | '503643368237580288'
351 | '503643448092942337'
352 | '503643449279938561'
353 | '503643449988751361'
354 | '503643450034491392'
355 | '503643450680803328'
356 | '503643451465162752'
357 | '503643451490324480'
358 | '503643452140453889'
359 | '503643452324581376'
360 | '503643452748615681'
361 | '503643453419311104'
362 | '503643453633613825'
363 | '503643455734956032'
364 | '503643456133419008'
365 | '503643456489943040'
366 | '503643456900972544'
367 | '503643457186168832'
368 | '503643458033438720'
369 | '503643458708717568'
370 | '503643459379789825'
371 | '503643460101230594'
372 | '503643460726173696'
373 | '503643460797485056'
374 | '503643461405663232'
375 | '503643461602775040'
376 | '503643461690466304'
377 | '503643462370328576'
378 | '503643463125311488'
379 | '503643464047681536'
380 | '503643464899518464'
381 | '503643465667076096'
382 | '503643465729581056'
383 | '503643465792507904'
384 | '503643465826058240'
385 | '503643466073915393'
386 | '503643466531102720'
387 | '503643467134664704'
388 | '503643467160248320'
389 | '503643467869085696'
390 | '503643468510822400'
391 | '503643469228019712'
392 | '503643469982605312'
393 | '503643470628937728'
394 | '503643471262285824'
395 | '503643471304216577'
396 | '503643472080171008'
397 | '503643472445071361'
398 | '503643472654393344'
399 | '503643473178673152'
400 | '503643473434906624'
401 | '503643474122788864'
402 | '503643474768703489'
403 | '503643475544645632'
404 | '503643476207337472'
405 | '503643476899405824'
406 | '503643477683752961'
407 | '503643478166102017'
408 | '503643479025909761'
409 | '503643479734755329'
410 | '503643480372314112'
411 | '503643481244725248'
412 | '503643481328611328'
413 | '503643482029047808'
414 | '503643482456457216'
415 | '503643482733682688'
416 | '503643483442532353'
417 | '503643484646277121'
418 | '503643485035978752'
419 | '503643485418037249'
420 | '503643485493559299'
421 | '503643486076538881'
422 | '503643486516559872'
423 | '503643486835712000'
424 | '503643486923403264'
425 | '503643486956957697'
426 | '503643487531978752'
427 | '503643488219852800'
428 | '503643489037737984'
429 | '503643489742385152'
430 | '503643489956290560'
431 | '503643490316607488'
432 | '503643490967109632'
433 | '503643491143282688'
434 | '503643491852111872'
435 | '503643493286567936'
436 | '503643493961838592'
437 | '503643494725197824'
438 | '503643494846836736'
439 | '503643495815720960'
440 | '503643497426341888'
441 | '503643497426350081'
442 | '503643497703153664'
443 | '503643499137609728'
444 | '503643499808718850'
445 | '503643500576272384'
446 | '503643501255720961'
447 | '503643501943615488'
448 | '503643502606311426'
449 | '503643502723735553'
450 | '503643503726178305'
451 | '503643504493731840'
452 | '503643505026011136'
453 | '503643505336782848'
454 | '503643506100150274'
455 | '503643506141708288'
456 | '503643507215839235'
457 | '503643507865968641'
458 | '503643508537049091'
459 | '503643508549623808'
460 | '503643509052932097'
461 | '503643509292023808'
462 | '503643509929558016'
463 | '503643510911021056'
464 | '503643511355617280'
465 | '503643512160923649'
466 | '503643512467103744'
467 | '503643512932667393'
468 | '503643514321002498'
469 | '503643514505527297'
470 | '503643515738673152'
471 | '503643516174868480'
472 | '503643516883709952'
473 | '503643517512851456'
474 | '503643517907111936'
475 | '503643518276206593'
476 | '503643518922145792'
477 | '503643519719071746'
478 | '503643521140928512'
479 | '503643521807831041'
480 | '503643522986045440'
481 | '503643523296821248'
482 | '503643524487577600'
483 | '503643524840308737'
484 | '503643525712736256'
485 | '503643526375407616'
486 | '503643527101034497'
487 | '503643527746949120'
488 | '503643528447016960'
489 | '503643528455790592'
490 | '503643528732221440'
491 | '503643529324036097'
492 | '503643529382748163'
493 | '503643529621823488'
494 | '503643529697320960'
495 | '503643532541046784'
496 | '503643533647949825'
497 | '503643535044653056'
498 | '503643535057227776'
499 | '503643535112163328'
500 | '503643536106196992'
--------------------------------------------------------------------------------