├── .DS_Store
├── .idea
├── .gitignore
├── TikTok-Personalization-Investigation.iml
├── inspectionProfiles
│ ├── Project_Default.xml
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
└── vcs.xml
├── DataAnalysis
├── .DS_Store
├── .ipynb_checkpoints
│ └── skip_gram_hashtags_v2-checkpoint.ipynb
├── Analysis_Methods.py
├── Analysis_Overview.py
├── Analysis_Text_Methods.py
├── SkipGramModel.py
├── SkipGramModelEvaluation.py
├── hashtags_to_ignore.json
├── invalid_lit_data.json
├── test_data_set.csv
└── training_data_set.csv
├── README.md
├── Testing
├── .DS_Store
├── APItest.py
├── BlockedProxyHandling.py
├── Maintenance.py
├── ParalleliseTesting.py
├── TestInitializer.py
├── TestSets
│ ├── .DS_Store
│ ├── archive test data
│ ├── cg_us_user-165-166.json
│ ├── part_1_tests
│ │ ├── cg_ca_user-119-120.json
│ │ ├── cg_ca_user-121-122.json
│ │ ├── cg_fr_user-55-56.json
│ │ ├── cg_fr_user-65-67.json
│ │ ├── cg_gb_user-68-69.json
│ │ ├── cg_us_user-125-126.json
│ │ ├── cg_us_user-137-138.json
│ │ ├── cg_us_user-139-140.json
│ │ ├── cg_us_user-141-142.json
│ │ ├── cg_us_user-143-144.json
│ │ ├── cg_us_user-147-148.json
│ │ ├── cg_us_user-149-150.json
│ │ ├── cg_us_user-161-162.json
│ │ ├── cg_us_user-57-58.json
│ │ ├── cg_us_user-72-73.json
│ │ ├── cg_us_user-74-75.json
│ │ ├── cg_us_user-93-94.json
│ │ ├── cg_us_user-95-96.json
│ │ ├── follow_gb_user-51-52.json
│ │ ├── follow_gb_user-53-54.json
│ │ ├── follow_us_user-153-154.json
│ │ ├── follow_us_user-155-156.json
│ │ ├── follow_us_user-47-48.json
│ │ ├── follow_us_user-49-50.json
│ │ ├── like_gb_user-61-62.json
│ │ ├── like_gb_user-63-64.json
│ │ ├── like_us_user-111-112.json
│ │ ├── like_us_user-113-114.json
│ │ ├── like_us_user-115-116.json
│ │ ├── like_us_user-117-118.json
│ │ ├── like_us_user-123-124.json
│ │ ├── like_us_user-135-136.json
│ │ ├── like_us_user-159-160.json
│ │ ├── like_us_user-45-46.json
│ │ ├── like_us_user-59-60.json
│ │ ├── like_us_user-70-71.json
│ │ ├── location-1_ca_user-99-100.json
│ │ ├── location-1_us_user-97-98.json
│ │ ├── location-2_ca_user-101-102.json
│ │ ├── location-2_us_user-105-106.json
│ │ ├── location-3_de_user-107-108.json
│ │ ├── location-3_us_user-103-104.json
│ │ ├── location-4-de_us_user-109-110.json
│ │ ├── location-4-en_us_user-129-132.json
│ │ ├── location-4-es_us_user-130-133.json
│ │ ├── location-4-fr_us_user-131-134.json
│ │ ├── test_user_11.json
│ │ ├── vcr_us_user-127-128.json
│ │ ├── vcr_us_user-145-146.json
│ │ ├── vcr_us_user-151-152.json
│ │ ├── vcr_us_user-157-158.json
│ │ ├── vcr_us_user-163-164.json
│ │ ├── vcr_us_user-77-78.json
│ │ ├── vcr_us_user-79-80.json
│ │ ├── vcr_us_user-81-82.json
│ │ ├── vcr_us_user-83-84.json
│ │ ├── vcr_us_user-85-86.json
│ │ ├── vcr_us_user-87-88.json
│ │ ├── vcr_us_user-89-90.json
│ │ └── vcr_us_user-91-92.json
│ └── test_user_167.json
└── scratch_12.py
├── chromedriver.exe
├── gitignore
├── hashtags_to_ignore.json
├── main.py
├── ngrok.exe
├── proxy_auth_plugin.zip
├── proxy_auth_plugin
├── background.js
└── manifest.json
├── src
├── DataStoring.py
├── DatabaseHelper.py
├── Proxy.py
├── SMSHandler.py
├── TestCase1_Loc.py
├── TestRun.py
├── WebHelper.py
├── __init__.py
└── proxy_auth_plugin.zip
└── utilities
├── .DS_Store
├── Final Test Data
├── test_data_set.csv
└── training_data_set.csv
├── Value_Dataset_TikTokdata.json
├── clean_emojis.csv
├── country_prefix.json
├── hashtag_translations.json
├── hashtag_translations_old.json
├── proxy.zip
└── background.js
├── test_data_set.csv
└── training_data_set.csv
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/.DS_Store
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 |
--------------------------------------------------------------------------------
/.idea/TikTok-Personalization-Investigation.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/DataAnalysis/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/DataAnalysis/.DS_Store
--------------------------------------------------------------------------------
/DataAnalysis/Analysis_Overview.py:
--------------------------------------------------------------------------------
1 | from Analysis_Methods import *
2 | from Analysis_Text_Methods import *
3 | from SkipGramModel import *
4 | from SkipGramModelEvaluation import *
5 |
# TEST SCENARIOS:
# Every scenario below is a list of test-user pairs ([user_a, user_b]) unless noted otherwise.
action_type = 'Like'
all_test_user_ids_like = [
    [22, 24], [25, 26], [27, 28], [29, 30], [31, 32], [33, 34], [35, 36], [45, 46], [59, 60],
    [61, 62], [63, 64], [70, 71], [111, 112], [113, 114], [115, 116], [117, 118], [123, 124],
    [135, 136], [159, 160],
]
tests_like_5_batches = [[45, 46], [59, 60], [61, 62], [63, 64], [70, 71]]
tests_like_3_batches = [[113, 114], [135, 136], [115, 116], [117, 118], [123, 124], [159, 160]]
excluded_like_users = [[22, 24], [25, 26], [27, 28], [29, 30], [31, 32], [33, 34], [35, 36], [111, 112]]

# action_type = 'Follow'
all_test_follow_3_batches = [[47, 48], [49, 50], [53, 54], [153, 154], [155, 156]]
excluded_follow_users = [[51, 52]]

# action_type = 'Video View Rate'
all_test_vvr_3_batches = [
    [77, 78], [79, 80], [81, 82], [83, 84], [85, 86], [87, 88], [91, 92], [145, 146], [151, 152],
    [157, 158],
]
# tests_vvr_personas_3_batches = [[87, 88], [91, 92], [145, 146], [151, 152], [157, 158]]
excluded_vvr_users = [[89, 90], [127, 128]]

# action_type = 'Control Group'
excluded_control_groups_5_batches = [[93, 94]]
new_control_group_3_batches = [[143, 144], [147, 148], [149, 150]]
# [38, 39], [40, 41], [55, 56], [57, 58], [65, 67], [68, 69],
control_group_5_batches = [[72, 73], [74, 75], [95, 96]]
# [119, 120], [121, 122]
control_group_3_batches = [
    [125, 126], [137, 138], [139, 140], [141, 142], [143, 144], [147, 148], [149, 150],
]

# action_type = 'Location'
# Location scenarios are flat lists of user ids rather than lists of pairs.
diff_country_same_language_3_batches = [97, 98, 99, 100]
# diff_country_same_language_switching_country_loc_3_batches = [101, 102, 105, 106]  # => EXCLUDED !
diff_country_diff_language_switching_country_3_batches = [103, 104, 107, 108]
same_country_diff_language_3_batches = [109, 110, 129, 130, 131, 132, 133, 134]

# action_type = "Collaborative Filtering"
# Every unordered pair of the users below, in ascending order of their position in this list.
_collaborative_filtering_user_ids = [87, 88, 91, 92, 123, 124, 145, 146, 151, 152, 157, 158, 159, 160]
collaborative_filtering_groups = [
    [first_user, second_user]
    for position, first_user in enumerate(_collaborative_filtering_user_ids)
    for second_user in _collaborative_filtering_user_ids[position + 1:]
]

# Full scenario set: action type -> parallel lists of user groups ('users') and batch sizes ('batch').
test_groups = {
    'Like': {
        'users': [tests_like_5_batches, tests_like_3_batches],
        'batch': [5, 3]
    },
    'Follow': {
        'users': [all_test_follow_3_batches],
        'batch': [3]
    },
    'Video View Rate': {
        'users': [all_test_vvr_3_batches],
        'batch': [3]
    },
    'Control Group': {
        'users': [control_group_5_batches, control_group_3_batches],
        'batch': [5, 3]
    },
    'Location': {
        'users': [diff_country_same_language_3_batches],
        'batch': [3]
    }
}

# This second assignment replaces the full scenario set above, so only the Location
# scenario is processed when the script runs.
test_groups = {
    'Location': {
        'users': [diff_country_same_language_3_batches],
        'batch': [3]
    }
}
81 |
if __name__ == '__main__':
    # Single spelling of the flag key. The original wrote the flag as "Account_Of_..." but read
    # it back as "Account_For_..." when truncating test runs, so the truncation never applied.
    unfinished_scenarios_key = "Account_Of_Unfinished_Scenarios"

    # Noise baselines derived from the control groups, one per batch size.
    noise_all_computation_5, noise_run_computation_5, noise_avg_overall_runs_overall_users_computation_5 = \
        compute_noise_control_scenarios(control_group_5_batches, 5)
    # The last two 3-batch control pairs are left out of the noise computation.
    noise_all_computation_3, noise_run_computation_3, noise_avg_overall_runs_overall_users_computation_3 = \
        compute_noise_control_scenarios(control_group_3_batches[:-2], 3, False)
    # noise_all_computation_3_unfinished, noise_run_computation_3_unfinished = \
    #     compute_noise_control_scenarios(control_group_3_batches, 3, True)

    # The hashtag similarity analysis further down needs a SkipGramModelEvaluation instance.
    # It stays None (and that analysis is skipped) until the initialisation below is enabled;
    # previously the script raised a NameError for every non-Location scenario instead.
    skip_gram_model_evaluation = None
    # # Initializing SkipGramEvaluation Class to utilize for analysis
    # test_data_values, test_data_dict = get_test_data(test_users=[53, 54, 91, 92, 123, 124])
    # training_data = get_training_data(test_hashtags=test_data_dict)
    # embedding_size = 300
    # skip_gram_model_evaluation = SkipGramModelEvaluation(embedding_size=embedding_size, test_data=test_data_dict,
    #                                                      frequencies=[100, 500, 1000], epochs=5, lr=0.1,
    #                                                      max_freq=100000, min_freq=2)

    for group, group_config in test_groups.items():
        # 'batch' and 'users' are parallel lists; pairing them positionally (instead of looking
        # the batch size up with list.index) stays correct when two entries share a batch size.
        for batch, users in zip(group_config.get('batch'), group_config.get('users')):
            test_group = {
                "Action_Type": group,
                "Batch_Size": batch,
                "Users": users,
                unfinished_scenarios_key: False
            }

            # DIFFERENCE ANALYSIS OF POSTS OF LOCATION TESTS
            if test_group.get('Action_Type') == "Location":
                heatmap_location(diff_country_same_language_3_batches,
                                 noise_avg_overall_runs_overall_users_computation_3)
                heatmap_location(diff_country_diff_language_switching_country_3_batches,
                                 noise_avg_overall_runs_overall_users_computation_3, switching_loc=True)
                heatmap_location(same_country_diff_language_3_batches,
                                 noise_avg_overall_runs_overall_users_computation_3)

            # TRAINING SKIP-GRAM MODEL FOR SIMILARITY ANALYSIS
            # test_data_values, test_data_dict = get_test_data(test_users=[53, 54, 91, 92, 123, 124])
            # training_data = get_training_data(test_hashtags=test_data_dict)
            # embedding_size = 300
            # print("***** Training and test data fetched *****")
            # for epochs in [9, 10]:
            #     for lr in [0.1]:
            #         print(f"***** Starting Iteration with {epochs} epochs and lr = {lr}")
            #         skip_gram_model = SkipGramModel(max_freq=100000, min_freq=2, embedding_size=embedding_size,
            #                                         neg_sample_size=20, lr=lr, epochs=epochs, training_data=training_data)
            #         skip_gram_model_evaluation = SkipGramModelEvaluation(embedding_size=embedding_size, test_data=test_data_dict,
            #                                                              frequencies=[100, 500, 1000], epochs=epochs, lr=lr,
            #                                                              max_freq=100000, min_freq=2)
            #         visualize_similarities([123, 124], get_test_run_ids_2_user([123, 124])[:20], action_type,
            #                                skip_gram_model_evaluation, epochs, lr)

            else:
                # as some test scenarios did not complete all runs we have to reduce the number of test runs for
                # which we calculate the noises
                test_runs_to_consider = 0
                if test_group.get(unfinished_scenarios_key):
                    test_runs_to_consider = min(
                        len(get_test_run_ids_2_user(pair)) for pair in test_group.get('Users'))

                for user_pair in test_group.get('Users'):
                    action_type = test_group.get('Action_Type')

                    # Control groups are the noise reference themselves, so no noise is applied to them.
                    noise = 0
                    if test_group.get('Batch_Size') == 3 and action_type != 'Control Group':
                        noise = noise_avg_overall_runs_overall_users_computation_3
                    elif test_group.get('Batch_Size') == 5 and action_type != 'Control Group':
                        noise = noise_avg_overall_runs_overall_users_computation_5
                    print(f"Utilized noise: {noise}")

                    # in general only consider the first 20 test runs of a test scenario
                    test_run_ids = get_test_run_ids_2_user(user_pair)[:20]
                    if test_group.get(unfinished_scenarios_key):
                        test_run_ids = test_run_ids[:test_runs_to_consider]
                    print(f"User pair: {user_pair}: {test_run_ids}")

                    action_user = get_action_user(user_pair)
                    print(f"*** ACTION USER IS {action_user}")

                    # # DIFFERENCE ANALYSIS OF POSTS, HASHTAGS, CONTENT CREATORS, SOUND analyze overlapping posts between two users
                    # difference_analysis(test_user_pair=user_pair, test_runs=test_run_ids,
                    #                     action_type=action_type, noise=noise, action_user=action_user,
                    #                     thesis_chart=False, account_for_drop=True)
                    #
                    # # POST METRICS ANALYSIS
                    # development_of_post_metrics(test_run_ids, user_pair, action_type, action_user, thesis_chart=False)

                    # REAPPEARANCE OF POST ATTRIBUTE ANALYSIS INCL DISTRIBUTION OF METRICS APPEARANCE OVER ALL TEST RUNS
                    for metric in ['Hashtag', 'Content Creator', 'Sound']:
                        reappearance_analysis_of_metric(test_user_pair=user_pair, test_runs=test_run_ids,
                                                        metric=metric, action_type=action_type,
                                                        action_user=action_user, thesis_chart=False)

                    # # SIMILARITY / DIFFERENCE OF HASHTAG ANALYSIS
                    if skip_gram_model_evaluation is not None:
                        visualize_similarities(user_pair, test_run_ids, action_type, skip_gram_model_evaluation,
                                               5, 0.1, within_feed=True, thesis_chart=False)
                    else:
                        print("Skipping hashtag similarity analysis: skip_gram_model_evaluation is not initialised")
                    # # generate_similarities_differences(test_user_pair=user_pair, test_runs=test_run_ids,
                    # #                                   action_type=action_type, hashtags=True,
                    # #                                   within_test_run=True, within_feed=False)

                # analyze gradient of post differences and error rate
                # generate_chart_error_rate_2_users(all_test_users=test_group.get('Users'), action_type=action_type)
186 |
187 |
--------------------------------------------------------------------------------
/DataAnalysis/Analysis_Text_Methods.py:
--------------------------------------------------------------------------------
1 | import csv
2 |
3 | from gensim.models.doc2vec import Doc2Vec, TaggedDocument
4 |
5 | from Analysis_Methods import *
6 | from SkipGramModelEvaluation import *
7 |
8 |
9 |
# Directory that contains this module; data and credential files are resolved relative to it.
base_path = Path(__file__).parent


def instance_db():
    """
    Open a PostgreSQL connection using the credentials stored in ../utilities/db_credentials.json.

    :return: tuple (connection, cursor) where the cursor was created from the returned connection
    """
    credentials_file = (base_path / "../utilities/db_credentials.json").resolve()
    with open(credentials_file) as handle:
        credentials = json.load(handle)

    # Only these four settings are read from the credentials file; a missing key is passed on as None.
    connection_settings = {
        setting: credentials.get(setting)
        for setting in ('host', 'database', 'user', 'password')
    }
    connection = psycopg2.connect(**connection_settings)
    return connection, connection.cursor()


# Module-wide connection and cursor, opened when the module is imported.
conn, cur = instance_db()
28 |
29 |
def compute_similarity_within_feed(text_set):
    """
    Compute pairwise similarities and distances between the texts of one feed via Doc2Vec embeddings.

    Each text is cleaned with ``clean_string``; rows whose cleaned text is empty are dropped. A Doc2Vec
    model is then trained on the remaining texts and the cosine-similarity and euclidean-distance
    matrices of the learned document vectors are returned.

    Side effects: changes the global pandas display options and the numpy print options.

    source:
    https://towardsdatascience.com/calculating-string-similarity-in-python-276e18a7d33a
    https://rare-technologies.com/word2vec-tutorial/
    https://towardsdatascience.com/calculating-document-similarities-using-bert-and-other-models-b2c1a29c9630
    :param text_set: dictionary keyed by post id; every value is read with the keys 'desc', 'lang' and
        'already_translated'
    :return: tuple (text_corpus_df, pairwise_similarities, pairwise_differences); the two matrices have
        one row/column per row that remains in text_corpus_df
    """
    pd.set_option('display.max_colwidth', 0)
    pd.set_option('display.max_columns', 0)

    text_corpus_df = pd.DataFrame(columns=['post_id', 'text_corpus', 'text_corpus_cleaned'])
    text_corpus_df['post_id'] = text_set.keys()
    text_corpus_df['text_corpus'] = [text_set[i]['desc'] for i in text_set.keys()]

    # cleaning data: removing special characters & emojis
    text_corpus_df['text_corpus_cleaned'] = [clean_string(text_set[key]['desc'], text_set[key]['lang'],
                                                          text_set[key]['already_translated'])
                                             for key in text_set.keys()]

    # delete value from text list if after cleaning empty text remains
    # (empty strings in ANY column become NaN; only rows with an empty cleaned text are dropped)
    nan = float("NaN")
    text_corpus_df.replace("", nan, inplace=True)
    text_corpus_df.dropna(subset=['text_corpus_cleaned'], inplace=True)

    # computing similarities using Doc2Vec cosine similarity and differences using Doc2Vec euclidean distance
    # NOTE(review): the TF-IDF vectors computed here are not used anywhere below in this function.
    tf_idf_vectoriser = TfidfVectorizer()
    tf_idf_vectoriser.fit(text_corpus_df.text_corpus_cleaned)
    tf_idf_vectors = tf_idf_vectoriser.transform(text_corpus_df.text_corpus_cleaned)

    # download_package('punkt')

    # Tags are the positions 0..n-1 of the remaining rows, not the post ids.
    tagged_data = [TaggedDocument(words=word_tokenize(doc), tags=[i]) for i, doc in
                   enumerate(text_corpus_df.text_corpus_cleaned)]
    model_d2v = Doc2Vec(vector_size=100, alpha=0.025, min_count=1)

    model_d2v.build_vocab(tagged_data)

    # train() is invoked 100 times, and every call itself runs model_d2v.epochs passes.
    for epoch in range(100):
        model_d2v.train(tagged_data,
                        total_examples=model_d2v.corpus_count,
                        epochs=model_d2v.epochs)

    # The second dimension must match the vector_size passed to Doc2Vec above.
    document_embeddings = np.zeros((text_corpus_df.shape[0], 100))

    for i in range(len(document_embeddings)):
        document_embeddings[i] = model_d2v.docvecs[i]

    pairwise_similarities = cosine_similarity(document_embeddings)
    pairwise_differences = euclidean_distances(document_embeddings)

    np.set_printoptions(threshold=sys.maxsize)

    return text_corpus_df, pairwise_similarities, pairwise_differences
86 |
87 |
def generate_similarities_differences(test_user_pair, test_runs, action_type, hashtags=False, description=False,
                                      within_feed=False, within_test_run=False):
    """
    Retrieve relevant data to compute similarity:
    - for posts within a feed itself
    - of posts from two feeds
    and plot the per-test-run averages.

    NOTE(review): elsewhere in this module ``retrieve_hashtags`` is unpacked into two values and
    ``plot_similarities_differences`` is called with eight arguments, whereas this function uses the
    return value of ``retrieve_hashtags`` directly and passes four arguments. This function may be out
    of date with respect to those helpers - confirm before using it.

    :param test_user_pair: users whose feeds are analysed; the between-feeds analysis only runs when
        exactly two users are given
    :param test_runs: test run ids to evaluate
    :param action_type: forwarded unchanged to plot_similarities_differences
    :param hashtags: if True the texts are fetched with retrieve_hashtags (takes precedence over
        ``description`` when both are set, because it is evaluated last)
    :param description: if True the texts are fetched with retrieve_description
    :param within_feed: compute and plot the similarity of the posts inside each user's own feed
    :param within_test_run: compute and plot the similarity between the two users' feeds per test run
    :return: None
    """
    relevant_data = {}
    text_source = ''
    scope = ''
    if description:
        relevant_data = retrieve_description(test_user_pair, test_runs)
        text_source = 'descriptions'
    if hashtags:
        relevant_data = retrieve_hashtags(test_user_pair, test_runs)
        text_source = 'hashtags'

    print(f"*** DATA RETRIEVED FOR TEXT SOURCE: {text_source}")
    print(relevant_data)

    test_run_feed_similarities_differences = {}

    # Compute the similarity & difference of hashtags of posts within the same feed
    if within_feed:
        for user in test_user_pair:
            test_run_feed_similarities_differences[user] = {}
            for run in test_runs:
                # compute similarities and differences
                text_corpus_df, pairwise_similarities, pairwise_differences = \
                    compute_similarity_within_feed(relevant_data[user][run])

                # structure results from similarity & difference computation
                post_similarities_differences, feed_similarity_differences = \
                    structuring_similarities_differences(text_corpus_df, pairwise_similarities, pairwise_differences)
                test_run_feed_similarities_differences[user][run] = {
                    'feed(s)_similarity': feed_similarity_differences.get('avg_sim_entire_feed'),
                    'feed(s)_difference': feed_similarity_differences.get('avg_diff_entire_feed')
                }
        print("*** COMPUTED SIMILARITIES & DIFFERENCES WITHIN FEED")
        print(test_run_feed_similarities_differences)
        scope = 'within_feed'
        plot_similarities_differences(test_run_feed_similarities_differences, text_source, scope, action_type)

    # Compute the similarity & difference of hashtags of posts from a feed across multiple testruns
    test_run_two_feed_similarities_differences = {}
    if len(test_user_pair) == 2 and within_test_run:
        for run in test_runs:
            text_sets_similarities_differences = compute_similarity_between_two_feeds(
                relevant_data[test_user_pair[0]][run], relevant_data[test_user_pair[1]][run])
            all_sim = [text_sets_similarities_differences.get(item).get('feed_similarity_to_other_feed') for item in
                       text_sets_similarities_differences.keys()]
            all_diff = [text_sets_similarities_differences.get(item).get('feed_difference_to_other_feed') for item in
                        text_sets_similarities_differences.keys()]
            # Average over both comparison directions (feed 1 vs feed 2 and feed 2 vs feed 1).
            test_run_two_feed_similarities_differences[run] = {
                'feed(s)_similarity': sum(all_sim) / len(all_sim),
                'feed(s)_difference': sum(all_diff) / len(all_diff)
            }
        scope = 'within_test_run'
        print("*** COMPUTED SIMILARITIES & DIFFERENCES BETWEEN FEEDS ACROSS ALL TEST RUNS")
        print(test_run_two_feed_similarities_differences)
        plot_similarities_differences(test_run_two_feed_similarities_differences, text_source, scope, action_type)
154 |
155 |
def compute_similarity_between_two_feeds(text_set_1, text_set_2):
    """
    Compare two feeds post by post, in both directions.

    For every post of one feed a temporary text set is built that contains this post plus all posts of
    the other feed; it is scored with compute_similarity_within_feed and
    structuring_similarities_differences, and the resulting per-post averages are averaged per feed.

    - either twice the same set of text's as similarity of posts within the same feed shall be computed
    - or different set of text's from two different users as similarity of both users' feeds shall be evaluated

    NOTE(review): an empty text set leads to a division by zero when the per-feed average is computed.

    :param text_set_1: dict keyed by post id; values are dicts providing 'desc', 'lang' and
        'already_translated'
    :param text_set_2: same structure as text_set_1
    :return: dict with the keys 'text_set_1' and 'text_set_2', each mapping to
        {'feed_similarity_to_other_feed': ..., 'feed_difference_to_other_feed': ...}
    """
    different_text_sets = {}
    text_sets = {'text_set_1': text_set_1, 'text_set_2': text_set_2}
    text_sets_similarities_differences = {}

    for text_set in text_sets.keys():
        different_text_sets[text_set] = {}
        # per-post averages of the current feed against the other feed
        temp_post_sim_diff_to_other_feed = {}
        for post_user_1 in text_sets.get(text_set).keys():
            # start the temporary text set with the post itself ...
            different_text_sets[text_set][post_user_1] = {
                f"{post_user_1}": text_sets.get(text_set).get(post_user_1),
            }
            # ... and add every post of the other feed under a "Compared_To_<post id>" key
            other_text_set = [text_sets.get(x) for x in text_sets.keys() if x != text_set][0]
            for post_from_text_2 in other_text_set.keys():
                different_text_sets[text_set][post_user_1][f"Compared_To_{post_from_text_2}"] = {
                    'desc': other_text_set.get(post_from_text_2).get('desc'),
                    'lang': other_text_set.get(post_from_text_2).get('lang'),
                    'already_translated': other_text_set.get(post_from_text_2).get('already_translated')
                }
            text_corpus_df, pairwise_similarities, pairwise_differences = \
                compute_similarity_within_feed(different_text_sets[text_set][post_user_1])
            post_similarities_differences, feed_similarity_differences = \
                structuring_similarities_differences(text_corpus_df, pairwise_similarities, pairwise_differences)
            temp_post_sim_diff_to_other_feed[post_user_1] = {
                'feed_similarity': feed_similarity_differences.get('avg_sim_entire_feed'),
                'feed_difference': feed_similarity_differences.get('avg_diff_entire_feed')
            }
        # average the per-post values of this feed
        all_sim = [temp_post_sim_diff_to_other_feed.get(item).get('feed_similarity') for item in temp_post_sim_diff_to_other_feed.keys()]
        all_diff = [temp_post_sim_diff_to_other_feed.get(item).get('feed_difference') for item in temp_post_sim_diff_to_other_feed.keys()]
        text_sets_similarities_differences[text_set] = {
            'feed_similarity_to_other_feed': sum(all_sim) / len(all_sim),
            'feed_difference_to_other_feed': sum(all_diff) / len(all_diff)
        }

    return text_sets_similarities_differences
196 |
197 |
def get_training_data(test_hashtags):
    """
    Create list of list of hashtags for all posts that shall be used in the training_data set.

    The hashtags (English translations) of all posts not belonging to the hard-coded test users are
    read from the database and grouped per post. Afterwards every test post that contains a hashtag
    unknown to the training data is moved to the training data: its unseen hashtags become a training
    entry and the post is removed from ``test_hashtags``.

    The result is also written to ``training_data_set.csv`` next to this module.

    :param test_hashtags: dict mapping post id -> list of hashtags (as returned by get_test_data).
        NOTE: this dict is modified in place - moved posts are deleted from it.
    :return: list of hashtag lists, one per training post
    """
    sql_hashtags_training = """select distinct p.id, phr.translation_english
        from (select phr1.postid, h.id, h.translation_english
        from d1rpgcvqcran0q.public.post_hashtag_relation phr1 join
        d1rpgcvqcran0q.public.hashtags h on phr1.hashtagid = h.id) phr join
        (select id from d1rpgcvqcran0q.public.posts where testuserid not in (53, 54, 91, 92, 123, 124)) p on p.id = phr.postid"""

    # retrieve training hashtag data
    training_hashtags_dict = {}
    # A set gives constant-time membership checks for the (potentially large) training vocabulary.
    known_training_hashtags = set()
    cur.execute(sql_hashtags_training, ())
    for item in cur.fetchall():
        post_id = item[0]
        # A NULL translation is treated like an empty one instead of crashing on None.strip().
        cur_hashtag = (item[1] or '').strip()
        if cur_hashtag == '' or post_id in test_hashtags:
            continue
        post_hashtags = training_hashtags_dict.setdefault(post_id, [])
        # Only skip the duplicate itself. The previous if/else replaced the whole list with the
        # single repeated hashtag, dropping every hashtag collected for the post so far.
        if cur_hashtag not in post_hashtags:
            post_hashtags.append(cur_hashtag)
        known_training_hashtags.add(cur_hashtag)

    # Move test posts with hashtags unknown to the training data over to the training set.
    # list(...) snapshots the keys because test_hashtags is modified while iterating.
    for post in list(test_hashtags.keys()):
        for hashtag in list(test_hashtags.get(post)):
            if hashtag not in known_training_hashtags:
                training_hashtags_dict.setdefault(post, []).append(hashtag)
        if post in training_hashtags_dict:
            del test_hashtags[post]

    # store training hashtag data
    file_training_data_set = (base_path / "training_data_set.csv").resolve()
    # newline='' is required by the csv module; without it blank rows appear on Windows.
    with open(file_training_data_set, 'w', newline='') as f:
        w = csv.writer(f)
        for row in training_hashtags_dict.items():
            w.writerow(row)

    return list(training_hashtags_dict.values())
244 |
245 |
def get_test_data(test_users):
    """
    Collect the hashtags (English translations) of all posts of the given test users.

    The hashtags are grouped per post id across all requested users, empty translations are skipped
    and a hashtag is listed only once per post. The result is also written to ``test_data_set.csv``
    next to this module.

    :param test_users: iterable of test user ids whose posts form the test data set
    :return: tuple (list of hashtag lists, dict mapping post id -> list of hashtags)
    """
    sql_hashtags_test = """select distinct p.id, phr.translation_english
        from (select phr1.postid, h.id, h.translation_english
        from d1rpgcvqcran0q.public.post_hashtag_relation phr1 join
        d1rpgcvqcran0q.public.hashtags h on phr1.hashtagid = h.id) phr join
        (select id from d1rpgcvqcran0q.public.posts where testuserid = %s) p on p.id = phr.postid"""

    # retrieve test hashtag data from database
    test_hashtags = {}
    for user in test_users:
        # form test data set
        cur.execute(sql_hashtags_test, (user,))
        for item in cur.fetchall():
            post_id = item[0]
            # A NULL translation is treated like an empty one instead of crashing on None.strip().
            cur_hashtag = (item[1] or '').strip()
            if cur_hashtag == '':
                continue
            post_hashtags = test_hashtags.setdefault(post_id, [])
            # Only skip the duplicate itself. The previous if/else replaced the whole list with the
            # single repeated hashtag, dropping every hashtag collected for the post so far.
            if cur_hashtag not in post_hashtags:
                post_hashtags.append(cur_hashtag)

    # store test hashtag data
    file_test_data_set = (base_path / "test_data_set.csv").resolve()
    # newline='' is required by the csv module; without it blank rows appear on Windows.
    with open(file_test_data_set, 'w', newline='') as f:
        w = csv.writer(f)
        for row in test_hashtags.items():
            w.writerow(row)

    return list(test_hashtags.values()), test_hashtags
276 |
277 |
def adjust_data_structure(dict):
    """
    Flatten a nested post mapping to its 'desc' entries.

    :param dict: mapping of key -> mapping that may contain a 'desc' entry
    :return: new dict mapping every key to the 'desc' value of its entry (None if 'desc' is absent)
    """
    # restructure relevant data
    return {key: entry.get('desc') for key, entry in dict.items()}
284 |
285 |
def visualize_similarities(test_user_pair, test_runs, action_type, skipgrammodelevaluation, epochs, lr,
                           within_feed=False, thesis_chart=False):
    """
    Compute hashtag similarities per test run and hand them to plot_similarities_differences.

    Two results are produced with ``skipgrammodelevaluation.feed_sim``:
    - per user and test run: similarity of the posts within that user's feed
    - per test run: similarity of the combined posts of the first two users of ``test_user_pair``
    Too frequent hashtags are removed with ``remove_too_frequent_hashtags`` beforehand and every
    similarity is rounded to four decimals.

    :param test_user_pair: users to analyse; the first two entries are used for the combined feed
    :param test_runs: test run ids to evaluate
    :param action_type: forwarded to the plotting function
    :param skipgrammodelevaluation: object providing remove_too_frequent_hashtags() and feed_sim()
    :param epochs: forwarded to the plotting function
    :param lr: forwarded to the plotting function
    :param within_feed: forwarded to the plotting function
    :param thesis_chart: forwarded to the plotting function
    :return: None
    """
    # only the hashtag part of the retrieved data is needed here
    _, hashtag_data = retrieve_hashtags(test_user_pair, test_runs)
    text_source = 'hashtags'

    print(f"*** DATA RETRIEVED FOR TEXT SOURCE: {text_source}")

    def filtered_hashtags(user, run):
        # hashtags per post of one feed, without the too frequent ones
        return skipgrammodelevaluation.remove_too_frequent_hashtags(
            adjust_data_structure(hashtag_data[user][run]))

    # similarity of the posts within each user's own feed
    user_feed_similarities = {}
    for user in test_user_pair:
        per_run = {}
        user_feed_similarities[user] = per_run
        for run in test_runs:
            feed_posts = list(filtered_hashtags(user, run).values())
            per_run[run] = round(skipgrammodelevaluation.feed_sim(feed_posts), 4)
    print("*** COMPUTED SIMILARITIES WITHIN FEED")
    print(user_feed_similarities)

    # similarity of both users' feeds taken together
    first_user, second_user = test_user_pair[0], test_user_pair[1]
    users_similarities = {}
    for run in test_runs:
        first_raw = adjust_data_structure(hashtag_data[first_user][run])
        second_raw = adjust_data_structure(hashtag_data[second_user][run])
        first_filtered = skipgrammodelevaluation.remove_too_frequent_hashtags(first_raw)
        second_filtered = skipgrammodelevaluation.remove_too_frequent_hashtags(second_raw)
        combined_posts = list(first_filtered.values()) + list(second_filtered.values())
        users_similarities[run] = round(skipgrammodelevaluation.feed_sim(combined_posts), 4)
    print("*** COMPUTED SIMILARITIES BETWEEN TWO FEEDS")
    print(users_similarities)

    plot_similarities_differences(user_feed_similarities, users_similarities, text_source, action_type, epochs, lr,
                                  within_feed, thesis_chart)
332 |
--------------------------------------------------------------------------------
/DataAnalysis/SkipGramModel.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from collections import defaultdict
3 | from tqdm.notebook import trange, tqdm
4 | from sklearn.preprocessing import normalize
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | import json
8 | import operator
9 | import string
10 | import random
11 |
12 | # Special thanks to Jan Scholich (janscho@student.ethz.ch) for significantly contributing to the implementation of
13 | # the Skip Gram Model as outlined below.
14 |
def sigmoid(x):
    """
    Logistic function 1 / (1 + e^(-x)), evaluated with numpy.

    :param x: value passed to np.exp after negation
    :return: logistic function value of x
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator
20 |
21 |
def preprocessing(posts):
    """
    Normalise the hashtags of every post and drop posts without any hashtag.

    Leading and trailing punctuation is stripped from each hashtag and the result is lower-cased.

    :param posts: indexable sequence of posts, each post being an iterable of hashtag strings
    :return: list of normalised hashtag lists; posts whose hashtag list is empty are omitted
    """
    normalised_posts = (
        [hashtag.strip(string.punctuation).lower() for hashtag in posts[position]]
        for position in range(len(posts))
    )
    # keep only posts that still contain at least one entry
    return [hashtags for hashtags in normalised_posts if hashtags]
33 |
34 |
35 | class SkipGramModel:
36 |
def __init__(self, max_freq, min_freq, embedding_size, neg_sample_size, lr, epochs, training_data):
    """
    Set up the model and immediately run the complete pipeline: data loading and preprocessing,
    preparation of the training samples, training, plotting of the training loss and storing of
    the results.

    :param max_freq: maximum number of appearances of a hashtag (stored as self.max_freq)
    :param min_freq: minimum number of appearances of a hashtag (stored as self.min_freq)
    :param embedding_size: embedding size (stored as self.N)
    :param neg_sample_size: number of negative samples per positive pair (stored as self.K)
    :param lr: learning rate used for training
    :param epochs: number of training epochs
    :param training_data: training data (stored as self.training_data)
    """
    self.base_path = Path(__file__).parent
    self.training_data = training_data
    ### set hyperparameters for creating data set ###
    # only hashtags whose frequency lies between min_freq and max_freq are considered
    # appears max times
    self.max_freq = max_freq
    # appears min times
    self.min_freq = min_freq
    # filter hashtags
    file_fyp_hashtags = (self.base_path / "hashtags_to_ignore.json").resolve()
    # the context manager closes the file again; it was previously left open
    with open(file_fyp_hashtags, ) as f:
        self.filter_hashtags = list(json.load(f).values())
    # embedding size
    self.N = embedding_size
    # number of negative samples per positive pairs (wt,wi)
    self.K = neg_sample_size
    self.W = None
    self.W_prime = None
    self.hashtag_to_index = {}
    self.index_to_hashtag = []

    ### data loading and preprocessing ###
    print("***** Loading and preprocessing data *****")
    self.posts = None
    self.data_loading_preprocessing()

    ### prep data for training ###
    print("***** Preparing data for training *****")
    self.training_samples_incl_neg = None
    self.vocabulary = None
    self.prep_data_for_training()

    ### training ###
    # hyperparameters for training
    print("***** Starting training *****")
    self.lr = lr
    self.epochs = epochs
    self.epoch_losses = []
    self.step_losses = []
    self.training()
    print("Training completed")

    ### plot training loss performance ###
    print("***** Plotting training loss performance *****")
    self.plot_training_loss_performance()

    ### store model results ###
    print("***** Storing model results *****")
    self.store_data()

    print("***** Training of skip-gram model completed. *****")
89 |
def data_loading_preprocessing(self):
    """
    Clean the raw training posts and keep the result on the instance.

    Runs the module-level ``preprocessing`` on ``self.training_data``, stores
    the outcome in ``self.posts`` and prints a short summary.
    :return:
    """
    # file_path = (self.base_path / "training_data_set.csv").resolve()
    # posts = np.genfromtxt(file_path, delimiter=',', dtype=np.dtype(str), usecols=1)
    # # splits the string of hashtags
    # posts = np.char.split(posts)
    # print(posts)

    cleaned_posts = preprocessing(self.training_data)
    self.posts = cleaned_posts
    preview = cleaned_posts[:3]
    print("Set of hashtags for first three posts: \n", preview)
    print("Number of posts:", len(cleaned_posts))
104 |
def filter_common_hashtags(self, pair):
    """
    Predicate for ``filter``: keep a (hashtag, count) pair unless the hashtag
    is listed in ``self.filter_hashtags``.

    :param pair: tuple ``(hashtag, count)``
    :return: False for hashtags on the ignore list, True otherwise
    """
    hashtag, _count = pair
    return hashtag not in self.filter_hashtags
111 |
def prep_data_for_training(self):
    """
    Prepare data for training:
    - extract vocabulary of hashtags V
    - convert corpus into indices
    - extract pair (hashtag, context (i.e. hashtags that are co-occurring with hashtag))
    - negative sampling

    Results are stored on the instance: ``vocabulary``, ``hashtag_to_index``,
    ``index_to_hashtag`` and ``training_samples_incl_neg`` (one list per
    vocabulary index holding tuples ``(hashtag, context, [negatives])``).

    The vocabulary is deduplicated, so ``hashtag_to_index[h]`` is always the
    position of ``h`` in ``index_to_hashtag`` and ``len(self.vocabulary)``
    equals the number of distinct entries.
    :return:
    """

    # count how often hashtags appear over all posts
    frequencies = defaultdict(int)
    for post in self.posts:
        for hashtag in post:
            frequencies[hashtag] += 1
    # sort hashtags by appearance frequency
    sorted_counts = sorted(frequencies.items(), key=operator.itemgetter(1), reverse=True)

    # drop hashtags that are on the ignore list
    kept_counts = list(filter(self.filter_common_hashtags, sorted_counts))

    # Hashtags outside [min_freq, max_freq] share the single token 'unk'.
    # Ids are handed out on first occurrence, so every token gets exactly one
    # id and both lookup tables stay aligned.
    vocabulary = []
    hashtag_to_index = {}
    for hashtag, occurrences in kept_counts:
        if occurrences < self.min_freq or occurrences > self.max_freq:
            token = 'unk'
        else:
            token = hashtag
        if token not in hashtag_to_index:
            hashtag_to_index[token] = len(vocabulary)
            vocabulary.append(token)

    self.vocabulary = vocabulary
    self.hashtag_to_index = hashtag_to_index
    self.index_to_hashtag = list(vocabulary)

    assert len(self.index_to_hashtag) == len(self.hashtag_to_index)
    print("Number of hashtag (unfiltered):", len(sorted_counts))
    print("Number of hashtag (filtered):", len(self.index_to_hashtag))

    # transforming dataset by replacing the words with their index.
    # NOTE(review): hashtags mapped to 'unk' are dropped from the posts here rather than
    # replaced by the 'unk' id, so 'unk' only receives training samples if it literally
    # occurs as a hashtag - confirm this is intended.
    posts_index = []
    for post in self.posts:
        # only add hashtags that are in the vocabulary!!! (all others dropped)
        ids = [self.hashtag_to_index[hashtag] for hashtag in post if hashtag in self.hashtag_to_index]
        posts_index.append(ids)
    print("First three posts represented by the indices of their hashtags:")
    print(posts_index[:3])
    print("Number of posts (after indexing them):", len(posts_index))
    print("Number of hashtags (including duplications)", sum([len(x) for x in posts_index]))

    ## Extract pair (hashtag, context)
    # one independent list per vocabulary entry holding all of its (hashtag, context) pairs
    training_samples = [[] for _ in range(len(self.index_to_hashtag))]

    # used for descriptive statistics (to check that it works)
    total_pairs = 0
    pairs_per_post = []

    # iterate through all posts
    for post in tqdm(posts_index):
        interim_count = 0
        # every other hashtag of the same post is context of the current hashtag
        for i, hashtag in enumerate(post):
            for j, context_hashtag in enumerate(post):
                if j != i:
                    interim_count += 1
                    training_samples[hashtag].append((hashtag, context_hashtag))
        total_pairs += interim_count
        pairs_per_post.append(interim_count)
    # displays the number of context-hashtag-pairs per post as histogram
    fig = plt.figure()
    plt.hist(pairs_per_post)
    fig.suptitle("Histogram of the number of training samples/context-hashtag-pairs per post:")
    plt.xlabel('Number of training samples')
    plt.ylabel('Number of posts')
    plt.show()

    # Total number of context-word-pairs (training samples)
    print("Number of training samples/context-hashtag-pairs:", sum([len(x) for x in training_samples]))
    print("Manual count of training samples to validate:", total_pairs)

    # Negative Sampling
    # Every in-vocabulary hashtag occurrence, as an id. Drawing uniformly from this pool
    # samples hashtags proportionally to their corpus frequency.
    sampling_pool = [idx for ids in posts_index for idx in ids]
    distinct_ids = set(sampling_pool)

    self.training_samples_incl_neg = []
    # iterate through the per-hashtag lists of context-hashtag-pairs (training samples)
    for hashtag_samples_ind in tqdm(range(len(training_samples))):
        hashtag_pairs_and_neg = []
        for sample in training_samples[hashtag_samples_ind]:
            context = sample[1]
            neg_samples = []
            # Rejection sampling can only terminate if the pool holds an id other than the
            # context; otherwise the sample is stored without negatives.
            if len(distinct_ids) > 1 or context not in distinct_ids:
                # repeat for K negative samples
                for _ in range(self.K):
                    neg_ind = context
                    # redraw while the candidate equals the context hashtag
                    while neg_ind == context:
                        neg_ind = sampling_pool[random.randint(0, len(sampling_pool) - 1)]
                    neg_samples.append(neg_ind)
            # create a tuple (w_i, w_t, C) where C is the list of K negative samples
            hashtag_pairs_and_neg.append(sample + (neg_samples,))
        self.training_samples_incl_neg.append(hashtag_pairs_and_neg)
235 |
def training(self):
    """
    Train the skip-gram model with negative sampling using per-sample gradient steps.

    Reads ``self.training_samples_incl_neg``, ``self.vocabulary``, ``self.N``,
    ``self.lr`` and ``self.epochs``; writes ``self.W`` (hashtag embeddings),
    ``self.W_prime`` (context embeddings), ``self.epoch_losses`` and
    ``self.step_losses``. The step size in epoch ``e`` (1-based) is ``lr / e``.

    :raises ValueError: if there is not a single training sample
    :return:
    """
    # fixed seeds so weight initialisation and shuffling are reproducible
    np.random.seed(42)
    random.seed(42)

    # flatten the per-hashtag sample lists into one list of (w_i, w_t, C) tuples
    vectorized_training_samples = [inner for outer in self.training_samples_incl_neg for inner in outer]
    if not vectorized_training_samples:
        # previously this surfaced as a ZeroDivisionError after the first (empty) epoch
        raise ValueError("No training samples available: cannot train the skip-gram model.")

    # initialization of weights to be between -0.8 and 0.8
    # np.longdouble is the platform-independent name of the extended-precision float;
    # np.float128 is not defined on every platform.
    vocab_size = len(self.vocabulary)
    self.W = np.random.rand(vocab_size, self.N).astype(np.longdouble)
    self.W_prime = np.random.rand(self.N, vocab_size).astype(np.longdouble)
    self.W = (2 * self.W - 1) * 0.8
    self.W_prime = (2 * self.W_prime - 1) * 0.8

    # normalize vectors to mitigate difference of vector length and only have difference of vector angle
    self.W = normalize(self.W, axis=1, norm='l2')

    # iterate through the number of epochs
    for epoch in range(self.epochs):
        print("Epoch", epoch + 1)
        epoch_loss = 0
        count = 0
        # TODO remove "(epoch+1)", dividing by to reduce loss even stronger which may result that loss diverges again
        # TODO check how it influences loss using / not using it
        step_size = self.lr / (epoch + 1)

        # shuffle training samples to make model more robust
        random.shuffle(vectorized_training_samples)

        t = tqdm(vectorized_training_samples, desc="loss: {:.4f}".format(epoch_loss))

        # iterate through all samples of the training set
        for sample in t:
            wi = sample[0]
            wt = sample[1]
            C_minus = sample[2]

            # NOTE(review): e_wi, e_wt and e_wm below are NumPy views into W / W_prime,
            # so each in-place row/column assignment also changes the corresponding
            # variable, and later expressions in the same step see the updated values.
            # Statement order is therefore significant - confirm this is intended.
            e_wi = self.W[wi]
            e_wt = self.W_prime[:, wt]

            # accumulates sigmoid(e_wi . e_wm) * e_wm over the negative samples
            s = 0
            # accumulates the loss of this training step
            step_loss = 0

            # iterate through negative samples
            for wm in C_minus:
                # get embedding of the negative sample
                e_wm = self.W_prime[:, wm]
                # update the weight of the context matrix for the (negative) sampled hashtag using GD
                self.W_prime[:, wm] = e_wm - step_size * sigmoid(np.dot(e_wi, e_wm)) * e_wi
                neg_activation = sigmoid(np.dot(e_wi, e_wm))
                # add to the temporary variable as described above
                s += neg_activation * e_wm
                # add to the step loss
                t_step_loss = 1 - neg_activation
                # case distinction for numerical stability
                if t_step_loss <= 0:
                    step_loss -= np.log(10 ** -10)
                else:
                    step_loss -= np.log(t_step_loss)

            # update weights of the hashtag embeddings
            self.W[wi] = e_wi - step_size * ((sigmoid(np.dot(e_wi, e_wt)) - 1) * e_wt + s)

            # update weights of the context hashtag
            self.W_prime[:, wt] = e_wt - step_size * (sigmoid(np.dot(e_wi, e_wt)) - 1) * e_wi

            # add to step loss
            step_loss -= np.log(sigmoid(np.dot(e_wi, e_wt)))
            epoch_loss += step_loss
            self.step_losses.append(step_loss)

            # for bookkeeping and updating loss
            count += 1
            if epoch_loss / count == np.inf:
                print(count)
            if count % 1000 == 0:
                t.set_description("loss: {:.8f}".format(epoch_loss / count))
                t.refresh()

        # normalize updated weights
        self.W = normalize(self.W, axis=1, norm='l2')

        epoch_loss = epoch_loss / len(vectorized_training_samples)
        print("Loss", epoch_loss)
        self.epoch_losses.append(epoch_loss)
329 |
def plot_training_loss_performance(self):
    """
    Visualizing the loss performance of the current training session.

    Produces three charts: loss per epoch (saved as PNG below
    ``sgm_resources``), loss per training step, and loss averaged over
    consecutive windows of 1000 training steps (both only shown).
    :return:
    """
    # make sure the output directory exists before saving the chart
    (self.base_path / "sgm_resources").mkdir(parents=True, exist_ok=True)

    # Plot loss during the epochs
    fig = plt.figure()
    plt.plot(range(self.epochs), self.epoch_losses)
    fig.suptitle("Loss progression")
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.savefig(self.base_path / f"sgm_resources/lossprogressionepochs_epochs{self.epochs}_lr{self.lr}.png",
                bbox_inches='tight')

    # plot losses during the individual context-word-negative-sample-tuples
    fig = plt.figure()
    plt.plot(range(len(self.step_losses)), self.step_losses)
    fig.suptitle("Loss progression")
    plt.xlabel('Training steps')
    plt.ylabel('Loss')
    plt.show()

    # plot loss averaged over windows of 1000 successive training steps;
    # each window is divided by its own length so that the last, possibly
    # shorter, window is averaged correctly as well
    window = 1000
    step_losses_thousands = []
    for start in range(0, len(self.step_losses), window):
        chunk = self.step_losses[start:start + window]
        step_losses_thousands.append(sum(chunk) / len(chunk))

    fig = plt.figure()
    plt.plot(range(len(step_losses_thousands)), step_losses_thousands)
    fig.suptitle("Loss progression")
    plt.xlabel('Training steps in 1000s')
    plt.ylabel('Loss')
    plt.show()
367 |
def store_data(self):
    """
    Store results from model training below ``<base_path>/sgm_resources``.

    Writes three files whose names carry the number of epochs and the
    learning rate: the embedding matrix ``self.W`` as CSV and the lookup
    tables ``self.hashtag_to_index`` / ``self.index_to_hashtag`` as JSON.
    :return:
    """
    resources = self.base_path / "sgm_resources"
    # create the output directory on first use instead of failing on a missing path
    resources.mkdir(parents=True, exist_ok=True)
    suffix = f"epochs{self.epochs}_lr{self.lr}"

    # saving embedding weights in csv file
    print(self.W)
    file_embedding_weights_csv = (resources / f"embedding_weights_{suffix}.csv").resolve()
    np.savetxt(file_embedding_weights_csv, self.W, delimiter=',')

    # self.hashtag_to_index
    file_hashtag_to_index = (resources / f"sgm_hashtag_to_index_{suffix}.json").resolve()
    with open(file_hashtag_to_index, 'w') as f:
        json.dump(self.hashtag_to_index, f)

    # self.index_to_hashtag
    file_index_to_hashtag = (resources / f"sgm_index_to_hashtag_{suffix}.json").resolve()
    with open(file_index_to_hashtag, 'w') as f:
        json.dump(self.index_to_hashtag, f)
391 |
--------------------------------------------------------------------------------
/DataAnalysis/SkipGramModelEvaluation.py:
--------------------------------------------------------------------------------
1 | import json
2 | import operator
3 | import string
4 | from collections import defaultdict
5 | from pathlib import Path
6 | from matplotlib import pyplot as plt
7 | from sklearn.manifold import TSNE
8 |
9 | import seaborn as sn
10 | import pandas as pd
11 | import numpy as np
12 |
13 | # Special thanks to Jan Scholich (janscho@student.ethz.ch) for significantly contributing to the implementation of
14 | # the Skip Gram Model as outlined below.
15 |
16 | # Skip Gram Model Evaluation from trained model
17 |
def cosine_similarity(e_x, e_y):
    """
    Cosine of the angle between two embedding vectors.

    :param e_x: first vector
    :param e_y: second vector
    :return: dot product of the vectors divided by the product of their Euclidean norms
    """
    return np.dot(e_x, e_y) / (np.linalg.norm(e_x) * np.linalg.norm(e_y))
28 |
29 |
def preprocessing(hashtag):
    """
    Normalise a single hashtag: strip leading/trailing punctuation, then lowercase.

    :param hashtag: raw hashtag string
    :return: the cleaned hashtag, or None if nothing is left after cleaning
    """
    # chain both steps so the stripped value is the one that gets lowercased
    # (lowercasing the raw input again would silently discard the stripping)
    x = hashtag.strip(string.punctuation).lower()
    if x:
        return x
    return None
37 |
38 | class SkipGramModelEvaluation:
39 |
def __init__(self, embedding_size, frequencies, epochs, max_freq, min_freq, lr, test_data=None):
    """
    Load a stored skip-gram model and run the evaluation steps.

    Construction imports the stored model, prints example similarities,
    stores the embedding charts and, if test data is given, evaluates it.

    :param embedding_size: dimensionality N of the stored embeddings
    :param frequencies: iterable of counts; one chart of the first ``n`` hashtags is stored per value
    :param epochs: number of epochs of the stored model (part of the file names)
    :param max_freq: upper count bound used when filtering test hashtags
    :param min_freq: lower count bound used when filtering test hashtags
    :param lr: learning rate of the stored model (part of the file names)
    :param test_data: optional mapping of post id to list of hashtags
    """
    self.base_path = Path(__file__).parent
    self.N = embedding_size
    self.epochs = epochs
    self.W = None
    self.hashtag_to_index = None
    self.index_to_hashtag = None
    # appears max times
    self.max_freq = max_freq
    # appears min times
    self.min_freq = min_freq
    # filter hashtags
    file_fyp_hashtags = (self.base_path / "hashtags_to_ignore.json").resolve()
    # context manager so the file handle is closed again after loading
    with open(file_fyp_hashtags) as f:
        self.filter_hashtags = list(json.load(f).values())
    self.lr = lr
    self.posts_no_embedding = {}
    self.test_data = test_data
    print("***** Starting evaluation of SGM *****")
    self.import_model_results()
    self.example_analysis_embeddings()
    self.visualizing_hashtag_embeddings(frequencies)
    if self.test_data is not None:
        self.evaluate_test_data()
        print("***** Posts for which 0 hashtags have a pretrained embedding: ", self.posts_no_embedding)
    print("***** Evaluation of SGM completed *****")
66 |
def import_model_results(self):
    """
    Import hashtag embedding weights, hashtag_to_index, and index_to_hashtag
    from ``<base_path>/sgm_resources`` (file names carry epochs and learning rate).

    Additionally saves the loaded embedding matrix as ``embedding.npy`` in the
    current working directory.
    :return:
    """
    resources = self.base_path / "sgm_resources"
    suffix = f"epochs{self.epochs}_lr{self.lr}"

    # import embedding weights
    file_embedding_weights_csv = (resources / f"embedding_weights_{suffix}.csv").resolve()
    self.W = np.genfromtxt(file_embedding_weights_csv, delimiter=',')
    np.save('embedding.npy', self.W)

    # import hashtags_to_index (context manager closes the file again)
    file_hashtag_to_index = (resources / f"sgm_hashtag_to_index_{suffix}.json").resolve()
    with open(file_hashtag_to_index) as f:
        self.hashtag_to_index = json.load(f)

    # import index_to_hashtags
    file_index_to_hashtag = (resources / f"sgm_index_to_hashtag_{suffix}.json").resolve()
    with open(file_index_to_hashtag) as f:
        self.index_to_hashtag = json.load(f)
84 |
def example_analysis_embeddings(self):
    """
    Evaluating model performance based on analysis of example words.

    Prints two tables: the cosine similarity of hand-picked hashtag pairs,
    and for a list of example hashtags the most similar other hashtag.
    Example hashtags missing from the loaded vocabulary are reported as
    "n/a" instead of aborting the evaluation.
    :return:
    """
    not_available = "n/a (not in vocabulary)"
    pairs = [
        ("cooking", "chocolate"),
        ("apple", "iphone"),
        ("covid19", "coronavirus"),
        ("beerpong", "drink"),
        ("bike", "ride"),
        ("neymar", "messi"),
    ]
    print("| x | y | sim(x,y) | ")
    print("|--------|---------|--------------|")
    for x, y in pairs:
        if x not in self.hashtag_to_index or y not in self.hashtag_to_index:
            print("|", x, "|", y, "|", not_available, "|")
            continue
        e_x = self.W[self.hashtag_to_index[x]]
        e_y = self.W[self.hashtag_to_index[y]]
        sim = cosine_similarity(e_x, e_y)
        print("|", x, "|", y, "|", sim, "|")

    example_words = ["love", "car", "president", "monday", "green", "money", "health", "faith", "book", "france",
                     "swiss", "spring",
                     "food", "home", "law", "america"]

    print("| x | y | sim(x,y) | ")
    print("|--------|---------|--------------|")

    for x in example_words:
        if x not in self.hashtag_to_index:
            print("|", x, "|", "-", "|", not_available, "|")
            continue
        own_index = self.hashtag_to_index[x]
        e_x = self.W[own_index]
        # similarity of x to every row of the embedding matrix
        W_sim = np.apply_along_axis(lambda y: cosine_similarity(e_x, y), 1, self.W)
        # exclude x itself; -inf (rather than 0) keeps it excluded even when all
        # other similarities are negative
        W_sim[own_index] = -np.inf
        y = self.index_to_hashtag[np.argmax(W_sim)]
        print("|", x, "|", y, "|", np.max(W_sim), "|")
119 |
def visualizing_hashtag_embeddings(self, frequencies):
    """
    Project the hashtag embeddings to 2D with t-SNE and store one scatter
    chart per requested count, showing the first ``frequency`` hashtags of
    ``self.index_to_hashtag`` with their labels.

    :param frequencies: iterable of integers; one chart is stored per value
    :return:
    """
    vocab_size = len(self.index_to_hashtag)
    tokens = [self.W[row, :] for row in range(vocab_size)]

    tsne_model = TSNE(perplexity=40, n_components=2, init='pca', n_iter=2500, random_state=23)
    new_values = tsne_model.fit_transform(tokens)
    print(new_values[:3])

    for frequency in frequencies:
        # coordinates and labels of the first `frequency` hashtags
        head = new_values[:frequency]
        xs = np.transpose(head)[0]
        ys = np.transpose(head)[1]
        names = self.index_to_hashtag[:frequency]

        fig, ax = plt.subplots(figsize=(24, 16))
        ax.scatter(xs, ys)
        ax.title.set_text(f"{frequency} Most Frequent Hashtags")

        for position in range(len(names)):
            ax.annotate(names[position], (xs[position], ys[position]))

        target = self.base_path / f"sgm_resources/{frequency}_mostfrequenthashtags_epochs{self.epochs}_lr{self.lr}.png"
        plt.savefig(target, bbox_inches='tight')
        print(f"Chart visualizing {frequency} Most Frequent Hashtags stored for {self.epochs}.")
152 |
def post_avg_embedding(self, post_hashtags):
    """
    Calculates the average of the post's hashtags' embeddings.

    Hashtags without an own embedding fall back to the embedding of the
    'unk' token; if the loaded model has no 'unk' entry they are skipped.

    :param post_hashtags: list of hashtags of one post
    :return: mean embedding vector of length ``self.N``
    :raises ValueError: if no hashtag of the post contributes an embedding
    """
    # index of the shared out-of-vocabulary token, None if the model has none
    unk_index = self.hashtag_to_index.get('unk')
    in_vocab = 0
    avg_vec = np.zeros(self.N)
    for hashtag in post_hashtags:
        # dictionary lookup instead of scanning the index_to_hashtag list
        ind_hashtag = self.hashtag_to_index.get(hashtag, unk_index)
        if ind_hashtag is None:
            continue
        avg_vec += self.W[ind_hashtag]
        in_vocab += 1
    if in_vocab == 0:
        # if post_id not in self.posts_no_embedding.keys():
        #     self.posts_no_embedding[post_id] = post_hashtags
        raise ValueError('Post has 0 hashtags that have pretrained embeddings.')
    return avg_vec / in_vocab
178 |
def post_sim(self, post1, post2):
    """
    Similarity of two posts: cosine similarity of their averaged hashtag embeddings.

    :param post1: list of hashtags of the first post
    :param post2: list of hashtags of the second post
    :return: cosine similarity of the two average embeddings
    """
    return cosine_similarity(self.post_avg_embedding(post1), self.post_avg_embedding(post2))
187 |
def feed_sim(self, feed):
    """
    Calculates the average similarity between all pairs of posts in the feed.

    :param feed: list of posts, each post being a list of hashtags
    :return: mean of ``post_sim`` over all unordered pairs of posts, or NaN
             if the feed holds fewer than two posts (no pair to compare)
    """
    pairs = 0
    total_sim = 0
    for i in range(len(feed)):
        for j in range(i + 1, len(feed)):
            total_sim += self.post_sim(feed[i], feed[j])
            pairs += 1
    if pairs == 0:
        # the average is undefined without any pair; avoid dividing by zero
        return float("nan")
    return total_sim / pairs
203 |
def check_valid_hashtag(self, pair):
    """
    Predicate for ``filter``: decide whether a counted hashtag is kept.

    :param pair: tuple ``(hashtag, count)``
    :return: True if the hashtag is not on the ignore list and its count
             lies within ``[self.min_freq, self.max_freq]``, else False
    """
    hashtag, count = pair
    ignored = hashtag in self.filter_hashtags
    out_of_range = count < self.min_freq or count > self.max_freq
    return not (ignored or out_of_range)
219 |
def clean_hashtags(self, posts):
    """
    Preprocess the hashtags of all posts and keep only the valid ones.

    Every hashtag is passed through the module-level ``preprocessing``;
    hashtags for which it returns nothing are discarded. The remaining
    hashtags are counted over all posts and kept only if
    ``self.check_valid_hashtag`` accepts their (hashtag, count) pair.

    :param posts: mapping of post id to list of raw hashtags
    :return: mapping of post id to list of kept hashtags; posts without any
             kept hashtag are omitted
    """
    preprocessed_posts = {}
    for post_id, raw_hashtags in posts.items():
        # skip hashtags that preprocessing reduced to nothing (None / empty)
        cleaned = [tag for tag in map(preprocessing, raw_hashtags) if tag]
        if cleaned:
            preprocessed_posts[post_id] = cleaned

    # count how often hashtags appear over all posts
    count = defaultdict(int)
    for hashtags in preprocessed_posts.values():
        for hashtag in hashtags:
            count[hashtag] += 1

    # sort hashtags by appearance frequency
    sorted_counts = sorted(count.items(), key=operator.itemgetter(1), reverse=True)

    # filter hashtags; a set gives constant-time membership checks below
    valid_hashtags = {hashtag for hashtag, _ in filter(self.check_valid_hashtag, sorted_counts)}

    filtered_posts = {}
    for post_id, hashtags in preprocessed_posts.items():
        kept = [hashtag for hashtag in hashtags if hashtag in valid_hashtags]
        if kept:
            filtered_posts[post_id] = kept
    return filtered_posts
252 |
def check_too_frequent_hashtags(self, hashtag):
    """
    Predicate for ``filter``: keep a hashtag unless it is listed in ``self.filter_hashtags``.

    :param hashtag: hashtag string
    :return: False if the hashtag is on the ignore list, True otherwise
    """
    return hashtag not in self.filter_hashtags
258 |
def remove_too_frequent_hashtags(self, posts):
    """
    Remove the ignore-listed hashtags from every post.

    A hashtag is kept when ``self.check_too_frequent_hashtags`` accepts it.
    Each post is filtered directly, which avoids first building a list of
    all distinct hashtags with repeated list scans.

    :param posts: mapping of post id to list of hashtags
    :return: mapping of post id to the kept hashtags (order preserved);
             posts without any kept hashtag are omitted
    """
    filtered_posts = {}
    for post_id, hashtags in posts.items():
        kept = [hashtag for hashtag in hashtags if self.check_too_frequent_hashtags(hashtag)]
        if kept:
            filtered_posts[post_id] = kept
    return filtered_posts
277 |
def evaluate_test_data(self, ):
    """
    Preprocess the test data and store a heatmap of pairwise post similarities.

    The posts in ``self.test_data`` are cleaned with ``self.clean_hashtags``;
    the similarity matrix covers at most the first 20 remaining posts.
    Nothing is plotted if no post is left after cleaning.
    :return:
    """
    filtered_posts = self.clean_hashtags(posts=self.test_data)
    posts = list(filtered_posts.values())
    post_ids = list(filtered_posts.keys())

    # cap at 20 posts, but never index past the posts that are actually available
    n_posts = min(20, len(posts))
    if n_posts == 0:
        print("No test posts left after filtering hashtags; skipping similarity heatmap.")
        return

    # Matrix of post similarities
    sim = [[self.post_sim(posts[i], posts[j]) for j in range(n_posts)] for i in range(n_posts)]
    df_cm = pd.DataFrame(sim, index=post_ids[0:n_posts],
                         columns=post_ids[0:n_posts])
    plt.figure(figsize=(24, 16))
    sn.heatmap(df_cm, annot=True)
    plt.savefig(self.base_path / f"sgm_resources/heatmap_firsttestdata_epochs{self.epochs}_lr{self.lr}.png",
                bbox_inches='tight')
    print(f"Chart visualizing first {n_posts} posts in heatmap stored for {self.epochs} and lr {self.lr}.")
313 |
--------------------------------------------------------------------------------
/DataAnalysis/hashtags_to_ignore.json:
--------------------------------------------------------------------------------
1 | {
2 | "88764338": "foryoupage",
3 | "1637407748596742": "fyp?",
4 | "1642147373664261": "fypvirall",
5 | "1646344785794053": "fyp20",
6 | "1644632912092165": "fypchachallenge",
7 | "1706891576089605": "fyp",
8 | "1635070555641861": "foryoupage",
9 | "1645966921365509": "fypfypfypfypfypfypfypfypfyp",
10 | "883904": "foryouuu",
11 | "1694385466292229": "fypcontents",
12 | "1642613516590086": "fypppppppppppppppp",
13 | "229207": "fyp",
14 | "1651780589526022": "kingfyp",
15 | "1642191380435969": "foryourpageviral",
16 | "1633875828543494": "fypdog",
17 | "1631348850976774": "fyp",
18 | "1631845819935750": "fypart",
19 | "1606946063404037": "likeforyoupage",
20 | "1603105080060934": "foryoupa",
21 | "1617501114305542": "fypfypfyp",
22 | "1702065574970369": "fypmlaysia\ud83c\uddf2\ud83c\uddfe",
23 | "1625705313397766": "fyppp",
24 | "1626746770984966": "fyppppp",
25 | "1647699651574789": "fypfypfypfypfypfypfypfypfypfypfyp",
26 | "1644665564694534": "plisfyp",
27 | "1608548676719622": "dogsforyou",
28 | "1609969345764357": "fypplz",
29 | "1636339340982278": "fypmemes",
30 | "1696630895338498": "fyp\u101e\u1031\u1019\u103e\u1015\u1032\u1010\u1000\u103a\u1010\u1031\u102c\u1037\ud83d\ude03",
31 | "1637418424589317": "fyp??",
32 | "1650426297129990": "fyphair",
33 | "1649953988475909": "itsasignforyou",
34 | "42164": "foryou",
35 | "1603364504464389": "foryoupagee",
36 | "1633460239823877": "fypy",
37 | "1605095166336005": "foryoupageeee",
38 | "1634937149617158": "foryoupage\u2764\ufe0f\u2764\ufe0f",
39 | "1676811985203206": "fyviralfyp",
40 | "1636799196126214": "fypoffical",
41 | "1641715580438534": "fypfypfypfypfypfyp",
42 | "1626123835557893": "foryoufyp",
43 | "1620040599854086": "fypforyoupage",
44 | "1632511288704006": "fypo",
45 | "1679200763816965": "fypttv",
46 | "1634600653321222": "fypps",
47 | "1652538132966402": "foryoupageyeh",
48 | "1626102952307718": "fypppppp",
49 | "1630284807035909": "fyptiktok",
50 | "1620625283638277": "foryouviral",
51 | "1628179191522310": "foryoupgepage",
52 | "1633359003571205": "putthisonfyp",
53 | "1703891853128710": "foreverforyou\ud83d\udc40",
54 | "1635280932750342": "fypppppppppppp",
55 | "1685694592866306": "fyp\u30b7\u309aviral\ud83d\udda4tiktok",
56 | "1637342470396934": "fyp\u30b7",
57 | "1685323802588161": "fyp\u30b7\ud83d\udda4foryoupage\u30b7\ud83e\udd8b",
58 | "1682300274755586": "foryourepageofficial",
59 | "22737416": "thisbudsforyou",
60 | "1636358001636357": "fyppppppppppp",
61 | "1616303504084998": "foryoupage\ud83d\ude22",
62 | "1670914542274565": "fypisbest",
63 | "1616746784464901": "foryourpag\u0435",
64 | "1607083044342806": "foryoupageeeee",
65 | "1602924933299205": "foryoupge",
66 | "1666593428398085": "fypviral\u30b7",
67 | "67231518": "foryouph",
68 | "1605983562245125": "foryou_page",
69 | "1639549424304133": "fyp_tiktok",
70 | "1648778089330694": "fyppppppppppppppppppppp",
71 | "1653220312297477": "fyppppppppppppppppppppppppppppppppppp",
72 | "1630729454643206": "fypforyouforyoupage",
73 | "1658590921953281": "fypppppppppppppppppppppppppppppppppp",
74 | "1627320129956870": "foryoupageofficial",
75 | "1633131185532933": "fypppppppppppppp",
76 | "1703769711014918": "fyp\u2764\ufe0f\ud83d\ude4f\ud83c\udffb\ud83d\ude2d",
77 | "1625649784043526": "fypplzz",
78 | "1649540023232517": "fypppppoo",
79 | "1605511720126469": "foryoupagina",
80 | "54185045": "fyplz",
81 | "1648316753236998": "fypppppppppppppppppppppp",
82 | "1688122502170626": "fyp\u1015\u1031\u102b\u103a\u101b\u1031\u102c\u1000\u103a\u1005\u1019\u103a\u1038",
83 | "1684082479714306": "fyp\u30b7\u309aviral\ud83d\udda4video",
84 | "1605270298646534": "foryoupace",
85 | "1599011224611846": "foryoupag",
86 | "1641193139018758": "fypaged",
87 | "1651632362337286": "fyppageforyou",
88 | "1664953059570690": "dailyvideosforyou",
89 | "1623668238525445": "fypplease",
90 | "84873565": "foryouthis",
91 | "1655586519169029": "fypnotworking",
92 | "1634943047845893": "foryoupage\u2728",
93 | "1640702198714373": "getthisonthefypplease",
94 | "1602020764569605": "foryoupageplease",
95 | "1614846349487125": "fype",
96 | "1637403385093126": "foryou?",
97 | "1692301272753158": "fypuswnt",
98 | "1634091457788933": "fypforyourpage",
99 | "1598364115802118": "foryoupgae",
100 | "1620485025788934": "fypppp",
101 | "1628190985570310": "fypfyp",
102 | "1620988974477318": "getthisonthefyp",
103 | "1647232092094469": "fypfypfypfypfypfypfypfypfypfypfypfypfyp",
104 | "1642258369204230": "fyppppppppppppppppppp",
105 | "7603941": "foryouchallenge",
106 | "1655495140363270": "fyppppppls",
107 | "1685172109999106": "f\u00fcrdichseite\u30b7foryoupage",
108 | "1603504302397446": "foryoulage",
109 | "1639107461180422": "foryoupagedoesnotwork",
110 | "1624339432620038": "fyppage",
111 | "1654857781475333": "fyp\u30b7viral",
112 | "1664119477821441": "fyp\u30b7\u309aviral",
113 | "1637715128978437": "fyppppppppppppppppp",
114 | "1648047480944646": "fypfypfypfy",
115 | "1633801928553478": "fypppppppppp",
116 | "1651224510575621": "fypforyoupagethis",
117 | "1639908894714881": "fypage\u30b7",
118 | "1684624429633537": "foryoupageofficiall2021",
119 | "1627926592598021": "fypforyou",
120 | "1607069197518854": "foryourpages",
121 | "1657857938666501": "fypcommunity",
122 | "1630155480914949": "fyppppppp",
123 | "1640241025714181": "fyppppppppppppp",
124 | "1654955847156741": "blowupforyoupage",
125 | "1604390375122950": "foryouppage",
126 | "1637403568312326": "foryoupage?",
127 | "20922363": "pageforyou",
128 | "1702998879182853": "foryoupage_tik_tok_viral_video\ud83d\udcaf\ud83d\udcb8\ud83e\udd0d\ud83d\udc8a",
129 | "1640999288847365": "fypchallage",
130 | "1675963560778754": "fypmototiktok",
131 | "1667434148820998": "fyp\u30b7cr",
132 | "1634922086900741": "fypchallenge",
133 | "1634318675729413": "fypplss",
134 | "8085197": "foryouuuuu",
135 | "1605614978359301": "foryourepage",
136 | "1647967591256070": "fypcouple",
137 | "1667758271570950": "fypplppppppppppp",
138 | "1638251441181702": "fyp\u30b7\u30c4",
139 | "1644109611442182": "fypofficial",
140 | "1641655411270661": "fyppagee",
141 | "13082896": "foryoupaige",
142 | "1685059559254018": "foryoupageforeveryone\u2661",
143 | "1634577353868293": "pageforyou\ud83e\udd8b",
144 | "1628170725056582": "fyp\ud83d\udc2e",
145 | "1634937292926981": "foryoupage\u2764\ufe0f",
146 | "1619679937718277": "plsfyp",
147 | "1628190219645957": "foryoupagedoesntwork",
148 | "1631522291667974": "forfyp",
149 | "1656713786866694": "fyp\ud83e\uddca",
150 | "1632540676574213": "fypppppppp",
151 | "1634979065961477": "fyp\u2728",
152 | "1649139584636934": "fyppppppppppppppppppppppp",
153 | "1597916865387525": "foryou1",
154 | "1604552984408070": "foryoupageee",
155 | "1603600556098566": "foryoupg",
156 | "1685972632069125": "foryou\ud83e\udde3",
157 | "1664495475082242": "foryoupageofficiall",
158 | "1592201096750085": "foryou\u2764",
159 | "1606872592930822": "foryourpagee",
160 | "1628662386826245": "fyp_",
161 | "1635848705845253": "fypyoupage",
162 | "1634369597616134": "viralfyp",
163 | "61667223": "fyps",
164 | "1617590735671301": "fypfor",
165 | "1634939850577926": "fyp\u2764\ufe0f",
166 | "1609595389571077": "foryoupagetiktok",
167 | "1639699601016837": "fyp\u30c4",
168 | "1619120300969989": "fyp\ud83d\udc95",
169 | "1632002941088774": "fypviral",
170 | "1641236262278149": "fyp\u30b7\u309a",
171 | "1659949481334789": "fypforyoupage\u30b7",
172 | "1609273039298565": "foryoupag\u0435",
173 | "1650742742654982": "fyp2020",
174 | "33971256": "foryouofficial",
175 | "1635084903417862": "foryou\u30b7",
176 | "1665036454423553": "fyp\u30b7foryoupage\u30b7tiktok",
177 | "1624550708748294": "foryoupage\ud83d\udc40\ud83d\udc40",
178 | "1617425133872134": "foryoupageoffical",
179 | "1623383052219414": "fypthis",
180 | "1592743307939841": "tiktokforyou",
181 | "1699264431886341": "fyp\u30b7\ud83c\udf7f\ud83c\udf7f",
182 | "1603204382206981": "fypg",
183 | "7107602": "foryouforyou",
184 | "1656991624902662": "fyp\u30b7foryoupage\u30b7",
185 | "1604142815383557": "foryoupgage",
186 | "1620267499925509": "fypls",
187 | "1611853076456450": "viralforyou",
188 | "1658673609103366": "fyp21",
189 | "1610265502020613": "foryourpagechallenge",
190 | "1675567091586054": "fyppoppppppppppppppppppppppp",
191 | "1679511090156546": "fypdonggggggggg\u30b7",
192 | "1670686468547590": "justgetthisonfyp",
193 | "1635213389964293": "fypit",
194 | "1602878675792901": "getmeontheforyoupage",
195 | "1632581882862593": "fypph",
196 | "1634967704187910": "fyp\u2764",
197 | "1694852740601861": "fypshortclips",
198 | "1686358467262466": "fyppyfyp",
199 | "1604276726661125": "foryoupagethis",
200 | "442854": "foryouu",
201 | "1618323884896262": "tiktokpageforyou",
202 | "1624022682062853": "fyp\ud83d\ude2d",
203 | "1659700661828610": "fyp\u30b7\u30c4post",
204 | "1608973297173509": "foryoupageforever",
205 | "1606878050264069": "onyourforyoupage",
206 | "12009": "justforyou",
207 | "1640768557032454": "fyp\ud83c\udff3\ufe0f\u200d\ud83c\udf08",
208 | "1634950588112902": "foryoupage\u2665\ufe0f",
209 | "1604302285252613": "fypage",
210 | "1639336981017605": "fyppleasetiktok",
211 | "1634937277509638": "foryou\u2764\ufe0f\u2764\ufe0f\u2764\ufe0f",
212 | "1605919148785669": "ffyp",
213 | "1598498371111942": "foryourpage",
214 | "1610847197102085": "foryoutiktok",
215 | "364659": "foryour",
216 | "1664053106852869": "fyp\u30b7\u30c4\u2661",
217 | "1616966643636229": "fypp",
218 | "1626088819377157": "fyfyp",
219 | "1635214780493826": "fypthisss",
220 | "1640815314240518": "fypagechallenge",
221 | "1600360681543685": "getthisontheforyoupage",
222 | "1603987329456134": "foryouapge",
223 | "1670139895030786": "funnycontentforyou",
224 | "1617418873150470": "fyppls",
225 | "1701731659930629": "3minutevideofyp",
226 | "1627524680056837": "fyppppppppp",
227 | "22091782": "foryoup",
228 | "1603502040695813": "foryouoage",
229 | "1666841966392325": "fyp2021",
230 | "1625820133083142": "fypfypfypfyp",
231 | "1623723121789957": "foryou\u30c4",
232 | "1644272574749702": "fyp\u30b7foryoupage",
233 | "1616155509242885": "foryoupagebro",
234 | "1650370082138113": "foryouoffical",
235 | "1623927366993925": "fyp1",
236 | "1674584508176390": "fyp\u30c4viral",
237 | "1604284546288646": "foryoupages",
238 | "43268": "4u",
239 | "108264": "foru",
240 | "20884": "viral",
241 | "153828": "fy"
242 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # An Empirical Investigation of Personalization Factors on TikTok
2 |
3 | In this repository we publish all software resources that were utilized to perform a sock-puppet audit on the web-version of TikTok to mimic a human user. With this audit we focused on analyzing the personalization factors and their influence on the recommendation algorithm of TikTok. You may find our work here: [link](https://dl.acm.org/doi/10.1145/3485447.3512102)
4 |
5 | Within this README we provide a short overview of how to use our code to replicate our results. Please note that even though we are confident that our results are trustworthy, you may encounter different ones due to different time periods and the continuous development of the recommendation algorithm by TikTok.
6 |
7 | ## Running A Test Scenario
8 |
9 | In the following section we provide a step-by-step guide on how to initialise and run one of the test scenarios we performed in our paper.
10 |
11 | First of all, you need to set up the appropriate infrastructure:
12 | 1. Create a Webshare account to obtain IP addresses from proxies you can use.
13 | 2. Create the test users in the database.
14 | 3. Since every test scenario consists of two test users you have to manually create those two users using the previously stored data on TikTok.
15 | 4. Once the user accounts exist on TikTok you may initialize the test scenario by executing the ParalleliseTesting.py file with the corresponding parameters.
16 |
17 |
18 | We exemplify these steps by performing a run of test scenario 28. This scenario aims at ... consisting of the users ...
19 |
20 | - Creating User Accounts
21 | -- Get phone numbers
22 | -- Create user accounts using purchased phone numbers
23 |
24 | - explain structure of db_credentials.json
25 | - explain placeholders: Twilio, Heroku DB, Webshare Proxy API, paths within project
26 |
27 | ## Analyzing Generated Data
28 |
29 | In order to obtain the most promising results of the Skip-Gram model we trained the model over 5 epochs with a learning rate of 0.1.
30 |
--------------------------------------------------------------------------------
/Testing/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/Testing/.DS_Store
--------------------------------------------------------------------------------
/Testing/APItest.py:
--------------------------------------------------------------------------------
1 | # Testing Unofficial TikTok API
2 |
3 | from TikTokApi import TikTokApi
4 | from pathlib import Path
5 | api1 = TikTokApi.get_instance()
6 | # If playwright doesn't work for you try to use selenium
7 | api2 = TikTokApi.get_instance(use_selenium=True)
8 | # Third instance: selenium again, with an explicit chromedriver path resolved relative to this file.
9 | base_path = Path(__file__).parent
10 | file_path = (base_path / "../utilities/chromedriver.exe").resolve()
11 | api3 = TikTokApi.get_instance(use_selenium=True, executablePath=file_path)
12 | # Number of trending posts requested from each API instance (passed as `count` below).
13 | results = 10
14 | # NOTE(review): the custom_verifyFp values below are hard-coded and look like browser session tokens - confirm they should be committed.
15 | # Since TikTok changed their API you need to use the custom_verifyFp option.
16 | # In your web browser you will need to go to TikTok, Log in and get the s_v_web_id value.
17 | trending1 = api1.trending(count=results, custom_verifyFp="verify_klat6pua_gX3v9ItE_uqdV_4zPu_8rMk_KIMu3i51EFuI")
18 | trending2 = api2.trending(count=results, custom_verifyFp="verify_klat6pua_gX3v9ItE_uqdV_4zPu_8rMk_KIMu3i51EFuI")
19 | #trending3 = api3.trending(count=results, custom_verifyFP="verify_klat6pua_gX3v9ItE_uqdV_4zPu_8rMk_KIMu3i51EFuI")
20 |
21 | trending3 = api2.trending(count=results, custom_verifyFp="verify_klkw2don_kCWTFtWb_U1Qu_4OZl_8Rhq_r1fUbV5QMKIt")
22 |
23 | # Presumably requests the posts of one hard-coded account (userID/secUID) via the selenium-backed instance - TODO confirm.
24 | userID = "6717651461067604997"
25 | secUID = "MS4wLjABAAAALP9H8t1_SVmfuAKXV1o9K8XqiaFLxm2ae-EJ5_AJcwogcI_d9btuf_fjbjFOMNpN"
26 | posts = api2.userPosts(userID=userID, secUID=secUID, custom_verifyFp="verify_kljnnr1d_aPqkxu8I_TXtT_4xO8_8zE5_jEkg97g2DRqO")
27 | # NOTE(review): api3, trending3 and posts are assigned but not used further in this script.
28 | for tiktok in trending1:
29 | # Prints the id of the tiktok
30 | print(tiktok['id'])
31 |
32 | print(len(trending1))
33 |
34 | for tiktok in trending2:
35 | # Prints the id of the tiktok
36 | print(tiktok['id'])
37 |
38 | print(len(trending2))
--------------------------------------------------------------------------------
/Testing/BlockedProxyHandling.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import seleniumwire
4 |
5 | from seleniumwire import webdriver
6 | from pathlib import Path
7 |
8 |
9 |
10 | def start_session():
11 | proxy = {
12 | 'proxy_username': 'PLACEHOLDER', 'proxy_password': 'PLACEHOLDER',
13 | 'proxy_host': 'PLACEHOLDER', 'proxy_port': 'PLACEHOLDER',
14 | 'country': 'FR'
15 | }
16 | try:
17 | # bypassing detection of automated software testing
18 | chrome_options = webdriver.ChromeOptions()
19 | chrome_options.add_argument('--disable-blink-features=AutomationControlled')
20 | chrome_options.add_argument('--lang={browser_language}'.format(browser_language='en'))
21 |
22 | # open incognito page to remove any different_posts_noise from tracked cookies or browsing history, according to paper from
23 | # Aniko Hannak et. al.
24 | chrome_options.add_argument('incognito')
25 |
26 | # use proxy if provided:
27 | options = {}
28 | if proxy is not None:
29 | url = "{proxy_username}:{proxy_password}@{proxy_host}:{proxy_port}".format(
30 | proxy_username=proxy['proxy_username'], proxy_password=proxy['proxy_password'],
31 | proxy_host=proxy['proxy_host'], proxy_port=proxy['proxy_port'])
32 | options = {
33 | 'proxy': {
34 | 'http': 'http://' + url,
35 | 'https': 'https://' + url,
36 | 'no_proxy': 'localhost,127.0.0.1'
37 | }
38 | }
39 |
40 | # initializing web driver
41 | base_path = Path(__file__).parent
42 | file_path = (base_path / "../utilities/chromedriver.exe").resolve()
43 | driver = webdriver.Chrome(chrome_options=chrome_options, seleniumwire_options=options,
44 | executable_path=file_path)
45 | driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
46 | driver.get('https://m.tiktok.com')
47 | except (ConnectionAbortedError, seleniumwire.thirdparty.mitmproxy.exceptions.TcpDisconnect) as err:
48 | print(err)
49 | print('\n New driver session with new proxy initialized.')
50 | # here I would run some new code to create a new session with another proxy address
51 |
52 |
53 | if __name__ == "__main__":  # only start a session when this file is executed directly
54 | start_session()
--------------------------------------------------------------------------------
/Testing/ParalleliseTesting.py:
--------------------------------------------------------------------------------
1 | import concurrent.futures
2 |
3 | from src.WebHelper import *
4 | from src.Proxy import *
5 | from src.DataStoring import *
6 | from src.TestRun import TestRun
7 |
8 | base_path = Path(__file__).parent
9 |
10 | def get_test_data():
11 | database = DatabaseHelper()
12 | file_path = (base_path / "../Testing/TestSets/test_user_167.json").resolve()
13 | with open(file_path) as file:
14 | test_json = json.load(file)
15 |
16 | # test_data with settings from database
17 | # code obtains all details from json & database to initiliaze test
18 | test_data = []
19 | for test_set in test_json:
20 | if test_json.get(test_set).get('login'):
21 | test_json.get(test_set)['phone_number'] = database.get_phone_number(test_user_id=test_json.get(
22 | test_set).get('test_user_id'))
23 | test_json.get(test_set)['country_phone_number_prefix'] = database.get_country_phone_number_prefix(
24 | test_user_id=test_json.get(test_set).get('test_user_id'))
25 | test_json.get(test_set)['proxy'] = {
26 | "proxy_username": "PLACEHOLDER", "proxy_password": "PLACEHOLDER",
27 | "proxy_host": database.get_proxy_host(test_user_id=test_json.get(test_set).get('test_user_id')),
28 | "proxy_port": database.get_proxy_port(test_user_id=test_json.get(test_set).get('test_user_id')),
29 | "country": database.get_proxy_country(test_user_id=test_json.get(test_set).get('test_user_id'))}
30 | test_data.append(test_json.get(test_set))
31 | return test_data
32 |
33 | # test the test's setting data
34 | # use this code section to initialize chrome session and login newly created user for the first time, to then
35 | # manually complete the registration process
36 | # test_data = [test_json.get('167')]
37 | # for test_set in test_data:
38 | # if test_set.get('login'):
39 | # test_set['phone_number'] = database.get_phone_number(test_user_id=test_set.get('test_user_id'))
40 | # test_set['country_phone_number_prefix'] = database.get_country_phone_number_prefix(
41 | # test_user_id=test_set.get('test_user_id'))
42 | # test_set['proxy'] = {
43 | # "proxy_username": "PLACEHOLDER", "proxy_password": "PLACEHOLDER",
44 | # "proxy_host": database.get_proxy_host(test_user_id=167),
45 | # "proxy_port": database.get_proxy_port(test_user_id=167),
46 | # "country": database.get_proxy_country(test_user_id=167)
47 | # }
48 | # # c1 = 'US'
49 | # # test_set['proxy'] = {"proxy_username": "PLACEHOLDER", "proxy_password": "PLACEHOLDER",
50 | # # 'proxy_host': 'PLACEHOLDER', 'proxy_port': 'PLACEHOLDER', 'country': c1}
51 | # return test_data
52 |
53 | # for account creation purposes:
54 | # cur = [test_data[0]]
55 | # print(cur[0].get('test_user_id'))
56 | # return cur
57 |
58 |
59 | # main function initializing all different steps within one test iteration
60 | def run_test(test_data):
61 | # setting start time
62 | start = time.time()
63 |
64 | # initializing logger
65 | file_path = (base_path / f"../DataAnalysis/console_logs/console_log_{test_data.get('test_run_id')}_user_"
66 | f"{test_data.get('test_user_id')}.log").resolve()
67 | logging.basicConfig(filename=file_path, filemode='w')
68 | logger = logging.getLogger()
69 | logger.setLevel(logging.WARNING)
70 | logger.warning(f'Starting execution for testuser {test_data.get("test_user_id")}.')
71 |
72 | # initializing DatabaseHelper() object only once for test run
73 | database = DatabaseHelper()
74 |
75 | # initializing helper instance
76 | helper = WebHelper(test_user_id=test_data.get('test_user_id'),
77 | test_run_id=test_data.get('test_run_id'),
78 | logger=logger,
79 | database=database,
80 | phone_number=test_data.get('phone_number'),
81 | country_phone_number_prefix=test_data.get('country_phone_number_prefix'),
82 | reuse_cookies=test_data.get('reuse_cookies'),
83 | proxy=test_data.get('proxy'),
84 | browser_language=test_data.get("browser_language"))
85 |
86 | # triggering login for user via phone number only if "login" set true in test_data
87 | if test_data.get('login'):
88 | helper.login_user_phone()
89 | helper.handle_banners()
90 |
91 | # trigger handling of banners
92 | helper.handle_banners()
93 |
94 | # pause video until actually watching
95 | helper.pause_video()
96 |
97 | # pause first video
98 | if test_data.get('collecting_data_for_first_posts'):
99 | helper.handle_banners()
100 | helper.pause_video()
101 |
102 | # set cookies if applicable
103 | if test_data.get('reuse_cookies'):
104 | helper.set_cookies()
105 |
106 | # define number of batches to scroll through
107 | if len(test_data.get('number_of_posts_to_like_per_batch')) != 0 \
108 | or len(test_data.get('number_of_creators_to_follow_per_batch')) != 0 \
109 | or len(test_data.get('number_of_posts_to_watch_longer_per_batch')):
110 | if test_data.get('number_of_batches') != max(len(test_data.get('number_of_posts_to_like_per_batch')),
111 | len(test_data.get('number_of_creators_to_follow_per_batch')),
112 | len(test_data.get('number_of_posts_to_watch_longer_per_batch'))):
113 | raise Exception("Number of batches to scroll through doesn't match!")
114 | else:
115 | number_of_batches = test_data.get('number_of_batches')
116 | else:
117 | number_of_batches = test_data.get('number_of_batches')
118 |
119 | # initializing data storing instance
120 | data_storing = DataStoring(helper=helper,
121 | logger=logger,
122 | database=database,
123 | number_of_batches=number_of_batches,
124 | test_user_id=test_data.get('test_user_id'),
125 | test_run_id=test_data.get('test_run_id'))
126 |
127 | # trigger handling of banners
128 | helper.handle_banners()
129 |
130 | # handling first set of posts
131 | data_storing.get_separate_posts_data(collecting_data_for_first_posts=test_data.get("collecting_data_for_first_posts"))
132 |
133 | # handling remaining posts, scrolling through batches
134 | data_storing.get_request_posts_data(time_to_look_at_post_action=test_data.get('time_to_look_at_post_action'),
135 | time_to_look_at_post_normal=test_data.get('time_to_look_at_post_normal'),
136 | number_of_posts_to_like_per_batch=test_data.get('number_of_posts_to_like_per_batch'),
137 | number_of_creators_to_follow_per_batch=test_data.get('number_of_creators_to_follow_per_batch'),
138 | number_of_posts_to_watch_longer_per_batch=test_data.get('number_of_posts_to_watch_longer_per_batch'),
139 | posts_with_hashtag_to_like=test_data.get('posts_with_hashtag_to_like'),
140 | posts_with_hashtag_to_watch_longer=test_data.get('posts_with_hashtag_to_watch_longer'),
141 | posts_of_content_creators_to_like=test_data.get('posts_of_content_creators_to_like'),
142 | posts_of_music_ids_to_like=test_data.get('posts_of_music_ids_to_like'))
143 |
144 | # commencing shut down of test run: unflagging used proxy, closing driver, storing collected data, computing
145 | # duration and storing it for corresponding testrun
146 | helper.close_driver()
147 | helper.database.unflag_proxy(proxy_host=test_data['proxy']['proxy_host'],
148 | proxy_port=test_data['proxy']['proxy_port'])
149 | data_storing.store_collected_data()
150 | duration = time.time() - start
151 | test_data['duration'] = (duration / 60)
152 | logger.warning(f'Execution for testuser {test_data.get("test_user_id")} completed in {duration} seconds '
153 | f'({duration / 60} minutes).')
154 | return test_data
155 |
156 |
157 | if __name__ == '__main__':
158 | tests = get_test_data()
159 |
160 | # create test run object with given test data and run tests in parallel
161 | with TestRun(test_data=tests) as test_run:
162 | for test in tests:
163 | test['test_run_id'] = test_run.test_run_id  # every test of this execution carries the id of the test run
164 | with concurrent.futures.ProcessPoolExecutor() as executor:
165 | test_data_results = executor.map(run_test, tests)  # run_test is called once per entry of tests
166 | test_user_ids = []
167 | batch_size = 0
168 | # NOTE(review): test_user_ids and batch_size are only consumed by the commented-out analysis update below
169 | # obtain test results and store them accordingly
170 | for test in test_data_results:
171 | test_run.store_test_duration(duration=test.get('duration'), test_user_id=test.get('test_user_id'))
172 | test_user_ids.append(test.get('test_user_id'))
173 | batch_size = test.get('number_of_batches')
174 | # update analysis table
175 | # update_overlapping_post_test_results_with_new_values(test_run=test_run.test_run_id, test_users=test_user_ids,
176 | # batch_size=batch_size)
177 |
--------------------------------------------------------------------------------
/Testing/TestInitializer.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import random
4 | import time
5 |
6 | import psycopg2 as psycopg2
7 | from selenium import webdriver
8 | from selenium.webdriver.common.by import By
9 | from bs4 import BeautifulSoup
10 | from selenium.webdriver.common.keys import Keys
11 |
12 | from src.WebHelper import *
13 | from src.Proxy import *
14 | from src.DataStoring import *
15 | from src.TestRun import TestRun
16 |
17 | # countries = ['US', 'GB', 'FR', 'DE', 'CA', 'CH', 'CH']
18 | # c1 = 'DE'
19 | # c2 = 'DE'
20 | # proxy_auth_username = 'auamyynt-dest'
21 | # proxy_auth_password = 'j5u77rwhbdnj'
22 | # proxy_host1, proxy_port1 = get_db_proxy(c1)
23 | # proxy_host2, proxy_port2 = get_db_proxy(c2, {'proxy_host': proxy_host1, 'proxy_port': proxy_port1})
24 |
25 | # test_data = {
26 | # "testuserinfo": [
27 | # {"testuserid": 4, "email": "bertman@mailinator.com", "password": "%J0ftE999yQVg2",
28 | # "browser_language": "de", "proxy":
29 | # {
30 | # 'proxy_username': proxy_auth_username, 'proxy_password': proxy_auth_password,
31 | # 'proxy_host': proxy_host1, 'proxy_port': proxy_port1,
32 | # 'country': c1
33 | # }
34 | # },
35 | # {"testuserid": 5, "email": "loc2021@mailinator.com", "password": "%@NreeHIwb*55O5@zD48",
36 | # "browser_language": "tr", "proxy":
37 | # {
38 | # 'proxy_username': proxy_auth_username, 'proxy_password': proxy_auth_password,
39 | # 'proxy_host': proxy_host1, 'proxy_port': proxy_port1,
40 | # 'country': c1
41 | # }
42 | # },
43 | # {"testuserid": 8, "email": "loc2021@mailinator.com", "password": "%@NreeHIwb*55O5@zD48",
44 | # "browser_language": "en", "proxy":
45 | # {
46 | # 'proxy_username': proxy_auth_username, 'proxy_password': proxy_auth_password,
47 | # 'proxy_host': proxy_host1, 'proxy_port': proxy_port1,
48 | # 'country': c1
49 | # }
50 | # },
51 | # {"testuserid": 9, "email": "loc2021@mailinator.com", "password": "%@NreeHIwb*55O5@zD48",
52 | # "browser_language": "es", "proxy":
53 | # {
54 | # 'proxy_username': proxy_auth_username, 'proxy_password': proxy_auth_password,
55 | # 'proxy_host': proxy_host1, 'proxy_port': proxy_port1,
56 | # 'country': c1
57 | # }
58 | # }
59 | # ],
60 | # "description": "same location, different languages, no user accounts"}
61 | #
62 | # test_data_2 = {
63 | # "testuserinfo": [
64 | # {"testuserid": 5, "email": "loc2021@mailinator.com", "password": "%@NreeHIwb*55O5@zD48",
65 | # "browser_language": "tr", "proxy":
66 | # {
67 | # 'proxy_username': proxy_auth_username, 'proxy_password': proxy_auth_password,
68 | # 'proxy_host': proxy_host1, 'proxy_port': proxy_port1,
69 | # 'country': c1
70 | # }
71 | # },
72 | # ],
73 | # "description": "same location, different languages, no user accounts"}
74 |
75 | c1 = 'US'
76 | c2 = 'CA'
77 | proxy_auth_username = 'PLACEHOLDER'
78 | proxy_auth_password = 'PLACEHOLDER'
79 | proxy_host1, proxy_port1 = get_db_proxy(c1)
80 | # test_data = {"testuserid": 35, "phone_number": "7862148574", "password": "IOw2z*W282&X", "browser_language": "en",
81 | # "country_phone_number_prefix": "United States", "time_to_look_at_post": 2,
82 | # "number_of_posts_to_like_per_batch": [0, 0, 0], "collecting_data_for_first_posts": True,
83 | # "proxy":
84 | # {'proxy_username': proxy_auth_username, 'proxy_password': proxy_auth_password,
85 | # 'proxy_host': '185.95.157.159', 'proxy_port': '6180', 'country': c1}
86 | # }
87 |
88 | test_data = [
89 | {"test_user_id": 11, "phone_number": "5039664089", "password": "IOw2z*W282&X", "browser_language": "en",
90 | "country_phone_number_prefix": "United States", "time_to_look_at_post": 2,
91 | "number_of_posts_to_like_per_batch": [15, 5], "collecting_data_for_first_posts": False,
92 | "proxy":
93 | {'proxy_username': proxy_auth_username, 'proxy_password': proxy_auth_password,
94 | 'proxy_host': proxy_host1, 'proxy_port': proxy_port1, 'country': c1}
95 | }
96 | # {"test_user_id": 15, "phone_number": "1798297886", "password": "k@pywYE7l8", "browser_language": "en",
97 | # "country_phone_number_prefix": "Germany", "time_to_look_at_post": 2,
98 | # "number_of_posts_to_like_per_batch": [0, 0, 0], "collecting_data_for_first_posts": False,
99 | # "proxy":
100 | # {'proxy_username': proxy_auth_username, 'proxy_password': proxy_auth_password,
101 | # 'proxy_host': proxy_host1, 'proxy_port': proxy_port1, 'country': c1}
102 | # }
103 | ]
104 |
105 |
106 | with TestRun(test_data=test_data) as test_run:
107 | start = time.time()
108 | test_data = test_data[0]
109 | test_data['test_run_id'] = test_run.test_run_id
110 | base_path = Path(__file__).parent
111 | file_path = (base_path / f"../Data Analysis/console_logs/console_log_{test_data.get('test_run_id')}_user_"
112 | f"{test_data.get('test_user_id')}.log").resolve()
113 | logging.basicConfig(filename=file_path, format='%(asctime)s %(message)s', filemode='w')
114 | logger = logging.getLogger()
115 | logger.setLevel(logging.WARNING)
116 | logger.info(f'Starting execution for testuser {test_data.get("test_user_id")}.')
117 | helper = WebHelper(test_user_id=test_data.get('test_user_id'),
118 | test_run_id=test_data.get('test_run_id'),
119 | logger=logger,
120 | proxy=test_data.get('proxy'),
121 | browser_language=test_data.get("browser_language"))
122 | helper.login_user_phone(test_data.get('phone_number'), test_data.get('country_phone_number_prefix'))
123 | data_storing = DataStoring(helper=helper,
124 | logger=logger,
125 | number_of_batches=len(test_data.get('number_of_posts_to_like_per_batch')),
126 | test_user_id=test_data.get('test_user_id'),
127 | test_run_id=test_data.get('test_run_id'))
128 | data_storing.get_separate_posts_data(collecting_data_for_first_posts=test_data.get("collecting_data_for_first_posts"))
129 | data_storing.get_request_posts_data(test_data.get('time_to_look_at_post'),
130 | test_data.get('number_of_posts_to_like_per_batch'))
131 | helper.database.unflag_proxy(proxy_host=test_data['proxy']['proxy_host'],
132 | proxy_port=test_data['proxy']['proxy_port'])
133 | helper.close_driver()
134 | data_storing.store_collected_data()
135 | duration = time.time() - start
136 | test_data['duration'] = (duration / 60)
137 | logger.info(f'Execution for testuser {test_data.get("test_user_id")} completed in {duration} seconds '
138 | f'({duration / 60} minutes).')
139 |
140 |
--------------------------------------------------------------------------------
/Testing/TestSets/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/Testing/TestSets/.DS_Store
--------------------------------------------------------------------------------
/Testing/TestSets/cg_us_user-165-166.json:
--------------------------------------------------------------------------------
1 | {
2 | "165": {
3 | "test_user_id": 165,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0,
8 | "time_to_look_at_post_normal": 0.5,
9 | "number_of_batches": 20,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_watch_longer": [],
14 | "posts_with_hashtag_to_like": [],
15 | "posts_of_content_creators_to_like": [],
16 | "posts_of_music_ids_to_like": [],
17 | "collecting_data_for_first_posts": false
18 | },
19 | "166": {
20 | "test_user_id": 166,
21 | "login": true,
22 | "browser_language": "en",
23 | "reuse_cookies": false,
24 | "time_to_look_at_post_action": 0,
25 | "time_to_look_at_post_normal": 0.5,
26 | "number_of_batches": 20,
27 | "number_of_posts_to_like_per_batch": [],
28 | "number_of_creators_to_follow_per_batch": [],
29 | "number_of_posts_to_watch_longer_per_batch": [],
30 | "posts_with_hashtag_to_watch_longer": [],
31 | "posts_with_hashtag_to_like": [],
32 | "posts_of_content_creators_to_like": [],
33 | "posts_of_music_ids_to_like": [],
34 | "collecting_data_for_first_posts": false
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_ca_user-119-120.json:
--------------------------------------------------------------------------------
1 | {
2 | "119": {
3 | "test_user_id": 119,
4 | "login": false,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
8 | "number_of_batches": 3,
9 | "number_of_posts_to_like_per_batch": [],
10 | "number_of_creators_to_follow_per_batch": [],
11 | "number_of_posts_to_watch_longer_per_batch": [],
12 | "posts_with_hashtag_to_watch_longer": [],
13 | "posts_with_hashtag_to_like": [],
14 | "posts_of_content_creators_to_like": [],
15 | "posts_of_music_ids_to_like": [],
16 | "collecting_data_for_first_posts": true
17 | },
18 | "120": {
19 | "test_user_id": 120,
20 | "login": false,
21 | "browser_language": "en",
22 | "reuse_cookies": false,
23 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
24 | "number_of_batches": 3,
25 | "number_of_posts_to_like_per_batch": [],
26 | "number_of_creators_to_follow_per_batch": [],
27 | "number_of_posts_to_watch_longer_per_batch": [],
28 | "posts_with_hashtag_to_watch_longer": [],
29 | "posts_with_hashtag_to_like": [],
30 | "posts_of_content_creators_to_like": [],
31 | "posts_of_music_ids_to_like": [],
32 | "collecting_data_for_first_posts": true
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_ca_user-121-122.json:
--------------------------------------------------------------------------------
1 | {
2 | "121": {
3 | "test_user_id": 121,
4 | "login": false,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": false
16 | },
17 | "122": {
18 | "test_user_id": 122,
19 | "login": false,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": false
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_fr_user-55-56.json:
--------------------------------------------------------------------------------
1 | {
2 | "55": {
3 | "test_user_id": 55,
4 | "login": false,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 5,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "posts_with_hashtag_to_like": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_of_content_creators_to_like": [],
13 | "posts_of_music_ids_to_like": [],
14 | "collecting_data_for_first_posts": true
15 | },
16 | "56": {
17 | "test_user_id": 56,
18 | "login": false,
19 | "browser_language": "en",
20 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
21 | "number_of_batches": 5,
22 | "number_of_posts_to_like_per_batch": [],
23 | "number_of_creators_to_follow_per_batch": [],
24 | "posts_with_hashtag_to_like": [],
25 | "posts_with_hashtag_to_watch_longer": [],
26 | "posts_of_content_creators_to_like": [],
27 | "posts_of_music_ids_to_like": [],
28 | "collecting_data_for_first_posts": true
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_fr_user-65-67.json:
--------------------------------------------------------------------------------
1 | {
2 | "65": {
3 | "test_user_id": 65,
4 | "login": false,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 5,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": false
16 | },
17 | "67": {
18 | "test_user_id": 67,
19 | "login": false,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 5,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": false
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_gb_user-68-69.json:
--------------------------------------------------------------------------------
1 | {
2 | "68": {
3 | "test_user_id": 68,
4 | "login": false,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
8 | "number_of_batches": 5,
9 | "number_of_posts_to_like_per_batch": [],
10 | "number_of_creators_to_follow_per_batch": [],
11 | "number_of_posts_to_watch_longer_per_batch": [],
12 | "posts_with_hashtag_to_watch_longer": [],
13 | "posts_with_hashtag_to_like": [],
14 | "posts_of_content_creators_to_like": [],
15 | "posts_of_music_ids_to_like": [],
16 | "collecting_data_for_first_posts": false
17 | },
18 | "69": {
19 | "test_user_id": 69,
20 | "login": false,
21 | "browser_language": "en",
22 | "reuse_cookies": false,
23 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
24 | "number_of_batches": 5,
25 | "number_of_posts_to_like_per_batch": [],
26 | "number_of_creators_to_follow_per_batch": [],
27 | "number_of_posts_to_watch_longer_per_batch": [],
28 | "posts_with_hashtag_to_watch_longer": [],
29 | "posts_with_hashtag_to_like": [],
30 | "posts_of_content_creators_to_like": [],
31 | "posts_of_music_ids_to_like": [],
32 | "collecting_data_for_first_posts": false
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_us_user-125-126.json:
--------------------------------------------------------------------------------
1 | {
2 | "125": {
3 | "test_user_id": 125,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": true
16 | },
17 | "126": {
18 | "test_user_id": 126,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": true
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_us_user-137-138.json:
--------------------------------------------------------------------------------
1 | {
2 | "137": {
3 | "test_user_id": 137,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": false
16 | },
17 | "138": {
18 | "test_user_id": 138,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": false
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_us_user-139-140.json:
--------------------------------------------------------------------------------
1 | {
2 | "139": {
3 | "test_user_id": 139,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": true
16 | },
17 | "140": {
18 | "test_user_id": 140,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": true
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_us_user-141-142.json:
--------------------------------------------------------------------------------
1 | {
2 | "141": {
3 | "test_user_id": 141,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": false
16 | },
17 | "142": {
18 | "test_user_id": 142,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": false
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_us_user-143-144.json:
--------------------------------------------------------------------------------
1 | {
2 | "143": {
3 | "test_user_id": 143,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
8 | "number_of_batches": 3,
9 | "number_of_posts_to_like_per_batch": [],
10 | "number_of_creators_to_follow_per_batch": [],
11 | "number_of_posts_to_watch_longer_per_batch": [],
12 | "posts_with_hashtag_to_watch_longer": [],
13 | "posts_with_hashtag_to_like": [],
14 | "posts_of_content_creators_to_like": [],
15 | "posts_of_music_ids_to_like": [],
16 | "collecting_data_for_first_posts": false
17 | },
18 | "144": {
19 | "test_user_id": 144,
20 | "login": true,
21 | "browser_language": "en",
22 | "reuse_cookies": false,
23 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
24 | "number_of_batches": 3,
25 | "number_of_posts_to_like_per_batch": [],
26 | "number_of_creators_to_follow_per_batch": [],
27 | "number_of_posts_to_watch_longer_per_batch": [],
28 | "posts_with_hashtag_to_watch_longer": [],
29 | "posts_with_hashtag_to_like": [],
30 | "posts_of_content_creators_to_like": [],
31 | "posts_of_music_ids_to_like": [],
32 | "collecting_data_for_first_posts": false
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_us_user-147-148.json:
--------------------------------------------------------------------------------
1 | {
2 | "147": {
3 | "test_user_id": 147,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": true,
7 | "time_to_look_at_post_action": 0,
8 | "time_to_look_at_post_normal": 0.5,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_watch_longer": [],
14 | "posts_with_hashtag_to_like": [],
15 | "posts_of_content_creators_to_like": [],
16 | "posts_of_music_ids_to_like": [],
17 | "collecting_data_for_first_posts": false
18 | },
19 | "148": {
20 | "test_user_id": 148,
21 | "login": true,
22 | "browser_language": "en",
23 | "reuse_cookies": true,
24 | "time_to_look_at_post_action": 0,
25 | "time_to_look_at_post_normal": 0.5,
26 | "number_of_batches": 3,
27 | "number_of_posts_to_like_per_batch": [],
28 | "number_of_creators_to_follow_per_batch": [],
29 | "number_of_posts_to_watch_longer_per_batch": [],
30 | "posts_with_hashtag_to_watch_longer": [],
31 | "posts_with_hashtag_to_like": [],
32 | "posts_of_content_creators_to_like": [],
33 | "posts_of_music_ids_to_like": [],
34 | "collecting_data_for_first_posts": false
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_us_user-149-150.json:
--------------------------------------------------------------------------------
1 | {
2 | "149": {
3 | "test_user_id": 149,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": true,
7 | "time_to_look_at_post_action": 0,
8 | "time_to_look_at_post_normal": 0.5,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_watch_longer": [],
14 | "posts_with_hashtag_to_like": [],
15 | "posts_of_content_creators_to_like": [],
16 | "posts_of_music_ids_to_like": [],
17 | "collecting_data_for_first_posts": false
18 | },
19 | "150": {
20 | "test_user_id": 150,
21 | "login": true,
22 | "browser_language": "en",
23 | "reuse_cookies": true,
24 | "time_to_look_at_post_action": 0,
25 | "time_to_look_at_post_normal": 0.5,
26 | "number_of_batches": 3,
27 | "number_of_posts_to_like_per_batch": [],
28 | "number_of_creators_to_follow_per_batch": [],
29 | "number_of_posts_to_watch_longer_per_batch": [],
30 | "posts_with_hashtag_to_watch_longer": [],
31 | "posts_with_hashtag_to_like": [],
32 | "posts_of_content_creators_to_like": [],
33 | "posts_of_music_ids_to_like": [],
34 | "collecting_data_for_first_posts": false
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_us_user-161-162.json:
--------------------------------------------------------------------------------
1 | {
2 | "161": {
3 | "test_user_id": 161,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": true,
7 | "time_to_look_at_post_action": 0,
8 | "time_to_look_at_post_normal": 0.5,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_watch_longer": [],
14 | "posts_with_hashtag_to_like": [],
15 | "posts_of_content_creators_to_like": [],
16 | "posts_of_music_ids_to_like": [],
17 | "collecting_data_for_first_posts": false
18 | },
19 | "162": {
20 | "test_user_id": 162,
21 | "login": true,
22 | "browser_language": "en",
23 | "reuse_cookies": true,
24 | "time_to_look_at_post_action": 0,
25 | "time_to_look_at_post_normal": 0.5,
26 | "number_of_batches": 3,
27 | "number_of_posts_to_like_per_batch": [],
28 | "number_of_creators_to_follow_per_batch": [],
29 | "number_of_posts_to_watch_longer_per_batch": [],
30 | "posts_with_hashtag_to_watch_longer": [],
31 | "posts_with_hashtag_to_like": [],
32 | "posts_of_content_creators_to_like": [],
33 | "posts_of_music_ids_to_like": [],
34 | "collecting_data_for_first_posts": false
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_us_user-57-58.json:
--------------------------------------------------------------------------------
1 | {
2 | "57": {
3 | "test_user_id": 57,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 5,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "posts_with_hashtag_to_like": [],
11 | "posts_of_content_creators_to_like": [],
12 | "posts_of_music_ids_to_like": [],
13 | "collecting_data_for_first_posts": true
14 | },
15 | "58": {
16 | "test_user_id": 58,
17 | "login": true,
18 | "browser_language": "en",
19 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
20 | "number_of_batches": 5,
21 | "number_of_posts_to_like_per_batch": [],
22 | "number_of_creators_to_follow_per_batch": [],
23 | "posts_with_hashtag_to_like": [],
24 | "posts_of_content_creators_to_like": [],
25 | "posts_of_music_ids_to_like": [],
26 | "collecting_data_for_first_posts": true
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_us_user-72-73.json:
--------------------------------------------------------------------------------
1 | {
2 | "72": {
3 | "test_user_id": 72,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
8 | "number_of_batches": 5,
9 | "number_of_posts_to_like_per_batch": [],
10 | "number_of_creators_to_follow_per_batch": [],
11 | "number_of_posts_to_watch_longer_per_batch": [],
12 | "posts_with_hashtag_to_watch_longer": [],
13 | "posts_with_hashtag_to_like": [],
14 | "posts_of_content_creators_to_like": [],
15 | "posts_of_music_ids_to_like": [],
16 | "collecting_data_for_first_posts": true
17 | },
18 | "73": {
19 | "test_user_id": 73,
20 | "login": true,
21 | "browser_language": "en",
22 | "reuse_cookies": false,
23 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
24 | "number_of_batches": 5,
25 | "number_of_posts_to_like_per_batch": [],
26 | "number_of_creators_to_follow_per_batch": [],
27 | "number_of_posts_to_watch_longer_per_batch": [],
28 | "posts_with_hashtag_to_watch_longer": [],
29 | "posts_with_hashtag_to_like": [],
30 | "posts_of_content_creators_to_like": [],
31 | "posts_of_music_ids_to_like": [],
32 | "collecting_data_for_first_posts": true
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_us_user-74-75.json:
--------------------------------------------------------------------------------
1 | {
2 | "74": {
3 | "test_user_id": 74,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 5,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": false
16 | },
17 | "75": {
18 | "test_user_id": 75,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 5,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": false
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_us_user-93-94.json:
--------------------------------------------------------------------------------
1 | {
2 | "93": {
3 | "test_user_id": 93,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 5,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": true
16 | },
17 | "94": {
18 | "test_user_id": 94,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 5,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": true
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/cg_us_user-95-96.json:
--------------------------------------------------------------------------------
1 | {
2 | "95": {
3 | "test_user_id": 95,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 5,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": false
16 | },
17 | "96": {
18 | "test_user_id": 96,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 5,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": false
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/follow_gb_user-51-52.json:
--------------------------------------------------------------------------------
1 | {
2 | "51": {
3 | "test_user_id": 51,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
8 | "number_of_batches": 3,
9 | "number_of_posts_to_like_per_batch": [],
10 | "number_of_creators_to_follow_per_batch": [0, 0, 0],
11 | "number_of_posts_to_watch_longer_per_batch": [],
12 | "posts_with_hashtag_to_watch_longer": [],
13 | "posts_with_hashtag_to_like": [],
14 | "posts_of_content_creators_to_like": [],
15 | "posts_of_music_ids_to_like": [],
16 | "collecting_data_for_first_posts": true
17 | },
18 | "52": {
19 | "test_user_id": 52,
20 | "login": true,
21 | "browser_language": "en",
22 | "reuse_cookies": false,
23 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
24 | "number_of_batches": 3,
25 | "number_of_posts_to_like_per_batch": [],
26 | "number_of_creators_to_follow_per_batch": [],
27 | "number_of_posts_to_watch_longer_per_batch": [],
28 | "posts_with_hashtag_to_watch_longer": [],
29 | "posts_with_hashtag_to_like": [],
30 | "posts_of_content_creators_to_like": [],
31 | "posts_of_music_ids_to_like": [],
32 | "collecting_data_for_first_posts": true
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/follow_gb_user-53-54.json:
--------------------------------------------------------------------------------
1 | {
2 | "53": {
3 | "test_user_id": 53,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [0, 1, 0],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": false
16 | },
17 | "54": {
18 | "test_user_id": 54,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": false
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/follow_us_user-153-154.json:
--------------------------------------------------------------------------------
1 | {
2 | "153": {
3 | "test_user_id": 153,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": true,
7 | "time_to_look_at_post_action": 0,
8 | "time_to_look_at_post_normal": 0.5,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [0, 0, 0],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_like": [],
14 | "posts_with_hashtag_to_watch_longer": [],
15 | "posts_of_content_creators_to_like": [],
16 | "posts_of_music_ids_to_like": [],
17 | "collecting_data_for_first_posts": true
18 | },
19 | "154": {
20 | "test_user_id": 154,
21 | "login": true,
22 | "browser_language": "en",
23 | "reuse_cookies": true,
24 | "time_to_look_at_post_action": 0,
25 | "time_to_look_at_post_normal": 0.5,
26 | "number_of_batches": 3,
27 | "number_of_posts_to_like_per_batch": [],
28 | "number_of_creators_to_follow_per_batch": [],
29 | "number_of_posts_to_watch_longer_per_batch": [],
30 | "posts_with_hashtag_to_like": [],
31 | "posts_with_hashtag_to_watch_longer": [],
32 | "posts_of_content_creators_to_like": [],
33 | "posts_of_music_ids_to_like": [],
34 | "collecting_data_for_first_posts": true
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/follow_us_user-155-156.json:
--------------------------------------------------------------------------------
1 | {
2 | "155": {
3 | "test_user_id": 155,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": true,
7 | "time_to_look_at_post_action": 0,
8 | "time_to_look_at_post_normal": 0.5,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [0, 1, 0],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_like": [],
14 | "posts_with_hashtag_to_watch_longer": [],
15 | "posts_of_content_creators_to_like": [],
16 | "posts_of_music_ids_to_like": [],
17 | "collecting_data_for_first_posts": false
18 | },
19 | "156": {
20 | "test_user_id": 156,
21 | "login": true,
22 | "browser_language": "en",
23 | "reuse_cookies": true,
24 | "time_to_look_at_post_action": 0,
25 | "time_to_look_at_post_normal": 0.5,
26 | "number_of_batches": 3,
27 | "number_of_posts_to_like_per_batch": [],
28 | "number_of_creators_to_follow_per_batch": [],
29 | "number_of_posts_to_watch_longer_per_batch": [],
30 | "posts_with_hashtag_to_like": [],
31 | "posts_with_hashtag_to_watch_longer": [],
32 | "posts_of_content_creators_to_like": [],
33 | "posts_of_music_ids_to_like": [],
34 | "collecting_data_for_first_posts": false
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/follow_us_user-47-48.json:
--------------------------------------------------------------------------------
1 | {
2 | "47": {
3 | "test_user_id": 47,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [0, 0, 0],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": true
16 | },
17 | "48": {
18 | "test_user_id": 48,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": true
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/follow_us_user-49-50.json:
--------------------------------------------------------------------------------
1 | {
2 | "49": {
3 | "test_user_id": 49,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [0, 0, 0],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_like": [],
12 | "posts_with_hashtag_to_watch_longer": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": false
16 | },
17 | "50": {
18 | "test_user_id": 50,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_like": [],
27 | "posts_with_hashtag_to_watch_longer": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": false
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/like_gb_user-61-62.json:
--------------------------------------------------------------------------------
1 | {
2 | "61": {
3 | "test_user_id": 61,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 5,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": ["cat", "dog", "pet", "dogsoftiktok", "catsoftiktok", "cute", "puppy", "dogs",
13 | "cats", "animals", "petsoftiktok", "kitten"],
14 | "posts_of_content_creators_to_like": [],
15 | "posts_of_music_ids_to_like": [],
16 | "collecting_data_for_first_posts": false
17 | },
18 | "62": {
19 | "test_user_id": 62,
20 | "login": true,
21 | "browser_language": "en",
22 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
23 | "number_of_batches": 5,
24 | "number_of_posts_to_like_per_batch": [],
25 | "number_of_creators_to_follow_per_batch": [],
26 | "number_of_posts_to_watch_longer_per_batch": [],
27 | "posts_with_hashtag_to_watch_longer": [],
28 | "posts_with_hashtag_to_like": [],
29 | "posts_of_content_creators_to_like": [],
30 | "posts_of_music_ids_to_like": [],
31 | "collecting_data_for_first_posts": false
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/like_gb_user-63-64.json:
--------------------------------------------------------------------------------
1 | {
2 | "63": {
3 | "test_user_id": 63,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0,"time_to_look_at_post_normal": 0.5,
8 | "number_of_batches": 5,
9 | "number_of_posts_to_like_per_batch": [],
10 | "number_of_creators_to_follow_per_batch": [],
11 | "number_of_posts_to_watch_longer_per_batch": [],
12 | "posts_with_hashtag_to_like": ["football", "food", "euro2020", "movie", "foodtiktok", "gaming", "film",
13 | "tiktokfood", "gta5", "gta", "minecraft", "marvel"],
14 | "posts_with_hashtag_to_watch_longer": [],
15 | "posts_of_content_creators_to_like": [],
16 | "posts_of_music_ids_to_like": [],
17 | "collecting_data_for_first_posts": false
18 | },
19 | "64": {
20 | "test_user_id": 64,
21 | "login": true,
22 | "browser_language": "en",
23 | "reuse_cookies": false,
24 | "time_to_look_at_post_action": 0,"time_to_look_at_post_normal": 0.5,
25 | "number_of_batches": 5,
26 | "number_of_posts_to_like_per_batch": [],
27 | "number_of_creators_to_follow_per_batch": [],
28 | "number_of_posts_to_watch_longer_per_batch": [],
29 | "posts_with_hashtag_to_like": [],
30 | "posts_with_hashtag_to_watch_longer": [],
31 | "posts_of_content_creators_to_like": [],
32 | "posts_of_music_ids_to_like": [],
33 | "collecting_data_for_first_posts": false
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/like_us_user-111-112.json:
--------------------------------------------------------------------------------
1 | {
2 | "111": {
3 | "test_user_id": 111,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": ["miakhalifa", "alex.stemp", "coco224466", "....jjesus", "lama_mama1", "espn",
14 | "pilaui", "basii_17", "spicekingcam", "the_grinchofficial", "petassembly", "texasbeeworks", "littlecajunhouse",
15 | "iamdivinelyloved", "camwilder", "badparentingmoments", "dianarantamaki", "dm_t.v", "majorkeylife", "thejoshelkin",
16 | "jacob_t_king", "user4350486101671", "billieeilish", "jackblack", "dermdoctor", "aymieandgracie", "copslivetv",
17 | "daddygus99", "joe.bartolozzi", "_.video_.edits.23", "leytink", "n.ikotheking", "paulana52", "kallmekris",
18 | "iamjonathanpeter", "mndiaye_97", "genltart"],
19 | "posts_of_music_ids_to_like": [],
20 | "collecting_data_for_first_posts": true
21 | },
22 | "112": {
23 | "test_user_id": 112,
24 | "login": true,
25 | "browser_language": "en",
26 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
27 | "number_of_batches": 3,
28 | "number_of_posts_to_like_per_batch": [],
29 | "number_of_creators_to_follow_per_batch": [],
30 | "number_of_posts_to_watch_longer_per_batch": [],
31 | "posts_with_hashtag_to_watch_longer": [],
32 | "posts_with_hashtag_to_like": [],
33 | "posts_of_content_creators_to_like": [],
34 | "posts_of_music_ids_to_like": [],
35 | "collecting_data_for_first_posts": true
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/like_us_user-113-114.json:
--------------------------------------------------------------------------------
1 | {
2 | "113": {
3 | "test_user_id": 113,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": ["miakhalifa", "alex.stemp", "southcentraljag", "user4350486101671",
14 | "spicekingcam", "joe.bartolozzi", "coco224466", "mndiaye_97", "billieeilish", "daddygus99", "edmundrambo",
15 | "feast24seven", "copslivetv", "pilaui", "dm_t.v", "papi_pilas", "kate_johansson", "dermdoctor",
16 | "patulafamilymcdonalds", "littlecajunhouse", "basii_17", "camwilder", "bonita_alonna", "moontellthat",
17 | "paulana52", "kallmekris", "n.ikotheking", "_.video_.edits.23", "....jjesus", "alivaheeronms", "sunflowertubie",
18 | "lama_mama1", "ace_dadd", "beforenafter13", "khaby.lame", "americanbullish", "iamjonathanpeter",
19 | "noah_and_lincoln", "mamalindy", "hudabeauty", "manchasthetiktoker", "aymieandgracie", "jesusacevedox43",
20 | "hypermilt2", "izzy.tube", "willsmith", "614lyfe", "anxietycouple", "campuzanoabelardo"],
21 | "posts_of_music_ids_to_like": [],
22 | "collecting_data_for_first_posts": true
23 | },
24 | "114": {
25 | "test_user_id": 114,
26 | "login": true,
27 | "browser_language": "en",
28 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
29 | "number_of_batches": 3,
30 | "number_of_posts_to_like_per_batch": [],
31 | "number_of_creators_to_follow_per_batch": [],
32 | "number_of_posts_to_watch_longer_per_batch": [],
33 | "posts_with_hashtag_to_watch_longer": [],
34 | "posts_with_hashtag_to_like": [],
35 | "posts_of_content_creators_to_like": [],
36 | "posts_of_music_ids_to_like": [],
37 | "collecting_data_for_first_posts": true
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/like_us_user-115-116.json:
--------------------------------------------------------------------------------
1 | {
2 | "115": {
3 | "test_user_id": 115,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [
15 | 6972655154873796610,
16 | 6973722766373570561,
17 | 6586947002464996102,
18 | 5000000001320781379,
19 | 222597111081832449,
20 | 6971562931527912197,
21 | 6964101918863969030,
22 | 6746993352891189249,
23 | 6851526062120110854,
24 | 6853205461995375365,
25 | 6967885968040889094,
26 | 6787142837377959937,
27 | 6769046027488987137,
28 | 6926087831404251909,
29 | 6755976952189814785,
30 | 6656534537050409734,
31 | 6947968309945993218,
32 | 6974092128280464133,
33 | 6926304768692456197,
34 | 6952604284408187654,
35 | 6851352741625809669,
36 | 6956662916695197697,
37 | 222453214057697280,
38 | 6981591741455436550,
39 | 242638364112424960,
40 | 6973004412159625989,
41 | 6983473612736957185,
42 | 6984961588641975046,
43 | 6690892217998985990,
44 | 6778795245078416134,
45 | 6601410777356176133,
46 | 6987878637571803910,
47 | 6982992938041117446,
48 | 6985976570515032838,
49 | 6902376363227891714,
50 | 6807984440287955717,
51 | 5000000000755653951,
52 | 6942182583350332165,
53 | 6841255111478478849,
54 | 246423207128506368,
55 | 6939813781522369282,
56 | 222522673426305024,
57 | 6845002070534245125,
58 | 6954740638508124162,
59 | 6932058866448336897,
60 | 6891115298016070402,
61 | 6971462283046210309,
62 | 6975199470258588421
63 | ],
64 | "collecting_data_for_first_posts": true
65 | },
66 | "116": {
67 | "test_user_id": 116,
68 | "login": true,
69 | "browser_language": "en",
70 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
71 | "number_of_batches": 3,
72 | "number_of_posts_to_like_per_batch": [],
73 | "number_of_creators_to_follow_per_batch": [],
74 | "number_of_posts_to_watch_longer_per_batch": [],
75 | "posts_with_hashtag_to_watch_longer": [],
76 | "posts_with_hashtag_to_like": [],
77 | "posts_of_content_creators_to_like": [],
78 | "posts_of_music_ids_to_like": [],
79 | "collecting_data_for_first_posts": true
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/like_us_user-117-118.json:
--------------------------------------------------------------------------------
1 | {
2 | "117": {
3 | "test_user_id": 117,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [
15 | 6586947002464996102,
16 | 6972655154873796610,
17 | 6787142837377959937,
18 | 6971562931527912197,
19 | 5000000001320781379,
20 | 6880156461125683973,
21 | 6756879114637967368,
22 | 6971625163129916161,
23 | 6769046027488987137,
24 | 6967885968040889094,
25 | 6981184452680633094,
26 | 6863280745893432069,
27 | 6973722766373570561,
28 | 6851526062120110854,
29 | 6964101918863969030,
30 | 6941153946308266757,
31 | 6955275373810666245,
32 | 6823997997395806977,
33 | 6961786056861550594,
34 | 6971462283046210309,
35 | 6937042766442580741,
36 | 6853205461995375365,
37 | 6967526237812132613,
38 | 250835551346647040,
39 | 6956662916695197697,
40 | 6980460053367163654,
41 | 6938232357673502722,
42 | 242638364112424960,
43 | 6906155392053544962,
44 | 6963878903706389254,
45 | 6823085155259828997,
46 | 6926087831404251909,
47 | 6979702143154473733,
48 | 6979478166331017990,
49 | 6980890639441349382,
50 | 6983862471790512902,
51 | 6983495999868259077,
52 | 6980715546278972165,
53 | 6601410777356176133,
54 | 6980886814055877381,
55 | 6974191473717742341,
56 | 6931747481009097477,
57 | 6987985525509229317,
58 | 6884523487445240578,
59 | 5000000000755653951,
60 | 6937723642352503557,
61 | 6739272387222702853,
62 | 6965640046342867717,
63 | 222522673426305024,
64 | 6947865088364414978,
65 | 6954740638508124162,
66 | 6947968180685899777,
67 | 6958283382199552773,
68 | 6974011130414893058,
69 | 6790057285126195201,
70 | 6967644755471764230,
71 | 6915181512233110277,
72 | 6705099686660802561,
73 | 6977516346384927493,
74 | 6656534537050409734,
75 | 6963002460872903430,
76 | 6879830140994489093
77 | ],
78 | "collecting_data_for_first_posts": true
79 | },
80 | "118": {
81 | "test_user_id": 118,
82 | "login": true,
83 | "browser_language": "en",
84 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
85 | "number_of_batches": 3,
86 | "number_of_posts_to_like_per_batch": [],
87 | "number_of_creators_to_follow_per_batch": [],
88 | "number_of_posts_to_watch_longer_per_batch": [],
89 | "posts_with_hashtag_to_watch_longer": [],
90 | "posts_with_hashtag_to_like": [],
91 | "posts_of_content_creators_to_like": [],
92 | "posts_of_music_ids_to_like": [],
93 | "collecting_data_for_first_posts": true
94 | }
95 | }
96 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/like_us_user-123-124.json:
--------------------------------------------------------------------------------
1 | {
2 | "123": {
3 | "test_user_id": 123,
4 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
5 | "browser_language": "en",
6 | "login": true,
7 | "number_of_batches": 3,
8 | "number_of_creators_to_follow_per_batch": [],
9 | "number_of_posts_to_like_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_of_content_creators_to_like": [],
12 | "posts_of_music_ids_to_like": [],
13 | "posts_with_hashtag_to_like": [
14 | "football",
15 | "food",
16 | "euro2020",
17 | "movie",
18 | "foodtiktok",
19 | "gaming",
20 | "film",
21 | "tiktokfood",
22 | "gta5",
23 | "gta",
24 | "minecraft",
25 | "marvel",
26 | "cat",
27 | "dog",
28 | "pet",
29 | "dogsoftiktok",
30 | "catsoftiktok",
31 | "cute",
32 | "puppy",
33 | "dogs",
34 | "cats",
35 | "animals",
36 | "petsoftiktok",
37 | "kitten",
38 | "comedy",
39 | "asmr",
40 | "learnontiktok",
41 | "satisfying",
42 | "lol",
43 | "love",
44 | "humour",
45 | "couple",
46 | "foodie",
47 | "baby",
48 | "car",
49 | "cars",
50 | "jokes",
51 | "lifehack",
52 | "satisfyingvideo",
53 | "relationship",
54 | "cooking",
55 | "laugh",
56 | "fun"
57 | ],
58 | "posts_with_hashtag_to_watch_longer": [],
59 | "collecting_data_for_first_posts": true
60 | },
61 | "124": {
62 | "test_user_id": 124,
63 | "login": true,
64 | "browser_language": "en",
65 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
66 | "number_of_batches": 3,
67 | "number_of_posts_to_like_per_batch": [],
68 | "number_of_creators_to_follow_per_batch": [],
69 | "number_of_posts_to_watch_longer_per_batch": [],
70 | "posts_with_hashtag_to_watch_longer": [],
71 | "posts_with_hashtag_to_like": [],
72 | "posts_of_content_creators_to_like": [],
73 | "posts_of_music_ids_to_like": [],
74 | "collecting_data_for_first_posts": true
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/like_us_user-135-136.json:
--------------------------------------------------------------------------------
1 | {
2 | "135": {
3 | "test_user_id": 135,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": ["kylethomas", "leytink", "alex.stemp", "sherellmartini",
14 | "juandemontrealoficial", "lunchbreak_al", "hudabeauty", "kate_johansson", "goose_stu", "kallmekris",
15 | "kapsalonfreedomboxmeer", "miakhalifa", "joe.bartolozzi", "coco224466", "khaby.lame", "partyshirt", "mndiaye_97",
16 | "dianarantamaki", "brodywellmaker", "louflores_", "makayla.domagalski1", "anxietycouple", "espn", "nanajoe19",
17 | "thepetcollective", "dina", "thefurrhafamily", "kessel_nathan_official", "jaylucky7", "moontellthat",
18 | "selenagomez", "lizzo", "umql0", "_verobo_", "daveyrz", "kelz", "hudanoor07", "vet.crew", "_catben_",
19 | "gertieinar", "isaiahgarzaintl"],
20 | "posts_of_music_ids_to_like": [],
21 | "collecting_data_for_first_posts": true
22 | },
23 | "136": {
24 | "test_user_id": 136,
25 | "login": true,
26 | "browser_language": "en",
27 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
28 | "number_of_batches": 3,
29 | "number_of_posts_to_like_per_batch": [],
30 | "number_of_creators_to_follow_per_batch": [],
31 | "number_of_posts_to_watch_longer_per_batch": [],
32 | "posts_with_hashtag_to_watch_longer": [],
33 | "posts_with_hashtag_to_like": [],
34 | "posts_of_content_creators_to_like": [],
35 | "posts_of_music_ids_to_like": [],
36 | "collecting_data_for_first_posts": true
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/like_us_user-159-160.json:
--------------------------------------------------------------------------------
1 | {
2 | "159": {
3 | "test_user_id": 159,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": true,
7 | "time_to_look_at_post_action": 0,
8 | "time_to_look_at_post_normal": 0.5,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_watch_longer": [],
14 | "posts_with_hashtag_to_like": [
15 | "movie",
16 | "film",
17 | "marvel",
18 | "foodtiktok",
19 | "tiktokfood",
20 | "foodie",
21 | "cooking",
22 | "food",
23 | "gaming",
24 | "gta5",
25 | "gta",
26 | "minecraft",
27 | "cat",
28 | "dog",
29 | "pet",
30 | "dogsoftiktok",
31 | "catsoftiktok",
32 | "cute",
33 | "puppy",
34 | "dogs",
35 | "cats",
36 | "animals",
37 | "petsoftiktok",
38 | "kitten",
39 | "comedy",
40 | "lol",
41 | "humour",
42 | "laugh",
43 | "fun",
44 | "jokes",
45 | "love",
46 | "couple",
47 | "relationship"
48 | ],
49 | "posts_of_content_creators_to_like": [],
50 | "posts_of_music_ids_to_like": [],
51 | "collecting_data_for_first_posts": true
52 | },
53 | "160": {
54 | "test_user_id": 160,
55 | "login": true,
56 | "browser_language": "en",
57 | "reuse_cookies": true,
58 | "time_to_look_at_post_action": 0,
59 | "time_to_look_at_post_normal": 0.5,
60 | "number_of_batches": 3,
61 | "number_of_posts_to_like_per_batch": [],
62 | "number_of_creators_to_follow_per_batch": [],
63 | "number_of_posts_to_watch_longer_per_batch": [],
64 | "posts_with_hashtag_to_watch_longer": [],
65 | "posts_with_hashtag_to_like": [],
66 | "posts_of_content_creators_to_like": [],
67 | "posts_of_music_ids_to_like": [],
68 | "collecting_data_for_first_posts": true
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/like_us_user-45-46.json:
--------------------------------------------------------------------------------
1 | {
2 | "45": {
3 | "test_user_id": 45,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 5,
8 | "number_of_posts_to_like_per_batch": [0, 6, 6, 6, 0],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "posts_with_hashtag_to_like": [],
11 | "number_of_posts_to_watch_longer_per_batch": [],
12 | "posts_with_hashtag_to_watch_longer": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": false
16 | },
17 | "46": {
18 | "test_user_id": 46,
19 | "login": false,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 5,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "posts_with_hashtag_to_like": [],
26 | "number_of_posts_to_watch_longer_per_batch": [],
27 | "posts_with_hashtag_to_watch_longer": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": false
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/like_us_user-59-60.json:
--------------------------------------------------------------------------------
1 | {
2 | "59": {
3 | "test_user_id": 59,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 5,
8 | "number_of_posts_to_like_per_batch": [0, 6, 6, 6, 0],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": true
16 | },
17 | "60": {
18 | "test_user_id": 60,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 5,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": true
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/like_us_user-70-71.json:
--------------------------------------------------------------------------------
1 | {
2 | "70": {
3 | "test_user_id": 70,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 5,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": ["football", "food", "euro2020", "movie", "foodtiktok", "gaming", "film",
13 | "tiktokfood", "gta5", "gta", "minecraft", "marvel", "cat", "dog", "pet", "dogsoftiktok", "catsoftiktok", "cute",
14 | "puppy", "dogs", "cats", "animals", "petsoftiktok", "kitten"],
15 | "posts_of_content_creators_to_like": [],
16 | "posts_of_music_ids_to_like": [],
17 | "collecting_data_for_first_posts": false
18 | },
19 | "71": {
20 | "test_user_id": 71,
21 | "login": true,
22 | "browser_language": "en",
23 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
24 | "number_of_batches": 5,
25 | "number_of_posts_to_like_per_batch": [],
26 | "number_of_creators_to_follow_per_batch": [],
27 | "number_of_posts_to_watch_longer_per_batch": [],
28 | "posts_with_hashtag_to_watch_longer": [],
29 | "posts_with_hashtag_to_like": [],
30 | "posts_of_content_creators_to_like": [],
31 | "posts_of_music_ids_to_like": [],
32 | "collecting_data_for_first_posts": false
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/location-1_ca_user-99-100.json:
--------------------------------------------------------------------------------
1 | {
2 | "99": {
3 | "test_user_id": 99,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": true
16 | },
17 | "100": {
18 | "test_user_id": 100,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": true
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/location-1_us_user-97-98.json:
--------------------------------------------------------------------------------
1 | {
2 | "97": {
3 | "test_user_id": 97,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": true
16 | },
17 | "98": {
18 | "test_user_id": 98,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": true
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/location-2_ca_user-101-102.json:
--------------------------------------------------------------------------------
1 | {
2 | "101": {
3 | "test_user_id": 101,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": true
16 | },
17 | "102": {
18 | "test_user_id": 102,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": true
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/location-2_us_user-105-106.json:
--------------------------------------------------------------------------------
1 | {
2 | "105": {
3 | "test_user_id": 105,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": true
16 | },
17 | "106": {
18 | "test_user_id": 106,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": true
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/location-3_de_user-107-108.json:
--------------------------------------------------------------------------------
1 | {
2 | "107": {
3 | "test_user_id": 107,
4 | "login": true,
5 | "browser_language": "de",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": true
16 | },
17 | "108": {
18 | "test_user_id": 108,
19 | "login": true,
20 | "browser_language": "de",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": true
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/location-3_us_user-103-104.json:
--------------------------------------------------------------------------------
1 | {
2 | "103": {
3 | "test_user_id": 103,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
8 | "number_of_batches": 3,
9 | "number_of_posts_to_like_per_batch": [],
10 | "number_of_creators_to_follow_per_batch": [],
11 | "number_of_posts_to_watch_longer_per_batch": [],
12 | "posts_with_hashtag_to_watch_longer": [],
13 | "posts_with_hashtag_to_like": [],
14 | "posts_of_content_creators_to_like": [],
15 | "posts_of_music_ids_to_like": [],
16 | "collecting_data_for_first_posts": true
17 | },
18 | "104": {
19 | "test_user_id": 104,
20 | "login": true,
21 | "browser_language": "en",
22 | "reuse_cookies": false,
23 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
24 | "number_of_batches": 3,
25 | "number_of_posts_to_like_per_batch": [],
26 | "number_of_creators_to_follow_per_batch": [],
27 | "number_of_posts_to_watch_longer_per_batch": [],
28 | "posts_with_hashtag_to_watch_longer": [],
29 | "posts_with_hashtag_to_like": [],
30 | "posts_of_content_creators_to_like": [],
31 | "posts_of_music_ids_to_like": [],
32 | "collecting_data_for_first_posts": true
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/location-4-de_us_user-109-110.json:
--------------------------------------------------------------------------------
1 | {
2 | "109": {
3 | "test_user_id": 109,
4 | "login": true,
5 | "browser_language": "de",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": true
16 | },
17 | "110": {
18 | "test_user_id": 110,
19 | "login": true,
20 | "browser_language": "de",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": true
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/location-4-en_us_user-129-132.json:
--------------------------------------------------------------------------------
1 | {
2 | "129": {
3 | "test_user_id": 129,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": true
16 | },
17 | "132": {
18 | "test_user_id": 132,
19 | "login": true,
20 | "browser_language": "en",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": true
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/location-4-es_us_user-130-133.json:
--------------------------------------------------------------------------------
1 | {
2 | "130": {
3 | "test_user_id": 130,
4 | "login": true,
5 | "browser_language": "es",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": true
16 | },
17 | "133": {
18 | "test_user_id": 133,
19 | "login": true,
20 | "browser_language": "es",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": true
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/location-4-fr_us_user-131-134.json:
--------------------------------------------------------------------------------
1 | {
2 | "131": {
3 | "test_user_id": 131,
4 | "login": true,
5 | "browser_language": "fr",
6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
7 | "number_of_batches": 3,
8 | "number_of_posts_to_like_per_batch": [],
9 | "number_of_creators_to_follow_per_batch": [],
10 | "number_of_posts_to_watch_longer_per_batch": [],
11 | "posts_with_hashtag_to_watch_longer": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_of_content_creators_to_like": [],
14 | "posts_of_music_ids_to_like": [],
15 | "collecting_data_for_first_posts": true
16 | },
17 | "134": {
18 | "test_user_id": 134,
19 | "login": true,
20 | "browser_language": "fr",
21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
22 | "number_of_batches": 3,
23 | "number_of_posts_to_like_per_batch": [],
24 | "number_of_creators_to_follow_per_batch": [],
25 | "number_of_posts_to_watch_longer_per_batch": [],
26 | "posts_with_hashtag_to_watch_longer": [],
27 | "posts_with_hashtag_to_like": [],
28 | "posts_of_content_creators_to_like": [],
29 | "posts_of_music_ids_to_like": [],
30 | "collecting_data_for_first_posts": true
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/test_user_11.json:
--------------------------------------------------------------------------------
1 | {
2 | "11": {
3 | "test_user_id": 11,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": true,
7 | "time_to_look_at_post_action": 0,
8 | "time_to_look_at_post_normal": 2,
9 | "number_of_posts_to_like_per_batch": [],
10 | "number_of_creators_to_follow_per_batch": [],
11 | "number_of_posts_to_watch_longer_per_batch": [],
12 | "posts_with_hashtag_to_like": [],
13 | "posts_with_hashtag_to_watch_longer": [],
14 | "posts_of_content_creators_to_like": [],
15 | "posts_of_music_ids_to_like": [],
16 | "collecting_data_for_first_posts": true,
17 | "number_of_batches": 1
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/vcr_us_user-127-128.json:
--------------------------------------------------------------------------------
1 | {
2 | "127": {
3 | "test_user_id": 127,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 4,
8 | "time_to_look_at_post_normal": 2,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_watch_longer": [
14 | "football",
15 | "food",
16 | "euro2020",
17 | "movie",
18 | "foodtiktok",
19 | "gaming",
20 | "film",
21 | "tiktokfood",
22 | "gta5",
23 | "gta",
24 | "minecraft",
25 | "marvel",
26 | "cat",
27 | "dog",
28 | "pet",
29 | "dogsoftiktok",
30 | "catsoftiktok",
31 | "cute",
32 | "puppy",
33 | "dogs",
34 | "cats",
35 | "animals",
36 | "petsoftiktok",
37 | "kitten",
38 | "comedy",
39 | "asmr",
40 | "learnontiktok",
41 | "satisfying",
42 | "lol",
43 | "love",
44 | "humour",
45 | "couple",
46 | "foodie",
47 | "baby",
48 | "car",
49 | "cars",
50 | "jokes",
51 | "lifehack",
52 | "satisfyingvideo",
53 | "relationship",
54 | "cooking",
55 | "laugh",
56 | "fun"
57 | ],
58 | "posts_with_hashtag_to_like": [],
59 | "posts_of_content_creators_to_like": [],
60 | "posts_of_music_ids_to_like": [],
61 | "collecting_data_for_first_posts": false
62 | },
63 | "128": {
64 | "test_user_id": 128,
65 | "login": true,
66 | "browser_language": "en",
67 | "reuse_cookies": false,
68 | "time_to_look_at_post_action": 0,
69 | "time_to_look_at_post_normal": 2,
70 | "number_of_batches": 3,
71 | "number_of_posts_to_like_per_batch": [],
72 | "number_of_creators_to_follow_per_batch": [],
73 | "number_of_posts_to_watch_longer_per_batch": [],
74 | "posts_with_hashtag_to_watch_longer": [],
75 | "posts_with_hashtag_to_like": [],
76 | "posts_of_content_creators_to_like": [],
77 | "posts_of_music_ids_to_like": [],
78 | "collecting_data_for_first_posts": false
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/vcr_us_user-145-146.json:
--------------------------------------------------------------------------------
1 | {
2 | "145": {
3 | "test_user_id": 145,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0.75,
8 | "time_to_look_at_post_normal": 2,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_watch_longer": ["football", "food", "euro2020", "movie", "foodtiktok", "gaming", "film",
14 | "tiktokfood", "gta5", "gta", "minecraft", "marvel", "cat", "dog", "pet", "dogsoftiktok", "catsoftiktok", "cute",
15 | "puppy", "dogs", "cats", "animals", "petsoftiktok", "kitten"],
16 | "posts_with_hashtag_to_like": [],
17 | "posts_of_content_creators_to_like": [],
18 | "posts_of_music_ids_to_like": [],
19 | "collecting_data_for_first_posts": false
20 | },
21 | "146": {
22 | "test_user_id": 146,
23 | "login": true,
24 | "browser_language": "en",
25 | "reuse_cookies": false,
26 | "time_to_look_at_post_action": 0,
27 | "time_to_look_at_post_normal": 2,
28 | "number_of_batches": 3,
29 | "number_of_posts_to_like_per_batch": [],
30 | "number_of_creators_to_follow_per_batch": [],
31 | "number_of_posts_to_watch_longer_per_batch": [],
32 | "posts_with_hashtag_to_watch_longer": [],
33 | "posts_with_hashtag_to_like": [],
34 | "posts_of_content_creators_to_like": [],
35 | "posts_of_music_ids_to_like": [],
36 | "collecting_data_for_first_posts": false
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/vcr_us_user-151-152.json:
--------------------------------------------------------------------------------
1 | {
2 | "151": {
3 | "test_user_id": 151,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": true,
7 | "time_to_look_at_post_action": 4,
8 | "time_to_look_at_post_normal": 2,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_watch_longer": [
14 | "movie",
15 | "film",
16 | "marvel",
17 | "foodtiktok",
18 | "tiktokfood",
19 | "foodie",
20 | "cooking",
21 | "food",
22 | "gaming",
23 | "gta5",
24 | "gta",
25 | "minecraft",
26 | "cat",
27 | "dog",
28 | "pet",
29 | "dogsoftiktok",
30 | "catsoftiktok",
31 | "cute",
32 | "puppy",
33 | "dogs",
34 | "cats",
35 | "animals",
36 | "petsoftiktok",
37 | "kitten",
38 | "comedy",
39 | "lol",
40 | "humour",
41 | "laugh",
42 | "fun",
43 | "jokes",
44 | "love",
45 | "couple",
46 | "relationship"
47 | ],
48 | "posts_with_hashtag_to_like": [],
49 | "posts_of_content_creators_to_like": [],
50 | "posts_of_music_ids_to_like": [],
51 | "collecting_data_for_first_posts": false
52 | },
53 | "152": {
54 | "test_user_id": 152,
55 | "login": true,
56 | "browser_language": "en",
57 | "reuse_cookies": true,
58 | "time_to_look_at_post_action": 0,
59 | "time_to_look_at_post_normal": 2,
60 | "number_of_batches": 3,
61 | "number_of_posts_to_like_per_batch": [],
62 | "number_of_creators_to_follow_per_batch": [],
63 | "number_of_posts_to_watch_longer_per_batch": [],
64 | "posts_with_hashtag_to_watch_longer": [],
65 | "posts_with_hashtag_to_like": [],
66 | "posts_of_content_creators_to_like": [],
67 | "posts_of_music_ids_to_like": [],
68 | "collecting_data_for_first_posts": false
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/vcr_us_user-157-158.json:
--------------------------------------------------------------------------------
1 | {
2 | "157": {
3 | "test_user_id": 157,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": true,
7 | "time_to_look_at_post_action": 4,
8 | "time_to_look_at_post_normal": 0.5,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_watch_longer": [
14 | "movie",
15 | "film",
16 | "marvel",
17 | "foodtiktok",
18 | "tiktokfood",
19 | "foodie",
20 | "cooking",
21 | "food",
22 | "gaming",
23 | "gta5",
24 | "gta",
25 | "minecraft",
26 | "cat",
27 | "dog",
28 | "pet",
29 | "dogsoftiktok",
30 | "catsoftiktok",
31 | "cute",
32 | "puppy",
33 | "dogs",
34 | "cats",
35 | "animals",
36 | "petsoftiktok",
37 | "kitten",
38 | "comedy",
39 | "lol",
40 | "humour",
41 | "laugh",
42 | "fun",
43 | "jokes",
44 | "love",
45 | "couple",
46 | "relationship"
47 | ],
48 | "posts_with_hashtag_to_like": [],
49 | "posts_of_content_creators_to_like": [],
50 | "posts_of_music_ids_to_like": [],
51 | "collecting_data_for_first_posts": false
52 | },
53 | "158": {
54 | "test_user_id": 158,
55 | "login": true,
56 | "browser_language": "en",
57 | "reuse_cookies": true,
58 | "time_to_look_at_post_action": 0,
59 | "time_to_look_at_post_normal": 0.5,
60 | "number_of_batches": 3,
61 | "number_of_posts_to_like_per_batch": [],
62 | "number_of_creators_to_follow_per_batch": [],
63 | "number_of_posts_to_watch_longer_per_batch": [],
64 | "posts_with_hashtag_to_watch_longer": [],
65 | "posts_with_hashtag_to_like": [],
66 | "posts_of_content_creators_to_like": [],
67 | "posts_of_music_ids_to_like": [],
68 | "collecting_data_for_first_posts": false
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/vcr_us_user-163-164.json:
--------------------------------------------------------------------------------
1 | {
2 | "163": {
3 | "test_user_id": 163,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": true,
7 | "time_to_look_at_post_action": 4,
8 | "time_to_look_at_post_normal": 0.5,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_watch_longer": [
14 |
15 | ],
16 | "posts_with_hashtag_to_like": [],
17 | "posts_of_content_creators_to_like": [],
18 | "posts_of_music_ids_to_like": [],
19 | "collecting_data_for_first_posts": false
20 | },
21 | "164": {
22 | "test_user_id": 164,
23 | "login": true,
24 | "browser_language": "en",
25 | "reuse_cookies": true,
26 | "time_to_look_at_post_action": 0,
27 | "time_to_look_at_post_normal": 0.5,
28 | "number_of_batches": 3,
29 | "number_of_posts_to_like_per_batch": [],
30 | "number_of_creators_to_follow_per_batch": [],
31 | "number_of_posts_to_watch_longer_per_batch": [],
32 | "posts_with_hashtag_to_watch_longer": [],
33 | "posts_with_hashtag_to_like": [],
34 | "posts_of_content_creators_to_like": [],
35 | "posts_of_music_ids_to_like": [],
36 | "collecting_data_for_first_posts": false
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/vcr_us_user-77-78.json:
--------------------------------------------------------------------------------
1 | {
2 | "77": {
3 | "test_user_id": 77,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0,
8 | "time_to_look_at_post_normal": 2,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [10, 10, 10],
13 | "posts_with_hashtag_to_watch_longer": [],
14 | "posts_with_hashtag_to_like": [],
15 | "posts_of_content_creators_to_like": [],
16 | "posts_of_music_ids_to_like": [],
17 | "collecting_data_for_first_posts": false
18 | },
19 | "78": {
20 | "test_user_id": 78,
21 | "login": true,
22 | "browser_language": "en",
23 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2,
24 | "number_of_batches": 3,
25 | "number_of_posts_to_like_per_batch": [],
26 | "number_of_creators_to_follow_per_batch": [],
27 | "number_of_posts_to_watch_longer_per_batch": [],
28 | "posts_with_hashtag_to_watch_longer": [],
29 | "posts_with_hashtag_to_like": [],
30 | "posts_of_content_creators_to_like": [],
31 | "posts_of_music_ids_to_like": [],
32 | "collecting_data_for_first_posts": false
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/vcr_us_user-79-80.json:
--------------------------------------------------------------------------------
1 | {
2 | "79": {
3 | "test_user_id": 79,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0.5,
8 | "time_to_look_at_post_normal": 2,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [10, 10, 10],
13 | "posts_with_hashtag_to_watch_longer": [],
14 | "posts_with_hashtag_to_like": [],
15 | "posts_of_content_creators_to_like": [],
16 | "posts_of_music_ids_to_like": [],
17 | "collecting_data_for_first_posts": false
18 | },
19 | "80": {
20 | "test_user_id": 80,
21 | "login": true,
22 | "browser_language": "en",
23 | "reuse_cookies": false,
24 | "time_to_look_at_post_action": 0,
25 | "time_to_look_at_post_normal": 2,
26 | "number_of_batches": 3,
27 | "number_of_posts_to_like_per_batch": [],
28 | "number_of_creators_to_follow_per_batch": [],
29 | "number_of_posts_to_watch_longer_per_batch": [],
30 | "posts_with_hashtag_to_watch_longer": [],
31 | "posts_with_hashtag_to_like": [],
32 | "posts_of_content_creators_to_like": [],
33 | "posts_of_music_ids_to_like": [],
34 | "collecting_data_for_first_posts": false
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/vcr_us_user-81-82.json:
--------------------------------------------------------------------------------
1 | {
2 | "81": {
3 | "test_user_id": 81,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0.75,
8 | "time_to_look_at_post_normal": 2,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [10, 10, 10],
13 | "posts_with_hashtag_to_watch_longer": [],
14 | "posts_with_hashtag_to_like": [],
15 | "posts_of_content_creators_to_like": [],
16 | "posts_of_music_ids_to_like": [],
17 | "collecting_data_for_first_posts": false
18 | },
19 | "82": {
20 | "test_user_id": 82,
21 | "login": true,
22 | "browser_language": "en",
23 | "reuse_cookies": false,
24 | "time_to_look_at_post_action": 0,
25 | "time_to_look_at_post_normal": 2,
26 | "number_of_batches": 3,
27 | "number_of_posts_to_like_per_batch": [],
28 | "number_of_creators_to_follow_per_batch": [],
29 | "number_of_posts_to_watch_longer_per_batch": [],
30 | "posts_with_hashtag_to_watch_longer": [],
31 | "posts_with_hashtag_to_like": [],
32 | "posts_of_content_creators_to_like": [],
33 | "posts_of_music_ids_to_like": [],
34 | "collecting_data_for_first_posts": false
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/vcr_us_user-83-84.json:
--------------------------------------------------------------------------------
1 | {
2 | "83": {
3 | "test_user_id": 83,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 1,
8 | "time_to_look_at_post_normal": 2,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [10, 10, 10],
13 | "posts_with_hashtag_to_watch_longer": [],
14 | "posts_with_hashtag_to_like": [],
15 | "posts_of_content_creators_to_like": [],
16 | "posts_of_music_ids_to_like": [],
17 | "collecting_data_for_first_posts": false
18 | },
19 | "84": {
20 | "test_user_id": 84,
21 | "login": true,
22 | "browser_language": "en",
23 | "reuse_cookies": false,
24 | "time_to_look_at_post_action": 0,
25 | "time_to_look_at_post_normal": 2,
26 | "number_of_batches": 3,
27 | "number_of_posts_to_like_per_batch": [],
28 | "number_of_creators_to_follow_per_batch": [],
29 | "number_of_posts_to_watch_longer_per_batch": [],
30 | "posts_with_hashtag_to_watch_longer": [],
31 | "posts_with_hashtag_to_like": [],
32 | "posts_of_content_creators_to_like": [],
33 | "posts_of_music_ids_to_like": [],
34 | "collecting_data_for_first_posts": false
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/vcr_us_user-85-86.json:
--------------------------------------------------------------------------------
1 | {
2 | "85": {
3 | "test_user_id": 85,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 2,
8 | "time_to_look_at_post_normal": 2,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [10, 10, 10],
13 | "posts_with_hashtag_to_watch_longer": [],
14 | "posts_with_hashtag_to_like": [],
15 | "posts_of_content_creators_to_like": [],
16 | "posts_of_music_ids_to_like": [],
17 | "collecting_data_for_first_posts": false
18 | },
19 | "86": {
20 | "test_user_id": 86,
21 | "login": true,
22 | "browser_language": "en",
23 | "reuse_cookies": false,
24 | "time_to_look_at_post_action": 0,
25 | "time_to_look_at_post_normal": 2,
26 | "number_of_batches": 3,
27 | "number_of_posts_to_like_per_batch": [],
28 | "number_of_creators_to_follow_per_batch": [],
29 | "number_of_posts_to_watch_longer_per_batch": [],
30 | "posts_with_hashtag_to_watch_longer": [],
31 | "posts_with_hashtag_to_like": [],
32 | "posts_of_content_creators_to_like": [],
33 | "posts_of_music_ids_to_like": [],
34 | "collecting_data_for_first_posts": false
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/vcr_us_user-87-88.json:
--------------------------------------------------------------------------------
1 | {
2 | "87": {
3 | "test_user_id": 87,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0.5,
8 | "time_to_look_at_post_normal": 2,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_watch_longer": ["football", "food", "euro2020", "movie", "foodtiktok", "gaming", "film",
14 | "tiktokfood", "gta5", "gta", "minecraft", "marvel", "cat", "dog", "pet", "dogsoftiktok", "catsoftiktok", "cute",
15 | "puppy", "dogs", "cats", "animals", "petsoftiktok", "kitten"],
16 | "posts_with_hashtag_to_like": [],
17 | "posts_of_content_creators_to_like": [],
18 | "posts_of_music_ids_to_like": [],
19 | "collecting_data_for_first_posts": false
20 | },
21 | "88": {
22 | "test_user_id": 88,
23 | "login": true,
24 | "browser_language": "en",
25 | "reuse_cookies": false,
26 | "time_to_look_at_post_action": 0,
27 | "time_to_look_at_post_normal": 2,
28 | "number_of_batches": 3,
29 | "number_of_posts_to_like_per_batch": [],
30 | "number_of_creators_to_follow_per_batch": [],
31 | "number_of_posts_to_watch_longer_per_batch": [],
32 | "posts_with_hashtag_to_watch_longer": [],
33 | "posts_with_hashtag_to_like": [],
34 | "posts_of_content_creators_to_like": [],
35 | "posts_of_music_ids_to_like": [],
36 | "collecting_data_for_first_posts": false
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/vcr_us_user-89-90.json:
--------------------------------------------------------------------------------
1 | {
2 | "89": {
3 | "test_user_id": 89,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0.75,
8 | "time_to_look_at_post_normal": 2,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_watch_longer": ["football", "food", "euro2020", "movie", "foodtiktok", "gaming", "film",
14 | "tiktokfood", "gta5", "gta", "minecraft", "marvel", "cat", "dog", "pet", "dogsoftiktok", "catsoftiktok", "cute",
15 | "puppy", "dogs", "cats", "animals", "petsoftiktok", "kitten"],
16 | "posts_with_hashtag_to_like": [],
17 | "posts_of_content_creators_to_like": [],
18 | "posts_of_music_ids_to_like": [],
19 | "collecting_data_for_first_posts": false
20 | },
21 | "90": {
22 | "test_user_id": 90,
23 | "login": true,
24 | "browser_language": "en",
25 | "reuse_cookies": false,
26 | "time_to_look_at_post_action": 0,
27 | "time_to_look_at_post_normal": 2,
28 | "number_of_batches": 3,
29 | "number_of_posts_to_like_per_batch": [],
30 | "number_of_creators_to_follow_per_batch": [],
31 | "number_of_posts_to_watch_longer_per_batch": [],
32 | "posts_with_hashtag_to_watch_longer": [],
33 | "posts_with_hashtag_to_like": [],
34 | "posts_of_content_creators_to_like": [],
35 | "posts_of_music_ids_to_like": [],
36 | "collecting_data_for_first_posts": false
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/Testing/TestSets/part_1_tests/vcr_us_user-91-92.json:
--------------------------------------------------------------------------------
1 | {
2 | "91": {
3 | "test_user_id": 91,
4 | "login": true,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 1,
8 | "time_to_look_at_post_normal": 2,
9 | "number_of_batches": 3,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_watch_longer": ["football", "food", "euro2020", "movie", "foodtiktok", "gaming", "film",
14 | "tiktokfood", "gta5", "gta", "minecraft", "marvel", "cat", "dog", "pet", "dogsoftiktok", "catsoftiktok", "cute",
15 | "puppy", "dogs", "cats", "animals", "petsoftiktok", "kitten"],
16 | "posts_with_hashtag_to_like": [],
17 | "posts_of_content_creators_to_like": [],
18 | "posts_of_music_ids_to_like": [],
19 | "collecting_data_for_first_posts": false
20 | },
21 | "92": {
22 | "test_user_id": 92,
23 | "login": true,
24 | "browser_language": "en",
25 | "reuse_cookies": false,
26 | "time_to_look_at_post_action": 0,
27 | "time_to_look_at_post_normal": 2,
28 | "number_of_batches": 3,
29 | "number_of_posts_to_like_per_batch": [],
30 | "number_of_creators_to_follow_per_batch": [],
31 | "number_of_posts_to_watch_longer_per_batch": [],
32 | "posts_with_hashtag_to_watch_longer": [],
33 | "posts_with_hashtag_to_like": [],
34 | "posts_of_content_creators_to_like": [],
35 | "posts_of_music_ids_to_like": [],
36 | "collecting_data_for_first_posts": false
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/Testing/TestSets/test_user_167.json:
--------------------------------------------------------------------------------
1 | {
2 | "167": {
3 | "test_user_id": 167,
4 | "login": false,
5 | "browser_language": "en",
6 | "reuse_cookies": false,
7 | "time_to_look_at_post_action": 0,
8 | "time_to_look_at_post_normal": 0.1,
9 | "number_of_batches": 20,
10 | "number_of_posts_to_like_per_batch": [],
11 | "number_of_creators_to_follow_per_batch": [],
12 | "number_of_posts_to_watch_longer_per_batch": [],
13 | "posts_with_hashtag_to_watch_longer": [],
14 | "posts_with_hashtag_to_like": [],
15 | "posts_of_content_creators_to_like": [],
16 | "posts_of_music_ids_to_like": [],
17 | "collecting_data_for_first_posts": false
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/Testing/scratch_12.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
3 |
class Animal:
    """Base type for animals, described by the number of legs they have."""

    def __init__(self, n_legs: int):
        # Store the leg count on the instance so callers can read it back.
        self.n_legs: int = n_legs

    def make_noise(self):
        """Placeholder hook: this base implementation always raises NotImplementedError."""
        raise NotImplementedError()
10 |
11 |
class Dog(Animal):
    """An Animal that also records a breed; any extra arguments go to Animal.__init__."""

    def __init__(self, breed: str, *args, **kwargs):
        # Echo the forwarded positional arguments first, then the keyword arguments.
        for forwarded in (args, kwargs):
            print(forwarded)
        # The breed is assigned before the base initializer runs.
        self.breed: str = breed
        super().__init__(*args, **kwargs)
18 |
19 |
# Lightweight immutable record with three fields: x, y and z.
Point = namedtuple("Point", ["x", "y", "z"])
21 |
22 |
if __name__ == '__main__':
    # Build a Dog; its constructor prints the forwarded args/kwargs, then list its attributes.
    labrador = Dog("labrador", 4)
    labrador_attrs = dir(labrador)
    print(labrador_attrs)

    # A plain Animal only carries its leg count.
    biped = Animal(2)
    print(biped.n_legs)

    # Show the attribute names exposed by a namedtuple instance.
    sample_point = Point(0.2, 0.2, 0.3)
    print(dir(sample_point))
--------------------------------------------------------------------------------
/chromedriver.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/chromedriver.exe
--------------------------------------------------------------------------------
/gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/gitignore
--------------------------------------------------------------------------------
/hashtags_to_ignore.json:
--------------------------------------------------------------------------------
1 | {"88764338": "foryoupage", "1693596282061825": "fyp memez", "1642147373664261": "fypvirall", "1706891576089605": "fyp", "1704829339220998": "thinkforyourlife", "1648316753236998": "fypppppppppppppppppppppppp", "1646344785794053": "fyp20", "1644632912092165": "fypchachallenge", "1645966921365509": "fypfypfypfypfypfypfypfypfyp", "1661745157709826": "fypgakni", "883904": "foryouuu", "1694385466292229": "fypcontents", "1703472025824258": "chaukemuhluri type", "1616303504084998": "foryoupage", "229207": "fyp", "1654225487570949": "myfypbelike", "1651780589526022": "kingfyp", "1642191380435969": "foryourpageviral", "1633875828543494": "fypdog", "62543": "goodforyou", "1696630895338498": "fyp", "1631845819935750": "fypart", "1705461580495878": "foryouatwestfield", "1606946063404037": "likeforyoupage", "1706349891476490": "rafiqnakfyp", "1634577353868293": "pageforyou", "1603105080060934": "foryoupa", "1617501114305542": "fypfypfyp", "1638319905147909": "thefypdoesntwork", "96178": "lookingforyou", "1625705313397766": "fyppp", "1685323802588161": "fypforyoupage", "1626746770984966": "fyppppp", "1647699651574789": "fypfypfypfypfypfypfypfypfypfypfyp", "1664856447763462": "kdramafyp", "1644665564694534": "plisfyp", "1608548676719622": "dogsforyou", "1640087008637957": "fypdoesnotwork", "1667434148820998": "fyp cr", "1676376584428546": "fyppdongggggg", "1609969345764357": "fypplz", "1646000100595714": "fypmalaysia", "1636339340982278": "fypmemes", "1656713786866694": "fyp", "10031099": "foryoudrink", "1643981773166597": "fyp", "1650426297129990": "fyphair", "1649953988475909": "itsasignforyou", "1664053106852869": "fyp", "1702828530483205": "fyptrusttheprocess", "6477336": "fluffypancakes", "1703769711014918": "fyp", "1690360009542661": "cleaninghacksforyou", "42164": "foryou", "1659616574816257": "fypgaknih", "1603364504464389": "foryoupagee", "1685972632069125": "foryou", "1633460239823877": "fypy", "5578078": "somuchloveforyou", "1642256290828294": "fypisbroken", 
"1657293034376198": "mexicanforyoupage", "1605095166336005": "foryoupageeee", "1676811985203206": "fyviralfyp", "1636799196126214": "fypoffical", "1641715580438534": "fypfypfypfypfypfyp", "41248": "fallingforyou", "1648778089330694": "fyppppppppppppppppppp", "1626123835557893": "foryoufyp", "1620040599854086": "fypforyoupage", "1692557178255365": "fashiontiktokforyou", "1632511288704006": "fypo", "1654855125875717": "secretfyp", "1679200763816965": "fypttv", "1631460356711430": "pleasegetthisonthefyp", "1634600653321222": "fypps", "1623723121789957": "foryou", "1672970870730754": "fypdonggggggg", "4779077": "funnyvideosforyou", "1652538132966402": "foryoupageyeh", "1630284807035909": "fyptiktok", "393902": "loveforyou", "1620625283638277": "foryouviral", "1628179191522310": "foryoupgepage", "1668677561259014": "blackpeoplebelikefyp", "1658029352404997": "pupsfyp", "1665702793180165": "robuxforyou", "1639908894714881": "fypage", "1633359003571205": "putthisonfyp", "1660595729188869": "recipesforyou", "1649087342340101": "logoforyou", "1620485025788934": "fypp", "294688": "notforyou", "1648455553681413": "puppyfyp", "1640328120128517": "disneyfyp", "1682300274755586": "foryourepageofficial", "37160": "onlyforyou", "1631099105025030": "fyproblox", "22737416": "thisbudsforyou", "1657002917933062": "fypdontworknomore", "1653730940624901": "cakefyp", "1636619391693830": "kpopfyp", "1635084903417862": "foryou", "1701731659930629": "3minutevideoftype", "1636358001636357": "fyppppppppppp", "1670914542274565": "fypisbest", "1616746784464901": "foryourpag\u0435", "1607083044342806": "foryoupageeeee", "1602924933299205": "foryoupge", "1635974755232774": "asmrforyoupage", "1705180534836230": "barbertipsforyou", "1702698474374146": "back_in_fyp", "1623023114746102": "motivationforyou", "1685694592866306": "fyp viraltiktok", "67231518": "foryouph", "1628170725056582": "fyp", "1635201890205702": "asmrfyp", "1702065574970369": "fypmlaysia", "1659410697047041": "bismilahfyp", 
"1661715660552198": "adamsfyp", "1699264431886341": "fyp", "1637138684632070": "foryourpride", "1630729454643206": "fypforyouforyoupage", "1639655121988613": "gayfyp", "1658590921953281": "fypppppppppppppppppppppppppppppppppppp", "1639549424304133": "fyp tiktok", "1627816081234950": "robloxfyp", "1634103260359685": "fypdoesntwork", "1627320129956870": "foryoupageofficial", "1646970538788869": "fyptoronto", "1653705288350726": "g nnforyou", "1625649784043526": "fypplzz", "43739490": "petsforyou", "1649540023232517": "fypppppoo", "1605511720126469": "foryoupagina", "54185045": "fyplz", "1632540676574213": "fypppppp", "1688122502170626": "fyp", "1678160367690753": "fypdongahh", "1605270298646534": "foryoupace", "1656141302876165": "fypbali", "1599011224611846": "foryoupag", "1641193139018758": "fypaged", "1649873729675269": "christianfyp", "1651632362337286": "fyppageforyou", "1700800312528897": "foryourbestmate", "1664953059570690": "dailyvideosforyou", "1605983562245125": "foryou page", "1623668238525445": "fypplease", "1668051417397253": "tohfyp", "1634939850577926": "fyp", "1667767033351169": "fypsarawak", "1687184453108742": "fypsoundsss", "1670902591059970": "masukberanda type", "84873565": "foryouthis", "1655586519169029": "fypnotworking", "1646415283323910": "hockeyfyp", "1640702198714373": "getthisonthefypplease", "1644272574749702": "fypforyoupage", "1651863311959045": "gtafyp", "1705278121497606": "Black-headed", "1638251441181702": "fyp", "1602020764569605": "foryoupageplease", "1656974288293889": "fypdong", "1614846349487125": "fype", "1642258369204230": "fyppppppppppppppppp", "1597714851741701": "fypuk", "1607127600018438": "bhfyp", "1637403385093126": "foryou?", "1692301272753158": "fypuswnt", "1634091457788933": "fypforyourpage", "1651867800604677": "mexicanforyou", "1704855486517254": "gossipgirlherefyp", "1598364115802118": "foryoupgae", "1653077770558470": "nochopstickforyou", "1633131185532933": "fypppppppppppppppp", "1679059746505730": 
"destroyforyouba", "1658563098340353": "foryou2021", "1608533679464453": "foryouart", "1691905401746433": "fypmalaysiatiktok", "1674335187381249": "fypanime", "1628190985570310": "fypfyp", "1655836333592582": "spanishfyp", "1685183196061697": "fypdebz", "1620988974477318": "getthisonthefyp", "1647232092094469": "fypfypfypfypfypfypfypfypfypfypfypfypfyp", "1650591402139653": "xmasfyp", "7603941": "foryouchallenge", "1634979065961477": "fyp", "1685172109999106": "f\u00fcrdichseite\u30b7foryoupage", "1603504302397446": "foryoulage", "1624550708748294": "foryoupage", "1639107461180422": "foryoupagedoesnotwork", "1624339432620038": "fyppage", "1674618424012805": "kidsonmyfypbelike", "1635280932750342": "fypppppppppppppp", "1653220312297477": "fyppppppppppppppppppppppppppppppppp", "1626102952307718": "fypppp", "1637715128978437": "fyppppppppppppppppp", "1633200051757061": "bluestaffypuppy", "1648047480944646": "fypfypfypfy", "1633801928553478": "fypppppppppp", "1624000015616005": "fypsound", "1651224510575621": "fypforyoupagethis", "1653739674004485": "tutofyp", "1656078044884993": "masukfyp", "1659004589569030": "scottishforyoupage", "1604114517755910": "foryoupride", "773601": "foryourmom", "1684624429633537": "foryoupageofficiall2021", "1698894208649217": "sgfyp", "1627926592598021": "fypforyou", "1607069197518854": "foryourpages", "1657857938666501": "fypcommunity", "1634937277509638": "foryou", "1640241025714181": "fyppppppppppppp", "1654955847156741": "blowupforyoupage", "1637295409647621": "fypdrama", "1604390375122950": "foryouppage", "1637403568312326": "foryoupage?", "20922363": "pageforyou", "1649954646340614": "fypbabies", "1640999288847365": "fypchallage", "1637418424589317": "fyp", "1675963560778754": "fypmototiktok", "1641236262278149": "fyp", "1634922086900741": "fypchallenge", "1702998879182853": "foryoupage tik tok viral video", "1634318675729413": "fypplss", "1659150842052610": "semogafyp", "1634880784642054": "careforyourfurbabies", "8085197": 
"foryouuuuu", "1605614978359301": "foryourepage", "1674893053234177": "fypdongggggggg", "1673115521012737": "brandafyp", "1643442074587137": "sgfyp", "1654407569745925": "fypnails", "1644044404124677": "animefyp", "1626287748447237": "antifyp", "1647967591256070": "fypcouple", "1628662386826245": "fyp", "1667758271570950": "fypplppppppppppp", "1668812910222338": "fypdonggggggg", "1644109611442182": "fypofficial", "1641655411270661": "fyppagee", "1623302502209542": "foryounails", "1661947223506946": "berandafyp", "1674584508176390": "fyp viral", "13082896": "foryoupaige", "1656991624902662": "fypforyoupage", "1685059559254018": "foryoupageforeveryone\u2661", "1683703956260866": "fypjebal", "1631348850976774": "fyp", "1637342470396934": "fyp", "1619120300969989": "fyp", "1697420444830726": "follow me and t s g fyp", "1659700661828610": "Fyp post", "1662491668570113": "masukberanda type", "1662601160390657": "moviesforyou", "1646335221298182": "fypindonesia", "1689637217346566": "mynameistiggergetmeonthefyp", "1647234084365318": "fypbr", "1619679937718277": "plsfyp", "1628190219645957": "foryoupagedoesntwork", "1649139584636934": "fyppppppppppppppppppppp", "1620500486126597": "minecraftfyp", "1634943047845893": "foryoupage", "1634950588112902": "foryoupage", "1631522291667974": "forfyp", "1615032447947782": "entertainmentforyou", "1619084880059397": "foryousounds", "1597916865387525": "foryou1", "1604552984408070": "foryoupageee", "1603600556098566": "foryoupg", "1634937292926981": "foryoupage", "1645703110543366": "fypdoesntworkbutidoitanyways", "1664495475082242": "foryoupageofficiall", "1637407748596742": "fyp", "1606872592930822": "foryourpagee", "1655316819776518": "mexicanfyp", "1635204099943426": "fypsounds", "1654857781475333": "fyp viral", "1635848705845253": "fypyoupage", "1673560732511237": "fypdiesntwork", "1644298750203905": "tiktokjokesforyou", "1634369597616134": "viralfyp", "61667223": "fyps", "1617590735671301": "fypfor", "1662784958109697": 
"happylifefyp", "1609595389571077": "foryoupagetiktok", "1607061020406789": "foryoucomedy", "1647840803266566": "fypfootball", "1658756653576198": "fypdosentwork", "1654665482492933": "xyzbcafyp", "1648715358635009": "berandafyp", "1704170275899398": "fypodoentwork", "1684082479714306": "fyp viralvideo", "1702755085236229": "bewhoyouareforyourpride", "1632002941088774": "fypviral", "78768996": "memesforyou", "1642613516590086": "fypppppppppppppppppp", "1659949481334789": "fypforyoupage\u30b7", "1609273039298565": "foryoupag\u0435", "1650742742654982": "fyp2020", "33971256": "foryouofficial", "1627610222006278": "canesaufforyou", "1698955398675461": "fyppppgreyhound", "1666593428398085": "fypviral", "1617425133872134": "foryoupageoffical", "1623383052219414": "fypthis", "1592743307939841": "tiktokforyou", "1666847537269762": "animegirlforyou", "1603204382206981": "fypg", "7107602": "foryouforyou", "1662807921131525": "2kfyp", "1604142815383557": "foryoupgage", "1620267499925509": "fypls", "1611853076456450": "viralforyou", "1656845043434502": "fypsoccer", "1658673609103366": "fyp21", "1610265502020613": "foryourpagechallenge", "1648042687613958": "fypdontwork", "1624390188065798": "foryoufood", "1666458039063553": "fypmalaysia", "1622662438146694": "notfyp", "1670686468547590": "justgetthisonfyp", "1703891853128710": "foreverforyou", "1635213389964293": "fypit", "1602878675792901": "getmeontheforyoupage", "1632581882862593": "fypph", "1620763223578790": "foryoupakistan", "1666844905635846": "japanfyp", "1704810616802313": "azrulalwaysfyp", "1694852740601861": "fypshortclips", "1686358467262466": "fyppyfyp", "1640768557032454": "fyp", "1649766571425797": "carfyp", "1604276726661125": "foryoupagethis", "1655495140363270": "fyppppls", "1690407489009665": "standwithkaahmir_foryoupage_mu", "442854": "foryouu", "1618323884896262": "tiktokpageforyou", "1634937149617158": "foryoupage", "1637252569181189": "fypdogs", "1608973297173509": "foryoupageforever", 
"1606878050264069": "onyourforyoupage", "1630155480914949": "fyppppp", "1679267549421570": "fypdoge", "12009": "justforyou", "1670093194164230": "fypdoenstwork", "1679511090156546": "fypdonggggggg", "1667758536220673": "fypdongggg", "1624022682062853": "fyp", "1604302285252613": "fypage", "1639336981017605": "fyppleasetiktok", "1605919148785669": "ffyp", "1598498371111942": "foryourpage", "1610847197102085": "foryoutiktok", "364659": "foryour", "1615796627818502": "foryourdogpage", "1675567091586054": "fyppoppppppppppppppppppppp", "1616966643636229": "fypp", "1626088819377157": "fyfyp", "1635214780493826": "fypthisss", "1634449307607045": "goonthefyp", "1701341399285766": "foryourpride\ud83c\udff3\ufe0f\u200d\ud83c\udf08", "11899407": "artforyou", "1657052815966210": "fypberandatiktok", "1651393084389381": "fypdosntwork", "1625103941893125": "foryoupet", "1640815314240518": "fypagechallenge", "1600360681543685": "getthisontheforyoupage", "1603987329456134": "foryouapge", "1592201096750085": "foryou", "1634967704187910": "fyp", "1679357203568641": "mpesfypgamw", "1672400727355394": "berandafyp ya", "1670139895030786": "funnycontentforyou", "1664068495767558": "mtbforyou", "1617418873150470": "fyppls", "1635070555641861": "foryoupage", "1627524680056837": "fyppppppppp", "22091782": "foryoup", "1603502040695813": "foryouoage", "1664119477821441": "fyp viral", "1639699601016837": "fyp", "1666841966392325": "fyp2021", "1606788550284294": "robloxforyou", "1625820133083142": "fypfypfypfyp", "1665036454423553": "fyp foryoupage tiktok", "1616155509242885": "foryoupagebro", "1650370082138113": "foryouoffical", "1659685565791237": "koreafyp", "1655841212503045": "fypfoodie", "1673852121933830": "foryoupagedosntwork", "1682876476545042": "fakesnapsforyou", "1623927366993925": "fyp1", "1669636561460229": "xyzbcafyp", "1604284546288646": "foryoupages"}
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/main.py
--------------------------------------------------------------------------------
/ngrok.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/ngrok.exe
--------------------------------------------------------------------------------
/proxy_auth_plugin.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/proxy_auth_plugin.zip
--------------------------------------------------------------------------------
/proxy_auth_plugin/background.js:
--------------------------------------------------------------------------------
1 |
// Background script of the proxy-auth Chrome extension: routes all browser
// traffic through one fixed HTTP proxy and answers the proxy's auth challenge.
var config = {
    mode: "fixed_servers",
    rules: {
        singleProxy: {
            scheme: "http",
            host: "45.95.96.132",
            port: 8691
        },
        // Requests to localhost are sent directly, not through the proxy.
        bypassList: ["localhost"]
    }
};

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

/**
 * Supplies the proxy credentials whenever an authentication challenge is raised.
 * @param {object} details - request details passed by chrome.webRequest (unused).
 * @returns {{authCredentials: {username: string, password: string}}}
 */
function callbackFn(details) {
    return {
        authCredentials: {
            username: "PLACEHOLDER",
            password: "PLACEHOLDER"
        }
    };
}

// The URL filter must be a valid match pattern; an empty string is not one,
// so the listener has to be registered for "<all_urls>".
chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    {urls: ["<all_urls>"]},
    ['blocking']
);
30 |
--------------------------------------------------------------------------------
/proxy_auth_plugin/manifest.json:
--------------------------------------------------------------------------------
1 |
2 | {
3 | "version": "1.0.0",
4 | "manifest_version": 2,
5 | "name": "Chrome Proxy",
6 | "permissions": [
7 | "proxy",
8 | "tabs",
9 | "unlimitedStorage",
10 | "storage",
11 | "<all_urls>",
12 | "webRequest",
13 | "webRequestBlocking"
14 | ],
15 | "background": {
16 | "scripts": ["background.js"]
17 | },
18 | "minimum_chrome_version":"22.0.0"
19 | }
20 |
--------------------------------------------------------------------------------
/src/Proxy.py:
--------------------------------------------------------------------------------
1 | """
2 | File access Webshare.io API to get required proxy data
3 | """
4 |
5 | import requests
6 | from src.DatabaseHelper import *
7 |
# Token sent in the Authorization header of every Webshare.io request below.
APIKEY = "PLACEHOLDER"

# Database credentials are read from utilities/db_credentials.json, located
# relative to this module.
base_path = Path(__file__).parent
file_path = (base_path / "../utilities/db_credentials.json").resolve()
with open(file_path) as file:
    db_credentials = json.load(file)

# One connection and one cursor, created at import time and shared by all
# helper functions in this module.
conn = psycopg2.connect(
    **{key: db_credentials.get(key) for key in ('host', 'database', 'user', 'password')})
cur = conn.cursor()
21 |
22 |
def proxy(countries):
    """
    Fetch the Webshare.io proxy list for the given countries, grouped by country.

    :param countries: sequence of country-code strings; they are joined with "-"
        to build the ``countries`` query parameter. An empty sequence sends an
        empty filter instead of raising IndexError.
    :return: dict with the keys 'username', 'password' and 'proxies', where
        'proxies' maps each country code found in the response to a list of
        ``[proxy_address, http_port]`` pairs.
    """
    country_string = "-".join(countries)
    response = requests.get("https://proxy.webshare.io/api/proxy/list/?countries=" + country_string,
                            headers={"Authorization": "Token %s" % APIKEY})
    proxies = {}
    # The loop variable is deliberately not called `proxy`, which would shadow
    # this function's own name inside its body.
    for entry in response.json()['results']:
        proxies.setdefault(entry['country_code'], []).append(
            [entry['proxy_address'], entry['ports']['http']])
    return {'username': 'PLACEHOLDER', 'password': 'PLACEHOLDER', 'proxies': proxies}
42 |
43 |
def get_db_proxy(country):
    """
    Look up the active proxy for a country through a fresh DatabaseHelper.

    :param country: value forwarded to ``get_active_proxy_from_db``
    :return: tuple ``(proxy_host, proxy_port)``
    """
    host, port = DatabaseHelper().get_active_proxy_from_db(country)
    return host, port
48 |
49 |
def update_proxy_db():
    """
    Insert every proxy returned by the Webshare.io list endpoint into the
    ``proxies`` table. Rows that already exist are left untouched
    (``on conflict ... do nothing``).

    The module-wide cursor is intentionally NOT closed here: it is shared by all
    other helpers in this module, and closing it would make every later call fail.
    """
    response = requests.get("https://proxy.webshare.io/api/proxy/list/", headers={"Authorization": "Token %s" % APIKEY})
    sql = """insert into proxies(host, port, is_blocked, country, currently_used, start_usage)
            values(%s,%s,%s,%s,%s, current_timestamp) on conflict on constraint proxies_pkey do nothing"""
    for entry in response.json()['results']:
        cur.execute(sql, (entry.get('proxy_address'), entry['ports'].get('http'),
                          'false', entry.get('country_code'), 'false',))
    # A single commit covers the whole batch.
    conn.commit()
59 |
60 |
def find_disposable_proxy(country):
    """
    List proxies of a country that are stored in the database without an
    assigned user and are also present in the current Webshare.io list.

    :param country: country code used both for the API query and the DB filter
    :return: list of ``[host, port]`` pairs (both strings, whitespace-stripped);
        empty when the API reports no proxies for the country.
    """
    # `proxy()` only creates a key for countries that occur in the response.
    proxy_data = proxy([country]).get('proxies').get(country) or []
    # Match host and port as a pair: checking them independently would accept
    # a host combined with the port of a different proxy.
    available = {(entry[0], str(entry[1])) for entry in proxy_data}

    sql = """select host, port from proxies where user_using_this_proxy is null and country = %s"""
    cur.execute(sql, (country,))

    disposable_proxies = []
    for row in cur.fetchall():
        host = row[0].strip()
        port = row[1].strip()
        if (host, port) in available:
            disposable_proxies.append([host, port])
    return disposable_proxies
74 |
75 |
def get_new_proxy(country):
    """
    Pick the first Webshare.io proxy of a country whose host is not yet stored
    in the ``proxies`` table.

    :param country: country code passed to the Webshare.io query
    :return: tuple ``(host, port)`` of the first unknown proxy, or ``None`` when
        the API lists no proxy for the country or every listed host is already
        in the database.
    """
    # `proxy()` only creates a key for countries that occur in the response.
    proxy_data = proxy([country]).get('proxies').get(country) or []
    sql = """select host from proxies"""
    cur.execute(sql, )
    # Build the lookup once instead of once per candidate.
    known_hosts = {row[0].strip() for row in cur.fetchall()}
    for host_port in proxy_data:
        if host_port[0] not in known_hosts:
            return host_port[0], host_port[1]
    return None
87 |
88 |
def delete_proxy_in_db(host, port):
    """
    Delete one proxy row from the ``proxies`` table and commit.

    The table is referenced without a database qualifier, like the other
    queries in this module, so the statement works with whichever database
    ``db_credentials.json`` points to.

    :param host: proxy host to delete
    :param port: proxy port to delete
    """
    sql = """delete from proxies where host = %s and port = %s"""
    cur.execute(sql, (host, port,))
    conn.commit()
94 |
95 |
def update_db_for_user(host, port, user):
    """
    Record which user is using the proxy identified by host and port, then commit.

    :param host: host of the proxy row to update
    :param port: port of the proxy row to update
    :param user: value written to ``user_using_this_proxy``
    """
    assignment = (user, host, port,)
    cur.execute("""update proxies set user_using_this_proxy = %s
                where host = %s and port = %s""", assignment)
    conn.commit()
101 |
102 |
def proxies_maintenance():
    """
    Synchronise the ``proxies`` table with the current Webshare.io proxy list.

    - Proxies whose host is returned by the API but not stored among the
      non-legacy rows are inserted.
    - Non-legacy rows whose host is no longer returned by the API are deleted
      via ``delete_proxy_in_db``.

    Comparison is by host only, as before. The table is referenced without a
    database qualifier, like the other queries in this module.
    """
    response = requests.get("https://proxy.webshare.io/api/proxy/list", headers={"Authorization": f"Token {APIKEY}"})
    api_proxies = response.json().get('results')

    # NOTE(review): `legacy != true` does not match rows where legacy is NULL
    # -- confirm that is intended.
    sql1 = """select host, port from proxies where legacy != true"""
    cur.execute(sql1, )
    db_proxies = [[row[0].strip(), row[1].strip()] for row in cur.fetchall()]

    # Build both host lookups once instead of once per comparison.
    db_hosts = {entry[0] for entry in db_proxies}
    api_hosts = {entry.get('proxy_address') for entry in api_proxies}

    # update db: add all missing proxies
    sql2 = """insert into proxies(host, port, country) values(%s,%s,%s)
            on conflict on constraint proxies_pkey do nothing"""
    for entry in api_proxies:
        if entry.get('proxy_address') not in db_hosts:
            cur.execute(sql2, (entry.get('proxy_address'),
                               entry.get('ports').get('http'),
                               entry.get('country_code'),))
    conn.commit()

    # update db: delete all proxies that no longer exist on webshare.io
    for host, port in db_proxies:
        if host not in api_hosts:
            delete_proxy_in_db(host, port)
138 |
--------------------------------------------------------------------------------
/src/SMSHandler.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | from flask import Flask, request, redirect
4 | from twilio.twiml.messaging_response import MessagingResponse
5 |
6 | import os
7 | from twilio.rest import Client
8 |
class SMSHandler:
    """
    Thin wrapper around the Twilio REST client: list the SMS sent to a phone
    number, fetch the first listed SMS, and extract the TikTok verification
    code from it. Creating phone numbers is declared but not implemented yet.
    """
    def __init__(self, database):
        """
        :param database: helper object providing ``get_previous_verification_code``
            and ``update_verification_code``
        """
        self.database = database
        account_sid = 'PLACEHOLDER ACCOUNT_SID'
        auth_token = 'PLACEHOLDER AUTH_TOKEN'
        self.client = Client(account_sid, auth_token)

    def get_cheapest_available_phone_number(self, country):
        """
        Get the cheapest available phone number for a country.
        Not implemented yet; currently returns None.
        :param country:
        :return:
        """

    def create_phone_number(self, country):
        """
        Create phone number for a specific country at the lowest price.
        Not implemented yet; currently returns None.
        :param country:
        :return:
        """

    def get_all_sms(self, phone_number):
        """
        Receive all SMS for a specific phone number.
        :param phone_number: recipient number passed to Twilio as ``to``
        :return: dict mapping ``str(message.date_created)`` to the message body
        """
        return {str(message.date_created): message.body
                for message in self.client.messages.list(to=phone_number)}

    def get_newest_sms_body(self, phone_number, phone_number_country_prefix_numerous):
        """
        Wait 10 seconds, then return the body of the first SMS Twilio lists for
        the number (presumably the newest one -- confirm Twilio's ordering).
        :param phone_number: number without country prefix
        :param phone_number_country_prefix_numerous: country prefix, prepended to phone_number
        :return: body of the first listed message
        :raises IndexError: if no message has been received for the number
        """
        # Give the SMS time to arrive before querying Twilio.
        time.sleep(10)
        adjusted_phone_number = str(phone_number_country_prefix_numerous) + phone_number
        return self.client.messages.list(to=adjusted_phone_number)[0].body

    def get_verification_code(self, test_user_id, phone_number, phone_number_country_prefix_numerous):
        """
        Return verification code from TikTok SMS. Attention, sometimes Twilio is quite slow, so the bot has to double
        check if received verification code is not already known. If that is the case, the bot waits a few more
        seconds and fetches again.
        :param test_user_id: user whose previously stored code is compared and updated
        :param phone_number: number without country prefix
        :param phone_number_country_prefix_numerous: country prefix of the number
        :return: the new verification code as a string of digits, or "Trigger Resend" if the SMS
            contains no recognisable code
        """
        newest_message = self.get_newest_sms_body(phone_number, phone_number_country_prefix_numerous)

        # handle different verification codes
        # Format 1: the code occupies characters 9..12 of the message.
        verification_code = newest_message[9:13]
        if not verification_code.isdigit():
            # Format 2: "... use <code> as ...".
            try:
                idx_start = newest_message.index('use')
                # Search for 'as' only after 'use'; otherwise the "as" inside a
                # leading "Please" is found first and the slice comes out empty.
                idx_end = newest_message.index('as', idx_start + 3)
                verification_code = ''.join(
                    char for char in newest_message[idx_start + 3:idx_end] if char.isdigit())
                if not verification_code.isdigit():
                    raise ValueError("Verificaiton Code not digit.")
            except ValueError as e:
                print("SMS: " + newest_message)
                print("Error: no verification code provided by TikTok, resend code.")
                print("Value Error: " + str(e))
                return "Trigger Resend"

        # check if verification different to previous one
        previous_code = self.database.get_previous_verification_code(test_user_id=test_user_id)
        if int(verification_code) == previous_code:
            print(f"Verification code {verification_code} seems to be too old for {test_user_id}, "
                  f"fetching again in 10secs.")
            time.sleep(10)
            # Return the retry's result; otherwise the stale code would be
            # handed back to the caller.
            return self.get_verification_code(test_user_id=test_user_id,
                                              phone_number=phone_number,
                                              phone_number_country_prefix_numerous=phone_number_country_prefix_numerous)

        self.database.update_verification_code(verification_code=verification_code,
                                               test_user_id=test_user_id)
        return verification_code
102 |
103 |
104 |
--------------------------------------------------------------------------------
/src/TestCase1_Loc.py:
--------------------------------------------------------------------------------
1 | from .WebHelper import *
2 |
3 | # idea: benchmarking level of different_posts_noise when checking for location based content
4 |
5 |
--------------------------------------------------------------------------------
/src/TestRun.py:
--------------------------------------------------------------------------------
1 |
2 | from src.DatabaseHelper import *
3 |
4 |
class TestRun:
    """
    Creates a test run in the database and stores per-user run data.

    :param test_data: list with one dict per test user. The keys read by this class are:
        [
            {
                "test_user_id": 1,
                "proxy": {"proxy_host": "...", "proxy_port": "...", "country": "..."},
                "browser_language": "en"
            },
            ...
        ]
    """

    def __init__(self, test_data):
        self.test_run_id = None
        self.database = DatabaseHelper()
        self.test_data = test_data
        self.create_test_run()

    def __enter__(self):
        print(f"TestRun {self.test_run_id} started.")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        print(f"TestRun {self.test_run_id} executed.")

    def create_test_run(self):
        """
        Claim the lowest unused id from ``testrunids`` and insert one ``testrun``
        row per entry of ``self.test_data``.

        On a lost connection (InterfaceError / OperationalError) a new
        DatabaseHelper is created and the whole operation is retried.
        :raises Exception: "Test run could not be created." for any other error,
            including the case that no unused id is left
        """
        try:
            # Claim the next free id and read it back in the same statement.
            # A separate "highest set id" query could return an id that this
            # run did not claim.
            self.database.cur.execute("""
                with next_id as (
                    select id from testrunids
                    where set = false
                    order by id asc
                    limit 1
                )

                update testrunids
                set set = true
                where id = (select id from next_id)
                returning id""")
            claimed = self.database.cur.fetchone()
            self.database.conn.commit()
            if claimed is None:
                raise LookupError("No unused test run id left in testrunids.")
            self.test_run_id = claimed[0]

            insert_sql = """insert into testrun(id,testuserid,ip_used,country,browser_language)
                        values(%s,%s,%s,%s,%s) on conflict on constraint testrun_pkey do nothing"""
            for item in self.test_data:
                proxy_info = item.get('proxy')
                self.database.cur.execute(insert_sql, (
                    self.test_run_id,  # id
                    item.get('test_user_id'),  # test_user_id
                    str(proxy_info.get('proxy_host')) + ':' + str(proxy_info.get('proxy_port')),  # ip_used
                    proxy_info.get("country"),
                    item.get("browser_language"),
                ))
                self.database.conn.commit()
        except (psycopg2.InterfaceError, psycopg2.OperationalError) as cursor_error:
            print(cursor_error)
            print("Instantiating db connection and trying to create test run again.")
            self.database = DatabaseHelper()
            self.create_test_run()
        except (Exception, psycopg2.DatabaseError) as error:
            print(error)
            raise Exception("Test run could not be created.") from error

    def store_test_duration(self, duration, test_user_id):
        """
        Store the duration of this test run for one test user.

        On a lost connection (InterfaceError / OperationalError) a new
        DatabaseHelper is created and the update is retried.
        :param duration: value written to ``testrun.duration``
        :param test_user_id: user whose row of this test run is updated
        :raises Exception: for any other error
        """
        try:
            sql = """update testrun
                        set duration = (%s) where id = (%s) and testuserid = (%s)"""
            self.database.cur.execute(sql, (duration, self.test_run_id, test_user_id))
            self.database.conn.commit()
        except (psycopg2.InterfaceError, psycopg2.OperationalError) as cursor_error:
            print(cursor_error)
            print("Instantiating db connection and trying to store test run data again.")
            self.database = DatabaseHelper()
            self.store_test_duration(duration, test_user_id)
        except (Exception, psycopg2.DatabaseError) as error:
            print(error)
            raise Exception("Test duration could not be stored.") from error
98 |
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/src/__init__.py
--------------------------------------------------------------------------------
/src/proxy_auth_plugin.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/src/proxy_auth_plugin.zip
--------------------------------------------------------------------------------
/utilities/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/utilities/.DS_Store
--------------------------------------------------------------------------------
/utilities/country_prefix.json:
--------------------------------------------------------------------------------
1 | {
2 | "United States": "+1",
3 | "United Kingdom": "+44",
4 | "Germany": "+49",
5 | "Canada": "+1",
6 | "Switzerland": "+41"
7 | }
--------------------------------------------------------------------------------
/utilities/proxy.zip/background.js:
--------------------------------------------------------------------------------
// Adapted from https://wonderproxy.com/blog/a-step-by-step-guide-to-setting-up-a-proxy-in-selenium/
//
// Background script of the proxy Chrome extension: routes all browser traffic
// through one fixed HTTP proxy and answers the proxy's auth challenge.

var config = {
    mode: "fixed_servers",
    rules: {
        singleProxy: {
            scheme: "http",
            host: "209.127.191.180",
            port: 9279
        },
        // Requests to this host are sent directly, not through the proxy.
        bypassList: ["foobar.com"]
    }
};

chrome.proxy.settings.set({value: config, scope: "regular"}, function () {
});

/**
 * Supplies the proxy credentials whenever an authentication challenge is raised.
 * @param {object} details - request details passed by chrome.webRequest (unused).
 * @returns {{authCredentials: {username: string, password: string}}}
 */
function callbackFn(details) {
    return {
        authCredentials: {
            username: "PLACEHOLDER",
            password: "PLACEHOLDER"
        }
    };
}

// The URL filter must be a valid match pattern; an empty string is not one,
// so the listener has to be registered for "<all_urls>".
chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    {urls: ["<all_urls>"]},
    ['blocking']
);
--------------------------------------------------------------------------------