├── .DS_Store ├── .idea ├── .gitignore ├── TikTok-Personalization-Investigation.iml ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── DataAnalysis ├── .DS_Store ├── .ipynb_checkpoints │ └── skip_gram_hashtags_v2-checkpoint.ipynb ├── Analysis_Methods.py ├── Analysis_Overview.py ├── Analysis_Text_Methods.py ├── SkipGramModel.py ├── SkipGramModelEvaluation.py ├── hashtags_to_ignore.json ├── invalid_lit_data.json ├── test_data_set.csv └── training_data_set.csv ├── README.md ├── Testing ├── .DS_Store ├── APItest.py ├── BlockedProxyHandling.py ├── Maintenance.py ├── ParalleliseTesting.py ├── TestInitializer.py ├── TestSets │ ├── .DS_Store │ ├── archive test data │ ├── cg_us_user-165-166.json │ ├── part_1_tests │ │ ├── cg_ca_user-119-120.json │ │ ├── cg_ca_user-121-122.json │ │ ├── cg_fr_user-55-56.json │ │ ├── cg_fr_user-65-67.json │ │ ├── cg_gb_user-68-69.json │ │ ├── cg_us_user-125-126.json │ │ ├── cg_us_user-137-138.json │ │ ├── cg_us_user-139-140.json │ │ ├── cg_us_user-141-142.json │ │ ├── cg_us_user-143-144.json │ │ ├── cg_us_user-147-148.json │ │ ├── cg_us_user-149-150.json │ │ ├── cg_us_user-161-162.json │ │ ├── cg_us_user-57-58.json │ │ ├── cg_us_user-72-73.json │ │ ├── cg_us_user-74-75.json │ │ ├── cg_us_user-93-94.json │ │ ├── cg_us_user-95-96.json │ │ ├── follow_gb_user-51-52.json │ │ ├── follow_gb_user-53-54.json │ │ ├── follow_us_user-153-154.json │ │ ├── follow_us_user-155-156.json │ │ ├── follow_us_user-47-48.json │ │ ├── follow_us_user-49-50.json │ │ ├── like_gb_user-61-62.json │ │ ├── like_gb_user-63-64.json │ │ ├── like_us_user-111-112.json │ │ ├── like_us_user-113-114.json │ │ ├── like_us_user-115-116.json │ │ ├── like_us_user-117-118.json │ │ ├── like_us_user-123-124.json │ │ ├── like_us_user-135-136.json │ │ ├── like_us_user-159-160.json │ │ ├── like_us_user-45-46.json │ │ ├── like_us_user-59-60.json │ │ ├── like_us_user-70-71.json │ │ ├── location-1_ca_user-99-100.json │ │ ├── 
location-1_us_user-97-98.json │ │ ├── location-2_ca_user-101-102.json │ │ ├── location-2_us_user-105-106.json │ │ ├── location-3_de_user-107-108.json │ │ ├── location-3_us_user-103-104.json │ │ ├── location-4-de_us_user-109-110.json │ │ ├── location-4-en_us_user-129-132.json │ │ ├── location-4-es_us_user-130-133.json │ │ ├── location-4-fr_us_user-131-134.json │ │ ├── test_user_11.json │ │ ├── vcr_us_user-127-128.json │ │ ├── vcr_us_user-145-146.json │ │ ├── vcr_us_user-151-152.json │ │ ├── vcr_us_user-157-158.json │ │ ├── vcr_us_user-163-164.json │ │ ├── vcr_us_user-77-78.json │ │ ├── vcr_us_user-79-80.json │ │ ├── vcr_us_user-81-82.json │ │ ├── vcr_us_user-83-84.json │ │ ├── vcr_us_user-85-86.json │ │ ├── vcr_us_user-87-88.json │ │ ├── vcr_us_user-89-90.json │ │ └── vcr_us_user-91-92.json │ └── test_user_167.json └── scratch_12.py ├── chromedriver.exe ├── gitignore ├── hashtags_to_ignore.json ├── main.py ├── ngrok.exe ├── proxy_auth_plugin.zip ├── proxy_auth_plugin ├── background.js └── manifest.json ├── src ├── DataStoring.py ├── DatabaseHelper.py ├── Proxy.py ├── SMSHandler.py ├── TestCase1_Loc.py ├── TestRun.py ├── WebHelper.py ├── __init__.py └── proxy_auth_plugin.zip └── utilities ├── .DS_Store ├── Final Test Data ├── test_data_set.csv └── training_data_set.csv ├── Value_Dataset_TikTokdata.json ├── clean_emojis.csv ├── country_prefix.json ├── hashtag_translations.json ├── hashtag_translations_old.json ├── proxy.zip └── background.js ├── test_data_set.csv └── training_data_set.csv /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/.DS_Store -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP 
Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /.idea/TikTok-Personalization-Investigation.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 14 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 24 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /DataAnalysis/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/DataAnalysis/.DS_Store -------------------------------------------------------------------------------- /DataAnalysis/Analysis_Overview.py: -------------------------------------------------------------------------------- 1 | from 
import itertools

from Analysis_Methods import *
from Analysis_Text_Methods import *
from SkipGramModel import *
from SkipGramModelEvaluation import *

# Driver script: declares which test users belong to which test scenario and, when executed
# directly, runs the selected analyses for every user group of the active scenarios.

# TEST SCENARIOS
# Most scenarios are lists of [user, user] id pairs; the Location scenarios are flat id lists.

action_type = 'Like'
all_test_user_ids_like = [[22, 24], [25, 26], [27, 28], [29, 30], [31, 32], [33, 34], [35, 36], [45, 46], [59, 60],
                          [61, 62], [63, 64], [70, 71], [111, 112], [113, 114], [115, 116], [117, 118], [123, 124],
                          [135, 136], [159, 160]]
tests_like_5_batches = [[45, 46], [59, 60], [61, 62], [63, 64], [70, 71]]
tests_like_3_batches = [[113, 114], [135, 136], [115, 116], [117, 118], [123, 124], [159, 160]]
excluded_like_users = [[22, 24], [25, 26], [27, 28], [29, 30], [31, 32], [33, 34], [35, 36], [111, 112]]

# action_type = 'Follow'
all_test_follow_3_batches = [[47, 48], [49, 50], [53, 54], [153, 154], [155, 156]]
excluded_follow_users = [[51, 52]]

# action_type = 'Video View Rate'
all_test_vvr_3_batches = [[77, 78], [79, 80], [81, 82], [83, 84], [85, 86], [87, 88], [91, 92], [145, 146], [151, 152],
                          [157, 158]]
# tests_vvr_personas_3_batches = [[87, 88], [91, 92], [145, 146], [151, 152], [157, 158]]
excluded_vvr_users = [[89, 90], [127, 128]]

# action_type = 'Control Group'
excluded_control_groups_5_batches = [[93, 94]]
new_control_group_3_batches = [[143, 144], [147, 148], [149, 150]]
# not part of the 5-batch control group: [38, 39], [40, 41], [55, 56], [57, 58], [65, 67], [68, 69]
control_group_5_batches = [[72, 73], [74, 75], [95, 96]]
# not part of the 3-batch control group: [119, 120], [121, 122]
control_group_3_batches = [[125, 126], [137, 138], [139, 140], [141, 142], [143, 144], [147, 148], [149, 150]]

# action_type = 'Location'
diff_country_same_language_3_batches = [97, 98, 99, 100]
# diff_country_same_language_switching_country_loc_3_batches = [101, 102, 105, 106]  # => EXCLUDED !
diff_country_diff_language_switching_country_3_batches = [103, 104, 107, 108]
same_country_diff_language_3_batches = [109, 110, 129, 130, 131, 132, 133, 134]

# action_type = "Collaborative Filtering"
# Every unordered pair of these users, in the same order as the former hand-written 91-element literal.
collaborative_filtering_users = [87, 88, 91, 92, 123, 124, 145, 146, 151, 152, 157, 158, 159, 160]
collaborative_filtering_groups = [list(pair) for pair in itertools.combinations(collaborative_filtering_users, 2)]

# All scenarios that can be analysed. 'users' and 'batch' are parallel lists: users[i] was run with batch[i].
all_test_groups = {
    'Like': {
        'users': [tests_like_5_batches, tests_like_3_batches],
        'batch': [5, 3]
    },
    'Follow': {
        'users': [all_test_follow_3_batches],
        'batch': [3]
    },
    'Video View Rate': {
        'users': [all_test_vvr_3_batches],
        'batch': [3]
    },
    'Control Group': {
        'users': [control_group_5_batches, control_group_3_batches],
        'batch': [5, 3]
    },
    'Location': {
        'users': [diff_country_same_language_3_batches],
        'batch': [3]
    }
}

# Scenarios analysed by the current run. Previously `test_groups` was assigned the full dict and then
# immediately overwritten with the Location-only dict; the selection is now explicit, the value is the same.
active_action_types = ('Location',)
test_groups = {name: all_test_groups[name] for name in active_action_types}


def _load_skip_gram_model_evaluation():
    """
    Build the SkipGramModelEvaluation instance required by visualize_similarities().

    Restored from the initialisation that was commented out in the main section, without which the
    non-Location branch failed with a NameError.
    NOTE(review): assumes the model artefacts SkipGramModelEvaluation needs already exist - confirm.
    :return: SkipGramModelEvaluation instance
    """
    _, test_data_dict = get_test_data(test_users=[53, 54, 91, 92, 123, 124])
    # get_training_data() removes posts from test_data_dict in place and rewrites training_data_set.csv;
    # its return value is only needed when a model is trained.
    get_training_data(test_hashtags=test_data_dict)
    return SkipGramModelEvaluation(embedding_size=300, test_data=test_data_dict,
                                   frequencies=[100, 500, 1000], epochs=5, lr=0.1,
                                   max_freq=100000, min_freq=2)


if __name__ == '__main__':

    # Noise computed from the control groups; only the overall average is used below.
    _, _, noise_avg_overall_runs_overall_users_computation_5 = \
        compute_noise_control_scenarios(control_group_5_batches, 5)
    # the last two 3-batch control pairs are left out of the noise computation
    _, _, noise_avg_overall_runs_overall_users_computation_3 = \
        compute_noise_control_scenarios(control_group_3_batches[:-2], 3, False)
    # noise_all_computation_3_unfinished, noise_run_computation_3_unfinished = \
    #     compute_noise_control_scenarios(control_group_3_batches, 3, True)

    noise_by_batch_size = {
        3: noise_avg_overall_runs_overall_users_computation_3,
        5: noise_avg_overall_runs_overall_users_computation_5,
    }
    skip_gram_model_evaluation = None  # created lazily, only the non-Location analyses need it

    for group, group_config in test_groups.items():
        # zip() keeps users and batch size aligned even if two entries share the same batch size
        # (list.index() always returned the first match).
        for users, batch in zip(group_config['users'], group_config['batch']):
            test_group = {
                "Action_Type": group,
                "Batch_Size": batch,
                "Users": users,
                "Account_Of_Unfinished_Scenarios": False
            }

            # DIFFERENCE ANALYSIS OF POSTS OF LOCATION TESTS
            if test_group["Action_Type"] == "Location":
                heatmap_location(diff_country_same_language_3_batches,
                                 noise_avg_overall_runs_overall_users_computation_3)
                heatmap_location(diff_country_diff_language_switching_country_3_batches,
                                 noise_avg_overall_runs_overall_users_computation_3, switching_loc=True)
                heatmap_location(same_country_diff_language_3_batches,
                                 noise_avg_overall_runs_overall_users_computation_3)
                continue

            # TRAINING SKIP-GRAM MODEL FOR SIMILARITY ANALYSIS (kept for reference)
            # test_data_values, test_data_dict = get_test_data(test_users=[53, 54, 91, 92, 123, 124])
            # training_data = get_training_data(test_hashtags=test_data_dict)
            # for epochs in [9, 10]:
            #     for lr in [0.1]:
            #         skip_gram_model = SkipGramModel(max_freq=100000, min_freq=2, embedding_size=300,
            #                                         neg_sample_size=20, lr=lr, epochs=epochs,
            #                                         training_data=training_data)
            #         skip_gram_model_evaluation = SkipGramModelEvaluation(
            #             embedding_size=300, test_data=test_data_dict, frequencies=[100, 500, 1000],
            #             epochs=epochs, lr=lr, max_freq=100000, min_freq=2)
            #         visualize_similarities([123, 124], get_test_run_ids_2_user([123, 124])[:20], action_type,
            #                                skip_gram_model_evaluation, epochs, lr)

            # as some test scenarios did not complete all runs we have to reduce the number of test runs
            # for which we calculate the noises
            account_for_unfinished = test_group["Account_Of_Unfinished_Scenarios"]
            test_runs_to_consider = 0
            if account_for_unfinished:
                test_runs_to_consider = min(len(get_test_run_ids_2_user(pair)) for pair in test_group["Users"])

            if skip_gram_model_evaluation is None:
                skip_gram_model_evaluation = _load_skip_gram_model_evaluation()

            action_type = test_group["Action_Type"]
            for user_pair in test_group["Users"]:
                # control groups are analysed without noise correction
                noise = 0
                if action_type != 'Control Group':
                    noise = noise_by_batch_size.get(test_group["Batch_Size"], 0)
                print(f"Utilized noise: {noise}")

                # in general only consider the first 20 test runs of a test scenario
                test_run_ids = get_test_run_ids_2_user(user_pair)[:20]
                # The dict key used to be read as 'Account_For_Unfinished_Scenarios', which never exists,
                # so this truncation was silently skipped even when the flag was switched on.
                if account_for_unfinished:
                    test_run_ids = test_run_ids[:test_runs_to_consider]
                print(f"User pair: {user_pair}: {test_run_ids}")

                action_user = get_action_user(user_pair)
                print(f"*** ACTION USER IS {action_user}")

                # # DIFFERENCE ANALYSIS OF POSTS, HASHTAGS, CONTENT CREATORS, SOUND
                # difference_analysis(test_user_pair=user_pair, test_runs=test_run_ids,
                #                     action_type=action_type, noise=noise, action_user=action_user,
                #                     thesis_chart=False, account_for_drop=True)
                #
                # # POST METRICS ANALYSIS
                # development_of_post_metrics(test_run_ids, user_pair, action_type, action_user, thesis_chart=False)

                # REAPPEARANCE OF POST ATTRIBUTE ANALYSIS INCL DISTRIBUTION OF METRICS APPEARANCE OVER ALL TEST RUNS
                for metric in ['Hashtag', 'Content Creator', 'Sound']:
                    reappearance_analysis_of_metric(test_user_pair=user_pair, test_runs=test_run_ids,
                                                    metric=metric, action_type=action_type,
                                                    action_user=action_user, thesis_chart=False)

                # SIMILARITY / DIFFERENCE OF HASHTAG ANALYSIS
                visualize_similarities(user_pair, test_run_ids, action_type, skip_gram_model_evaluation, 5, 0.1,
                                       within_feed=True, thesis_chart=False)
                # generate_similarities_differences(test_user_pair=user_pair, test_runs=test_run_ids,
                #                                   action_type=action_type, hashtags=True,
                #                                   within_test_run=True, within_feed=False)

            # analyze gradient of post differences and error rate
            # generate_chart_error_rate_2_users(all_test_users=test_group.get('Users'), action_type=action_type)
def instance_db():
    """
    Open a PostgreSQL connection using the credentials stored in utilities/db_credentials.json.

    The credentials file is resolved relative to this module (``base_path``) and is expected to
    provide the keys 'host', 'database', 'user' and 'password'.
    :return: tuple (connection, cursor)
    """
    credentials_path = (base_path / "../utilities/db_credentials.json").resolve()
    with open(credentials_path) as credentials_file:
        credentials = json.load(credentials_file)

    connection = psycopg2.connect(
        host=credentials.get('host'),
        database=credentials.get('database'),
        user=credentials.get('user'),
        password=credentials.get('password'))
    return connection, connection.cursor()
def generate_similarities_differences(test_user_pair, test_runs, action_type, hashtags=False, description=False,
                                      within_feed=False, within_test_run=False):
    """
    Retrieve relevant data to compute and plot similarity / difference:
        - for posts within a feed itself (within_feed)
        - of posts from two feeds of the same test run (within_test_run, requires exactly two users)
    :param test_user_pair: ids of the test users to analyse
    :param test_runs: ids of the test runs to analyse
    :param action_type: action type label, passed on to the plot
    :param hashtags: use the posts' hashtags as text source (wins over description if both are set)
    :param description: use the posts' descriptions as text source
    :param within_feed: compute the values for each user's own feed per test run
    :param within_test_run: compute the values between the two users' feeds per test run
    :return: None, results are printed and plotted
    """
    relevant_data = {}
    text_source = ''
    if description:
        relevant_data = retrieve_description(test_user_pair, test_runs)
        text_source = 'descriptions'
    if hashtags:
        retrieved = retrieve_hashtags(test_user_pair, test_runs)
        # visualize_similarities() unpacks retrieve_hashtags() as (description_data, hashtag_data);
        # indexing that tuple by user id would fail, so pick the hashtag part when a tuple comes back.
        relevant_data = retrieved[1] if isinstance(retrieved, tuple) else retrieved
        text_source = 'hashtags'

    print(f"*** DATA RETRIEVED FOR TEXT SOURCE: {text_source}")
    print(relevant_data)

    # NOTE(review): visualize_similarities() calls plot_similarities_differences() with eight positional
    # arguments while this function passes four - confirm which signature is current.

    # Compute the similarity & difference of hashtags of posts within the same feed
    if within_feed:
        test_run_feed_similarities_differences = {}
        for user in test_user_pair:
            per_run = test_run_feed_similarities_differences[user] = {}
            for run in test_runs:
                # compute similarities and differences
                text_corpus_df, pairwise_similarities, pairwise_differences = \
                    compute_similarity_within_feed(relevant_data[user][run])

                # structure results from similarity & difference computation
                _, feed_similarity_differences = \
                    structuring_similarities_differences(text_corpus_df, pairwise_similarities, pairwise_differences)
                per_run[run] = {
                    'feed(s)_similarity': feed_similarity_differences.get('avg_sim_entire_feed'),
                    'feed(s)_difference': feed_similarity_differences.get('avg_diff_entire_feed')
                }
        print("*** COMPUTED SIMILARITIES & DIFFERENCES WITHIN FEED")
        print(test_run_feed_similarities_differences)
        plot_similarities_differences(test_run_feed_similarities_differences, text_source, 'within_feed',
                                      action_type)

    # Compute the similarity & difference of hashtags of posts from a feed across multiple testruns
    if len(test_user_pair) == 2 and within_test_run:
        first_user, second_user = test_user_pair
        test_run_two_feed_similarities_differences = {}
        for run in test_runs:
            per_text_set = list(compute_similarity_between_two_feeds(
                relevant_data[first_user][run], relevant_data[second_user][run]).values())
            all_sim = [entry.get('feed_similarity_to_other_feed') for entry in per_text_set]
            all_diff = [entry.get('feed_difference_to_other_feed') for entry in per_text_set]
            test_run_two_feed_similarities_differences[run] = {
                'feed(s)_similarity': sum(all_sim) / len(all_sim),
                'feed(s)_difference': sum(all_diff) / len(all_diff)
            }
        print("*** COMPUTED SIMILARITIES & DIFFERENCES BETWEEN FEEDS ACROSS ALL TEST RUNS")
        print(test_run_two_feed_similarities_differences)
        plot_similarities_differences(test_run_two_feed_similarities_differences, text_source, 'within_test_run',
                                      action_type)
def _add_hashtag(hashtags_by_post, post_id, hashtag):
    """Append hashtag to the list of post_id (creating it if needed) unless it is already listed."""
    post_hashtags = hashtags_by_post.setdefault(post_id, [])
    if hashtag not in post_hashtags:
        post_hashtags.append(hashtag)


def _write_hashtag_csv(file_name, hashtags_by_post):
    """Write one (post id, hashtag list) row per post to file_name next to this module."""
    file_path = (base_path / file_name).resolve()
    # newline='' is what the csv module asks for; without it every row is followed by a blank line on Windows
    with open(file_path, 'w', newline='') as f:
        csv.writer(f).writerows(hashtags_by_post.items())


def get_training_data(test_hashtags):
    """
    Create list of list of hashtags for all posts that shall be used in the training_data set.

    Posts of the test users (53, 54, 91, 92, 123, 124) are excluded by the query. Every test post that
    carries a hashtag unknown to the training posts is moved into the training set (with those unknown
    hashtags only) and removed from test_hashtags, i.e. the passed dictionary is modified in place.
    The result is also stored in training_data_set.csv.
    :param test_hashtags: dictionary {post_id: [hashtag, ...]} as returned by get_test_data()
    :return: list with one list of hashtags per training post
    """
    sql_hashtags_training = """select distinct p.id, phr.translation_english
                                from (select phr1.postid, h.id, h.translation_english
                                from d1rpgcvqcran0q.public.post_hashtag_relation phr1 join
                                d1rpgcvqcran0q.public.hashtags h on phr1.hashtagid = h.id) phr join
                                (select id from d1rpgcvqcran0q.public.posts where testuserid not in (53, 54, 91, 92, 123, 124)) p on p.id = phr.postid"""

    # retrieve training hashtag data
    cur.execute(sql_hashtags_training, ())
    training_hashtags_dict = {}
    training_hashtags = set()  # vocabulary of the training posts, a set keeps the lookups below cheap
    for post_id, translation in cur.fetchall():
        # a missing (NULL) translation is treated like an empty one instead of failing on None.strip()
        cur_hashtag = (translation or '').strip()
        if cur_hashtag == '' or post_id in test_hashtags:
            continue
        # The former if/else replaced the whole list of a post whenever a hashtag was seen twice
        _add_hashtag(training_hashtags_dict, post_id, cur_hashtag)
        training_hashtags.add(cur_hashtag)

    # test posts with hashtags the model would never see are handed over to the training set
    for post in list(test_hashtags):
        unseen_hashtags = [hashtag for hashtag in test_hashtags[post] if hashtag not in training_hashtags]
        if unseen_hashtags:
            training_hashtags_dict.setdefault(post, []).extend(unseen_hashtags)
            del test_hashtags[post]

    # store training hashtag data
    _write_hashtag_csv("training_data_set.csv", training_hashtags_dict)

    return list(training_hashtags_dict.values())


def get_test_data(test_users):
    """
    Collect the (english) hashtags of all posts of the given test users and store them in test_data_set.csv.
    :param test_users: iterable of test user ids
    :return: tuple (list with one list of hashtags per post, dictionary {post_id: [hashtag, ...]})
    """
    sql_hashtags_test = """select distinct p.id, phr.translation_english
                                from (select phr1.postid, h.id, h.translation_english
                                from d1rpgcvqcran0q.public.post_hashtag_relation phr1 join
                                d1rpgcvqcran0q.public.hashtags h on phr1.hashtagid = h.id) phr join
                                (select id from d1rpgcvqcran0q.public.posts where testuserid = %s) p on p.id = phr.postid"""

    # retrieve test hashtag data from database
    test_hashtags = {}
    for user in test_users:
        cur.execute(sql_hashtags_test, (user,))
        for post_id, translation in cur.fetchall():
            cur_hashtag = (translation or '').strip()
            if cur_hashtag != '':
                # see get_training_data(): a repeated hashtag must not reset the post's list
                _add_hashtag(test_hashtags, post_id, cur_hashtag)

    # store test hashtag data
    _write_hashtag_csv("test_data_set.csv", test_hashtags)

    return list(test_hashtags.values()), test_hashtags


def adjust_data_structure(dict):
    """
    Reduce {key: {'desc': value, ...}} to {key: value}.

    The parameter keeps its (builtin shadowing) name so that keyword callers continue to work.
    :param dict: dictionary whose values are dictionaries with a 'desc' entry
    :return: dictionary mapping every key to its 'desc' entry
    """
    return {key: entry.get('desc') for key, entry in dict.items()}
def sigmoid(x):
    """
    Helper function sigmoid, 1 / (1 + exp(-x)), evaluated without overflow.

    exp() is only ever applied to non-positive values: for x >= 0 the usual form 1 / (1 + exp(-x)) is
    used, for x < 0 the equivalent exp(x) / (1 + exp(x)). The plain formula overflows (RuntimeWarning,
    or FloatingPointError under np.errstate(over='raise')) for large negative x.
    :param x: scalar or array-like of numbers
    :return: numpy scalar for scalar input, otherwise an array of the same shape
    """
    values = np.asarray(x)
    decay = np.exp(-np.abs(values))  # always within (0, 1]
    result = np.where(values >= 0, 1 / (1 + decay), decay / (1 + decay))
    # [()] turns the 0-d array of a scalar input back into a numpy scalar and leaves real arrays untouched
    return result[()]


def preprocessing(posts):
    """
    Normalise the hashtags of every post: strip leading / trailing punctuation and lowercase them.
    :param posts: iterable of posts, each an iterable of hashtag strings
    :return: list of hashtag lists; posts without any hashtag are dropped
    """
    training_data = []
    for post in posts:
        cleaned = [hashtag.strip(string.punctuation).lower() for hashtag in post]
        if cleaned:
            training_data.append(cleaned)
    return training_data
print("***** Plotting training loss performance *****") 82 | self.plot_training_loss_performance() 83 | 84 | ### store model results ### 85 | print("***** Storing model results *****") 86 | self.store_data() 87 | 88 | print("***** Training of skip-gram model completed. *****") 89 | 90 | def data_loading_preprocessing(self): 91 | """ 92 | Data loading and preprocessing 93 | :return: 94 | """ 95 | # file_path = (self.base_path / "training_data_set.csv").resolve() 96 | # posts = np.genfromtxt(file_path, delimiter=',', dtype=np.dtype(str), usecols=1) 97 | # # splits the string of hashtags 98 | # posts = np.char.split(posts) 99 | # print(posts) 100 | 101 | self.posts = preprocessing(self.training_data) 102 | print("Set of hashtags for first three posts: \n", self.posts[:3]) 103 | print("Number of posts:", len(self.posts)) 104 | 105 | def filter_common_hashtags(self, pair): 106 | (hashtag, count) = pair 107 | if hashtag in self.filter_hashtags: 108 | return False 109 | else: 110 | return True 111 | 112 | def prep_data_for_training(self): 113 | """ 114 | Prepare data for training: 115 | - extract vocabulary of hashtags V 116 | - convert corpus into indices 117 | - extract pair (hashtag, context (i.e. 
hashtags that are co-occurring with hashtag)) 118 | - negative sampling 119 | :return: 120 | """ 121 | 122 | # count how often hashtags appear over all posts 123 | count = defaultdict(int) 124 | for post in self.posts: 125 | for hashtag in post: 126 | count[hashtag] += 1 127 | # sort hashtags by appearance frequency 128 | sorted_counts = sorted(count.items(), key=operator.itemgetter(1), reverse=True) 129 | 130 | # filter hashtags 131 | posts_filter_for_common_hashtags = list(filter(self.filter_common_hashtags, sorted_counts)) 132 | 133 | 134 | replaced_hashtags = [] 135 | for pair in posts_filter_for_common_hashtags: 136 | (hashtag, count) = pair 137 | if count < self.min_freq: 138 | replaced_hashtags.append('unk') 139 | elif count > self.max_freq: 140 | replaced_hashtags.append('unk') 141 | else: 142 | replaced_hashtags.append(hashtag) 143 | 144 | self.vocabulary = replaced_hashtags 145 | 146 | # Assign ids and create lookup tables 147 | for idx, hashtag in enumerate(self.vocabulary, 0): 148 | self.hashtag_to_index[hashtag] = idx 149 | if hashtag not in self.index_to_hashtag: 150 | self.index_to_hashtag.append(hashtag) 151 | 152 | assert len(self.index_to_hashtag) == len(self.hashtag_to_index) 153 | print("Number of hashtag (unfiltered):", len(sorted_counts)) 154 | print("Number of hashtag (filtered):", len(self.index_to_hashtag)) 155 | 156 | # transforming dataset by replacing the words with their index. 157 | posts_index = [] 158 | for post in self.posts: 159 | ids = [] 160 | for hashtag in post: 161 | # only add hashtags that are in the vocabulary!!! 
(all others dropped) 162 | if hashtag in self.hashtag_to_index: 163 | ids.append(self.hashtag_to_index[hashtag]) 164 | posts_index.append(ids) 165 | print("First three posts represented by the indices of their hashtags:") 166 | print(posts_index[:3]) 167 | print("Number of posts (after indexing them):", len(posts_index)) 168 | print("Number of hashtags (including duplications)", sum([len(x) for x in posts_index])) 169 | 170 | ## Extract pair (hashtag, context) 171 | # initializing the training samples (an array containing one array per hashtag in the vocabulary with all context-word-pairs) 172 | training_samples = [[]] * len(self.index_to_hashtag) 173 | 174 | # used for descriptive statistics (to check that it works) 175 | count = 0 176 | counts = [] 177 | 178 | # iterate through all posts 179 | for post in tqdm(posts_index): 180 | interim_count = 0 181 | # iterate through all hashtags of a post 182 | for i in range(len(post)): 183 | hashtag = post[i] 184 | # iterate through the context of that hashtag 185 | for j in range(0, len(post)): 186 | if j != i: 187 | interim_count += 1 188 | context_hashtag = post[j] 189 | 190 | # add context-hashtag-pair to the training samples 191 | if len(training_samples[hashtag]) == 0: 192 | training_samples[hashtag] = [(hashtag, context_hashtag)] 193 | else: 194 | training_samples[hashtag].append((hashtag, context_hashtag)) 195 | count += interim_count 196 | counts.append(interim_count) 197 | # displays the number of context-hashtag-pairs per post as histogram 198 | fig = plt.figure() 199 | plt.hist(counts) 200 | fig.suptitle("Histogram of the number of training samples/context-hashtag-pairs per post:") 201 | plt.xlabel('Number of training samples') 202 | plt.ylabel('Number of posts') 203 | plt.show() 204 | 205 | # Total number of context-word-pairs (training samples) 206 | print("Number of training samples/context-hashtag-pairs:", sum([len(x) for x in training_samples])) 207 | print("Manual count of training samples to validate:", 
count) 208 | 209 | # Negative Sampling 210 | # initialize array to capture training samples 211 | self.training_samples_incl_neg = [[]] * len(self.index_to_hashtag) 212 | 213 | # filter out all hashtags from the posts that are not in the vocabulary to get the frequency of all hashtags appearing in the corpus 214 | all_hashtag_rep = list(filter(lambda x: x in self.index_to_hashtag, [inner for outer in self.posts for inner in outer])) 215 | 216 | # iterate through the array of arrays with the context-hashtag-pairs (training samples) 217 | for hashtag_samples_ind in tqdm(range(len(training_samples))): 218 | hashtag_pairs_and_neg = [] 219 | # iterate through the array with the context-hashtag-pairs (done for each word in the vocabulary) 220 | for sample in training_samples[hashtag_samples_ind]: 221 | neg_samples = [] 222 | # repeat for K negative samples 223 | for i in range(self.K): 224 | same_as_context = True 225 | # while the randomly chosen sample (by choosing a random hashtag in the filtered set of all posts) is equal to the context hashtag, 226 | # we choose a new one, else we add it to the list of negative samples. 227 | while same_as_context: 228 | neg = all_hashtag_rep[random.randint(0, len(all_hashtag_rep) - 1)] 229 | neg_ind = self.hashtag_to_index[neg] 230 | same_as_context = neg_ind == sample[1] 231 | neg_samples.append(self.hashtag_to_index[neg]) 232 | # create a tuple (w_i, w_t, C) where C = [(w_0^-, ..., w_20^-)] for every context-hashtag-pair 233 | hashtag_pairs_and_neg.append(sample + (neg_samples,)) 234 | self.training_samples_incl_neg[hashtag_samples_ind] = hashtag_pairs_and_neg 235 | 236 | def training(self): 237 | """ 238 | Training the model based on extracted and preprocessed training data and defined parameters. 239 | # Learning: calculate gradient, set training parameters, train 240 | Plot training performance. 
241 | :return: 242 | """ 243 | # training 244 | np.random.seed(42) 245 | random.seed(42) 246 | 247 | # vectorization of the training samples 248 | vectorized_training_samples = [inner for outer in self.training_samples_incl_neg for inner in outer] 249 | 250 | # initialization of weights to be between -0.8 and 0.8 251 | self.W = np.random.rand(len(self.vocabulary), self.N).astype(np.float128) 252 | self.W_prime = np.random.rand(self.N, len(self.vocabulary)).astype(np.float128) 253 | self.W = (2 * self.W - 1) * 0.8 254 | self.W_prime = (2 * self.W_prime - 1) * 0.8 255 | 256 | # normalize vectors to mitigate difference of vector length and only have difference of vector angle 257 | self.W = normalize(self.W, axis=1, norm='l2') 258 | 259 | # iterate through the number of epochs 260 | for i in range(self.epochs): 261 | print("Epoch", i + 1) 262 | epoch_loss = 0 263 | count = 0 264 | 265 | # shuffle training samples to make model more robust 266 | random.shuffle(vectorized_training_samples) 267 | 268 | t = tqdm(vectorized_training_samples, desc="loss: {:.4f}".format(epoch_loss)) 269 | 270 | # iterate through all samples of the training set 271 | for sample in t: 272 | wi = sample[0] 273 | wt = sample[1] 274 | C_minus = sample[2] 275 | 276 | # get the embedding of the hashtag and the context hashtags 277 | e_wi = self.W[wi] 278 | e_wt = self.W_prime[:, wt] 279 | 280 | # temporary variable to sum up the product between the embedding of the hashtag 281 | # and the sigmoid of the dot product of the embedding of the context hashtags and the hashtag 282 | s = 0 283 | # temporary variable to sum up the step loss 284 | step_loss = 0 285 | 286 | # iterate through negative samples 287 | for wm in C_minus: 288 | # get embedding of the negative sample 289 | e_wm = self.W_prime[:, wm] 290 | # update the weight of the context matrix for the (negative) sampled hashtag using GD 291 | # TODO remove "(i+1)", dividing by to reduce loss even stronger which may result that loss diverges 
again 292 | # TODO check how it influences loss using / not using it 293 | self.W_prime[:, wm] = e_wm - self.lr/(i+1) * sigmoid(np.dot(e_wi, e_wm)) * e_wi 294 | # add to the temporary variable as described above 295 | s += sigmoid(np.dot(e_wi, e_wm)) * e_wm 296 | # add to the step loss 297 | t_step_loss = 1 - sigmoid(np.dot(e_wi, e_wm)) 298 | # case distinction for numerical stability 299 | if t_step_loss <= 0: 300 | step_loss -= np.log(10 ** -10) 301 | else: 302 | step_loss -= np.log(t_step_loss) 303 | 304 | # update weights of the hashtag embeddings 305 | self.W[wi] = e_wi - self.lr/(i+1) * ((sigmoid(np.dot(e_wi, e_wt)) - 1) * e_wt + s) 306 | 307 | # update weights of the context hashtag 308 | self.W_prime[:, wt] = e_wt - self.lr/(i+1) * (sigmoid(np.dot(e_wi, e_wt)) - 1) * e_wi 309 | 310 | # add to step loss 311 | step_loss -= np.log(sigmoid(np.dot(e_wi, e_wt))) 312 | epoch_loss += step_loss 313 | self.step_losses.append(step_loss) 314 | 315 | # for bookkeeping and updating loss 316 | count += 1 317 | if epoch_loss / count == np.inf: 318 | print(count) 319 | if count % 1000 == 0: 320 | t.set_description("loss: {:.8f}".format(epoch_loss / count)) 321 | t.refresh() 322 | 323 | # normalize updated weights 324 | self.W = normalize(self.W, axis=1, norm='l2') 325 | 326 | epoch_loss = epoch_loss / len(vectorized_training_samples) 327 | print("Loss", epoch_loss) 328 | self.epoch_losses.append(epoch_loss) 329 | 330 | def plot_training_loss_performance(self): 331 | """ 332 | Visualizing the loss performance of the current training session. 
333 | :return: 334 | """ 335 | # Plot loss during the epochs 336 | fig = plt.figure() 337 | plt.plot(range(self.epochs), self.epoch_losses) 338 | fig.suptitle("Loss progression") 339 | plt.xlabel('Epochs') 340 | plt.ylabel('Loss') 341 | plt.savefig(self.base_path / f"sgm_resources/lossprogressionepochs_epochs{self.epochs}_lr{self.lr}.png", 342 | bbox_inches='tight') 343 | 344 | # plot losses during the individual context-word-negative-sample-tuples 345 | fig = plt.figure() 346 | plt.plot(range(len(self.step_losses)), self.step_losses) 347 | fig.suptitle("Loss progression") 348 | plt.xlabel('Training steps') 349 | plt.ylabel('Loss') 350 | plt.show() 351 | 352 | # plot loss averaged over 1000 successive context-word-negative-sample-tuples 353 | step_losses_thousands = [] 354 | sum = 0 355 | for i in range(len(self.step_losses)): 356 | sum += self.step_losses[i] 357 | if i % 1000 == 0: 358 | step_losses_thousands.append(sum / 1000) 359 | sum = 0 360 | 361 | fig = plt.figure() 362 | plt.plot(range(len(step_losses_thousands)), step_losses_thousands) 363 | fig.suptitle("Loss progression") 364 | plt.xlabel('Training steps in 1000s') 365 | plt.ylabel('Loss') 366 | plt.show() 367 | 368 | def store_data(self): 369 | """ 370 | Store results from model training. 
371 | :param W: 372 | :param self.hashtag_to_index: 373 | :param self.index_to_hashtag: 374 | :return: 375 | """ 376 | 377 | # saving embedding weights in csv file 378 | print(self.W) 379 | file_embedding_weights_csv = (self.base_path / f"sgm_resources/embedding_weights_epochs{self.epochs}_lr{self.lr}.csv").resolve() 380 | np.savetxt(file_embedding_weights_csv, self.W, delimiter=',') 381 | 382 | # self.hashtag_to_index 383 | file_hashtag_to_index = (self.base_path / f"sgm_resources/sgm_hashtag_to_index_epochs{self.epochs}_lr{self.lr}.json").resolve() 384 | with open(file_hashtag_to_index, 'w') as f: 385 | json.dump(self.hashtag_to_index, f) 386 | 387 | # self.index_to_hashtag 388 | file_index_to_hashtag = (self.base_path / f"sgm_resources/sgm_index_to_hashtag_epochs{self.epochs}_lr{self.lr}.json").resolve() 389 | with open(file_index_to_hashtag, 'w') as f: 390 | json.dump(self.index_to_hashtag, f) 391 | -------------------------------------------------------------------------------- /DataAnalysis/SkipGramModelEvaluation.py: -------------------------------------------------------------------------------- 1 | import json 2 | import operator 3 | import string 4 | from collections import defaultdict 5 | from pathlib import Path 6 | from matplotlib import pyplot as plt 7 | from sklearn.manifold import TSNE 8 | 9 | import seaborn as sn 10 | import pandas as pd 11 | import numpy as np 12 | 13 | # Special thanks to Jan Scholich (janscho@student.ethz.ch) for significantly contributing to the implementation of 14 | # the Skip Gram Model as outlined below. 
# Skip Gram Model Evaluation from trained model

def cosine_similarity(e_x, e_y):
    """
    Cosine similarity calculation

    No special handling of zero vectors: the division is executed as is.

    :param e_x: first vector
    :param e_y: second vector
    :return: dot product of both vectors divided by the product of their Euclidean norms
    """
    vec_dot = np.dot(e_x, e_y)
    norm = np.linalg.norm(e_x) * np.linalg.norm(e_y)
    return vec_dot / norm


def preprocessing(hashtag):
    """
    Normalise one hashtag: strip leading/trailing punctuation, then lower-case.

    :param hashtag: raw hashtag string
    :return: the normalised hashtag, or None if nothing is left after cleaning
    """
    # the stripped value has to be lower-cased, not the raw input
    cleaned = hashtag.strip(string.punctuation).lower()
    if cleaned:
        return cleaned
    return None


class SkipGramModelEvaluation:
    """
    Evaluation of a trained skip-gram model whose results are stored below
    ``sgm_resources``.

    Constructing an instance loads the model, prints example similarities,
    stores the t-SNE charts and, if test data is given, the post heatmap.
    """

    # hard-coded sanity-check examples for example_analysis_embeddings
    EXAMPLE_PAIRS = [
        ("cooking", "chocolate"),
        ("apple", "iphone"),
        ("covid19", "coronavirus"),
        ("beerpong", "drink"),
        ("bike", "ride"),
        ("neymar", "messi"),
    ]
    EXAMPLE_WORDS = ["love", "car", "president", "monday", "green", "money", "health", "faith", "book", "france",
                     "swiss", "spring",
                     "food", "home", "law", "america"]
    # maximum number of posts shown in the similarity heatmap
    HEATMAP_POSTS = 20

    def __init__(self, embedding_size, frequencies, epochs, max_freq, min_freq, lr, test_data=None):
        """
        :param embedding_size: dimension N of the stored embeddings
        :param frequencies: iterable of ints; one chart of the first ``frequency`` vocabulary entries is stored for each
        :param epochs: number of epochs of the stored model (part of the file names)
        :param max_freq: test hashtags appearing more often than this are removed
        :param min_freq: test hashtags appearing less often than this are removed
        :param lr: learning rate of the stored model (part of the file names)
        :param test_data: optional dict post id -> list of hashtags
        """
        self.base_path = Path(__file__).parent
        self.N = embedding_size
        self.epochs = epochs
        self.W = None
        self.hashtag_to_index = None
        self.index_to_hashtag = None
        # appears max times
        self.max_freq = max_freq
        # appears min times
        self.min_freq = min_freq
        # filter hashtags; the context manager closes the file handle again
        file_fyp_hashtags = (self.base_path / "hashtags_to_ignore.json").resolve()
        with open(file_fyp_hashtags) as f:
            self.filter_hashtags = list(json.load(f).values())
        self.lr = lr
        self.posts_no_embedding = {}
        self.test_data = test_data
        print("***** Starting evaluation of SGM *****")
        self.import_model_results()
        self.example_analysis_embeddings()
        self.visualizing_hashtag_embeddings(frequencies)
        if self.test_data is not None:
            self.evaluate_test_data()
            print("***** Posts for which 0 hashtags have a pretrained embedding: ", self.posts_no_embedding)
        print("***** Evaluation of SGM completed *****")

    def import_model_results(self):
        """
        Import hashtag embedding weights, hashtag_to_index, and index_to_hashtag
        Also writes the weights to 'embedding.npy' in the current working directory.
        :return:
        """
        # import embedding weights
        file_embedding_weights_csv = (self.base_path / f"sgm_resources/embedding_weights_epochs{self.epochs}_lr{self.lr}.csv").resolve()
        self.W = np.genfromtxt(file_embedding_weights_csv, delimiter=',')
        np.save('embedding.npy', self.W)

        # import hashtags_to_index
        file_hashtag_to_index = (self.base_path / f"sgm_resources/sgm_hashtag_to_index_epochs{self.epochs}_lr{self.lr}.json").resolve()
        with open(file_hashtag_to_index) as f:
            self.hashtag_to_index = json.load(f)

        # import index_to_hashtags
        file_index_to_hashtag = (self.base_path / f"sgm_resources/sgm_index_to_hashtag_epochs{self.epochs}_lr{self.lr}.json").resolve()
        with open(file_index_to_hashtag) as f:
            self.index_to_hashtag = json.load(f)

    def example_analysis_embeddings(self):
        """
        Evaluating model performance based on analysis of example words.
        Examples that are not part of the loaded vocabulary are reported and skipped.
        :return:
        """
        print("| x | y | sim(x,y) | ")
        print("|--------|---------|--------------|")
        for x, y in self.EXAMPLE_PAIRS:
            # the examples are hard-coded, a trained vocabulary does not have to contain them
            if x not in self.hashtag_to_index or y not in self.hashtag_to_index:
                print("|", x, "|", y, "| not in vocabulary |")
                continue
            e_x = self.W[self.hashtag_to_index[x]]
            e_y = self.W[self.hashtag_to_index[y]]
            sim = cosine_similarity(e_x, e_y)
            print("|", x, "|", y, "|", sim, "|")

        print("| x | y | sim(x,y) | ")
        print("|--------|---------|--------------|")

        for x in self.EXAMPLE_WORDS:
            if x not in self.hashtag_to_index:
                print("|", x, "| not in vocabulary |")
                continue
            e_x = self.W[self.hashtag_to_index[x]]
            W_sim = np.apply_along_axis(lambda y: cosine_similarity(e_x, y), 1, self.W)
            # exclude the word itself from the search for its nearest neighbour
            W_sim[self.hashtag_to_index[x]] = 0
            y = self.index_to_hashtag[np.argmax(W_sim)]
            print("|", x, "|", y, "|", np.max(W_sim), "|")

    def visualizing_hashtag_embeddings(self, frequencies):
        """
        Plotting for different frequencies the hashtag embeddings resulted from the Skip-Gram model.
        :param frequencies: iterable of ints, number of leading vocabulary entries per chart
        :return:
        """
        tokens = [self.W[i, :] for i in range(len(self.index_to_hashtag))]

        tsne_model = TSNE(perplexity=40, n_components=2, init='pca', n_iter=2500, random_state=23)
        new_values = tsne_model.fit_transform(tokens)
        print(new_values[:3])

        for frequency in frequencies:
            # plots the first `frequency` hashtags of the vocabulary in 2D
            x = np.transpose(new_values[:frequency])[0]
            y = np.transpose(new_values[:frequency])[1]
            n = self.index_to_hashtag[:frequency]

            fig, ax = plt.subplots(figsize=(24, 16))
            ax.scatter(x, y)
            ax.title.set_text(f"{frequency} Most Frequent Hashtags")

            for i, txt in enumerate(n):
                ax.annotate(txt, (x[i], y[i]))
            plt.savefig(
                self.base_path / f"sgm_resources/{frequency}_mostfrequenthashtags_epochs{self.epochs}_lr{self.lr}.png",
                bbox_inches='tight')
            print(f"Chart visualizing {frequency} Most Frequent Hashtags stored for {self.epochs}.")

    def post_avg_embedding(self, post_hashtags):
        """
        calculates the average of the post's hashtags' embeddings
        expects list of hashtags; hashtags without an embedding use the one of 'unk'
        :param post_hashtags: list of hashtags
        :return: mean embedding vector of length ``self.N``
        :raises Exception: if the post has no hashtags
        :raises KeyError: if a hashtag is unknown and the vocabulary has no 'unk' entry
        """
        in_vocab = 0
        avg_vec = np.zeros(self.N)
        for hashtag in post_hashtags:
            if hashtag in self.hashtag_to_index:
                ind_hashtag = self.hashtag_to_index[hashtag]
            else:
                # retrieve synonym 'unk' for hashtags that have no embedding of their own
                ind_hashtag = self.hashtag_to_index['unk']
            avg_vec += self.W[ind_hashtag]
            in_vocab += 1
        if in_vocab == 0:
            raise Exception('Post has 0 hashtags that have pretrained embeddings.')
        return avg_vec / in_vocab

    def post_sim(self, post1, post2):
        """
        calculates similarity between posts, expects two lists of hashtags
        :return: cosine similarity of the two average post embeddings
        """
        vec1 = self.post_avg_embedding(post1)
        vec2 = self.post_avg_embedding(post2)
        return cosine_similarity(vec1, vec2)

    def feed_sim(self, feed):
        """
        calculates the average similarity between all post in the feed, expects list of lists of hashtags
        :param feed: list of posts, each a list of hashtags
        :return: mean of ``post_sim`` over all unordered pairs of posts
        :raises ZeroDivisionError: if the feed has fewer than two posts
        """
        pairs = 0
        avg_sim = 0
        for i, post1 in enumerate(feed):
            for post2 in feed[i + 1:]:
                avg_sim += self.post_sim(post1, post2)
                pairs += 1
        return avg_sim / pairs

    def check_valid_hashtag(self, pair):
        """
        filtering hashtags
        :param pair: tuple (hashtag, count)
        :return: True if the hashtag is not ignored and its count is within [min_freq, max_freq]
        """
        (hashtag, count) = pair
        if hashtag in self.filter_hashtags:
            return False
        return self.min_freq <= count <= self.max_freq

    def clean_hashtags(self, posts):
        """
        Normalise the hashtags of all posts and keep only the valid ones.
        :param posts: dict post id -> list of raw hashtags
        :return: dict post id -> list of kept hashtags; posts without any kept hashtag are left out
        """
        preprocessed_posts = {}
        for post, hashtags in posts.items():
            # hashtags that are empty after cleaning (preprocessing returns None) are skipped
            cleaned = [tag for tag in map(preprocessing, hashtags) if tag is not None]
            if cleaned:
                preprocessed_posts[post] = cleaned

        # count how often hashtags appear over all posts
        count = defaultdict(int)
        for hashtags in preprocessed_posts.values():
            for hashtag in hashtags:
                count[hashtag] += 1

        # filter hashtags
        valid_hashtags = {hashtag for hashtag, occurrences in count.items()
                          if self.check_valid_hashtag((hashtag, occurrences))}

        filtered_posts = {}
        for post, hashtags in preprocessed_posts.items():
            kept = [hashtag for hashtag in hashtags if hashtag in valid_hashtags]
            if kept:
                filtered_posts[post] = kept
        return filtered_posts

    def check_too_frequent_hashtags(self, hashtag):
        """
        :param hashtag: hashtag string
        :return: False if the hashtag is listed in ``self.filter_hashtags``, True otherwise
        """
        return hashtag not in self.filter_hashtags

    def remove_too_frequent_hashtags(self, posts):
        """
        Remove the hashtags of the ignore list from all posts.
        :param posts: dict post id -> list of hashtags
        :return: dict post id -> list of remaining hashtags; posts without remaining hashtags are left out
        """
        filtered_posts = {}
        for post, hashtags in posts.items():
            kept = [hashtag for hashtag in hashtags if self.check_too_frequent_hashtags(hashtag)]
            if kept:
                filtered_posts[post] = kept
        return filtered_posts

    def evaluate_test_data(self):
        """
        Import and preprocess test data to then evaluate it.
        Stores a heatmap with the pairwise similarities of the first posts.
        :return:
        """
        filtered_posts = self.clean_hashtags(posts=self.test_data)
        posts = list(filtered_posts.values())
        post_ids = list(filtered_posts.keys())

        # never index past the available posts
        size = min(self.HEATMAP_POSTS, len(posts))

        # Matrix of post similarities
        sim = [[self.post_sim(posts[i], posts[j]) for j in range(size)] for i in range(size)]
        df_cm = pd.DataFrame(sim, index=post_ids[0:size],
                             columns=post_ids[0:size])
        plt.figure(figsize=(24, 16))
        sn.heatmap(df_cm, annot=True)
        plt.savefig(self.base_path / f"sgm_resources/heatmap_firsttestdata_epochs{self.epochs}_lr{self.lr}.png",
                    bbox_inches='tight')
        print(f"Chart visualizing first {size} posts in heatmap stored for {self.epochs} and lr {self.lr}.")
281 | :return: 282 | """ 283 | # file_path = (self.base_path / "test_data_set.csv").resolve() 284 | # posts = np.genfromtxt(file_path, delimiter=',', dtype=np.dtype(str), usecols=1) 285 | # post_ids = np.genfromtxt(file_path, delimiter=',', dtype=np.dtype(str), usecols=0) 286 | # 287 | # posts = list(self.test_data.values()) 288 | # post_ids = list(self.test_data.keys()) 289 | 290 | # splits the string of hashtags 291 | # posts = np.char.split(posts) 292 | # print(posts) 293 | 294 | filtered_posts = self.clean_hashtags(posts=self.test_data) 295 | posts = list(filtered_posts.values()) 296 | post_ids = list(filtered_posts.keys()) 297 | 298 | # Matrix of post similarities 299 | sim = [] 300 | span = range(20) 301 | for i in span: 302 | sim_int = [] 303 | for j in span: 304 | sim_int.append(self.post_sim(posts[i], posts[j])) 305 | sim.append(sim_int) 306 | df_cm = pd.DataFrame(sim, index=post_ids[0:len(span)], 307 | columns=post_ids[0:len(span)]) 308 | plt.figure(figsize=(24, 16)) 309 | sn.heatmap(df_cm, annot=True) 310 | plt.savefig(self.base_path / f"sgm_resources/heatmap_firsttestdata_epochs{self.epochs}_lr{self.lr}.png", 311 | bbox_inches='tight') 312 | print(f"Chart visualizing first 30 posts in heatmap stored for {self.epochs} and lr {self.lr}.") 313 | -------------------------------------------------------------------------------- /DataAnalysis/hashtags_to_ignore.json: -------------------------------------------------------------------------------- 1 | { 2 | "88764338": "foryoupage", 3 | "1637407748596742": "fyp?", 4 | "1642147373664261": "fypvirall", 5 | "1646344785794053": "fyp20", 6 | "1644632912092165": "fypchachallenge", 7 | "1706891576089605": "fyp", 8 | "1635070555641861": "foryoupage", 9 | "1645966921365509": "fypfypfypfypfypfypfypfypfyp", 10 | "883904": "foryouuu", 11 | "1694385466292229": "fypcontents", 12 | "1642613516590086": "fypppppppppppppppp", 13 | "229207": "fyp", 14 | "1651780589526022": "kingfyp", 15 | "1642191380435969": "foryourpageviral", 
16 | "1633875828543494": "fypdog", 17 | "1631348850976774": "fyp", 18 | "1631845819935750": "fypart", 19 | "1606946063404037": "likeforyoupage", 20 | "1603105080060934": "foryoupa", 21 | "1617501114305542": "fypfypfyp", 22 | "1702065574970369": "fypmlaysia\ud83c\uddf2\ud83c\uddfe", 23 | "1625705313397766": "fyppp", 24 | "1626746770984966": "fyppppp", 25 | "1647699651574789": "fypfypfypfypfypfypfypfypfypfypfyp", 26 | "1644665564694534": "plisfyp", 27 | "1608548676719622": "dogsforyou", 28 | "1609969345764357": "fypplz", 29 | "1636339340982278": "fypmemes", 30 | "1696630895338498": "fyp\u101e\u1031\u1019\u103e\u1015\u1032\u1010\u1000\u103a\u1010\u1031\u102c\u1037\ud83d\ude03", 31 | "1637418424589317": "fyp??", 32 | "1650426297129990": "fyphair", 33 | "1649953988475909": "itsasignforyou", 34 | "42164": "foryou", 35 | "1603364504464389": "foryoupagee", 36 | "1633460239823877": "fypy", 37 | "1605095166336005": "foryoupageeee", 38 | "1634937149617158": "foryoupage\u2764\ufe0f\u2764\ufe0f", 39 | "1676811985203206": "fyviralfyp", 40 | "1636799196126214": "fypoffical", 41 | "1641715580438534": "fypfypfypfypfypfyp", 42 | "1626123835557893": "foryoufyp", 43 | "1620040599854086": "fypforyoupage", 44 | "1632511288704006": "fypo", 45 | "1679200763816965": "fypttv", 46 | "1634600653321222": "fypps", 47 | "1652538132966402": "foryoupageyeh", 48 | "1626102952307718": "fypppppp", 49 | "1630284807035909": "fyptiktok", 50 | "1620625283638277": "foryouviral", 51 | "1628179191522310": "foryoupgepage", 52 | "1633359003571205": "putthisonfyp", 53 | "1703891853128710": "foreverforyou\ud83d\udc40", 54 | "1635280932750342": "fypppppppppppp", 55 | "1685694592866306": "fyp\u30b7\u309aviral\ud83d\udda4tiktok", 56 | "1637342470396934": "fyp\u30b7", 57 | "1685323802588161": "fyp\u30b7\ud83d\udda4foryoupage\u30b7\ud83e\udd8b", 58 | "1682300274755586": "foryourepageofficial", 59 | "22737416": "thisbudsforyou", 60 | "1636358001636357": "fyppppppppppp", 61 | "1616303504084998": 
"foryoupage\ud83d\ude22", 62 | "1670914542274565": "fypisbest", 63 | "1616746784464901": "foryourpag\u0435", 64 | "1607083044342806": "foryoupageeeee", 65 | "1602924933299205": "foryoupge", 66 | "1666593428398085": "fypviral\u30b7", 67 | "67231518": "foryouph", 68 | "1605983562245125": "foryou_page", 69 | "1639549424304133": "fyp_tiktok", 70 | "1648778089330694": "fyppppppppppppppppppppp", 71 | "1653220312297477": "fyppppppppppppppppppppppppppppppppppp", 72 | "1630729454643206": "fypforyouforyoupage", 73 | "1658590921953281": "fypppppppppppppppppppppppppppppppppp", 74 | "1627320129956870": "foryoupageofficial", 75 | "1633131185532933": "fypppppppppppppp", 76 | "1703769711014918": "fyp\u2764\ufe0f\ud83d\ude4f\ud83c\udffb\ud83d\ude2d", 77 | "1625649784043526": "fypplzz", 78 | "1649540023232517": "fypppppoo", 79 | "1605511720126469": "foryoupagina", 80 | "54185045": "fyplz", 81 | "1648316753236998": "fypppppppppppppppppppppp", 82 | "1688122502170626": "fyp\u1015\u1031\u102b\u103a\u101b\u1031\u102c\u1000\u103a\u1005\u1019\u103a\u1038", 83 | "1684082479714306": "fyp\u30b7\u309aviral\ud83d\udda4video", 84 | "1605270298646534": "foryoupace", 85 | "1599011224611846": "foryoupag", 86 | "1641193139018758": "fypaged", 87 | "1651632362337286": "fyppageforyou", 88 | "1664953059570690": "dailyvideosforyou", 89 | "1623668238525445": "fypplease", 90 | "84873565": "foryouthis", 91 | "1655586519169029": "fypnotworking", 92 | "1634943047845893": "foryoupage\u2728", 93 | "1640702198714373": "getthisonthefypplease", 94 | "1602020764569605": "foryoupageplease", 95 | "1614846349487125": "fype", 96 | "1637403385093126": "foryou?", 97 | "1692301272753158": "fypuswnt", 98 | "1634091457788933": "fypforyourpage", 99 | "1598364115802118": "foryoupgae", 100 | "1620485025788934": "fypppp", 101 | "1628190985570310": "fypfyp", 102 | "1620988974477318": "getthisonthefyp", 103 | "1647232092094469": "fypfypfypfypfypfypfypfypfypfypfypfypfyp", 104 | "1642258369204230": "fyppppppppppppppppppp", 105 | 
"7603941": "foryouchallenge", 106 | "1655495140363270": "fyppppppls", 107 | "1685172109999106": "f\u00fcrdichseite\u30b7foryoupage", 108 | "1603504302397446": "foryoulage", 109 | "1639107461180422": "foryoupagedoesnotwork", 110 | "1624339432620038": "fyppage", 111 | "1654857781475333": "fyp\u30b7viral", 112 | "1664119477821441": "fyp\u30b7\u309aviral", 113 | "1637715128978437": "fyppppppppppppppppp", 114 | "1648047480944646": "fypfypfypfy", 115 | "1633801928553478": "fypppppppppp", 116 | "1651224510575621": "fypforyoupagethis", 117 | "1639908894714881": "fypage\u30b7", 118 | "1684624429633537": "foryoupageofficiall2021", 119 | "1627926592598021": "fypforyou", 120 | "1607069197518854": "foryourpages", 121 | "1657857938666501": "fypcommunity", 122 | "1630155480914949": "fyppppppp", 123 | "1640241025714181": "fyppppppppppppp", 124 | "1654955847156741": "blowupforyoupage", 125 | "1604390375122950": "foryouppage", 126 | "1637403568312326": "foryoupage?", 127 | "20922363": "pageforyou", 128 | "1702998879182853": "foryoupage_tik_tok_viral_video\ud83d\udcaf\ud83d\udcb8\ud83e\udd0d\ud83d\udc8a", 129 | "1640999288847365": "fypchallage", 130 | "1675963560778754": "fypmototiktok", 131 | "1667434148820998": "fyp\u30b7cr", 132 | "1634922086900741": "fypchallenge", 133 | "1634318675729413": "fypplss", 134 | "8085197": "foryouuuuu", 135 | "1605614978359301": "foryourepage", 136 | "1647967591256070": "fypcouple", 137 | "1667758271570950": "fypplppppppppppp", 138 | "1638251441181702": "fyp\u30b7\u30c4", 139 | "1644109611442182": "fypofficial", 140 | "1641655411270661": "fyppagee", 141 | "13082896": "foryoupaige", 142 | "1685059559254018": "foryoupageforeveryone\u2661", 143 | "1634577353868293": "pageforyou\ud83e\udd8b", 144 | "1628170725056582": "fyp\ud83d\udc2e", 145 | "1634937292926981": "foryoupage\u2764\ufe0f", 146 | "1619679937718277": "plsfyp", 147 | "1628190219645957": "foryoupagedoesntwork", 148 | "1631522291667974": "forfyp", 149 | "1656713786866694": "fyp\ud83e\uddca", 150 
| "1632540676574213": "fypppppppp", 151 | "1634979065961477": "fyp\u2728", 152 | "1649139584636934": "fyppppppppppppppppppppppp", 153 | "1597916865387525": "foryou1", 154 | "1604552984408070": "foryoupageee", 155 | "1603600556098566": "foryoupg", 156 | "1685972632069125": "foryou\ud83e\udde3", 157 | "1664495475082242": "foryoupageofficiall", 158 | "1592201096750085": "foryou\u2764", 159 | "1606872592930822": "foryourpagee", 160 | "1628662386826245": "fyp_", 161 | "1635848705845253": "fypyoupage", 162 | "1634369597616134": "viralfyp", 163 | "61667223": "fyps", 164 | "1617590735671301": "fypfor", 165 | "1634939850577926": "fyp\u2764\ufe0f", 166 | "1609595389571077": "foryoupagetiktok", 167 | "1639699601016837": "fyp\u30c4", 168 | "1619120300969989": "fyp\ud83d\udc95", 169 | "1632002941088774": "fypviral", 170 | "1641236262278149": "fyp\u30b7\u309a", 171 | "1659949481334789": "fypforyoupage\u30b7", 172 | "1609273039298565": "foryoupag\u0435", 173 | "1650742742654982": "fyp2020", 174 | "33971256": "foryouofficial", 175 | "1635084903417862": "foryou\u30b7", 176 | "1665036454423553": "fyp\u30b7foryoupage\u30b7tiktok", 177 | "1624550708748294": "foryoupage\ud83d\udc40\ud83d\udc40", 178 | "1617425133872134": "foryoupageoffical", 179 | "1623383052219414": "fypthis", 180 | "1592743307939841": "tiktokforyou", 181 | "1699264431886341": "fyp\u30b7\ud83c\udf7f\ud83c\udf7f", 182 | "1603204382206981": "fypg", 183 | "7107602": "foryouforyou", 184 | "1656991624902662": "fyp\u30b7foryoupage\u30b7", 185 | "1604142815383557": "foryoupgage", 186 | "1620267499925509": "fypls", 187 | "1611853076456450": "viralforyou", 188 | "1658673609103366": "fyp21", 189 | "1610265502020613": "foryourpagechallenge", 190 | "1675567091586054": "fyppoppppppppppppppppppppppp", 191 | "1679511090156546": "fypdonggggggggg\u30b7", 192 | "1670686468547590": "justgetthisonfyp", 193 | "1635213389964293": "fypit", 194 | "1602878675792901": "getmeontheforyoupage", 195 | "1632581882862593": "fypph", 196 | 
"1634967704187910": "fyp\u2764", 197 | "1694852740601861": "fypshortclips", 198 | "1686358467262466": "fyppyfyp", 199 | "1604276726661125": "foryoupagethis", 200 | "442854": "foryouu", 201 | "1618323884896262": "tiktokpageforyou", 202 | "1624022682062853": "fyp\ud83d\ude2d", 203 | "1659700661828610": "fyp\u30b7\u30c4post", 204 | "1608973297173509": "foryoupageforever", 205 | "1606878050264069": "onyourforyoupage", 206 | "12009": "justforyou", 207 | "1640768557032454": "fyp\ud83c\udff3\ufe0f\u200d\ud83c\udf08", 208 | "1634950588112902": "foryoupage\u2665\ufe0f", 209 | "1604302285252613": "fypage", 210 | "1639336981017605": "fyppleasetiktok", 211 | "1634937277509638": "foryou\u2764\ufe0f\u2764\ufe0f\u2764\ufe0f", 212 | "1605919148785669": "ffyp", 213 | "1598498371111942": "foryourpage", 214 | "1610847197102085": "foryoutiktok", 215 | "364659": "foryour", 216 | "1664053106852869": "fyp\u30b7\u30c4\u2661", 217 | "1616966643636229": "fypp", 218 | "1626088819377157": "fyfyp", 219 | "1635214780493826": "fypthisss", 220 | "1640815314240518": "fypagechallenge", 221 | "1600360681543685": "getthisontheforyoupage", 222 | "1603987329456134": "foryouapge", 223 | "1670139895030786": "funnycontentforyou", 224 | "1617418873150470": "fyppls", 225 | "1701731659930629": "3minutevideofyp", 226 | "1627524680056837": "fyppppppppp", 227 | "22091782": "foryoup", 228 | "1603502040695813": "foryouoage", 229 | "1666841966392325": "fyp2021", 230 | "1625820133083142": "fypfypfypfyp", 231 | "1623723121789957": "foryou\u30c4", 232 | "1644272574749702": "fyp\u30b7foryoupage", 233 | "1616155509242885": "foryoupagebro", 234 | "1650370082138113": "foryouoffical", 235 | "1623927366993925": "fyp1", 236 | "1674584508176390": "fyp\u30c4viral", 237 | "1604284546288646": "foryoupages", 238 | "43268": "4u", 239 | "108264": "foru", 240 | "20884": "viral", 241 | "153828": "fy" 242 | } -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # An Empirical Investigation of Personalization Factors on TikTok 2 | 3 | In this repository we publish all software resources that were utilized to perform a sock-puppet audit on the web-version of TikTok to mimic a human user. With this audit we focused on analyzing the personalization factors and their influence on the recommendation algorithm of TikTok. You may find our work here: [link](https://dl.acm.org/doi/10.1145/3485447.3512102) 4 | 5 | Within this ReadMe we will provide a short overview of how one can use our code to replicate our results. Please note that even though we are confident that our results are trustworthy, you may encounter different ones due to different time periods and the continuous development of the recommendation algorithm by TikTok. 6 | 7 | ## Running A Test Scenario 8 | 9 | In the following section we will provide a step-by-step guide on how to initialise and run one of the test scenarios we performed in our paper. 10 | 11 | First of all, you need to set up the appropriate infrastructure: 12 | 1. Create a Webshare account to obtain IP addresses from proxies you can use. 13 | 2. Create the test users in the database. 14 | 3. Since every test scenario consists of two test users you have to manually create those two users using the previously stored data on TikTok. 15 | 4. Once the user accounts exist on TikTok you may initialize the test scenario by executing the ParalleliseTesting.py file with the corresponding parameters. 16 | 17 | 18 | We exemplify these steps by performing a run of the test scenario 28. This scenario aims at ... consisting of the users ... 
19 | 20 | - Creating User Accounts 21 | -- Get phone numbers 22 | -- Create user accounts using purchased phone numbers 23 | 24 | - explain structure of db_credentials.json 25 | - explain placeholders: Twilio, Heroku DB, Webshare Proxy API, paths within project 26 | 27 | ## Analyzing Generated Data 28 | 29 | In order to obtain the most promising results of the Skip-Gram model, we trained the model over 5 epochs with a learning rate of 0.1. 30 | -------------------------------------------------------------------------------- /Testing/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/Testing/.DS_Store -------------------------------------------------------------------------------- /Testing/APItest.py: -------------------------------------------------------------------------------- 1 | # Testing Unofficial TikTok API 2 | 3 | from TikTokApi import TikTokApi 4 | from pathlib import Path 5 | api1 = TikTokApi.get_instance() 6 | # If playwright doesn't work for you try to use selenium 7 | api2 = TikTokApi.get_instance(use_selenium=True) 8 | 9 | base_path = Path(__file__).parent 10 | file_path = (base_path / "../utilities/chromedriver.exe").resolve() 11 | api3 = TikTokApi.get_instance(use_selenium=True, executablePath=file_path) 12 | 13 | results = 10 14 | 15 | # Since TikTok changed their API you need to use the custom_verifyFp option. 16 | # In your web browser you will need to go to TikTok, Log in and get the s_v_web_id value. 
# Fetch the trending feed once per API instance. Both calls deliberately share
# the same verifyFp value (the s_v_web_id cookie copied from a logged-in browser).
trending1 = api1.trending(
    count=results,
    custom_verifyFp="verify_klat6pua_gX3v9ItE_uqdV_4zPu_8rMk_KIMu3i51EFuI",
)
trending2 = api2.trending(
    count=results,
    custom_verifyFp="verify_klat6pua_gX3v9ItE_uqdV_4zPu_8rMk_KIMu3i51EFuI",
)
# Disabled variant that went through the explicit chromedriver instance (api3):
#trending3 = api3.trending(count=results, custom_verifyFP="verify_klat6pua_gX3v9ItE_uqdV_4zPu_8rMk_KIMu3i51EFuI")

# Same request through the selenium-backed instance, with a different verifyFp value.
trending3 = api2.trending(
    count=results,
    custom_verifyFp="verify_klkw2don_kCWTFtWb_U1Qu_4OZl_8Rhq_r1fUbV5QMKIt",
)

# Fetch the posts of one fixed account; the result is not inspected further below.
userID = "6717651461067604997"
secUID = "MS4wLjABAAAALP9H8t1_SVmfuAKXV1o9K8XqiaFLxm2ae-EJ5_AJcwogcI_d9btuf_fjbjFOMNpN"
posts = api2.userPosts(
    userID=userID,
    secUID=secUID,
    custom_verifyFp="verify_kljnnr1d_aPqkxu8I_TXtT_4xO8_8zE5_jEkg97g2DRqO",
)

# For each of the first two feeds: print the id of every tiktok, then the feed size.
for feed in (trending1, trending2):
    for tiktok in feed:
        print(tiktok['id'])

    print(len(feed))
def start_session():
    """Open a proxied Chrome session and request TikTok's mobile site.

    The proxy settings below are placeholders and must be filled in before the
    script is useful. A ``ConnectionAbortedError`` or a mitmproxy
    ``TcpDisconnect`` raised while the browser starts or the page loads is
    printed instead of propagated; any other exception propagates.

    Returns:
        The selenium-wire Chrome driver when the page was requested without one
        of the handled errors, otherwise ``None``. The caller owns a returned
        driver and is responsible for quitting it.
    """
    proxy = {
        'proxy_username': 'PLACEHOLDER', 'proxy_password': 'PLACEHOLDER',
        'proxy_host': 'PLACEHOLDER', 'proxy_port': 'PLACEHOLDER',
        'country': 'FR'
    }
    # Tracked outside the try block so the error path can release a browser
    # that was already started when the failure happened.
    driver = None
    try:
        # bypassing detection of automated software testing
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--disable-blink-features=AutomationControlled')
        chrome_options.add_argument('--lang={browser_language}'.format(browser_language='en'))

        # open incognito page to remove any noise from tracked cookies or browsing history, according to the
        # paper from Aniko Hannak et al.
        chrome_options.add_argument('incognito')

        # use proxy if provided:
        options = {}
        if proxy is not None:
            url = "{proxy_username}:{proxy_password}@{proxy_host}:{proxy_port}".format(
                proxy_username=proxy['proxy_username'], proxy_password=proxy['proxy_password'],
                proxy_host=proxy['proxy_host'], proxy_port=proxy['proxy_port'])
            options = {
                'proxy': {
                    'http': 'http://' + url,
                    'https': 'https://' + url,
                    'no_proxy': 'localhost,127.0.0.1'
                }
            }

        # initializing web driver
        base_path = Path(__file__).parent
        file_path = (base_path / "../utilities/chromedriver.exe").resolve()
        driver = webdriver.Chrome(chrome_options=chrome_options, seleniumwire_options=options,
                                  executable_path=file_path)
        # hide the navigator.webdriver flag that reveals browser automation
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
        driver.get('https://m.tiktok.com')
    except (ConnectionAbortedError, seleniumwire.thirdparty.mitmproxy.exceptions.TcpDisconnect) as err:
        print(err)
        print('\n New driver session with new proxy initialized.')
        # TODO: create a new session with another proxy address here (not implemented yet).
        # Until then, shut down the browser that may already be running so the
        # failed attempt does not leave a Chrome process behind.
        if driver is not None:
            driver.quit()
            driver = None
    return driver


if __name__ == "__main__":
    start_session()
def get_test_data(test_set_file="test_user_167.json"):
    """Load a test-set file and enrich every test user with settings from the database.

    Args:
        test_set_file: File name inside ``Testing/TestSets`` to load. Defaults
            to the file that was previously hard-coded.

    Returns:
        A list with one settings dict per test user in the file. Every dict
        gains a ``proxy`` entry (host, port and country read from the
        database; username and password are placeholders). Users with
        ``login`` set additionally gain ``phone_number`` and
        ``country_phone_number_prefix``.
    """
    database = DatabaseHelper()
    file_path = (base_path / "../Testing/TestSets" / test_set_file).resolve()
    with open(file_path) as file:
        test_json = json.load(file)

    # The keys of the JSON object only repeat the test user id, so iterate over
    # the per-user settings directly and look the id up once per user.
    test_data = []
    for test_set in test_json.values():
        test_user_id = test_set.get('test_user_id')
        if test_set.get('login'):
            test_set['phone_number'] = database.get_phone_number(test_user_id=test_user_id)
            test_set['country_phone_number_prefix'] = database.get_country_phone_number_prefix(
                test_user_id=test_user_id)
        # The proxy is needed for every user, logged in or not: run_test
        # unflags it unconditionally when the run finishes.
        test_set['proxy'] = {
            "proxy_username": "PLACEHOLDER", "proxy_password": "PLACEHOLDER",
            "proxy_host": database.get_proxy_host(test_user_id=test_user_id),
            "proxy_port": database.get_proxy_port(test_user_id=test_user_id),
            "country": database.get_proxy_country(test_user_id=test_user_id)}
        test_data.append(test_set)

    # For account creation (first login of a newly created user, followed by
    # manually completing the registration) return only a single entry, e.g.
    # ``[test_data[0]]``, so that just one browser session is started.
    return test_data
# Keys of the per-batch action lists inside one test-set entry.
_PER_BATCH_KEYS = ('number_of_posts_to_like_per_batch',
                   'number_of_creators_to_follow_per_batch',
                   'number_of_posts_to_watch_longer_per_batch')


def _per_batch_actions(test_data):
    """Return the per-batch action lists of *test_data*; missing or null entries become empty lists."""
    return {key: test_data.get(key) or [] for key in _PER_BATCH_KEYS}


def _resolve_number_of_batches(test_data, per_batch):
    """Return the configured batch count after checking it against the longest per-batch list."""
    number_of_batches = test_data.get('number_of_batches')
    longest = max(len(actions) for actions in per_batch.values())
    # Only validate when at least one action list is actually filled.
    if longest != 0 and number_of_batches != longest:
        raise Exception("Number of batches to scroll through doesn't match!")
    return number_of_batches


# main function initializing all different steps within one test iteration
def run_test(test_data):
    """Execute one complete test iteration for a single test user.

    Args:
        test_data: Settings dict of one test user as produced by
            ``get_test_data``, extended with ``test_run_id``.

    Returns:
        The same ``test_data`` dict with ``duration`` (in minutes) added.

    Raises:
        Exception: If any per-batch action list is non-empty and the longest
            one does not have exactly ``number_of_batches`` entries.
    """
    # setting start time
    start = time.time()

    # initializing logger
    file_path = (base_path / f"../DataAnalysis/console_logs/console_log_{test_data.get('test_run_id')}_user_"
                             f"{test_data.get('test_user_id')}.log").resolve()
    # NOTE(review): logging.basicConfig does nothing once the root logger has handlers, so a pool worker
    # that is reused for a second test would keep writing to its first log file. Consider force=True
    # (Python >= 3.8) after confirming the minimum supported Python version.
    logging.basicConfig(filename=file_path, filemode='w')
    logger = logging.getLogger()
    logger.setLevel(logging.WARNING)
    logger.warning(f'Starting execution for testuser {test_data.get("test_user_id")}.')

    # initializing DatabaseHelper() object only once for test run
    database = DatabaseHelper()

    # initializing helper instance
    helper = WebHelper(test_user_id=test_data.get('test_user_id'),
                       test_run_id=test_data.get('test_run_id'),
                       logger=logger,
                       database=database,
                       phone_number=test_data.get('phone_number'),
                       country_phone_number_prefix=test_data.get('country_phone_number_prefix'),
                       reuse_cookies=test_data.get('reuse_cookies'),
                       proxy=test_data.get('proxy'),
                       browser_language=test_data.get("browser_language"))

    # From here on the helper exists, so the driver and the proxy must be released even when a step
    # fails; otherwise the proxy would stay flagged as in use.
    try:
        # triggering login for user via phone number only if "login" set true in test_data
        if test_data.get('login'):
            helper.login_user_phone()
            helper.handle_banners()

        # trigger handling of banners
        helper.handle_banners()

        # pause video until actually watching
        helper.pause_video()

        # pause first video
        if test_data.get('collecting_data_for_first_posts'):
            helper.handle_banners()
            helper.pause_video()

        # set cookies if applicable
        if test_data.get('reuse_cookies'):
            helper.set_cookies()

        # define number of batches to scroll through; test sets that omit one of the per-batch lists
        # are treated as if the list were empty instead of failing on len(None)
        per_batch = _per_batch_actions(test_data)
        number_of_batches = _resolve_number_of_batches(test_data, per_batch)

        # initializing data storing instance
        data_storing = DataStoring(helper=helper,
                                   logger=logger,
                                   database=database,
                                   number_of_batches=number_of_batches,
                                   test_user_id=test_data.get('test_user_id'),
                                   test_run_id=test_data.get('test_run_id'))

        # trigger handling of banners
        helper.handle_banners()

        # handling first set of posts
        data_storing.get_separate_posts_data(
            collecting_data_for_first_posts=test_data.get("collecting_data_for_first_posts"))

        # handling remaining posts, scrolling through batches
        data_storing.get_request_posts_data(
            time_to_look_at_post_action=test_data.get('time_to_look_at_post_action'),
            time_to_look_at_post_normal=test_data.get('time_to_look_at_post_normal'),
            number_of_posts_to_like_per_batch=per_batch['number_of_posts_to_like_per_batch'],
            number_of_creators_to_follow_per_batch=per_batch['number_of_creators_to_follow_per_batch'],
            number_of_posts_to_watch_longer_per_batch=per_batch['number_of_posts_to_watch_longer_per_batch'],
            posts_with_hashtag_to_like=test_data.get('posts_with_hashtag_to_like'),
            posts_with_hashtag_to_watch_longer=test_data.get('posts_with_hashtag_to_watch_longer'),
            posts_of_content_creators_to_like=test_data.get('posts_of_content_creators_to_like'),
            posts_of_music_ids_to_like=test_data.get('posts_of_music_ids_to_like'))
    finally:
        # commencing shut down of test run: closing driver and unflagging used proxy
        try:
            helper.close_driver()
        finally:
            helper.database.unflag_proxy(proxy_host=test_data['proxy']['proxy_host'],
                                         proxy_port=test_data['proxy']['proxy_port'])

    # storing collected data, computing duration and storing it for corresponding testrun
    data_storing.store_collected_data()
    duration = time.time() - start
    test_data['duration'] = (duration / 60)
    logger.warning(f'Execution for testuser {test_data.get("test_user_id")} completed in {duration} seconds '
                   f'({duration / 60} minutes).')
    return test_data


if __name__ == '__main__':
    tests = get_test_data()

    # create test run object with given test data and run tests in parallel
    with TestRun(test_data=tests) as test_run:
        for test in tests:
            test['test_run_id'] = test_run.test_run_id
        with concurrent.futures.ProcessPoolExecutor() as executor:
            test_data_results = executor.map(run_test, tests)
            test_user_ids = []
            batch_size = 0

            # obtain test results and store them accordingly
            for test in test_data_results:
                test_run.store_test_duration(duration=test.get('duration'), test_user_id=test.get('test_user_id'))
                test_user_ids.append(test.get('test_user_id'))
                batch_size = test.get('number_of_batches')
            # update analysis table (currently disabled)
            # update_overlapping_post_test_results_with_new_values(test_run=test_run.test_run_id,
            #                                                      test_users=test_user_ids, batch_size=batch_size)
# Single-user test initializer: logs one test user in through a proxy, collects the first posts and the
# scrolled batches, and stores the results for the surrounding TestRun.
#
# NOTE: the commented-out fixtures that used to live here were removed because they embedded proxy and
# account credentials; such values should not be kept in source control.

c1 = 'US'
c2 = 'CA'
proxy_auth_username = 'PLACEHOLDER'
proxy_auth_password = 'PLACEHOLDER'
proxy_host1, proxy_port1 = get_db_proxy(c1)

# NOTE(review): this fixture still carries a phone number and a password in plain text - consider loading
# them from the database the way ParalleliseTesting.get_test_data does.
test_data = [
    {"test_user_id": 11, "phone_number": "5039664089", "password": "IOw2z*W282&X", "browser_language": "en",
     "country_phone_number_prefix": "United States", "time_to_look_at_post": 2,
     "number_of_posts_to_like_per_batch": [15, 5], "collecting_data_for_first_posts": False,
     "proxy":
         {'proxy_username': proxy_auth_username, 'proxy_password': proxy_auth_password,
          'proxy_host': proxy_host1, 'proxy_port': proxy_port1, 'country': c1}
     }
]


with TestRun(test_data=test_data) as test_run:
    start = time.time()
    # only the first (and currently only) fixture entry is executed
    test_case = test_data[0]
    test_case['test_run_id'] = test_run.test_run_id
    base_path = Path(__file__).parent
    # the repository directory is "DataAnalysis" (no space), matching the path used by ParalleliseTesting
    file_path = (base_path / f"../DataAnalysis/console_logs/console_log_{test_case.get('test_run_id')}_user_"
                             f"{test_case.get('test_user_id')}.log").resolve()
    logging.basicConfig(filename=file_path, format='%(asctime)s %(message)s', filemode='w')
    logger = logging.getLogger()
    logger.setLevel(logging.WARNING)
    # logged at WARNING because the logger level above would silently drop INFO records
    logger.warning(f'Starting execution for testuser {test_case.get("test_user_id")}.')
    # NOTE(review): ParalleliseTesting passes database/phone_number to WebHelper and calls
    # login_user_phone() without arguments - confirm which signatures are current.
    helper = WebHelper(test_user_id=test_case.get('test_user_id'),
                       test_run_id=test_case.get('test_run_id'),
                       logger=logger,
                       proxy=test_case.get('proxy'),
                       browser_language=test_case.get("browser_language"))
    try:
        helper.login_user_phone(test_case.get('phone_number'), test_case.get('country_phone_number_prefix'))
        data_storing = DataStoring(helper=helper,
                                   logger=logger,
                                   number_of_batches=len(test_case.get('number_of_posts_to_like_per_batch')),
                                   test_user_id=test_case.get('test_user_id'),
                                   test_run_id=test_case.get('test_run_id'))
        data_storing.get_separate_posts_data(
            collecting_data_for_first_posts=test_case.get("collecting_data_for_first_posts"))
        data_storing.get_request_posts_data(test_case.get('time_to_look_at_post'),
                                            test_case.get('number_of_posts_to_like_per_batch'))
    finally:
        # release the proxy and the browser even when a step above failed
        try:
            helper.database.unflag_proxy(proxy_host=test_case['proxy']['proxy_host'],
                                         proxy_port=test_case['proxy']['proxy_port'])
        finally:
            helper.close_driver()
    data_storing.store_collected_data()
    duration = time.time() - start
    test_case['duration'] = (duration / 60)
    logger.warning(f'Execution for testuser {test_case.get("test_user_id")} completed in {duration} seconds '
                   f'({duration / 60} minutes).')
{test_data.get("test_user_id")} completed in {duration} seconds ' 138 | f'({duration / 60} minutes).') 139 | 140 | -------------------------------------------------------------------------------- /Testing/TestSets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/Testing/TestSets/.DS_Store -------------------------------------------------------------------------------- /Testing/TestSets/cg_us_user-165-166.json: -------------------------------------------------------------------------------- 1 | { 2 | "165": { 3 | "test_user_id": 165, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0, 8 | "time_to_look_at_post_normal": 0.5, 9 | "number_of_batches": 20, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_watch_longer": [], 14 | "posts_with_hashtag_to_like": [], 15 | "posts_of_content_creators_to_like": [], 16 | "posts_of_music_ids_to_like": [], 17 | "collecting_data_for_first_posts": false 18 | }, 19 | "166": { 20 | "test_user_id": 166, 21 | "login": true, 22 | "browser_language": "en", 23 | "reuse_cookies": false, 24 | "time_to_look_at_post_action": 0, 25 | "time_to_look_at_post_normal": 0.5, 26 | "number_of_batches": 20, 27 | "number_of_posts_to_like_per_batch": [], 28 | "number_of_creators_to_follow_per_batch": [], 29 | "number_of_posts_to_watch_longer_per_batch": [], 30 | "posts_with_hashtag_to_watch_longer": [], 31 | "posts_with_hashtag_to_like": [], 32 | "posts_of_content_creators_to_like": [], 33 | "posts_of_music_ids_to_like": [], 34 | "collecting_data_for_first_posts": false 35 | } 36 | } 37 | -------------------------------------------------------------------------------- 
/Testing/TestSets/part_1_tests/cg_ca_user-119-120.json: -------------------------------------------------------------------------------- 1 | { 2 | "119": { 3 | "test_user_id": 119, 4 | "login": false, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 8 | "number_of_batches": 3, 9 | "number_of_posts_to_like_per_batch": [], 10 | "number_of_creators_to_follow_per_batch": [], 11 | "number_of_posts_to_watch_longer_per_batch": [], 12 | "posts_with_hashtag_to_watch_longer": [], 13 | "posts_with_hashtag_to_like": [], 14 | "posts_of_content_creators_to_like": [], 15 | "posts_of_music_ids_to_like": [], 16 | "collecting_data_for_first_posts": true 17 | }, 18 | "120": { 19 | "test_user_id": 120, 20 | "login": false, 21 | "browser_language": "en", 22 | "reuse_cookies": false, 23 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 24 | "number_of_batches": 3, 25 | "number_of_posts_to_like_per_batch": [], 26 | "number_of_creators_to_follow_per_batch": [], 27 | "number_of_posts_to_watch_longer_per_batch": [], 28 | "posts_with_hashtag_to_watch_longer": [], 29 | "posts_with_hashtag_to_like": [], 30 | "posts_of_content_creators_to_like": [], 31 | "posts_of_music_ids_to_like": [], 32 | "collecting_data_for_first_posts": true 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_ca_user-121-122.json: -------------------------------------------------------------------------------- 1 | { 2 | "121": { 3 | "test_user_id": 121, 4 | "login": false, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | 
"posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": false 16 | }, 17 | "122": { 18 | "test_user_id": 122, 19 | "login": false, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": false 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_fr_user-55-56.json: -------------------------------------------------------------------------------- 1 | { 2 | "55": { 3 | "test_user_id": 55, 4 | "login": false, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 5, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "posts_with_hashtag_to_like": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_of_content_creators_to_like": [], 13 | "posts_of_music_ids_to_like": [], 14 | "collecting_data_for_first_posts": true 15 | }, 16 | "56": { 17 | "test_user_id": 56, 18 | "login": false, 19 | "browser_language": "en", 20 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 21 | "number_of_batches": 5, 22 | "number_of_posts_to_like_per_batch": [], 23 | "number_of_creators_to_follow_per_batch": [], 24 | "posts_with_hashtag_to_like": [], 25 | "posts_with_hashtag_to_watch_longer": [], 26 | "posts_of_content_creators_to_like": [], 27 | 
"posts_of_music_ids_to_like": [], 28 | "collecting_data_for_first_posts": true 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_fr_user-65-67.json: -------------------------------------------------------------------------------- 1 | { 2 | "65": { 3 | "test_user_id": 65, 4 | "login": false, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 5, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": false 16 | }, 17 | "67": { 18 | "test_user_id": 67, 19 | "login": false, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 5, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": false 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_gb_user-68-69.json: -------------------------------------------------------------------------------- 1 | { 2 | "68": { 3 | "test_user_id": 68, 4 | "login": false, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 8 | "number_of_batches": 5, 9 | "number_of_posts_to_like_per_batch": [], 10 | 
"number_of_creators_to_follow_per_batch": [], 11 | "number_of_posts_to_watch_longer_per_batch": [], 12 | "posts_with_hashtag_to_watch_longer": [], 13 | "posts_with_hashtag_to_like": [], 14 | "posts_of_content_creators_to_like": [], 15 | "posts_of_music_ids_to_like": [], 16 | "collecting_data_for_first_posts": false 17 | }, 18 | "69": { 19 | "test_user_id": 69, 20 | "login": false, 21 | "browser_language": "en", 22 | "reuse_cookies": false, 23 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 24 | "number_of_batches": 5, 25 | "number_of_posts_to_like_per_batch": [], 26 | "number_of_creators_to_follow_per_batch": [], 27 | "number_of_posts_to_watch_longer_per_batch": [], 28 | "posts_with_hashtag_to_watch_longer": [], 29 | "posts_with_hashtag_to_like": [], 30 | "posts_of_content_creators_to_like": [], 31 | "posts_of_music_ids_to_like": [], 32 | "collecting_data_for_first_posts": false 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_us_user-125-126.json: -------------------------------------------------------------------------------- 1 | { 2 | "125": { 3 | "test_user_id": 125, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": true 16 | }, 17 | "126": { 18 | "test_user_id": 126, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": 
[], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_us_user-137-138.json: -------------------------------------------------------------------------------- 1 | { 2 | "137": { 3 | "test_user_id": 137, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": false 16 | }, 17 | "138": { 18 | "test_user_id": 138, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": false 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_us_user-139-140.json: -------------------------------------------------------------------------------- 1 | { 2 | "139": { 3 | 
"test_user_id": 139, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": true 16 | }, 17 | "140": { 18 | "test_user_id": 140, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_us_user-141-142.json: -------------------------------------------------------------------------------- 1 | { 2 | "141": { 3 | "test_user_id": 141, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": false 16 | }, 17 | "142": { 18 | 
"test_user_id": 142, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": false 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_us_user-143-144.json: -------------------------------------------------------------------------------- 1 | { 2 | "143": { 3 | "test_user_id": 143, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 8 | "number_of_batches": 3, 9 | "number_of_posts_to_like_per_batch": [], 10 | "number_of_creators_to_follow_per_batch": [], 11 | "number_of_posts_to_watch_longer_per_batch": [], 12 | "posts_with_hashtag_to_watch_longer": [], 13 | "posts_with_hashtag_to_like": [], 14 | "posts_of_content_creators_to_like": [], 15 | "posts_of_music_ids_to_like": [], 16 | "collecting_data_for_first_posts": false 17 | }, 18 | "144": { 19 | "test_user_id": 144, 20 | "login": true, 21 | "browser_language": "en", 22 | "reuse_cookies": false, 23 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 24 | "number_of_batches": 3, 25 | "number_of_posts_to_like_per_batch": [], 26 | "number_of_creators_to_follow_per_batch": [], 27 | "number_of_posts_to_watch_longer_per_batch": [], 28 | "posts_with_hashtag_to_watch_longer": [], 29 | "posts_with_hashtag_to_like": [], 30 | "posts_of_content_creators_to_like": [], 31 | "posts_of_music_ids_to_like": [], 32 | "collecting_data_for_first_posts": false 33 | } 34 
| } 35 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_us_user-147-148.json: -------------------------------------------------------------------------------- 1 | { 2 | "147": { 3 | "test_user_id": 147, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": true, 7 | "time_to_look_at_post_action": 0, 8 | "time_to_look_at_post_normal": 0.5, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_watch_longer": [], 14 | "posts_with_hashtag_to_like": [], 15 | "posts_of_content_creators_to_like": [], 16 | "posts_of_music_ids_to_like": [], 17 | "collecting_data_for_first_posts": false 18 | }, 19 | "148": { 20 | "test_user_id": 148, 21 | "login": true, 22 | "browser_language": "en", 23 | "reuse_cookies": true, 24 | "time_to_look_at_post_action": 0, 25 | "time_to_look_at_post_normal": 0.5, 26 | "number_of_batches": 3, 27 | "number_of_posts_to_like_per_batch": [], 28 | "number_of_creators_to_follow_per_batch": [], 29 | "number_of_posts_to_watch_longer_per_batch": [], 30 | "posts_with_hashtag_to_watch_longer": [], 31 | "posts_with_hashtag_to_like": [], 32 | "posts_of_content_creators_to_like": [], 33 | "posts_of_music_ids_to_like": [], 34 | "collecting_data_for_first_posts": false 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_us_user-149-150.json: -------------------------------------------------------------------------------- 1 | { 2 | "149": { 3 | "test_user_id": 149, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": true, 7 | "time_to_look_at_post_action": 0, 8 | "time_to_look_at_post_normal": 0.5, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | 
"number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_watch_longer": [], 14 | "posts_with_hashtag_to_like": [], 15 | "posts_of_content_creators_to_like": [], 16 | "posts_of_music_ids_to_like": [], 17 | "collecting_data_for_first_posts": false 18 | }, 19 | "150": { 20 | "test_user_id": 150, 21 | "login": true, 22 | "browser_language": "en", 23 | "reuse_cookies": true, 24 | "time_to_look_at_post_action": 0, 25 | "time_to_look_at_post_normal": 0.5, 26 | "number_of_batches": 3, 27 | "number_of_posts_to_like_per_batch": [], 28 | "number_of_creators_to_follow_per_batch": [], 29 | "number_of_posts_to_watch_longer_per_batch": [], 30 | "posts_with_hashtag_to_watch_longer": [], 31 | "posts_with_hashtag_to_like": [], 32 | "posts_of_content_creators_to_like": [], 33 | "posts_of_music_ids_to_like": [], 34 | "collecting_data_for_first_posts": false 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_us_user-161-162.json: -------------------------------------------------------------------------------- 1 | { 2 | "161": { 3 | "test_user_id": 161, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": true, 7 | "time_to_look_at_post_action": 0, 8 | "time_to_look_at_post_normal": 0.5, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_watch_longer": [], 14 | "posts_with_hashtag_to_like": [], 15 | "posts_of_content_creators_to_like": [], 16 | "posts_of_music_ids_to_like": [], 17 | "collecting_data_for_first_posts": false 18 | }, 19 | "162": { 20 | "test_user_id": 162, 21 | "login": true, 22 | "browser_language": "en", 23 | "reuse_cookies": true, 24 | "time_to_look_at_post_action": 0, 25 | "time_to_look_at_post_normal": 0.5, 26 | "number_of_batches": 3, 27 | "number_of_posts_to_like_per_batch": [], 28 | 
"number_of_creators_to_follow_per_batch": [], 29 | "number_of_posts_to_watch_longer_per_batch": [], 30 | "posts_with_hashtag_to_watch_longer": [], 31 | "posts_with_hashtag_to_like": [], 32 | "posts_of_content_creators_to_like": [], 33 | "posts_of_music_ids_to_like": [], 34 | "collecting_data_for_first_posts": false 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_us_user-57-58.json: -------------------------------------------------------------------------------- 1 | { 2 | "57": { 3 | "test_user_id": 57, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 5, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "posts_with_hashtag_to_like": [], 11 | "posts_of_content_creators_to_like": [], 12 | "posts_of_music_ids_to_like": [], 13 | "collecting_data_for_first_posts": true 14 | }, 15 | "58": { 16 | "test_user_id": 58, 17 | "login": true, 18 | "browser_language": "en", 19 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 20 | "number_of_batches": 5, 21 | "number_of_posts_to_like_per_batch": [], 22 | "number_of_creators_to_follow_per_batch": [], 23 | "posts_with_hashtag_to_like": [], 24 | "posts_of_content_creators_to_like": [], 25 | "posts_of_music_ids_to_like": [], 26 | "collecting_data_for_first_posts": true 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_us_user-72-73.json: -------------------------------------------------------------------------------- 1 | { 2 | "72": { 3 | "test_user_id": 72, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 8 | "number_of_batches": 5, 9 | 
"number_of_posts_to_like_per_batch": [], 10 | "number_of_creators_to_follow_per_batch": [], 11 | "number_of_posts_to_watch_longer_per_batch": [], 12 | "posts_with_hashtag_to_watch_longer": [], 13 | "posts_with_hashtag_to_like": [], 14 | "posts_of_content_creators_to_like": [], 15 | "posts_of_music_ids_to_like": [], 16 | "collecting_data_for_first_posts": true 17 | }, 18 | "73": { 19 | "test_user_id": 73, 20 | "login": true, 21 | "browser_language": "en", 22 | "reuse_cookies": false, 23 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 24 | "number_of_batches": 5, 25 | "number_of_posts_to_like_per_batch": [], 26 | "number_of_creators_to_follow_per_batch": [], 27 | "number_of_posts_to_watch_longer_per_batch": [], 28 | "posts_with_hashtag_to_watch_longer": [], 29 | "posts_with_hashtag_to_like": [], 30 | "posts_of_content_creators_to_like": [], 31 | "posts_of_music_ids_to_like": [], 32 | "collecting_data_for_first_posts": true 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_us_user-74-75.json: -------------------------------------------------------------------------------- 1 | { 2 | "74": { 3 | "test_user_id": 74, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 5, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": false 16 | }, 17 | "75": { 18 | "test_user_id": 75, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 5, 23 
| "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": false 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_us_user-93-94.json: -------------------------------------------------------------------------------- 1 | { 2 | "93": { 3 | "test_user_id": 93, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 5, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": true 16 | }, 17 | "94": { 18 | "test_user_id": 94, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 5, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/cg_us_user-95-96.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "95": { 3 | "test_user_id": 95, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 5, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": false 16 | }, 17 | "96": { 18 | "test_user_id": 96, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 5, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": false 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/follow_gb_user-51-52.json: -------------------------------------------------------------------------------- 1 | { 2 | "51": { 3 | "test_user_id": 51, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 8 | "number_of_batches": 3, 9 | "number_of_posts_to_like_per_batch": [], 10 | "number_of_creators_to_follow_per_batch": [0, 0, 0], 11 | "number_of_posts_to_watch_longer_per_batch": [], 12 | "posts_with_hashtag_to_watch_longer": [], 13 | "posts_with_hashtag_to_like": [], 14 | "posts_of_content_creators_to_like": [], 
15 | "posts_of_music_ids_to_like": [], 16 | "collecting_data_for_first_posts": true 17 | }, 18 | "52": { 19 | "test_user_id": 52, 20 | "login": true, 21 | "browser_language": "en", 22 | "reuse_cookies": false, 23 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 24 | "number_of_batches": 3, 25 | "number_of_posts_to_like_per_batch": [], 26 | "number_of_creators_to_follow_per_batch": [], 27 | "number_of_posts_to_watch_longer_per_batch": [], 28 | "posts_with_hashtag_to_watch_longer": [], 29 | "posts_with_hashtag_to_like": [], 30 | "posts_of_content_creators_to_like": [], 31 | "posts_of_music_ids_to_like": [], 32 | "collecting_data_for_first_posts": true 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/follow_gb_user-53-54.json: -------------------------------------------------------------------------------- 1 | { 2 | "53": { 3 | "test_user_id": 53, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [0, 1, 0], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": false 16 | }, 17 | "54": { 18 | "test_user_id": 54, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | 
"posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": false 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/follow_us_user-153-154.json: -------------------------------------------------------------------------------- 1 | { 2 | "153": { 3 | "test_user_id": 153, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": true, 7 | "time_to_look_at_post_action": 0, 8 | "time_to_look_at_post_normal": 0.5, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [0, 0, 0], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_like": [], 14 | "posts_with_hashtag_to_watch_longer": [], 15 | "posts_of_content_creators_to_like": [], 16 | "posts_of_music_ids_to_like": [], 17 | "collecting_data_for_first_posts": true 18 | }, 19 | "154": { 20 | "test_user_id": 154, 21 | "login": true, 22 | "browser_language": "en", 23 | "reuse_cookies": true, 24 | "time_to_look_at_post_action": 0, 25 | "time_to_look_at_post_normal": 0.5, 26 | "number_of_batches": 3, 27 | "number_of_posts_to_like_per_batch": [], 28 | "number_of_creators_to_follow_per_batch": [], 29 | "number_of_posts_to_watch_longer_per_batch": [], 30 | "posts_with_hashtag_to_like": [], 31 | "posts_with_hashtag_to_watch_longer": [], 32 | "posts_of_content_creators_to_like": [], 33 | "posts_of_music_ids_to_like": [], 34 | "collecting_data_for_first_posts": true 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/follow_us_user-155-156.json: -------------------------------------------------------------------------------- 1 | { 2 | "155": { 3 | "test_user_id": 155, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": true, 7 | "time_to_look_at_post_action": 0, 8 | 
"time_to_look_at_post_normal": 0.5, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [0, 1, 0], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_like": [], 14 | "posts_with_hashtag_to_watch_longer": [], 15 | "posts_of_content_creators_to_like": [], 16 | "posts_of_music_ids_to_like": [], 17 | "collecting_data_for_first_posts": false 18 | }, 19 | "156": { 20 | "test_user_id": 156, 21 | "login": true, 22 | "browser_language": "en", 23 | "reuse_cookies": true, 24 | "time_to_look_at_post_action": 0, 25 | "time_to_look_at_post_normal": 0.5, 26 | "number_of_batches": 3, 27 | "number_of_posts_to_like_per_batch": [], 28 | "number_of_creators_to_follow_per_batch": [], 29 | "number_of_posts_to_watch_longer_per_batch": [], 30 | "posts_with_hashtag_to_like": [], 31 | "posts_with_hashtag_to_watch_longer": [], 32 | "posts_of_content_creators_to_like": [], 33 | "posts_of_music_ids_to_like": [], 34 | "collecting_data_for_first_posts": false 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/follow_us_user-47-48.json: -------------------------------------------------------------------------------- 1 | { 2 | "47": { 3 | "test_user_id": 47, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [0, 0, 0], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": true 16 | }, 17 | "48": { 18 | "test_user_id": 48, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, 
"time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/follow_us_user-49-50.json: -------------------------------------------------------------------------------- 1 | { 2 | "49": { 3 | "test_user_id": 49, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [0, 0, 0], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_like": [], 12 | "posts_with_hashtag_to_watch_longer": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": false 16 | }, 17 | "50": { 18 | "test_user_id": 50, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_like": [], 27 | "posts_with_hashtag_to_watch_longer": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": false 31 | } 32 | } 33 | -------------------------------------------------------------------------------- 
/Testing/TestSets/part_1_tests/like_gb_user-61-62.json: -------------------------------------------------------------------------------- 1 | { 2 | "61": { 3 | "test_user_id": 61, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 5, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": ["cat", "dog", "pet", "dogsoftiktok", "catsoftiktok", "cute", "puppy", "dogs", 13 | "cats", "animals", "petsoftiktok", "kitten"], 14 | "posts_of_content_creators_to_like": [], 15 | "posts_of_music_ids_to_like": [], 16 | "collecting_data_for_first_posts": false 17 | }, 18 | "62": { 19 | "test_user_id": 62, 20 | "login": true, 21 | "browser_language": "en", 22 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 23 | "number_of_batches": 5, 24 | "number_of_posts_to_like_per_batch": [], 25 | "number_of_creators_to_follow_per_batch": [], 26 | "number_of_posts_to_watch_longer_per_batch": [], 27 | "posts_with_hashtag_to_watch_longer": [], 28 | "posts_with_hashtag_to_like": [], 29 | "posts_of_content_creators_to_like": [], 30 | "posts_of_music_ids_to_like": [], 31 | "collecting_data_for_first_posts": false 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/like_gb_user-63-64.json: -------------------------------------------------------------------------------- 1 | { 2 | "63": { 3 | "test_user_id": 63, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0,"time_to_look_at_post_normal": 0.5, 8 | "number_of_batches": 5, 9 | "number_of_posts_to_like_per_batch": [], 10 | "number_of_creators_to_follow_per_batch": [], 11 | 
"number_of_posts_to_watch_longer_per_batch": [], 12 | "posts_with_hashtag_to_like": ["football", "food", "euro2020", "movie", "foodtiktok", "gaming", "film", 13 | "tiktokfood", "gta5", "gta", "minecraft", "marvel"], 14 | "posts_with_hashtag_to_watch_longer": [], 15 | "posts_of_content_creators_to_like": [], 16 | "posts_of_music_ids_to_like": [], 17 | "collecting_data_for_first_posts": false 18 | }, 19 | "64": { 20 | "test_user_id": 64, 21 | "login": true, 22 | "browser_language": "en", 23 | "reuse_cookies": false, 24 | "time_to_look_at_post_action": 0,"time_to_look_at_post_normal": 0.5, 25 | "number_of_batches": 5, 26 | "number_of_posts_to_like_per_batch": [], 27 | "number_of_creators_to_follow_per_batch": [], 28 | "number_of_posts_to_watch_longer_per_batch": [], 29 | "posts_with_hashtag_to_like": [], 30 | "posts_with_hashtag_to_watch_longer": [], 31 | "posts_of_content_creators_to_like": [], 32 | "posts_of_music_ids_to_like": [], 33 | "collecting_data_for_first_posts": false 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/like_us_user-111-112.json: -------------------------------------------------------------------------------- 1 | { 2 | "111": { 3 | "test_user_id": 111, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": ["miakhalifa", "alex.stemp", "coco224466", "....jjesus", "lama_mama1", "espn", 14 | "pilaui", "basii_17", "spicekingcam", "the_grinchofficial", "petassembly", "texasbeeworks", "littlecajunhouse", 15 | "iamdivinelyloved", "camwilder", "badparentingmoments", "dianarantamaki", 
"dm_t.v", "majorkeylife", "thejoshelkin", 16 | "jacob_t_king", "user4350486101671", "billieeilish", "jackblack", "dermdoctor", "aymieandgracie", "copslivetv", 17 | "daddygus99", "joe.bartolozzi", "_.video_.edits.23", "leytink", "n.ikotheking", "paulana52", "kallmekris", 18 | "iamjonathanpeter", "mndiaye_97", "genltart"], 19 | "posts_of_music_ids_to_like": [], 20 | "collecting_data_for_first_posts": true 21 | }, 22 | "112": { 23 | "test_user_id": 112, 24 | "login": true, 25 | "browser_language": "en", 26 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 27 | "number_of_batches": 3, 28 | "number_of_posts_to_like_per_batch": [], 29 | "number_of_creators_to_follow_per_batch": [], 30 | "number_of_posts_to_watch_longer_per_batch": [], 31 | "posts_with_hashtag_to_watch_longer": [], 32 | "posts_with_hashtag_to_like": [], 33 | "posts_of_content_creators_to_like": [], 34 | "posts_of_music_ids_to_like": [], 35 | "collecting_data_for_first_posts": true 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/like_us_user-113-114.json: -------------------------------------------------------------------------------- 1 | { 2 | "113": { 3 | "test_user_id": 113, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": ["miakhalifa", "alex.stemp", "southcentraljag", "user4350486101671", 14 | "spicekingcam", "joe.bartolozzi", "coco224466", "mndiaye_97", "billieeilish", "daddygus99", "edmundrambo", 15 | "feast24seven", "copslivetv", "pilaui", "dm_t.v", "papi_pilas", "kate_johansson", 
"dermdoctor", 16 | "patulafamilymcdonalds", "littlecajunhouse", "basii_17", "camwilder", "bonita_alonna", "moontellthat", 17 | "paulana52", "kallmekris", "n.ikotheking", "_.video_.edits.23", "....jjesus", "alivaheeronms", "sunflowertubie", 18 | "lama_mama1", "ace_dadd", "beforenafter13", "khaby.lame", "americanbullish", "iamjonathanpeter", 19 | "noah_and_lincoln", "mamalindy", "hudabeauty", "manchasthetiktoker", "aymieandgracie", "jesusacevedox43", 20 | "hypermilt2", "izzy.tube", "willsmith", "614lyfe", "anxietycouple", "campuzanoabelardo"], 21 | "posts_of_music_ids_to_like": [], 22 | "collecting_data_for_first_posts": true 23 | }, 24 | "114": { 25 | "test_user_id": 114, 26 | "login": true, 27 | "browser_language": "en", 28 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 29 | "number_of_batches": 3, 30 | "number_of_posts_to_like_per_batch": [], 31 | "number_of_creators_to_follow_per_batch": [], 32 | "number_of_posts_to_watch_longer_per_batch": [], 33 | "posts_with_hashtag_to_watch_longer": [], 34 | "posts_with_hashtag_to_like": [], 35 | "posts_of_content_creators_to_like": [], 36 | "posts_of_music_ids_to_like": [], 37 | "collecting_data_for_first_posts": true 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/like_us_user-115-116.json: -------------------------------------------------------------------------------- 1 | { 2 | "115": { 3 | "test_user_id": 115, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [ 
15 | 6972655154873796610, 16 | 6973722766373570561, 17 | 6586947002464996102, 18 | 5000000001320781379, 19 | 222597111081832449, 20 | 6971562931527912197, 21 | 6964101918863969030, 22 | 6746993352891189249, 23 | 6851526062120110854, 24 | 6853205461995375365, 25 | 6967885968040889094, 26 | 6787142837377959937, 27 | 6769046027488987137, 28 | 6926087831404251909, 29 | 6755976952189814785, 30 | 6656534537050409734, 31 | 6947968309945993218, 32 | 6974092128280464133, 33 | 6926304768692456197, 34 | 6952604284408187654, 35 | 6851352741625809669, 36 | 6956662916695197697, 37 | 222453214057697280, 38 | 6981591741455436550, 39 | 242638364112424960, 40 | 6973004412159625989, 41 | 6983473612736957185, 42 | 6984961588641975046, 43 | 6690892217998985990, 44 | 6778795245078416134, 45 | 6601410777356176133, 46 | 6987878637571803910, 47 | 6982992938041117446, 48 | 6985976570515032838, 49 | 6902376363227891714, 50 | 6807984440287955717, 51 | 5000000000755653951, 52 | 6942182583350332165, 53 | 6841255111478478849, 54 | 246423207128506368, 55 | 6939813781522369282, 56 | 222522673426305024, 57 | 6845002070534245125, 58 | 6954740638508124162, 59 | 6932058866448336897, 60 | 6891115298016070402, 61 | 6971462283046210309, 62 | 6975199470258588421 63 | ], 64 | "collecting_data_for_first_posts": true 65 | }, 66 | "116": { 67 | "test_user_id": 116, 68 | "login": true, 69 | "browser_language": "en", 70 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 71 | "number_of_batches": 3, 72 | "number_of_posts_to_like_per_batch": [], 73 | "number_of_creators_to_follow_per_batch": [], 74 | "number_of_posts_to_watch_longer_per_batch": [], 75 | "posts_with_hashtag_to_watch_longer": [], 76 | "posts_with_hashtag_to_like": [], 77 | "posts_of_content_creators_to_like": [], 78 | "posts_of_music_ids_to_like": [], 79 | "collecting_data_for_first_posts": true 80 | } 81 | } 82 | -------------------------------------------------------------------------------- 
/Testing/TestSets/part_1_tests/like_us_user-117-118.json: -------------------------------------------------------------------------------- 1 | { 2 | "117": { 3 | "test_user_id": 117, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [ 15 | 6586947002464996102, 16 | 6972655154873796610, 17 | 6787142837377959937, 18 | 6971562931527912197, 19 | 5000000001320781379, 20 | 6880156461125683973, 21 | 6756879114637967368, 22 | 6971625163129916161, 23 | 6769046027488987137, 24 | 6967885968040889094, 25 | 6981184452680633094, 26 | 6863280745893432069, 27 | 6973722766373570561, 28 | 6851526062120110854, 29 | 6964101918863969030, 30 | 6941153946308266757, 31 | 6955275373810666245, 32 | 6823997997395806977, 33 | 6961786056861550594, 34 | 6971462283046210309, 35 | 6937042766442580741, 36 | 6853205461995375365, 37 | 6967526237812132613, 38 | 250835551346647040, 39 | 6956662916695197697, 40 | 6980460053367163654, 41 | 6938232357673502722, 42 | 242638364112424960, 43 | 6906155392053544962, 44 | 6963878903706389254, 45 | 6823085155259828997, 46 | 6926087831404251909, 47 | 6979702143154473733, 48 | 6979478166331017990, 49 | 6980890639441349382, 50 | 6983862471790512902, 51 | 6983495999868259077, 52 | 6980715546278972165, 53 | 6601410777356176133, 54 | 6980886814055877381, 55 | 6974191473717742341, 56 | 6931747481009097477, 57 | 6987985525509229317, 58 | 6884523487445240578, 59 | 5000000000755653951, 60 | 6937723642352503557, 61 | 6739272387222702853, 62 | 6965640046342867717, 63 | 222522673426305024, 64 | 6947865088364414978, 65 | 6954740638508124162, 66 
| 6947968180685899777, 67 | 6958283382199552773, 68 | 6974011130414893058, 69 | 6790057285126195201, 70 | 6967644755471764230, 71 | 6915181512233110277, 72 | 6705099686660802561, 73 | 6977516346384927493, 74 | 6656534537050409734, 75 | 6963002460872903430, 76 | 6879830140994489093 77 | ], 78 | "collecting_data_for_first_posts": true 79 | }, 80 | "118": { 81 | "test_user_id": 118, 82 | "login": true, 83 | "browser_language": "en", 84 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 85 | "number_of_batches": 3, 86 | "number_of_posts_to_like_per_batch": [], 87 | "number_of_creators_to_follow_per_batch": [], 88 | "number_of_posts_to_watch_longer_per_batch": [], 89 | "posts_with_hashtag_to_watch_longer": [], 90 | "posts_with_hashtag_to_like": [], 91 | "posts_of_content_creators_to_like": [], 92 | "posts_of_music_ids_to_like": [], 93 | "collecting_data_for_first_posts": true 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/like_us_user-123-124.json: -------------------------------------------------------------------------------- 1 | { 2 | "123": { 3 | "test_user_id": 123, 4 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 5 | "browser_language": "en", 6 | "login": true, 7 | "number_of_batches": 3, 8 | "number_of_creators_to_follow_per_batch": [], 9 | "number_of_posts_to_like_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_of_content_creators_to_like": [], 12 | "posts_of_music_ids_to_like": [], 13 | "posts_with_hashtag_to_like": [ 14 | "football", 15 | "food", 16 | "euro2020", 17 | "movie", 18 | "foodtiktok", 19 | "gaming", 20 | "film", 21 | "tiktokfood", 22 | "gta5", 23 | "gta", 24 | "minecraft", 25 | "marvel", 26 | "cat", 27 | "dog", 28 | "pet", 29 | "dogsoftiktok", 30 | "catsoftiktok", 31 | "cute", 32 | "puppy", 33 | "dogs", 34 | "cats", 35 | "animals", 36 | 
"petsoftiktok", 37 | "kitten", 38 | "comedy", 39 | "asmr", 40 | "learnontiktok", 41 | "satisfying", 42 | "lol", 43 | "love", 44 | "humour", 45 | "couple", 46 | "foodie", 47 | "baby", 48 | "car", 49 | "cars", 50 | "jokes", 51 | "lifehack", 52 | "satisfyingvideo", 53 | "relationship", 54 | "cooking", 55 | "laugh", 56 | "fun" 57 | ], 58 | "posts_with_hashtag_to_watch_longer": [], 59 | "collecting_data_for_first_posts": true 60 | }, 61 | "124": { 62 | "test_user_id": 124, 63 | "login": true, 64 | "browser_language": "en", 65 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 66 | "number_of_batches": 3, 67 | "number_of_posts_to_like_per_batch": [], 68 | "number_of_creators_to_follow_per_batch": [], 69 | "number_of_posts_to_watch_longer_per_batch": [], 70 | "posts_with_hashtag_to_watch_longer": [], 71 | "posts_with_hashtag_to_like": [], 72 | "posts_of_content_creators_to_like": [], 73 | "posts_of_music_ids_to_like": [], 74 | "collecting_data_for_first_posts": true 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/like_us_user-135-136.json: -------------------------------------------------------------------------------- 1 | { 2 | "135": { 3 | "test_user_id": 135, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": ["kylethomas", "leytink", "alex.stemp", "sherellmartini", 14 | "juandemontrealoficial", "lunchbreak_al", "hudabeauty", "kate_johansson", "goose_stu", "kallmekris", 15 | "kapsalonfreedomboxmeer", "miakhalifa", "joe.bartolozzi", "coco224466", 
"khaby.lame", "partyshirt", "mndiaye_97", 16 | "dianarantamaki", "brodywellmaker", "louflores_", "makayla.domagalski1", "anxietycouple", "espn", "nanajoe19", 17 | "thepetcollective", "dina", "thefurrhafamily", "kessel_nathan_official", "jaylucky7", "moontellthat", 18 | "selenagomez", "lizzo", "umql0", "_verobo_", "daveyrz", "kelz", "hudanoor07", "vet.crew", "_catben_", 19 | "gertieinar", "isaiahgarzaintl"], 20 | "posts_of_music_ids_to_like": [], 21 | "collecting_data_for_first_posts": true 22 | }, 23 | "136": { 24 | "test_user_id": 136, 25 | "login": true, 26 | "browser_language": "en", 27 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 28 | "number_of_batches": 3, 29 | "number_of_posts_to_like_per_batch": [], 30 | "number_of_creators_to_follow_per_batch": [], 31 | "number_of_posts_to_watch_longer_per_batch": [], 32 | "posts_with_hashtag_to_watch_longer": [], 33 | "posts_with_hashtag_to_like": [], 34 | "posts_of_content_creators_to_like": [], 35 | "posts_of_music_ids_to_like": [], 36 | "collecting_data_for_first_posts": true 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/like_us_user-159-160.json: -------------------------------------------------------------------------------- 1 | { 2 | "159": { 3 | "test_user_id": 159, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": true, 7 | "time_to_look_at_post_action": 0, 8 | "time_to_look_at_post_normal": 0.5, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_watch_longer": [], 14 | "posts_with_hashtag_to_like": [ 15 | "movie", 16 | "film", 17 | "marvel", 18 | "foodtiktok", 19 | "tiktokfood", 20 | "foodie", 21 | "cooking", 22 | "food", 23 | "gaming", 24 | "gta5", 25 | "gta", 26 | "minecraft", 27 | "cat", 28 | "dog", 29 | 
"pet", 30 | "dogsoftiktok", 31 | "catsoftiktok", 32 | "cute", 33 | "puppy", 34 | "dogs", 35 | "cats", 36 | "animals", 37 | "petsoftiktok", 38 | "kitten", 39 | "comedy", 40 | "lol", 41 | "humour", 42 | "laugh", 43 | "fun", 44 | "jokes", 45 | "love", 46 | "couple", 47 | "relationship" 48 | ], 49 | "posts_of_content_creators_to_like": [], 50 | "posts_of_music_ids_to_like": [], 51 | "collecting_data_for_first_posts": true 52 | }, 53 | "160": { 54 | "test_user_id": 160, 55 | "login": true, 56 | "browser_language": "en", 57 | "reuse_cookies": true, 58 | "time_to_look_at_post_action": 0, 59 | "time_to_look_at_post_normal": 0.5, 60 | "number_of_batches": 3, 61 | "number_of_posts_to_like_per_batch": [], 62 | "number_of_creators_to_follow_per_batch": [], 63 | "number_of_posts_to_watch_longer_per_batch": [], 64 | "posts_with_hashtag_to_watch_longer": [], 65 | "posts_with_hashtag_to_like": [], 66 | "posts_of_content_creators_to_like": [], 67 | "posts_of_music_ids_to_like": [], 68 | "collecting_data_for_first_posts": true 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/like_us_user-45-46.json: -------------------------------------------------------------------------------- 1 | { 2 | "45": { 3 | "test_user_id": 45, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 5, 8 | "number_of_posts_to_like_per_batch": [0, 6, 6, 6, 0], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "posts_with_hashtag_to_like": [], 11 | "number_of_posts_to_watch_longer_per_batch": [], 12 | "posts_with_hashtag_to_watch_longer": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": false 16 | }, 17 | "46": { 18 | "test_user_id": 46, 19 | "login": false, 20 | "browser_language": "en", 21 | "reuse_cookies": false, 
"time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 5, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "posts_with_hashtag_to_like": [], 26 | "number_of_posts_to_watch_longer_per_batch": [], 27 | "posts_with_hashtag_to_watch_longer": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": false 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/like_us_user-59-60.json: -------------------------------------------------------------------------------- 1 | { 2 | "59": { 3 | "test_user_id": 59, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 5, 8 | "number_of_posts_to_like_per_batch": [0, 6, 6, 6, 0], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": true 16 | }, 17 | "60": { 18 | "test_user_id": 60, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 5, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- 
/Testing/TestSets/part_1_tests/like_us_user-70-71.json: -------------------------------------------------------------------------------- 1 | { 2 | "70": { 3 | "test_user_id": 70, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 5, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": ["football", "food", "euro2020", "movie", "foodtiktok", "gaming", "film", 13 | "tiktokfood", "gta5", "gta", "minecraft", "marvel", "cat", "dog", "pet", "dogsoftiktok", "catsoftiktok", "cute", 14 | "puppy", "dogs", "cats", "animals", "petsoftiktok", "kitten"], 15 | "posts_of_content_creators_to_like": [], 16 | "posts_of_music_ids_to_like": [], 17 | "collecting_data_for_first_posts": false 18 | }, 19 | "71": { 20 | "test_user_id": 71, 21 | "login": true, 22 | "browser_language": "en", 23 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 24 | "number_of_batches": 5, 25 | "number_of_posts_to_like_per_batch": [], 26 | "number_of_creators_to_follow_per_batch": [], 27 | "number_of_posts_to_watch_longer_per_batch": [], 28 | "posts_with_hashtag_to_watch_longer": [], 29 | "posts_with_hashtag_to_like": [], 30 | "posts_of_content_creators_to_like": [], 31 | "posts_of_music_ids_to_like": [], 32 | "collecting_data_for_first_posts": false 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/location-1_ca_user-99-100.json: -------------------------------------------------------------------------------- 1 | { 2 | "99": { 3 | "test_user_id": 99, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | 
"number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": true 16 | }, 17 | "100": { 18 | "test_user_id": 100, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/location-1_us_user-97-98.json: -------------------------------------------------------------------------------- 1 | { 2 | "97": { 3 | "test_user_id": 97, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": true 16 | }, 17 | "98": { 18 | "test_user_id": 98, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 
22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/location-2_ca_user-101-102.json: -------------------------------------------------------------------------------- 1 | { 2 | "101": { 3 | "test_user_id": 101, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": true 16 | }, 17 | "102": { 18 | "test_user_id": 102, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/location-2_us_user-105-106.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "105": { 3 | "test_user_id": 105, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": true 16 | }, 17 | "106": { 18 | "test_user_id": 106, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/location-3_de_user-107-108.json: -------------------------------------------------------------------------------- 1 | { 2 | "107": { 3 | "test_user_id": 107, 4 | "login": true, 5 | "browser_language": "de", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 
14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": true 16 | }, 17 | "108": { 18 | "test_user_id": 108, 19 | "login": true, 20 | "browser_language": "de", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/location-3_us_user-103-104.json: -------------------------------------------------------------------------------- 1 | { 2 | "103": { 3 | "test_user_id": 103, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 8 | "number_of_batches": 3, 9 | "number_of_posts_to_like_per_batch": [], 10 | "number_of_creators_to_follow_per_batch": [], 11 | "number_of_posts_to_watch_longer_per_batch": [], 12 | "posts_with_hashtag_to_watch_longer": [], 13 | "posts_with_hashtag_to_like": [], 14 | "posts_of_content_creators_to_like": [], 15 | "posts_of_music_ids_to_like": [], 16 | "collecting_data_for_first_posts": true 17 | }, 18 | "104": { 19 | "test_user_id": 104, 20 | "login": true, 21 | "browser_language": "en", 22 | "reuse_cookies": false, 23 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 24 | "number_of_batches": 3, 25 | "number_of_posts_to_like_per_batch": [], 26 | "number_of_creators_to_follow_per_batch": [], 27 | "number_of_posts_to_watch_longer_per_batch": [], 28 | "posts_with_hashtag_to_watch_longer": [], 29 | "posts_with_hashtag_to_like": [], 30 | 
"posts_of_content_creators_to_like": [], 31 | "posts_of_music_ids_to_like": [], 32 | "collecting_data_for_first_posts": true 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/location-4-de_us_user-109-110.json: -------------------------------------------------------------------------------- 1 | { 2 | "109": { 3 | "test_user_id": 109, 4 | "login": true, 5 | "browser_language": "de", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": true 16 | }, 17 | "110": { 18 | "test_user_id": 110, 19 | "login": true, 20 | "browser_language": "de", 21 | "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/location-4-en_us_user-129-132.json: -------------------------------------------------------------------------------- 1 | { 2 | "129": { 3 | "test_user_id": 129, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | 
"number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": true 16 | }, 17 | "132": { 18 | "test_user_id": 132, 19 | "login": true, 20 | "browser_language": "en", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/location-4-es_us_user-130-133.json: -------------------------------------------------------------------------------- 1 | { 2 | "130": { 3 | "test_user_id": 130, 4 | "login": true, 5 | "browser_language": "es", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": true 16 | }, 17 | "133": { 18 | "test_user_id": 133, 19 | "login": true, 20 | "browser_language": "es", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | 
"number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/location-4-fr_us_user-131-134.json: -------------------------------------------------------------------------------- 1 | { 2 | "131": { 3 | "test_user_id": 131, 4 | "login": true, 5 | "browser_language": "fr", 6 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 7 | "number_of_batches": 3, 8 | "number_of_posts_to_like_per_batch": [], 9 | "number_of_creators_to_follow_per_batch": [], 10 | "number_of_posts_to_watch_longer_per_batch": [], 11 | "posts_with_hashtag_to_watch_longer": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_of_content_creators_to_like": [], 14 | "posts_of_music_ids_to_like": [], 15 | "collecting_data_for_first_posts": true 16 | }, 17 | "134": { 18 | "test_user_id": 134, 19 | "login": true, 20 | "browser_language": "fr", 21 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 22 | "number_of_batches": 3, 23 | "number_of_posts_to_like_per_batch": [], 24 | "number_of_creators_to_follow_per_batch": [], 25 | "number_of_posts_to_watch_longer_per_batch": [], 26 | "posts_with_hashtag_to_watch_longer": [], 27 | "posts_with_hashtag_to_like": [], 28 | "posts_of_content_creators_to_like": [], 29 | "posts_of_music_ids_to_like": [], 30 | "collecting_data_for_first_posts": true 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/test_user_11.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "11": { 3 | "test_user_id": 11, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": true, 7 | "time_to_look_at_post_action": 0, 8 | "time_to_look_at_post_normal": 2, 9 | "number_of_posts_to_like_per_batch": [], 10 | "number_of_creators_to_follow_per_batch": [], 11 | "number_of_posts_to_watch_longer_per_batch": [], 12 | "posts_with_hashtag_to_like": [], 13 | "posts_with_hashtag_to_watch_longer": [], 14 | "posts_of_content_creators_to_like": [], 15 | "posts_of_music_ids_to_like": [], 16 | "collecting_data_for_first_posts": true, 17 | "number_of_batches": 1 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/vcr_us_user-127-128.json: -------------------------------------------------------------------------------- 1 | { 2 | "127": { 3 | "test_user_id": 127, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 4, 8 | "time_to_look_at_post_normal": 2, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_watch_longer": [ 14 | "football", 15 | "food", 16 | "euro2020", 17 | "movie", 18 | "foodtiktok", 19 | "gaming", 20 | "film", 21 | "tiktokfood", 22 | "gta5", 23 | "gta", 24 | "minecraft", 25 | "marvel", 26 | "cat", 27 | "dog", 28 | "pet", 29 | "dogsoftiktok", 30 | "catsoftiktok", 31 | "cute", 32 | "puppy", 33 | "dogs", 34 | "cats", 35 | "animals", 36 | "petsoftiktok", 37 | "kitten", 38 | "comedy", 39 | "asmr", 40 | "learnontiktok", 41 | "satisfying", 42 | "lol", 43 | "love", 44 | "humour", 45 | "couple", 46 | "foodie", 47 | "baby", 48 | "car", 49 | "cars", 50 | "jokes", 51 | "lifehack", 52 | "satisfyingvideo", 53 | "relationship", 54 | "cooking", 55 | "laugh", 56 | "fun" 57 | ], 
58 | "posts_with_hashtag_to_like": [], 59 | "posts_of_content_creators_to_like": [], 60 | "posts_of_music_ids_to_like": [], 61 | "collecting_data_for_first_posts": false 62 | }, 63 | "128": { 64 | "test_user_id": 128, 65 | "login": true, 66 | "browser_language": "en", 67 | "reuse_cookies": false, 68 | "time_to_look_at_post_action": 0, 69 | "time_to_look_at_post_normal": 2, 70 | "number_of_batches": 3, 71 | "number_of_posts_to_like_per_batch": [], 72 | "number_of_creators_to_follow_per_batch": [], 73 | "number_of_posts_to_watch_longer_per_batch": [], 74 | "posts_with_hashtag_to_watch_longer": [], 75 | "posts_with_hashtag_to_like": [], 76 | "posts_of_content_creators_to_like": [], 77 | "posts_of_music_ids_to_like": [], 78 | "collecting_data_for_first_posts": false 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/vcr_us_user-145-146.json: -------------------------------------------------------------------------------- 1 | { 2 | "145": { 3 | "test_user_id": 145, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0.75, 8 | "time_to_look_at_post_normal": 2, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_watch_longer": ["football", "food", "euro2020", "movie", "foodtiktok", "gaming", "film", 14 | "tiktokfood", "gta5", "gta", "minecraft", "marvel", "cat", "dog", "pet", "dogsoftiktok", "catsoftiktok", "cute", 15 | "puppy", "dogs", "cats", "animals", "petsoftiktok", "kitten"], 16 | "posts_with_hashtag_to_like": [], 17 | "posts_of_content_creators_to_like": [], 18 | "posts_of_music_ids_to_like": [], 19 | "collecting_data_for_first_posts": false 20 | }, 21 | "146": { 22 | "test_user_id": 146, 23 | "login": true, 24 | "browser_language": "en", 25 | "reuse_cookies": false, 26 | 
"time_to_look_at_post_action": 0, 27 | "time_to_look_at_post_normal": 2, 28 | "number_of_batches": 3, 29 | "number_of_posts_to_like_per_batch": [], 30 | "number_of_creators_to_follow_per_batch": [], 31 | "number_of_posts_to_watch_longer_per_batch": [], 32 | "posts_with_hashtag_to_watch_longer": [], 33 | "posts_with_hashtag_to_like": [], 34 | "posts_of_content_creators_to_like": [], 35 | "posts_of_music_ids_to_like": [], 36 | "collecting_data_for_first_posts": false 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/vcr_us_user-151-152.json: -------------------------------------------------------------------------------- 1 | { 2 | "151": { 3 | "test_user_id": 151, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": true, 7 | "time_to_look_at_post_action": 4, 8 | "time_to_look_at_post_normal": 2, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_watch_longer": [ 14 | "movie", 15 | "film", 16 | "marvel", 17 | "foodtiktok", 18 | "tiktokfood", 19 | "foodie", 20 | "cooking", 21 | "food", 22 | "gaming", 23 | "gta5", 24 | "gta", 25 | "minecraft", 26 | "cat", 27 | "dog", 28 | "pet", 29 | "dogsoftiktok", 30 | "catsoftiktok", 31 | "cute", 32 | "puppy", 33 | "dogs", 34 | "cats", 35 | "animals", 36 | "petsoftiktok", 37 | "kitten", 38 | "comedy", 39 | "lol", 40 | "humour", 41 | "laugh", 42 | "fun", 43 | "jokes", 44 | "love", 45 | "couple", 46 | "relationship" 47 | ], 48 | "posts_with_hashtag_to_like": [], 49 | "posts_of_content_creators_to_like": [], 50 | "posts_of_music_ids_to_like": [], 51 | "collecting_data_for_first_posts": false 52 | }, 53 | "152": { 54 | "test_user_id": 152, 55 | "login": true, 56 | "browser_language": "en", 57 | "reuse_cookies": true, 58 | "time_to_look_at_post_action": 0, 59 | 
"time_to_look_at_post_normal": 2, 60 | "number_of_batches": 3, 61 | "number_of_posts_to_like_per_batch": [], 62 | "number_of_creators_to_follow_per_batch": [], 63 | "number_of_posts_to_watch_longer_per_batch": [], 64 | "posts_with_hashtag_to_watch_longer": [], 65 | "posts_with_hashtag_to_like": [], 66 | "posts_of_content_creators_to_like": [], 67 | "posts_of_music_ids_to_like": [], 68 | "collecting_data_for_first_posts": false 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/vcr_us_user-157-158.json: -------------------------------------------------------------------------------- 1 | { 2 | "157": { 3 | "test_user_id": 157, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": true, 7 | "time_to_look_at_post_action": 4, 8 | "time_to_look_at_post_normal": 0.5, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_watch_longer": [ 14 | "movie", 15 | "film", 16 | "marvel", 17 | "foodtiktok", 18 | "tiktokfood", 19 | "foodie", 20 | "cooking", 21 | "food", 22 | "gaming", 23 | "gta5", 24 | "gta", 25 | "minecraft", 26 | "cat", 27 | "dog", 28 | "pet", 29 | "dogsoftiktok", 30 | "catsoftiktok", 31 | "cute", 32 | "puppy", 33 | "dogs", 34 | "cats", 35 | "animals", 36 | "petsoftiktok", 37 | "kitten", 38 | "comedy", 39 | "lol", 40 | "humour", 41 | "laugh", 42 | "fun", 43 | "jokes", 44 | "love", 45 | "couple", 46 | "relationship" 47 | ], 48 | "posts_with_hashtag_to_like": [], 49 | "posts_of_content_creators_to_like": [], 50 | "posts_of_music_ids_to_like": [], 51 | "collecting_data_for_first_posts": false 52 | }, 53 | "158": { 54 | "test_user_id": 158, 55 | "login": true, 56 | "browser_language": "en", 57 | "reuse_cookies": true, 58 | "time_to_look_at_post_action": 0, 59 | "time_to_look_at_post_normal": 0.5, 60 | "number_of_batches": 3, 
61 | "number_of_posts_to_like_per_batch": [], 62 | "number_of_creators_to_follow_per_batch": [], 63 | "number_of_posts_to_watch_longer_per_batch": [], 64 | "posts_with_hashtag_to_watch_longer": [], 65 | "posts_with_hashtag_to_like": [], 66 | "posts_of_content_creators_to_like": [], 67 | "posts_of_music_ids_to_like": [], 68 | "collecting_data_for_first_posts": false 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/vcr_us_user-163-164.json: -------------------------------------------------------------------------------- 1 | { 2 | "163": { 3 | "test_user_id": 163, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": true, 7 | "time_to_look_at_post_action": 4, 8 | "time_to_look_at_post_normal": 0.5, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_watch_longer": [ 14 | 15 | ], 16 | "posts_with_hashtag_to_like": [], 17 | "posts_of_content_creators_to_like": [], 18 | "posts_of_music_ids_to_like": [], 19 | "collecting_data_for_first_posts": false 20 | }, 21 | "164": { 22 | "test_user_id": 164, 23 | "login": true, 24 | "browser_language": "en", 25 | "reuse_cookies": true, 26 | "time_to_look_at_post_action": 0, 27 | "time_to_look_at_post_normal": 0.5, 28 | "number_of_batches": 3, 29 | "number_of_posts_to_like_per_batch": [], 30 | "number_of_creators_to_follow_per_batch": [], 31 | "number_of_posts_to_watch_longer_per_batch": [], 32 | "posts_with_hashtag_to_watch_longer": [], 33 | "posts_with_hashtag_to_like": [], 34 | "posts_of_content_creators_to_like": [], 35 | "posts_of_music_ids_to_like": [], 36 | "collecting_data_for_first_posts": false 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/vcr_us_user-77-78.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "77": { 3 | "test_user_id": 77, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0, 8 | "time_to_look_at_post_normal": 2, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [10, 10, 10], 13 | "posts_with_hashtag_to_watch_longer": [], 14 | "posts_with_hashtag_to_like": [], 15 | "posts_of_content_creators_to_like": [], 16 | "posts_of_music_ids_to_like": [], 17 | "collecting_data_for_first_posts": false 18 | }, 19 | "78": { 20 | "test_user_id": 78, 21 | "login": true, 22 | "browser_language": "en", 23 | "reuse_cookies": false, "time_to_look_at_post_action": 0, "time_to_look_at_post_normal": 2, 24 | "number_of_batches": 3, 25 | "number_of_posts_to_like_per_batch": [], 26 | "number_of_creators_to_follow_per_batch": [], 27 | "number_of_posts_to_watch_longer_per_batch": [], 28 | "posts_with_hashtag_to_watch_longer": [], 29 | "posts_with_hashtag_to_like": [], 30 | "posts_of_content_creators_to_like": [], 31 | "posts_of_music_ids_to_like": [], 32 | "collecting_data_for_first_posts": false 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/vcr_us_user-79-80.json: -------------------------------------------------------------------------------- 1 | { 2 | "79": { 3 | "test_user_id": 79, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0.5, 8 | "time_to_look_at_post_normal": 2, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [10, 10, 10], 13 | "posts_with_hashtag_to_watch_longer": [], 14 | "posts_with_hashtag_to_like": [], 15 | 
"posts_of_content_creators_to_like": [], 16 | "posts_of_music_ids_to_like": [], 17 | "collecting_data_for_first_posts": false 18 | }, 19 | "80": { 20 | "test_user_id": 80, 21 | "login": true, 22 | "browser_language": "en", 23 | "reuse_cookies": false, 24 | "time_to_look_at_post_action": 0, 25 | "time_to_look_at_post_normal": 2, 26 | "number_of_batches": 3, 27 | "number_of_posts_to_like_per_batch": [], 28 | "number_of_creators_to_follow_per_batch": [], 29 | "number_of_posts_to_watch_longer_per_batch": [], 30 | "posts_with_hashtag_to_watch_longer": [], 31 | "posts_with_hashtag_to_like": [], 32 | "posts_of_content_creators_to_like": [], 33 | "posts_of_music_ids_to_like": [], 34 | "collecting_data_for_first_posts": false 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/vcr_us_user-81-82.json: -------------------------------------------------------------------------------- 1 | { 2 | "81": { 3 | "test_user_id": 81, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0.75, 8 | "time_to_look_at_post_normal": 2, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [10, 10, 10], 13 | "posts_with_hashtag_to_watch_longer": [], 14 | "posts_with_hashtag_to_like": [], 15 | "posts_of_content_creators_to_like": [], 16 | "posts_of_music_ids_to_like": [], 17 | "collecting_data_for_first_posts": false 18 | }, 19 | "82": { 20 | "test_user_id": 82, 21 | "login": true, 22 | "browser_language": "en", 23 | "reuse_cookies": false, 24 | "time_to_look_at_post_action": 0, 25 | "time_to_look_at_post_normal": 2, 26 | "number_of_batches": 3, 27 | "number_of_posts_to_like_per_batch": [], 28 | "number_of_creators_to_follow_per_batch": [], 29 | "number_of_posts_to_watch_longer_per_batch": [], 30 | "posts_with_hashtag_to_watch_longer": [], 
31 | "posts_with_hashtag_to_like": [], 32 | "posts_of_content_creators_to_like": [], 33 | "posts_of_music_ids_to_like": [], 34 | "collecting_data_for_first_posts": false 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/vcr_us_user-83-84.json: -------------------------------------------------------------------------------- 1 | { 2 | "83": { 3 | "test_user_id": 83, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 1, 8 | "time_to_look_at_post_normal": 2, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [10, 10, 10], 13 | "posts_with_hashtag_to_watch_longer": [], 14 | "posts_with_hashtag_to_like": [], 15 | "posts_of_content_creators_to_like": [], 16 | "posts_of_music_ids_to_like": [], 17 | "collecting_data_for_first_posts": false 18 | }, 19 | "84": { 20 | "test_user_id": 84, 21 | "login": true, 22 | "browser_language": "en", 23 | "reuse_cookies": false, 24 | "time_to_look_at_post_action": 0, 25 | "time_to_look_at_post_normal": 2, 26 | "number_of_batches": 3, 27 | "number_of_posts_to_like_per_batch": [], 28 | "number_of_creators_to_follow_per_batch": [], 29 | "number_of_posts_to_watch_longer_per_batch": [], 30 | "posts_with_hashtag_to_watch_longer": [], 31 | "posts_with_hashtag_to_like": [], 32 | "posts_of_content_creators_to_like": [], 33 | "posts_of_music_ids_to_like": [], 34 | "collecting_data_for_first_posts": false 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/vcr_us_user-85-86.json: -------------------------------------------------------------------------------- 1 | { 2 | "85": { 3 | "test_user_id": 85, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 
2, 8 | "time_to_look_at_post_normal": 2, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [10, 10, 10], 13 | "posts_with_hashtag_to_watch_longer": [], 14 | "posts_with_hashtag_to_like": [], 15 | "posts_of_content_creators_to_like": [], 16 | "posts_of_music_ids_to_like": [], 17 | "collecting_data_for_first_posts": false 18 | }, 19 | "86": { 20 | "test_user_id": 86, 21 | "login": true, 22 | "browser_language": "en", 23 | "reuse_cookies": false, 24 | "time_to_look_at_post_action": 0, 25 | "time_to_look_at_post_normal": 2, 26 | "number_of_batches": 3, 27 | "number_of_posts_to_like_per_batch": [], 28 | "number_of_creators_to_follow_per_batch": [], 29 | "number_of_posts_to_watch_longer_per_batch": [], 30 | "posts_with_hashtag_to_watch_longer": [], 31 | "posts_with_hashtag_to_like": [], 32 | "posts_of_content_creators_to_like": [], 33 | "posts_of_music_ids_to_like": [], 34 | "collecting_data_for_first_posts": false 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/vcr_us_user-87-88.json: -------------------------------------------------------------------------------- 1 | { 2 | "87": { 3 | "test_user_id": 87, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0.5, 8 | "time_to_look_at_post_normal": 2, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_watch_longer": ["football", "food", "euro2020", "movie", "foodtiktok", "gaming", "film", 14 | "tiktokfood", "gta5", "gta", "minecraft", "marvel", "cat", "dog", "pet", "dogsoftiktok", "catsoftiktok", "cute", 15 | "puppy", "dogs", "cats", "animals", "petsoftiktok", "kitten"], 16 | "posts_with_hashtag_to_like": 
[], 17 | "posts_of_content_creators_to_like": [], 18 | "posts_of_music_ids_to_like": [], 19 | "collecting_data_for_first_posts": false 20 | }, 21 | "88": { 22 | "test_user_id": 88, 23 | "login": true, 24 | "browser_language": "en", 25 | "reuse_cookies": false, 26 | "time_to_look_at_post_action": 0, 27 | "time_to_look_at_post_normal": 2, 28 | "number_of_batches": 3, 29 | "number_of_posts_to_like_per_batch": [], 30 | "number_of_creators_to_follow_per_batch": [], 31 | "number_of_posts_to_watch_longer_per_batch": [], 32 | "posts_with_hashtag_to_watch_longer": [], 33 | "posts_with_hashtag_to_like": [], 34 | "posts_of_content_creators_to_like": [], 35 | "posts_of_music_ids_to_like": [], 36 | "collecting_data_for_first_posts": false 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/vcr_us_user-89-90.json: -------------------------------------------------------------------------------- 1 | { 2 | "89": { 3 | "test_user_id": 89, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0.75, 8 | "time_to_look_at_post_normal": 2, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_watch_longer": ["football", "food", "euro2020", "movie", "foodtiktok", "gaming", "film", 14 | "tiktokfood", "gta5", "gta", "minecraft", "marvel", "cat", "dog", "pet", "dogsoftiktok", "catsoftiktok", "cute", 15 | "puppy", "dogs", "cats", "animals", "petsoftiktok", "kitten"], 16 | "posts_with_hashtag_to_like": [], 17 | "posts_of_content_creators_to_like": [], 18 | "posts_of_music_ids_to_like": [], 19 | "collecting_data_for_first_posts": false 20 | }, 21 | "90": { 22 | "test_user_id": 90, 23 | "login": true, 24 | "browser_language": "en", 25 | "reuse_cookies": false, 26 | "time_to_look_at_post_action": 0, 27 | 
"time_to_look_at_post_normal": 2, 28 | "number_of_batches": 3, 29 | "number_of_posts_to_like_per_batch": [], 30 | "number_of_creators_to_follow_per_batch": [], 31 | "number_of_posts_to_watch_longer_per_batch": [], 32 | "posts_with_hashtag_to_watch_longer": [], 33 | "posts_with_hashtag_to_like": [], 34 | "posts_of_content_creators_to_like": [], 35 | "posts_of_music_ids_to_like": [], 36 | "collecting_data_for_first_posts": false 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /Testing/TestSets/part_1_tests/vcr_us_user-91-92.json: -------------------------------------------------------------------------------- 1 | { 2 | "91": { 3 | "test_user_id": 91, 4 | "login": true, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 1, 8 | "time_to_look_at_post_normal": 2, 9 | "number_of_batches": 3, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_watch_longer": ["football", "food", "euro2020", "movie", "foodtiktok", "gaming", "film", 14 | "tiktokfood", "gta5", "gta", "minecraft", "marvel", "cat", "dog", "pet", "dogsoftiktok", "catsoftiktok", "cute", 15 | "puppy", "dogs", "cats", "animals", "petsoftiktok", "kitten"], 16 | "posts_with_hashtag_to_like": [], 17 | "posts_of_content_creators_to_like": [], 18 | "posts_of_music_ids_to_like": [], 19 | "collecting_data_for_first_posts": false 20 | }, 21 | "92": { 22 | "test_user_id": 92, 23 | "login": true, 24 | "browser_language": "en", 25 | "reuse_cookies": false, 26 | "time_to_look_at_post_action": 0, 27 | "time_to_look_at_post_normal": 2, 28 | "number_of_batches": 3, 29 | "number_of_posts_to_like_per_batch": [], 30 | "number_of_creators_to_follow_per_batch": [], 31 | "number_of_posts_to_watch_longer_per_batch": [], 32 | "posts_with_hashtag_to_watch_longer": [], 33 | "posts_with_hashtag_to_like": [], 34 | 
"posts_of_content_creators_to_like": [], 35 | "posts_of_music_ids_to_like": [], 36 | "collecting_data_for_first_posts": false 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /Testing/TestSets/test_user_167.json: -------------------------------------------------------------------------------- 1 | { 2 | "167": { 3 | "test_user_id": 167, 4 | "login": false, 5 | "browser_language": "en", 6 | "reuse_cookies": false, 7 | "time_to_look_at_post_action": 0, 8 | "time_to_look_at_post_normal": 0.1, 9 | "number_of_batches": 20, 10 | "number_of_posts_to_like_per_batch": [], 11 | "number_of_creators_to_follow_per_batch": [], 12 | "number_of_posts_to_watch_longer_per_batch": [], 13 | "posts_with_hashtag_to_watch_longer": [], 14 | "posts_with_hashtag_to_like": [], 15 | "posts_of_content_creators_to_like": [], 16 | "posts_of_music_ids_to_like": [], 17 | "collecting_data_for_first_posts": false 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /Testing/scratch_12.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | 4 | class Animal: 5 | def __init__(self, n_legs: int): 6 | self.n_legs: int = n_legs 7 | 8 | def make_noise(self): 9 | raise NotImplementedError() 10 | 11 | 12 | class Dog(Animal): 13 | def __init__(self, breed: str, *args, **kwargs): 14 | print(args) 15 | print(kwargs) 16 | self.breed: str = breed 17 | super().__init__(*args, **kwargs) 18 | 19 | 20 | Point = namedtuple("Point", field_names=("x", "y", "z")) 21 | 22 | 23 | if __name__ == '__main__': 24 | dog = Dog("labrador", 4) 25 | print(dir(dog)) 26 | 27 | cat = Animal(2) 28 | print(cat.n_legs) 29 | 30 | p = Point(0.2, 0.2, 0.3) 31 | print(dir(p)) -------------------------------------------------------------------------------- /chromedriver.exe: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/chromedriver.exe -------------------------------------------------------------------------------- /gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/gitignore -------------------------------------------------------------------------------- /hashtags_to_ignore.json: -------------------------------------------------------------------------------- 1 | {"88764338": "foryoupage", "1693596282061825": "fyp memez", "1642147373664261": "fypvirall", "1706891576089605": "fyp", "1704829339220998": "thinkforyourlife", "1648316753236998": "fypppppppppppppppppppppppp", "1646344785794053": "fyp20", "1644632912092165": "fypchachallenge", "1645966921365509": "fypfypfypfypfypfypfypfypfyp", "1661745157709826": "fypgakni", "883904": "foryouuu", "1694385466292229": "fypcontents", "1703472025824258": "chaukemuhluri type", "1616303504084998": "foryoupage", "229207": "fyp", "1654225487570949": "myfypbelike", "1651780589526022": "kingfyp", "1642191380435969": "foryourpageviral", "1633875828543494": "fypdog", "62543": "goodforyou", "1696630895338498": "fyp", "1631845819935750": "fypart", "1705461580495878": "foryouatwestfield", "1606946063404037": "likeforyoupage", "1706349891476490": "rafiqnakfyp", "1634577353868293": "pageforyou", "1603105080060934": "foryoupa", "1617501114305542": "fypfypfyp", "1638319905147909": "thefypdoesntwork", "96178": "lookingforyou", "1625705313397766": "fyppp", "1685323802588161": "fypforyoupage", "1626746770984966": "fyppppp", "1647699651574789": "fypfypfypfypfypfypfypfypfypfypfyp", "1664856447763462": "kdramafyp", "1644665564694534": "plisfyp", "1608548676719622": "dogsforyou", "1640087008637957": "fypdoesnotwork", "1667434148820998": "fyp cr", "1676376584428546": "fyppdongggggg", 
"1609969345764357": "fypplz", "1646000100595714": "fypmalaysia", "1636339340982278": "fypmemes", "1656713786866694": "fyp", "10031099": "foryoudrink", "1643981773166597": "fyp", "1650426297129990": "fyphair", "1649953988475909": "itsasignforyou", "1664053106852869": "fyp", "1702828530483205": "fyptrusttheprocess", "6477336": "fluffypancakes", "1703769711014918": "fyp", "1690360009542661": "cleaninghacksforyou", "42164": "foryou", "1659616574816257": "fypgaknih", "1603364504464389": "foryoupagee", "1685972632069125": "foryou", "1633460239823877": "fypy", "5578078": "somuchloveforyou", "1642256290828294": "fypisbroken", "1657293034376198": "mexicanforyoupage", "1605095166336005": "foryoupageeee", "1676811985203206": "fyviralfyp", "1636799196126214": "fypoffical", "1641715580438534": "fypfypfypfypfypfyp", "41248": "fallingforyou", "1648778089330694": "fyppppppppppppppppppp", "1626123835557893": "foryoufyp", "1620040599854086": "fypforyoupage", "1692557178255365": "fashiontiktokforyou", "1632511288704006": "fypo", "1654855125875717": "secretfyp", "1679200763816965": "fypttv", "1631460356711430": "pleasegetthisonthefyp", "1634600653321222": "fypps", "1623723121789957": "foryou", "1672970870730754": "fypdonggggggg", "4779077": "funnyvideosforyou", "1652538132966402": "foryoupageyeh", "1630284807035909": "fyptiktok", "393902": "loveforyou", "1620625283638277": "foryouviral", "1628179191522310": "foryoupgepage", "1668677561259014": "blackpeoplebelikefyp", "1658029352404997": "pupsfyp", "1665702793180165": "robuxforyou", "1639908894714881": "fypage", "1633359003571205": "putthisonfyp", "1660595729188869": "recipesforyou", "1649087342340101": "logoforyou", "1620485025788934": "fypp", "294688": "notforyou", "1648455553681413": "puppyfyp", "1640328120128517": "disneyfyp", "1682300274755586": "foryourepageofficial", "37160": "onlyforyou", "1631099105025030": "fyproblox", "22737416": "thisbudsforyou", "1657002917933062": "fypdontworknomore", "1653730940624901": "cakefyp", 
"1636619391693830": "kpopfyp", "1635084903417862": "foryou", "1701731659930629": "3minutevideoftype", "1636358001636357": "fyppppppppppp", "1670914542274565": "fypisbest", "1616746784464901": "foryourpag\u0435", "1607083044342806": "foryoupageeeee", "1602924933299205": "foryoupge", "1635974755232774": "asmrforyoupage", "1705180534836230": "barbertipsforyou", "1702698474374146": "back_in_fyp", "1623023114746102": "motivationforyou", "1685694592866306": "fyp viraltiktok", "67231518": "foryouph", "1628170725056582": "fyp", "1635201890205702": "asmrfyp", "1702065574970369": "fypmlaysia", "1659410697047041": "bismilahfyp", "1661715660552198": "adamsfyp", "1699264431886341": "fyp", "1637138684632070": "foryourpride", "1630729454643206": "fypforyouforyoupage", "1639655121988613": "gayfyp", "1658590921953281": "fypppppppppppppppppppppppppppppppppppp", "1639549424304133": "fyp tiktok", "1627816081234950": "robloxfyp", "1634103260359685": "fypdoesntwork", "1627320129956870": "foryoupageofficial", "1646970538788869": "fyptoronto", "1653705288350726": "g nnforyou", "1625649784043526": "fypplzz", "43739490": "petsforyou", "1649540023232517": "fypppppoo", "1605511720126469": "foryoupagina", "54185045": "fyplz", "1632540676574213": "fypppppp", "1688122502170626": "fyp", "1678160367690753": "fypdongahh", "1605270298646534": "foryoupace", "1656141302876165": "fypbali", "1599011224611846": "foryoupag", "1641193139018758": "fypaged", "1649873729675269": "christianfyp", "1651632362337286": "fyppageforyou", "1700800312528897": "foryourbestmate", "1664953059570690": "dailyvideosforyou", "1605983562245125": "foryou page", "1623668238525445": "fypplease", "1668051417397253": "tohfyp", "1634939850577926": "fyp", "1667767033351169": "fypsarawak", "1687184453108742": "fypsoundsss", "1670902591059970": "masukberanda type", "84873565": "foryouthis", "1655586519169029": "fypnotworking", "1646415283323910": "hockeyfyp", "1640702198714373": "getthisonthefypplease", "1644272574749702": 
"fypforyoupage", "1651863311959045": "gtafyp", "1705278121497606": "Black-headed", "1638251441181702": "fyp", "1602020764569605": "foryoupageplease", "1656974288293889": "fypdong", "1614846349487125": "fype", "1642258369204230": "fyppppppppppppppppp", "1597714851741701": "fypuk", "1607127600018438": "bhfyp", "1637403385093126": "foryou?", "1692301272753158": "fypuswnt", "1634091457788933": "fypforyourpage", "1651867800604677": "mexicanforyou", "1704855486517254": "gossipgirlherefyp", "1598364115802118": "foryoupgae", "1653077770558470": "nochopstickforyou", "1633131185532933": "fypppppppppppppppp", "1679059746505730": "destroyforyouba", "1658563098340353": "foryou2021", "1608533679464453": "foryouart", "1691905401746433": "fypmalaysiatiktok", "1674335187381249": "fypanime", "1628190985570310": "fypfyp", "1655836333592582": "spanishfyp", "1685183196061697": "fypdebz", "1620988974477318": "getthisonthefyp", "1647232092094469": "fypfypfypfypfypfypfypfypfypfypfypfypfyp", "1650591402139653": "xmasfyp", "7603941": "foryouchallenge", "1634979065961477": "fyp", "1685172109999106": "f\u00fcrdichseite\u30b7foryoupage", "1603504302397446": "foryoulage", "1624550708748294": "foryoupage", "1639107461180422": "foryoupagedoesnotwork", "1624339432620038": "fyppage", "1674618424012805": "kidsonmyfypbelike", "1635280932750342": "fypppppppppppppp", "1653220312297477": "fyppppppppppppppppppppppppppppppppp", "1626102952307718": "fypppp", "1637715128978437": "fyppppppppppppppppp", "1633200051757061": "bluestaffypuppy", "1648047480944646": "fypfypfypfy", "1633801928553478": "fypppppppppp", "1624000015616005": "fypsound", "1651224510575621": "fypforyoupagethis", "1653739674004485": "tutofyp", "1656078044884993": "masukfyp", "1659004589569030": "scottishforyoupage", "1604114517755910": "foryoupride", "773601": "foryourmom", "1684624429633537": "foryoupageofficiall2021", "1698894208649217": "sgfyp", "1627926592598021": "fypforyou", "1607069197518854": "foryourpages", "1657857938666501": 
"fypcommunity", "1634937277509638": "foryou", "1640241025714181": "fyppppppppppppp", "1654955847156741": "blowupforyoupage", "1637295409647621": "fypdrama", "1604390375122950": "foryouppage", "1637403568312326": "foryoupage?", "20922363": "pageforyou", "1649954646340614": "fypbabies", "1640999288847365": "fypchallage", "1637418424589317": "fyp", "1675963560778754": "fypmototiktok", "1641236262278149": "fyp", "1634922086900741": "fypchallenge", "1702998879182853": "foryoupage tik tok viral video", "1634318675729413": "fypplss", "1659150842052610": "semogafyp", "1634880784642054": "careforyourfurbabies", "8085197": "foryouuuuu", "1605614978359301": "foryourepage", "1674893053234177": "fypdongggggggg", "1673115521012737": "brandafyp", "1643442074587137": "sgfyp", "1654407569745925": "fypnails", "1644044404124677": "animefyp", "1626287748447237": "antifyp", "1647967591256070": "fypcouple", "1628662386826245": "fyp", "1667758271570950": "fypplppppppppppp", "1668812910222338": "fypdonggggggg", "1644109611442182": "fypofficial", "1641655411270661": "fyppagee", "1623302502209542": "foryounails", "1661947223506946": "berandafyp", "1674584508176390": "fyp viral", "13082896": "foryoupaige", "1656991624902662": "fypforyoupage", "1685059559254018": "foryoupageforeveryone\u2661", "1683703956260866": "fypjebal", "1631348850976774": "fyp", "1637342470396934": "fyp", "1619120300969989": "fyp", "1697420444830726": "follow me and t s g fyp", "1659700661828610": "Fyp post", "1662491668570113": "masukberanda type", "1662601160390657": "moviesforyou", "1646335221298182": "fypindonesia", "1689637217346566": "mynameistiggergetmeonthefyp", "1647234084365318": "fypbr", "1619679937718277": "plsfyp", "1628190219645957": "foryoupagedoesntwork", "1649139584636934": "fyppppppppppppppppppppp", "1620500486126597": "minecraftfyp", "1634943047845893": "foryoupage", "1634950588112902": "foryoupage", "1631522291667974": "forfyp", "1615032447947782": "entertainmentforyou", "1619084880059397": 
"foryousounds", "1597916865387525": "foryou1", "1604552984408070": "foryoupageee", "1603600556098566": "foryoupg", "1634937292926981": "foryoupage", "1645703110543366": "fypdoesntworkbutidoitanyways", "1664495475082242": "foryoupageofficiall", "1637407748596742": "fyp", "1606872592930822": "foryourpagee", "1655316819776518": "mexicanfyp", "1635204099943426": "fypsounds", "1654857781475333": "fyp viral", "1635848705845253": "fypyoupage", "1673560732511237": "fypdiesntwork", "1644298750203905": "tiktokjokesforyou", "1634369597616134": "viralfyp", "61667223": "fyps", "1617590735671301": "fypfor", "1662784958109697": "happylifefyp", "1609595389571077": "foryoupagetiktok", "1607061020406789": "foryoucomedy", "1647840803266566": "fypfootball", "1658756653576198": "fypdosentwork", "1654665482492933": "xyzbcafyp", "1648715358635009": "berandafyp", "1704170275899398": "fypodoentwork", "1684082479714306": "fyp viralvideo", "1702755085236229": "bewhoyouareforyourpride", "1632002941088774": "fypviral", "78768996": "memesforyou", "1642613516590086": "fypppppppppppppppppp", "1659949481334789": "fypforyoupage\u30b7", "1609273039298565": "foryoupag\u0435", "1650742742654982": "fyp2020", "33971256": "foryouofficial", "1627610222006278": "canesaufforyou", "1698955398675461": "fyppppgreyhound", "1666593428398085": "fypviral", "1617425133872134": "foryoupageoffical", "1623383052219414": "fypthis", "1592743307939841": "tiktokforyou", "1666847537269762": "animegirlforyou", "1603204382206981": "fypg", "7107602": "foryouforyou", "1662807921131525": "2kfyp", "1604142815383557": "foryoupgage", "1620267499925509": "fypls", "1611853076456450": "viralforyou", "1656845043434502": "fypsoccer", "1658673609103366": "fyp21", "1610265502020613": "foryourpagechallenge", "1648042687613958": "fypdontwork", "1624390188065798": "foryoufood", "1666458039063553": "fypmalaysia", "1622662438146694": "notfyp", "1670686468547590": "justgetthisonfyp", "1703891853128710": "foreverforyou", "1635213389964293": 
"fypit", "1602878675792901": "getmeontheforyoupage", "1632581882862593": "fypph", "1620763223578790": "foryoupakistan", "1666844905635846": "japanfyp", "1704810616802313": "azrulalwaysfyp", "1694852740601861": "fypshortclips", "1686358467262466": "fyppyfyp", "1640768557032454": "fyp", "1649766571425797": "carfyp", "1604276726661125": "foryoupagethis", "1655495140363270": "fyppppls", "1690407489009665": "standwithkaahmir_foryoupage_mu", "442854": "foryouu", "1618323884896262": "tiktokpageforyou", "1634937149617158": "foryoupage", "1637252569181189": "fypdogs", "1608973297173509": "foryoupageforever", "1606878050264069": "onyourforyoupage", "1630155480914949": "fyppppp", "1679267549421570": "fypdoge", "12009": "justforyou", "1670093194164230": "fypdoenstwork", "1679511090156546": "fypdonggggggg", "1667758536220673": "fypdongggg", "1624022682062853": "fyp", "1604302285252613": "fypage", "1639336981017605": "fyppleasetiktok", "1605919148785669": "ffyp", "1598498371111942": "foryourpage", "1610847197102085": "foryoutiktok", "364659": "foryour", "1615796627818502": "foryourdogpage", "1675567091586054": "fyppoppppppppppppppppppppp", "1616966643636229": "fypp", "1626088819377157": "fyfyp", "1635214780493826": "fypthisss", "1634449307607045": "goonthefyp", "1701341399285766": "foryourpride\ud83c\udff3\ufe0f\u200d\ud83c\udf08", "11899407": "artforyou", "1657052815966210": "fypberandatiktok", "1651393084389381": "fypdosntwork", "1625103941893125": "foryoupet", "1640815314240518": "fypagechallenge", "1600360681543685": "getthisontheforyoupage", "1603987329456134": "foryouapge", "1592201096750085": "foryou", "1634967704187910": "fyp", "1679357203568641": "mpesfypgamw", "1672400727355394": "berandafyp ya", "1670139895030786": "funnycontentforyou", "1664068495767558": "mtbforyou", "1617418873150470": "fyppls", "1635070555641861": "foryoupage", "1627524680056837": "fyppppppppp", "22091782": "foryoup", "1603502040695813": "foryouoage", "1664119477821441": "fyp viral", 
"1639699601016837": "fyp", "1666841966392325": "fyp2021", "1606788550284294": "robloxforyou", "1625820133083142": "fypfypfypfyp", "1665036454423553": "fyp foryoupage tiktok", "1616155509242885": "foryoupagebro", "1650370082138113": "foryouoffical", "1659685565791237": "koreafyp", "1655841212503045": "fypfoodie", "1673852121933830": "foryoupagedosntwork", "1682876476545042": "fakesnapsforyou", "1623927366993925": "fyp1", "1669636561460229": "xyzbcafyp", "1604284546288646": "foryoupages"} -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/main.py -------------------------------------------------------------------------------- /ngrok.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/ngrok.exe -------------------------------------------------------------------------------- /proxy_auth_plugin.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/proxy_auth_plugin.zip -------------------------------------------------------------------------------- /proxy_auth_plugin/background.js: -------------------------------------------------------------------------------- 1 | 2 | var config = { 3 | mode: "fixed_servers", 4 | rules: { 5 | singleProxy: { 6 | scheme: "http", 7 | host: "45.95.96.132", 8 | port: parseInt(8691) 9 | }, 10 | bypassList: ["localhost"] 11 | } 12 | }; 13 | 14 | chrome.proxy.settings.set({value: config, scope: "regular"}, function() {}); 15 | 16 | function callbackFn(details) { 17 | return { 18 | authCredentials: { 19 | 
username: "PLACEHOLDER", 20 | password: "PLACEHOLDER" 21 | } 22 | }; 23 | } 24 | 25 | chrome.webRequest.onAuthRequired.addListener( 26 | callbackFn, 27 | {urls: [""]}, 28 | ['blocking'] 29 | ); 30 | -------------------------------------------------------------------------------- /proxy_auth_plugin/manifest.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "version": "1.0.0", 4 | "manifest_version": 2, 5 | "name": "Chrome Proxy", 6 | "permissions": [ 7 | "proxy", 8 | "tabs", 9 | "unlimitedStorage", 10 | "storage", 11 | "", 12 | "webRequest", 13 | "webRequestBlocking" 14 | ], 15 | "background": { 16 | "scripts": ["background.js"] 17 | }, 18 | "minimum_chrome_version":"22.0.0" 19 | } 20 | -------------------------------------------------------------------------------- /src/Proxy.py: -------------------------------------------------------------------------------- 1 | """ 2 | File access Webshare.io API to get required proxy data 3 | """ 4 | 5 | import requests 6 | from src.DatabaseHelper import * 7 | 8 | APIKEY = "PLACEHOLDER" 9 | 10 | base_path = Path(__file__).parent 11 | file_path = (base_path / "../utilities/db_credentials.json").resolve() 12 | with open(file_path) as file: 13 | db_credentials = json.load(file) 14 | 15 | conn = psycopg2.connect( 16 | host=db_credentials.get('host'), 17 | database=db_credentials.get('database'), 18 | user=db_credentials.get('user'), 19 | password=db_credentials.get('password')) 20 | cur = conn.cursor() 21 | 22 | 23 | def proxy(countries): 24 | country_string = countries[0] 25 | if len(countries) > 1: 26 | for country in countries[1:]: 27 | country_string = country_string + "-" + country 28 | response = requests.get("https://proxy.webshare.io/api/proxy/list/?countries=" + country_string, 29 | headers={"Authorization": "Token %s" % APIKEY}) 30 | proxy_data = {} 31 | proxy_data['username'] = 'PLACEHOLDER' 32 | proxy_data['password'] = 'PLACEHOLDER' 33 | proxies = {} 34 | for proxy in 
def find_disposable_proxy(country):
    """
    Return the proxies of ``country`` that are stored in the database without
    an assigned user and that are still listed by the Webshare API.

    :param country: country code, used for the Webshare lookup (through
        ``proxy``) and for the ``country`` column of the ``proxies`` table
    :return: list of ``[host, port]`` pairs, both taken from the database
        rows with surrounding whitespace stripped
    """
    # `.get(country)` yields None when the API lists no proxy for the country;
    # treat that as "nothing available" instead of failing on iteration.
    available = proxy([country]).get('proxies').get(country) or []

    # Compare host and port as ONE pair. Checking them independently would
    # accept a stale row whose port merely belongs to some other proxy.
    available_pairs = {(entry[0], str(entry[1])) for entry in available}

    sql = """select host, port from proxies where user_using_this_proxy is null and country = %s"""
    cur.execute(sql, (country,))

    disposable_proxies = []
    for db_host, db_port in cur.fetchall():
        host = db_host.strip()
        port = db_port.strip()
        if (host, port) in available_pairs:
            disposable_proxies.append([host, port])
    return disposable_proxies
def proxies_maintenance():
    """
    Synchronise the ``proxies`` table with the proxy list returned by the
    Webshare API.

    Proxies returned by the API whose address is not stored yet are inserted;
    stored, non-legacy proxies whose address is no longer returned are deleted
    through ``delete_proxy_in_db``. Matching is done on the host address only.
    """
    response = requests.get("https://proxy.webshare.io/api/proxy/list", headers={"Authorization": f"Token {APIKEY}"})
    webshare_proxies = response.json().get('results')

    select_sql = """select host, port from d1rpgcvqcran0q.public.proxies where legacy != true"""
    cur.execute(select_sql, )
    db_proxies = [[row[0].strip(), row[1].strip()] for row in cur.fetchall()]

    # Build both lookup sets once instead of re-creating a list per iteration.
    # NOTE(review): if the payload has no 'results' key this raises here,
    # i.e. before any row has been inserted or deleted.
    db_hosts = {entry[0] for entry in db_proxies}
    webshare_hosts = {entry.get('proxy_address') for entry in webshare_proxies}

    proxy_not_in_db = [
        [entry.get('proxy_address'), entry.get('ports').get('http'), entry.get('country_code')]
        for entry in webshare_proxies
        if entry.get('proxy_address') not in db_hosts
    ]
    proxy_not_available = [
        [entry[0], entry[1]] for entry in db_proxies if entry[0] not in webshare_hosts
    ]

    # update db: add all missing proxies
    insert_sql = """insert into d1rpgcvqcran0q.public.proxies(host, port, country) values(%s,%s,%s)
    on conflict on constraint proxies_pkey do nothing"""
    for host, port, country in proxy_not_in_db:
        cur.execute(insert_sql, (host, port, country,))
    conn.commit()

    # update db: delete all proxies that no longer exist on webshare.io
    for host, port in proxy_not_available:
        delete_proxy_in_db(host, port)
def get_verification_code(self, test_user_id, phone_number, phone_number_country_prefix_numerous,
                          retry_delay=10):
    """
    Return the verification code contained in the newest TikTok SMS.

    The SMS service can lag behind, so the newest message may still carry the
    code that was already used for this test user. In that case the method
    waits and fetches the newest message again until a different code shows up.

    :param test_user_id: id of the test user the code is requested for
    :param phone_number: phone number without country prefix
    :param phone_number_country_prefix_numerous: country prefix of the number
    :param retry_delay: seconds to wait before fetching again when the newest
        code equals the previously stored one
    :return: the verification code as a string of digits, or the string
        "Trigger Resend" when the newest SMS contains no code
    """
    while True:
        newest_message = self.get_newest_sms_body(phone_number, phone_number_country_prefix_numerous)

        # handle different verification codes: first try the fixed position
        verification_code = newest_message[9:13]
        if not verification_code.isdigit():
            try:
                idx_start = newest_message.index('use')
                # Look for the closing marker only AFTER 'use'; otherwise an
                # earlier "as" (e.g. inside "Please") yields an empty slice.
                idx_end = newest_message.index('as', idx_start + 3)
                candidate = newest_message[idx_start + 3:idx_end]
                verification_code = ''.join(char for char in candidate if char.isdigit())
                if not verification_code.isdigit():
                    raise ValueError("Verificaiton Code not digit.")
            except ValueError as e:
                print("SMS: " + newest_message)
                print("Error: no verification code provided by TikTok, resend code.")
                print("Value Error: " + str(e))
                return "Trigger Resend"

        # check if verification different to previous one
        previous_code = self.database.get_previous_verification_code(test_user_id=test_user_id)
        if int(verification_code) != previous_code:
            self.database.update_verification_code(verification_code=verification_code,
                                                   test_user_id=test_user_id)
            return verification_code

        # Same code as last time: loop again so that the FRESH code is the one
        # returned (the stale code must never reach the caller).
        print(f"Verification code {verification_code} seems to be too old for {test_user_id}, "
              f"fetching again in 10secs.")
        time.sleep(retry_delay)
def store_test_duration(self, duration, test_user_id, max_retries=3):
    """
    Store the duration of a test user's run in the ``testrun`` table.

    :param duration: value written to the ``duration`` column
    :param test_user_id: test user whose row of the current test run is updated
    :param max_retries: how many times a fresh ``DatabaseHelper`` is created
        and the update retried after a connection-level error
    :raises Exception: when the update fails, or when the connection errors
        persist after ``max_retries`` reconnects; the original error is
        attached as ``__cause__``
    """
    sql = """update testrun
            set duration = (%s) where id = (%s) and testuserid = (%s)"""
    retries_left = max_retries
    while True:
        try:
            self.database.cur.execute(sql, (duration, self.test_run_id, test_user_id))
            self.database.conn.commit()
            return
        except (psycopg2.InterfaceError, psycopg2.OperationalError) as cursor_error:
            print(cursor_error)
            # Bounded retry: a permanently unreachable database must end in an
            # error instead of reconnecting forever.
            if retries_left <= 0:
                raise Exception("Test duration could not be stored") from cursor_error
            retries_left -= 1
            print("Instantiating db connection and trying to store test run data again.")
            self.database = DatabaseHelper()
        except (Exception, psycopg2.DatabaseError) as error:
            print(error)
            raise Exception("Test duration could not be stored") from error
-------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/src/__init__.py -------------------------------------------------------------------------------- /src/proxy_auth_plugin.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/src/proxy_auth_plugin.zip -------------------------------------------------------------------------------- /utilities/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboeke/TikTok-Personalization-Investigation/762164169d5faec33d0d57250b170a0b60d763ac/utilities/.DS_Store -------------------------------------------------------------------------------- /utilities/country_prefix.json: -------------------------------------------------------------------------------- 1 | { 2 | "United States": "+1", 3 | "United Kingdom": "+44", 4 | "Germany": "+49", 5 | "Canada": "+1", 6 | "Switzerland": "+41" 7 | } -------------------------------------------------------------------------------- /utilities/proxy.zip/background.js: -------------------------------------------------------------------------------- 1 | // from https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=&cad=rja&uact=8&ved=2ahUKEwjJiYiK8arvAhWGCuwKHQWzABgQFjADegQIAhAD&url=https%3A%2F%2Fwonderproxy.com%2Fblog%2Fa-step-by-step-guide-to-setting-up-a-proxy-in-selenium%2F&usg=AOvVaw2u3tm5J7KAE_B2OlkEhK_7 2 | 3 | var config = { 4 | mode: "fixed_servers", 5 | rules: { 6 | singleProxy: { 7 | scheme: "http", 8 | host: "209.127.191.180", 9 | port: parseInt(9279) 10 | }, 11 | bypassList: ["foobar.com"] 12 | } 13 | }; 14 | 15 | 
// Apply the fixed-server proxy configuration defined in `config`.
chrome.proxy.settings.set({value: config, scope: "regular"}, function () {
});

// Answers a proxy authentication challenge with the configured credentials.
function callbackFn(details) {
    return {
        authCredentials: {
            username: "PLACEHOLDER",
            password: "PLACEHOLDER"
        }
    };
}

// The filter needs a valid match pattern; an empty string matches nothing,
// so the credentials would never be supplied. "<all_urls>" covers every request.
chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    {urls: ["<all_urls>"]},
    ['blocking']
);