├── .gitignore ├── LICENSE ├── README.md ├── docs └── example.gif ├── requirements.txt └── src ├── __init__.py ├── flaskapp.wsgi ├── model_generation ├── __init__.py ├── config.json ├── data │ ├── .gitkeep │ └── segmented_output │ │ └── .gitkeep ├── data_retrieval.py ├── modeling.py └── suggester.py ├── server.py └── static ├── css └── main.css ├── img └── arrow.svg ├── index.html └── js └── main.js /.gitignore: -------------------------------------------------------------------------------- 1 | config_override.json -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 John Klingelhofer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### 2024 Addendum 2 | 3 | This project will likely not work as it once did if anyone attempts to re-use it, due to Reddit API changes. I've long since deleted my Reddit account, so the demo link below may no longer be functional. 4 | 5 | ### Overview 6 | 7 | [Click here to see it in action](http://159.89.246.81/) 8 | 9 | This is a recommendation engine for subreddits based on the subreddits to which the user's last 300 comments and 100 posts were submitted. In order to achieve this, these histories were pulled for 200,000 users through the reddit API, and a model in Keras was trained on vectors of each user's group of subreddits to establish the relationships between subreddits. 10 | 11 | ![](docs/example.gif) 12 | 13 | Once trained, usernames can be submitted to this model through a basic Flask API. 14 | 15 | ### Running locally 16 | 17 | Because the data exceeds GitHub's file size limits, anyone wishing to run this locally will need to go through their own data retrieval and model training steps. 18 | 19 | The steps are as follows: 20 | 21 | 1. Install the required packages in `requirements.txt` 22 | 2. Generate Reddit API keys and put them into the `config.json`, or put them into a `config_override.json` file. 23 | 3. While in the `config.json`, adjust the parameters for the model as desired, such as number of users to use in the 24 | generation of training data, and the number of comments/submissions to go through for each user in generating this data. 25 | 4. Run `data_retrieval.py`. This is the longest step in the process, and may take several hours to get all the needed 26 | user information. 27 | 5. Run `modeling.py` to generate the model. On a machine with a recent NVIDIA GPU and the proper setup to utilize it, 28 | the training process shouldn't take more than a few minutes. 
29 | 6. Run `server.py`. 30 | -------------------------------------------------------------------------------- /docs/example.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klingj3/subreddit_suggester/80e8aa0b5e240d09041b8746f038954269d3b770/docs/example.gif -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Flask==1.1.1 2 | Keras==2.3.1 3 | Keras-Applications==1.0.8 4 | Keras-Preprocessing==1.1.0 5 | tensorboard==2.1.0 6 | tensorflow==2.1.2 7 | tensorflow-estimator==2.1.0 8 | numpy==1.18.1 9 | pandas==1.0.3 10 | praw==6.5.1 11 | prawcore==1.0.1 12 | progressbar2==3.50.1 13 | Werkzeug==0.16.0 -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klingj3/subreddit_suggester/80e8aa0b5e240d09041b8746f038954269d3b770/src/__init__.py -------------------------------------------------------------------------------- /src/flaskapp.wsgi: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import os 3 | import sys 4 | import logging 5 | 6 | def execfile(filename): 7 | globals = dict( __file__ = filename ) 8 | exec( open(filename).read(), globals ) 9 | 10 | activate_this = os.path.join('/var/www/subreddit_suggestor/src/venv/bin', 'activate_this.py' ) 11 | execfile( activate_this ) 12 | 13 | logging.basicConfig(stream=sys.stderr) 14 | sys.path.insert(0,"/var/www/subreddit_suggestor/src") 15 | os.chdir('/var/www/subreddit_suggestor/src') 16 | from server import app as application 17 | application.secret_key = os.getenv('SECRET_KEY', 'for dev') -------------------------------------------------------------------------------- /src/model_generation/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/klingj3/subreddit_suggester/80e8aa0b5e240d09041b8746f038954269d3b770/src/model_generation/__init__.py -------------------------------------------------------------------------------- /src/model_generation/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "client_id": "YOUR CLIENT ID HERE!", 3 | "client_secret": "YOUR CLIENT SECRET HERE!", 4 | "usernames_path": "model_generation/data/segmented_output/random_usernames_{i}.txt", 5 | "subreddits_score_path": "model_generation/data/segmented_output/scored_subreddits_{i}.json", 6 | "num_usernames": 200000, 7 | "combined_user_to_subreddit_score_path": "model_generation/data/user_to_subreddit_score.json", 8 | "rank_to_sfw_status": "model_generation/data/rank_to_sfw_status.json", 9 | "rank_to_subreddit_path": "model_generation/data/rank_to_subreddit.json", 10 | "max_subreddits_in_data": 50000, 11 | "max_subreddits_in_model": 15000, 12 | "max_subreddits_per_user_vector": 50, 13 | "method": "hot", 14 | "model_path": "model_generation/data/model_output_{method}.h5", 15 | "test_pct": 0.1 16 | } -------------------------------------------------------------------------------- /src/model_generation/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klingj3/subreddit_suggester/80e8aa0b5e240d09041b8746f038954269d3b770/src/model_generation/data/.gitkeep -------------------------------------------------------------------------------- /src/model_generation/data/segmented_output/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klingj3/subreddit_suggester/80e8aa0b5e240d09041b8746f038954269d3b770/src/model_generation/data/segmented_output/.gitkeep 
-------------------------------------------------------------------------------- /src/model_generation/data_retrieval.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | from prawcore.exceptions import Forbidden, NotFound 3 | 4 | import json 5 | import praw 6 | import progressbar as pg 7 | import os 8 | 9 | 10 | class DataRetriever(object): 11 | """ 12 | Generate and format the data which is used to drive the model. 13 | """ 14 | 15 | def __init__(self, worker_no=0, num_workers=1): 16 | """ 17 | Load the config files and establish praw utility. 18 | :param worker_no: Int id of worker 19 | :param num_workers: Int number of workers, dictates division of labor between jobs. 20 | """ 21 | with open("model_generation/config.json", "r") as infile: # 22 | self.config = json.loads(infile.read()) 23 | if os.path.exists("model_generation/config_override.json"): 24 | with open("model_generation/config_override.json", "r") as infile: 25 | self.config.update(json.loads(infile.read())) 26 | 27 | self.reddit = praw.Reddit(user_agent="user", client_id=self.config["client_id"], 28 | client_secret=self.config["client_secret"]) 29 | 30 | if worker_no >= num_workers: 31 | raise ValueError(f"worker_no passed {worker_no} >= the number of workers") 32 | 33 | self.i, self.total_instances = worker_no, num_workers 34 | self.usernames_path = self.config["usernames_path"].format(i=self.i) 35 | 36 | def get_random_usernames(self): 37 | """ 38 | Get a random sample of usernames by randomly selecting subreddits and recent comments within that subreddit. 39 | :param number: Number of ids to retrieve 40 | :param ids_per_subreddit: Max number of ids per subreddit 41 | :param destination_file: The path of the text file to contain the exported ids. 
42 | :return: None 43 | """ 44 | ids = set() 45 | 46 | number = int(self.config["num_usernames"]/self.total_instances) 47 | max_ids_per_subreddit = 1000 48 | 49 | if self.i == 0: 50 | print("Getting random ids...") 51 | print(number) 52 | bar = pg.ProgressBar(max_value=number) 53 | bar.update(0) 54 | 55 | forbidden_count = 0 56 | 57 | while len(ids) < number: 58 | try: 59 | subreddit_name = self.reddit.subreddit("random").display_name 60 | subreddit = self.reddit.subreddit(subreddit_name) 61 | if subreddit.subscribers > 10000: # For speed, ignore subreddits with very few subscribers as user origins. 62 | old_id_num = len(ids) 63 | for submission in subreddit.top(limit=10): 64 | if len(submission.comments): 65 | if submission.author: 66 | ids.add((str(submission.author), subreddit_name)) 67 | for comment in submission.comments.list(): 68 | try: 69 | if comment.author: 70 | ids.add((str(comment.author), subreddit_name)) 71 | if len(ids) - old_id_num > max_ids_per_subreddit: 72 | break 73 | except AttributeError: 74 | pass 75 | # For clarity, only display the status bar updates for the first worker. 76 | if self.i == 0: 77 | bar.update(min(number, len(ids))) 78 | except (Forbidden, NotFound): 79 | forbidden_count += 1 80 | if forbidden_count > 100: 81 | print("Max exceptions exceeded, stopping remainder of auience selection") 82 | break 83 | 84 | with open(self.usernames_path, "w") as outfile: 85 | outfile.write(json.dumps(list(ids)[:number])) 86 | 87 | def generate_user_subreddits_data(self): 88 | """ 89 | Get a list of the distinct subreddits that the reddit accounts in a particular file have submitted or commented 90 | within. 91 | :param path_to_usernames: String path to the list of usernames. 92 | :param path_to_key_scores: Path to the output of ids to strings. 93 | :param path_to_decoder_json: Path to a JSON for decoding the strings. 
94 | :return: None 95 | """ 96 | try: 97 | with open(self.usernames_path, "r") as infile: 98 | usernames = json.loads(infile.read()) 99 | except FileNotFoundError: 100 | usernames = {} # Just pass so other threads can proceed normally. 101 | 102 | username_to_subreddit_scores = dict() 103 | if self.i == 0: # For clarity, just show the status for one of the jobs. 104 | print("Getting subreddit visitation data...") 105 | work_range = pg.progressbar(usernames) 106 | else: 107 | work_range = usernames 108 | 109 | for username, origin_subreddit in work_range: 110 | subreddit_scores = self.get_distinct_subreddits_for_user(username, excluded_subreddit=origin_subreddit) 111 | if subreddit_scores: 112 | username_to_subreddit_scores[username] = subreddit_scores 113 | 114 | with open(self.config["subreddits_score_path"].format(i=self.i), "w") as outfile: 115 | outfile.write(json.dumps(username_to_subreddit_scores)) 116 | 117 | def get_distinct_subreddits_for_user(self, username, excluded_subreddit=None): 118 | """ 119 | Get a list of distinct subreddits a user has interacted with. 120 | :param username: String username of the user for which activity will be evaluated. 121 | :param excluded_subreddit: String name of subreddit to not be included in returned values or factor into counts. 122 | This value is normally used to prevent the subreddit from which a username was pulled from appearing in the output, 123 | which can skew the popularity metrics towards rarer randomly chosen subreddits. 124 | :return: Dict in format { 125 | String subreddit name: Float % of reddit interactions (submissions or comments) by a user which were in 126 | that subreddit. 
127 | } on success, empty dict if API exception encountered 128 | """ 129 | redditor = self.reddit.redditor(username) 130 | try: 131 | comment_subreddit_counts = Counter([str(comment.subreddit) for comment in redditor.comments.new(limit=300)]) 132 | del comment_subreddit_counts[excluded_subreddit] 133 | except (Forbidden, NotFound): 134 | return {} 135 | 136 | try: 137 | submission_subreddit_counts = Counter([str(submission.subreddit) for submission in 138 | redditor.submissions.new(limit=100)]) 139 | del submission_subreddit_counts[excluded_subreddit] 140 | except (Forbidden, NotFound): 141 | return {} 142 | 143 | subreddits = set(comment_subreddit_counts.keys()).union(set(submission_subreddit_counts.keys())) 144 | total_actions = sum(comment_subreddit_counts.values()) + sum(submission_subreddit_counts.values()) 145 | 146 | return {subreddit: (comment_subreddit_counts[subreddit] + submission_subreddit_counts[subreddit])/total_actions 147 | for subreddit in subreddits} 148 | 149 | def combine_and_prep_data(self, minimum_popularity=None, highest_num=64): 150 | """ 151 | Taking the individual files produced in the generate subreddits for individual users step, combine them into: 152 | - For each user, a list of tuples of (Int, Float) with the Int being the popularity ranking of a subreddit 153 | and the Float what percentage of the user"s recent activity was in that subreddit. This file is saved 154 | to the value under key "combined_user_to_subreddit_score_path" in the config file. 155 | - Dump a JSON of {Integer Ranking: Subreddit name} for the subreddits visited by the users, where their 156 | popularity is above the minimum popularity ranking. 
157 | :return: None 158 | """ 159 | from collections import Counter 160 | 161 | if not minimum_popularity: 162 | minimum_popularity = self.config["max_subreddits_in_data"] 163 | 164 | combined_user_to_subreddit_scores = dict() 165 | subreddit_to_popularity = Counter() 166 | user_subreddit_score_directory = "/".join(self.config["subreddits_score_path"].split('/')[:-1]) 167 | for file in os.listdir(user_subreddit_score_directory): 168 | path = os.path.join(user_subreddit_score_directory, file) 169 | if ".json" in file and int(file.split('.')[0].split('_')[-1]) < highest_num: 170 | with open(path, "r") as infile: 171 | combined_user_to_subreddit_scores.update(json.loads(infile.read())) 172 | for subreddit_scores in combined_user_to_subreddit_scores.values(): 173 | for subreddit, score in subreddit_scores.items(): 174 | subreddit_to_popularity[subreddit] += score 175 | 176 | rank_to_subreddit = dict() 177 | for subreddit, _ in pg.progressbar(subreddit_to_popularity.most_common(minimum_popularity)): 178 | rank_to_subreddit[len(rank_to_subreddit)+1] = subreddit 179 | 180 | with open(self.config["rank_to_subreddit_path"], "w") as outfile: 181 | outfile.write(json.dumps(rank_to_subreddit)) 182 | 183 | subreddit_to_rank = {subreddit: rank for rank, subreddit in rank_to_subreddit.items()} 184 | 185 | output_data = {i: 186 | [(subreddit_to_rank[subreddit], score) for subreddit, score in user_subreddit_score.items() if subreddit 187 | in subreddit_to_rank] for i, user_subreddit_score in enumerate(combined_user_to_subreddit_scores.values())} 188 | with open(self.config["combined_user_to_subreddit_score_path"], "w") as outfile: 189 | outfile.write(json.dumps(output_data)) 190 | 191 | def generate_sfw_subreddit_info(self): 192 | from time import sleep 193 | with open(self.config['rank_to_subreddit_path'], 'r') as infile: 194 | rank_to_subreddit = json.loads(infile.read()) 195 | 196 | rank_to_sfw_status = dict() 197 | for rank, subreddit in 
pg.progressbar(list(rank_to_subreddit.items())[:self.config['max_subreddits_in_model']]): 198 | for _ in range(10): # Max retries 199 | try: 200 | rank_to_sfw_status[rank] = not self.reddit.subreddit(subreddit).over18 201 | break 202 | except Exception: 203 | sleep(10) 204 | pass 205 | else: 206 | print("Max retries exceeded on subreddit " + subreddit) 207 | # For safety, assume false. 208 | rank_to_sfw_status[rank] = False 209 | 210 | with open(self.config['rank_to_sfw_status'], 'w') as outfile: 211 | outfile.write(json.dumps(rank_to_sfw_status)) 212 | 213 | if __name__ == "__main__": 214 | import threading 215 | import os 216 | 217 | os.chdir('..') 218 | 219 | def get_data_slice(i, j): 220 | data_retriever = DataRetriever(worker_no=i, num_workers=j) 221 | # data_retriever.get_random_usernames() 222 | data_retriever.generate_user_subreddits_data() 223 | 224 | max_threads = 64 225 | jobs = [] 226 | print(f"Starting work on {max_threads} jobs.") 227 | for i in range(max_threads): 228 | p = threading.Thread(target=get_data_slice, args=(i, max_threads)) 229 | jobs.append(p) 230 | p.start() 231 | 232 | for j in jobs: 233 | j.join() 234 | 235 | data_retriever = DataRetriever(worker_no=0, num_workers=1) 236 | data_retriever.combine_and_prep_data(highest_num=max_threads) 237 | 238 | data_retriever.generate_sfw_subreddit_info() -------------------------------------------------------------------------------- /src/model_generation/modeling.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import os 4 | import tensorflow as tf 5 | 6 | from keras.models import Sequential 7 | from keras.layers import Dense, Dropout, Embedding, Flatten, Activation, BatchNormalization 8 | from sklearn.model_selection import train_test_split 9 | 10 | 11 | class SuggestionModeler(object): 12 | """ 13 | A collection of functions to generate a model of subreddit suggestions from the data retreived in 14 | data_retrieval.py 
15 | """ 16 | def __init__(self, force_retrain=False): 17 | self.session = tf.Session() 18 | self.graph = tf.get_default_graph() 19 | 20 | with open("model_generation/config.json", "r") as infile: 21 | self.config = json.loads(infile.read()) 22 | if os.path.exists("config_override.json"): 23 | with open("model_generation/config_override.json", "r") as infile: 24 | self.config.update(json.loads(infile.read())) 25 | 26 | self.subreddit_to_rank = dict() 27 | with open(self.config["rank_to_subreddit_path"], 'r') as infile: 28 | self.rank_to_subreddit = json.loads(infile.read()) 29 | self.rank_to_subreddit = {int(k): v for k, v in self.rank_to_subreddit.items()} 30 | for rank, subreddit in self.rank_to_subreddit.items(): 31 | self.subreddit_to_rank[subreddit] = rank 32 | with open(self.config['rank_to_sfw_status'], 'r') as infile: 33 | self.rank_to_sfw_status = json.loads(infile.read()) 34 | self.rank_to_sfw_status = {int(k): v for k, v in self.rank_to_sfw_status.items()} 35 | 36 | self.method = self.config["method"] 37 | self.model_path = self.config['model_path'].format(method=self.method) 38 | 39 | if self.method == "hot": 40 | model = Sequential() 41 | model.add(Dense(512, activation='relu', 42 | input_shape=(self.config['max_subreddits_in_model'], ))) 43 | model.add(Dropout(0.5)) 44 | model.add(Dense(self.config['max_subreddits_in_model'], activation='sigmoid')) 45 | model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['acc']) 46 | else: 47 | raise ValueError("'method' in config not well defined") 48 | 49 | self.model = model 50 | if force_retrain or not os.path.exists(self.model_path): 51 | model.summary() 52 | print("Preparing train/test data...") 53 | X, y = self.arrange_training_data(method=self.method) 54 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=self.config['test_pct']) 55 | 56 | train_data, test_data = (X_train, y_train), (X_test, y_test) 57 | print("Starting training process...") 58 | 
self.train_model(train_data, test_data) 59 | 60 | with self.graph.as_default(): 61 | with self.session.as_default(): 62 | self.model.load_weights(self.model_path) 63 | 64 | def arrange_training_data(self, method): 65 | import random 66 | 67 | with open(self.config["combined_user_to_subreddit_score_path"], 'r') as infile: 68 | user_subreddit_scores = json.loads(infile.read()) 69 | 70 | for k, scores in user_subreddit_scores.items(): 71 | user_subreddit_scores[k] = sorted(scores, key=lambda x: x[1], reverse=True) 72 | 73 | data_length, data_width = len(user_subreddit_scores), self.config['max_subreddits_in_model'] 74 | user_subreddit_scores = list(user_subreddit_scores.values()) 75 | random.shuffle(user_subreddit_scores) 76 | 77 | if method == 'hot': # Input vector is one-hot encoding. 78 | X = np.zeros((data_length, data_width), dtype=np.bool) 79 | for i, scores in enumerate(user_subreddit_scores): 80 | for subreddit_key, score in scores: 81 | if subreddit_key <= data_width: 82 | X[i][subreddit_key - 1] = True 83 | else: 84 | raise ValueError(f"Unhandled training data preparation method {method}") 85 | 86 | 87 | y = np.zeros((data_length, data_width), dtype=np.bool) 88 | for i, scores in enumerate(user_subreddit_scores): 89 | for subreddit_key, score in scores: 90 | if subreddit_key <= data_width: 91 | y[i][subreddit_key-1] = score > 0 92 | return X, y 93 | 94 | def arrange_user_data(self, user_data): 95 | user_data = {k: v for k, v in sorted(user_data.items(), key=lambda x: x[1], reverse=True) 96 | if 0 < self.subreddit_to_rank.get(k, -1) < self.config['max_subreddits_in_model']} 97 | if self.method == 'hot': 98 | data = np.zeros((1, self.config['max_subreddits_in_model']), dtype=np.bool) 99 | for subreddit_name, subreddit_score in user_data.items(): 100 | if subreddit_name in self.subreddit_to_rank: 101 | data[0][self.subreddit_to_rank[subreddit_name]-1] = subreddit_score > 0 102 | 103 | return data 104 | 105 | def train_model(self, train_data, test_data): 106 | 
X, y = train_data 107 | self.model.fit(X, y, epochs=5, batch_size=256, verbose=1) 108 | self.model.save(self.model_path) 109 | X, y = test_data 110 | scores = self.model.evaluate(X, y, verbose=1) 111 | print(self.model.metrics_names) 112 | print(scores) 113 | 114 | def get_user_predictions(self, user_data): 115 | arranged_data = self.arrange_user_data(user_data) 116 | user_known_subreddits = set(list(user_data.keys())) 117 | 118 | with self.graph.as_default(): 119 | with self.session.as_default(): 120 | predictions = self.model.predict(arranged_data)[0] 121 | 122 | predictions = [(self.rank_to_subreddit[i+1], round(float(score), 5), i) for i, score 123 | in enumerate(predictions) if self.rank_to_subreddit[i+1] not in user_known_subreddits \ 124 | and self.rank_to_sfw_status[i+1] and i > 200] 125 | predictions.sort(key=lambda x: x[1], reverse=True) 126 | return predictions 127 | 128 | 129 | if __name__ == '__main__': 130 | import os 131 | os.chdir('..') 132 | modeler = SuggestionModeler(True) 133 | 134 | -------------------------------------------------------------------------------- /src/model_generation/suggester.py: -------------------------------------------------------------------------------- 1 | from .data_retrieval import DataRetriever 2 | from .modeling import SuggestionModeler 3 | import json 4 | 5 | 6 | class Suggester(object): 7 | 8 | def __init__(self): 9 | self.retriever = DataRetriever() 10 | self.model = SuggestionModeler() 11 | 12 | def get_estimates_for_user(self, username): 13 | """ 14 | Given a username, generate a list of suggested subreddits they may enjoy based on their recent activity. 
15 | :param username: String username 16 | :return: String dumped json in format { 17 | 'success': True or False, 18 | 'message: None or description of error, 19 | 'data': List of 200 subreddits ranked by confidence, values 20 | [String subreddit name, float confidence, int popularity rating] 21 | } 22 | """ 23 | username = username.strip() 24 | user_data = self.retriever.get_distinct_subreddits_for_user(username) 25 | if not user_data: 26 | return json.dumps({ 27 | 'success': True, 28 | 'message': 'No reddit data found for user ' + username 29 | }) 30 | res = self.model.get_user_predictions(user_data)[:200] 31 | return json.dumps({ 32 | 'success': True, 33 | 'data': res 34 | }) 35 | -------------------------------------------------------------------------------- /src/server.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from model_generation.suggester import Suggester 3 | from flask import Flask, send_file 4 | app = Flask(__name__) 5 | 6 | 7 | @app.before_first_request 8 | def load_model(): 9 | # Load the model for speed in subsequent calls. 
10 | app.suggester = Suggester() 11 | 12 | 13 | @app.route("/api/suggestions/") 14 | def suggestions(username): 15 | return app.suggester.get_estimates_for_user(username) 16 | 17 | 18 | @app.route("/") 19 | def landing(): 20 | return send_file('static/index.html') 21 | 22 | 23 | if __name__ == '__main__': 24 | app.run(threaded=False) -------------------------------------------------------------------------------- /src/static/css/main.css: -------------------------------------------------------------------------------- 1 | html { 2 | height: 100%; 3 | background: linear-gradient(145deg, #151821, #2f3d42, #46535a); 4 | animation: background 3s infinite alternate ease-in-out; 5 | background-size: 200% 200%; 6 | } 7 | 8 | body { 9 | text-align: center; 10 | font-family: monospace; 11 | font-weight: lighter; 12 | color: white; 13 | position: fixed; 14 | width: 100%; 15 | } 16 | 17 | .button { 18 | cursor: pointer; 19 | border: 1px; 20 | border-color: rgba(255, 255, 255, 0.45); 21 | border-style: solid; 22 | border-radius: 4px; 23 | display: inline-block; 24 | font-weight: 600; 25 | width: 85px; 26 | padding: 15px 0; 27 | box-shadow: 0 0 0px rgba(253, 238, 255, 0.2); 28 | transition: 0.4s; 29 | } 30 | 31 | .button:hover { 32 | color: white; 33 | box-shadow: 0 0 20px rgba(253, 238, 255, 0.2); 34 | background-color: #849ead; 35 | } 36 | 37 | .loading-message { 38 | text-align: center; 39 | margin-top: 30px; 40 | } 41 | 42 | .main { 43 | max-width: 800px; 44 | width: 70%; 45 | margin-left: auto; 46 | margin-right: auto; 47 | } 48 | 49 | .main .header { 50 | margin-top: 10vh; 51 | } 52 | 53 | .main .description { 54 | text-align: left; 55 | width: 90%; 56 | margin-left: auto; 57 | margin-right: auto; 58 | } 59 | 60 | .main #username { 61 | width: 80%; 62 | padding: 15px; 63 | border: 1px solid #ccc; 64 | border-radius: 3px; 65 | margin-bottom: 10px; 66 | box-sizing: border-box; 67 | font-family: montserrat, sans-serif; 68 | color: #2C3E50; 69 | font-size: 13px; 70 | } 71 | 
72 | .image-container { 73 | position: absolute; 74 | } 75 | 76 | #suggestion-table { 77 | height: 60vh; 78 | text-align: left; 79 | } 80 | 81 | #canvas { 82 | position: fixed; 83 | height: 100%; 84 | width: 100%; 85 | z-index: -1; 86 | } 87 | 88 | @keyframes background { 89 | 0%{background-position:0% 75%} 90 | 50%{background-position:100% 25%} 91 | 100%{background-position:25% 100%} 92 | } 93 | 94 | 95 | table{ 96 | width:100%; 97 | table-layout: fixed; 98 | } 99 | .tbl-header{ 100 | background-color: rgba(255,255,255,0.1); 101 | } 102 | .tbl-content{ 103 | height:50vh; 104 | overflow-x:auto; 105 | margin-top: 0px; 106 | border: 1px solid rgba(255,255,255,0.3); 107 | } 108 | th{ 109 | pointer-events: none; 110 | text-align: left; 111 | font-weight: 500; 112 | font-size: 12px; 113 | color: #fff; 114 | text-transform: uppercase; 115 | } 116 | tr:hover { 117 | background-color: rgba(255, 255, 255, 0.06); 118 | cursor: pointer; 119 | } 120 | 121 | td{ 122 | padding: 15px; 123 | text-align: left; 124 | vertical-align:middle; 125 | font-weight: 300; 126 | font-size: 12px; 127 | color: #fff; 128 | border-bottom: solid 1px rgba(255,255,255,0.1); 129 | } 130 | /* for custom scrollbar for webkit browser*/ 131 | 132 | td { 133 | width: 10px; 134 | } 135 | 136 | th { 137 | width: 54px; 138 | padding: 0px; 139 | } 140 | 141 | td+td { 142 | width: auto; 143 | } 144 | 145 | th+th { 146 | width: auto; 147 | padding: 20px 0px 20px 6px; 148 | } 149 | 150 | ::-webkit-scrollbar { 151 | width: 6px; 152 | } 153 | ::-webkit-scrollbar-track { 154 | -webkit-box-shadow: inset 0 0 6px rgba(0,0,0,0.2); 155 | } 156 | ::-webkit-scrollbar-thumb { 157 | -webkit-box-shadow: inset 0 0 6px rgba(0,0,0,0.2); 158 | } -------------------------------------------------------------------------------- /src/static/img/arrow.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 9 | 10 | 
-------------------------------------------------------------------------------- /src/static/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Subreddit Suggester 6 | 7 | 8 | 9 |
10 |
11 |
12 |
13 |

Subreddit Suggester

14 |
15 |
16 |

17 | Given a username, suggest new subreddits a user is likely to comment or submit items in based on the 18 | subreddits of their last 300 comments and 100 submissions. 19 |

20 |
21 |
22 | 23 |
24 | Suggest! 25 |
26 |
27 |
28 |
29 |
30 |
31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /src/static/js/main.js: -------------------------------------------------------------------------------- 1 | /*jshint esversion: 6 */ 2 | 3 | /* Submit an API call to get the most popular subreddits for a particular username */ 4 | function getSubreddits(username) { 5 | d3.select("#suggestion-table").remove(); 6 | d3.select("#suggestion-message").remove(); 7 | let mnt = d3.select('.results'); 8 | if (!username) { 9 | mnt.append('div').attr('id', 'suggestion-message').text("Please provide a username"); 10 | return; 11 | } 12 | let table = mnt.append("div").attr("id", "suggestion-table").style('opacity', 0); 13 | let header = table.append("div").attr("class", "tbl-header") 14 | .append("table") 15 | .append("thead") 16 | .append("tr"); 17 | header.append("th").text(""); 18 | header.append("th").text("Popularity"); 19 | header.append("th").text("Subreddit"); 20 | header.append("th").text("Confidence"); 21 | table.transition().duration(1000).style("opacity", 1); 22 | table.append("div").attr("class", "tbl-content").append("table").append("div").attr("class", "loading-message") 23 | .text("loading"); 24 | $.getJSON(`api/suggestions/${username}`, (response) => { 25 | // Remove old container for these suggestion values, if they exist. 
26 | if (response && response.data) { 27 | d3.select('.tbl-content').remove(); 28 | let content = table.append("div").attr("class", "tbl-content").append("table").append("tbody"); 29 | response.data.forEach((entry, i) => { 30 | let row = content.append("tr").attr("onclick", `window.open('https://reddit.com/r/${entry[0]}')`); 31 | row.append("td").text(i+1); 32 | row.append("td").text(entry[2]); 33 | row.append("td").text(entry[0]); 34 | row.append("td").text(entry[1]); 35 | 36 | }); 37 | } else { 38 | table.remove(); 39 | mnt.append('div').attr('id', 'suggestion-message').text("An error was encountered in retrieving this" + 40 | "user's data. " + response.message ? response.message : ''); 41 | } 42 | }); 43 | } 44 | 45 | function generateBackground() { 46 | let canvas = d3.select("#canvas"); 47 | let canvasDim = canvas.node().getBoundingClientRect(); 48 | setInterval(() => { 49 | let sizeFactor = Math.max(Math.random(), 0.5); 50 | let x = canvasDim.width * Math.random(), 51 | y = canvasDim.height + 20; 52 | let initialRotation = Math.random() * 360; 53 | let imageContainer = canvas.append("img"); 54 | imageContainer.attr("class", "image-container") 55 | .attr("src", "/static/img/arrow.svg") 56 | .attr("width", 45 * sizeFactor) 57 | .attr("height", 45 * sizeFactor) 58 | .style("transform", `rotate(${initialRotation}deg)`) 59 | .style("opacity", 0.5 * sizeFactor) 60 | .style("top", `${y}px`) 61 | .style("left", `${x}px`) 62 | .transition().duration(3000) 63 | .style("opacity", 0) 64 | .style("top", `${y - 200 * sizeFactor}px`) 65 | .style("transform", `rotate(${initialRotation + 90}deg)`) 66 | .remove() 67 | ; 68 | }, 200); 69 | } 70 | 71 | generateBackground(); --------------------------------------------------------------------------------