├── .gitignore
├── LICENSE
├── README.md
├── docs
    └── example.gif
├── requirements.txt
└── src
    ├── __init__.py
    ├── flaskapp.wsgi
    ├── model_generation
        ├── __init__.py
        ├── config.json
        ├── data
        │   ├── .gitkeep
        │   └── segmented_output
        │   │   └── .gitkeep
        ├── data_retrieval.py
        ├── modeling.py
        └── suggester.py
    ├── server.py
    └── static
        ├── css
            └── main.css
        ├── img
            └── arrow.svg
        ├── index.html
        └── js
            └── main.js


/.gitignore:
--------------------------------------------------------------------------------
1 | config_override.json


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 John Klingelhofer
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ### 2024 Addendum
 2 | 
 3 | This project will likely no longer work as it once did if anyone attempts to reuse it, because of changes to the Reddit API. I have also long since deleted my Reddit account, so the demo link below may no longer be functional.
 4 | 
 5 | ### Overview 
 6 | 
 7 | [Click here to see it in action](http://159.89.246.81/)
 8 | 
 9 | This is a recommendation engine for subreddits based on the subreddits to which the user's last 300 comments and 100 posts were submitted. In order to achieve this, these histories were pulled for 200,000 users through the reddit API, and a model in Keras was trained on vectors of each user's group of subreddits to establish the relationships between subreddits. 
10 | 
11 | ![](docs/example.gif)
12 | 
13 | Once trained, usernames can be submitted to this model through a basic Flask API.
14 | 
15 | ### Running locally
16 | 
17 | Because the data exceeds GitHub's file size limits, anyone wishing to run this locally will need to go through the data retrieval and model training steps themselves.
18 | 
19 | The steps are as follows:
20 | 
21 | 1. Install the required packages in `requirements.txt`
22 | 2. Generate Reddit API keys and put them into `src/model_generation/config.json`, or into a git-ignored `config_override.json` file in the same directory.
23 | 3. While in the `config.json`, adjust the parameters for the model as desired, such as number of users to use in the 
24 | generation of training data, and the number of comments/submissions to go through for each user in generating this data.
25 | 4. Run `data_retrieval.py`. This is the longest step in the process and may take several hours to get all the needed 
26 | user information.
27 | 5. Run `modeling.py` to generate the model. On a machine with a recent NVIDIA GPU and the proper setup to utilize it, 
28 | the training process shouldn't take more than a few minutes.
29 | 6. Run `server.py`.
30 | 


--------------------------------------------------------------------------------
/docs/example.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klingj3/subreddit_suggester/80e8aa0b5e240d09041b8746f038954269d3b770/docs/example.gif


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | Flask==1.1.1
 2 | Keras==2.3.1
 3 | Keras-Applications==1.0.8
 4 | Keras-Preprocessing==1.1.0
 5 | tensorboard==2.1.0
 6 | tensorflow==2.1.2
 7 | tensorflow-estimator==2.1.0
 8 | numpy==1.18.1
 9 | pandas==1.0.3
10 | praw==6.5.1
11 | prawcore==1.0.1
12 | progressbar2==3.50.1
13 | Werkzeug==0.16.0


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klingj3/subreddit_suggester/80e8aa0b5e240d09041b8746f038954269d3b770/src/__init__.py


--------------------------------------------------------------------------------
/src/flaskapp.wsgi:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | import os
 3 | import sys
 4 | import logging
 5 | 
 6 | def execfile(filename):
 7 |     globals = dict( __file__ = filename )
 8 |     exec( open(filename).read(), globals )
 9 | 
# Run the deployment's activate_this.py inside this interpreter before the
# application is imported (presumably this switches to the virtualenv's
# packages -- confirm against the deployed venv).
activate_this = os.path.join('/var/www/subreddit_suggestor/src/venv/bin', 'activate_this.py' )
execfile( activate_this )

logging.basicConfig(stream=sys.stderr)
# Make `server` importable and run from the source directory: the application
# opens its config, data and static files through relative paths.
sys.path.insert(0,"/var/www/subreddit_suggestor/src")
os.chdir('/var/www/subreddit_suggestor/src')
# WSGI servers look for a module-level callable named `application`.
from server import app as application
# NOTE(review): 'for dev' is used as the secret key whenever SECRET_KEY is not
# set in the environment -- confirm it is always set in production.
application.secret_key = os.getenv('SECRET_KEY', 'for dev')


--------------------------------------------------------------------------------
/src/model_generation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klingj3/subreddit_suggester/80e8aa0b5e240d09041b8746f038954269d3b770/src/model_generation/__init__.py


--------------------------------------------------------------------------------
/src/model_generation/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "client_id": "YOUR CLIENT ID HERE!",
 3 |   "client_secret": "YOUR CLIENT SECRET HERE!",
 4 |   "usernames_path": "model_generation/data/segmented_output/random_usernames_{i}.txt",
 5 |   "subreddits_score_path": "model_generation/data/segmented_output/scored_subreddits_{i}.json",
 6 |   "num_usernames": 200000,
 7 |   "combined_user_to_subreddit_score_path": "model_generation/data/user_to_subreddit_score.json",
 8 |   "rank_to_sfw_status": "model_generation/data/rank_to_sfw_status.json",
 9 |   "rank_to_subreddit_path": "model_generation/data/rank_to_subreddit.json",
10 |   "max_subreddits_in_data": 50000,
11 |   "max_subreddits_in_model": 15000,
12 |   "max_subreddits_per_user_vector": 50,
13 |   "method": "hot",
14 |   "model_path": "model_generation/data/model_output_{method}.h5",
15 |   "test_pct": 0.1
16 | }


--------------------------------------------------------------------------------
/src/model_generation/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klingj3/subreddit_suggester/80e8aa0b5e240d09041b8746f038954269d3b770/src/model_generation/data/.gitkeep


--------------------------------------------------------------------------------
/src/model_generation/data/segmented_output/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klingj3/subreddit_suggester/80e8aa0b5e240d09041b8746f038954269d3b770/src/model_generation/data/segmented_output/.gitkeep


--------------------------------------------------------------------------------
/src/model_generation/data_retrieval.py:
--------------------------------------------------------------------------------
  1 | from collections import Counter
  2 | from prawcore.exceptions import Forbidden, NotFound
  3 | 
  4 | import json
  5 | import praw
  6 | import progressbar as pg
  7 | import os
  8 | 
  9 | 
 10 | class DataRetriever(object):
 11 |     """
 12 |     Generate and format the data which is used to drive the model.
 13 |     """
 14 | 
 15 |     def __init__(self, worker_no=0, num_workers=1):
 16 |         """
 17 |         Load the config files and establish praw utility.
 18 |         :param worker_no: Int id of worker
 19 |         :param num_workers: Int number of workers, dictates division of labor between jobs.
 20 |         """
 21 |         with open("model_generation/config.json", "r") as infile:  #
 22 |             self.config = json.loads(infile.read())
 23 |         if os.path.exists("model_generation/config_override.json"):
 24 |             with open("model_generation/config_override.json", "r") as infile:
 25 |                 self.config.update(json.loads(infile.read()))
 26 | 
 27 |         self.reddit = praw.Reddit(user_agent="user", client_id=self.config["client_id"],
 28 |                                   client_secret=self.config["client_secret"])
 29 | 
 30 |         if worker_no >= num_workers:
 31 |             raise ValueError(f"worker_no passed {worker_no} >= the number of workers")
 32 | 
 33 |         self.i, self.total_instances = worker_no, num_workers
 34 |         self.usernames_path = self.config["usernames_path"].format(i=self.i)
 35 | 
 36 |     def get_random_usernames(self):
 37 |         """
 38 |         Get a random sample of usernames by randomly selecting subreddits and recent comments within that subreddit.
 39 |         :param number: Number of ids to retrieve
 40 |         :param ids_per_subreddit: Max number of ids per subreddit
 41 |         :param destination_file: The path of the text file to contain the exported ids.
 42 |         :return: None
 43 |         """
 44 |         ids = set()
 45 | 
 46 |         number = int(self.config["num_usernames"]/self.total_instances)
 47 |         max_ids_per_subreddit = 1000
 48 | 
 49 |         if self.i == 0:
 50 |             print("Getting random ids...")
 51 |             print(number)
 52 |             bar = pg.ProgressBar(max_value=number)
 53 |             bar.update(0)
 54 | 
 55 |         forbidden_count = 0
 56 | 
 57 |         while len(ids) < number:
 58 |             try:
 59 |                 subreddit_name = self.reddit.subreddit("random").display_name
 60 |                 subreddit = self.reddit.subreddit(subreddit_name)
 61 |                 if subreddit.subscribers > 10000:  # For speed, ignore subreddits with very few subscribers as user origins.
 62 |                     old_id_num = len(ids)
 63 |                     for submission in subreddit.top(limit=10):
 64 |                         if len(submission.comments):
 65 |                             if submission.author:
 66 |                                 ids.add((str(submission.author), subreddit_name))
 67 |                             for comment in submission.comments.list():
 68 |                                 try:
 69 |                                     if comment.author:
 70 |                                         ids.add((str(comment.author), subreddit_name))
 71 |                                         if len(ids) - old_id_num > max_ids_per_subreddit:
 72 |                                             break
 73 |                                 except AttributeError:
 74 |                                     pass
 75 |                     # For clarity, only display the status bar updates for the first worker.
 76 |                     if self.i == 0:
 77 |                         bar.update(min(number, len(ids)))
 78 |             except (Forbidden, NotFound):
 79 |                 forbidden_count += 1
 80 |                 if forbidden_count > 100:
 81 |                     print("Max exceptions exceeded, stopping remainder of auience selection")
 82 |                     break
 83 | 
 84 |         with open(self.usernames_path, "w") as outfile:
 85 |             outfile.write(json.dumps(list(ids)[:number]))
 86 | 
 87 |     def generate_user_subreddits_data(self):
 88 |         """
 89 |         Get a list of the distinct subreddits that the reddit accounts in a particular file have submitted or commented
 90 |         within.
 91 |         :param path_to_usernames: String path to the list of usernames.
 92 |         :param path_to_key_scores: Path to the output of ids to strings.
 93 |         :param path_to_decoder_json: Path to a JSON for decoding the strings.
 94 |         :return: None
 95 |         """
 96 |         try:
 97 |             with open(self.usernames_path, "r") as infile:
 98 |                 usernames = json.loads(infile.read())
 99 |         except FileNotFoundError:
100 |             usernames = {}  # Just pass so other threads can proceed normally.
101 | 
102 |         username_to_subreddit_scores = dict()
103 |         if self.i == 0:  # For clarity, just show the status for one of the jobs.
104 |             print("Getting subreddit visitation data...")
105 |             work_range = pg.progressbar(usernames)
106 |         else:
107 |             work_range = usernames
108 | 
109 |         for username, origin_subreddit in work_range:
110 |             subreddit_scores = self.get_distinct_subreddits_for_user(username, excluded_subreddit=origin_subreddit)
111 |             if subreddit_scores:
112 |                 username_to_subreddit_scores[username] = subreddit_scores
113 | 
114 |         with open(self.config["subreddits_score_path"].format(i=self.i), "w") as outfile:
115 |             outfile.write(json.dumps(username_to_subreddit_scores))
116 | 
117 |     def get_distinct_subreddits_for_user(self, username, excluded_subreddit=None):
118 |         """
119 |         Get a list of distinct subreddits a user has interacted with.
120 |         :param username: String username of the user for which activity will be evaluated.
121 |         :param excluded_subreddit: String name of subreddit to not be included in returned values or factor into counts.
122 |         This value is normally used to prevent the subreddit from which a username was pulled from appearing in the output,
123 |         which can skew the popularity metrics towards rarer randomly chosen subreddits.
124 |         :return: Dict in format {
125 |             String subreddit name: Float % of reddit interactions (submissions or comments) by a user which were in
126 |                 that subreddit.
127 |         } on success, empty dict if API exception encountered
128 |         """
129 |         redditor = self.reddit.redditor(username)
130 |         try:
131 |             comment_subreddit_counts = Counter([str(comment.subreddit) for comment in redditor.comments.new(limit=300)])
132 |             del comment_subreddit_counts[excluded_subreddit]
133 |         except (Forbidden, NotFound):
134 |             return {}
135 | 
136 |         try:
137 |             submission_subreddit_counts = Counter([str(submission.subreddit) for submission in
138 |                                                    redditor.submissions.new(limit=100)])
139 |             del submission_subreddit_counts[excluded_subreddit]
140 |         except (Forbidden, NotFound):
141 |             return {}
142 | 
143 |         subreddits = set(comment_subreddit_counts.keys()).union(set(submission_subreddit_counts.keys()))
144 |         total_actions = sum(comment_subreddit_counts.values()) + sum(submission_subreddit_counts.values())
145 | 
146 |         return {subreddit: (comment_subreddit_counts[subreddit] + submission_subreddit_counts[subreddit])/total_actions
147 |             for subreddit in subreddits}
148 | 
149 |     def combine_and_prep_data(self, minimum_popularity=None, highest_num=64):
150 |         """
151 |         Taking the individual files produced in the generate subreddits for individual users step, combine them into:
152 |             - For each user, a list of tuples of (Int, Float) with the Int being the popularity ranking of a subreddit
153 |               and the Float what percentage of the user"s recent activity was in that subreddit. This file is saved
154 |               to the value under key "combined_user_to_subreddit_score_path" in the config file.
155 |             - Dump a JSON of {Integer Ranking: Subreddit name} for the subreddits visited by the users, where their
156 |               popularity is above the minimum popularity ranking.
157 |         :return: None
158 |         """
159 |         from collections import Counter
160 | 
161 |         if not minimum_popularity:
162 |             minimum_popularity = self.config["max_subreddits_in_data"]
163 | 
164 |         combined_user_to_subreddit_scores = dict()
165 |         subreddit_to_popularity = Counter()
166 |         user_subreddit_score_directory = "/".join(self.config["subreddits_score_path"].split('/')[:-1])
167 |         for file in os.listdir(user_subreddit_score_directory):
168 |             path = os.path.join(user_subreddit_score_directory, file)
169 |             if ".json" in file and int(file.split('.')[0].split('_')[-1]) < highest_num:
170 |                 with open(path, "r") as infile:
171 |                     combined_user_to_subreddit_scores.update(json.loads(infile.read()))
172 |         for subreddit_scores in combined_user_to_subreddit_scores.values():
173 |             for subreddit, score in subreddit_scores.items():
174 |                 subreddit_to_popularity[subreddit] += score
175 | 
176 |         rank_to_subreddit = dict()
177 |         for subreddit, _ in pg.progressbar(subreddit_to_popularity.most_common(minimum_popularity)):
178 |             rank_to_subreddit[len(rank_to_subreddit)+1] = subreddit
179 | 
180 |         with open(self.config["rank_to_subreddit_path"], "w") as outfile:
181 |             outfile.write(json.dumps(rank_to_subreddit))
182 | 
183 |         subreddit_to_rank = {subreddit: rank for rank, subreddit in rank_to_subreddit.items()}
184 | 
185 |         output_data = {i:
186 |             [(subreddit_to_rank[subreddit], score) for subreddit, score in user_subreddit_score.items() if subreddit
187 |              in subreddit_to_rank] for i, user_subreddit_score in enumerate(combined_user_to_subreddit_scores.values())}
188 |         with open(self.config["combined_user_to_subreddit_score_path"], "w") as outfile:
189 |             outfile.write(json.dumps(output_data))
190 | 
191 |     def generate_sfw_subreddit_info(self):
192 |         from time import sleep
193 |         with open(self.config['rank_to_subreddit_path'], 'r') as infile:
194 |             rank_to_subreddit = json.loads(infile.read())
195 | 
196 |         rank_to_sfw_status = dict()
197 |         for rank, subreddit in pg.progressbar(list(rank_to_subreddit.items())[:self.config['max_subreddits_in_model']]):
198 |             for _ in range(10):  # Max retries
199 |                 try:
200 |                     rank_to_sfw_status[rank] = not self.reddit.subreddit(subreddit).over18
201 |                     break
202 |                 except Exception:
203 |                     sleep(10)
204 |                     pass
205 |             else:
206 |                 print("Max retries exceeded on subreddit " + subreddit)
207 |                 # For safety, assume false.
208 |                 rank_to_sfw_status[rank] = False
209 | 
210 |         with open(self.config['rank_to_sfw_status'], 'w') as outfile:
211 |             outfile.write(json.dumps(rank_to_sfw_status))
212 | 
if __name__ == "__main__":
    import threading

    # All configured paths are relative to `src`; this assumes the script is started from `src/model_generation`.
    os.chdir('..')

    def get_data_slice(i, j):
        """Run the retrieval steps of worker `i` out of `j` workers."""
        data_retriever = DataRetriever(worker_no=i, num_workers=j)
        # Sample usernames only when this worker has none yet, so a fresh checkout works end to end
        # and a re-run does not throw away (hours of) earlier sampling.
        if not os.path.exists(data_retriever.usernames_path):
            data_retriever.get_random_usernames()
        data_retriever.generate_user_subreddits_data()

    max_threads = 64
    jobs = []
    print(f"Starting work on {max_threads} jobs.")
    for i in range(max_threads):
        p = threading.Thread(target=get_data_slice, args=(i, max_threads))
        jobs.append(p)
        p.start()

    # Wait for every worker before merging their per-worker output files.
    for j in jobs:
        j.join()

    data_retriever = DataRetriever(worker_no=0, num_workers=1)
    data_retriever.combine_and_prep_data(highest_num=max_threads)

    data_retriever.generate_sfw_subreddit_info()


--------------------------------------------------------------------------------
/src/model_generation/modeling.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import numpy as np
  3 | import os
  4 | import tensorflow as tf
  5 | 
  6 | from keras.models import Sequential
  7 | from keras.layers import Dense, Dropout, Embedding, Flatten, Activation, BatchNormalization
  8 | from sklearn.model_selection import train_test_split
  9 | 
 10 | 
 11 | class SuggestionModeler(object):
 12 |     """
 13 |     A collection of functions to generate a model of subreddit suggestions from the data retreived in
 14 |     data_retrieval.py
 15 |     """
 16 |     def __init__(self, force_retrain=False):
 17 |         self.session = tf.Session()
 18 |         self.graph = tf.get_default_graph()
 19 | 
 20 |         with open("model_generation/config.json", "r") as infile:
 21 |             self.config = json.loads(infile.read())
 22 |         if os.path.exists("config_override.json"):
 23 |             with open("model_generation/config_override.json", "r") as infile:
 24 |                 self.config.update(json.loads(infile.read()))
 25 | 
 26 |         self.subreddit_to_rank = dict()
 27 |         with open(self.config["rank_to_subreddit_path"], 'r') as infile:
 28 |             self.rank_to_subreddit = json.loads(infile.read())
 29 |             self.rank_to_subreddit = {int(k): v for k, v in self.rank_to_subreddit.items()}
 30 |             for rank, subreddit in self.rank_to_subreddit.items():
 31 |                 self.subreddit_to_rank[subreddit] = rank
 32 |         with open(self.config['rank_to_sfw_status'], 'r') as infile:
 33 |             self.rank_to_sfw_status = json.loads(infile.read())
 34 |             self.rank_to_sfw_status = {int(k): v for k, v in self.rank_to_sfw_status.items()}
 35 | 
 36 |         self.method = self.config["method"]
 37 |         self.model_path = self.config['model_path'].format(method=self.method)
 38 | 
 39 |         if self.method == "hot":
 40 |             model = Sequential()
 41 |             model.add(Dense(512, activation='relu',
 42 |                             input_shape=(self.config['max_subreddits_in_model'], )))
 43 |             model.add(Dropout(0.5))
 44 |             model.add(Dense(self.config['max_subreddits_in_model'], activation='sigmoid'))
 45 |             model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['acc'])
 46 |         else:
 47 |             raise ValueError("'method' in config not well defined")
 48 | 
 49 |         self.model = model
 50 |         if force_retrain or not os.path.exists(self.model_path):
 51 |             model.summary()
 52 |             print("Preparing train/test data...")
 53 |             X, y = self.arrange_training_data(method=self.method)
 54 |             X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=self.config['test_pct'])
 55 | 
 56 |             train_data, test_data = (X_train, y_train), (X_test, y_test)
 57 |             print("Starting training process...")
 58 |             self.train_model(train_data, test_data)
 59 | 
 60 |         with self.graph.as_default():
 61 |             with self.session.as_default():
 62 |                 self.model.load_weights(self.model_path)
 63 | 
 64 |     def arrange_training_data(self, method):
 65 |         import random
 66 | 
 67 |         with open(self.config["combined_user_to_subreddit_score_path"], 'r') as infile:
 68 |             user_subreddit_scores = json.loads(infile.read())
 69 | 
 70 |         for k, scores in user_subreddit_scores.items():
 71 |             user_subreddit_scores[k] = sorted(scores, key=lambda x: x[1], reverse=True)
 72 | 
 73 |         data_length, data_width = len(user_subreddit_scores), self.config['max_subreddits_in_model']
 74 |         user_subreddit_scores = list(user_subreddit_scores.values())
 75 |         random.shuffle(user_subreddit_scores)
 76 | 
 77 |         if method == 'hot':  # Input vector is one-hot encoding.
 78 |             X = np.zeros((data_length, data_width), dtype=np.bool)
 79 |             for i, scores in enumerate(user_subreddit_scores):
 80 |                 for subreddit_key, score in scores:
 81 |                     if subreddit_key <= data_width:
 82 |                         X[i][subreddit_key - 1] = True
 83 |         else:
 84 |             raise ValueError(f"Unhandled training data preparation method {method}")
 85 | 
 86 | 
 87 |         y = np.zeros((data_length, data_width), dtype=np.bool)
 88 |         for i, scores in enumerate(user_subreddit_scores):
 89 |             for subreddit_key, score in scores:
 90 |                 if subreddit_key <= data_width:
 91 |                     y[i][subreddit_key-1] = score > 0
 92 |         return X, y
 93 | 
 94 |     def arrange_user_data(self, user_data):
 95 |         user_data = {k: v for k, v in sorted(user_data.items(), key=lambda x: x[1], reverse=True)
 96 |                      if 0 < self.subreddit_to_rank.get(k, -1) < self.config['max_subreddits_in_model']}
 97 |         if self.method == 'hot':
 98 |             data = np.zeros((1, self.config['max_subreddits_in_model']), dtype=np.bool)
 99 |             for subreddit_name, subreddit_score in user_data.items():
100 |                 if subreddit_name in self.subreddit_to_rank:
101 |                     data[0][self.subreddit_to_rank[subreddit_name]-1] = subreddit_score > 0
102 | 
103 |         return data
104 | 
105 |     def train_model(self, train_data, test_data):
106 |         X, y = train_data
107 |         self.model.fit(X, y, epochs=5, batch_size=256, verbose=1)
108 |         self.model.save(self.model_path)
109 |         X, y = test_data
110 |         scores = self.model.evaluate(X, y, verbose=1)
111 |         print(self.model.metrics_names)
112 |         print(scores)
113 | 
114 |     def get_user_predictions(self, user_data):
115 |         arranged_data = self.arrange_user_data(user_data)
116 |         user_known_subreddits = set(list(user_data.keys()))
117 | 
118 |         with self.graph.as_default():
119 |             with self.session.as_default():
120 |                 predictions = self.model.predict(arranged_data)[0]
121 | 
122 |         predictions = [(self.rank_to_subreddit[i+1], round(float(score), 5), i) for i, score
123 |                        in enumerate(predictions) if self.rank_to_subreddit[i+1] not in user_known_subreddits \
124 |                        and self.rank_to_sfw_status[i+1] and i > 200]
125 |         predictions.sort(key=lambda x: x[1], reverse=True)
126 |         return predictions
127 | 
128 | 
if __name__ == '__main__':
    # The configured paths are relative to `src`; this assumes the script is started from `src/model_generation`.
    os.chdir('..')
    # Always train a fresh model when the module is run as a script.
    modeler = SuggestionModeler(force_retrain=True)
133 | 
134 | 


--------------------------------------------------------------------------------
/src/model_generation/suggester.py:
--------------------------------------------------------------------------------
 1 | from .data_retrieval import DataRetriever
 2 | from .modeling import SuggestionModeler
 3 | import json
 4 | 
 5 | 
 6 | class Suggester(object):
 7 | 
 8 |     def __init__(self):
 9 |         self.retriever = DataRetriever()
10 |         self.model = SuggestionModeler()
11 | 
12 |     def get_estimates_for_user(self, username):
13 |         """
14 |         Given a username, generate a list of suggested subreddits they may enjoy based on their recent activity.
15 |         :param username: String username
16 |         :return: String dumped json in format {
17 |             'success': True or False,
18 |             'message: None or description of error,
19 |             'data': List of 200 subreddits ranked by confidence, values
20 |                [String subreddit name, float confidence, int popularity rating]
21 |         }
22 |         """
23 |         username = username.strip()
24 |         user_data = self.retriever.get_distinct_subreddits_for_user(username)
25 |         if not user_data:
26 |             return json.dumps({
27 |                 'success': True,
28 |                 'message': 'No reddit data found for user ' + username
29 |             })
30 |         res = self.model.get_user_predictions(user_data)[:200]
31 |         return json.dumps({
32 |             'success': True,
33 |             'data': res
34 |         })
35 | 


--------------------------------------------------------------------------------
/src/server.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from model_generation.suggester import Suggester
 3 | from flask import Flask, send_file
 4 | app = Flask(__name__)
 5 | 
 6 | 
@app.before_first_request
def load_model():
    """Create the Suggester once, before the first request is handled, and keep it on the app object."""
    # Load the model for speed in subsequent calls.
    app.suggester = Suggester()
11 | 
12 | 
@app.route("/api/suggestions/<username>")
def suggestions(username):
    """
    Return the suggestions for a reddit user.
    :param username: String username taken from the URL
    :return: String dumped json produced by Suggester.get_estimates_for_user
    """
    # NOTE(review): the JSON string is returned as-is, so it goes out with Flask's default content type and can
    # contain the requested username unescaped -- consider an explicit application/json response once it is
    # confirmed that the front end copes with it.
    payload = app.suggester.get_estimates_for_user(username)
    return payload
16 | 
17 | 
@app.route("/")
def landing():
    """Serve the landing page of the web front end."""
    index_page = 'static/index.html'
    return send_file(index_page)
21 | 
22 | 
if __name__ == '__main__':
    # Run the development server handling one request at a time (presumably because the loaded
    # TensorFlow session/graph is not meant to be shared between threads -- confirm).
    app.run(threaded=False)


--------------------------------------------------------------------------------
/src/static/css/main.css:
--------------------------------------------------------------------------------
  1 | html {
  2 |     height: 100%;
  3 |     background: linear-gradient(145deg, #151821, #2f3d42, #46535a);
  4 |     animation: background 3s infinite alternate ease-in-out;
  5 |     background-size: 200% 200%;
  6 | }
  7 | 
  8 | body {
  9 |     text-align: center;
 10 |     font-family: monospace;
 11 |     font-weight: lighter;
 12 |     color: white;
 13 |     position: fixed;
 14 |     width: 100%;
 15 | }
 16 | 
 17 | .button {
 18 |   cursor: pointer;
 19 |   border: 1px;
 20 |   border-color: rgba(255, 255, 255, 0.45);
 21 |   border-style: solid;
 22 |   border-radius: 4px;
 23 |   display: inline-block;
 24 |   font-weight: 600;
 25 |   width: 85px;
 26 |   padding: 15px 0;
 27 |   box-shadow: 0 0 0px rgba(253, 238, 255, 0.2);
 28 |   transition: 0.4s;
 29 | }
 30 | 
 31 | .button:hover {
 32 |   color: white;
 33 |   box-shadow: 0 0 20px rgba(253, 238, 255, 0.2);
 34 |   background-color: #849ead;
 35 | }
 36 | 
 37 | .loading-message {
 38 |     text-align: center;
 39 |     margin-top: 30px;
 40 | }
 41 | 
 42 | .main {
 43 |     max-width: 800px;
 44 |     width: 70%;
 45 |     margin-left: auto;
 46 |     margin-right: auto;
 47 | }
 48 | 
 49 | .main .header {
 50 |     margin-top: 10vh;
 51 | }
 52 | 
 53 | .main .description {
 54 |     text-align: left;
 55 |     width: 90%;
 56 |     margin-left: auto;
 57 |     margin-right: auto;
 58 | }
 59 | 
 60 | .main #username {
 61 |     width: 80%;
 62 | 	padding: 15px;
 63 | 	border: 1px solid #ccc;
 64 | 	border-radius: 3px;
 65 | 	margin-bottom: 10px;
 66 | 	box-sizing: border-box;
 67 |     font-family: montserrat, sans-serif;
 68 | 	color: #2C3E50;
 69 | 	font-size: 13px;
 70 | }
 71 | 
 72 | .image-container {
 73 |     position: absolute;
 74 | }
 75 | 
 76 | #suggestion-table {
 77 |     height: 60vh;
 78 |     text-align: left;
 79 | }
 80 | 
 81 | #canvas {
 82 |     position: fixed;
 83 |     height: 100%;
 84 |     width: 100%;
 85 |     z-index: -1;
 86 | }
 87 | 
 88 | @keyframes background {
 89 |    0%{background-position:0% 75%}
 90 |    50%{background-position:100% 25%}
 91 |    100%{background-position:25% 100%}
 92 | }
 93 | 
 94 | 
 95 | table{
 96 |   width:100%;
 97 |   table-layout: fixed;
 98 | }
 99 | .tbl-header{
100 |   background-color: rgba(255,255,255,0.1);
101 |  }
102 | .tbl-content{
103 |   height:50vh;
104 |   overflow-x:auto;
105 |   margin-top: 0px;
106 |   border: 1px solid rgba(255,255,255,0.3);
107 | }
108 | th{
109 |   pointer-events: none;
110 |   text-align: left;
111 |   font-weight: 500;
112 |   font-size: 12px;
113 |   color: #fff;
114 |   text-transform: uppercase;
115 | }
116 | tr:hover {
117 |   background-color: rgba(255, 255, 255, 0.06);
118 |   cursor: pointer;
119 | }
120 | 
121 | td{
122 |   padding: 15px;
123 |   text-align: left;
124 |   vertical-align:middle;
125 |   font-weight: 300;
126 |   font-size: 12px;
127 |   color: #fff;
128 |   border-bottom: solid 1px rgba(255,255,255,0.1);
129 | }
130 | /* for custom scrollbar for webkit browser*/
131 | 
132 | td {
133 |     width: 10px;
134 | }
135 | 
136 | th {
137 |     width: 54px;
138 |     padding: 0px;
139 | }
140 | 
141 | td+td {
142 |     width: auto;
143 | }
144 | 
145 | th+th {
146 |     width: auto;
147 |     padding: 20px 0px 20px 6px;
148 | }
149 | 
150 | ::-webkit-scrollbar {
151 |     width: 6px;
152 | }
153 | ::-webkit-scrollbar-track {
154 |     -webkit-box-shadow: inset 0 0 6px rgba(0,0,0,0.2);
155 | }
156 | ::-webkit-scrollbar-thumb {
157 |     -webkit-box-shadow: inset 0 0 6px rgba(0,0,0,0.2);
158 | }


--------------------------------------------------------------------------------
/src/static/img/arrow.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!-- Generator: Adobe Illustrator 18.1.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
 3 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
 4 | <svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
 5 | 	 viewBox="0 0 20 20" style="enable-background:new 0 0 20 20;" xml:space="preserve">
 6 | <path style="fill:#fff" d="M9.951908,4.166667H9.951867l-5.36036,5.825781C4.593775,9.997617,4.592553,9.994831,4.594821,10h3.321361
 7 | 	c0.000668,0.000668,0.001042,0.001041,0.001709,0.001709V15h4.166667v-4.99648c0.001369-0.001369,0.002151-0.002151,0.00352-0.00352
 8 | 	h3.329831L9.951908,4.166667z"/>
 9 | </svg>
10 | 


--------------------------------------------------------------------------------
/src/static/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |     <meta charset="UTF-8">
 5 |     <title>Subreddit Suggester</title>
 6 |     <link rel="stylesheet" type="text/css" href="static/css/main.css">
 7 | </head>
 8 | <body>
 9 | <div id="canvas"></div>
10 | <div class="main">
11 |     <div class="header">
12 |         <div class="title">
13 |             <h1>Subreddit Suggester</h1>
14 |         </div>
15 |         <div class="description">
16 |             <h4>
17 |                 Given a username, suggest new subreddits a user is likely to comment or submit items in based on the
18 |                 subreddits of their last 300 comments and 100 submissions.
19 |             </h4>
20 |         </div>
21 |         <div class="name-input">
22 |             <input id="username" type="text" placeholder="Insert a username here">
23 |             <div class="button" onclick="getSubreddits(document.getElementById('username').value)">
24 |                 Suggest!
25 |             </div>
26 |         </div>
27 |     </div>
28 |     <div class="results">
29 |     </div>
30 | </div>
31 | </body>
32 | <script src="https://d3js.org/d3.v5.min.js"></script>
33 | <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
34 | <script src="static/js/main.js"></script>
35 | </html>


--------------------------------------------------------------------------------
/src/static/js/main.js:
--------------------------------------------------------------------------------
 1 | /*jshint esversion: 6 */
 2 | 
 3 | /* Submit an API call to get the most popular subreddits for a particular username */
 4 | function getSubreddits(username) {
 5 |     d3.select("#suggestion-table").remove();
 6 |     d3.select("#suggestion-message").remove();
 7 |     let mnt = d3.select('.results');
 8 |     if (!username) {
 9 |         mnt.append('div').attr('id', 'suggestion-message').text("Please provide a username");
10 |         return;
11 |     }
12 |     let table = mnt.append("div").attr("id", "suggestion-table").style('opacity', 0);
13 |     let header = table.append("div").attr("class", "tbl-header")
14 |         .append("table")
15 |         .append("thead")
16 |         .append("tr");
17 |     header.append("th").text("");
18 |     header.append("th").text("Popularity");
19 |     header.append("th").text("Subreddit");
20 |     header.append("th").text("Confidence");
21 |     table.transition().duration(1000).style("opacity", 1);
22 |     table.append("div").attr("class", "tbl-content").append("table").append("div").attr("class", "loading-message")
23 |         .text("loading");
24 |     $.getJSON(`api/suggestions/${username}`, (response) => {
25 |         // Remove old container for these suggestion values, if they exist.
26 |         if (response && response.data) {
27 |             d3.select('.tbl-content').remove();
28 |             let content = table.append("div").attr("class", "tbl-content").append("table").append("tbody");
29 |             response.data.forEach((entry, i) => {
30 |                 let row = content.append("tr").attr("onclick", `window.open('https://reddit.com/r/${entry[0]}')`);
31 |                 row.append("td").text(i+1);
32 |                 row.append("td").text(entry[2]);
33 |                 row.append("td").text(entry[0]);
34 |                 row.append("td").text(entry[1]);
35 | 
36 |             });
37 |         } else {
38 |             table.remove();
39 |             mnt.append('div').attr('id', 'suggestion-message').text("An error was encountered in retrieving this" +
40 |                 "user's data. " + response.message ? response.message : '');
41 |         }
42 |     });
43 | }
44 | 
/**
 * Start the decorative background animation.
 *
 * Every 200 ms an arrow image is appended to `#canvas` just below its bottom edge
 * at a random horizontal position, with a random size and rotation. It then
 * drifts upward, rotates a further 90 degrees and fades out over 3 seconds, after
 * which it is removed from the DOM. Does nothing if `#canvas` is absent.
 */
function generateBackground() {
    const canvas = d3.select("#canvas");
    if (canvas.empty()) {
        return;
    }
    setInterval(() => {
        // Skip hidden tabs: d3 transitions are driven by animation frames, which browsers
        // suspend in the background, so arrows spawned there would pile up instead of fading out.
        if (document.hidden) {
            return;
        }
        // Measured on every tick so arrows still cover the full width after a window resize.
        const canvasDim = canvas.node().getBoundingClientRect();
        const sizeFactor = Math.max(Math.random(), 0.5);
        const x = canvasDim.width * Math.random();
        const y = canvasDim.height + 20;
        const initialRotation = Math.random() * 360;
        canvas.append("img")
            .attr("class", "image-container")
            // Relative, like the other asset and API URLs in this project, so it also
            // resolves when the app is not served from the site root.
            .attr("src", "static/img/arrow.svg")
            .attr("width", 45 * sizeFactor)
            .attr("height", 45 * sizeFactor)
            .style("transform", `rotate(${initialRotation}deg)`)
            .style("opacity", 0.5 * sizeFactor)
            .style("top", `${y}px`)
            .style("left", `${x}px`)
            .transition().duration(3000)
            .style("opacity", 0)
            .style("top", `${y - 200 * sizeFactor}px`)
            .style("transform", `rotate(${initialRotation + 90}deg)`)
            .remove();
    }, 200);
}

// Kick off the animation as soon as this script is loaded.
generateBackground();


--------------------------------------------------------------------------------