├── .gitignore ├── sample_code.py ├── README.md └── facebook_api_functions.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.txt 2 | *.jpg 3 | *.png 4 | *.mp4 5 | -------------------------------------------------------------------------------- /sample_code.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import facebook_api_functions as FB 3 | 4 | #%% Example Usage 5 | username = 'mukurudotcom' 6 | userid = FB.get_user_id(username) 7 | 8 | # get posts for a username 9 | posts = FB.get_posts_feed_comments(username, 'posts') 10 | 11 | # get feed for a username 12 | feed = FB.get_posts_feed_comments(username, 'feed') 13 | 14 | # get comments for a single post 15 | comments = FB.get_posts_feed_comments(posts.at[0,'id'], 'comments') 16 | 17 | # get reactions from posts 18 | emotions = FB.get_posts_reactions(username, reactions=['LIKE','LOVE']) 19 | 20 | # get reactions from comments 21 | emotions = FB.get_comment_reactions(posts.at[0,'id'], reactions=['WOW','HAHA']) 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Facebook Graph API 2 | This repository contains sample code, written while helping CSCAR clients, for extracting data from the Facebook Graph API. It uses the `requests` library to make GET requests to the API and then extracts a subset of each JSON response. 3 | 4 | Example code is provided in `sample_code.py`. You need to provide your **User Access Token** in `facebook_api_functions.py` before starting. 5 | 6 | `facebook_api_functions.py` contains functions to do the following: 7 | - Get user id for username. 8 | - Get posts, feed or comments for a node (e.g. user, post). 9 | - Get reactions from posts. 10 | - Get reactions from comments. 
# -*- coding: utf-8 -*-
"""
Helpers for extracting data from the Facebook Graph API.

This script contains functions to do the following:
    Get user id for username.
    Get posts, feed or comments for a node (e.g. user, post).
    Get reactions from posts.
    Get reactions from comments.

Node/Edge Relation Table
------------------------
Node     | Edge      | Use Cases
---------|-----------|-------------------------------------------------
User     | Posts     | Get posts from a user; Get reactions from a post
User     | Feed      | Get user's feed
Posts    | Comments  | Get comments from a post
Comments | Reactions | Get reactions from a comment

IMPORTANT:
You will need your own access_token from
https://developers.facebook.com/tools/explorer
(you need to log in to use it) as it expires in 1-2 hrs at the top of the hour.
Supply it through the FACEBOOK_ACCESS_TOKEN environment variable, or assign
it to the module-level ``access_token`` before calling any function.
"""

import os

import requests
import pandas as pd

# A token must never be committed to source control, so it is read from the
# environment. Every function looks up ``access_token`` at call time, which
# means it can also be assigned after import (``FB.access_token = '...'``).
access_token = os.environ.get('FACEBOOK_ACCESS_TOKEN', '')

version = '2.12'
baseurl = 'https://graph.facebook.com/v{}'.format(version)

# Seconds to wait on the API before giving up; without a timeout a stalled
# connection would block requests.get() forever.
REQUEST_TIMEOUT = 30

# Edges accepted by get_posts_feed_comments().
_EDGES = ('posts', 'feed', 'comments')

# Reaction types accepted by the ``reactions`` arguments below.
_REACTION_TYPES = {'NONE', 'LIKE', 'LOVE', 'WOW', 'HAHA', 'SAD', 'ANGRY',
                   'THANKFUL'}


def _get_json(url, params):
    """GET *url* with *params* and return the decoded JSON body.

    Raises requests.exceptions.HTTPError on a non-2xx response and
    requests.exceptions.Timeout when the API does not answer in time.
    """
    resp = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    return resp.json()


def _iter_pages(url, params, label):
    """Yield every page of a cursor-paginated edge, printing progress.

    *label* is the text printed in front of ``Page N`` for each request.
    """
    params = dict(params)  # the cursor is added to a private copy
    page = 0
    while True:
        page += 1
        print('{0} Page {1}'.format(label, page))
        response = _get_json(url, params)
        yield response
        # An empty result may carry no 'paging' (or no 'cursors') entry at
        # all, so every level is looked up defensively.
        paging = response.get('paging') or {}
        after = (paging.get('cursors') or {}).get('after')
        if not paging.get('next') or not after:
            break
        params['after'] = after


def _has_letters(nodeid):
    """Return True when *nodeid* contains an alphabetic character."""
    return any(c.isalpha() for c in str(nodeid))


def _clean_reactions(reactions):
    """Return *reactions* as a list without duplicates, or None if invalid.

    Duplicates are dropped (order preserved) because a repeated reaction
    would make the later pivot fail or produce repeated columns. When an
    unknown reaction is present the problem is printed and None is returned.
    """
    unique = list(dict.fromkeys(reactions or ()))
    nomatch = set(unique).difference(_REACTION_TYPES)
    if nomatch:
        print('{} is invalid reaction. Choose from {}'.format(
            nomatch, _REACTION_TYPES))
        return None
    return unique


#%% get user id
def get_user_id(username):
    """
    Get user id for username.

    Parameters
    ----------
    username: str
        username

    Returns
    -------
    string
        user id

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails, times out or returns an HTTP error status.
    """
    params = {'access_token': access_token,
              'fields': 'id,name'}
    url = '{}/{}'.format(baseurl, username)
    return _get_json(url, params)['id']


#%% get user posts or feed
def get_posts_feed_comments(nodeid, edge, count=100):
    """
    Get posts, feed or comments for a node (e.g. user, post).

    Parameters
    ----------
    nodeid: str
        id of the node in question
    edge: str
        edge type you are looking for: 'posts', 'feed' or 'comments'
    count: int
        max number of results to return per page

    Returns
    -------
    DataFrame
        Columns: ['parentid','timestamp','id','message'].
        An empty DataFrame (no columns) is returned when edge is
        'comments' and nodeid contains alphabetic characters.

    Raises
    ------
    AssertionError
        If edge is not one of the supported edges.
    requests.exceptions.RequestException
        If a request fails, times out or returns an HTTP error status.
    """
    # Raised explicitly (same exception type as before) so the check is not
    # stripped when Python runs with -O.
    if edge not in _EDGES:
        raise AssertionError(
            'edge must be one of {}, got {!r}'.format(list(_EDGES), edge))

    params = {'access_token': access_token,
              'limit': count}
    if edge == 'comments':
        if _has_letters(nodeid):
            print('Looks like the comment_id has alphabet characters which is incorrect')
            return pd.DataFrame()
        params['fields'] = 'message,created_time,like_count'
    else:  # posts / feed
        params['fields'] = 'from,message,picture,link,name,type,created_time,shares,likes.summary(total_count)'

    url = '{}/{}/{}'.format(baseurl, nodeid, edge)
    rows = []
    for response in _iter_pages(url, params, '{0} {1}'.format(nodeid, edge)):
        rows.extend(
            (post['created_time'], post['id'], post.get('message', ''))
            for post in response.get('data', [])
        )

    df = pd.DataFrame(rows, columns=['timestamp', 'id', 'message'])
    df.insert(0, 'parentid', nodeid)
    return df


#%%
def get_posts_reactions(nodeid, *, count=100, reactions=None):
    """
    Get reactions from posts.

    Parameters
    ----------
    nodeid: str
        id of the node in question
    count: int
        max number of results to return per page
    reactions: list
        list of reactions interested in (duplicates are ignored)

    Returns
    -------
    DataFrame
        Columns: ['parentid','id','reaction1','reaction2','reaction3', etc...]
        One row per post. An empty DataFrame (no columns) is returned when
        an invalid reaction is requested.

    Raises
    ------
    requests.exceptions.RequestException
        If a request fails, times out or returns an HTTP error status.
    """
    reactions = _clean_reactions(reactions)
    if reactions is None:
        return pd.DataFrame()

    url = '{0}/{1}/posts'.format(baseurl, nodeid)
    rows = []
    for reaction in reactions:
        params = {
            'access_token': access_token,
            'limit': count,
            'fields': 'reactions.type({}).summary(total_count)'.format(reaction),
        }
        label = '{0} posts reaction {1}'.format(nodeid, reaction)
        for response in _iter_pages(url, params, label):
            for post in response.get('data', []):
                summary = (post.get('reactions') or {}).get('summary') or {}
                rows.append((post['id'], reaction,
                             summary.get('total_count', 0)))

    df = pd.DataFrame(rows, columns=['id', 'reaction', 'count'])
    if df.empty:
        # Nothing to pivot: hand back the documented columns with no rows.
        return pd.DataFrame(columns=['parentid', 'id'] + reactions)
    df = df.pivot(index='id', columns='reaction', values='count').reset_index()
    df.insert(0, 'parentid', nodeid)
    return df


#%%
def get_comment_reactions(nodeid, *, count=100, reactions=None):
    """
    Get reactions from comment.

    Parameters
    ----------
    nodeid: str
        id of the node in question
    count: int
        max number of results to return per page
    reactions: list
        list of reactions interested in (duplicates are ignored)

    Returns
    -------
    DataFrame
        Columns: ['parentid','reaction1','reaction2','reaction3', etc...]
        A single row holding the total count of each requested reaction.
        An empty DataFrame (no columns) is returned when an invalid
        reaction is requested or nodeid contains alphabetic characters.

    Raises
    ------
    requests.exceptions.RequestException
        If a request fails, times out or returns an HTTP error status.
    """
    reactions = _clean_reactions(reactions)
    if reactions is None:
        return pd.DataFrame()
    if _has_letters(nodeid):
        print('Looks like the comment_id has alphabet characters which is incorrect')
        return pd.DataFrame()

    url = '{0}/{1}/reactions'.format(baseurl, nodeid)
    counts = []
    for reaction in reactions:
        params = {'access_token': access_token,
                  'limit': count,
                  'summary': 'total_count',
                  'type': reaction}
        # Only the summary is needed, so a single request per reaction
        # is enough; no paging is performed.
        print('{0} comment reaction {1} Page 1'.format(nodeid, reaction))
        response = _get_json(url, params)
        counts.append(response['summary']['total_count'])

    df = pd.DataFrame(counts).transpose()
    df.columns = reactions
    df.insert(0, 'parentid', nodeid)
    return df