├── README └── AppStoreReviews.py /README: -------------------------------------------------------------------------------- 1 | This is a simple script to get Apple AppStore reviews for your iOS application. 2 | 3 | INSTALL 4 | Requires Python 2.6 or later; not compatible with Python 3.0. The additional modules elementtree and argparse are required. To install these modules: 5 | % sudo easy_install elementtree 6 | % sudo easy_install argparse 7 | The output is UTF-8 encoded; if you are scraping a non-English AppStore, set the environment to use UTF-8: 8 | % export LANG=en_US.UTF-8 9 | % export PYTHONIOENCODING=utf-8 10 | 11 | USAGE 12 | AppStoreReviews.py [-h] [-i AppId] [-c "Name"] [-l] [-m MaxReviews] [-r] 13 | 1) Get reviews for application id = 322550986 14 | % AppStoreReviews.py -i 322550986 15 | 2) Get reviews for application id = 322550986 in a specific AppStore 16 | % AppStoreReviews.py -i 322550986 -c "united states" 17 | 3) Get the list of known AppStores 18 | % AppStoreReviews.py -l 19 | 20 | USE AS A MODULE 21 | You can use it as a module in your Python program. Just import it and use the getReviews(appStoreId, appId) function to retrieve reviews for a given country and application. 
#!/usr/bin/env python
''' Apple AppStore reviews scrapper
    version 2011-04-12
    Tomek "Grych" Gryszkiewicz, grych@tg.pl
    http://www.tg.pl

    based on "Scraping AppStore Reviews" blog by Erica Sadun
     - http://blogs.oreilly.com/iphone/2008/08/scraping-appstore-reviews.html
    AppStore codes are based on "appstore_reviews" by Jeremy Wohl
     - https://github.com/jeremywohl/iphone-scripts/blob/master/appstore_reviews

    Can be run as a script (see --help) or imported as a module; in the
    latter case use getReviews(appStoreId, appId).
'''
import sys
import string
import argparse
import re

try:
    import urllib2
except ImportError:
    # Python 3: Request, urlopen, URLError and HTTPError all live in (or are
    # re-exported by) urllib.request, so it can stand in for urllib2 here.
    import urllib.request as urllib2

try:
    from elementtree import ElementTree
except ImportError:
    # The stand-alone "elementtree" package is optional; the standard library
    # ships the same API as xml.etree.ElementTree.
    from xml.etree import ElementTree

# AppStore (country) name -> numeric store front id sent to Apple.
# Names are looked up case-insensitively by the command line (see _find_country).
appStores = {
    'Argentina': 143505,
    'Australia': 143460,
    'Belgium': 143446,
    'Brazil': 143503,
    'Canada': 143455,
    'Chile': 143483,
    'China': 143465,
    'Colombia': 143501,
    'Costa Rica': 143495,
    'Croatia': 143494,
    'Czech Republic': 143489,
    'Denmark': 143458,
    'Deutschland': 143443,
    'El Salvador': 143506,
    'Espana': 143454,
    'Finland': 143447,
    'France': 143442,
    'Greece': 143448,
    'Guatemala': 143504,
    'Hong Kong': 143463,
    'Hungary': 143482,
    'India': 143467,
    'Indonesia': 143476,
    'Ireland': 143449,
    'Israel': 143491,
    'Italia': 143450,
    'Korea': 143466,
    'Kuwait': 143493,
    'Lebanon': 143497,
    'Luxembourg': 143451,
    'Malaysia': 143473,
    'Mexico': 143468,
    'Nederland': 143452,
    'New Zealand': 143461,
    'Norway': 143457,
    'Osterreich': 143445,
    'Pakistan': 143477,
    'Panama': 143485,
    'Peru': 143507,
    'Phillipines': 143474,
    'Poland': 143478,
    'Portugal': 143453,
    'Qatar': 143498,
    'Romania': 143487,
    'Russia': 143469,
    'Saudi Arabia': 143479,
    'Schweiz/Suisse': 143459,
    'Singapore': 143464,
    'Slovakia': 143496,
    'Slovenia': 143499,
    'South Africa': 143472,
    'Sri Lanka': 143486,
    'Sweden': 143456,
    'Taiwan': 143470,
    'Thailand': 143475,
    'Turkey': 143480,
    'United Arab Emirates': 143481,
    'United Kingdom': 143444,
    'United States': 143441,
    'Venezuela': 143502,
    'Vietnam': 143471,
    'Japan': 143462,
    'Dominican Republic': 143508,
    'Ecuador': 143509,
    'Egypt': 143516,
    'Estonia': 143518,
    'Honduras': 143510,
    'Jamaica': 143511,
    'Kazakhstan': 143517,
    'Latvia': 143519,
    'Lithuania': 143520,
    'Macau': 143515,
    'Malta': 143521,
    'Moldova': 143523,
    'Nicaragua': 143512,
    'Paraguay': 143513,
    'Uruguay': 143514,
}

# XML namespace used by every element of the reviews document.
_ITMS_NS = '{http://www.apple.com/itms/}'

_REVIEWS_URL = ("http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZStore.woa/wa/"
                "viewContentsUserReviews?id=%s&pageNumber=%d&sortOrdering=4"
                "&onlyLatestVersion=false&type=Purple+Software")

# Matches e.g. "Version 1.2" in the text that follows the reviewer link.
_VERSION_RE = re.compile(r"Version [^\n^ ]+")


def _itms_path(*tags):
    ''' builds an ElementTree path from tag names, qualifying each one with the itms namespace
    '''
    return "/".join(_ITMS_NS + tag for tag in tags)


# Path (relative to the document root) of the boxes holding one review each.
# NOTE: deliberately no trailing "/" - xml.etree's ElementPath expands a
# trailing slash to "/*", which would select the children of every review box
# instead of the box itself.
_REVIEW_NODES_PATH = _itms_path('View', 'ScrollView', 'VBoxView', 'View',
                                'MatrixView', 'VBoxView', 'VBoxView', 'VBoxView')


def _collect_pages(fetch_page, maxReviews=-1):
    ''' concatenates the lists returned by fetch_page(0), fetch_page(1), ...
        Stops at the first empty page, or as soon as maxReviews items were
        collected (maxReviews <= 0 means no limit). Never returns more than
        maxReviews items when a limit is given.
    '''
    reviews = []
    pageNo = 0
    while True:
        page = fetch_page(pageNo)
        if not page:
            break
        reviews += page
        pageNo += 1
        if maxReviews > 0 and len(reviews) >= maxReviews:
            # a page may overshoot the limit, so cut the surplus off
            return reviews[:maxReviews]
    return reviews


def getReviews(appStoreId, appId, maxReviews=-1):
    ''' returns list of reviews for given AppStore ID and application Id
        maxReviews: upper bound on the number of returned reviews; values <= 0 mean "all"
        return list format: [{"topic": unicode string, "review": unicode string, "rank": int,
                              "version": unicode string, "user": unicode string}]
        any of the values is None when it could not be found in the page
    '''
    def fetch_page(pageNo):
        return _getReviewsForPage(appStoreId, appId, pageNo)
    return _collect_pages(fetch_page, maxReviews)


def _parse_review(node):
    ''' converts one review box element into a review dict; missing parts become None
    '''
    review = {}

    review_node = node.find(_itms_path('TextView', 'SetFontStyle'))
    review["review"] = None if review_node is None else review_node.text

    # the version is part of the free text following the reviewer link
    version_node = node.find(_itms_path('HBoxView', 'TextView', 'SetFontStyle', 'GotoURL'))
    review["version"] = None
    if version_node is not None and version_node.tail:
        found = _VERSION_RE.search(version_node.tail)
        if found:
            review["version"] = found.group()

    user_node = node.find(_itms_path('HBoxView', 'TextView', 'SetFontStyle', 'GotoURL', 'b'))
    if user_node is None or user_node.text is None:
        review["user"] = None
    else:
        review["user"] = user_node.text.strip()

    # the rank is read from the "alt" attribute, which the original code
    # expected to look like "5 stars"; take the first number found in it
    rank_node = node.find(_itms_path('HBoxView', 'HBoxView', 'HBoxView'))
    review["rank"] = None
    if rank_node is not None:
        found = re.search(r"\d+", rank_node.attrib.get('alt', ''))
        if found:
            review["rank"] = int(found.group())

    topic_node = node.find(_itms_path('HBoxView', 'TextView', 'SetFontStyle', 'b'))
    review["topic"] = None if topic_node is None else topic_node.text

    return review


def _parse_reviews(root):
    ''' returns the list of review dicts found below the given document root element
    '''
    return [_parse_review(node) for node in root.findall(_REVIEW_NODES_PATH)]


def _getReviewsForPage(appStoreId, appId, pageNo):
    ''' downloads and parses one page of reviews; returns a (possibly empty) list of review dicts
        Prints a message and raises SystemExit when the AppStore cannot be reached.
    '''
    userAgent = 'iTunes/9.2 (Macintosh; U; Mac OS X 10.6)'
    front = "%d-1" % appStoreId
    url = _REVIEWS_URL % (appId, pageNo)
    req = urllib2.Request(url, headers={"X-Apple-Store-Front": front, "User-Agent": userAgent})
    try:
        u = urllib2.urlopen(req, timeout=30)
    except urllib2.URLError:
        # URLError is the base class of HTTPError, so this also covers
        # failures to connect at all, not only HTTP error statuses
        print("Can't connect to the AppStore, please try again later.")
        raise SystemExit
    try:
        root = ElementTree.parse(u).getroot()
    finally:
        u.close()
    return _parse_reviews(root)


def _print_reviews(reviews, country):
    ''' prints the reviews in human readable form
        returns (reviews count, sum rank); a review without rank is printed
        with no stars and adds nothing to the sum
    '''
    if not reviews:
        return (0, 0)
    print("Reviews in %s:" % (country))
    print("")
    sumRank = 0
    for review in reviews:
        rank = review["rank"] or 0
        print("%s by %s" % (review["version"], review["user"]))
        print("%s (%s) %s" % ("*" * rank, review["topic"], review["review"]))
        print("")
        sumRank += rank
    print("Number of reviews in %s: %d, avg rank: %.2f\n" % (country, len(reviews), 1.0 * sumRank / len(reviews)))
    return (len(reviews), sumRank)


def _print_rawmode(reviews):
    ''' prints one line per review: the topic followed by the review text with newlines removed
    '''
    for review in reviews:
        text = (review["review"] or "").replace("\n", "")
        print("%s %s" % (review["topic"], text))


def _find_country(name):
    ''' returns the appStores key matching name, ignoring case and repeated spaces, or None
    '''
    wanted = " ".join(name.split()).lower()
    for known in appStores:
        if known.lower() == wanted:
            return known
    return None


def _main(argv=None):
    ''' command line entry point; argv defaults to sys.argv[1:]
    '''
    parser = argparse.ArgumentParser(description='AppStoreReviewsScrapper command line.', epilog='To get your application Id look into the AppStore link to you app, for example http://itunes.apple.com/pl/app/autobuser-warszawa/id335042980?mt=8 - app Id is the number between "id" and "?mt=8"')
    parser.add_argument('-i', '--id', default=0, metavar='AppId', type=int, help='Application Id (see below)')
    parser.add_argument('-c', '--country', metavar='"Name"', type=str, default='all', help='AppStore country name (use -l to see them)')
    parser.add_argument('-l', '--list', action='store_true', default=False, help='AppStores list')
    parser.add_argument('-m', '--max-reviews', default=-1, metavar='MaxReviews', type=int, help='Max number of reviews to load')
    parser.add_argument('-r', '--raw-mode', action='store_true', default=False, help='Print raw mode')
    args = parser.parse_args(argv)

    countries = sorted(appStores)
    # listing the stores needs no application id, so handle it first
    if args.list:
        for c in countries:
            print(c)
        return
    if args.id == 0:
        parser.print_help()
        raise SystemExit

    if " ".join(args.country.split()).lower() == "all":
        rankCount = 0
        rankSum = 0
        for c in countries:
            reviews = getReviews(appStores[c], args.id, maxReviews=args.max_reviews)
            if args.raw_mode:
                _print_rawmode(reviews)
                continue
            (rc, rs) = _print_reviews(reviews, c)
            rankCount += rc
            rankSum += rs
        if args.raw_mode:
            return
        if rankCount:
            print("\nTotal number of reviews: %d, avg rank: %.2f" % (rankCount, 1.0 * rankSum / rankCount))
        else:
            # no reviews anywhere: there is no average to compute
            print("\nTotal number of reviews: 0")
        return

    country = _find_country(args.country)
    if country is None:
        print("No such country %s!\n\nWell, it could exist in real life, but I dont know it." % string.capwords(args.country))
        return
    reviews = getReviews(appStores[country], args.id, maxReviews=args.max_reviews)
    if args.raw_mode:
        _print_rawmode(reviews)
    else:
        _print_reviews(reviews, country)


if __name__ == '__main__':
    _main()