├── example_output.txt ├── stock.py ├── README.md └── executor.py /example_output.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paullo0106/stocktw/HEAD/example_output.txt -------------------------------------------------------------------------------- /stock.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import sys 3 | import executor 4 | 5 | 6 | 7 | def execute(stockId=None, date=None): 8 | 9 | now = datetime.datetime.now() 10 | if date is None or len(date)==0: 11 | #convert western calendar to R.O.C calendar 12 | date = str(now.year-1911) + "{:02d}".format(now.month) + "{:02d}".format(now.day) 13 | elif len(date)==4: 14 | date = str(now.year-1911) + date[0:2] + date[2:4] 15 | 16 | executor.execute(stockId, date) 17 | 18 | 19 | 20 | if __name__ == "__main__": 21 | if len(sys.argv)<2: 22 | print 'Please input the stock id' 23 | exit() 24 | else: 25 | # TODO: check if the stock id is a valid one from some pre-defined stock list 26 | stockId = sys.argv[1] 27 | if not stockId.isdigit(): 28 | print "Please input a valid stock number as first argument" 29 | exit() 30 | 31 | if len(sys.argv)>2: 32 | date = sys.argv[2] 33 | else: 34 | date = '' 35 | 36 | if len(date)!=4 and len(date)!=7 and len(date)!=0: 37 | print "Please input 4 or 7 numbers as date (ex: 0520, or 1030520)" 38 | exit() 39 | 40 | execute(stockId, date) 41 | 42 | 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | TWSE data collecting 台股每日券商買賣資料排名 3 | =============================== 4 | 5 | 抓取每日個股券商買賣張數及價位,並按照買超數量排名 6 | * 資料來源 證券櫃檯買賣中心 (http://www.gretai.org.tw/ch/)。 7 | * 資料來源 台灣證券交易所 (http://www.twse.com.tw)。 8 | 9 | 10 | 11 | Basics 12 | ----------------------------- 13 | 14 | * Authors: Paul Lo 15 | * Version: 1.0.0 of 2014/06/03 16 | * Python Version: Python 2.7 17 | * Docs: TBA 18 | 19 | 20 | Requires 21 | ----------------------------- 22 | 23 | python 2.7 24 | 25 | 26 | Report Issue or get involved 27 | ----------------------------- 28 | 29 | - Github: https://github.com/paullo0106/stocktw/ 30 | - Issues: https://github.com/paullo0106/stocktw/issues/ 31 | 32 | 33 | Web Demo 34 | ----------------------------- 35 | 36 | TBD 37 | 38 | 39 | Quick Start 40 | ----------------------------- 41 | 42 | * Specify stock id and date (R.O.C calendar in 7 numbers) as arguments: 43 | 44 | example: 45 | ```python stock.py 3293 1030603``` 46 | 47 | * Specify stock id and date (4 numbers for month and year) as arguments 48 | 49 | example: 50 | ```python stock.py 3293 0603``` 51 | 52 | 53 | * specify stock id as the only one argument (the current date will be automatically applied) 54 | 55 | example: 56 | ```python stock.py 3293``` 57 | 58 | 59 | 60 | # Output example 61 | 62 | :: 以3293 0603為例 (後半段省略, 完整請見 [example_output.txt] (https://github.com/paullo0106/stocktw/blob/master/example_output.txt) ) 63 | 64 | Sum | Total Buy | Buy Cost | Total Sell | Sell Cost | Avg Cost | Agent 65 | 96.0 | 96.0 | 135.84 | 0.0 | 0.0 | 135.84 | 5858 統一嘉義 66 | 95.0 | 331.0 | 135.99 | 236.0 | 133.02 | 134.76 | 9692 富邦嘉義 67 | 62.98 | 63.0 | 133.39 | 0.02 | 136.0 | 133.39 | 9659 富邦高雄 68 | 35.0 | 35.0 | 133.0 | 0.0 | 0.0 | 133.0 | 918W 群益金鼎忠孝 69 | 30.0 | 30.0 | 133.0 | 0.0 | 0.0 | 133.0 | 5690 豐興 70 | 23.0 | 30.0 | 133.0 | 7.0 | 133.0 | 133.0 | 9658 富邦建國 71 | 20.0 | 20.0 | 134.0 | 0.0 | 0.0 | 134.0 | 5600 永興 72 | 20.0 | 20.0 | 133.0 | 0.0 | 0.0 | 133.0 | 921S 凱基上新莊 73 | 19.0 | 19.0 | 133.0 | 0.0 | 0.0 | 133.0 | 8882 國泰台中 74 | 19.0 | 20.0 | 133.0 | 1.0 | 136.0 | 133.14 | 9852 元大大松山 75 | 18.0 | 18.0 | 133.0 | 0.0 | 0.0 | 133.0 | 8880 國泰 76 | 16.0 | 16.0 | 133.0 | 0.0 | 0.0 | 133.0 | 9652 富邦世貿 77 | 16.0 | 16.0 | 133.5 | 0.0 | 0.0 | 133.5 | 1090 台灣工銀 78 | 15.0 | 15.0 | 133.0 | 0.0 | 0.0 | 133.0 | 6465 大昌桃園 79 | 14.0 | 14.0 | 133.0 | 0.0 | 0.0 | 133.0 | 585E 統一新營 80 | (...omitted) 81 | 82 | 83 | 84 | Change Logs 85 | ----------------------------- 86 | 87 | ### 1.0.0 2014/06/03 88 | 89 | - Add python code with basic functions (only OTC stocks is available in this version) 90 | -------------------------------------------------------------------------------- /executor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import csv 5 | import urllib2 6 | import time 7 | from functools import wraps 8 | 9 | summaryData = {} # Keep a list of necessary data for each agent/branch, the value is a list which contains [,,,,] 10 | 11 | # alignment: http://docs.python.org/2/library/string.html#formatspec 12 | # http://stackoverflow.com/questions/10623727/python-spacing-and-aligning-strings 13 | printDataFormat = " {0:>10} | {1:>10} | {2:>10} | {3:>10} | {4:>10} | {5:>10} | {6:<15}" 14 | # print out columns: 'Sum','Total Buy','Buy Cost','Total Sell','Sell Cost','Avg Cost','Agent' 15 | 16 | 17 | def registerNum(agent_name,buy_num,sell_num,cost): 18 | name = agent_name.split() # format: ' ' 19 | name = name[0] # Use number as identifier is good enough 20 | 21 | global summaryData 22 | 23 | buy_num = float(buy_num) 24 | sell_num = float(sell_num) 25 | cost = float(cost) 26 | if summaryData.get(name,None)==None: # this agent name appears for the first time 27 | if buy_num>0 and sell_num>0: # then the cost listed on excel is both sell cost and buy cost 28 | summaryData[name] = (agent_name, buy_num, sell_num, cost, cost) 29 | elif buy_num>0: # the cost is for buy cost 30 | summaryData[name] = (agent_name, buy_num, sell_num, cost, 0) 31 | else: # the cost is for sell cost 32 | summaryData[name] = (agent_name, buy_num, sell_num, 0, cost) 33 | else: # do calculation to combine with previous data of this agent_name 34 | 35 | tmpData = summaryData[name] 36 | 37 | buyNum = tmpData[1] 38 | buyCost = tmpData[3] 39 | sellNum = tmpData[2] 40 | sellCost = tmpData[4] 41 | 42 | if buy_num>0: 43 | newBuyTotal = buy_num*cost 44 | buyTotal = buyNum*buyCost + newBuyTotal 45 | buyNum+=buy_num 46 | buyCost = buyTotal/buyNum 47 | 48 | if sell_num>0: 49 | newSellTotal = sell_num*cost 50 | sellTotal = sellNum*sellCost + newSellTotal 51 | sellNum+=sell_num 52 | sellCost = sellTotal/sellNum 53 | 54 | summaryData[name] = (agent_name, buyNum, sellNum, buyCost, sellCost ) 55 | 56 | 57 | def printHeader(): 58 | print printDataFormat.format('Sum','Total Buy','Buy Cost','Total Sell','Sell Cost','Avg Cost','Agent') 59 | 60 | 61 | def printResult(agentName): 62 | list = summaryData[agentName] 63 | 64 | # ('116f \xa4\xe9\xb2\xb1\xb4_\xbf\xb3', 130000.0, 0.0, 0, 40.88346153846154, 0) 65 | agentName = list[0] 66 | buy = list[1] 67 | sell = list[2] 68 | buy = buy/1000 69 | sell = sell/1000 70 | sum = buy-sell 71 | 72 | buyCost = list[3] 73 | sellCost = list[4] 74 | avgCost = (buyCost*buy + sellCost*sell) / (buy+sell) # calculate the average cost 75 | 76 | 77 | sum = round(sum,2) 78 | buy = round(buy,2) 79 | sell = round(sell,2) 80 | buyCost = round(buyCost,2) 81 | sellCost = round(sellCost,2) 82 | avgCost = round(avgCost,2) 83 | 84 | # alignment: http://docs.python.org/2/library/string.html#formatspec 85 | # http://stackoverflow.com/questions/10623727/python-spacing-and-aligning-strings 86 | #print list 87 | #print 'sum:'+ str(sum) 88 | #print 'agent:' + agentName 89 | #print str(sum) + ' ' + str(buy) + ' ' + str(buyCost) + ' ' + str(sell) + ' ' + str(sellCost) + ' ' + str(avgCost) + ' ' + str(agentName) 90 | print printDataFormat.format(sum, buy, buyCost, sell, sellCost, avgCost, agentName) 91 | 92 | 93 | def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None): 94 | """Retry calling the decorated function using an exponential backoff. 95 | 96 | http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/ 97 | original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry 98 | 99 | :param ExceptionToCheck: the exception to check. may be a tuple of 100 | exceptions to check 101 | :type ExceptionToCheck: Exception or tuple 102 | :param tries: number of times to try (not retry) before giving up 103 | :type tries: int 104 | :param delay: initial delay between retries in seconds 105 | :type delay: int 106 | :param backoff: backoff multiplier e.g. value of 2 will double the delay 107 | each retry 108 | :type backoff: int 109 | :param logger: logger to use. If None, print 110 | :type logger: logging.Logger instance 111 | """ 112 | def deco_retry(f): 113 | 114 | @wraps(f) 115 | def f_retry(*args, **kwargs): 116 | mtries, mdelay = tries, delay 117 | while mtries > 1: 118 | try: 119 | return f(*args, **kwargs) 120 | except ExceptionToCheck, e: 121 | msg = "%s, Retrying in %d seconds..." % (str(e), mdelay) 122 | if logger: 123 | logger.warning(msg) 124 | else: 125 | print msg 126 | print args 127 | time.sleep(mdelay) 128 | mtries -= 1 129 | mdelay *= backoff 130 | return f(*args, **kwargs) 131 | 132 | return f_retry # true decorator 133 | 134 | return deco_retry 135 | 136 | 137 | @retry(Exception,tries=3) 138 | def collectOTCData(stockId,date): 139 | url = 'http://www.gretai.org.tw/web/stock/aftertrading/broker_trading/download_ALLCSV.php?curstk='+stockId+'&stk_date='+date 140 | response = urllib2.urlopen(url) 141 | return response 142 | 143 | 144 | #main function here 145 | def execute(stockId, date): 146 | 147 | if stockId is None: # or date==None: 148 | #print 'some error message' 149 | return 150 | 151 | global summaryData 152 | summaryData = {} 153 | 154 | try: 155 | response = collectOTCData(stockId,date) 156 | except Exception, e: 157 | print e 158 | print 'Cannot get data for ' + str(stockId) 159 | return 160 | 161 | cr = csv.reader(response) 162 | 163 | ''' Expected format example in each row: 164 | ['1823', '1020 合庫', '124.50', '0', '2,000', '', '1824', '1020 合庫', '127.50', '0', '110'] 165 | ''' 166 | for row in cr: 167 | x=0 168 | 169 | if len(row)>10: # 11 columns in total 170 | name1 = row[1] 171 | buy1 = row[3] 172 | buy1 = buy1.replace(",","") 173 | name2 = row[7] 174 | buy2 = row[9] 175 | buy2 = buy2.replace(",","") 176 | 177 | sell1 = row[4] 178 | sell1 = sell1.replace(",","") 179 | sell2 = row[10] 180 | sell2 = sell2.replace(",","") 181 | 182 | cost1 = row[2] 183 | cost2 = row[8] 184 | 185 | try: # in order to skip the beginning few lines of header 186 | ''' header format example: 187 | ['券商買賣證券成交價量資訊'] 188 | ['證券代碼', '3293'] 189 | ['序號','券商','價格','買進股數','賣出股數','','序號','券商','價格','買進股數','賣出股數'] 190 | ''' 191 | str(int(buy1)) 192 | str(int(buy2)) 193 | str(int(sell1)) 194 | str(int(sell2)) 195 | except ValueError: 196 | continue 197 | 198 | registerNum(name1,buy1,sell1,cost1) 199 | registerNum(name2,buy2,sell2,cost2) 200 | 201 | if len(summaryData)<1: 202 | print 'Found no result for ' + str(stockId) + ' on ' + str(date) 203 | return 204 | 205 | rankAll = {} 206 | 207 | # Calculate and sort according to buy/sell number 208 | for key,value in summaryData.iteritems(): 209 | diff = float(value[1])-float(value[2]) # buy number - sell number 210 | rankAll[key] = diff 211 | 212 | rankAll = sorted(rankAll.items(), key=lambda x:x[1], reverse=True) 213 | 214 | printHeader() 215 | for item in rankAll: 216 | printResult(item[0]) 217 | 218 | 219 | 220 | 221 | --------------------------------------------------------------------------------