├── CFTC.py ├── CME1.py ├── CME2.py ├── CME3.py ├── CQF.py ├── LICENSE ├── LME.py ├── MENA Newsletter.py ├── Macrotrends.py ├── README.md ├── SHFE.py ├── Springer.py ├── Tomtom.py ├── US Federal Holidays.py ├── US Treasury.py ├── WallStreetBets.py └── preview ├── cme1 html.PNG ├── cme1 tree.png ├── cme2 euronext.PNG ├── cme2 inspect element.png ├── cme2 json.PNG ├── cme2 link address.png ├── cme2 network.PNG ├── cme2 request url.PNG ├── cme2 url.PNG ├── cqf login link.PNG ├── cqf post form.PNG ├── cqf query.PNG ├── cqf request header.PNG ├── legality.PNG ├── mena bat file.PNG ├── mena bat format.PNG ├── mena check.PNG ├── mena create task.PNG ├── mena finito.PNG ├── mena freq.PNG ├── mena python path.PNG ├── mena script name.PNG ├── mena set time.PNG ├── mena start program.PNG ├── mena task name.PNG ├── mena task scheduler.PNG ├── proxy domain.PNG ├── proxy ie.png ├── proxy lan.PNG ├── shfe javascript.png ├── shfe regex.png └── web-scraping-profile.png /CFTC.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | 5 | 6 | #scrape cftc trader commitment report 7 | 8 | 9 | # In[1]: 10 | 11 | 12 | import requests 13 | import pandas as pd 14 | import re 15 | import os 16 | os.chdir('H:/') 17 | 18 | 19 | # In[2]: 20 | 21 | 22 | #scraping function 23 | def scrape(url): 24 | 25 | session=requests.Session() 26 | 27 | session.headers.update( 28 | {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'}) 29 | 30 | response=session.get(url) 31 | 32 | return response 33 | 34 | 35 | # In[3]: 36 | 37 | 38 | #get data 39 | def etl(response): 40 | 41 | #create a list 42 | text=response.content.decode('utf-8').split('\r') 43 | 44 | 45 | #create index for each block 46 | assets=[i for i in text if 'CHICAGO MERCANTILE EXCHANGE' in i] 47 | ind=[text.index(i) for i in assets] 48 | 49 | 50 | overall=[] 51 | 52 | #etl 53 | for i in ind: 54 | 55 | commodity=text[i].split(' - CHICAGO MERCANTILE EXCHANGE')[0].replace('\n','') 56 | commodity_code=text[i].split('Code-')[-1].replace('\n','') 57 | date=re.search('\d{2}\/\d{2}\/\d{2}',text[i+1]).group() 58 | contractunit=re.search('(?<=\().*(?=OPEN INTEREST)',text[i+7]).group().replace(')','') 59 | open_interest=re.search('(?<=OPEN INTEREST\:).*',text[i+7]).group() 60 | non_commercial_long_commitment,non_commercial_short_commitment, \ 61 | non_commercial_spread_commitment,commercial_long_commitment, \ 62 | commercial_short_commitment,total_long_commitment,total_short_commitment, \ 63 | non_reportable_long_commitment,non_reportable_short_commitment=re.findall('\S+',text[i+9]) 64 | changedate=re.search('\d{2}\/\d{2}\/\d{2}',text[i+11]).group() 65 | change_open_interest=text[i+11].split(' ')[-1].replace(')','') 66 | non_commercial_long_change,non_commercial_short_change, \ 67 | non_commercial_spread_change,commercial_long_change, \ 68 | commercial_short_change,total_long_change,total_short_change, \ 69 | non_reportable_long_change,non_reportable_short_change=re.findall('\S+',text[i+12]) 70 | non_commercial_long_percent,non_commercial_short_percent, \ 71 | non_commercial_spread_percent,commercial_long_percent, \ 72 | commercial_short_percent,total_long_percent,total_short_percent, \ 73 | non_reportable_long_percent,non_reportable_short_percent=re.findall('\S+',text[i+15]) 74 | totaltraders=text[i+17].split(' ')[-1].replace(')','') 75 | non_commercial_long_traders,non_commercial_short_traders, \ 76 | 
non_commercial_spread_traders,commercial_long_traders, \ 77 | commercial_short_traders,total_long_traders,total_short_traders=re.findall('\S+',text[i+18]) 78 | 79 | temp=[commodity,commodity_code,date,contractunit,open_interest, 80 | non_commercial_long_commitment,non_commercial_short_commitment, 81 | non_commercial_spread_commitment,commercial_long_commitment, 82 | commercial_short_commitment,total_long_commitment, 83 | total_short_commitment,non_reportable_long_commitment, 84 | non_reportable_short_commitment,changedate,change_open_interest, 85 | non_commercial_long_change,non_commercial_short_change, 86 | non_commercial_spread_change,commercial_long_change, 87 | commercial_short_change,total_long_change,total_short_change, 88 | non_reportable_long_change,non_reportable_short_change, 89 | non_commercial_long_percent,non_commercial_short_percent, 90 | non_commercial_spread_percent,commercial_long_percent, 91 | commercial_short_percent,total_long_percent, 92 | total_short_percent,non_reportable_long_percent, 93 | non_reportable_short_percent,totaltraders, 94 | non_commercial_long_traders,non_commercial_short_traders, 95 | non_commercial_spread_traders,commercial_long_traders, 96 | commercial_short_traders,total_long_traders,total_short_traders] 97 | 98 | overall+=temp 99 | 100 | 101 | colnames=['commodity', 102 | 'commodity_code', 103 | 'date', 104 | 'contract_unit', 105 | 'open_interest', 106 | 'non_commercial_long_commitment', 107 | 'non_commercial_short_commitment', 108 | 'non_commercial_spread_commitment', 109 | 'commercial_long_commitment', 110 | 'commercial_short_commitment', 111 | 'total_long_commitment', 112 | 'total_short_commitment', 113 | 'non_reportable_long_commitment', 114 | 'non_reportable_short_commitment', 115 | 'change_date', 116 | 'change_open_interest', 117 | 'non_commercial_long_change', 118 | 'non_commercial_short_change', 119 | 'non_commercial_spread_change', 120 | 'commercial_long_change', 121 | 'commercial_short_change', 122 | 'total_long_change', 123 | 'total_short_change', 124 | 'non_reportable_long_change', 125 | 'non_reportable_short_change', 126 | 'non_commercial_long_percent', 127 | 'non_commercial_short_percent', 128 | 'non_commercial_spread_percent', 129 | 'commercial_long_percent', 130 | 'commercial_short_percent', 131 | 'total_long_percent', 132 | 'total_short_percent', 133 | 'non_reportable_long_percent', 134 | 'non_reportable_short_percent', 135 | 'total_traders', 136 | 'non_commercial_long_traders', 137 | 'non_commercial_short_traders', 138 | 'non_commercial_spread_traders', 139 | 'commercial_long_traders', 140 | 'commercial_short_traders', 141 | 'total_long_traders', 142 | 'total_short_traders'] 143 | 144 | 145 | #create dataframe 146 | df=pd.DataFrame(columns=colnames) 147 | 148 | 149 | for i in range(len(colnames)): 150 | df[colnames[i]]=overall[i::len(colnames)] 151 | 152 | 153 | #transform 154 | ind=['commodity', 'commodity_code','change_date', 155 | 'date', 'contract_unit', 'open_interest', 156 | 'change_open_interest','total_traders'] 157 | 158 | df=df.melt(id_vars=ind,value_vars=[i for i in df.columns if i not in ind]) 159 | 160 | #isolate position 161 | df['position']='' 162 | 163 | ind_long=df.loc[df['variable'].apply(lambda x: 'long' in x )].index 164 | ind_short=df.loc[df['variable'].apply(lambda x: 'short' in x )].index 165 | ind_spread=df.loc[df['variable'].apply(lambda x: 'spread' in x )].index 166 | 167 | for i in ind_spread: 168 | df.at[i,'position']='spread' 169 | for i in ind_short: 170 | df.at[i,'position']='short' 171 | for i in 
ind_long: 172 | df.at[i,'position']='long' 173 | 174 | df['variable']=df['variable'].str.replace('long_','').str.replace('short_','').str.replace('spread_','') 175 | 176 | #isolate type 177 | df['type']=df['variable'].apply(lambda x:'_'.join(x.split('_')[:-1])) 178 | 179 | #clean variable name 180 | df['variable']=df['variable'].apply(lambda x:x.split('_')[-1]) 181 | 182 | df['variable']=df['variable'].str.replace('percent', 183 | 'percent_of_open_interest_for_each_type_of_traders') 184 | 185 | df['variable']=df['variable'].str.replace('traders', 186 | 'number_of_traders_in_each_type') 187 | 188 | #change col order 189 | df=df[['commodity', 'commodity_code', 'change_date', 190 | 'date', 'contract_unit','open_interest', 191 | 'change_open_interest', 'total_traders', 192 | 'type','position','variable','value']] 193 | 194 | return df 195 | 196 | 197 | # In[4]: 198 | 199 | def main(): 200 | 201 | url='https://www.cftc.gov/dea/futures/deacmesf.htm' 202 | 203 | #scrape 204 | response=scrape(url) 205 | 206 | #get data 207 | df=etl(response) 208 | 209 | df.to_csv('trader commitment report.csv',index=False) 210 | 211 | 212 | if __name__ == "__main__": 213 | main() 214 | 215 | -------------------------------------------------------------------------------- /CME1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Mar 20 16:23:53 2018 4 | 5 | """ 6 | 7 | #scraping CME is soooo effortless 8 | #just a simple html parse tree 9 | #how i love Chicago 10 | import urllib.request as u 11 | import pandas as pd 12 | from bs4 import BeautifulSoup as bs 13 | import os 14 | os.chdir('H:/') 15 | 16 | 17 | # 18 | def scrape(category_name,commodity_name): 19 | 20 | #i use a proxy handler cuz my uni network runs on its own proxy 21 | #and i cannot authenticate python through the proxy 22 | #so i use an empty proxy to bypass the authentication 23 | proxy_handler = u.ProxyHandler({}) 24 | opener = u.build_opener(proxy_handler) 25 | 26 | #cme officially forbids scraping 27 | #so a header must be used to disguise the script as an internet browser 28 | #the developers say no to scraping, or so it appears 29 | #but actually they turn a blind eye to us, thx 30 | #i need different types of commodity 31 | #so i need to format the website url for each commodity 32 | req=u.Request('http://www.cmegroup.com/trading/metals/%s/%s.html'%( 33 | category_name,commodity_name),headers={'User-Agent': 'Mozilla/5.0'}) 34 | response=opener.open(req) 35 | result=response.read() 36 | soup=bs(result,'html.parser') 37 | 38 | return soup 39 | 40 | 41 | # 42 | def etl(category_name,commodity_name): 43 | 44 | try: 45 | page=scrape(category_name,commodity_name) 46 | print(commodity_name) 47 | 48 | except Exception as e: 49 | print(e) 50 | 51 | 52 | #i need date, prior settle price and volume 53 | #it is essential to view the source of the website first 54 | #then use beautiful soup to search for the specific class 55 | p1=page.find_all('span',class_='cmeNoWrap') 56 | p2=page.find_all('td',class_=['statusOK','statusNull','statusAlert']) 57 | p3=page.find_all('td',class_="cmeTableRight") 58 | 59 | a=[] 60 | b=[] 61 | c=[] 62 | 63 | for i in p1: 64 | a.append(i.text) 65 | 66 | #somehow prior settle is hard to get 67 | #we cannot find that specific tag 68 | #we can search for the previous tag instead 69 | #the find_next function of beautifulsoup allows us to get the next tag 70 | #the previous tag of prior settle is change 71 | for j in p2: 72 | temp=j.find_next() 73 | b.append(temp.text) 74 | 
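#side note: find_next also accepts an optional tag name as a filter
#so if the class names above ever change, the same trick can be anchored
#to a nearby text label instead, e.g. (hypothetical markup, the actual
#cell label on the live page may differ):
# label=page.find('td',text='PRIOR SETTLE')
# prior_settle=label.find_next('td').text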
75 | #the volume contains commas 76 | for k in p3: 77 | c.append(float(k.text.replace(',',''))) 78 | 79 | 80 | df=pd.DataFrame() 81 | df['expiration date']=a 82 | df['prior settle']=b 83 | df['volume']=c 84 | df['name']=commodity_name 85 | 86 | #for me, i wanna highlight the front month 87 | #the front month is the month where the majority of volume and liquidity occurs 88 | df['front month']=df['volume']==max(df['volume']) 89 | return df 90 | 91 | # 92 | def main(): 93 | 94 | #scraping and etl 95 | df1=etl('precious','silver') 96 | df2=etl('precious','gold') 97 | df3=etl('precious','palladium') 98 | df4=etl('base','copper') 99 | 100 | #concatenate then export 101 | dd=pd.concat([df1,df2,df3,df4]) 102 | dd.to_csv('cme.csv',encoding='utf_8_sig') 103 | 104 | 105 | if __name__ == "__main__": 106 | main() 107 | -------------------------------------------------------------------------------- /CME2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Apr 9 11:33:03 2018 4 | 5 | """ 6 | #previously in CME1 7 | #i said scraping CME is soooo effortless 8 | #CME technical guys must have heard my voice 9 | #they changed the website from an xml structure to a json query 10 | #holy crap!! well, it would not scare off people like us!! 11 | 12 | #here is the trick 13 | #before we actually go to the website of CME quotes 14 | #we press ctrl+shift+i in chrome or f12 in ie 15 | #so we can inspect the elements of the website 16 | #we just go to the network monitor 17 | #we will be able to see all the network activity 18 | #including where the data of CME is coming from 19 | #this is how we gon do it baby 20 | import pandas as pd 21 | import requests 22 | import os 23 | os.chdir('H:/') 24 | 25 | 26 | # 27 | def scrape(commodity_code): 28 | 29 | session=requests.Session() 30 | 31 | 32 | #cme officially forbids scraping 33 | #so a header must be used to disguise the script as a browser 34 | #technically speaking, the website should be able to detect that too 35 | #those tech guys just turn a blind eye, thx fellas 36 | session.headers.update( 37 | {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'}) 38 | 39 | 40 | #now that we have found out where the data is coming from 41 | #we need to do a lil analysis on the url 42 | #e.g. http://www.cmegroup.com/CmeWS/mvc/Quotes/Future/437/G 43 | #it is quite obvious that 437 is a code name for the commodity gold 44 | #but how do we know the code for each commodity 45 | #this is an issue raised by maysam19 46 | # https://github.com/je-suis-tm/web-scraping/issues/1 47 | #might as well mention the solution here 48 | #there are two ways to solve it 49 | 50 | #if you only need very few types of commodity 51 | #you can go to the websites one by one 52 | #e.g. https://www.cmegroup.com/trading/metals/precious/gold.html 53 | #you can right click and select view page source 54 | #search for /CmeWS/mvc/Quotes/Future/ 55 | #you should find the commodity code easily 56 | 57 | #if you got so many types of commodity to scrape 58 | #you should look for the link that contains such information via inspect element 59 | #here is the hack that i have done for you, voila 60 | # https://www.cmegroup.com/CmeWS/mvc/ProductSlate/V2/List 61 | #it is a json file that contains the codes of each commodity in cme 62 | #if you are visiting this script to understand json files 63 | #dont worry, we will talk about how to read them very soon
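#as a hedged sketch of how to read that list: the 'products' key matches
#what CME3.py consumes later in this repo, while the field names 'name'
#and 'id' are assumptions to verify against the live response:
# slate=session.get('https://www.cmegroup.com/CmeWS/mvc/ProductSlate/V2/List').json()
# codes={product['name']:product['id'] for product in slate['products']}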
64 | response=session.get( 65 | 'http://www.cmegroup.com/CmeWS/mvc/Quotes/Future/%s/G'%(commodity_code)) 66 | 67 | return response 68 | 69 | 70 | # 71 | def etl(commodity_code,commodity_name): 72 | 73 | try: 74 | response=scrape(commodity_code) 75 | print(response) 76 | 77 | except Exception as e: 78 | print(e) 79 | 80 | 81 | #think of a json file as dictionaries inside dictionaries 82 | #the simplest way to handle json files is pandas 83 | #remember, the solution is the pandas package, not the json package! 84 | #a dataframe is the default way of reading json 85 | #if you dont like the structure 86 | #you can use pd.read_json with orient as a key argument 87 | #you can choose from index, columns, values, split, records 88 | df=pd.DataFrame(response.json()) 89 | 90 | #pandas turns json into a dataframe 91 | #still, for df['quotes'] 92 | #we end up with a bunch of dictionaries 93 | #we just treat things as normal dictionaries 94 | #we use the key to get the value from each dictionary 95 | #and we form a new dataframe as output 96 | #for me, i only need prior settle price and expiration date 97 | #volume is used to detect the front month contract 98 | output=pd.DataFrame() 99 | output['prior settle']=[i['priorSettle'] for i in df['quotes']] 100 | output['expiration date']=[i['expirationDate'] for i in df['quotes']] 101 | output['volume']=[i['volume'] for i in df['quotes']] 102 | output['volume']=output['volume'].str.replace(',','').astype(float) 103 | output['name']=commodity_name 104 | output['front month']=output['volume']==max(output['volume']) 105 | 106 | return output 107 | 108 | 109 | # 110 | def main(): 111 | 112 | df1=etl('458','silver') 113 | df2=etl('437','gold') 114 | df3=etl('445','palladium') 115 | df4=etl('438','copper') 116 | 117 | 118 | #concatenate then export 119 | output=pd.concat([df1,df2,df3,df4]) 120 | output.to_csv('cme.csv',encoding='utf_8_sig') 121 | 122 | 123 | if __name__ == "__main__": 124 | main() 125 | -------------------------------------------------------------------------------- /CME3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | 5 | 6 | #without the help of my intern, this option data scraper would never exist 7 | #thank you, Olivia, much appreciated for the data etl 8 | 9 | # In[1]: 10 | 11 | 12 | import requests 13 | import pandas as pd 14 | import time 15 | import random as rd 16 | import os 17 | os.chdir('H:/') 18 | 19 | 20 | # In[2]: 21 | 22 | 23 | #scraping function 24 | def scrape(url): 25 | 26 | session=requests.Session() 27 | 28 | session.headers.update( 29 | {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'}) 30 | 31 | time.sleep(rd.randint(0,10)) 32 | 33 | response=session.get(url,params={"_": int(time.time()*1000)}) 34 | 35 | return response 36 | 37 |
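#a note on the "_" parameter above: it is the classic cache-buster convention
#(jquery uses the same name), the current epoch in milliseconds makes every
#request url unique so no stale cached quote is served, e.g.
# int(time.time()*1000) #returns something like 1712345678901
#the random sleep before each request is simple politeness to rate-limit ourselves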
38 | # In[3]: 39 | 40 | 41 | #get options expiration id 42 | def get_expiration_data(expiration_json,options_id): 43 | 44 | expiration_dict=expiration_json[str(options_id)]['expirations'] 45 | 46 | return [(expiration_dict[i]['expiration'],expiration_dict[i]['label']) for i in expiration_dict] 47 | 48 | 49 | # In[4]: 50 | 51 | 52 | #get group id 53 | def get_groupid(jsondata): 54 | 55 | commoditygroup=pd.DataFrame.from_dict(jsondata['filters']['group']) 56 | 57 | var=locals() 58 | for i in range(len(commoditygroup)): 59 | var['a'+str(i)]=pd.DataFrame.from_dict(commoditygroup['children'].iloc[i]) 60 | var['a'+str(i)]['group']=commoditygroup['name'].iloc[i] 61 | 62 | groupid=pd.concat([var['a'+str(i)] for i in range(len(commoditygroup))]) 63 | groupid.reset_index(inplace=True,drop=True) 64 | 65 | return groupid 66 | 67 | #get product id 68 | def get_productid(jsondata): 69 | 70 | return pd.DataFrame.from_dict(jsondata['products']) 71 | 72 | 73 | # In[5]: 74 | 75 | 76 | #get option quote 77 | def get_data(jsondata): 78 | 79 | table=pd.DataFrame.from_dict(jsondata,orient='index').T 80 | 81 | #unpack option related data 82 | optionContractQuotes=table['optionContractQuotes'].iloc[0] 83 | 84 | var=locals() 85 | for i in range(len(optionContractQuotes)): 86 | var['a'+str(i)]=pd.DataFrame.from_dict(optionContractQuotes[i]).T 87 | 88 | var['a'+str(i)]['strikePrice']=var['a'+str(i)]['change'].loc['strikePrice'] 89 | var['a'+str(i)]['strikeRank']=var['a'+str(i)]['change'].loc['strikeRank'] 90 | var['a'+str(i)]['underlyingFutureContract']=var['a'+str(i)]['change'].loc['underlyingFutureContract'] 91 | var['a'+str(i)].drop(['strikePrice','strikeRank','underlyingFutureContract'], 92 | inplace=True) 93 | var['a'+str(i)].reset_index(inplace=True) 94 | var['a'+str(i)].columns=var['a'+str(i)].columns.str.replace('index','optiontype') 95 | 96 | options=pd.concat([var['a'+str(i)] for i in range(len(optionContractQuotes))]) 97 | options.columns=['options-'+i for i in options.columns] 98 | 99 | #unpack underlying future contract 100 | assert len(table)==1,"table length mismatch" 101 | underlyingFutureContractQuotes=pd.DataFrame.from_dict(table['underlyingFutureContractQuotes'].iloc[0]) 102 | 103 | assert len(underlyingFutureContractQuotes)==1,"underlyingFutureContractQuotes length mismatch" 104 | lastTradeDate_dict=underlyingFutureContractQuotes['lastTradeDate'].iloc[0] 105 | lastTradeDate=pd.DataFrame() 106 | for i in lastTradeDate_dict: 107 | lastTradeDate[i]=[lastTradeDate_dict[i]] 108 | 109 | priceChart_dict=underlyingFutureContractQuotes['priceChart'].iloc[0] 110 | priceChart=pd.DataFrame() 111 | for i in priceChart_dict: 112 | priceChart[i]=[priceChart_dict[i]] 113 | del underlyingFutureContractQuotes['lastTradeDate'] 114 | del underlyingFutureContractQuotes['priceChart'] 115 | priceChart.columns=priceChart.columns.str.replace('code','pricechartcode') 116 | 117 | futures=pd.concat([underlyingFutureContractQuotes,lastTradeDate,priceChart],axis=1) 118 | futures.columns=['futures-'+i for i in futures.columns] 119 | 120 | #concatenate options and futures 121 | output=options.copy(deep=True) 122 | 123 | assert len(futures)==1,"futures length mismatch" 124 | for i in futures: 125 | output[i]=futures[i].iloc[0] 126 | 127 | del table['optionContractQuotes'] 128 | del table['underlyingFutureContractQuotes'] 129 | for i in table: 130 | output[i]=table[i].iloc[0] 131 | 132 | return output 133 | 134 | 135 | 136 | # In[6]: 137 |
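#one caveat on the var=locals() pattern in get_groupid and get_data above:
#it happens to work because the returned dict object is read back directly,
#but cpython does not promise that writing into locals() creates real variables
#a plain dict is the safer idiom, e.g. a sketch of the same loop:
# frames={}
# for i in range(len(commoditygroup)):
#     frames[i]=pd.DataFrame.from_dict(commoditygroup['children'].iloc[i])
#     frames[i]['group']=commoditygroup['name'].iloc[i]
# groupid=pd.concat(list(frames.values()))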
138 | def main(): 139 | 140 | id_url='https://www.cmegroup.com/CmeWS/mvc/ProductSlate/V2/List' 141 | 142 | #get group and product id to find the future contract 143 | response_id=scrape(id_url) 144 | groupid=get_groupid(response_id.json()) 145 | productid=get_productid(response_id.json()) 146 | 147 | #301 denotes corn option 148 | option_id=301 149 | 150 | #get expiration code from futures 151 | expiration_url=f'https://www.cmegroup.com/CmeWS/mvc/Options/Categories/List/{option_id}/G?optionTypeFilter=' 152 | response_expiration=scrape(expiration_url) 153 | target_exp_id=get_expiration_data(response_expiration.json(),option_id) 154 | 155 | #get option data 156 | for expiration_id,expiration_date in target_exp_id: 157 | 158 | option_url=f'https://www.cmegroup.com/CmeWS/mvc/Quotes/Option/{option_id}/G/{expiration_id}/ALL?optionProductId={option_id}&strikeRange=ALL' 159 | response_option=scrape(option_url) 160 | 161 | #not every expiration_id leads to concrete data 162 | try: 163 | df=get_data(response_option.json()) 164 | 165 | target=['options-optiontype', 166 | 'options-change', 167 | 'options-close', 168 | 'options-high', 169 | 'options-highLimit', 170 | 'options-last', 171 | 'options-low', 172 | 'options-lowLimit', 173 | 'options-mdKey', 174 | 'options-open', 175 | 'options-percentageChange', 176 | 'options-priorSettle', 177 | 'options-updated', 178 | 'options-volume', 179 | 'options-strikePrice', 180 | 'options-strikeRank', 181 | 'futures-change', 182 | 'futures-close', 183 | 'futures-expirationDate', 184 | 'futures-high', 185 | 'futures-highLimit', 186 | 'futures-last', 187 | 'futures-low', 188 | 'futures-lowLimit', 189 | 'futures-mdKey', 190 | 'futures-open', 191 | 'futures-optionUri', 192 | 'futures-percentageChange', 193 | 'futures-priorSettle', 194 | 'futures-productId', 195 | 'futures-productName', 196 | 'futures-updated', 197 | 'futures-volume', 198 | 'futures-default24', 199 | 'tradeDate'] 200 | 201 | df=df[target] 202 | 203 | #fix the expiration mismatch between futures and options 204 | #or you can use the cme rule based month coding system 205 | # https://www.cmegroup.com/month-codes.html 206 | df['futures-expirationDate']=pd.to_datetime(expiration_date) 207 | 208 | df.to_csv(f'corn option {expiration_id}.csv',index=False) 209 | 210 | except ValueError: 211 | pass 212 | 213 | if __name__ == "__main__": 214 | main() 215 | 216 | 217 | -------------------------------------------------------------------------------- /CQF.py: -------------------------------------------------------------------------------- 1 | #this is a script to scrape a website that requires login 2 | #make sure you understand the basics of a webpage 3 | #u should go through the other simple scrapers in this repo before moving to this one 4 | # https://github.com/je-suis-tm/web-scraping 5 | 6 | #in the following context 7 | #the script is trying to get some articles from a website 8 | #this website called cqf only allows pdf downloads for registered users 9 | 10 | import requests 11 | from bs4 import BeautifulSoup as bs 12 | import re 13 | import os 14 | os.chdir('d:/') 15 | 16 | def main(): 17 | 18 | #input your username and password 19 | #ideally we should not store the password 20 | #we should use getpass as follows 21 | """ 22 | import getpass 23 | getpass.getpass('input password:') 24 | """ 25 | session=requests.Session() 26 | username='' 27 | password='' 28 | prefix='https://www.cqfinstitute.org/cqf-access/nojs/' 29 | login_url='https://www.cqfinstitute.org/user/login?destination=cqf-access/nojs/' 30 | 31 | 32 | #the first stage is to get a list of what you
want 33 | response=session.get('https://www.cqfinstitute.org/articles') 34 | page=bs(response.content,'html.parser') 35 | 36 | #in this case, we just need to find a list of all the articles 37 | #each article is assigned with a code 38 | #we only need (prefix+code) to visit the article download website 39 | articlelist=page.find_all('a',class_='use-ajax ctools-modal-cqf-popup') 40 | 41 | d={} 42 | for i in articlelist: 43 | if i.text: 44 | d[i.text]=re.search('(?<=nojs\/)\d*', 45 | i.get('href')).group() 46 | 47 | #d is a dictionary that contains all the articles and codes 48 | #for simplicity, we only wanna get the first article 49 | target=d[list(d.keys())[0]] 50 | 51 | 52 | #the second stage is authentication 53 | #for websites without captcha or other methods to detect bots 54 | #it will be as simple as followed 55 | #if we need to go through captcha or other human verification 56 | #we can use neural network to recognize stuff 57 | #or download the image and let human identify it 58 | #this script will not cover that part (cuz i am lazy) 59 | 60 | #u may wonder where i get the headers and data from 61 | #before writing any script at all 62 | #we should use browser to login and go through the process 63 | #while typing username and password in browser 64 | #we can right click and inspect element 65 | #in chrome, simply ctrl+shift+i 66 | #the top columns in a popup window are elements, console, sources, network... 67 | #we select network monitor before we login 68 | #next, we click sign in button 69 | #and we should see a lot of traffic in network monitor 70 | #usually there is something called login or sign-in or auth 71 | #when we click it, we can see our username and password in form data 72 | #voila, that is everything we need to post 73 | #an easy way is to copy as powershell and paste it in our ide 74 | #we just need to restructure headers and form data in a pythonic way 75 | #normally we dont include cookies as they may expire after a few weeks 76 | #and we can find login url in request url section 77 | auth=session.post(login_url+target, 78 | headers={"Cache-Control":"max-age=0", 79 | "Origin":"https://www.cqfinstitute.org", 80 | "Upgrade-Insecure-Requests":"1", 81 | "DNT":"1", 82 | "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36", 83 | "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", 84 | "Referer":"https://www.cqfinstitute.org/user/login?destination=cqf-access/nojs/"+target, 85 | "Accept-Encoding":"gzip, deflate, br", 86 | "Accept-Language":"en-US,en;q=0.9"}, 87 | data={'name': username, 88 | 'pass': password, 89 | 'form_id': 'user_login', 90 | 'device_authentication_operating_system': 'Windows 10 64-bit', 91 | 'device_authentication_browser': 'Chrome', 92 | 'op': 'Log in'}) 93 | 94 | 95 | #normally when we finish login 96 | #we should take a look at the response 97 | #in most cases, login response is a json 98 | #we need to find something like token or auth 99 | #and update the session header as followed 100 | """ 101 | token=auth.json()["token"] 102 | session.headers.update({"Authorization": 'Token %s'%token}) 103 | """ 104 | 105 | 106 | #once we officially sign in as a user 107 | #the third stage is to download the pdf 108 | response=session.get(prefix+target) 109 | page=bs(response.content,'html.parser') 110 | 111 | pdf_link=(page.find('div',class_='file file-ext').find('a').get('href')) 112 | 113 | pdf=session.get(pdf_link) 114 | 115 | 
f=open('a.pdf','wb') 116 | f.write(pdf.content) 117 | f.close() 118 | 119 | 120 | return 121 | 122 | 123 | 124 | if __name__ == "__main__": 125 | main() 126 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /LME.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | #this is a script to store scraped content into database 5 | #if we scrape a lot of websites or simply scrape a website everyday 6 | #we will end up with a huge amount of data 7 | #it is essential to create a data warehouse to keep everything organized 8 | import sqlite3 9 | import requests 10 | import pandas as pd 11 | from io import BytesIO 12 | import re 13 | import pyodbc 14 | 15 | 16 | #say if we wanna get the trader commitment report of lme from the link below 17 | # https://www.lme.com/en-GB/Market-Data/Reports-and-data/Commitments-of-traders#tabIndex=1 18 | #when we select aluminum and we will be redirected to a new link 19 | # https://www.lme.com/en-GB/Market-Data/Reports-and-data/Commitments-of-traders/Aluminium 20 | #if we try to view page source, we will find nothing in html parse tree 21 | #what do we do? 22 | #here is a very common scenario in web scraping 23 | #we simply right click and select inspect element 24 | #we will have to monitor the traffic one by one to identify where the report comes from 25 | #as usual, i have done it for you 26 | def get_download_link(): 27 | 28 | download_link='https://www.lme.com/api/Lists/DownloadLinks/%7B02E29CA4-5597-42E7-9A22-59BB73AE8F6B%7D' 29 | 30 | 31 | #there are quite a few pages of reports 32 | #for simplicity, we only care about the latest report 33 | #note that the page counting starts from 0 34 | session=requests.Session() 35 | response = session.get(download_link, 36 | params={"currentPage": 0}) 37 | 38 | 39 | #the response is a json file 40 | #i assume you should be familiar with json now 41 | #if not, plz check the link below 42 | # https://github.com/je-suis-tm/web-scraping/blob/master/CME2.py 43 | url_list=response.json()['content_items'] 44 | 45 | 46 | return url_list 47 | 48 | 49 | 50 | #once we find out where the download link is 51 | #we can get the actual report 52 | def get_report(url_list): 53 | 54 | prefix='https://www.lme.com' 55 | url=url_list[0]['Url'] 56 | 57 | 58 | session=requests.Session() 59 | response = session.get(prefix+url) 60 | 61 | 62 | #we also get the date of the data from url 63 | date=pd.to_datetime(re.search(r"\d{4}/\d{2}/\d{2}",url).group()) 64 | 65 | return response.content,date 66 | 67 | 68 | # 69 | def etl(content,date): 70 | 71 | #the first seven rows are annoying headers 72 | #we simply skip them 73 | df = pd.ExcelFile(BytesIO(content)).parse('AH', skiprows=7) 74 | 75 | #assume we only want positions of investment funds 76 | #lets do some etl 77 | df['Unnamed: 0'].fillna(method='ffill', 78 | inplace=True) 79 | 80 | col=list(df.columns) 81 | for i in range(1,len(col)): 82 | if 'Unnamed' in col[i]: 83 | col[i]=col[i-1] 84 | 85 | df.columns=col 86 | del df['Notation of the position quantity'] 87 | df.dropna(inplace=True) 88 | 89 | output=df['Investment Funds'][df['Unnamed: 0']=='Number of Positions'] 90 | output.columns=['long','short'] 91 | 92 | output=output.melt(value_vars=['long','short'], 93 | var_name='position', 94 | value_name='value') 95 | 96 | output['type']=df['LOTS'].drop_duplicates().tolist()*2 97 | output['date']=date 98 | 99 | return output 100 | 101 | 102 | #for sql server 103 | #we have to use pyodbc driver 104 | def connect( 105 | server=None, database=None, driver=None, 106 | username=None, password=None, 107 | autocommit=False 108 | ): 109 | """ get the db 
connection """ 110 | connection_string = "Driver={driver}; Server={server}; Database={database}" 111 | if username: 112 | connection_string += "; UID={username}" 113 | if password: 114 | connection_string += "; PWD={password}" 115 | if not driver: 116 | driver = [ 117 | d for d in sorted(pyodbc.drivers()) 118 | if re.match(r"(ODBC Driver \d+ for )?SQL Server", d) 119 | ][0] 120 | 121 | return pyodbc.connect( 122 | connection_string.format( 123 | server=server, 124 | database=database, 125 | driver=driver, 126 | username=username, 127 | password=password, 128 | ), 129 | autocommit=autocommit, 130 | ) 131 | 132 | 133 | #this function is to insert data into sqlite3 database 134 | #i will not go into details for sql grammar 135 | #for pythoners, sql is a piece of cake 136 | #go check out the following link for sql 137 | # https://www.w3schools.com/sql/ 138 | def database(df,SQL=False): 139 | 140 | #plz make sure u have created the database and the table to proceed 141 | #to create a table in database, first two lines are the same as below 142 | #just add a few more lines 143 | 144 | #c.execute("""CREATE TABLE lme (position TEXT, value FLOAT, type TEXT, date DATE);""") 145 | #conn.commit() 146 | #conn.close() 147 | 148 | #connect to sqlite3 149 | if not SQL: 150 | 151 | #to see what it looks like in the database 152 | #use microsoft access or toad or just pandas 153 | #db=pd.read_sql("""SELECT * FROM lme""",conn) 154 | conn = sqlite3.connect('database.db') 155 | else: 156 | SERVER='10.10.10.10' 157 | DATABASE='meme_stock' 158 | conn=connect(SERVER,DATABASE,'SQL Server') 159 | c = conn.cursor() 160 | 161 | #insert data 162 | for i in range(len(df)): 163 | try: 164 | c.execute("""INSERT INTO lme VALUES (?,?,?,?)""",df.iloc[i,:]) 165 | conn.commit() 166 | print('Updating...') 167 | except Exception as e: 168 | print(e) 169 | 170 | #always need to close it 171 | conn.close() 172 | 173 | print('Done.') 174 | 175 | return 176 | 177 | 178 | # 179 | def main(): 180 | 181 | url_list=get_download_link() 182 | 183 | content,date=get_report(url_list) 184 | 185 | output=etl(content,date) 186 | 187 | database(output) 188 | 189 | 190 | if __name__ == "__main__": 191 | main() 192 | -------------------------------------------------------------------------------- /MENA Newsletter.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | #this script is about the latest news of MENA region 4 | #we scrape different influential media websites, or so-called fake news, lol 5 | #and send only updates to the mailbox for daily newsletter 6 | #in order to do that, we need a db to store all the historical content of websites 7 | #and all the scraping techniques from html parse tree to regular expression 8 | #over time, i also discovered the issue of information overload in daily newsletter 9 | #hence, i invented a graph theory based algorithm to extract key information 10 | #a part of this algo will also be featured in this script to solve info redundancy 11 | #as u can see, this is the most advanced script in web scraping repository 12 | #it contains almost every technique we have introduced so far 13 | #make sure you have gone through all the other scripts before moving onto this one 14 | 15 | import pandas as pd 16 | from bs4 import BeautifulSoup as bs 17 | import requests 18 | import datetime as dt 19 | import win32com.client as win32 20 | import sqlite3 21 | import os 22 | import re 23 | import copy 24 | import time 25 | os.chdir('d:/') 26 | 27 | #this is a home 
made special package for text mining 28 | #it is designed to extract key information and remove similar contents 29 | #for details of this graph traversal algorithm plz refer to the following link 30 | # https://github.com/je-suis-tm/graph-theory/blob/master/Text%20Mining%20project/text_mining.py 31 | import text_mining 32 | 33 | 34 | #main stuff 35 | def main(): 36 | 37 | ec=scrape('https://www.economist.com/middle-east-and-africa/',economist) 38 | aj=scrape('https://www.aljazeera.com/topics/regions/middleeast.html',aljazeera) 39 | tr=scrape('https://www.reuters.com/news/archive/middle-east',reuters) 40 | bc=scrape('https://www.bbc.co.uk/news/world/middle_east',bbc) 41 | ws=scrape('https://www.wsj.com/news/types/middle-east-news',wsj) 42 | ft=scrape('https://www.ft.com/world/mideast',financialtimes) 43 | bb=scrape('https://www.bloomberg.com/view/topics/middle-east',bloomberg) 44 | cn=scrape('https://edition.cnn.com/middle-east',cnn) 45 | fo=scrape('https://fortune.com/tag/middle-east/',fortune) 46 | 47 | #concat scraped data via append, pd.concat works as an alternative 48 | #unlike the previous version, the current version does not sort information by source 49 | #the purpose of blending data together is to run it through the text mining pipeline 50 | df=ft 51 | for i in [aj,tr,bc,ws,cn,fo,ec,bb]: 52 | df=df.append(i) 53 | 54 | #CRUCIAL!!! 55 | #as we append dataframes together, we need to reset the index 56 | #otherwise, we would not be able to use reindex in the database function call 57 | df.reset_index(inplace=True,drop=True) 58 | 59 | #first round, insert into database and remove outdated information 60 | df=database(df) 61 | 62 | #second round, use the home made package to remove similar contents 63 | output=text_mining.remove_similar(df,text_mining.stopword) 64 | 65 | #if the link is not correctly captured 66 | #remove anything before www and add https:// 67 | for i in range(len(output)): 68 | if 'https://' not in output['link'][i]: 69 | temp=re.search('www',output['link'][i]).start() 70 | output.at[i,'link']='https://'+output['link'][i][temp:] 71 | 72 | print(output) 73 | 74 | 75 | #using an html email template 76 | #check stripo for different templates 77 | # https://stripo.email/templates/ 78 | html="""
79-144 | [html email template: the table-based markup on file lines 79-144 was stripped when this dump was rendered, leaving only the "Middle East" banner text; the layout follows the stripo templates linked above, and the rest of the script is truncated at this point]
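#for reference, the win32com import at the top is the usual way to push
#an html newsletter through outlook, a minimal sketch, assuming outlook
#is installed locally and 'html' holds the rendered template
#(the recipient address is a placeholder, the original recipients are not
#recoverable from this dump):
# outlook=win32.Dispatch('Outlook.Application')
# mail=outlook.CreateItem(0)   #0 stands for olMailItem
# mail.To='recipient@example.com'
# mail.Subject='MENA Newsletter '+dt.datetime.now().strftime('%Y-%m-%d')
# mail.HTMLBody=html
# mail.Send()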