├── LICENSE ├── Main.py ├── README.md ├── Robot.py ├── get_proxy_and_user_information ├── ConnectRedis.py ├── GetProxy.py ├── GetUserInfo.py ├── IgnoreWarnings.py ├── __init__.py └── names ├── record_product_information ├── GetProductRank.py ├── VisitRecord.py ├── __init__.py └── create_table.sql └── scripts ├── Alarm.py └── ChangeMacAddress.py /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016-2017 GitHub Inc. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /Main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: LC 3 | # @Date: 2016-08-31 20:58:46 4 | # @Last modified by: LC 5 | # @Last Modified time: 2017-03-23 17:20:39 6 | # @Email: liangchaowu5@gmail.com 7 | 8 | import time 9 | from Robot import Robot 10 | from get_proxy_and_user_information.GetProxy import get_valid_proxy 11 | 12 | if __name__ == '__main__': 13 | # provide the informaion of the product on Amazon, including asin and words for searching 14 | asin = 'B0131A19HS' 15 | #asin = 'B002NSMFOQ' 16 | search_words = 'shower curtain rings' 17 | add_to_cart_probability = 0.7 18 | while True: 19 | proxy = get_valid_proxy('https://www.amazon.com', 'china_ips') 20 | robot = Robot(proxy) 21 | ############################################### 22 | # sign in and browse 23 | ############################################### 24 | """ 25 | robot.sign_in() 26 | # one item 27 | robot.search_keywords(search_words) 28 | robot.simulate_browsing(search_words, asin, add_to_cart_probability) 29 | # another item 30 | # .... 31 | """ 32 | ############################################### 33 | # sign up 34 | ############################################### 35 | #normal sign up 36 | """ 37 | user_info = robot.generate_sign_up_user(random_password=True) 38 | robot.sign_up(user_info) 39 | """ 40 | 41 | # sign up 42 | user_info = robot.generate_sign_up_user(random_password=True) 43 | robot.sign_up(user_info) 44 | time.sleep(5) 45 | #robot.search_keywords(search_words) 46 | robot.simulate_browsing(search_words, asin, add_to_cart_probability) 47 | robot.exit_driver() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |  2 | # 模拟访问亚马逊商品的爬虫 3 | 4 | `AmazonRobot` 是通过 `python` 实现的一个通过脚本自动访问[Amazon][1]上的商品的爬虫程序。主要实现了用户注册、根据给出的搜索词语和商品的 `asin` 号进行搜索并访问商品、按照一定概率将商品加入购物车等。同时通过动态修改UA ,维护代理池, 控制爬取速率防止被识别出是爬虫。由于需要解析网页的 JS 代码,整个代码主要依靠 `selenium` 来解析 JS 代码。 5 | 6 | 用到的数据库有 `Redis` 和 `MySQL`,`Redis` 主要用于存储代理池、用于注册的一些用户信息(姓名,电话,地址,visa卡等);`MySQL`用于存储被访问的商品的一些信息(asin号,访问日期,日pv量,商品的排名等)。**需要先在代码中指定这两个数据库的地址**。 7 | 8 | 9 | 除了 `selenium`, 还依赖的第三方库有:`redis`, `MySQLdb`, `requests`, `bs4`, `user_agent`;python版本为2.7 10 | 11 | 整个代码的结构如下: 12 | ``` 13 | ├── Main.py # 主程序入口 14 | ├── Robot.py # 模拟访问的Robot类 15 | ├── get_proxy_and_user_information # 抓取代理和用户信息,存入Redis 16 | │   ├── ConnectRedis.py # 需要在该文件中指定 Redis 数据库的地址 17 | │   ├── GetProxy.py 18 | │   ├── GetUserInfo.py 19 | │   ├── IgnoreWarnings.py 20 | │   ├── __init__.py 21 | ├── record_product_information # 更新商品在 MySQL 中的信息 22 | │   ├── create_table.sql 23 | │   ├── GetProductRank.py 24 | │   ├── VisitRecord.py # 需要在该文件中指定 MySQL 数据库的地址 25 | │   ├── __init__.py 26 | └── scripts 27 | ├── Alarm.py # 用于检测主机是否宕机的脚本 28 | └── ChangeMacAddress.py # 更改主机 mac 地址 29 | ``` 30 | 31 | 上面最后的一个文件`ChangeMacAddress.py`可用于更改主机 mac 地址(目前支持 ubuntu 16.0 和 centos6.0),原来是为了防止被识别出是爬虫而写的,但是后来想想实际上并不能起到这个作用。从计算机网络的知识可知,数据包的mac地址每经过一次转发mac地址都会改变,原因是以太网在链路层中通过arp广播建立arp表用于 IP 和 mac 地址的映射关系,然后进行转发,当数据包从链路层出来后,实际上是根据 mac 地址去查找目的主机去转发的,因此数据包在转发过程中IP地址不变(NAT之类的除外),而mac地址每转发一次就改变一次。显然,我们的网络跟亚马逊的网络不是直连的,因此mac地址肯定会改变多次。 32 | 33 | 最后,通过 `selenium` 实现的爬虫实际上是非常消耗内存和CPU的,所以这样访问的效率会非常低下,在实验过程中对于流量较小的商品曾试过一周内将其从第五页推到首页,但是对于流量较大的商品作用就很小了。建议调试的时候带 GUI ,而在服务器运行的时候通过 `xvfb` 替代GUI,同时结合 `Ansible` 等实现主机群管理。 34 | 35 | [1]: https://www.amazon.com/ -------------------------------------------------------------------------------- /Robot.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: LC 3 | # @Date: 2016-08-15 22:34:08 4 | # @Last modified by: LC 5 | # @Last Modified time: 2017-03-23 17:25:42 6 | # @Email: liangchaowu5@gmail.com 7 | 8 | ################################################################################### 9 | # Function: simulate some actions manipulated by humans with gui, including: 10 | # 1. sign up and sign in 11 | # 2. search keywords and visit target product 12 | # 3. add product to cart 13 | ################################################################################### 14 | 15 | 16 | import time 17 | import random 18 | import requests 19 | import redis 20 | import sys 21 | import string 22 | 23 | from user_agent import generate_user_agent 24 | from selenium import webdriver 25 | from selenium.common.exceptions import NoSuchElementException 26 | from selenium.webdriver.common.proxy import * 27 | 28 | # change mac address is useless 29 | # from scripts.ChangeMacAddress import change_mac_address, generate_mac_address 30 | from get_proxy_and_user_information.ConnectRedis import get_connection 31 | from get_proxy_and_user_information.IgnoreWarnings import ignore_warnings 32 | from get_proxy_and_user_information.GetProxy import get_valid_proxy 33 | from record_product_information.VisitRecord import update_record 34 | 35 | 36 | 37 | class Robot: 38 | def __init__(self, proxy): 39 | """init the webdriver by setting the proxy and user-agent 40 | 41 | Args: 42 | proxy (str): proxy in the form of ip:port 43 | """ 44 | # set proxy 45 | ip, port = proxy.split(':') 46 | profile = webdriver.FirefoxProfile() 47 | profile.set_preference("network.proxy.type", 1) 48 | profile.set_preference("network.proxy.http", ip) 49 | profile.set_preference("network.proxy.http_port", port) 50 | # set user_agent 51 | profile.set_preference("general.useragent.override", generate_user_agent()) 52 | 53 | profile.update_preferences() 54 | self.driver = webdriver.Firefox(firefox_profile=profile) 55 | 56 | print 'current proxy: %s'%proxy 57 | 58 | 59 | def sign_up(self, sign_up_form, sign_up_url = r'https://www.amazon.com/ap/register?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.com%2Fgp%2Fyourstore%2Fhome%3Fie%3DUTF8%26ref_%3Dnav_custrec_newcust'): 60 | """sign up with randomly generate user 61 | 62 | Args: 63 | sign_up_form (dict): some infomation required to sign up: name, e-mail and password 64 | sign_up_url (str, optional): url to sign up, custom url can jumps to the target url after signing up 65 | """ 66 | # generate and change mac address 67 | # mac = generate_mac_address() 68 | #change_mac_address(mac) 69 | try: 70 | self.driver.get(sign_up_url) 71 | for k,v in sign_up_form.items(): 72 | inputElement = self.driver.find_element_by_name(k) 73 | inputElement.send_keys(v) 74 | time.sleep(5) 75 | inputElement.submit() 76 | user_info = sign_up_form['email']+'#'+sign_up_form['password']+'#'+mac 77 | self.store_registered_user(user_info) 78 | except Exception, e: 79 | print 'Error while signing up\n%s'%e.message 80 | self.exit_driver() 81 | sys.exit(0) 82 | 83 | 84 | def sign_in(self, sign_in_url = r'https://www.amazon.com/ap/signin?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.com%2F%3Fref_%3Dnav_signin'): 85 | """sign in with a registered user 86 | 87 | Args: 88 | sign_in_url (str, optional): url to sign in, custom url can jumps to the target url after signing in 89 | """ 90 | sign_in_form = {} 91 | try: 92 | # randomly get a user from redis 93 | r = get_connection(DB = 1) 94 | info = r.srandmember('china_users',1)[0].split('#') 95 | if len(info) == 3: 96 | mail_box, passwd, mac = info 97 | elif len(info) == 2: 98 | mail_box, mac = info 99 | passwd = 'ScutAmazon1234$' 100 | #change_mac_address(mac) 101 | sign_in_form = {'email':mail_box, 'password':passwd} 102 | 103 | # sign_in 104 | self.driver.get(sign_in_url) 105 | for k,v in sign_in_form.items(): 106 | inputElement = self.driver.find_element_by_name(k) 107 | inputElement.send_keys(v) 108 | time.sleep(5) 109 | inputElement.submit() 110 | except Exception, e: 111 | print 'Error while getting a user from redis and signing in\n%s'%e.message 112 | self.exit_driver() 113 | sys.exit(0) 114 | 115 | 116 | def simulate_browsing(self, words, asin , possibility, qid = None): 117 | """generate target url in terms of key words to search the item and the asin of the item, 118 | visit the url and add to cart within certain probability 119 | 120 | Args: 121 | words (str): words used to search items, seperated by space 122 | asin (str): ASIN of the item 123 | possibility (flaot): probability of adding item to cart 124 | """ 125 | key_words = '+'.join(words.split()) 126 | if qid: 127 | target_url = 'https://www.amazon.com/dp/%s/ie=UTF8&qid=%s&keywords=%s' %(asin, qid, key_words) 128 | else: 129 | target_url = 'https://www.amazon.com/dp/%s/ie=UTF8&keywords=%s' %(asin, key_words) 130 | #self.search_keywords(key_words) 131 | try: 132 | self.driver.get(target_url) 133 | update_record(asin, key_words, 'pv', number=1) 134 | time.sleep(10) 135 | """ 136 | if random.random()< possibility: 137 | self.add_to_cart() 138 | time.sleep(5) 139 | update_record(asin, key_words, 'cart', number=1) 140 | print '========successfully add item to cart======' 141 | """ 142 | # add to wish list 143 | wish_list = '#add-to-wishlist-button-submit' 144 | self.driver.find_element_by_css_selector(wish_list).click() 145 | time.sleep(15) 146 | # alert = self.driver.switch_to_alert() # NoAlertPresentException 147 | 148 | except ValueError, e: 149 | print 'Error while visiting %s\n%s'%(target_url, e.message) 150 | #self.exit_driver() 151 | sys.exit(0) 152 | 153 | 154 | def search_keywords(self, words): 155 | """type in keywords to search on the index page of amazon 156 | 157 | Args: 158 | words (str): words used to search items, seperated by space 159 | """ 160 | try: 161 | self.driver.get(r'https://www.amazon.com/') 162 | inputElement = self.driver.find_element_by_name('field-keywords') 163 | inputElement.send_keys(words) 164 | inputElement.submit() 165 | except Exception, e: 166 | print 'Error while searching keywords\n%s'%e.message 167 | self.exit_driver() 168 | sys.exit(0) 169 | 170 | 171 | def add_to_cart(self): 172 | """add item to cart""" 173 | cart = '#add-to-cart-button' 174 | try: 175 | self.driver.find_element_by_css_selector(cart).click() 176 | print '================successfully add to cart===================' 177 | time.sleep(5) 178 | except Exception,e: 179 | print 'Error while adding item to cart\n%s'%e.message 180 | self.exit_driver() 181 | sys.exit(0) 182 | 183 | 184 | def generate_sign_up_user(self, random_password = False): 185 | """ramdomly generate a user to sign up 186 | 187 | Args: 188 | random_password (bool, optional): use uniform password or specific password 189 | """ 190 | # user name 191 | conn = get_connection(DB=3) 192 | user_name = conn.srandmember('user_name', 1)[0] 193 | 194 | # mail box 195 | prefix = string.digits+string.lowercase 196 | postfix = ['@126.com', '@163.com', '@sina.com', '@gmail.com', '@139.com', '@foxmail.com'] 197 | prefix_len = random.randint(5,12) 198 | mail = '' 199 | for i in xrange(prefix_len): 200 | mail += random.choice(prefix) 201 | mail_box = mail+random.choice(postfix) 202 | 203 | # password 204 | if random_password: 205 | candidates = string.digits+string.letters+'!@$%&*+-_' 206 | passwd = '' 207 | for i in xrange(random.randint(7,17)): 208 | passwd += random.choice(candidates) 209 | else: 210 | passwd = 'ScutAmazon1234$' 211 | 212 | sign_up_form = {'customerName':user_name, 'email':mail_box, 'password':passwd, 'passwordCheck':passwd} 213 | return sign_up_form 214 | 215 | 216 | def store_registered_user(self, user_info): 217 | """store infomation of registered user in redis 218 | 219 | Args: 220 | user_info (str): infomation of registered user in the form of mail#password#mac or mail#mac 221 | """ 222 | try: 223 | if len(user_info.split('#')) == 3: 224 | DB = 2 225 | user_set = 'valid_users' 226 | elif len(user_info.split('#')) == 2: 227 | DB = 1 228 | user_set = 'china_users' 229 | else: 230 | print 'Error while storing user in redis, wrong format of user infomation\n %s'%info 231 | sys.exit(0) 232 | 233 | conn = get_connection(DB = DB) 234 | conn.sadd(user_set, user_info) 235 | print '===========successfully add user %s to reids:%s:%s==============' %(user_info, DB, user_set) 236 | except Exception, e: 237 | print 'Error while adding registered user to redis\n %s'%(e.message) 238 | sys.exit(0) 239 | 240 | 241 | def exit_driver(self): 242 | """exit the webdriver""" 243 | try: 244 | self.driver.quit() 245 | except Exception, e: 246 | print 'Error while exiting the web driver\n%s'%e.message 247 | 248 | 249 | if __name__ == '__main__': 250 | asin = 'B002NSMFOQ' 251 | words = 'shower curtain rings' 252 | add_to_cart_probability = 0.7 253 | while True: 254 | proxy = get_valid_proxy('https://www.amazon.com', 'china_ips') 255 | robot = Robot(proxy) 256 | ############################################### 257 | # sign in and browse 258 | ############################################### 259 | robot.sign_in() 260 | # one item 261 | #robot.search_keywords(words) 262 | robot.simulate_browsing(words, asin, add_to_cart_probability) 263 | # another item 264 | # .... 265 | """ 266 | ############################################### 267 | # sign up 268 | ############################################### 269 | user_info = robot.generate_sign_up_user(random_password=True) 270 | robot.sign_up(user_info) 271 | time.sleep(5) 272 | robot.search_keywords(words) 273 | robot.simulate_browsing(words, asin, add_to_cart_probability) 274 | robot.exit_driver() 275 | """ 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | -------------------------------------------------------------------------------- /get_proxy_and_user_information/ConnectRedis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: LC 3 | # @Date: 2016-07-10 16:50:47 4 | # @Last modified by: LC 5 | # @Last Modified time: 2017-03-23 18:51:08 6 | # @Email: liangchaowu5@gmail.com 7 | 8 | ############################################################################################################### 9 | # Function: get a connection to redis 10 | # 11 | # details of dbs of redis: 12 | # db0(proxy): china_ips, american_ips, amazon_ips, 13 | # db1(registered users, same password(SCutAmazon1234$), "mail#mac"): china_users, china_user_1, china_user_2 14 | # db2(registered users, specific password, "mail#password#mac"): valid_users 15 | # db3(user infomation for registering): user_name, address, name_phone, name_visa_expire 16 | ############################################################################################################### 17 | 18 | 19 | import redis 20 | 21 | 22 | def get_connection(HOST = 'XXXX', PORT = 6379, PASSWORD = 'XXXX', DB = 0): 23 | """get a connection to redis 24 | 25 | Args: 26 | HOST (str, optional): IP of redis server 27 | PORT (int, optional): the port that redis server listening 28 | PASSWORD (str, optional): password to the redis-server 29 | DB (int, optional): number of the db(0~15), default 0 30 | """ 31 | r = redis.Redis(host = HOST, port = PORT, password = PASSWORD, db= DB) 32 | return r 33 | 34 | 35 | if __name__ == '__main__': 36 | # manipulation on set 37 | ip_set = 'amazon_ips' 38 | r = get_connection() 39 | proxy = r.srandmember(ip_set, 5) 40 | r.srem(ip_set, proxy[0]) 41 | print proxy 42 | -------------------------------------------------------------------------------- /get_proxy_and_user_information/GetProxy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: LC 3 | # @Date: 2016-07-04 21:04:49 4 | # @Last modified by: LC 5 | # @Last Modified time: 2016-08-14 10:57:52 6 | # @Email: liangchaowu5@gmail.com 7 | 8 | ########################################################################## 9 | # Function: 10 | # 1. fetch proxies from site: http://www.xicidaili.com/,store them in redis 11 | # 2. get a valid proxy for a certain site 12 | ########################################################################## 13 | 14 | 15 | import random 16 | import time 17 | import sys 18 | 19 | import requests 20 | from bs4 import BeautifulSoup 21 | from user_agent import generate_user_agent 22 | 23 | from ConnectRedis import get_connection 24 | from IgnoreWarnings import ignore_warnings 25 | 26 | 27 | # proxies from different countries 28 | CHINA = r'http://www.xicidaili.com/nn/' # china 29 | OTHER = r'http://www.xicidaili.com/wn/' # other countries 30 | 31 | 32 | def get_proxies(proxy_type, ip_set, start_page, end_page): 33 | """extract proxies from page source code, store them in redis 34 | 35 | Args: 36 | proxy_type (str): base url for proxy type, like the global variables CHINA and OTHER 37 | ip_set (str): which set should the ips be stored in redis 38 | start_page (int): which page to start crawling 39 | end_page (int): which page to stop crawling 40 | """ 41 | try: 42 | conn = get_connection() 43 | except Exception: 44 | print 'Error while connecting to redis' 45 | return 46 | proxies, curr_proxy =[], None 47 | for page in xrange(start_page, end_page+1): 48 | if page % 2 == 0: 49 | time.sleep(20) 50 | # get page source code 51 | headers = {'user-agent': generate_user_agent(), 'referer': 'http://www.xicidaili.com/'} 52 | text = requests.get(proxy_type+str(page), headers = headers).text 53 | # extract ips from source code 54 | soup = BeautifulSoup(text, 'lxml') 55 | for tr in soup.find_all('tr')[1:]: 56 | tds = tr.find_all('td') 57 | #if u'美国' in tds[3].text: 58 | proxy = tds[1].text+':'+tds[2].text 59 | if is_valid('https://www.amazon.com/', proxy): 60 | conn.sadd(ip_set, proxy) 61 | print '%s added to ip set %s' %(proxy, ip_set) 62 | 63 | 64 | 65 | def get_valid_proxy(target_url, ip_set, referer = 'https://www.google.com'): 66 | """extract a valid proxy for target_url from redis 67 | 68 | Args: 69 | target_url (str): url that need to visite with a proxy 70 | ip_set (str): the set in redis that stores proxies 71 | referer (str, optional): referer to construct headers for testing whether proxy is valid 72 | 73 | Returns: 74 | curr_proxy(str): a valid proxy in the format of ip:port 75 | """ 76 | try: 77 | conn = get_connection() 78 | proxies = conn.srandmember(ip_set, 5) 79 | curr_proxy = proxies.pop() 80 | # if proxy is not valid, delete it from redis 81 | while not is_valid(target_url, curr_proxy, referer): 82 | conn.srem(ip_set, curr_proxy) 83 | if len(proxies) == 0: 84 | proxies = conn.srandmember(ip_set, 5) 85 | curr_proxy = proxies.pop() 86 | return curr_proxy 87 | except Exception, e: 88 | print 'Error while getting proxy from redis\n%s'%e.message 89 | sys.exit(0) 90 | 91 | 92 | 93 | def is_valid(target_url, ip, referer): 94 | """judge if a proxy ip is valid for target_url 95 | 96 | Args: 97 | target_url (str): url that need to visite with a proxy 98 | ip (str): the set in redis to get 99 | referer (str, optional): referer part of headers of the request 100 | 101 | Returns: 102 | boolean 103 | """ 104 | ignore_warnings() 105 | proxy = { 106 | 'http': 'http://%s' %ip 107 | } 108 | headers = {'user-agent': generate_user_agent(), 'referer': referer} 109 | try: 110 | r = requests.get(target_url, headers = headers, proxies = proxy, timeout = 6) 111 | return True 112 | except Exception: 113 | return False 114 | 115 | 116 | 117 | if __name__ == '__main__': 118 | # disable the warnings from https website 119 | ignore_warnings() 120 | while True: 121 | get_proxies(CHINA, 'china_ips', 1,230) 122 | time.sleep(600) 123 | 124 | 125 | -------------------------------------------------------------------------------- /get_proxy_and_user_information/GetUserInfo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: LC 3 | # @Date: 2016-08-08 21:49:50 4 | # @Last modified by: LC 5 | # @Last Modified time: 2017-03-23 17:01:54 6 | # @Email: liangchaowu5@gmail.com 7 | 8 | ######################################################################################### 9 | # Function: fetch user infomation, including address, phone, visa, names, and store them in redis 10 | # 1. fetch american address from https://fakena.me/random-real-address/ 11 | # 2. fetch phone, visa from http://www.fakeaddressgenerator.com/World/us_address_generator 12 | # 3. import user names from local file "names" into redis 13 | ######################################################################################### 14 | 15 | 16 | 17 | import re 18 | import time 19 | 20 | import requests 21 | from bs4 import BeautifulSoup 22 | from user_agent import generate_user_agent 23 | 24 | from GetProxy import get_valid_proxy 25 | from IgnoreWarnings import ignore_warnings 26 | from ConnectRedis import get_connection 27 | 28 | 29 | 30 | def get_address(proxy): 31 | """fetch american address from https://fakena.me/random-real-address/ 32 | 33 | Args: 34 | proxy (str): proxy to visit the target site, ip:port 35 | 36 | Returns: 37 | format_addr (str): american address in the form of "address_line # city # state # zip" 38 | """ 39 | ignore_warnings() 40 | url = r'https://fakena.me/random-real-address/' 41 | referer = r'https://fakena.me' 42 | header = {'user-agent' : generate_user_agent() , 'referer':referer } 43 | curr_proxy ={ 44 | 'http': 'http://%s'%proxy 45 | } 46 | 47 | text = requests.get(url, headers = header, proxies = curr_proxy).text 48 | pattern = re.compile('(.+)
(.+)
') 49 | result = re.findall(pattern, text) 50 | if result: # sometimes the result is empty 51 | print result[0][0], result[0][1] 52 | address_line = result[0][0] 53 | city, state_zip = result[0][1].split(',') 54 | state, zip = state_zip.split() 55 | format_addr = address_line+'#'+city+'#'+state+'#'+zip 56 | return format_addr 57 | else: 58 | return '' 59 | 60 | 61 | def get_phone_visa(): 62 | """fetch phone, visa from http://www.fakeaddressgenerator.com/World/us_address_generator""" 63 | url = r'http://www.fakeaddressgenerator.com/World/us_address_generator' 64 | referer = r'http://www.fakeaddressgenerator.com/World' 65 | header = {'user-agent' : generate_user_agent() , 'referer':referer } 66 | text = requests.get(url, headers = header).text 67 | soup = BeautifulSoup(text, 'lxml') 68 | info = soup.find_all('input') 69 | """ 70 | print 'name:',info[0]['value'] 71 | print 'phone:',info[9]['value'] 72 | print 'visa:',info[11]['value'] 73 | print 'expires:',info[13]['value'] 74 | """ 75 | name_phone = info[0]['value']+'#'+info[9]['value'] 76 | name_visa = info[0]['value']+'#'+info[11]['value']+'#'+info[13]['value'] 77 | print name_phone, name_visa 78 | return name_phone, name_visa 79 | 80 | 81 | def get_user_names(): 82 | r = get_connection(DB=1) 83 | with open('names') as f: 84 | for line in f: 85 | # print line.strip().title() 86 | r.sadd('user_name', line.strip().title()) 87 | 88 | 89 | if __name__ == '__main__': 90 | r = get_connection(DB = 3) 91 | crawl_address, crawl_phone_visa = True, False 92 | if crawl_address: 93 | count = 0 94 | while True: 95 | if count % 10 == 0: 96 | proxy = get_valid_proxy('https://fakena.me/random-real-address/', 'china_ips', referer = r'https://fakena.me') 97 | print 'current proxy: %s'%proxy 98 | addr = get_address(proxy) 99 | if addr: 100 | r.sadd('address', addr) 101 | print 'successfully add address %s to redis'%addr 102 | count += 1 103 | time.sleep(5) 104 | elif crawl_phone_visa: 105 | while True: 106 | name_phone, name_visa = get_phone_visa() 107 | r.sadd('name_phone', name_phone) 108 | r.sadd('name_visa', name_visa) 109 | print 'successfully add phone:%s, visa:%s to redis'%(name_phone, name_visa) 110 | time.sleep(5) 111 | else: 112 | print 'nothing to crawel' 113 | 114 | 115 | -------------------------------------------------------------------------------- /get_proxy_and_user_information/IgnoreWarnings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: LC 3 | # @Date: 2016-07-11 22:31:51 4 | # @Last modified by: LC 5 | # @Last Modified time: 2016-08-14 09:29:00 6 | # @Email: liangchaowu5@gmail.com 7 | 8 | ########################################################### 9 | # function: ignore the warnings when visiting https website 10 | ########################################################### 11 | 12 | import requests 13 | 14 | 15 | from requests.packages.urllib3.exceptions import InsecurePlatformWarning,InsecureRequestWarning, SubjectAltNameWarning, SNIMissingWarning 16 | 17 | def ignore_warnings(): 18 | requests.packages.urllib3.disable_warnings(InsecureRequestWarning) 19 | requests.packages.urllib3.disable_warnings(SubjectAltNameWarning) 20 | requests.packages.urllib3.disable_warnings(InsecurePlatformWarning) 21 | requests.packages.urllib3.disable_warnings(SNIMissingWarning) -------------------------------------------------------------------------------- /get_proxy_and_user_information/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: LC 3 | # @Date: 2017-03-23 17:14:51 4 | # @Last modified by: LC 5 | # @Last Modified time: 2017-03-23 17:14:55 6 | # @Email: liangchaowu5@gmail.com 7 | -------------------------------------------------------------------------------- /record_product_information/GetProductRank.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: LC 3 | # @Date: 2016-08-25 22:19:42 4 | # @Last modified by: LC 5 | # @Last Modified time: 2016-09-02 22:09:13 6 | # @Email: liangchaowu5@gmail.com 7 | 8 | from selenium import webdriver 9 | from bs4 import BeautifulSoup 10 | 11 | 12 | # PhantomJS 无法完全解释,某些项无法获取 13 | #driver = webdriver.PhantomJS(executable_path = r'H:/PythonModule/phantomjs/phantomjs-2.1.1-windows/bin/phantomjs.exe') 14 | #item_keywords = 'Bestfy(TM) 2Pack 10FT Nylon Braided Lightning Cable' 15 | 16 | def get_product_page(search_keywords, item_keywords): 17 | """find the page of an item described by item_keywords when searching with search_keywords 18 | 19 | Args: 20 | search_keywords (str): keywords to search items, joined by '+' 21 | item_keywords (str): words to describe an item, part of title of the item 22 | 23 | Returns: 24 | 25 | """ 26 | driver = webdriver.Firefox() 27 | found = False 28 | page = -1 29 | try: 30 | for i in xrange(1,21): 31 | target_url = 'https://www.amazon.com/s/ref=sr_pg_%s?page=%s&keywords=%s&ie=UTF8'%(i, i, search_keywords) 32 | driver.get(target_url) 33 | text = driver.page_source 34 | soup = BeautifulSoup(text, 'lxml') 35 | titles = soup.find_all('h2') 36 | for title in titles: 37 | if item_keywords in title.get('data-attribute', ''): 38 | print 'Page %s: Found'%i 39 | found = True 40 | print title.get('data-attribute') 41 | print target_url 42 | break 43 | if found: 44 | page = i 45 | break 46 | print 'Page %s: Not Found'%i 47 | return page 48 | except Exception, e: 49 | print e.message 50 | finally: 51 | driver.quit() 52 | 53 | if __name__ == '__main__': 54 | search_keywords = 'lightning+cable' 55 | item_keywords = 'Bestfy(TM) 2Pack 10FT Nylon Braided Lightning Cable 8Pin to USB Charging Cable' 56 | 57 | search_keywords = 'shower+curtain+rings' 58 | item_keywords = 'Clean Healthy Living Roller Shower Curtain Rings - Polished Stainless Steel' 59 | 60 | search_keywords = 'shower+curtain+rings' 61 | item_keywords = 'Carnation Home Fashions Rococo Ceramic Resin Shower Curtain Hook, Brown-set of 12' 62 | get_product_page(search_keywords, item_keywords) 63 | 64 | 65 | -------------------------------------------------------------------------------- /record_product_information/VisitRecord.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: LC 3 | # @Date: 2016-08-26 09:38:24 4 | # @Last modified by: LC 5 | # @Last Modified time: 2017-03-23 21:23:49 6 | # @Email: liangchaowu5@gmail.com 7 | 8 | ################################################################## 9 | # record the detail information of product in mysql 10 | # including: pv created by the crawler, ranking of the product. etc 11 | # infomation of the table can be seen in create_table.sql 12 | ################################################################## 13 | 14 | import sys 15 | import MySQLdb 16 | from GetProductRank import get_product_page 17 | 18 | #infomation about mysql server 19 | HOST = 'XXXX' 20 | PORT = 3306 21 | USER = 'amazon' 22 | PASSWD = 'xxxxxx' 23 | DB = 'amazon' 24 | CHARSET = 'utf8' 25 | 26 | 27 | def get_connection(): 28 | try: 29 | conn = MySQLdb.connect(host=HOST,port=PORT,user=USER,passwd=PASSWD,db=DB,charset=CHARSET) 30 | return conn 31 | except Exception,e: 32 | print 'error while connecting to mysql' 33 | sys.exit() 34 | 35 | 36 | 37 | def update_record(asin, keywords, field, number=1, item_keywords=None): 38 | """update record with Pessimistic Concurrency Control 39 | 40 | Args: 41 | asin (TYPE): 42 | keywords (TYPE): 43 | field (TYPE): 44 | number (int, optional): 45 | 46 | Returns: 47 | TYPE 48 | """ 49 | try: 50 | conn = get_connection() 51 | conn.autocommit(False) 52 | cursor = conn.cursor() 53 | SQL = 'select %s from visit_record where asin="%s" and keywords="%s" and date=curdate() for update'%(field, asin, keywords) 54 | cursor.execute(SQL) 55 | result = cursor.fetchall() 56 | if not result: 57 | SQL = 'insert into visit_record(asin,date,keywords) values("%s", curdate(), "%s");' %(asin, keywords) 58 | cursor.execute(SQL) 59 | print 'insert new record for (%s,%s)'%(asin,keywords) 60 | 61 | if field == 'rank_page': 62 | if item_keywords: 63 | field_value = get_product_page(keywords, item_keywords) 64 | else: 65 | print 'Error, item deicription can not be empty' 66 | sys.exit(0) 67 | elif field=='pv' or field=='cart' or field=='wish_list': 68 | if result: 69 | field_value = int(result[0][0]) 70 | field_value += number 71 | else: 72 | field_value = 0 73 | 74 | else: 75 | print 'ERROR: no such field %s in database'%field 76 | sys.exit(0) 77 | # update record 78 | SQL = 'update visit_record set %s=%s where asin="%s" and keywords="%s" and date = curdate();'%(field, field_value, asin, keywords) 79 | cursor.execute(SQL) 80 | conn.commit() 81 | finally: 82 | cursor.close() 83 | conn.close() 84 | 85 | 86 | 87 | if __name__ == '__main__': 88 | """ 89 | asin = 'B0131A19HP' 90 | keywords = 'shower+curtain+rings' 91 | item_keywords = 'Clean Healthy Living Roller Shower Curtain Rings - Polished Stainless Steel' 92 | update_record(asin, keywords, 'pv',item_keywords = item_keywords) 93 | """ 94 | with open('products') as f: 95 | for line in f: 96 | asin, keywords, item_keywords = line.strip().split('#') 97 | update_record(asin, keywords, 'rank_page',item_keywords = item_keywords) 98 | 99 | 100 | -------------------------------------------------------------------------------- /record_product_information/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: LC 3 | # @Date: 2017-03-23 17:15:10 4 | # @Last modified by: LC 5 | # @Last Modified time: 2017-03-23 17:15:13 6 | # @Email: liangchaowu5@gmail.com 7 | -------------------------------------------------------------------------------- /record_product_information/create_table.sql: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: LC 3 | * @Date: 2016-08-23 15:16:43 4 | * @Last Modified by: LC 5 | * @Last Modified time: 2016-08-26 23:20:47 6 | */ 7 | 8 | create database amazon; 9 | use amazon; 10 | 11 | create table visit_record 12 | ( 13 | id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, 14 | asin CHAR(10) NOT NULL, 15 | date DATE NOT NULL, 16 | pv INT DEFAULT 0, 17 | cart INT DEFAULT 0, 18 | wish_list INT DEFAULT 0, 19 | keywords VARCHAR(150) NOT NULL, 20 | rank_page TINYINT 21 | )ENGINE=InnoDB CHARSET=UTF8; 22 | 23 | create index asin_date_keywords_inx on visit_record(asin, date, keywords) -------------------------------------------------------------------------------- /scripts/Alarm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: LC 3 | # @Date: 2016-08-30 22:18:49 4 | # @Last modified by: LC 5 | # @Last Modified time: 2017-03-23 20:32:48 6 | # @Email: liangchaowu5@gmail.com 7 | 8 | ############################################################## 9 | # monitoring the hosts excuting the task, run it in crontab 10 | # you can also add other function like ping 11 | ############################################################## 12 | import subprocess 13 | import smtplib 14 | import string 15 | import time 16 | 17 | def ping(ip): 18 | """check if host is alive 19 | 20 | Args: 21 | ip (str): ip of the host 22 | 23 | Returns: 24 | 1 or 0: represent whether the host is up or not 25 | """ 26 | command = 'ping -c 4 %s'%ip 27 | try: 28 | subprocess.check_call(command.split()) 29 | print '======================host %s is up' %ip 30 | return 1 31 | except subprocess.CalledProcessError: 32 | print '======================host %s is down' %ip 33 | return 0 34 | 35 | 36 | 37 | def send_email(subject, content): 38 | """send email when host is down, need to change your own email 39 | 40 | Args: 41 | subject (str): subject of the email 42 | content (str): content of the email 43 | 44 | Returns: 45 | None 46 | """ 47 | HOST="smtp.sina.com" 48 | PASSWORD="XXXXXXX" 49 | FROM="XXXX@sina.com" 50 | TO="XXXX@139.com" 51 | SUBJECT=subject 52 | 53 | body=string.join(( 54 | "FROM: %s" %FROM, 55 | "TO: %s" %TO, 56 | "SUBJECT: %s" %SUBJECT, 57 | "", 58 | content),"\r\n") 59 | server=smtplib.SMTP() 60 | server.connect(HOST,'25') 61 | server.starttls() 62 | server.login(FROM,PASSWORD) 63 | server.sendmail(FROM,TO,body) 64 | server.quit() 65 | 66 | 67 | if __name__ == '__main__': 68 | # hosts is a list of your machine, one for a line, represent as ip 69 | with open('hosts') as f: 70 | lines = f.readlines() 71 | for line in lines: 72 | ip = line.strip() 73 | if ip: 74 | if ping(ip)==0: 75 | time.sleep(10) 76 | if ping(ip)==0: 77 | send_email(subject = 'Ping Failure', content= 'Fail to ping %s'%ip) 78 | print '=========send email successfully' 79 | -------------------------------------------------------------------------------- /scripts/ChangeMacAddress.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: LC 3 | # @Date: 2016-07-23 22:49:04 4 | # @Last modified by: LC 5 | # @Last Modified time: 2017-03-23 20:31:53 6 | # @Email: liangchaowu5@gmail.com 7 | 8 | 9 | # ######################################################################### 10 | # Function: Change mac address temporally through built-in command ifconfig 11 | # currently only work for ubuntu and centos 12 | # ######################################################################### 13 | 14 | import random 15 | import re 16 | import subprocess 17 | import os 18 | import sys 19 | 20 | # mac address of some manufacturer 21 | manufacturer_mac_address = [ 22 | [0xE0, 0x43, 0xDB], 23 | [0x24, 0x05, 0xF5], 24 | [0x2C, 0x30, 0x33], 25 | [0x3C, 0xD9, 0x2B], 26 | [0x9C, 0x8E, 0x99], 27 | [0xB4, 0x99, 0xBA], 28 | [0x1C, 0xC1, 0xDE], 29 | [0x3C, 0x35, 0x56], 30 | [0x00, 0x50, 0xBA], 31 | [0x00, 0x17, 0x9A], 32 | [0x1C, 0xBD, 0xB9], 33 | [0x90, 0x94, 0xE4], 34 | [0x28, 0x10, 0x7B], 35 | [0x1C, 0x7E, 0xE5], 36 | [0xC4, 0xA8, 0x1D], 37 | [0x18, 0x62, 0x2C], 38 | [0x7C, 0x03, 0xD8], 39 | [0xE8, 0xF1, 0xB0], 40 | [0x00, 0xF8, 0x71], 41 | [0x20, 0xBB, 0x76], 42 | [0x2C, 0x22, 0x8B], 43 | [0x34, 0x8A, 0xAE], 44 | [0xBC, 0xEC, 0x23], 45 | [0x8C, 0xE7, 0x48], 46 | [0xAC, 0x06, 0xC7], 47 | [0xCC, 0x46, 0xD6], 48 | [0x48, 0xAD, 0x08], 49 | [0x2C, 0xAB, 0x00], 50 | [0x00, 0xE0, 0xFC], 51 | [0x24, 0xDF, 0x6A], 52 | [0x00, 0x9A, 0xCD], 53 | [0x00, 0xCD, 0xFE], 54 | [0x38, 0xF2, 0x3E], 55 | [0x58, 0xAC, 0x78], 56 | [0x90, 0x7F, 0x61], 57 | [0x28, 0xBC, 0x18], 58 | [0x80, 0x7A, 0xBF], 59 | [0x40, 0x9F, 0x87], 60 | [0x3C, 0x5A, 0xB4], 61 | [0x00, 0x1A, 0x11], 62 | [0xD8, 0x3C, 0x69], 63 | [0x74, 0xAC, 0x5F], 64 | [0x18, 0xAF, 0x61], 65 | [0xBC, 0x83, 0xA7], 66 | [0x00, 0x03, 0x47], 67 | [0x00, 0x11, 0x75], 68 | [0x00, 0x13, 0xE8], 69 | [0x00, 0x13, 0x02], 70 | [0xE4, 0xF8, 0x9C], 71 | [0xA4, 0x02, 0xB9], 72 | [0x4C, 0x34, 0x88], 73 | [0x00, 0x0D, 0x0B], 74 | [0x00, 0x07, 0x40], 75 | [0x00, 0x24, 0xA5], 76 | [0xDC, 0xFB, 0x02], 77 | [0xF4, 0xCE, 0x46], 78 | [0x00, 0x1C, 0xC4], 79 | [0x00, 0x25, 0xB3], 80 | [0x00, 0x18, 0x71], 81 | [0x00, 0x0B, 0xCD], 82 | [0x00, 0x0E, 0x7F], 83 | [0x00, 0x0F, 0x20], 84 | [0x00, 0x11, 0x0A], 85 | [0x00, 0x13, 0x21], 86 | [0x00, 0x16, 0x35], 87 | [0x00, 0x17, 0xA4], 88 | [0x00, 0x08, 0x02], 89 | [0x90, 0xE7, 0xC4], 90 | [0x74, 0xA7, 0x8E], 91 | [0xD8, 0x60, 0xB0], 92 | [0x80, 0x38, 0xBC], 93 | [0xD4, 0x40, 0xF0], 94 | [0x64, 0xA6, 0x51], 95 | [0xE8, 0xCD, 0x2D], 96 | [0xAC, 0xE2, 0x15], 97 | [0xEC, 0x23, 0x3D], 98 | [0x78, 0xF5, 0xFD], 99 | [0x80, 0xB6, 0x86], 100 | [0x10, 0xC6, 0x1F], 101 | [0x88, 0x53, 0xD4], 102 | [0x0C, 0x37, 0xDC], 103 | [0xBC, 0x76, 0x70], 104 | [0x24, 0xDB, 0xAC], 105 | [0xBC, 0x3A, 0xEA], 106 | [0xE8, 0xBB, 0xA8], 107 | [0x00, 0x21, 0xE8], 108 | [0x00, 0x60, 0x57], 109 | [0x00, 0x07, 0xD8], 110 | [0x00, 0x12, 0xF2], 111 | [0x00, 0x1B, 0xED], 112 | [0x00, 0x24, 0x38], 113 | [0x84, 0x74, 0x2A], 114 | [0x68, 0x1A, 0xB2], 115 | [0xE0, 0x05, 0xC5], 116 | [0xA0, 0xF3, 0xC1], 117 | [0x8C, 0x21, 0x0A], 118 | [0xEC, 0x17, 0x2F], 119 | [0xEC, 0x88, 0x8F], 120 | [0x14, 0xCF, 0x92], 121 | [0x64, 0x56, 0x01], 122 | [0x14, 0xCC, 0x20], 123 | [0xBC, 0x46, 0x99], 124 | [0x0C, 0x45, 0xBA], 125 | [0x84, 0x77, 0x78], 126 | [0x04, 0x53, 0xD5], 127 | [0xCC, 0x44, 0x63], 128 | [0x6C, 0x72, 0xE7], 129 | [0xCC, 0xA2, 0x23], 130 | [0xE8, 0x08, 0x8B], 131 | [0x60, 0xE7, 0x01], 132 | [0x00, 0x08, 0x83], 133 | [0xC4, 0x34, 0x6B], 134 | [0x8C, 0xDC, 0xD4], 135 | [0x34, 0x64, 0xA9], 136 | [0xD4, 0xC9, 0xEF], 137 | [0xA4, 0x5D, 0x36], 138 | [0xA0, 0xD3, 0xC1], 139 | [0x40, 0xA8, 0xF0], 140 | [0x6C, 0x3B, 0xE5], 141 | [0x08, 0x2E, 0x5F], 142 | [0x28, 0x92, 0x4A], 143 | [0x10, 0x60, 0x4B], 144 | [0x30, 0x8D, 0x99], 145 | [0x00, 0x30, 0xC1], 146 | [0xFC, 0x3F, 0xDB], 147 | [0x4C, 0xA1, 0x61], 148 | [0x7C, 0x61, 0x93], 149 | [0x00, 0x12, 0x17], 150 | [0x00, 0x0C, 0x41], 151 | [0x00, 0x0F, 0x66], 152 | [0x44, 0xE0, 0x8E], 153 | [0x18, 0x59, 0x33], 154 | [0xE4, 0x48, 0xC7], 155 | [0x24, 0x76, 0x7D], 156 | [0x2C, 0xAB, 0xA4], 157 | [0x00, 0x02, 0xC7], 158 | [0x04, 0x76, 0x6E], 159 | [0x00, 0x6B, 0x8E], 160 | [0xAC, 0x85, 0x3D], 161 | [0x74, 0x88, 0x2A], 162 | [0x78, 0xD7, 0x52], 163 | [0xE0, 0x24, 0x7F], 164 | [0x00, 0x46, 0x4B], 165 | [0x70, 0x7B, 0xE8], 166 | [0x54, 0x89, 0x98], 167 | [0x08, 0x19, 0xA6], 168 | [0x3C, 0xF8, 0x08], 169 | [0xB4, 0x15, 0x13], 170 | [0x28, 0x31, 0x52], 171 | [0xDC, 0xD2, 0xFC], 172 | [0xF8, 0xA4, 0x5F], 173 | [0x8C, 0xBE, 0xBE], 174 | [0x64, 0x09, 0x80], 175 | [0x98, 0xFA, 0xE3], 176 | [0x18, 0x59, 0x36], 177 | [0x9C, 0x99, 0xA0], 178 | [0x00, 0x03, 0xDD], 179 | [0x00, 0x10, 0x7B], 180 | [0x00, 0x90, 0x6D], 181 | [0x00, 0x90, 0xBF], 182 | [0x00, 0x50, 0x80], 183 | [0x00, 0xE0, 0x18], 184 | [0x00, 0x0C, 0x6E], 185 | [0x00, 0x1B, 0xFC], 186 | [0x00, 0x1E, 0x8C], 187 | [0x00, 0x15, 0xF2], 188 | [0x00, 0x23, 0x54], 189 | [0x00, 0x1F, 0xC6], 190 | [0x60, 0x18, 0x2E], 191 | [0xF4, 0xCF, 0xE2], 192 | [0x50, 0x1C, 0xBF], 193 | [0x28, 0x5F, 0xDB], 194 | [0x40, 0x4D, 0x8E], 195 | [0x78, 0x1D, 0xBA], 196 | [0x00, 0x1E, 0x10], 197 | [0xB0, 0xAD, 0xAA], 198 | [0x10, 0xCD, 0xAE], 199 | [0x50, 0xCD, 0x22], 200 | [0xFC, 0xA8, 0x41], 201 | [0x3C, 0xB1, 0x5B], 202 | [0xC8, 0xF4, 0x06], 203 | [0x2C, 0xF4, 0xC5], 204 | [0x70, 0x38, 0xEE], 205 | [0x88, 0xF0, 0x31], 206 | [0x50, 0x87, 0x89], 207 | [0x38, 0x1C, 0x1A], 208 | [0xF4, 0x0F, 0x1B], 209 | [0xBC, 0x67, 0x1C], 210 | [0xA0, 0xEC, 0xF9], 211 | [0xD4, 0x6D, 0x50], 212 | [0x1C, 0xE8, 0x5D], 213 | [0xC4, 0x72, 0x95], 214 | [0xA0, 0x55, 0x4F], 215 | [0x84, 0xB8, 0x02], 216 | [0xBC, 0xC4, 0x93], 217 | [0x00, 0x19, 0x47], 218 | [0x00, 0x22, 0xCE], 219 | [0xF0, 0x29, 0x29], 220 | [0xEC, 0xE1, 0xA9], 221 | [0x7C, 0x69, 0xF6] 222 | ] 223 | 224 | 225 | def generate_mac_address(): 226 | """ randomly generate a mac address, the first three bytes identify a manufacturer""" 227 | prefix = manufacturer_mac_address[random.randint(0, len(manufacturer_mac_address)-1)] 228 | mac = prefix+\ 229 | [random.randint(0x00, 0xff), 230 | random.randint(0x00, 0xff), 231 | random.randint(0x00, 0xff) ] 232 | return ':'.join(map(lambda x: "%02x" % x, mac)) 233 | 234 | 235 | def change_mac_address(mac): 236 | """change mac address permanently by modifying the configuring file of the network interface and restart it 237 | available for Ubuntu and Centos now, the last line of the configuring file should be mac address 238 | 239 | Args: 240 | mac (str): XX:XX:XX:XX:XX:XX 241 | """ 242 | text, command, interface_file = '', '', '' 243 | # ubuntu 244 | ubuntu_interface_file = '/etc/network/interfaces' 245 | if os.path.isfile(ubuntu_interface_file): 246 | text = 'hwaddress ether %s\n'%mac 247 | command = '/etc/init.d/networking restart' 248 | interface_file = ubuntu_interface_file 249 | else : 250 | # centos 251 | popen = subprocess.Popen(['ifconfig'], stdout = subprocess.PIPE, stderr = subprocess.PIPE) 252 | out, err = popen.communicate() 253 | text = out.split('\n')[0] 254 | if err: print err 255 | interface = re.match(r'(\w+)', text).group(0) 256 | centos_interface_file = '/etc/sysconfig/network-scripts/ifcfg-%s'%interface 257 | if os.path.isfile(centos_interface_file): 258 | text = 'MACADDR=%s\n'%mac 259 | command = '/etc/init.d/network restart' 260 | interface_file = centos_interface_file 261 | else: 262 | print 'network interface not found' 263 | sys.exit(0) 264 | # change mac address 265 | try: 266 | with open(interface_file) as f: 267 | lines = f.readlines() 268 | with open(interface_file, 'w') as wf: 269 | for line in lines: 270 | if line.lower().startswith(('hwaddr', 'macaddr')): 271 | continue 272 | wf.write(line) 273 | wf.write(text) 274 | os.system(command) 275 | print 'successfully change mac address to %s'%mac 276 | except Exception, e: 277 | print 'Error while changing mac address \n %s'%(e.message) 278 | sys.exit(0) 279 | 280 | 281 | def change_mac_address_with_ifconfig(mac): 282 | """modify mac address for linux with ifconfig command(not work) 283 | 284 | Args: 285 | mac(str): XX:XX:XX:XX:XX:XX 286 | """ 287 | try: 288 | command = 'ifconfig' 289 | popen = subprocess.Popen(command.split(), stdout = subprocess.PIPE, stderr = subprocess.PIPE) 290 | out, err = popen.communicate() 291 | text = out.split('\n')[0] 292 | if err: print err 293 | interface = re.match(r'(\w+)', text).group(0) 294 | command = 'ifconfig %s hw ether %s'%(interface, mac) 295 | os.system(command) 296 | print 'change mac address to %s successfully'%mac 297 | except Exception, e: 298 | print 'Error while changing mac address \n %s'%(e.message) 299 | sys.exit(0) 300 | 301 | 302 | 303 | if __name__ == '__main__': 304 | change_mac_address(generate_mac_address()) --------------------------------------------------------------------------------