├── 0-Connecting ├── example1.py └── example2.py ├── 1-HTML-Parsing ├── .DS_Store └── htmlParsing.py ├── 2-Basic-Crawler └── basicCrawler.py ├── 3-Advanced-Crawlers ├── __pycache__ │ ├── content.cpython-34.pyc │ ├── topic.cpython-34.pyc │ └── website.cpython-34.pyc ├── content.py ├── crawler.py ├── mysql-optional │ ├── __pycache__ │ │ ├── content.cpython-34.pyc │ │ ├── topic.cpython-34.pyc │ │ └── website.cpython-34.pyc │ ├── articles_mysql.py │ ├── content.py │ ├── content.pyc │ ├── crawler.sql │ ├── topic.py │ ├── topic.pyc │ ├── website.py │ └── website.pyc ├── sites.csv ├── topic.py ├── topics.txt └── website.py ├── 4-Crawling-Patterns ├── .DS_Store ├── crawlExternal.py ├── crawlSite_recursive.py ├── crawlWiki_randomWalk.py └── crawlWiki_recursive.py ├── 5-Scrapy └── wikiSpider │ ├── articles.csv │ ├── articles.json │ ├── scrapy.cfg │ ├── wiki.log │ └── wikiSpider │ ├── __init__.py │ ├── __init__.pyc │ ├── items.py │ ├── items.pyc │ ├── pipelines.py │ ├── settings.py │ ├── settings.pyc │ └── spiders │ ├── .DS_Store │ ├── __init__.py │ ├── __init__.pyc │ ├── articleSpider.py │ ├── articleSpider.pyc │ └── wiki.log ├── 6-Selenium ├── 1-seleniumBasic.py ├── 2-dragAndDrop.py ├── ghostdriver.log └── phantomjs │ ├── .DS_Store │ ├── ChangeLog │ ├── LICENSE.BSD │ ├── README.md │ ├── bin │ ├── .DS_Store │ └── phantomjs │ ├── examples │ ├── arguments.js │ ├── child_process-examples.js │ ├── colorwheel.js │ ├── countdown.js │ ├── detectsniff.js │ ├── direction.js │ ├── echoToFile.js │ ├── features.js │ ├── fibo.js │ ├── follow.js │ ├── hello.js │ ├── imagebin.js │ ├── injectme.js │ ├── ipgeocode.js │ ├── loadspeed.js │ ├── loadurlwithoutcss.js │ ├── modernizr.js │ ├── module.js │ ├── movies.js │ ├── netlog.js │ ├── netsniff.js │ ├── openurlwithproxy.js │ ├── outputEncoding.js │ ├── page_events.js │ ├── pagecallback.js │ ├── phantomwebintro.js │ ├── pizza.js │ ├── post.js │ ├── postjson.js │ ├── postserver.js │ ├── printenv.js │ ├── printheaderfooter.js │ ├── 
printmargins.js │ ├── rasterize.js │ ├── render_multi_url.js │ ├── run-jasmine.js │ ├── run-jasmine2.js │ ├── run-qunit.js │ ├── scandir.js │ ├── seasonfood.js │ ├── server.js │ ├── serverkeepalive.js │ ├── simpleserver.js │ ├── sleepsort.js │ ├── stdin-stdout-stderr.js │ ├── technews.js │ ├── tweets.js │ ├── universe.js │ ├── unrandomize.js │ ├── useragent.js │ ├── version.js │ ├── waitfor.js │ ├── walk_through_frames.js │ └── weather.js │ └── third-party.txt └── README.md /0-Connecting/example1.py: -------------------------------------------------------------------------------- 1 | from urllib.request import urlopen 2 | 3 | html = urlopen("https://oreilly.com") 4 | print(html.read()) 5 | -------------------------------------------------------------------------------- /0-Connecting/example2.py: -------------------------------------------------------------------------------- 1 | from urllib.request import urlopen 2 | from urllib.error import HTTPError 3 | from urllib.error import URLError 4 | 5 | try: 6 | html = urlopen("https://www.olin.edu/thisschoolsuckssomuch") 7 | except HTTPError as e: 8 | print(e) 9 | except URLError as e: 10 | print("The server could not be found!") 11 | else: 12 | print("It Worked!") -------------------------------------------------------------------------------- /1-HTML-Parsing/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/1-HTML-Parsing/.DS_Store -------------------------------------------------------------------------------- /1-HTML-Parsing/htmlParsing.py: -------------------------------------------------------------------------------- 1 | from urllib.request import urlopen 2 | from bs4 import BeautifulSoup 3 | 4 | html = urlopen("http://www.oreilly.com/") 5 | bsObj = BeautifulSoup(html,"lxml") 6 | image = bsObj.find("img", {"alt":"O'Reilly Media, Inc."}) 7 | print(image.attrs['src']) 
# --------------------------------------------------------------------------------
# /2-Basic-Crawler/basicCrawler.py:
# --------------------------------------------------------------------------------
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

##################
# This crawler gets the most recent "Technology" articles
# from Reuters, and prints out their title and lede
# (or the first paragraph)
#################


def _textOf(tag):
    """Return the text of a Beautiful Soup tag, or "" when the tag is None."""
    return tag.get_text() if tag is not None else ""


def getArticle(url):
    """Download one article page and print its title, time, location and body.

    Elements that are missing from the page are printed as empty strings
    instead of aborting the whole crawl with an AttributeError.

    Args:
        url: Absolute URL of the article page.
    """
    print("URL: "+url)
    # The response object is a context manager; leaving the block closes it.
    with urlopen(url) as html:
        articleObj = BeautifulSoup(html.read(), "lxml")

    title = _textOf(articleObj.find("h1"))
    timestamp = _textOf(articleObj.find("span", {"class": "timestamp"}))
    location = _textOf(articleObj.find("span", {"class": "articleLocation"}))
    # Get the main body of the article text
    body = _textOf(articleObj.find("span", {"id": "article-text"}))

    print("TITLE: "+title)
    # The value comes from the "timestamp" span, so it is labelled as a time.
    print("TIME: "+timestamp)
    print("LOCATION: "+location)
    print("BODY: "+body)
    print("-----------------------------")


def main():
    """Walk ten listing pages and print every article linked by a relative URL."""
    for i in range(0, 10):
        print("Scraping page: "+str(i)+" of articles")
        listingUrl = "http://www.reuters.com/news/archive/technologyNews?view=page&page="+str(i)+"&pageSize=10"
        with urlopen(listingUrl) as html:
            listingObj = BeautifulSoup(html.read(), "lxml")
        for heading in listingObj.findAll("h3", {"class": "story-title"}):
            anchor = heading.find("a")
            # A heading without a usable link is skipped rather than crashing.
            if anchor is None or not anchor.has_attr("href"):
                continue
            newPage = anchor.attrs['href']
            # Ignore external URLs
            if newPage.startswith("/"):
                getArticle("http://reuters.com"+newPage)


# Running the file as a script behaves as before; importing it no longer
# starts a crawl.
if __name__ == "__main__":
    main()
# --------------------------------------------------------------------------------
# /3-Advanced-Crawlers/__pycache__/content.cpython-34.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/3-Advanced-Crawlers/__pycache__/content.cpython-34.pyc
# --------------------------------------------------------------------------------
# /3-Advanced-Crawlers/__pycache__/topic.cpython-34.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/3-Advanced-Crawlers/__pycache__/topic.cpython-34.pyc
# --------------------------------------------------------------------------------
# /3-Advanced-Crawlers/__pycache__/website.cpython-34.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/3-Advanced-Crawlers/__pycache__/website.cpython-34.pyc
# --------------------------------------------------------------------------------
# /3-Advanced-Crawlers/content.py:
# --------------------------------------------------------------------------------
class Content:
    """Plain record holding one article/page found by the crawler."""

    def __init__(self, id, topicId, title, body, url):
        self.id = id
        self.topicId = topicId
        self.title = title
        self.body = body
        self.url = url


# --------------------------------------------------------------------------------
# /3-Advanced-Crawlers/crawler.py:
# --------------------------------------------------------------------------------
from website import Website
from topic import Topic
from content import Content

import pymysql
import requests
from bs4 import BeautifulSoup
import sys
from io import StringIO
import csv


class Crawler:
    """Searches configured websites for a topic and prints the articles found."""

    conn = None
    cur = None

    def printContent(self, topic, title, body, url):
        """Print one article; can be integrated with MySQL to store things."""
        print("New article found for: "+topic.name)
        print(title)
        print(body)

    def getTopicFromName(self, topicName):
        """Create a new Topic object (with id 0) from a topic string."""
        return Topic(0, topicName)

    def getPage(self, url):
        """Fetch a URL and return it as a Beautiful Soup object.

        Returns:
            The parsed page, or None when the request raised a
            requests RequestException.
        """
        print("Retrieving URL:\n"+url)
        headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36", "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"}
        # The session is closed on exit so its connections are not leaked.
        with requests.Session() as session:
            try:
                req = session.get(url, headers=headers)
            except requests.exceptions.RequestException:
                return None
        return BeautifulSoup(req.text, "lxml")

    def safeGet(self, pageObj, selector):
        """Return the text of the first element matching a CSS selector.

        Returns an empty string if no element is found for the selector.
        """
        childObj = pageObj.select(selector)
        if childObj:
            return childObj[0].get_text()
        return ""

    def search(self, topic, site):
        """Search a website for a topic and print every article page found.

        Args:
            topic: Object whose ``name`` is appended to the site's search URL.
            site: Website object carrying the search URL and CSS selectors.
        """
        bsObj = self.getPage(site.searchUrl+topic.name)
        if bsObj is None:
            # getPage() returns None on a failed request; without this guard
            # the .select() call below raised AttributeError.
            print("Something was wrong with that page or URL. Skipping!")
            return
        for result in bsObj.select(site.resultListing):
            links = result.select(site.resultUrl)
            # Skip results without a link instead of raising IndexError/KeyError.
            if not links or not links[0].has_attr("href"):
                continue
            url = links[0].attrs["href"]
            # Check to see whether it's a relative or an absolute URL
            if site.absoluteUrl == "TRUE":
                pageObj = self.getPage(url)
            else:
                pageObj = self.getPage(site.url+url)
            if pageObj is None:
                print("Something was wrong with that page or URL. Skipping!")
                continue
            title = self.safeGet(pageObj, site.pageTitle)
            print("Title is "+title)
            body = self.safeGet(pageObj, site.pageBody)
            if title != "" and body != "":
                self.printContent(topic, title, body, url)

    def crawl(self, topicStr, targetSite):
        """Start a search of the given website for the given topic string."""
        # If using MySQL, this is where stored details about the topic would
        # be looked up; without MySQL it only wraps the name in a Topic.
        topic = self.getTopicFromName(topicStr)
        self.search(topic, targetSite)


#####################################################
##### "User" code, outside the scraper class ########
#####################################################

def main():
    """Crawl every site in sites.csv for every topic in topics.txt."""
    crawler = Crawler()

    # Get a list of sites to search from the sites.csv file.
    with open("sites.csv", 'r', newline='') as siteFile:
        siteRows = csv.reader(siteFile)
        # Skip the header line - the column titles are not site data.
        next(siteRows, None)
        # Blank rows are ignored; the first eight columns build a Website.
        sites = [Website(*row[:8]) for row in siteRows if row]

    # One topic per line; blank lines are skipped instead of ending the run.
    with open("topics.txt", 'r') as topicFile:
        topicNames = [line.strip() for line in topicFile if line.strip()]

    for topicName in topicNames:
        print("GETTING INFO ABOUT: "+topicName)
        for targetSite in sites:
            crawler.crawl(topicName, targetSite)


if __name__ == "__main__":
    main()
# --------------------------------------------------------------------------------
# /3-Advanced-Crawlers/mysql-optional/__pycache__/content.cpython-34.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/3-Advanced-Crawlers/mysql-optional/__pycache__/content.cpython-34.pyc
# --------------------------------------------------------------------------------
# /3-Advanced-Crawlers/mysql-optional/__pycache__/topic.cpython-34.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/3-Advanced-Crawlers/mysql-optional/__pycache__/topic.cpython-34.pyc
# --------------------------------------------------------------------------------
# /3-Advanced-Crawlers/mysql-optional/__pycache__/website.cpython-34.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/3-Advanced-Crawlers/mysql-optional/__pycache__/website.cpython-34.pyc
# --------------------------------------------------------------------------------
# /3-Advanced-Crawlers/mysql-optional/articles_mysql.py:
# --------------------------------------------------------------------------------
from website import Website
from topic import Topic
from content import Content

import pymysql
import requests
from bs4 import BeautifulSoup
import sys
from io import StringIO
import csv


class Crawler:
    """Searches websites for topics and stores the articles found in MySQL.

    The connection and cursor are kept on the instance (``self.conn`` /
    ``self.cur``); they are None until openCon() has been called.
    """

    conn = None
    cur = None

    def __init__(self):
        self.conn = None
        self.cur = None

    def openCon(self):
        """Open the MySQL connection and select the articleCrawler database.

        Should be called before running the scraper.
        """
        # Use this line connecting to MySQL on Linux/Unix/MacOSX
        self.conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='root', db='mysql', charset='utf8')
        # Use this line connecting to MySQL on Windows
        # self.conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd=None, db='mysql', charset='utf8')

        self.cur = self.conn.cursor(pymysql.cursors.DictCursor)
        self.cur.execute("USE articleCrawler")

    def closeCon(self):
        """Close the cursor and the connection opened by openCon(), if any."""
        if self.cur is not None:
            self.cur.close()
            self.cur = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def storeContent(self, topic, site, title, body, url):
        """Print an article and insert it unless this URL/topic pair exists.

        Title and body are truncated to 999 and 9999 characters before the
        insert.
        """
        # Optionally, comment out the print statements if you want this to go
        # straight to MySQL without printing
        print("New article found for: "+topic.name)
        print(title)
        print(body)

        body = body[:9999]
        title = title[:999]
        self.cur.execute("SELECT * FROM content WHERE url = %s AND topicId = %s", (url, int(topic.id)))
        if self.cur.rowcount != 0:
            return
        try:
            self.cur.execute("INSERT INTO content (topicId, siteId, title, body, url) VALUES(%s, %s, %s, %s, %s)", (int(topic.id), int(site.id), title, body, url))
            self.conn.commit()
        except pymysql.MySQLError:
            # Only database errors are treated as "could not store"; anything
            # else (e.g. KeyboardInterrupt) now propagates.
            print("Could not store article")
            self.conn.rollback()

    def getSites(self):
        """Return a Website object for every row of the sites table."""
        self.cur.execute("SELECT * FROM sites")
        return [
            Website(site['id'], site['name'], site['url'], site['searchUrl'], site['resultListing'], site['resultUrl'], site['absoluteUrl'], site['pageTitle'], site['pageBody'])
            for site in self.cur.fetchall()
        ]

    def getTopics(self):
        """Return a Topic object for every row of the topics table."""
        self.cur.execute("SELECT * FROM topics")
        return [Topic(topic['id'], topic['name']) for topic in self.cur.fetchall()]

    def getPage(self, url):
        """Fetch a URL and return it as a Beautiful Soup object.

        Returns:
            The parsed page, or None when the request raised a
            requests RequestException.
        """
        print("Retrieving URL:\n"+url)
        headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36", "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"}
        # The session is closed on exit so its connections are not leaked.
        with requests.Session() as session:
            try:
                req = session.get(url, headers=headers)
            except requests.exceptions.RequestException:
                return None
        return BeautifulSoup(req.text, "lxml")

    def safeGet(self, pageObj, selector):
        """Return the text of the first element matching a CSS selector.

        Returns an empty string if no element is found for the selector.
        """
        childObj = pageObj.select(selector)
        if childObj:
            return childObj[0].get_text()
        return ""

    def search(self, topic, site):
        """Search a website for a topic and store every article page found.

        Args:
            topic: Topic object; its ``name`` is appended to the search URL.
            site: Website object carrying the search URL and CSS selectors.
        """
        print(site.searchUrl+topic.name)
        bsObj = self.getPage(site.searchUrl+topic.name)
        if bsObj is None:
            # getPage() returns None on a failed request; without this guard
            # the .select() call below raised AttributeError.
            print("Something is wrong with that page or URL. Skipping")
            return
        for result in bsObj.select(site.resultListing):
            links = result.select(site.resultUrl)
            # Skip results without a link instead of raising IndexError/KeyError.
            if not links or not links[0].has_attr("href"):
                continue
            url = links[0].attrs["href"]
            # Check to see whether it's a relative or an absolute URL.
            # crawler.sql seeds this column as "TRUE"/"FALSE", so the
            # comparison must not be case-sensitive.
            if str(site.absoluteUrl).strip().lower() == "true":
                pageObj = self.getPage(url)
            else:
                pageObj = self.getPage(site.url+url)

            if pageObj is None:
                print("Something is wrong with that page or URL. Skipping")
                continue
            title = self.safeGet(pageObj, site.pageTitle)
            print("Title is "+title)
            body = self.safeGet(pageObj, site.pageBody)
            if title != "" and body != "":
                self.storeContent(topic, site, title, body, url)


#####################################################
##### "User" code, outside the scraper class ########
#####################################################

def main():
    """Crawl every site in the database for every stored topic."""
    crawler = Crawler()
    crawler.openCon()
    try:
        # Build the lists of websites and topics from the database.
        sites = crawler.getSites()
        topics = crawler.getTopics()

        for topic in topics:
            print("GETTING INFO ABOUT: "+topic.name)
            for targetSite in sites:
                print("FROM SITE: "+targetSite.name)
                crawler.search(topic, targetSite)
    finally:
        # Release the connection even when a crawl step raises.
        crawler.closeCon()


if __name__ == "__main__":
    main()
# --------------------------------------------------------------------------------
# /3-Advanced-Crawlers/mysql-optional/content.py:
# --------------------------------------------------------------------------------
class Content:
    """Plain record holding one stored article/page and its site and topic ids."""

    def __init__(self, id, topicId, siteId, title, body, url):
        self.id = id
        self.topicId = topicId
        self.siteId = siteId
        self.title = title
        self.body = body
        self.url = url


# --------------------------------------------------------------------------------
# /3-Advanced-Crawlers/mysql-optional/content.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/3-Advanced-Crawlers/mysql-optional/content.pyc
# --------------------------------------------------------------------------------
# /3-Advanced-Crawlers/mysql-optional/crawler.sql:
# --------------------------------------------------------------------------------
# 1 | -- phpMyAdmin SQL Dump
# 2 | -- version 4.2.5
# 3 | --
http://www.phpmyadmin.net 4 | -- 5 | -- Host: localhost:8889 6 | -- Generation Time: Oct 17, 2015 at 05:33 PM 7 | -- Server version: 5.5.38 8 | -- PHP Version: 5.5.14 9 | 10 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO"; 11 | SET time_zone = "+00:00"; 12 | 13 | -- 14 | -- Database: `articleCrawler` 15 | -- 16 | 17 | -- -------------------------------------------------------- 18 | 19 | -- 20 | -- Table structure for table `content` 21 | -- 22 | 23 | CREATE TABLE `content` ( 24 | `id` int(11) NOT NULL, 25 | `topicId` int(11) NOT NULL, 26 | `siteId` int(11) NOT NULL, 27 | `title` varchar(1000) NOT NULL, 28 | `body` varchar(10000) NOT NULL, 29 | `url` varchar(300) DEFAULT NULL 30 | ) ENGINE=MyISAM DEFAULT CHARSET=latin1 AUTO_INCREMENT=28 ; 31 | 32 | -- -------------------------------------------------------- 33 | 34 | -- 35 | -- Table structure for table `Sites` 36 | -- 37 | 38 | CREATE TABLE `sites` ( 39 | `id` int(11) NOT NULL, 40 | `name` varchar(255) NOT NULL, 41 | `url` varchar(511) NOT NULL, 42 | `searchUrl` varchar(511) NOT NULL, 43 | `resultListing` varchar(127) NOT NULL, 44 | `resultUrl` varchar(127) NOT NULL, 45 | `absoluteUrl` varchar(127) NOT NULL, 46 | `pageTitle` varchar(127) NOT NULL, 47 | `pageBody` varchar(127) NOT NULL 48 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1 AUTO_INCREMENT=1 ; 49 | 50 | -- -------------------------------------------------------- 51 | 52 | -- 53 | -- Table structure for table `topics` 54 | -- 55 | 56 | CREATE TABLE `topics` ( 57 | `id` int(11) NOT NULL, 58 | `name` varchar(100) NOT NULL 59 | ) ENGINE=MyISAM DEFAULT CHARSET=latin1 AUTO_INCREMENT=15 ; 60 | 61 | -- 62 | -- Indexes for dumped tables 63 | -- 64 | 65 | -- 66 | -- Indexes for table `content` 67 | -- 68 | ALTER TABLE `content` 69 | ADD PRIMARY KEY (`id`); 70 | 71 | -- 72 | -- Indexes for table `Sites` 73 | -- 74 | ALTER TABLE `Sites` 75 | ADD PRIMARY KEY (`id`); 76 | 77 | -- 78 | -- Indexes for table `topics` 79 | -- 80 | ALTER TABLE `topics` 81 | ADD PRIMARY KEY (`id`); 82 | 
83 | -- 84 | -- AUTO_INCREMENT for dumped tables 85 | -- 86 | 87 | -- 88 | -- AUTO_INCREMENT for table `content` 89 | -- 90 | ALTER TABLE `content` 91 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT,AUTO_INCREMENT=28; 92 | -- 93 | -- AUTO_INCREMENT for table `Sites` 94 | -- 95 | ALTER TABLE `Sites` 96 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT; 97 | -- 98 | -- AUTO_INCREMENT for table `topics` 99 | -- 100 | ALTER TABLE `topics` 101 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT,AUTO_INCREMENT=15; 102 | 103 | 104 | INSERT INTO sites (name,url,searchUrl,resultListing,resultUrl,absoluteUrl,pageTitle,pageBody) 105 | 106 | VALUES("Brookings","http://www.brookings.edu","http://www.brookings.edu/search?start=1&q=","ul.search-results li","h3.title a","FALSE","h1","div[itemprop=\"articleBody\"]"), 107 | 108 | ("Reuters","http://reuters.com","http://www.reuters.com/search/news?blob=","div.search-result-content","h3.search-result-title a","TRUE","h1","span#articleText"); 109 | 110 | INSERT INTO `topics` (`name`) VALUES 111 | ('data'), 112 | ('star%20wars'); -------------------------------------------------------------------------------- /3-Advanced-Crawlers/mysql-optional/topic.py: -------------------------------------------------------------------------------- 1 | class Topic: 2 | 'Common base class for all topics' 3 | 4 | 5 | def __init__(self, id, name): 6 | self.id = id 7 | self.name = name 8 | 9 | -------------------------------------------------------------------------------- /3-Advanced-Crawlers/mysql-optional/topic.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/3-Advanced-Crawlers/mysql-optional/topic.pyc -------------------------------------------------------------------------------- /3-Advanced-Crawlers/mysql-optional/website.py: -------------------------------------------------------------------------------- 1 | class Website: 
2 | 'Common base class for all articles/pages' 3 | 4 | 5 | def __init__(self, id, name, url, searchUrl, resultListing, resultUrl, absoluteUrl, pageTitle, pageBody): 6 | self.id = id 7 | self.name = name 8 | self.url = url 9 | self.searchUrl = searchUrl 10 | self.resultListing = resultListing 11 | self.resultUrl = resultUrl 12 | self.absoluteUrl=absoluteUrl 13 | self.pageTitle = pageTitle 14 | self.pageBody = pageBody -------------------------------------------------------------------------------- /3-Advanced-Crawlers/mysql-optional/website.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/3-Advanced-Crawlers/mysql-optional/website.pyc -------------------------------------------------------------------------------- /3-Advanced-Crawlers/sites.csv: -------------------------------------------------------------------------------- 1 | name,url,searchUrl,resultListingClass,resultUrl,absoluteUrl,pageTitle,pageBody 2 | Reuters,http://reuters.com,http://www.reuters.com/search/news?blob=,div.search-result-content,h3.search-result-title a,FALSE,h1,span#article-text 3 | Brookings,http://www.brookings.edu,https://www.brookings.edu/search/?s=,div.list-content article,h4.title a,TRUE,h1,div.post-body 4 | -------------------------------------------------------------------------------- /3-Advanced-Crawlers/topic.py: -------------------------------------------------------------------------------- 1 | class Topic: 2 | 'Common base class for all topics' 3 | 4 | 5 | def __init__(self, id, name): 6 | self.id = id 7 | self.name = name 8 | 9 | -------------------------------------------------------------------------------- /3-Advanced-Crawlers/topics.txt: -------------------------------------------------------------------------------- 1 | Python 2 | Machine%20Learning -------------------------------------------------------------------------------- 
# /3-Advanced-Crawlers/website.py:
# --------------------------------------------------------------------------------
class Website:
    """Describes one searchable website: its URLs and the CSS selectors to use."""

    def __init__(self, name, url, searchUrl, resultListing, resultUrl, absoluteUrl, pageTitle, pageBody):
        self.name = name
        self.url = url
        self.searchUrl = searchUrl
        self.resultListing = resultListing
        self.resultUrl = resultUrl
        self.absoluteUrl = absoluteUrl
        self.pageTitle = pageTitle
        self.pageBody = pageBody


# --------------------------------------------------------------------------------
# /4-Crawling-Patterns/.DS_Store:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/4-Crawling-Patterns/.DS_Store
# --------------------------------------------------------------------------------
# /4-Crawling-Patterns/crawlExternal.py:
# --------------------------------------------------------------------------------
from urllib.request import urlopen
from urllib.parse import urlparse
from bs4 import BeautifulSoup
import re
import datetime
import random

pages = set()
# Seeding with a datetime object raises TypeError on Python 3.11+; calling
# seed() without an argument seeds from the system's randomness/time source.
random.seed()


def getInternalLinks(bsObj, includeUrl):
    """Return the distinct internal links found on a page, as absolute URLs.

    Args:
        bsObj: Parsed page (Beautiful Soup object).
        includeUrl: Any URL on the site; only its scheme and host are used.
    """
    parsed = urlparse(includeUrl)
    includeUrl = parsed.scheme+"://"+parsed.netloc
    internalLinks = []
    # Finds all links that begin with a "/" or contain the site root. The
    # root is escaped so that "." in the host name is matched literally.
    pattern = re.compile("^(/|.*"+re.escape(includeUrl)+")")
    for link in bsObj.findAll("a", href=pattern):
        href = link.attrs['href']
        if href.startswith("/"):
            href = includeUrl+href
        # De-duplicate on the absolute form, which is what the list stores.
        if href not in internalLinks:
            internalLinks.append(href)
    return internalLinks


def getExternalLinks(bsObj, excludeUrl):
    """Return the distinct links starting with "http"/"www" that omit excludeUrl."""
    externalLinks = []
    # excludeUrl is escaped so that it is matched as literal text.
    pattern = re.compile("^(http|www)((?!"+re.escape(excludeUrl)+").)*$")
    for link in bsObj.findAll("a", href=pattern):
        href = link.attrs['href']
        if href not in externalLinks:
            externalLinks.append(href)
    return externalLinks


def getRandomExternalLink(startingPage):
    """Return a random external link from a page, searching the site if needed.

    When the page has no external links, a random internal link is followed
    and the search repeats there.

    Raises:
        ValueError: If the page has neither external nor internal links.
    """
    with urlopen(startingPage) as html:
        bsObj = BeautifulSoup(html, "lxml")
    parsed = urlparse(startingPage)
    # Pass in the current page object, along with the domain name to exclude
    externalLinks = getExternalLinks(bsObj, parsed.netloc)
    if externalLinks:
        return random.choice(externalLinks)

    print("No external links, looking around the site for one")
    domain = parsed.scheme+"://"+parsed.netloc
    internalLinks = getInternalLinks(bsObj, domain)
    if not internalLinks:
        # Previously this surfaced as an opaque randint() range error.
        raise ValueError("No links found on "+startingPage)
    return getRandomExternalLink(random.choice(internalLinks))


def followExternalOnly(startingSite):
    """Hop from site to site forever, printing each random external link."""
    # A loop instead of self-recursion, so a long walk cannot exhaust the
    # interpreter's recursion limit.
    while True:
        startingSite = getRandomExternalLink(startingSite)
        print("Random external link is: "+startingSite)


if __name__ == "__main__":
    followExternalOnly("http://oreilly.com")
# --------------------------------------------------------------------------------
# /4-Crawling-Patterns/crawlSite_recursive.py:
# --------------------------------------------------------------------------------
from urllib.request import urlopen
from urllib.parse import urlparse
from urllib.error import URLError
from bs4 import BeautifulSoup
import re

pages = set()


def formatUrl(url, root):
    """Turn a link found on the site into an absolute URL under root."""
    if url.startswith("/"):
        return root+url
    if url.startswith("http"):
        return url
    return root+"/"+url


def getInternalLinks(bsObj, root):
    """Return the distinct internal links on a page, as absolute URLs.

    Links containing "#", links to other hosts, and non-page schemes
    (mailto:, javascript:, tel:) are left out.
    """
    internalLinks = []
    seen = set()
    parsed_uri = urlparse(root)
    domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
    for link in bsObj.findAll("a"):
        if not link.has_attr('href'):
            continue
        url = link.attrs['href']
        if url is None or "#" in url:
            continue
        # These are not fetchable pages; treating them as relative paths
        # produced URLs that made urlopen() fail.
        if url.startswith(("mailto:", "javascript:", "tel:")):
            continue
        # Check if URL is internal
        if url.startswith(domain) or not url.startswith("http"):
            url = formatUrl(url, root)
            if url not in seen:
                seen.add(url)
                internalLinks.append(url)
    return internalLinks


def _fetchInternalLinks(pageUrl, root):
    """Download one page, print its internal links and return them."""
    try:
        with urlopen(pageUrl) as html:
            bsObj = BeautifulSoup(html, "lxml")
    except (URLError, ValueError) as e:
        # One broken link should not end the whole crawl.
        print("Could not open "+pageUrl+": "+str(e))
        return []
    internalLinks = getInternalLinks(bsObj, root)
    print(internalLinks)
    return internalLinks


def getLinks(pageUrl, root):
    """Crawl a site depth-first from pageUrl, printing every new page found.

    Visited pages are recorded in the module-level ``pages`` set. The
    traversal order matches the recursive version, but an explicit stack is
    used so deep sites cannot hit the recursion limit.
    """
    stack = [iter(_fetchInternalLinks(pageUrl, root))]
    while stack:
        link = next(stack[-1], None)
        if link is None:
            stack.pop()
            continue
        if link not in pages:
            # We have encountered a new page
            print("----------------\n"+link)
            pages.add(link)
            stack.append(iter(_fetchInternalLinks(link, root)))


if __name__ == "__main__":
    getLinks("http://pythonscraping.com","http://pythonscraping.com")
# --------------------------------------------------------------------------------
# /4-Crawling-Patterns/crawlWiki_randomWalk.py:
# --------------------------------------------------------------------------------
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import random
import time

pages = set()


def getLinks(pageUrl):
    """Random-walk Wikipedia articles starting at pageUrl, printing each page.

    Each step prints the title, first paragraph and edit link, then moves to
    a random article link not yet in the module-level ``pages`` set. The walk
    ends when a page has no unvisited article links.
    """
    linkPattern = re.compile("^(/wiki/)((?!:).)*$")
    # A loop instead of self-recursion, so a long walk cannot exhaust the
    # interpreter's recursion limit.
    while True:
        with urlopen("http://en.wikipedia.org"+pageUrl) as html:
            bsObj = BeautifulSoup(html, "lxml")
        try:
            print(bsObj.h1.get_text())
            print(bsObj.find(id ="mw-content-text").findAll("p")[0])
            print(bsObj.find(id="ca-edit").find("span").find("a").attrs['href'])
        except (AttributeError, IndexError):
            # IndexError covers a content block without any paragraph.
            print("This page is missing something! No worries though!")

        candidates = [
            link.attrs['href']
            for link in bsObj.findAll("a", href=linkPattern)
            if link.attrs['href'] not in pages
        ]
        if not candidates:
            # Retrying random picks here would never terminate.
            print("No unvisited links on this page. Stopping.")
            return

        # We have encountered a new page
        newPage = random.choice(candidates)
        print("----------------\n"+newPage)
        pages.add(newPage)
        time.sleep(1)
        pageUrl = newPage


if __name__ == "__main__":
    getLinks("")
# --------------------------------------------------------------------------------
# /4-Crawling-Patterns/crawlWiki_recursive.py:
# --------------------------------------------------------------------------------
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

pages = set()


def getLinks(pageUrl):
    """Crawl Wikipedia articles depth-first from pageUrl, printing each page.

    Visited paths are recorded in the module-level ``pages`` set. The
    traversal order matches the recursive version, but an explicit stack is
    used so the crawl cannot hit the recursion limit.
    """
    linkPattern = re.compile("^(/wiki/)((?!:).)*$")

    def fetch(path):
        """Print one article's summary and return an iterator of its links."""
        with urlopen("http://en.wikipedia.org"+path) as html:
            bsObj = BeautifulSoup(html, "lxml")
        try:
            print(bsObj.h1.get_text())
            print(bsObj.find(id ="mw-content-text").findAll("p")[0])
            print(bsObj.find(id="ca-edit").find("span").find("a").attrs['href'])
        except (AttributeError, IndexError):
            # IndexError covers a content block without any paragraph.
            print("This page is missing something! No worries though!")
        return iter(bsObj.findAll("a", href=linkPattern))

    stack = [fetch(pageUrl)]
    while stack:
        link = next(stack[-1], None)
        if link is None:
            stack.pop()
            continue
        newPage = link.attrs['href']
        if newPage not in pages:
            # We have encountered a new page
            print("----------------\n"+newPage)
            pages.add(newPage)
            stack.append(fetch(newPage))


if __name__ == "__main__":
    getLinks("")
# --------------------------------------------------------------------------------
# /5-Scrapy/wikiSpider/articles.csv:
# --------------------------------------------------------------------------------
# 1 | title 2 | Python (programming language) 3 | Boo (programming language) 4 | Cobra (programming language) 5 | Falcon (programming language) 6 | F Sharp (programming language) 7 | Modula-3 8 | D (programming language) 9 | Perl 10 | Lisp (programming language) 11 | Java (programming language) 12 | Go (programming language) 13 | Icon (programming language) 14 | Haskell (programming language) 15 | Dylan (programming language) 16 | C (programming language) 17 | FreeDOS 18 | GNOME Foundation 19 | Free Software Movement of India 20 | C++ 21 | Contributor License Agreement 22 | Comparison of free and open-source software licenses 23 | zlib License 24 | WTFPL 25 | Main Page 26 | Microsoft Open Specification Promise 27 | Fork (software development) 28 | Microsoft Developer Network 29 | ZDNet 30 | Comparison of Visual Basic and Visual Basic .NET 31 | Comparison of the Java and .NET platforms 32 | Comparison of C Sharp and Visual Basic .NET 33 | Memory safety 34 | Compiled language 35 | System programming language 36 | Comparison of C Sharp and Java 37 | Newsqueak 38 | Modula 39 | Limbo (programming language) 40 | occam (programming language) 41 | OS X 42 | Apple Dylan 43 | Runtime system 44 | Human-readable medium 45 | License compatibility 46 | Free software license 47 | Library (computing) 48 | libpng 49 | Code Project Open License 50 | French Institute for Research in Computer Science and Automation 51 | Centre national de la
recherche scientifique 52 | Commissariat à l'énergie atomique et aux énergies alternatives 53 | MySQL 54 | MariaDB 55 | Bitbucket 56 | OpenOffice.org 57 | Jagged array 58 | Visual Basic .NET 59 | Visual Basic 60 | Comparison of Object Pascal and C 61 | NOS (software) 62 | JOVIAL 63 | Control Data Corporation 64 | C++ 65 | Bill Roscoe 66 | List of concurrent and parallel programming languages 67 | XC (programming language) 68 | -------------------------------------------------------------------------------- /5-Scrapy/wikiSpider/articles.json: -------------------------------------------------------------------------------- 1 | [{"title": "Python (programming language)"}, 2 | {"title": "Dylan (programming language)"}, 3 | {"title": "Icon (programming language)"}, 4 | {"title": "Boo (programming language)"}, 5 | {"title": "Modula-3"}, 6 | {"title": "Lisp (programming language)"}, 7 | {"title": "Java (programming language)"}, 8 | {"title": "Perl"}, 9 | {"title": "Haskell (programming language)"}, 10 | {"title": "C (programming language)"}, 11 | {"title": "ABC (programming language)"}, 12 | {"title": "C++"}, 13 | {"title": "Stackless Python"}, 14 | {"title": "PyPy"}, 15 | {"title": "Cython"}, 16 | {"title": "Trusted Computing"}, 17 | {"title": "ALGOL 68"}, 18 | {"title": "Main Page"}, 19 | {"title": "Microsoft Open Specification Promise"}, 20 | {"title": "Fork (software development)"}, 21 | {"title": "Open-source software security"}, 22 | {"title": "Software patents and free software"}, 23 | {"title": "Unified Extensible Firmware Interface"}, 24 | {"title": "SCO\u2013Linux controversies"}, 25 | {"title": "Delegate (CLI)"}, 26 | {"title": "Assembly (CLI)"}, 27 | {"title": "Virtual Execution System"}, 28 | {"title": "Platform Invocation Services"}, 29 | {"title": "Common Type System"}, 30 | {"title": "List of CIL instructions"}, 31 | {"title": "Code Access Security"}, 32 | {"title": "Comparison of Visual Basic and Visual Basic .NET"}, 33 | {"title": "Orwell (programming 
language)"}, 34 | {"title": "ML (programming language)"}, 35 | {"title": "Miranda (programming language)"}, 36 | {"title": "Kent Recursive Calculator"}, 37 | {"title": "ISWIM"}, 38 | {"title": "Id (programming language)"}, 39 | {"title": "Hope (programming language)"}, 40 | {"title": "OpenMP"}, 41 | {"title": "Edsger W. Dijkstra"}, 42 | {"title": "Tony Hoare"}, 43 | {"title": "Cornelis H. A. Koster"}, 44 | {"title": "Orthogonality"}, 45 | {"title": "Context-free grammar"}, 46 | {"title": "Van Wijngaarden grammar"}, 47 | {"title": "IFIP Working Group 2.1"}, 48 | {"title": "Samba (software)"}, 49 | {"title": "gummiboot (software)"}, 50 | {"title": "rEFInd"}, 51 | {"title": "Real-time clock"}, 52 | {"title": "Non-volatile random-access memory"}, 53 | {"title": "Globally unique identifier"}, 54 | {"title": "BIOS boot partition"}, 55 | {"title": "GNU GRUB"}, 56 | {"title": "Exception handling"}, 57 | {"title": "Instruction set"}, 58 | {"title": "Microsoft Developer Network"}, 59 | {"title": "Object type (object-oriented programming)"}, 60 | {"title": "Memory management"}, 61 | {"title": "Reference type"}, 62 | {"title": "Value type"}, 63 | {"title": "Inheritance (object-oriented programming)"}, 64 | {"title": "Eager evaluation"}, 65 | {"title": "Peter Landin"}, 66 | {"title": "Hindley\u2013Milner type system"} -------------------------------------------------------------------------------- /5-Scrapy/wikiSpider/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # http://doc.scrapy.org/en/latest/topics/scrapyd.html 5 | 6 | [settings] 7 | default = wikiSpider.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = wikiSpider 12 | -------------------------------------------------------------------------------- /5-Scrapy/wikiSpider/wiki.log: 
-------------------------------------------------------------------------------- 1 | 2015-03-09 00:11:36-0400 [scrapy] INFO: Scrapy 0.24.4 started (bot: wikiSpider) 2 | 2015-03-09 00:11:36-0400 [scrapy] INFO: Optional features available: ssl, http11 3 | 2015-03-09 00:11:36-0400 [scrapy] INFO: Overridden settings: {'NEWSPIDER_MODULE': 'wikiSpider.spiders', 'SPIDER_MODULES': ['wikiSpider.spiders'], 'LOG_FILE': 'wiki.log', 'BOT_NAME': 'wikiSpider'} 4 | 2015-03-09 00:11:36-0400 [scrapy] INFO: Enabled extensions: LogStats, TelnetConsole, CloseSpider, WebService, CoreStats, SpiderState 5 | 2015-03-09 00:11:37-0400 [scrapy] INFO: Enabled downloader middlewares: HttpAuthMiddleware, DownloadTimeoutMiddleware, UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, MetaRefreshMiddleware, HttpCompressionMiddleware, RedirectMiddleware, CookiesMiddleware, ChunkedTransferMiddleware, DownloaderStats 6 | 2015-03-09 00:11:37-0400 [scrapy] INFO: Enabled spider middlewares: HttpErrorMiddleware, OffsiteMiddleware, RefererMiddleware, UrlLengthMiddleware, DepthMiddleware 7 | 2015-03-09 00:11:37-0400 [scrapy] INFO: Enabled item pipelines: 8 | 2015-03-09 00:11:37-0400 [article] INFO: Spider opened 9 | 2015-03-09 00:11:37-0400 [article] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min) 10 | 2015-03-09 00:11:37-0400 [scrapy] DEBUG: Telnet console listening on 127.0.0.1:6023 11 | 2015-03-09 00:11:37-0400 [scrapy] DEBUG: Web service listening on 127.0.0.1:6080 12 | 2015-03-09 00:11:37-0400 [article] DEBUG: Crawled (200) (referer: None) 13 | 2015-03-09 00:11:37-0400 [scrapy] INFO: Received SIGINT, shutting down gracefully. 
Send again to force 14 | 2015-03-09 00:11:37-0400 [article] INFO: Closing spider (shutdown) 15 | 2015-03-09 00:11:37-0400 [article] DEBUG: Filtered offsite request to 'en.wikibooks.org': 16 | 2015-03-09 00:11:37-0400 [article] DEBUG: Filtered offsite request to 'code.google.com': 17 | 2015-03-09 00:11:37-0400 [article] DEBUG: Filtered offsite request to 'en.wikiquote.org': 18 | 2015-03-09 00:11:37-0400 [scrapy] INFO: Received SIGINT twice, forcing unclean shutdown 19 | -------------------------------------------------------------------------------- /5-Scrapy/wikiSpider/wikiSpider/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/5-Scrapy/wikiSpider/wikiSpider/__init__.py -------------------------------------------------------------------------------- /5-Scrapy/wikiSpider/wikiSpider/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/5-Scrapy/wikiSpider/wikiSpider/__init__.pyc -------------------------------------------------------------------------------- /5-Scrapy/wikiSpider/wikiSpider/items.py: -------------------------------------------------------------------------------- 1 | 2 | from scrapy import Item, Field 3 | 4 | 5 | class Article(Item): 6 | title = Field() 7 | links = Field() 8 | lastModified = Field() -------------------------------------------------------------------------------- /5-Scrapy/wikiSpider/wikiSpider/items.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/5-Scrapy/wikiSpider/wikiSpider/items.pyc -------------------------------------------------------------------------------- /5-Scrapy/wikiSpider/wikiSpider/pipelines.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class WikispiderPipeline(object): 10 | def process_item(self, item, spider): 11 | return item 12 | -------------------------------------------------------------------------------- /5-Scrapy/wikiSpider/wikiSpider/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for wikiSpider project 4 | # 5 | # For simplicity, this file contains only the most important settings by 6 | # default. All the other settings are documented here: 7 | # 8 | # http://doc.scrapy.org/en/latest/topics/settings.html 9 | # 10 | 11 | BOT_NAME = 'wikiSpider' 12 | 13 | SPIDER_MODULES = ['wikiSpider.spiders'] 14 | NEWSPIDER_MODULE = 'wikiSpider.spiders' 15 | 16 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 17 | #USER_AGENT = 'wikiSpider (+http://www.yourdomain.com)' 18 | -------------------------------------------------------------------------------- /5-Scrapy/wikiSpider/wikiSpider/settings.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/5-Scrapy/wikiSpider/wikiSpider/settings.pyc -------------------------------------------------------------------------------- /5-Scrapy/wikiSpider/wikiSpider/spiders/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/5-Scrapy/wikiSpider/wikiSpider/spiders/.DS_Store -------------------------------------------------------------------------------- 
/5-Scrapy/wikiSpider/wikiSpider/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /5-Scrapy/wikiSpider/wikiSpider/spiders/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/5-Scrapy/wikiSpider/wikiSpider/spiders/__init__.pyc -------------------------------------------------------------------------------- /5-Scrapy/wikiSpider/wikiSpider/spiders/articleSpider.py: -------------------------------------------------------------------------------- 1 | from scrapy.contrib.spiders import CrawlSpider, Rule 2 | from wikiSpider.items import Article 3 | from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor 4 | from scrapy import log 5 | 6 | class ArticleSpider(CrawlSpider): 7 | name="article" 8 | allowed_domains = ["en.wikipedia.org"] 9 | start_urls = ["http://en.wikipedia.org/wiki/Python_%28programming_language%29"] 10 | rules = [ 11 | Rule(SgmlLinkExtractor(allow=('(/wiki/)((?!:).)*$'),), callback="parse_item", follow=True) 12 | ] 13 | 14 | def parse_item(self, response): 15 | item = Article() 16 | title = response.xpath('//h1/text()')[0].extract() 17 | links = response.xpath('//a[starts-with(@href, "/wiki/") and not(contains(@href,":"))]/@href').extract() 18 | lastMod = response.xpath('//li[@id="footer-info-lastmod"]/text()')[0].extract() 19 | lastMod = lastMod.replace("This page was last modified on ", "") 20 | item['title'] = title 21 | item['links'] = links 22 | item['lastModified'] = lastMod 23 | return item -------------------------------------------------------------------------------- 
/5-Scrapy/wikiSpider/wikiSpider/spiders/articleSpider.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/5-Scrapy/wikiSpider/wikiSpider/spiders/articleSpider.pyc -------------------------------------------------------------------------------- /6-Selenium/1-seleniumBasic.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | import time 3 | 4 | #driver = webdriver.PhantomJS(executable_path='/Users/ryan/Documents/git/python-crawling/6-Selenium/phantomjs/bin/phantomjs') 5 | driver = webdriver.PhantomJS() 6 | driver.get("http://pythonscraping.com/pages/javascript/ajaxDemo.html") 7 | time.sleep(3) 8 | print(driver.find_element_by_id("content").text) 9 | -------------------------------------------------------------------------------- /6-Selenium/2-dragAndDrop.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.webdriver.remote.webelement import WebElement 3 | from selenium.webdriver import ActionChains 4 | 5 | #driver = webdriver.PhantomJS() 6 | driver = webdriver.Firefox() 7 | driver.get('http://pythonscraping.com/pages/javascript/draggableDemo.html') 8 | 9 | print(driver.find_element_by_id("message").text) 10 | 11 | element = driver.find_element_by_id("draggable") 12 | target = driver.find_element_by_id("div2") 13 | actions = ActionChains(driver) 14 | actions.drag_and_drop(element, target).perform() 15 | 16 | print(driver.find_element_by_id("message").text) -------------------------------------------------------------------------------- /6-Selenium/ghostdriver.log: -------------------------------------------------------------------------------- 1 | [INFO - 2016-01-16T22:56:56.441Z] GhostDriver - Main - running on port 57593 2 | [INFO - 2016-01-16T22:56:57.312Z] Session 
[71b5dab0-bca4-11e5-a944-7d581e39fb26] - page.settings - {"XSSAuditingEnabled":false,"javascriptCanCloseWindows":true,"javascriptCanOpenWindows":true,"javascriptEnabled":true,"loadImages":true,"localToRemoteUrlAccessEnabled":false,"userAgent":"Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.0.0 Safari/538.1","webSecurityEnabled":true} 3 | [INFO - 2016-01-16T22:56:57.312Z] Session [71b5dab0-bca4-11e5-a944-7d581e39fb26] - page.customHeaders: - {} 4 | [INFO - 2016-01-16T22:56:57.313Z] Session [71b5dab0-bca4-11e5-a944-7d581e39fb26] - Session.negotiatedCapabilities - {"browserName":"phantomjs","version":"2.0.0","driverName":"ghostdriver","driverVersion":"1.2.0","platform":"mac-unknown-64bit","javascriptEnabled":true,"takesScreenshot":true,"handlesAlerts":false,"databaseEnabled":false,"locationContextEnabled":false,"applicationCacheEnabled":false,"browserConnectionEnabled":false,"cssSelectorsEnabled":true,"webStorageEnabled":false,"rotatable":false,"acceptSslCerts":false,"nativeEvents":true,"proxy":{"proxyType":"direct"}} 5 | [INFO - 2016-01-16T22:56:57.313Z] SessionManagerReqHand - _postNewSessionCommand - New Session Created: 71b5dab0-bca4-11e5-a944-7d581e39fb26 6 | [INFO - 2016-01-16T23:01:56.461Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 7 | [INFO - 2016-01-16T23:06:56.472Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 8 | [INFO - 2016-01-16T23:12:19.833Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 9 | [INFO - 2016-01-17T02:22:21.575Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 10 | [INFO - 2016-01-17T02:27:21.550Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 11 | [INFO - 2016-01-17T02:32:21.565Z] SessionManagerReqHand - 
_cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 12 | [INFO - 2016-01-17T02:37:21.579Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 13 | [INFO - 2016-01-17T02:42:21.593Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 14 | [INFO - 2016-01-17T02:47:21.608Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 15 | [INFO - 2016-01-17T04:54:29.840Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 16 | [INFO - 2016-01-17T04:59:29.854Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 17 | [INFO - 2016-01-17T05:04:29.869Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 18 | [INFO - 2016-01-17T05:09:29.884Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 19 | [INFO - 2016-01-17T05:14:29.898Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 20 | [INFO - 2016-01-17T05:19:29.912Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 21 | [INFO - 2016-01-17T05:24:29.926Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 22 | [INFO - 2016-01-17T05:29:29.941Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 23 | [INFO - 2016-01-17T05:34:29.955Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 24 | [INFO - 2016-01-17T05:39:29.970Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 25 | [INFO - 2016-01-17T05:44:29.985Z] SessionManagerReqHand - 
_cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 26 | [INFO - 2016-01-17T05:49:29.999Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 27 | [INFO - 2016-01-17T05:54:30.014Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 28 | [INFO - 2016-01-17T05:59:30.029Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 29 | [INFO - 2016-01-17T06:04:29.841Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 30 | [INFO - 2016-01-17T06:11:06.277Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 31 | [INFO - 2016-01-17T06:16:06.290Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 32 | [INFO - 2016-01-17T06:21:06.305Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 33 | [INFO - 2016-01-17T06:26:06.319Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 34 | [INFO - 2016-01-17T06:31:06.372Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 35 | [INFO - 2016-01-17T06:36:06.349Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 36 | [INFO - 2016-01-17T06:41:06.373Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 37 | [INFO - 2016-01-17T06:46:06.378Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 38 | [INFO - 2016-01-17T06:51:06.393Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 39 | [INFO - 2016-01-17T06:56:06.408Z] SessionManagerReqHand - 
_cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 40 | [INFO - 2016-01-17T07:01:06.424Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 41 | [INFO - 2016-01-17T13:10:08.600Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 42 | [INFO - 2016-01-17T13:15:08.614Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 43 | [INFO - 2016-01-17T13:20:08.637Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 44 | [INFO - 2016-01-17T13:25:08.098Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 45 | [INFO - 2016-01-17T13:30:08.113Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 46 | [INFO - 2016-01-17T13:35:08.128Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 47 | [INFO - 2016-01-17T13:40:08.143Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 48 | [INFO - 2016-01-17T13:45:08.158Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 49 | [INFO - 2016-01-17T13:50:08.173Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 50 | [INFO - 2016-01-17T13:55:08.188Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 51 | [INFO - 2016-01-17T14:00:08.203Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 52 | [INFO - 2016-01-17T14:05:08.218Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 53 | [INFO - 2016-01-17T14:10:08.233Z] SessionManagerReqHand - 
_cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 54 | [INFO - 2016-01-17T14:15:08.249Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW 55 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/6-Selenium/phantomjs/.DS_Store -------------------------------------------------------------------------------- /6-Selenium/phantomjs/ChangeLog: -------------------------------------------------------------------------------- 1 | Please see also http://phantomjs.org/releases.html. 2 | 3 | 2015-01-23: Version 2.0.0 4 | 5 | New features 6 | 7 | * Switched to Qt 5 and updated WebKit (issue 10448) 8 | * Implemented clearing of memory cache (issue 10357) 9 | * Added support for HTTP header change for every request (issue 11299) 10 | 11 | Improvements 12 | 13 | * Fixed rendering of CJK text by always linking the codecs (issue 10249) 14 | * Ensured onResourceReceived is still fired on an error (issue 11163) 15 | * Fixed possible crash in handling network requests (issue 11252) 16 | * Removed hardcoded GhostDriver launching message (issue 12681) 17 | * Allowed disk cache more than 2 GB (issue 12303) 18 | 19 | Examples 20 | 21 | * Netsniff example should exit when fails to load (issue 11333) 22 | 23 | 2014-01-25: Version 1.9.7 24 | 25 | * Reverted to GhostDriver 1.1.0 instead of 1.1.1 (issue 11915) 26 | * Fixed another warning of obsolete userSpaceScaleFactor on OS X 10.9 (issue 11612) 27 | 28 | 2014-01-20: Version 1.9.6 29 | 30 | * Updated GhostDriver to version 1.1.1 (issue 11877, 11893) 31 | 32 | 2014-01-19: Version 1.9.3 33 | 34 | * Fixed CoreText performance note on OS X 10.9 (issue 11418) 35 | * Fixed warning of obsolete userSpaceScaleFactor on OS X 10.9 
(issue 11612) 36 | 37 | 2013-09-06: Version 1.9.2 38 | 39 | * Fixed graphical artifacts with transparent background on Windows (issue 11276, 11007, 11366) 40 | * Updated GhostDriver to version 1.0.4 (issue 11452) 41 | 42 | 2013-06-04: Version 1.9.1 43 | 44 | Critical bug fixes: 45 | 46 | * Fixed problems with specifying proxy server (issue 10811, 11117) 47 | * Fixed UTF-8 encoding with system.stdout and system.stderr (issue 11162) 48 | * Ensured that onResourceReceived will be always invoked (issue 11163) 49 | * Fixed module loading from an absolute path on Windows (issue 11165) 50 | * Fixed typo in the command-line option for setting the cache size (11219) 51 | * Fixed possible crash when handling network requests (issue 11252, 11338) 52 | 53 | 2013-03-20: Version 1.9.0 "Sakura" 54 | 55 | New features 56 | 57 | * Added spawn and execFile to execute external programs (issue 10219) 58 | * Added the ability to abort network requests (issue 10230) 59 | * Added system access to stdin, stdout, and stderr (issue 10333) 60 | * Added support for custom CA certificates location (issue 10916) 61 | * Added seek function to the File stream (issue 10937) 62 | * Implemented file read for a specified number of bytes (issue 10938) 63 | * Added a callback to handle network error (issue 10954, 10997) 64 | * Added custom encoding support when opening a page (issue 11043) 65 | * Implemented require.stub() support for a factory function (issue 11044) 66 | * Added page loading indicator and progress (issue 11091) 67 | * Added a timeout option for network requests (issue 11129) 68 | 69 | Improvements 70 | 71 | * Fixed the build on FreeBSD (issue 10597) 72 | * Ensured a consistent 72 dpi for Linux headless rendering (issue 10659) 73 | * Fixed possible PDF error due to invalid CreationDate field (issue 10663) 74 | * Fixed crash when uploading non existing files (issue 10941) 75 | * Improved the autocomplete internal of the interactive/REPL mode (issue 10943) 76 | * Fixed possible crash 
when accessing inline frames (issue 10947) 77 | * Changed Linux binary package setup to be built on CentOS 5 (issue 10963) 78 | * Extended SSL ignore setting to synchronous XHR (issue 10985) 79 | * Added convenient constants for modifier keys (issue 11056) 80 | * Fixed incorrect date handling in the cookies (issue 11068) 81 | * Updated GhostDriver to version 1.0.3 (issue 11146) 82 | 83 | Examples 84 | 85 | * Fixed invalid data URI in the netsniff example (issue 10740) 86 | * Implemented a new weather example (issue 10794) 87 | * Fixed rendering issues in render_multi_url (issue 11021) 88 | * Fixed proper event sequence in page_events example (issue 11028) 89 | * Miscellaneous tweaks (issue 11082) 90 | 91 | 2013-03-02: Version 1.8.2 92 | 93 | Critical bug fixes: 94 | 95 | * Fixed possible PDF error due to invalid CreationDate field (issue 663) 96 | * Fixed crash when uploading non existing files (issue 941) 97 | * Fixed possible crash when accessing inline frames (issue 947) 98 | * Extended SSL ignore setting to synchronous XHR (issue 985) 99 | * Fixed incorrect date handling in the cookies (issue 1068) 100 | 101 | 2013-01-06: Version 1.8.1 102 | 103 | Critical bug fix: 104 | 105 | * Mac OS X: Fix possible crash when using some TrueType fonts (issue 690) 106 | 107 | 2012-12-21: Version 1.8.0 "Blue Winter Rose" 108 | 109 | New features 110 | 111 | * Integrated GhostDriver as the WebDriver implementation (issue 49) 112 | * Added an option to specify the SSL protocol (issue 174) 113 | * Added encoding support for WebServer's response (issue 505) 114 | * Added process ID (PID) to the System module (issue 769) 115 | * Added properties to obtain page and frame title (issue 799) 116 | * Added page navigation methods (issue 808) 117 | * Added support for modifier keys in keyboard events (issue 835) 118 | * Added onFilePicker callback for more generic file upload API (issue 843) 119 | * Added the ability to set the page content and location (issue 909) 120 | 121 | 
Improvements 122 | 123 | * Fixed date parsing in ISO8601 format (issue 187, 267) 124 | * Fixed window.location (issue 530, 632) 125 | * Deregistered multiple callback handler (issue 807) 126 | * Fixed sending of double-click events (issue 848) 127 | * Increases maximum number of redirects (issue 849) 128 | * Fixed keycodes sent for lowercase characters (issue 852) 129 | * Fixed a regression in table row page break (issue 880) 130 | * Completed the CoffeeScript version of the examples (issue 907) 131 | * Updated Qt to version 4.8.4 (issue 918) 132 | * Fixed potential hang in some example scripts (issue 922) 133 | 134 | 2012-09-22: Version 1.7.0 "Blazing Star" 135 | 136 | New features 137 | 138 | * Added a module system modelled after CommonJS/Node.js (issue 47) 139 | * Added support for window pop-up (issue 151) 140 | * Static build on Linux (issue 413) 141 | * Added run-time detection of SSL support (issue 484) 142 | * Added more events support (issue 492, 712) 143 | * Added support for disabling automatic proxy detection (issue 580) 144 | * Provided page closing callback (issue 678) 145 | * Added methods to access URL, frames URL, frame Content (issue 758) 146 | * Added more cookies-related API (issue 761) 147 | 148 | Improvements 149 | 150 | * Refactored command-line options handling (issue 55) 151 | * Improved the workflow for producing release builds (issue 599) 152 | * Improved cookies API and implementation (issue 603, 761) 153 | * Improved frame switching API (issue 654) 154 | * Fixed iframe handling regression (issue 683) 155 | * Fixed OS version number with Windows 8 and Mountain Lion (issue 684, 688) 156 | * Fixed HAR navigation info in the netsniff example (issue 733) 157 | * Fixed compile warnings with Visual Studio (issue 744) 158 | * Removed hacks for static linking on Windows (issue 753) 159 | * Added ICO image handling on Windows (issue 779) 160 | * Fixed font antialiasing on Windows (issue 785) 161 | * Improved Jasmine test runner for Jasmine 1.2 
(issue 792) 162 | 163 | 2012-07-22: Version 1.6.1 164 | 165 | Bug fixes 166 | 167 | * Don't build the deploy in debug mode (issue 599) 168 | * Fixed building on Windows (issue 424) 169 | * Fixed remote inspector when building statically (issue 430) 170 | 171 | 2012-06-20: Version 1.6.0 "Lavender" 172 | 173 | New features 174 | 175 | * Added support for passing arguments to WebPage's evaluate (issue 132) 176 | * Added callbacks for JavaScript onConfirm and onPrompt (issue 133) 177 | * Added stack trace when error occurs (issue 166) 178 | * Added support for local storage path and quota (issue 300) 179 | * Added initial support for cookies handling (issue 354) 180 | * Added support for header footer when printing the page (issue 410, 512) 181 | * Added headers support in the loading request (issue 452) 182 | * Added support to render the web page as base64-encoded string (issue 547) 183 | * Added hooks for navigation event (issue 562) 184 | * Added command-line option to show debug messages (issue 575) 185 | * Added support for the zoom factor for web page rendering (issue 579) 186 | * Added crash reporter for Mac OS X and Linux, based on Google Breakpad (issue 576) 187 | * Added 'os' object to the system module (issue 585) 188 | * Added support for asynchronous evaluation (issue 593) 189 | 190 | Improvements 191 | 192 | * Fixed remote debugging to work on Mac OS X and Windows (issue 430) 193 | * Fixed web server getting the dropped connection for empty response (issue 451) 194 | * Fixed text rendered as boxes (squares) on headless Linux (issue 460) 195 | * Updated Qt to version 4.8.2 (issue 495) 196 | * Updated CoffeeScript compiler to version 1.3.3 (issue 496) 197 | * Fixed the build script to detect and use MAKEFLAGS (issue 503) 198 | * Fixed the build script to properly pass Qt config flags (issue 507) 199 | * Changed Info.plist to be embedded in Mac OS X executable (issue 528) 200 | * Fixed wrong module require in the imagebin example (issue 536) 201 | * Fixed 
example scripts to exit with the right exit code (issue 544) 202 | * Fixed build failure with glib 2.31.0+ (issue 559) 203 | * Fixed error handler failures in some cases (issue 589) 204 | * Fixed Twitter-related examples to work with the new site (issue 609) 205 | 206 | 2012-03-20: Version 1.5.0 "Ghost Flower" 207 | 208 | New features 209 | 210 | * Added interactive mode, also known as REPL (issue 252) 211 | * Added setting for web security, to allow cross domain XHR (issue 28) 212 | * Added error handler for WebPage object (issue 166) 213 | * Added support for custom HTTP header in the network request (issue 77) 214 | * Added support for read write encoding in the file system module (issue 367) 215 | * Added remote debugging support on Linux (issue 6) 216 | * Added support for proxy authentication (issue 105) 217 | * Added System module, to retrieve environment variables (issue 271) and arguments (issue 276) 218 | * Added fs.readLink function (issue 329) 219 | * Added support for reading and writing binary data (issue 400) 220 | * Added support to retrieve request data in the WebServer 
module (issue 340) 221 | * Added support for individual top/bottom/left/right print margins (issue 388) 222 | * Added command-line option --help (issue 347) 223 | * Added short command-line options -v and -h (issue 408) 224 | * Removed support for Flash and other plugins (issue 418) 225 | 226 | Bug fixes 227 | 228 | * Fixed multiple console.log arguments (issue 36) 229 | * Fixed file upload (issue 307) 230 | * Fixed the web server instance to be asynchronous (issue 326) and still support Keep Alive (issue 416) 231 | * Workaround Qt 4.8.0 crash due to empty URL scheme (issue 365) 232 | * Fixed a Content-Type problem where POST does not work (issue 337) 233 | * Fixed reading body request in the web server even without specific Content-Type (issue 439) 234 | * Fixed Jasmine test runner with Jasmine 1.1 (issue 402) 235 | * Fixed request URL formatting in the web server (issue 437) 236 | * Don't display debugging and warning messages (issue 323) 237 | 238 | 2011-12-31: Version 1.4.1 239 | 240 | Bug fixes 241 | 242 | * Fix setting the proxy type (issue 266) 243 | * Workaround for file upload regression (issue 307) 244 | * Fix extraneous messages in non-debug mode (issue 323) 245 | 246 | 2011-12-22: Version 1.4.0 "Glory of the Snow" 247 | 248 | New features 249 | 250 | * Added embedded HTTP server (issue 115) 251 | * Added convenient build script for Linux (issue 197) 252 | * Added support for SOCKS5 proxy (issue 266) 253 | * Updated CoffeeScript compiler to version 1.2 (issue 312) 254 | 255 | Bug fixes 256 | 257 | * Fix potential crash in QUrl with Qt 4.8 (issue 304) 258 | * Fix bug in CookieJar with QSettings and string (PyPhantomJS issue 10) 259 | * Prevent showing the icon on Mac OS X Dock (issue 281) 260 | 261 | Examples 262 | 263 | * Added a new example to detect browsers sniffing (issue 263) 264 | * Added HTTP server example (issue 115) 265 | 266 | 2011-09-23: Version 1.3.0 "Water Lily" 267 | 268 | Bug fixes 269 | 270 | * Fixed open() and POST method, without 
specifying the finished handler 271 | * Fixed script execution warning dialog (issue 165) 272 | * Added WebPage.release() to free the web page from memory (issue 154) 273 | * Added special handling of about:blank (issue 235) 274 | * Made a separate network access manager for each page (issue 190) 275 | 276 | New features 277 | 278 | * Introduced file system API based on CommonJS Filesystem proposal (issue 129) 279 | * Added support for persistent cookies (issue 91) 280 | * Added event handling, currently only for mouse events (issue 234) 281 | * Added page scroll position (issue 162) 282 | * Added HTTP authentication support (issue 45) 283 | * Added callback for page initialization (issue 143) 284 | * Added support to specify script and output encoding (issue 186) 285 | * Added option to allow local content to do cross-domain access (issue 28) 286 | * Added support to apply configurations from a JSON file (issue 180) 287 | * Added a convenient WebPage initialization construction (issue 206) 288 | * Added option to limit the size of disk cache (issue 220) 289 | 290 | Examples 291 | 292 | * Added a new example on using Modernizr to detect features (issue 144) 293 | * Fixed pizza.js example to use Mobile Yelp (issue 200) 294 | * Fixed netsniff.coffee example due to wrong indentation (issue 225) 295 | * Added an example to show live network traffic (issue 227) 296 | * Added an example demonstrating different output encodings (issue 186) 297 | 298 | 2011-06-21: Version 1.2.0 "Birds of Paradise" 299 | 300 | Version 1.2.0 is a major update. It introduces a whole set of new API. 
301 | 302 | Bug fixes 303 | 304 | * Fixed rendering a very large web page (issue 54) 305 | * Fixed reporting of CoffeeScript compile error (issue 125) 306 | 307 | New features 308 | 309 | * Added callback for console message (issue 12) 310 | * Improved security model via WebPage object (issue 41) 311 | * Added support for POST, HEAD, PUT, and DELETE (issue 88) 312 | * Scripts filename is now passed as phantom.scriptName 313 | * Added callback to capture resource requests and responses (issue 2) 314 | * Added the ability to load external JavaScript (issue 32) 315 | 316 | Examples 317 | 318 | * Ported examples to use WebPage object 319 | * Added a new example to upload an image to imagebin.org 320 | * Added a new example to show HTTP POST feature 321 | * Added a new example to sniff network traffic and save it in HAR format 322 | 323 | 324 | 2011-04-27: Version 1.1.0 "Cherry Blossom" 325 | 326 | Fixed the script loading to use UTF-8 encoding (Yasuhiro Matsumoto). 327 | 328 | Added check for system proxy setting (Yasuhiro Matsumoto). 329 | 330 | Fixed building with Cygwin and Qt 4.5 (John Dalton). 331 | 332 | Added a new example: driver for QUnit tests (Łukasz Korecki). 333 | 334 | Fixed issue #20: problem with JPG transparent color (Alessandro Portale). 335 | 336 | Fixed issue #9: ignore first line starting with #! (Matthias, aka fourplusone). 337 | 338 | Fixed issue #7: support for file upload for form submission (Matthias, aka fourplusone). 339 | 340 | Fixed issue #35: support for disabling images loading (Ariya Hidayat). 341 | 342 | Fixed issue #14: enable or disable plugins (Ariya Hidayat). 343 | 344 | Added a new example: using Canvas to produce the color wheel (Ariya Hidayat). 345 | 346 | Added support for rasterizing as GIF image (Ariya Hidayat). 347 | 348 | Added support for CoffeeScript (Ariya Hidayat). 349 | 350 | Fixed issue #19: option for setting the proxy (Clint Berry, Ariya Hidayat). 351 | 352 | Python implementation using PyQt (James Roe). 
353 | 354 | Fixed issue #17: Specify paper size for PDF export (Alessandro Portale). 355 | 356 | Fixed issue #60: Win32 and OS/2 icon files (Salvador Parra Camacho). 357 | 358 | Added clipping rectangle to the render function (Wouter de Bie). 359 | 360 | Added an example on synchronous waiting (Gabor Torok). 361 | 362 | Added command line option to use disk cache (Jon Turner). 363 | 364 | Added text extracting example (Weston Ruter). 365 | 366 | Fixed issue #93: Build with Qt < 4.7 (Ariya Hidayat). 367 | 368 | Ported all examples to CoffeeScript (Robert Gieseke). 369 | 370 | 2011-01-17: Version 1.0.0 371 | 372 | Initial launch. 373 | 374 | The API is centralized at the 'phantom' object (as child of 375 | window object) which has the properties: args, content, 376 | loadStatus, state, userAgent, version, viewportSize, and 377 | the following functions: exit, open, render, sleep. 378 | 379 | Several examples are included, among others: web page rasterizer, 380 | weather service, headless test framework driver, and many others. 381 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/LICENSE.BSD: -------------------------------------------------------------------------------- 1 | Redistribution and use in source and binary forms, with or without 2 | modification, are permitted provided that the following conditions are met: 3 | 4 | * Redistributions of source code must retain the above copyright 5 | notice, this list of conditions and the following disclaimer. 6 | * Redistributions in binary form must reproduce the above copyright 7 | notice, this list of conditions and the following disclaimer in the 8 | documentation and/or other materials provided with the distribution. 9 | * Neither the name of the <organization> nor the 10 | names of its contributors may be used to endorse or promote products 11 | derived from this software without specific prior written permission. 
12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 14 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 | ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY 17 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 22 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/README.md: -------------------------------------------------------------------------------- 1 | # [PhantomJS](http://phantomjs.org) - Scriptable Headless WebKit 2 | 3 | PhantomJS ([www.phantomjs.org](http://phantomjs.org)) is a headless WebKit scriptable with JavaScript. It is used by hundreds of [developers](http://phantomjs.org/buzz.html) and dozens of [organizations](http://phantomjs.org/users.html) for web-related development workflow. 4 | 5 | The latest [stable release](http://phantomjs.org/release-1.9.html) is version 1.9 (codenamed "Sakura"). Follow the official Twitter stream [@PhantomJS](http://twitter.com/PhantomJS) to get the frequent development updates. 6 | 7 | The next major version, PhantomJS 2, is a significant upgrade. It is still in [heavy development](https://github.com/ariya/phantomjs/wiki/PhantomJS-2). There is **no timeline** for the release yet, please monitor the [mailing-list](https://groups.google.com/forum/#!forum/phantomjs) for the progress. 
8 | 9 | **Note**: Please **do not** create a GitHub pull request **without** reading the [Contribution Guide](https://github.com/ariya/phantomjs/blob/master/CONTRIBUTING.md) first. Failure to do so may result in the rejection of the pull request. 10 | 11 | ## Use Cases 12 | 13 | - **Headless web testing**. Lightning-fast testing without the browser is now possible! Various [test frameworks](http://phantomjs.org/headless-testing.html) such as Jasmine, Capybara, QUnit, Mocha, WebDriver, YUI Test, BusterJS, FuncUnit, Robot Framework, and many others are supported. 14 | - **Page automation**. [Access and manipulate](http://phantomjs.org/page-automation.html) web pages with the standard DOM API, or with usual libraries like jQuery. 15 | - **Screen capture**. Programmatically [capture web contents](http://phantomjs.org/screen-capture.html), including CSS, SVG and Canvas. Build server-side web graphics apps, from a screenshot service to a vector chart rasterizer. 16 | - **Network monitoring**. Automate performance analysis, track [page loading](http://phantomjs.org/network-monitoring.html) and export as standard HAR format. 17 | 18 | ## Features 19 | 20 | - **Multiplatform**, available on major operating systems: Windows, Mac OS X, Linux, and other Unices. 21 | - **Fast and native implementation** of web standards: DOM, CSS, JavaScript, Canvas, and SVG. No emulation! 22 | - **Pure headless (no X11) on Linux**, ideal for continuous integration systems. Also runs on Amazon EC2, Heroku, and Iron.io. 23 | - **Easy to install**: [Download](http://phantomjs.org/download.html), unpack, and start having fun in just 5 minutes. 24 | 25 | ## Ecosystem 26 | 27 | PhantomJS need not be used only as a stand-alone tool. Check also some excellent related projects: 28 | 29 | - [CasperJS](http://casperjs.org) enables easy navigation scripting and common high-level testing. 30 | - [Poltergeist](https://github.com/jonleighton/poltergeist) allows running Capybara tests headlessly. 
31 | - [Guard::Jasmine](https://github.com/netzpirat/guard-jasmine) automatically tests Jasmine specs on Rails when files are modified. 32 | - [GhostDriver](http://github.com/detro/ghostdriver/) complements Selenium tests with a PhantomJS WebDriver implementation. 33 | - [PhantomRobot](https://github.com/datakurre/phantomrobot) runs Robot Framework acceptance tests in the background via PhantomJS. 34 | - [Mocha-PhantomJS](https://github.com/metaskills/mocha-phantomjs) runs Mocha tests using PhantomJS. 35 | 36 | and many other [related projects](http://phantomjs.org/related-projects.html). 37 | 38 | ## Questions? 39 | 40 | - Explore the complete [documentation](http://phantomjs.org/documentation/). 41 | - Read tons of [user articles](http://phantomjs.org/buzz.html) on using PhantomJS. 42 | - Join the [mailing-list](http://groups.google.com/group/phantomjs) and discuss with other PhantomJS fans. 43 | 44 | PhantomJS is free software/open source, and is distributed under the [BSD license](http://opensource.org/licenses/BSD-3-Clause). It contains third-party code, see the included `third-party.txt` file for the license information on third-party code. 45 | 46 | PhantomJS is created and maintained by [Ariya Hidayat](http://ariya.ofilabs.com/about) (Twitter: [@ariyahidayat](http://twitter.com/ariyahidayat)), with the help of [many contributors](https://github.com/ariya/phantomjs/contributors). 
47 | 48 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/bin/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/6-Selenium/phantomjs/bin/.DS_Store -------------------------------------------------------------------------------- /6-Selenium/phantomjs/bin/phantomjs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/REMitchell/python-crawling/371c27cac365aea7e9857b1fa78851c0208a3704/6-Selenium/phantomjs/bin/phantomjs -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/arguments.js: -------------------------------------------------------------------------------- 1 | var system = require('system'); 2 | if (system.args.length === 1) { 3 | console.log('Try to pass some args when invoking this script!'); 4 | } else { 5 | system.args.forEach(function (arg, i) { 6 | console.log(i + ': ' + arg); 7 | }); 8 | } 9 | phantom.exit(); 10 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/child_process-examples.js: -------------------------------------------------------------------------------- 1 | var spawn = require("child_process").spawn 2 | var execFile = require("child_process").execFile 3 | 4 | var child = spawn("ls", ["-lF", "/rooot"]) 5 | 6 | child.stdout.on("data", function (data) { 7 | console.log("spawnSTDOUT:", JSON.stringify(data)) 8 | }) 9 | 10 | child.stderr.on("data", function (data) { 11 | console.log("spawnSTDERR:", JSON.stringify(data)) 12 | }) 13 | 14 | child.on("exit", function (code) { 15 | console.log("spawnEXIT:", code) 16 | }) 17 | 18 | //child.kill("SIGKILL") 19 | 20 | execFile("ls", ["-lF", "/usr"], null, function (err, stdout, stderr) { 21 | console.log("execFileSTDOUT:", 
JSON.stringify(stdout)) 22 | console.log("execFileSTDERR:", JSON.stringify(stderr)) 23 | }) 24 | 25 | setTimeout(function () { 26 | phantom.exit(0) 27 | }, 2000) 28 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/colorwheel.js: -------------------------------------------------------------------------------- 1 | var page = require('webpage').create(); 2 | page.viewportSize = { width: 400, height : 400 }; 3 | page.content = ''; 4 | page.evaluate(function() { 5 | var el = document.getElementById('surface'), 6 | context = el.getContext('2d'), 7 | width = window.innerWidth, 8 | height = window.innerHeight, 9 | cx = width / 2, 10 | cy = height / 2, 11 | radius = width / 2.3, 12 | imageData, 13 | pixels, 14 | hue, sat, value, 15 | i = 0, x, y, rx, ry, d, 16 | f, g, p, u, v, w, rgb; 17 | 18 | el.width = width; 19 | el.height = height; 20 | imageData = context.createImageData(width, height); 21 | pixels = imageData.data; 22 | 23 | for (y = 0; y < height; y = y + 1) { 24 | for (x = 0; x < width; x = x + 1, i = i + 4) { 25 | rx = x - cx; 26 | ry = y - cy; 27 | d = rx * rx + ry * ry; 28 | if (d < radius * radius) { 29 | hue = 6 * (Math.atan2(ry, rx) + Math.PI) / (2 * Math.PI); 30 | sat = Math.sqrt(d) / radius; 31 | g = Math.floor(hue); 32 | f = hue - g; 33 | u = 255 * (1 - sat); 34 | v = 255 * (1 - sat * f); 35 | w = 255 * (1 - sat * (1 - f)); 36 | pixels[i] = [255, v, u, u, w, 255, 255][g]; 37 | pixels[i + 1] = [w, 255, 255, v, u, u, w][g]; 38 | pixels[i + 2] = [u, u, w, 255, 255, v, u][g]; 39 | pixels[i + 3] = 255; 40 | } 41 | } 42 | } 43 | 44 | context.putImageData(imageData, 0, 0); 45 | document.body.style.backgroundColor = 'white'; 46 | document.body.style.margin = '0px'; 47 | }); 48 | 49 | page.render('colorwheel.png'); 50 | 51 | phantom.exit(); 52 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/countdown.js: 
-------------------------------------------------------------------------------- 1 | var t = 10, 2 | interval = setInterval(function(){ 3 | if ( t > 0 ) { 4 | console.log(t--); 5 | } else { 6 | console.log("BLAST OFF!"); 7 | phantom.exit(); 8 | } 9 | }, 1000); 10 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/detectsniff.js: -------------------------------------------------------------------------------- 1 | // Detect if a web page sniffs the user agent or not. 2 | 3 | var page = require('webpage').create(), 4 | system = require('system'), 5 | sniffed, 6 | address; 7 | 8 | page.onInitialized = function () { 9 | page.evaluate(function () { 10 | 11 | (function () { 12 | var userAgent = window.navigator.userAgent, 13 | platform = window.navigator.platform; 14 | 15 | window.navigator = { 16 | appCodeName: 'Mozilla', 17 | appName: 'Netscape', 18 | cookieEnabled: false, 19 | sniffed: false 20 | }; 21 | 22 | window.navigator.__defineGetter__('userAgent', function () { 23 | window.navigator.sniffed = true; 24 | return userAgent; 25 | }); 26 | 27 | window.navigator.__defineGetter__('platform', function () { 28 | window.navigator.sniffed = true; 29 | return platform; 30 | }); 31 | })(); 32 | }); 33 | }; 34 | 35 | if (system.args.length === 1) { 36 | console.log('Usage: detectsniff.js '); 37 | phantom.exit(1); 38 | } else { 39 | address = system.args[1]; 40 | console.log('Checking ' + address + '...'); 41 | page.open(address, function (status) { 42 | if (status !== 'success') { 43 | console.log('FAIL to load the address'); 44 | phantom.exit(); 45 | } else { 46 | window.setTimeout(function () { 47 | sniffed = page.evaluate(function () { 48 | return navigator.sniffed; 49 | }); 50 | if (sniffed) { 51 | console.log('The page tried to sniff the user agent.'); 52 | } else { 53 | console.log('The page did not try to sniff the user agent.'); 54 | } 55 | phantom.exit(); 56 | }, 1500); 57 | } 58 | }); 59 | } 60 | 
-------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/direction.js: -------------------------------------------------------------------------------- 1 | // Get driving direction using Google Directions API. 2 | 3 | var page = require('webpage').create(), 4 | system = require('system'), 5 | origin, dest, steps; 6 | 7 | if (system.args.length < 3) { 8 | console.log('Usage: direction.js origin destination'); 9 | console.log('Example: direction.js "San Diego" "Palo Alto"'); 10 | phantom.exit(1); 11 | } else { 12 | origin = system.args[1]; 13 | dest = system.args[2]; 14 | page.open(encodeURI('http://maps.googleapis.com/maps/api/directions/xml?origin=' + origin + 15 | '&destination=' + dest + '&units=imperial&mode=driving&sensor=false'), function (status) { 16 | if (status !== 'success') { 17 | console.log('Unable to access network'); 18 | } else { 19 | steps = page.content.match(/(.*)<\/html_instructions>/ig); 20 | if (steps == null) { 21 | console.log('No data available for ' + origin + ' to ' + dest); 22 | } else { 23 | steps.forEach(function (ins) { 24 | ins = ins.replace(/\</ig, '<').replace(/\>/ig, '>'); 25 | ins = ins.replace(/\
/g, ''); 27 | console.log(ins); 28 | }); 29 | console.log(''); 30 | console.log(page.content.match(/.*<\/copyrights>/ig).join('').replace(/<.*?>/g, '')); 31 | } 32 | } 33 | phantom.exit(); 34 | }); 35 | } 36 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/echoToFile.js: -------------------------------------------------------------------------------- 1 | // echoToFile.js - Write in a given file all the parameters passed on the CLI 2 | var fs = require('fs'), 3 | system = require('system'); 4 | 5 | if (system.args.length < 3) { 6 | console.log("Usage: echoToFile.js DESTINATION_FILE "); 7 | phantom.exit(1); 8 | } else { 9 | var content = '', 10 | f = null, 11 | i; 12 | for ( i= 2; i < system.args.length; ++i ) { 13 | content += system.args[i] + (i === system.args.length-1 ? '' : ' '); 14 | } 15 | 16 | try { 17 | fs.write(system.args[1], content, 'w'); 18 | } catch(e) { 19 | console.log(e); 20 | } 21 | 22 | phantom.exit(); 23 | } 24 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/features.js: -------------------------------------------------------------------------------- 1 | var feature, supported = [], unsupported = []; 2 | 3 | phantom.injectJs('modernizr.js'); 4 | console.log('Detected features (using Modernizr ' + Modernizr._version + '):'); 5 | for (feature in Modernizr) { 6 | if (Modernizr.hasOwnProperty(feature)) { 7 | if (feature[0] !== '_' && typeof Modernizr[feature] !== 'function' && 8 | feature !== 'input' && feature !== 'inputtypes') { 9 | if (Modernizr[feature]) { 10 | supported.push(feature); 11 | } else { 12 | unsupported.push(feature); 13 | } 14 | } 15 | } 16 | } 17 | 18 | console.log(''); 19 | console.log('Supported:'); 20 | supported.forEach(function (e) { 21 | console.log(' ' + e); 22 | }); 23 | 24 | console.log(''); 25 | console.log('Not supported:'); 26 | unsupported.forEach(function (e) { 27 | console.log(' ' + e); 
28 | }); 29 | phantom.exit(); 30 | 31 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/fibo.js: -------------------------------------------------------------------------------- 1 | var fibs = [0, 1]; 2 | var ticker = window.setInterval(function () { 3 | console.log(fibs[fibs.length - 1]); 4 | fibs.push(fibs[fibs.length - 1] + fibs[fibs.length - 2]); 5 | if (fibs.length > 10) { 6 | window.clearInterval(ticker); 7 | phantom.exit(); 8 | } 9 | }, 300); 10 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/follow.js: -------------------------------------------------------------------------------- 1 | // List following and followers from several accounts 2 | 3 | var users = ['PhantomJS', 4 | 'ariyahidayat', 5 | 'detronizator', 6 | 'KDABQt', 7 | 'lfranchi', 8 | 'jonleighton', 9 | '_jamesmgreene', 10 | 'Vitalliumm']; 11 | 12 | function follow(user, callback) { 13 | var page = require('webpage').create(); 14 | page.open('http://mobile.twitter.com/' + user, function (status) { 15 | if (status === 'fail') { 16 | console.log(user + ': ?'); 17 | } else { 18 | var data = page.evaluate(function () { 19 | return document.querySelector('div.profile td.stat.stat-last div.statnum').innerText; 20 | }); 21 | console.log(user + ': ' + data); 22 | } 23 | page.close(); 24 | callback.apply(); 25 | }); 26 | } 27 | 28 | function process() { 29 | if (users.length > 0) { 30 | var user = users[0]; 31 | users.splice(0, 1); 32 | follow(user, process); 33 | } else { 34 | phantom.exit(); 35 | } 36 | } 37 | 38 | process(); 39 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/hello.js: -------------------------------------------------------------------------------- 1 | console.log('Hello, world!'); 2 | phantom.exit(); 3 | -------------------------------------------------------------------------------- 
/6-Selenium/phantomjs/examples/imagebin.js: -------------------------------------------------------------------------------- 1 | // Upload an image to imagebin.org 2 | 3 | var page = require('webpage').create(), 4 | system = require('system'), 5 | fname; 6 | 7 | if (system.args.length !== 2) { 8 | console.log('Usage: imagebin.js filename'); 9 | phantom.exit(1); 10 | } else { 11 | fname = system.args[1]; 12 | page.open("http://imagebin.org/index.php?page=add", function () { 13 | page.uploadFile('input[name=image]', fname); 14 | page.evaluate(function () { 15 | document.querySelector('input[name=nickname]').value = 'phantom'; 16 | document.querySelector('input[name=disclaimer_agree]').click() 17 | document.querySelector('form').submit(); 18 | }); 19 | window.setTimeout(function () { 20 | phantom.exit(); 21 | }, 3000); 22 | }); 23 | } 24 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/injectme.js: -------------------------------------------------------------------------------- 1 | // Use 'page.injectJs()' to load the script itself in the Page context 2 | 3 | if ( typeof(phantom) !== "undefined" ) { 4 | var page = require('webpage').create(); 5 | 6 | // Route "console.log()" calls from within the Page context to the main Phantom context (i.e. current "this") 7 | page.onConsoleMessage = function(msg) { 8 | console.log(msg); 9 | }; 10 | 11 | page.onAlert = function(msg) { 12 | console.log(msg); 13 | }; 14 | 15 | console.log("* Script running in the Phantom context."); 16 | console.log("* Script will 'inject' itself in a page..."); 17 | page.open("about:blank", function(status) { 18 | if ( status === "success" ) { 19 | console.log(page.injectJs("injectme.js") ? "... done injecting itself!" : "... fail! 
Check the $PWD?!"); 20 | } 21 | phantom.exit(); 22 | }); 23 | } else { 24 | alert("* Script running in the Page context."); 25 | } 26 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/ipgeocode.js: -------------------------------------------------------------------------------- 1 | // Give the estimated location based on the IP address. 2 | 3 | cb = function (data) { 4 | var loc = data.city; 5 | if (data.region_name.length > 0) 6 | loc = loc + ', ' + data.region_name; 7 | console.log('IP address: ' + data.ip); 8 | console.log('Estimated location: ' + loc); 9 | phantom.exit(); 10 | }; 11 | 12 | var el = document.createElement('script'); 13 | el.src = 'http://freegeoip.net/json/?callback=cb'; 14 | document.body.appendChild(el); 15 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/loadspeed.js: -------------------------------------------------------------------------------- 1 | var page = require('webpage').create(), 2 | system = require('system'), 3 | t, address; 4 | 5 | if (system.args.length === 1) { 6 | console.log('Usage: loadspeed.js '); 7 | phantom.exit(1); 8 | } else { 9 | t = Date.now(); 10 | address = system.args[1]; 11 | page.open(address, function (status) { 12 | if (status !== 'success') { 13 | console.log('FAIL to load the address'); 14 | } else { 15 | t = Date.now() - t; 16 | console.log('Page title is ' + page.evaluate(function () { 17 | return document.title; 18 | })); 19 | console.log('Loading time ' + t + ' msec'); 20 | } 21 | phantom.exit(); 22 | }); 23 | } 24 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/loadurlwithoutcss.js: -------------------------------------------------------------------------------- 1 | var page = require('webpage').create(), 2 | system = require('system'); 3 | 4 | if (system.args.length < 2) { 5 | console.log('Usage: 
loadurlwithoutcss.js URL'); 6 | phantom.exit(); 7 | } 8 | 9 | var address = system.args[1]; 10 | 11 | page.onResourceRequested = function(requestData, request) { 12 | if ((/http:\/\/.+?\.css/gi).test(requestData['url']) || requestData.headers['Content-Type'] == 'text/css') { 13 | console.log('The url of the request is matching. Aborting: ' + requestData['url']); 14 | request.abort(); 15 | } 16 | }; 17 | 18 | page.open(address, function(status) { 19 | if (status === 'success') { 20 | phantom.exit(); 21 | } else { 22 | console.log('Unable to load the address!'); 23 | phantom.exit(); 24 | } 25 | }); -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/modernizr.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Modernizr v2.8.2 3 | * www.modernizr.com 4 | * 5 | * Copyright (c) Faruk Ates, Paul Irish, Alex Sexton 6 | * Available under the BSD and MIT licenses: www.modernizr.com/license/ 7 | */ 8 | 9 | /* 10 | * Modernizr tests which native CSS3 and HTML5 features are available in 11 | * the current UA and makes the results available to you in two ways: 12 | * as properties on a global Modernizr object, and as classes on the 13 | * element. This information allows you to progressively enhance 14 | * your pages with a granular level of control over the experience. 15 | * 16 | * Modernizr has an optional (not included) conditional resource loader 17 | * called Modernizr.load(), based on Yepnope.js (yepnopejs.com). 
18 | * To get a build that includes Modernizr.load(), as well as choosing 19 | * which tests to include, go to www.modernizr.com/download/ 20 | * 21 | * Authors Faruk Ates, Paul Irish, Alex Sexton 22 | * Contributors Ryan Seddon, Ben Alman 23 | */ 24 | 25 | window.Modernizr = (function( window, document, undefined ) { 26 | 27 | var version = '2.8.2', 28 | 29 | Modernizr = {}, 30 | 31 | /*>>cssclasses*/ 32 | // option for enabling the HTML classes to be added 33 | enableClasses = true, 34 | /*>>cssclasses*/ 35 | 36 | docElement = document.documentElement, 37 | 38 | /** 39 | * Create our "modernizr" element that we do most feature tests on. 40 | */ 41 | mod = 'modernizr', 42 | modElem = document.createElement(mod), 43 | mStyle = modElem.style, 44 | 45 | /** 46 | * Create the input element for various Web Forms feature tests. 47 | */ 48 | inputElem /*>>inputelem*/ = document.createElement('input') /*>>inputelem*/ , 49 | 50 | /*>>smile*/ 51 | smile = ':)', 52 | /*>>smile*/ 53 | 54 | toString = {}.toString, 55 | 56 | // TODO :: make the prefixes more granular 57 | /*>>prefixes*/ 58 | // List of property values to set for css tests. See ticket #21 59 | prefixes = ' -webkit- -moz- -o- -ms- '.split(' '), 60 | /*>>prefixes*/ 61 | 62 | /*>>domprefixes*/ 63 | // Following spec is to expose vendor-specific style properties as: 64 | // elem.style.WebkitBorderRadius 65 | // and the following would be incorrect: 66 | // elem.style.webkitBorderRadius 67 | 68 | // Webkit ghosts their properties in lowercase but Opera & Moz do not. 
69 | // Microsoft uses a lowercase `ms` instead of the correct `Ms` in IE8+ 70 | // erik.eae.net/archives/2008/03/10/21.48.10/ 71 | 72 | // More here: github.com/Modernizr/Modernizr/issues/issue/21 73 | omPrefixes = 'Webkit Moz O ms', 74 | 75 | cssomPrefixes = omPrefixes.split(' '), 76 | 77 | domPrefixes = omPrefixes.toLowerCase().split(' '), 78 | /*>>domprefixes*/ 79 | 80 | /*>>ns*/ 81 | ns = {'svg': 'http://www.w3.org/2000/svg'}, 82 | /*>>ns*/ 83 | 84 | tests = {}, 85 | inputs = {}, 86 | attrs = {}, 87 | 88 | classes = [], 89 | 90 | slice = classes.slice, 91 | 92 | featureName, // used in testing loop 93 | 94 | 95 | /*>>teststyles*/ 96 | // Inject element with style element and some CSS rules 97 | injectElementWithStyles = function( rule, callback, nodes, testnames ) { 98 | 99 | var style, ret, node, docOverflow, 100 | div = document.createElement('div'), 101 | // After page load injecting a fake body doesn't work so check if body exists 102 | body = document.body, 103 | // IE6 and 7 won't return offsetWidth or offsetHeight unless it's in the body element, so we fake it. 104 | fakeBody = body || document.createElement('body'); 105 | 106 | if ( parseInt(nodes, 10) ) { 107 | // In order not to give false positives we create a node for each test 108 | // This also allows the method to scale for unspecified uses 109 | while ( nodes-- ) { 110 | node = document.createElement('div'); 111 | node.id = testnames ? testnames[nodes] : mod + (nodes + 1); 112 | div.appendChild(node); 113 | } 114 | } 115 | 116 | // '].join(''); 122 | div.id = mod; 123 | // IE6 will false positive on some tests due to the style element inside the test div somehow interfering offsetHeight, so insert it into body or fakebody. 124 | // Opera will act all quirky when injecting elements in documentElement when page is served as xml, needs fakebody too. #270 125 | (body ? 
div : fakeBody).innerHTML += style; 126 | fakeBody.appendChild(div); 127 | if ( !body ) { 128 | //avoid crashing IE8, if background image is used 129 | fakeBody.style.background = ''; 130 | //Safari 5.13/5.1.4 OSX stops loading if ::-webkit-scrollbar is used and scrollbars are visible 131 | fakeBody.style.overflow = 'hidden'; 132 | docOverflow = docElement.style.overflow; 133 | docElement.style.overflow = 'hidden'; 134 | docElement.appendChild(fakeBody); 135 | } 136 | 137 | ret = callback(div, rule); 138 | // If this is done after page load we don't want to remove the body so check if body exists 139 | if ( !body ) { 140 | fakeBody.parentNode.removeChild(fakeBody); 141 | docElement.style.overflow = docOverflow; 142 | } else { 143 | div.parentNode.removeChild(div); 144 | } 145 | 146 | return !!ret; 147 | 148 | }, 149 | /*>>teststyles*/ 150 | 151 | /*>>mq*/ 152 | // adapted from matchMedia polyfill 153 | // by Scott Jehl and Paul Irish 154 | // gist.github.com/786768 155 | testMediaQuery = function( mq ) { 156 | 157 | var matchMedia = window.matchMedia || window.msMatchMedia; 158 | if ( matchMedia ) { 159 | return matchMedia(mq) && matchMedia(mq).matches || false; 160 | } 161 | 162 | var bool; 163 | 164 | injectElementWithStyles('@media ' + mq + ' { #' + mod + ' { position: absolute; } }', function( node ) { 165 | bool = (window.getComputedStyle ? 166 | getComputedStyle(node, null) : 167 | node.currentStyle)['position'] == 'absolute'; 168 | }); 169 | 170 | return bool; 171 | 172 | }, 173 | /*>>mq*/ 174 | 175 | 176 | /*>>hasevent*/ 177 | // 178 | // isEventSupported determines if a given element supports the given event 179 | // kangax.github.com/iseventsupported/ 180 | // 181 | // The following results are known incorrects: 182 | // Modernizr.hasEvent("webkitTransitionEnd", elem) // false negative 183 | // Modernizr.hasEvent("textInput") // in Webkit. github.com/Modernizr/Modernizr/issues/333 184 | // ... 
isEventSupported = (function() {

    // Element type to create for an event when the caller passes no element;
    // events not listed here are probed on a plain <div>.
    var TAGNAMES = {
        'select': 'input', 'change': 'input',
        'submit': 'form', 'reset': 'form',
        'error': 'img', 'load': 'img', 'abort': 'img'
    };

    /**
     * @param {String} eventName Event name without the 'on' prefix.
     * @param {Object} [element] Object to probe; a fresh element is created
     *        when omitted.
     * @returns {Boolean} Whether an 'on' + eventName handler is supported.
     */
    function isEventSupported( eventName, element ) {

        element = element || document.createElement(TAGNAMES[eventName] || 'div');
        eventName = 'on' + eventName;

        // When using `setAttribute`, IE skips "unload", WebKit skips "unload" and "resize", whereas `in` "catches" those
        var isSupported = eventName in element;

        if ( !isSupported ) {
            // If it has no `setAttribute` (i.e. doesn't implement Node interface), try generic element
            if ( !element.setAttribute ) {
                element = document.createElement('div');
            }
            if ( element.setAttribute && element.removeAttribute ) {
                // Setting the handler as an attribute: supported when the
                // matching property then reads back as a function.
                element.setAttribute(eventName, '');
                isSupported = is(element[eventName], 'function');

                // If property was created, "remove it" (by setting value to `undefined`)
                if ( !is(element[eventName], 'undefined') ) {
                    element[eventName] = undefined;
                }
                element.removeAttribute(eventName);
            }
        }

        // Drop the reference to the probed element before returning.
        element = null;
        return isSupported;
    }
    return isEventSupported;
})(),
/*>>hasevent*/

// TODO :: Add flag for hasownprop ? didn't last time

// hasOwnProperty shim by kangax needed for Safari 2.0 support
_hasOwnProperty = ({}).hasOwnProperty, hasOwnProp;

// Use the native hasOwnProperty when it exists and is callable ...
if ( !is(_hasOwnProperty, 'undefined') && !is(_hasOwnProperty.call, 'undefined') ) {
    hasOwnProp = function (object, property) {
        return _hasOwnProperty.call(object, property);
    };
}
// ... otherwise approximate it: the property is present on the object but
// absent from its constructor's prototype.
else {
    hasOwnProp = function (object, property) { /* yes, this can give false positives/negatives, but most of the time we don't care about those */
        return ((property in object) && is(object.constructor.prototype[property], 'undefined'));
    };
}

// Adapted from ES5-shim https://github.com/kriskowal/es5-shim/blob/master/es5-shim.js
// es5.github.com/#x15.3.4.5

// Installed only when the environment has no native Function.prototype.bind.
if (!Function.prototype.bind) {
    Function.prototype.bind = function bind(that) {

        var target = this;

        if (typeof target != "function") {
            throw new TypeError();
        }

        // Arguments after `that` are pre-applied to every call of `bound`.
        var args = slice.call(arguments, 1),
            bound = function () {

                if (this instanceof bound) {
                    // Called with `new`: construct an object inheriting from
                    // target.prototype and ignore the bound `that`.

                    var F = function(){};
                    F.prototype = target.prototype;
                    var self = new F();

                    var result = target.apply(
                        self,
                        args.concat(slice.call(arguments))
                    );
                    // An object returned by the target replaces the new instance.
                    if (Object(result) === result) {
                        return result;
                    }
                    return self;

                } else {
                    // Plain call: invoke the target with the bound `that`.

                    return target.apply(
                        that,
                        args.concat(slice.call(arguments))
                    );

                }

            };

        return bound;
    };
}

/**
 * setCss applies given styles to the Modernizr DOM node.
 */
function setCss( str ) {
    mStyle.cssText = str;
}

/**
 * setCssAll extrapolates all vendor-specific css strings.
295 | */ 296 | function setCssAll( str1, str2 ) { 297 | return setCss(prefixes.join(str1 + ';') + ( str2 || '' )); 298 | } 299 | 300 | /** 301 | * is returns a boolean for if typeof obj is exactly type. 302 | */ 303 | function is( obj, type ) { 304 | return typeof obj === type; 305 | } 306 | 307 | /** 308 | * contains returns a boolean for if substr is found within str. 309 | */ 310 | function contains( str, substr ) { 311 | return !!~('' + str).indexOf(substr); 312 | } 313 | 314 | /*>>testprop*/ 315 | 316 | // testProps is a generic CSS / DOM property test. 317 | 318 | // In testing support for a given CSS property, it's legit to test: 319 | // `elem.style[styleName] !== undefined` 320 | // If the property is supported it will return an empty string, 321 | // if unsupported it will return undefined. 322 | 323 | // We'll take advantage of this quick test and skip setting a style 324 | // on our modernizr element, but instead just testing undefined vs 325 | // empty string. 326 | 327 | // Because the testing of the CSS property names (with "-", as 328 | // opposed to the camelCase DOM properties) is non-portable and 329 | // non-standard but works in WebKit and IE (but not Gecko or Opera), 330 | // we explicitly reject properties with dashes so that authors 331 | // developing in WebKit or IE first don't end up with 332 | // browser-specific content by accident. 333 | 334 | function testProps( props, prefixed ) { 335 | for ( var i in props ) { 336 | var prop = props[i]; 337 | if ( !contains(prop, "-") && mStyle[prop] !== undefined ) { 338 | return prefixed == 'pfx' ? prop : true; 339 | } 340 | } 341 | return false; 342 | } 343 | /*>>testprop*/ 344 | 345 | // TODO :: add testDOMProps 346 | /** 347 | * testDOMProps is a generic DOM property test; if a browser supports 348 | * a certain property, it won't return undefined for it. 
349 | */ 350 | function testDOMProps( props, obj, elem ) { 351 | for ( var i in props ) { 352 | var item = obj[props[i]]; 353 | if ( item !== undefined) { 354 | 355 | // return the property name as a string 356 | if (elem === false) return props[i]; 357 | 358 | // let's bind a function 359 | if (is(item, 'function')){ 360 | // default to autobind unless override 361 | return item.bind(elem || obj); 362 | } 363 | 364 | // return the unbound function or obj or value 365 | return item; 366 | } 367 | } 368 | return false; 369 | } 370 | 371 | /*>>testallprops*/ 372 | /** 373 | * testPropsAll tests a list of DOM properties we want to check against. 374 | * We specify literally ALL possible (known and/or likely) properties on 375 | * the element including the non-vendor prefixed one, for forward- 376 | * compatibility. 377 | */ 378 | function testPropsAll( prop, prefixed, elem ) { 379 | 380 | var ucProp = prop.charAt(0).toUpperCase() + prop.slice(1), 381 | props = (prop + ' ' + cssomPrefixes.join(ucProp + ' ') + ucProp).split(' '); 382 | 383 | // did they call .prefixed('boxSizing') or are we just testing a prop? 
384 | if(is(prefixed, "string") || is(prefixed, "undefined")) { 385 | return testProps(props, prefixed); 386 | 387 | // otherwise, they called .prefixed('requestAnimationFrame', window[, elem]) 388 | } else { 389 | props = (prop + ' ' + (domPrefixes).join(ucProp + ' ') + ucProp).split(' '); 390 | return testDOMProps(props, prefixed, elem); 391 | } 392 | } 393 | /*>>testallprops*/ 394 | 395 | 396 | /** 397 | * Tests 398 | * ----- 399 | */ 400 | 401 | // The *new* flexbox 402 | // dev.w3.org/csswg/css3-flexbox 403 | 404 | tests['flexbox'] = function() { 405 | return testPropsAll('flexWrap'); 406 | }; 407 | 408 | // The *old* flexbox 409 | // www.w3.org/TR/2009/WD-css3-flexbox-20090723/ 410 | 411 | tests['flexboxlegacy'] = function() { 412 | return testPropsAll('boxDirection'); 413 | }; 414 | 415 | // On the S60 and BB Storm, getContext exists, but always returns undefined 416 | // so we actually have to call getContext() to verify 417 | // github.com/Modernizr/Modernizr/issues/issue/97/ 418 | 419 | tests['canvas'] = function() { 420 | var elem = document.createElement('canvas'); 421 | return !!(elem.getContext && elem.getContext('2d')); 422 | }; 423 | 424 | tests['canvastext'] = function() { 425 | return !!(Modernizr['canvas'] && is(document.createElement('canvas').getContext('2d').fillText, 'function')); 426 | }; 427 | 428 | // webk.it/70117 is tracking a legit WebGL feature detect proposal 429 | 430 | // We do a soft detect which may false positive in order to avoid 431 | // an expensive context creation: bugzil.la/732441 432 | 433 | tests['webgl'] = function() { 434 | return !!window.WebGLRenderingContext; 435 | }; 436 | 437 | /* 438 | * The Modernizr.touch test only indicates if the browser supports 439 | * touch events, which does not necessarily reflect a touchscreen 440 | * device, as evidenced by tablets running Windows 7 or, alas, 441 | * the Palm Pre / WebOS (touch) phones. 
442 | * 443 | * Additionally, Chrome (desktop) used to lie about its support on this, 444 | * but that has since been rectified: crbug.com/36415 445 | * 446 | * We also test for Firefox 4 Multitouch Support. 447 | * 448 | * For more info, see: modernizr.github.com/Modernizr/touch.html 449 | */ 450 | 451 | tests['touch'] = function() { 452 | var bool; 453 | 454 | if(('ontouchstart' in window) || window.DocumentTouch && document instanceof DocumentTouch) { 455 | bool = true; 456 | } else { 457 | injectElementWithStyles(['@media (',prefixes.join('touch-enabled),('),mod,')','{#modernizr{top:9px;position:absolute}}'].join(''), function( node ) { 458 | bool = node.offsetTop === 9; 459 | }); 460 | } 461 | 462 | return bool; 463 | }; 464 | 465 | 466 | // geolocation is often considered a trivial feature detect... 467 | // Turns out, it's quite tricky to get right: 468 | // 469 | // Using !!navigator.geolocation does two things we don't want. It: 470 | // 1. Leaks memory in IE9: github.com/Modernizr/Modernizr/issues/513 471 | // 2. Disables page caching in WebKit: webk.it/43956 472 | // 473 | // Meanwhile, in Firefox < 8, an about:config setting could expose 474 | // a false positive that would throw an exception: bugzil.la/688158 475 | 476 | tests['geolocation'] = function() { 477 | return 'geolocation' in navigator; 478 | }; 479 | 480 | 481 | tests['postmessage'] = function() { 482 | return !!window.postMessage; 483 | }; 484 | 485 | 486 | // Chrome incognito mode used to throw an exception when using openDatabase 487 | // It doesn't anymore. 
tests['websqldatabase'] = function() {
    return Boolean(window.openDatabase);
};

// Vendors had inconsistent prefixing with the experimental Indexed DB:
// - Webkit's implementation is accessible through webkitIndexedDB
// - Firefox shipped moz_indexedDB before FF4b9, but since then has been mozIndexedDB
// For speed, we don't test the legacy (and beta-only) indexedDB
tests['indexedDB'] = function() {
    // Passing window makes testPropsAll look the name up as a DOM property.
    var found = testPropsAll("indexedDB", window);
    return !!found;
};

// documentMode logic from YUI to filter out IE8 Compat Mode
//   which false positives.
tests['hashchange'] = function() {
    var mode;

    if ( !isEventSupported('hashchange', window) ) {
        return false;
    }

    mode = document.documentMode;
    return mode === undefined || mode > 7;
};

// Per 1.6:
// This used to be Modernizr.historymanagement but the longer
// name has been deprecated in favor of a shorter and property-matching one.
// The old API is still available in 1.6, but as of 2.0 will throw a warning,
// and in the first release thereafter disappear entirely.
tests['history'] = function() {
    return Boolean(window.history && history.pushState);
};

tests['draganddrop'] = function() {
    var probe = document.createElement('div');

    // Either the draggable attribute or both drag event handlers qualify.
    if ( 'draggable' in probe ) {
        return true;
    }
    return 'ondragstart' in probe && 'ondrop' in probe;
};

// FF3.6 was EOL'ed on 4/24/12, but the ESR version of FF10
// will be supported until FF19 (2/12/13), at which time, ESR becomes FF17.
// FF10 still uses prefixes, so check for it until then.
// for more ESR info, see: mozilla.org/en-US/firefox/organizations/faq/
tests['websockets'] = function() {
    return 'WebSocket' in window || 'MozWebSocket' in window;
};


// css-tricks.com/rgba-browser-support/
tests['rgba'] = function() {
    // Set an rgba() color and check the returned value

    setCss('background-color:rgba(150,255,150,.5)');

    return contains(mStyle.backgroundColor, 'rgba');
};

tests['hsla'] = function() {
    // Same as rgba(), in fact, browsers re-map hsla() to rgba() internally,
    //   except IE9 who retains it as hsla

    setCss('background-color:hsla(120,40%,100%,.5)');

    return contains(mStyle.backgroundColor, 'rgba') || contains(mStyle.backgroundColor, 'hsla');
};

tests['multiplebgs'] = function() {
    // Setting multiple images AND a color on the background shorthand property
    //  and then querying the style.background property value for the number of
    //  occurrences of "url(" is a reliable method for detecting ACTUAL support for this!

    setCss('background:url(https://),url(https://),red url(https://)');

    // If the UA supports multiple backgrounds, there should be three occurrences
    //   of the string "url(" in the return value for elemStyle.background

    return (/(url\s*\(.*?){3}/).test(mStyle.background);
};



// this will false positive in Opera Mini
//   github.com/Modernizr/Modernizr/issues/396

tests['backgroundsize'] = function() {
    return testPropsAll('backgroundSize');
};

tests['borderimage'] = function() {
    return testPropsAll('borderImage');
};


// Super comprehensive table about all the unique implementations of
// border-radius: muddledramblings.com/table-of-css3-border-radius-compliance

tests['borderradius'] = function() {
    return testPropsAll('borderRadius');
};

// WebOS unfortunately false positives on this test.
tests['boxshadow'] = function() {
    return testPropsAll('boxShadow');
};

// FF3.0 will false positive on this test
tests['textshadow'] = function() {
    // A supported but unset style property reads back as the empty string.
    return document.createElement('div').style.textShadow === '';
};


tests['opacity'] = function() {
    // Browsers that actually have CSS Opacity implemented have done so
    //  according to spec, which means their return values are within the
    //  range of [0.0,1.0] - including the leading zero.

    setCssAll('opacity:.55');

    // The non-literal . in this regex is intentional:
    //   German Chrome returns this value as 0,55
    // github.com/Modernizr/Modernizr/issues/#issue/59/comment/516632
    return (/^0.55$/).test(mStyle.opacity);
};


// Note, Android < 4 will pass this test, but can only animate
//   a single property at a time
//   goo.gl/v3V4Gp
tests['cssanimations'] = function() {
    return testPropsAll('animationName');
};


tests['csscolumns'] = function() {
    return testPropsAll('columnCount');
};


tests['cssgradients'] = function() {
    /**
     * For CSS Gradients syntax, please see:
     * webkit.org/blog/175/introducing-css-gradients/
     * developer.mozilla.org/en/CSS/-moz-linear-gradient
     * developer.mozilla.org/en/CSS/-moz-radial-gradient
     * dev.w3.org/csswg/css3-images/#gradients-
     */

    var str1 = 'background-image:',
        str2 = 'gradient(linear,left top,right bottom,from(#9f9),to(white));',
        str3 = 'linear-gradient(left top,#9f9, white);';

    // Builds one declaration list: the legacy -webkit-gradient() form first,
    // then linear-gradient() unprefixed and with every vendor prefix. Each
    // join leaves a dangling 'background-image:' at the end, which the final
    // slice removes.
    setCss(
        // legacy webkit syntax (FIXME: remove when syntax not in use anymore)
        (str1 + '-webkit- '.split(' ').join(str2 + str1) +
        // standard syntax             // trailing 'background-image:'
        prefixes.join(str3 + str1)).slice(0, -str1.length)
    );

    return contains(mStyle.backgroundImage, 'gradient');
};


tests['cssreflections'] = function() {
    return testPropsAll('boxReflect');
};


tests['csstransforms'] = function() {
    return !!testPropsAll('transform');
};


tests['csstransforms3d'] = function() {

    var ret = !!testPropsAll('perspective');

    // Webkit's 3D transforms are passed off to the browser's own graphics renderer.
    //   It works fine in Safari on Leopard and Snow Leopard, but not in Chrome in
    //   some conditions. As a result, Webkit typically recognizes the syntax but
    //   will sometimes throw a false positive, thus we must do a more thorough check:
    if ( ret && 'webkitPerspective' in docElement.style ) {

        // Webkit allows this media query to succeed only if the feature is enabled.
        // `@media (transform-3d),(-webkit-transform-3d){ ... }`
        injectElementWithStyles('@media (transform-3d),(-webkit-transform-3d){#modernizr{left:9px;position:absolute;height:3px;}}', function( node, rule ) {
            ret = node.offsetLeft === 9 && node.offsetHeight === 3;
        });
    }
    return ret;
};


tests['csstransitions'] = function() {
    return testPropsAll('transition');
};


/*>>fontface*/
// @font-face detection routine by Diego Perini
// javascript.nwbox.com/CSSSupport/

// false positives:
//   WebOS github.com/Modernizr/Modernizr/issues/342
//   WP7   github.com/Modernizr/Modernizr/issues/538
tests['fontface'] = function() {
    var bool;

    injectElementWithStyles('@font-face {font-family:"font";src:url("https://")}', function( node, rule ) {
        // Read the injected rule back from the style sheet: supported when
        // the parsed text still starts with '@font-face' and keeps its src.
        var style = document.getElementById('smodernizr'),
            sheet = style.sheet || style.styleSheet,
            cssText = sheet ? (sheet.cssRules && sheet.cssRules[0] ? sheet.cssRules[0].cssText : sheet.cssText || '') : '';

        bool = /src/i.test(cssText) && cssText.indexOf(rule.split(' ')[0]) === 0;
    });

    return bool;
};
/*>>fontface*/

// CSS generated content detection
tests['generatedcontent'] = function() {
    var bool;

    // The element itself is given a zero-size font, so any height of 3px or
    // more has to come from the :after content.
    injectElementWithStyles(['#',mod,'{font:0/0 a}#',mod,':after{content:"',smile,'";visibility:hidden;font:3px/1 a}'].join(''), function( node ) {
        bool = node.offsetHeight >= 3;
    });

    return bool;
};



// These tests evaluate support of the video/audio elements, as well as
// testing what types of content they support.
//
// We're using the Boolean constructor here, so that we can extend the value
// e.g.  Modernizr.video     // true
//       Modernizr.video.ogg // 'probably'
//
// Codec values from : github.com/NielsLeenheer/html5test/blob/9106a8/index.html#L845
//                     thx to NielsLeenheer and zcorpan

// Note: in some older browsers, "no" was a return value instead of empty string.
//   It was live in FF3.5.0 and 3.5.1, but fixed in 3.5.2
//   It was also live in Safari 4.0.0 - 4.0.4, but fixed in 4.0.5

tests['video'] = function() {
    var elem = document.createElement('video'),
        bool = false;

    // IE9 Running on Windows Server SKU can cause an exception to be thrown, bug #224
    try {
        // Intentional assignment inside the condition.
        if ( bool = !!elem.canPlayType ) {
            bool = new Boolean(bool);
            // The replace() normalises the legacy "no" answer to ''.
            bool.ogg = elem.canPlayType('video/ogg; codecs="theora"') .replace(/^no$/,'');

            // Without QuickTime, this value will be `undefined`. github.com/Modernizr/Modernizr/issues/546
            bool.h264 = elem.canPlayType('video/mp4; codecs="avc1.42E01E"') .replace(/^no$/,'');

            bool.webm = elem.canPlayType('video/webm; codecs="vp8, vorbis"').replace(/^no$/,'');
        }

    } catch(e) { }

    return bool;
};

tests['audio'] = function() {
    var elem = document.createElement('audio'),
        bool = false;

    try {
        // Intentional assignment inside the condition.
        if ( bool = !!elem.canPlayType ) {
            bool = new Boolean(bool);
            bool.ogg = elem.canPlayType('audio/ogg; codecs="vorbis"').replace(/^no$/,'');
            bool.mp3 = elem.canPlayType('audio/mpeg;') .replace(/^no$/,'');

            // Mimetypes accepted:
            //   developer.mozilla.org/En/Media_formats_supported_by_the_audio_and_video_elements
            //   bit.ly/iphoneoscodecs
            bool.wav = elem.canPlayType('audio/wav; codecs="1"') .replace(/^no$/,'');
            bool.m4a = ( elem.canPlayType('audio/x-m4a;') ||
                         elem.canPlayType('audio/aac;')) .replace(/^no$/,'');
        }
    } catch(e) { }

    return bool;
};


// In FF4, if disabled, window.localStorage should === null.

// Normally, we could not test that directly and need to do a
//   `('localStorage' in window) && ` test first because otherwise Firefox will
//   throw bugzil.la/365772 if cookies are disabled

// Also in iOS5 Private Browsing mode, attempting to use localStorage.setItem
// will throw the exception:
//   QUOTA_EXCEEDED_ERRROR DOM Exception 22.
// Peculiarly, getItem and removeItem calls do not throw.

// Because we are forced to try/catch this, we'll go aggressive.
// Just FWIW: IE8 Compat mode supports these features completely:
//   www.quirksmode.org/dom/html5.html
// But IE8 doesn't support either with local files

tests['localstorage'] = function() {
    // Write and remove a throwaway key; any exception means "unsupported".
    try {
        localStorage.setItem(mod, mod);
        localStorage.removeItem(mod);
        return true;
    } catch(e) {
        return false;
    }
};

tests['sessionstorage'] = function() {
    // Same probe as localstorage, against sessionStorage.
    try {
        sessionStorage.setItem(mod, mod);
        sessionStorage.removeItem(mod);
        return true;
    } catch(e) {
        return false;
    }
};


tests['webworkers'] = function() {
    return !!window.Worker;
};


tests['applicationcache'] = function() {
    return !!window.applicationCache;
};


// Thanks to Erik Dahlstrom
tests['svg'] = function() {
    return !!document.createElementNS && !!document.createElementNS(ns.svg, 'svg').createSVGRect;
};

// specifically for SVG inline in HTML, not within XHTML
// test page: paulirish.com/demo/inline-svg
tests['inlinesvg'] = function() {
    var div = document.createElement('div');
    // Parse an <svg> tag through the HTML parser; inline SVG is supported
    // when the resulting child lands in the SVG namespace. With empty markup
    // there is no child at all and the test could never pass.
    div.innerHTML = '<svg/>';
    return (div.firstChild && div.firstChild.namespaceURI) == ns.svg;
};

// SVG SMIL animation
tests['smil'] = function() {
    return !!document.createElementNS && /SVGAnimate/.test(toString.call(document.createElementNS(ns.svg, 'animate')));
};

// This test is only for clip paths in SVG proper, not clip paths on HTML content
// demo: srufaculty.sru.edu/david.dailey/svg/newstuff/clipPath4.svg

// However read the comments to dig into applying SVG clippaths to HTML content here:
//   github.com/Modernizr/Modernizr/issues/213#issuecomment-1149491
tests['svgclippaths'] = function() {
    return !!document.createElementNS && /SVGClipPath/.test(toString.call(document.createElementNS(ns.svg, 'clipPath')));
};

/*>>webforms*/
// input features and input types go directly onto the ret object, bypassing the tests loop.
// Hold this guy to execute in a moment.
function webforms() {
    /*>>input*/
    // Run through HTML5's new input attributes to see if the UA understands any.
    // We're using f which is the <input> element created early on
    // Mike Taylr has created a comprehensive resource for testing these attributes
    //   when applied to all input types:
    //   miketaylr.com/code/input-type-attr.html
    // spec: www.whatwg.org/specs/web-apps/current-work/multipage/the-input-element.html#input-type-attr-summary

    // Only input placeholder is tested while textarea's placeholder is not.
    // Currently Safari 4 and Opera 11 have support only for the input placeholder
    // Both tests are available in feature-detects/forms-placeholder.js
    Modernizr['input'] = (function( props ) {
        for ( var i = 0, len = props.length; i < len; i++ ) {
            attrs[ props[i] ] = !!(props[i] in inputElem);
        }
        if (attrs.list){
            // safari false positive's on datalist: webk.it/74252
            // see also github.com/Modernizr/Modernizr/issues/146
            attrs.list = !!(document.createElement('datalist') && window.HTMLDataListElement);
        }
        return attrs;
    })('autocomplete autofocus list placeholder max min multiple pattern required step'.split(' '));
    /*>>input*/

    /*>>inputtypes*/
    // Run through HTML5's new input types to see if the UA understands any.
    //   This is put behind the tests runloop because it doesn't return a
    //   true/false like all the other tests; instead, it returns an object
    //   containing each input type with its corresponding true/false value

    // Big thanks to @miketaylr for the html5 forms expertise. miketaylr.com/
    Modernizr['inputtypes'] = (function(props) {

        for ( var i = 0, bool, inputElemType, defaultView, len = props.length; i < len; i++ ) {

            inputElem.setAttribute('type', inputElemType = props[i]);
            bool = inputElem.type !== 'text';

            // We first check to see if the type we give it sticks..
            // If the type does, we feed it a textual value, which shouldn't be valid.
            // If the value doesn't stick, we know there's input sanitization which infers a custom UI
            if ( bool ) {

                inputElem.value = smile;
                inputElem.style.cssText = 'position:absolute;visibility:hidden;';

                if ( /^range$/.test(inputElemType) && inputElem.style.WebkitAppearance !== undefined ) {

                    docElement.appendChild(inputElem);
                    defaultView = document.defaultView;

                    // Safari 2-4 allows the smiley as a value, despite making a slider
                    bool = defaultView.getComputedStyle &&
                           defaultView.getComputedStyle(inputElem, null).WebkitAppearance !== 'textfield' &&
                           // Mobile android web browser has false positive, so must
                           // check the height to see if the widget is actually there.
                           (inputElem.offsetHeight !== 0);

                    docElement.removeChild(inputElem);

                } else if ( /^(search|tel)$/.test(inputElemType) ){
                    // Spec doesn't define any special parsing or detectable UI
                    //   behaviors so we pass these through as true

                    // Interestingly, opera fails the earlier test, so it doesn't
                    //   even make it here.

                } else if ( /^(url|email)$/.test(inputElemType) ) {
                    // Real url and email support comes with prebaked validation.
                    bool = inputElem.checkValidity && inputElem.checkValidity() === false;

                } else {
                    // If the upgraded input component rejects the :) text, we got a winner
                    bool = inputElem.value != smile;
                }
            }

            inputs[ props[i] ] = !!bool;
        }
        return inputs;
    })('search tel url email datetime date month week time datetime-local number range color'.split(' '));
    /*>>inputtypes*/
}
/*>>webforms*/


// End of test definitions
// -----------------------



// Run through all tests and detect their support in the current UA.
// todo: hypothetically we could be doing an array of tests and use a basic loop here.
for ( var feature in tests ) {
    if ( hasOwnProp(tests, feature) ) {
        // run the test, throw the return value into the Modernizr,
        //   then based on that boolean, define an appropriate className
        //   and push it into an array of classes we'll join later.
        featureName = feature.toLowerCase();
        Modernizr[featureName] = tests[feature]();

        classes.push((Modernizr[featureName] ? '' : 'no-') + featureName);
    }
}

/*>>webforms*/
// input tests need to run.
if ( !Modernizr.input ) {
    webforms();
}
/*>>webforms*/


/**
 * addTest registers a user-defined feature test. The outcome is stored on
 * the Modernizr object and, when classes are enabled, reflected as a
 * 'feature' / 'no-feature' class name on the html element.
 *
 * @param feature - String naming the feature, or an object mapping feature
 *                  names to tests (each entry is registered in turn)
 * @param test - Function returning true if feature is supported, false if
 *               not; a non-function value is stored as the result directly
 */
Modernizr.addTest = function ( feature, test ) {
    var name;

    // Object form: register every own entry, then hand back Modernizr.
    if ( typeof feature == 'object' ) {
        for ( name in feature ) {
            if ( hasOwnProp( feature, name ) ) {
                Modernizr.addTest( name, feature[ name ] );
            }
        }
        return Modernizr; // allow chaining.
    }

    feature = feature.toLowerCase();

    if ( Modernizr[feature] !== undefined ) {
        // we're going to quit if you're trying to overwrite an existing test
        // if we were to allow it, we'd do this:
        //   var re = new RegExp("\\b(no-)?" + feature + "\\b");
        //   docElement.className = docElement.className.replace( re, '' );
        // but, no rly, stuff 'em.
        return Modernizr;
    }

    if ( typeof test == 'function' ) {
        test = test();
    }

    if (typeof enableClasses !== "undefined" && enableClasses) {
        docElement.className += ' ' + (test ? '' : 'no-') + feature;
    }
    Modernizr[feature] = test;

    return Modernizr; // allow chaining.
};


// Reset modElem.cssText to nothing to reduce memory footprint.
1003 | setCss(''); 1004 | modElem = inputElem = null; 1005 | 1006 | /*>>shiv*/ 1007 | /** 1008 | * @preserve HTML5 Shiv prev3.7.1 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed 1009 | */ 1010 | ;(function(window, document) { 1011 | /*jshint evil:true */ 1012 | /** version */ 1013 | var version = '3.7.0'; 1014 | 1015 | /** Preset options */ 1016 | var options = window.html5 || {}; 1017 | 1018 | /** Used to skip problem elements */ 1019 | var reSkip = /^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i; 1020 | 1021 | /** Not all elements can be cloned in IE **/ 1022 | var saveClones = /^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i; 1023 | 1024 | /** Detect whether the browser supports default html5 styles */ 1025 | var supportsHtml5Styles; 1026 | 1027 | /** Name of the expando, to work with multiple documents or to re-shiv one document */ 1028 | var expando = '_html5shiv'; 1029 | 1030 | /** The id for the the documents expando */ 1031 | var expanID = 0; 1032 | 1033 | /** Cached data for each document */ 1034 | var expandoData = {}; 1035 | 1036 | /** Detect whether the browser supports unknown elements */ 1037 | var supportsUnknownElements; 1038 | 1039 | (function() { 1040 | try { 1041 | var a = document.createElement('a'); 1042 | a.innerHTML = ''; 1043 | //if the hidden property is implemented we can assume, that the browser supports basic HTML5 Styles 1044 | supportsHtml5Styles = ('hidden' in a); 1045 | 1046 | supportsUnknownElements = a.childNodes.length == 1 || (function() { 1047 | // assign a false positive if unable to shiv 1048 | (document.createElement)('a'); 1049 | var frag = document.createDocumentFragment(); 1050 | return ( 1051 | typeof frag.cloneNode == 'undefined' || 1052 | typeof frag.createDocumentFragment == 'undefined' || 1053 | typeof frag.createElement == 'undefined' 1054 | ); 1055 | }()); 1056 | } catch(e) { 1057 | // assign a false positive if detection fails 
=> unable to shiv 1058 | supportsHtml5Styles = true; 1059 | supportsUnknownElements = true; 1060 | } 1061 | 1062 | }()); 1063 | 1064 | /*--------------------------------------------------------------------------*/ 1065 | 1066 | /** 1067 | * Creates a style sheet with the given CSS text and adds it to the document. 1068 | * @private 1069 | * @param {Document} ownerDocument The document. 1070 | * @param {String} cssText The CSS text. 1071 | * @returns {StyleSheet} The style element. 1072 | */ 1073 | function addStyleSheet(ownerDocument, cssText) { 1074 | var p = ownerDocument.createElement('p'), 1075 | parent = ownerDocument.getElementsByTagName('head')[0] || ownerDocument.documentElement; 1076 | 1077 | p.innerHTML = 'x'; 1078 | return parent.insertBefore(p.lastChild, parent.firstChild); 1079 | } 1080 | 1081 | /** 1082 | * Returns the value of `html5.elements` as an array. 1083 | * @private 1084 | * @returns {Array} An array of shived element node names. 1085 | */ 1086 | function getElements() { 1087 | var elements = html5.elements; 1088 | return typeof elements == 'string' ? elements.split(' ') : elements; 1089 | } 1090 | 1091 | /** 1092 | * Returns the data associated to the given document 1093 | * @private 1094 | * @param {Document} ownerDocument The document. 1095 | * @returns {Object} An object of data. 1096 | */ 1097 | function getExpandoData(ownerDocument) { 1098 | var data = expandoData[ownerDocument[expando]]; 1099 | if (!data) { 1100 | data = {}; 1101 | expanID++; 1102 | ownerDocument[expando] = expanID; 1103 | expandoData[expanID] = data; 1104 | } 1105 | return data; 1106 | } 1107 | 1108 | /** 1109 | * returns a shived element for the given nodeName and document 1110 | * @memberOf html5 1111 | * @param {String} nodeName name of the element 1112 | * @param {Document} ownerDocument The context document. 1113 | * @returns {Object} The shived element. 
1114 | */ 1115 | function createElement(nodeName, ownerDocument, data){ 1116 | if (!ownerDocument) { 1117 | ownerDocument = document; 1118 | } 1119 | if(supportsUnknownElements){ 1120 | return ownerDocument.createElement(nodeName); 1121 | } 1122 | if (!data) { 1123 | data = getExpandoData(ownerDocument); 1124 | } 1125 | var node; 1126 | 1127 | if (data.cache[nodeName]) { 1128 | node = data.cache[nodeName].cloneNode(); 1129 | } else if (saveClones.test(nodeName)) { 1130 | node = (data.cache[nodeName] = data.createElem(nodeName)).cloneNode(); 1131 | } else { 1132 | node = data.createElem(nodeName); 1133 | } 1134 | 1135 | // Avoid adding some elements to fragments in IE < 9 because 1136 | // * Attributes like `name` or `type` cannot be set/changed once an element 1137 | // is inserted into a document/fragment 1138 | // * Link elements with `src` attributes that are inaccessible, as with 1139 | // a 403 response, will cause the tab/window to crash 1140 | // * Script elements appended to fragments will execute when their `src` 1141 | // or `text` property is set 1142 | return node.canHaveChildren && !reSkip.test(nodeName) && !node.tagUrn ? data.frag.appendChild(node) : node; 1143 | } 1144 | 1145 | /** 1146 | * returns a shived DocumentFragment for the given document 1147 | * @memberOf html5 1148 | * @param {Document} ownerDocument The context document. 1149 | * @returns {Object} The shived DocumentFragment. 
1150 | */ 1151 | function createDocumentFragment(ownerDocument, data){ 1152 | if (!ownerDocument) { 1153 | ownerDocument = document; 1154 | } 1155 | if(supportsUnknownElements){ 1156 | return ownerDocument.createDocumentFragment(); 1157 | } 1158 | data = data || getExpandoData(ownerDocument); 1159 | var clone = data.frag.cloneNode(), 1160 | i = 0, 1161 | elems = getElements(), 1162 | l = elems.length; 1163 | for(;i>shiv*/ 1309 | 1310 | // Assign private properties to the return object with prefix 1311 | Modernizr._version = version; 1312 | 1313 | // expose these for the plugin API. Look in the source for how to join() them against your input 1314 | /*>>prefixes*/ 1315 | Modernizr._prefixes = prefixes; 1316 | /*>>prefixes*/ 1317 | /*>>domprefixes*/ 1318 | Modernizr._domPrefixes = domPrefixes; 1319 | Modernizr._cssomPrefixes = cssomPrefixes; 1320 | /*>>domprefixes*/ 1321 | 1322 | /*>>mq*/ 1323 | // Modernizr.mq tests a given media query, live against the current state of the window 1324 | // A few important notes: 1325 | // * If a browser does not support media queries at all (eg. oldIE) the mq() will always return false 1326 | // * A max-width or orientation query will be evaluated against the current state, which may change later. 1327 | // * You must specify values. Eg. If you are testing support for the min-width media query use: 1328 | // Modernizr.mq('(min-width:0)') 1329 | // usage: 1330 | // Modernizr.mq('only screen and (max-width:768)') 1331 | Modernizr.mq = testMediaQuery; 1332 | /*>>mq*/ 1333 | 1334 | /*>>hasevent*/ 1335 | // Modernizr.hasEvent() detects support for a given event, with an optional element to test on 1336 | // Modernizr.hasEvent('gesturestart', elem) 1337 | Modernizr.hasEvent = isEventSupported; 1338 | /*>>hasevent*/ 1339 | 1340 | /*>>testprop*/ 1341 | // Modernizr.testProp() investigates whether a given style property is recognized 1342 | // Note that the property names must be provided in the camelCase variant. 
1343 | // Modernizr.testProp('pointerEvents') 1344 | Modernizr.testProp = function(prop){ 1345 | return testProps([prop]); 1346 | }; 1347 | /*>>testprop*/ 1348 | 1349 | /*>>testallprops*/ 1350 | // Modernizr.testAllProps() investigates whether a given style property, 1351 | // or any of its vendor-prefixed variants, is recognized 1352 | // Note that the property names must be provided in the camelCase variant. 1353 | // Modernizr.testAllProps('boxSizing') 1354 | Modernizr.testAllProps = testPropsAll; 1355 | /*>>testallprops*/ 1356 | 1357 | 1358 | /*>>teststyles*/ 1359 | // Modernizr.testStyles() allows you to add custom styles to the document and test an element afterwards 1360 | // Modernizr.testStyles('#modernizr { position:absolute }', function(elem, rule){ ... }) 1361 | Modernizr.testStyles = injectElementWithStyles; 1362 | /*>>teststyles*/ 1363 | 1364 | 1365 | /*>>prefixed*/ 1366 | // Modernizr.prefixed() returns the prefixed or nonprefixed property name variant of your input 1367 | // Modernizr.prefixed('boxSizing') // 'MozBoxSizing' 1368 | 1369 | // Properties must be passed as dom-style camelcase, rather than `box-sizing` hyphenated style. 1370 | // Return values will also be the camelCase variant, if you need to translate that to hyphenated style use: 1371 | // 1372 | // str.replace(/([A-Z])/g, function(str,m1){ return '-' + m1.toLowerCase(); }).replace(/^ms-/,'-ms-'); 1373 | 1374 | // If you're trying to ascertain which transition end event to bind to, you might do something like...
1375 | // 1376 | // var transEndEventNames = { 1377 | // 'WebkitTransition' : 'webkitTransitionEnd', 1378 | // 'MozTransition' : 'transitionend', 1379 | // 'OTransition' : 'oTransitionEnd', 1380 | // 'msTransition' : 'MSTransitionEnd', 1381 | // 'transition' : 'transitionend' 1382 | // }, 1383 | // transEndEventName = transEndEventNames[ Modernizr.prefixed('transition') ]; 1384 | 1385 | Modernizr.prefixed = function(prop, obj, elem){ 1386 | if(!obj) { 1387 | return testPropsAll(prop, 'pfx'); 1388 | } else { 1389 | // Testing DOM property e.g. Modernizr.prefixed('requestAnimationFrame', window) // 'mozRequestAnimationFrame' 1390 | return testPropsAll(prop, obj, elem); 1391 | } 1392 | }; 1393 | /*>>prefixed*/ 1394 | 1395 | 1396 | /*>>cssclasses*/ 1397 | // Remove "no-js" class from element, if it exists: 1398 | docElement.className = docElement.className.replace(/(^|\s)no-js(\s|$)/, '$1$2') + 1399 | 1400 | // Add the new classes to the element. 1401 | (enableClasses ? ' js ' + classes.join(' ') : ''); 1402 | /*>>cssclasses*/ 1403 | 1404 | return Modernizr; 1405 | 1406 | })(this, this.document); 1407 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/module.js: -------------------------------------------------------------------------------- 1 | var universe = require('./universe'); 2 | universe.start(); 3 | console.log('The answer is' + universe.answer); 4 | phantom.exit(); 5 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/movies.js: -------------------------------------------------------------------------------- 1 | // List movies from kids-in-mind.com 2 | 3 | var cbfunc = function (data) { 4 | globaldata= data; 5 | var list = data.query.results.movie; 6 | list.forEach(function (item) { 7 | console.log(item.title + ' [' + item.rating.MPAA.content + ']'); 8 | }); 9 | phantom.exit(); 10 | }; 11 | 12 | var el = 
document.createElement('script'); 13 | el.src = 'http://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20movies.kids-in-mind&format=json&env=store%3A%2F%2Fdatatables.org%2Falltableswithkeys&callback=cbfunc'; 14 | document.body.appendChild(el); 15 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/netlog.js: -------------------------------------------------------------------------------- 1 | var page = require('webpage').create(), 2 | system = require('system'), 3 | address; 4 | 5 | if (system.args.length === 1) { 6 | console.log('Usage: netlog.js '); 7 | phantom.exit(1); 8 | } else { 9 | address = system.args[1]; 10 | 11 | page.onResourceRequested = function (req) { 12 | console.log('requested: ' + JSON.stringify(req, undefined, 4)); 13 | }; 14 | 15 | page.onResourceReceived = function (res) { 16 | console.log('received: ' + JSON.stringify(res, undefined, 4)); 17 | }; 18 | 19 | page.open(address, function (status) { 20 | if (status !== 'success') { 21 | console.log('FAIL to load the address'); 22 | } 23 | phantom.exit(); 24 | }); 25 | } 26 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/netsniff.js: -------------------------------------------------------------------------------- 1 | if (!Date.prototype.toISOString) { 2 | Date.prototype.toISOString = function () { 3 | function pad(n) { return n < 10 ? '0' + n : n; } 4 | function ms(n) { return n < 10 ? '00'+ n : n < 100 ? '0' + n : n } 5 | return this.getFullYear() + '-' + 6 | pad(this.getMonth() + 1) + '-' + 7 | pad(this.getDate()) + 'T' + 8 | pad(this.getHours()) + ':' + 9 | pad(this.getMinutes()) + ':' + 10 | pad(this.getSeconds()) + '.' 
+ 11 | ms(this.getMilliseconds()) + 'Z'; 12 | } 13 | } 14 | 15 | function createHAR(address, title, startTime, resources) 16 | { 17 | var entries = []; 18 | 19 | resources.forEach(function (resource) { 20 | var request = resource.request, 21 | startReply = resource.startReply, 22 | endReply = resource.endReply; 23 | 24 | if (!request || !startReply || !endReply) { 25 | return; 26 | } 27 | 28 | // Exclude Data URI from HAR file because 29 | // they aren't included in specification 30 | if (request.url.match(/(^data:image\/.*)/i)) { 31 | return; 32 | } 33 | 34 | entries.push({ 35 | startedDateTime: request.time.toISOString(), 36 | time: endReply.time - request.time, 37 | request: { 38 | method: request.method, 39 | url: request.url, 40 | httpVersion: "HTTP/1.1", 41 | cookies: [], 42 | headers: request.headers, 43 | queryString: [], 44 | headersSize: -1, 45 | bodySize: -1 46 | }, 47 | response: { 48 | status: endReply.status, 49 | statusText: endReply.statusText, 50 | httpVersion: "HTTP/1.1", 51 | cookies: [], 52 | headers: endReply.headers, 53 | redirectURL: "", 54 | headersSize: -1, 55 | bodySize: startReply.bodySize, 56 | content: { 57 | size: startReply.bodySize, 58 | mimeType: endReply.contentType 59 | } 60 | }, 61 | cache: {}, 62 | timings: { 63 | blocked: 0, 64 | dns: -1, 65 | connect: -1, 66 | send: 0, 67 | wait: startReply.time - request.time, 68 | receive: endReply.time - startReply.time, 69 | ssl: -1 70 | }, 71 | pageref: address 72 | }); 73 | }); 74 | 75 | return { 76 | log: { 77 | version: '1.2', 78 | creator: { 79 | name: "PhantomJS", 80 | version: phantom.version.major + '.' + phantom.version.minor + 81 | '.' 
+ phantom.version.patch 82 | }, 83 | pages: [{ 84 | startedDateTime: startTime.toISOString(), 85 | id: address, 86 | title: title, 87 | pageTimings: { 88 | onLoad: page.endTime - page.startTime 89 | } 90 | }], 91 | entries: entries 92 | } 93 | }; 94 | } 95 | 96 | var page = require('webpage').create(), 97 | system = require('system'); 98 | 99 | if (system.args.length === 1) { 100 | console.log('Usage: netsniff.js '); 101 | phantom.exit(1); 102 | } else { 103 | 104 | page.address = system.args[1]; 105 | page.resources = []; 106 | 107 | page.onLoadStarted = function () { 108 | page.startTime = new Date(); 109 | }; 110 | 111 | page.onResourceRequested = function (req) { 112 | page.resources[req.id] = { 113 | request: req, 114 | startReply: null, 115 | endReply: null 116 | }; 117 | }; 118 | 119 | page.onResourceReceived = function (res) { 120 | if (res.stage === 'start') { 121 | page.resources[res.id].startReply = res; 122 | } 123 | if (res.stage === 'end') { 124 | page.resources[res.id].endReply = res; 125 | } 126 | }; 127 | 128 | page.open(page.address, function (status) { 129 | var har; 130 | if (status !== 'success') { 131 | console.log('FAIL to load the address'); 132 | phantom.exit(1); 133 | } else { 134 | page.endTime = new Date(); 135 | page.title = page.evaluate(function () { 136 | return document.title; 137 | }); 138 | har = createHAR(page.address, page.title, page.startTime, page.resources); 139 | console.log(JSON.stringify(har, undefined, 4)); 140 | phantom.exit(); 141 | } 142 | }); 143 | } 144 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/openurlwithproxy.js: -------------------------------------------------------------------------------- 1 | var page = require('webpage').create(), 2 | system = require('system'), 3 | host, port, address; 4 | 5 | if (system.args.length < 4) { 6 | console.log('Usage: openurlwithproxy.js '); 7 | phantom.exit(1); 8 | } else { 9 | host = system.args[1]; 10 | 
port = system.args[2]; 11 | address = system.args[3]; 12 | phantom.setProxy(host, port, 'manual', '', ''); 13 | page.open(address, function (status) { 14 | if (status !== 'success') { 15 | console.log('FAIL to load the address "' + 16 | address + '" using proxy "' + host + ':' + port + '"'); 17 | } else { 18 | console.log('Page title is ' + page.evaluate(function () { 19 | return document.title; 20 | })); 21 | } 22 | phantom.exit(); 23 | }); 24 | } 25 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/outputEncoding.js: -------------------------------------------------------------------------------- 1 | function helloWorld() { 2 | console.log(phantom.outputEncoding + ": こんにちは、世界!"); 3 | } 4 | 5 | console.log("Using default encoding..."); 6 | helloWorld(); 7 | 8 | console.log("\nUsing other encodings..."); 9 | 10 | var encodings = ["euc-jp", "sjis", "utf8", "System"]; 11 | for (var i = 0; i < encodings.length; i++) { 12 | phantom.outputEncoding = encodings[i]; 13 | helloWorld(); 14 | } 15 | 16 | phantom.exit() 17 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/page_events.js: -------------------------------------------------------------------------------- 1 | // The purpose of this is to show how and when events fire, considering 5 steps 2 | // happening as follows: 3 | // 4 | // 1. Load URL 5 | // 2. Load same URL, but adding an internal FRAGMENT to it 6 | // 3. Click on an internal Link, that points to another internal FRAGMENT 7 | // 4. Click on an external Link, that will send the page somewhere else 8 | // 5. Close page 9 | // 10 | // Take particular care when going through the output, to understand when 11 | // things happen (and in which order). Particularly, notice what DOESN'T 12 | // happen during step 3. 13 | // 14 | // If invoked with "-v" it will print out the Page Resources as they are 15 | // Requested and Received. 
16 | // 17 | // NOTE.1: The "onConsoleMessage/onAlert/onPrompt/onConfirm" events are 18 | // registered but not used here. This is left for you to have fun with. 19 | // NOTE.2: This script is not here to teach you ANY JavaScript. It's aweful! 20 | // NOTE.3: Main audience for this are people new to PhantomJS. 21 | 22 | var sys = require("system"), 23 | page = require("webpage").create(), 24 | logResources = false, 25 | step1url = "http://en.wikipedia.org/wiki/DOM_events", 26 | step2url = "http://en.wikipedia.org/wiki/DOM_events#Event_flow"; 27 | 28 | if (sys.args.length > 1 && sys.args[1] === "-v") { 29 | logResources = true; 30 | } 31 | 32 | function printArgs() { 33 | var i, ilen; 34 | for (i = 0, ilen = arguments.length; i < ilen; ++i) { 35 | console.log(" arguments[" + i + "] = " + JSON.stringify(arguments[i])); 36 | } 37 | console.log(""); 38 | } 39 | 40 | //////////////////////////////////////////////////////////////////////////////// 41 | 42 | page.onInitialized = function() { 43 | console.log("page.onInitialized"); 44 | printArgs.apply(this, arguments); 45 | }; 46 | page.onLoadStarted = function() { 47 | console.log("page.onLoadStarted"); 48 | printArgs.apply(this, arguments); 49 | }; 50 | page.onLoadFinished = function() { 51 | console.log("page.onLoadFinished"); 52 | printArgs.apply(this, arguments); 53 | }; 54 | page.onUrlChanged = function() { 55 | console.log("page.onUrlChanged"); 56 | printArgs.apply(this, arguments); 57 | }; 58 | page.onNavigationRequested = function() { 59 | console.log("page.onNavigationRequested"); 60 | printArgs.apply(this, arguments); 61 | }; 62 | page.onRepaintRequested = function() { 63 | console.log("page.onRepaintRequested"); 64 | printArgs.apply(this, arguments); 65 | }; 66 | 67 | if (logResources === true) { 68 | page.onResourceRequested = function() { 69 | console.log("page.onResourceRequested"); 70 | printArgs.apply(this, arguments); 71 | }; 72 | page.onResourceReceived = function() { 73 | 
console.log("page.onResourceReceived"); 74 | printArgs.apply(this, arguments); 75 | }; 76 | } 77 | 78 | page.onClosing = function() { 79 | console.log("page.onClosing"); 80 | printArgs.apply(this, arguments); 81 | }; 82 | 83 | // window.console.log(msg); 84 | page.onConsoleMessage = function() { 85 | console.log("page.onConsoleMessage"); 86 | printArgs.apply(this, arguments); 87 | }; 88 | 89 | // window.alert(msg); 90 | page.onAlert = function() { 91 | console.log("page.onAlert"); 92 | printArgs.apply(this, arguments); 93 | }; 94 | // var confirmed = window.confirm(msg); 95 | page.onConfirm = function() { 96 | console.log("page.onConfirm"); 97 | printArgs.apply(this, arguments); 98 | }; 99 | // var user_value = window.prompt(msg, default_value); 100 | page.onPrompt = function() { 101 | console.log("page.onPrompt"); 102 | printArgs.apply(this, arguments); 103 | }; 104 | 105 | //////////////////////////////////////////////////////////////////////////////// 106 | 107 | setTimeout(function() { 108 | console.log(""); 109 | console.log("### STEP 1: Load '" + step1url + "'"); 110 | page.open(step1url); 111 | }, 0); 112 | 113 | setTimeout(function() { 114 | console.log(""); 115 | console.log("### STEP 2: Load '" + step2url + "' (load same URL plus FRAGMENT)"); 116 | page.open(step2url); 117 | }, 5000); 118 | 119 | setTimeout(function() { 120 | console.log(""); 121 | console.log("### STEP 3: Click on page internal link (aka FRAGMENT)"); 122 | page.evaluate(function() { 123 | var ev = document.createEvent("MouseEvents"); 124 | ev.initEvent("click", true, true); 125 | document.querySelector("a[href='#Event_object']").dispatchEvent(ev); 126 | }); 127 | }, 10000); 128 | 129 | setTimeout(function() { 130 | console.log(""); 131 | console.log("### STEP 4: Click on page external link"); 132 | page.evaluate(function() { 133 | var ev = document.createEvent("MouseEvents"); 134 | ev.initEvent("click", true, true); 135 | document.querySelector("a[title='JavaScript']").dispatchEvent(ev); 
136 | }); 137 | }, 15000); 138 | 139 | setTimeout(function() { 140 | console.log(""); 141 | console.log("### STEP 5: Close page and shutdown (with a delay)"); 142 | page.close(); 143 | setTimeout(function(){ 144 | phantom.exit(); 145 | }, 100); 146 | }, 20000); 147 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/pagecallback.js: -------------------------------------------------------------------------------- 1 | var p = require("webpage").create(); 2 | 3 | p.onConsoleMessage = function(msg) { console.log(msg); }; 4 | 5 | // Calls to "callPhantom" within the page 'p' arrive here 6 | p.onCallback = function(msg) { 7 | console.log("Received by the 'phantom' main context: "+msg); 8 | return "Hello there, I'm coming to you from the 'phantom' context instead"; 9 | }; 10 | 11 | p.evaluate(function() { 12 | // Return-value of the "onCallback" handler arrive here 13 | var callbackResponse = window.callPhantom("Hello, I'm coming to you from the 'page' context"); 14 | console.log("Received by the 'page' context: "+callbackResponse); 15 | }); 16 | 17 | phantom.exit(); 18 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/phantomwebintro.js: -------------------------------------------------------------------------------- 1 | // Read the Phantom webpage '#intro' element text using jQuery and "includeJs" 2 | 3 | var page = require('webpage').create(); 4 | 5 | page.onConsoleMessage = function(msg) { 6 | console.log(msg); 7 | }; 8 | 9 | page.open("http://www.phantomjs.org", function(status) { 10 | if ( status === "success" ) { 11 | page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() { 12 | page.evaluate(function() { 13 | console.log("$(\".explanation\").text() -> " + $(".explanation").text()); 14 | }); 15 | phantom.exit(); 16 | }); 17 | } 18 | }); 19 | 20 | 
-------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/pizza.js: -------------------------------------------------------------------------------- 1 | // Find pizza in Mountain View using Yelp 2 | 3 | var page = require('webpage').create(), 4 | url = 'http://lite.yelp.com/search?find_desc=pizza&find_loc=94040&find_submit=Search'; 5 | 6 | page.open(url, function (status) { 7 | if (status !== 'success') { 8 | console.log('Unable to access network'); 9 | } else { 10 | var results = page.evaluate(function() { 11 | var list = document.querySelectorAll('address'), pizza = [], i; 12 | for (i = 0; i < list.length; i++) { 13 | pizza.push(list[i].innerText); 14 | } 15 | return pizza; 16 | }); 17 | console.log(results.join('\n')); 18 | } 19 | phantom.exit(); 20 | }); 21 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/post.js: -------------------------------------------------------------------------------- 1 | // Example using HTTP POST operation 2 | 3 | var page = require('webpage').create(), 4 | server = 'http://posttestserver.com/post.php?dump', 5 | data = 'universe=expanding&answer=42'; 6 | 7 | page.open(server, 'post', data, function (status) { 8 | if (status !== 'success') { 9 | console.log('Unable to post!'); 10 | } else { 11 | console.log(page.content); 12 | } 13 | phantom.exit(); 14 | }); 15 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/postjson.js: -------------------------------------------------------------------------------- 1 | // Example using HTTP POST operation 2 | 3 | var page = require('webpage').create(), 4 | server = 'http://posttestserver.com/post.php?dump', 5 | data = '{"universe": "expanding", "answer": 42}'; 6 | 7 | var headers = { 8 | "Content-Type": "application/json" 9 | } 10 | 11 | page.open(server, 'post', data, headers, function (status) { 12 | if (status !== 
'success') { 13 | console.log('Unable to post!'); 14 | } else { 15 | console.log(page.content); 16 | } 17 | phantom.exit(); 18 | }); 19 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/postserver.js: -------------------------------------------------------------------------------- 1 | // Example using HTTP POST operation 2 | 3 | var page = require('webpage').create(), 4 | server = require('webserver').create(), 5 | system = require('system'), 6 | data = 'universe=expanding&answer=42'; 7 | 8 | if (system.args.length !== 2) { 9 | console.log('Usage: postserver.js '); 10 | phantom.exit(1); 11 | } 12 | 13 | var port = system.args[1]; 14 | 15 | service = server.listen(port, function (request, response) { 16 | console.log('Request received at ' + new Date()); 17 | 18 | response.statusCode = 200; 19 | response.headers = { 20 | 'Cache': 'no-cache', 21 | 'Content-Type': 'text/plain;charset=utf-8' 22 | }; 23 | response.write(JSON.stringify(request, null, 4)); 24 | response.close(); 25 | }); 26 | 27 | page.open('http://localhost:' + port + '/', 'post', data, function (status) { 28 | if (status !== 'success') { 29 | console.log('Unable to post!'); 30 | } else { 31 | console.log(page.plainText); 32 | } 33 | phantom.exit(); 34 | }); 35 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/printenv.js: -------------------------------------------------------------------------------- 1 | var system = require('system'), 2 | env = system.env, 3 | key; 4 | 5 | for (key in env) { 6 | if (env.hasOwnProperty(key)) { 7 | console.log(key + '=' + env[key]); 8 | } 9 | } 10 | phantom.exit(); 11 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/printheaderfooter.js: -------------------------------------------------------------------------------- 1 | var page = require('webpage').create(), 2 | system = 
require('system'); 3 | 4 | function someCallback(pageNum, numPages) { 5 | return "

someCallback: " + pageNum + " / " + numPages + "

"; 6 | } 7 | 8 | if (system.args.length < 3) { 9 | console.log('Usage: printheaderfooter.js URL filename'); 10 | phantom.exit(1); 11 | } else { 12 | var address = system.args[1]; 13 | var output = system.args[2]; 14 | page.viewportSize = { width: 600, height: 600 }; 15 | page.paperSize = { 16 | format: 'A4', 17 | margin: "1cm", 18 | /* default header/footer for pages that don't have custom overwrites (see below) */ 19 | header: { 20 | height: "1cm", 21 | contents: phantom.callback(function(pageNum, numPages) { 22 | if (pageNum == 1) { 23 | return ""; 24 | } 25 | return "

Header " + pageNum + " / " + numPages + "

"; 26 | }) 27 | }, 28 | footer: { 29 | height: "1cm", 30 | contents: phantom.callback(function(pageNum, numPages) { 31 | if (pageNum == numPages) { 32 | return ""; 33 | } 34 | return "

Footer " + pageNum + " / " + numPages + "

"; 35 | }) 36 | } 37 | }; 38 | page.open(address, function (status) { 39 | if (status !== 'success') { 40 | console.log('Unable to load the address!'); 41 | } else { 42 | /* check whether the loaded page overwrites the header/footer setting, 43 | i.e. whether a PhantomJSPriting object exists. Use that then instead 44 | of our defaults above. 45 | 46 | example: 47 | 48 | 49 | 61 | 62 |

asdfadsf

asdfadsfycvx

63 | 64 | */ 65 | if (page.evaluate(function(){return typeof PhantomJSPrinting == "object";})) { 66 | paperSize = page.paperSize; 67 | paperSize.header.height = page.evaluate(function() { 68 | return PhantomJSPrinting.header.height; 69 | }); 70 | paperSize.header.contents = phantom.callback(function(pageNum, numPages) { 71 | return page.evaluate(function(pageNum, numPages){return PhantomJSPrinting.header.contents(pageNum, numPages);}, pageNum, numPages); 72 | }); 73 | paperSize.footer.height = page.evaluate(function() { 74 | return PhantomJSPrinting.footer.height; 75 | }); 76 | paperSize.footer.contents = phantom.callback(function(pageNum, numPages) { 77 | return page.evaluate(function(pageNum, numPages){return PhantomJSPrinting.footer.contents(pageNum, numPages);}, pageNum, numPages); 78 | }); 79 | page.paperSize = paperSize; 80 | console.log(page.paperSize.header.height); 81 | console.log(page.paperSize.footer.height); 82 | } 83 | window.setTimeout(function () { 84 | page.render(output); 85 | phantom.exit(); 86 | }, 200); 87 | } 88 | }); 89 | } 90 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/printmargins.js: -------------------------------------------------------------------------------- 1 | var page = require('webpage').create(), 2 | system = require('system'); 3 | 4 | if (system.args.length < 7) { 5 | console.log('Usage: printmargins.js URL filename LEFT TOP RIGHT BOTTOM'); 6 | console.log(' margin examples: "1cm", "10px", "7mm", "5in"'); 7 | phantom.exit(1); 8 | } else { 9 | var address = system.args[1]; 10 | var output = system.args[2]; 11 | var marginLeft = system.args[3]; 12 | var marginTop = system.args[4]; 13 | var marginRight = system.args[5]; 14 | var marginBottom = system.args[6]; 15 | page.viewportSize = { width: 600, height: 600 }; 16 | page.paperSize = { 17 | format: 'A4', 18 | margin: { 19 | left: marginLeft, 20 | top: marginTop, 21 | right: marginRight, 22 | bottom: marginBottom 
23 | } 24 | }; 25 | page.open(address, function (status) { 26 | if (status !== 'success') { 27 | console.log('Unable to load the address!'); 28 | } else { 29 | window.setTimeout(function () { 30 | page.render(output); 31 | phantom.exit(); 32 | }, 200); 33 | } 34 | }); 35 | } 36 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/rasterize.js: -------------------------------------------------------------------------------- 1 | var page = require('webpage').create(), 2 | system = require('system'), 3 | address, output, size; 4 | 5 | if (system.args.length < 3 || system.args.length > 5) { 6 | console.log('Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom]'); 7 | console.log(' paper (pdf output) examples: "5in*7.5in", "10cm*20cm", "A4", "Letter"'); 8 | console.log(' image (png/jpg output) examples: "1920px" entire page, window width 1920px'); 9 | console.log(' "800px*600px" window, clipped to 800x600'); 10 | phantom.exit(1); 11 | } else { 12 | address = system.args[1]; 13 | output = system.args[2]; 14 | page.viewportSize = { width: 600, height: 600 }; 15 | if (system.args.length > 3 && system.args[2].substr(-4) === ".pdf") { 16 | size = system.args[3].split('*'); 17 | page.paperSize = size.length === 2 ? 
{ width: size[0], height: size[1], margin: '0px' } 18 | : { format: system.args[3], orientation: 'portrait', margin: '1cm' }; 19 | } else if (system.args.length > 3 && system.args[3].substr(-2) === "px") { 20 | size = system.args[3].split('*'); 21 | if (size.length === 2) { 22 | pageWidth = parseInt(size[0], 10); 23 | pageHeight = parseInt(size[1], 10); 24 | page.viewportSize = { width: pageWidth, height: pageHeight }; 25 | page.clipRect = { top: 0, left: 0, width: pageWidth, height: pageHeight }; 26 | } else { 27 | console.log("size:", system.args[3]); 28 | pageWidth = parseInt(system.args[3], 10); 29 | pageHeight = parseInt(pageWidth * 3/4, 10); // it's as good an assumption as any 30 | console.log ("pageHeight:",pageHeight); 31 | page.viewportSize = { width: pageWidth, height: pageHeight }; 32 | } 33 | } 34 | if (system.args.length > 4) { 35 | page.zoomFactor = system.args[4]; 36 | } 37 | page.open(address, function (status) { 38 | if (status !== 'success') { 39 | console.log('Unable to load the address!'); 40 | phantom.exit(1); 41 | } else { 42 | window.setTimeout(function () { 43 | page.render(output); 44 | phantom.exit(); 45 | }, 200); 46 | } 47 | }); 48 | } 49 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/render_multi_url.js: -------------------------------------------------------------------------------- 1 | // Render Multiple URLs to file 2 | 3 | var RenderUrlsToFile, arrayOfUrls, system; 4 | 5 | system = require("system"); 6 | 7 | /* 8 | Render given urls 9 | @param array of URLs to render 10 | @param callbackPerUrl Function called after finishing each URL, including the last URL 11 | @param callbackFinal Function called after finishing everything 12 | */ 13 | RenderUrlsToFile = function(urls, callbackPerUrl, callbackFinal) { 14 | var getFilename, next, page, retrieve, urlIndex, webpage; 15 | urlIndex = 0; 16 | webpage = require("webpage"); 17 | page = null; 18 | getFilename = function() 
{ 19 | return "rendermulti-" + urlIndex + ".png"; 20 | }; 21 | next = function(status, url, file) { 22 | page.close(); 23 | callbackPerUrl(status, url, file); 24 | return retrieve(); 25 | }; 26 | retrieve = function() { 27 | var url; 28 | if (urls.length > 0) { 29 | url = urls.shift(); 30 | urlIndex++; 31 | page = webpage.create(); 32 | page.viewportSize = { 33 | width: 800, 34 | height: 600 35 | }; 36 | page.settings.userAgent = "Phantom.js bot"; 37 | return page.open("http://" + url, function(status) { 38 | var file; 39 | file = getFilename(); 40 | if (status === "success") { 41 | return window.setTimeout((function() { 42 | page.render(file); 43 | return next(status, url, file); 44 | }), 200); 45 | } else { 46 | return next(status, url, file); 47 | } 48 | }); 49 | } else { 50 | return callbackFinal(); 51 | } 52 | }; 53 | return retrieve(); 54 | }; 55 | 56 | arrayOfUrls = null; 57 | 58 | if (system.args.length > 1) { 59 | arrayOfUrls = Array.prototype.slice.call(system.args, 1); 60 | } else { 61 | console.log("Usage: phantomjs render_multi_url.js [domain.name1, domain.name2, ...]"); 62 | arrayOfUrls = ["www.google.com", "www.bbc.co.uk", "www.phantomjs.org"]; 63 | } 64 | 65 | RenderUrlsToFile(arrayOfUrls, (function(status, url, file) { 66 | if (status !== "success") { 67 | return console.log("Unable to render '" + url + "'"); 68 | } else { 69 | return console.log("Rendered '" + url + "' at '" + file + "'"); 70 | } 71 | }), function() { 72 | return phantom.exit(); 73 | }); 74 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/run-jasmine.js: -------------------------------------------------------------------------------- 1 | var system = require('system'); 2 | 3 | /** 4 | * Wait until the test condition is true or a timeout occurs. Useful for waiting 5 | * on a server response or for a ui change (fadeIn, etc.) to occur. 
6 | * 7 | * @param testFx javascript condition that evaluates to a boolean, 8 | * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or 9 | * as a callback function. 10 | * @param onReady what to do when testFx condition is fulfilled, 11 | * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or 12 | * as a callback function. 13 | * @param timeOutMillis the max amount of time to wait. If not specified, 3 sec is used. 14 | */ 15 | function waitFor(testFx, onReady, timeOutMillis) { 16 | var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3001, //< Default Max Timeout is 3s 17 | start = new Date().getTime(), 18 | condition = false, 19 | interval = setInterval(function() { 20 | if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) { 21 | // If not time-out yet and condition not yet fulfilled 22 | condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code 23 | } else { 24 | if(!condition) { 25 | // If condition still not fulfilled (timeout but condition is 'false') 26 | console.log("'waitFor()' timeout"); 27 | phantom.exit(1); 28 | } else { 29 | // Condition fulfilled (timeout and/or condition is 'true') 30 | console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms."); 31 | typeof(onReady) === "string" ? eval(onReady) : onReady(); //< Do what it's supposed to do once the condition is fulfilled 32 | clearInterval(interval); //< Stop this interval 33 | } 34 | } 35 | }, 100); //< repeat check every 100ms 36 | }; 37 | 38 | 39 | if (system.args.length !== 2) { 40 | console.log('Usage: run-jasmine.js URL'); 41 | phantom.exit(1); 42 | } 43 | 44 | var page = require('webpage').create(); 45 | 46 | // Route "console.log()" calls from within the Page context to the main Phantom context (i.e. 
current "this") 47 | page.onConsoleMessage = function(msg) { 48 | console.log(msg); 49 | }; 50 | 51 | page.open(system.args[1], function(status){ 52 | if (status !== "success") { 53 | console.log("Unable to open " + system.args[1]); 54 | phantom.exit(1); 55 | } else { 56 | waitFor(function(){ 57 | return page.evaluate(function(){ 58 | return document.body.querySelector('.symbolSummary .pending') === null 59 | }); 60 | }, function(){ 61 | var exitCode = page.evaluate(function(){ 62 | try { 63 | console.log(''); 64 | console.log(document.body.querySelector('.description').innerText); 65 | var list = document.body.querySelectorAll('.results > #details > .specDetail.failed'); 66 | if (list && list.length > 0) { 67 | console.log(''); 68 | console.log(list.length + ' test(s) FAILED:'); 69 | for (i = 0; i < list.length; ++i) { 70 | var el = list[i], 71 | desc = el.querySelector('.description'), 72 | msg = el.querySelector('.resultMessage.fail'); 73 | console.log(''); 74 | console.log(desc.innerText); 75 | console.log(msg.innerText); 76 | console.log(''); 77 | } 78 | return 1; 79 | } else { 80 | console.log(document.body.querySelector('.alert > .passingAlert.bar').innerText); 81 | return 0; 82 | } 83 | } catch (ex) { 84 | console.log(ex); 85 | return 1; 86 | } 87 | }); 88 | phantom.exit(exitCode); 89 | }); 90 | } 91 | }); 92 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/run-jasmine2.js: -------------------------------------------------------------------------------- 1 | var system = require('system'); 2 | 3 | /** 4 | * Wait until the test condition is true or a timeout occurs. Useful for waiting 5 | * on a server response or for a ui change (fadeIn, etc.) to occur. 6 | * 7 | * @param testFx javascript condition that evaluates to a boolean, 8 | * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or 9 | * as a callback function. 
10 | * @param onReady what to do when testFx condition is fulfilled, 11 | * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or 12 | * as a callback function. 13 | * @param timeOutMillis the max amount of time to wait. If not specified, 3 sec is used. 14 | */ 15 | function waitFor(testFx, onReady, timeOutMillis) { 16 | var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3001, //< Default Max Timeout is 3s 17 | start = new Date().getTime(), 18 | condition = false, 19 | interval = setInterval(function() { 20 | if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) { 21 | // If not time-out yet and condition not yet fulfilled 22 | condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code 23 | } else { 24 | if(!condition) { 25 | // If condition still not fulfilled (timeout but condition is 'false') 26 | console.log("'waitFor()' timeout"); 27 | phantom.exit(1); 28 | } else { 29 | // Condition fulfilled (timeout and/or condition is 'true') 30 | console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms."); 31 | typeof(onReady) === "string" ? eval(onReady) : onReady(); //< Do what it's supposed to do once the condition is fulfilled 32 | clearInterval(interval); //< Stop this interval 33 | } 34 | } 35 | }, 100); //< repeat check every 100ms 36 | }; 37 | 38 | 39 | if (system.args.length !== 2) { 40 | console.log('Usage: run-jasmine.js URL'); 41 | phantom.exit(1); 42 | } 43 | 44 | var page = require('webpage').create(); 45 | 46 | // Route "console.log()" calls from within the Page context to the main Phantom context (i.e. 
current "this") 47 | page.onConsoleMessage = function(msg) { 48 | console.log(msg); 49 | }; 50 | 51 | page.open(system.args[1], function(status){ 52 | if (status !== "success") { 53 | console.log("Unable to access network"); 54 | phantom.exit(); 55 | } else { 56 | waitFor(function(){ 57 | return page.evaluate(function(){ 58 | return document.body.querySelector('.symbolSummary .pending') === null 59 | }); 60 | }, function(){ 61 | var exitCode = page.evaluate(function(){ 62 | console.log(''); 63 | 64 | var el = document.body.querySelector('.banner'); 65 | var banner = el.querySelector('.title').innerText + " " + 66 | el.querySelector('.version').innerText + " " + 67 | el.querySelector('.duration').innerText; 68 | console.log(banner); 69 | 70 | var list = document.body.querySelectorAll('.results > .failures > .spec-detail.failed'); 71 | if (list && list.length > 0) { 72 | console.log(''); 73 | console.log(list.length + ' test(s) FAILED:'); 74 | for (i = 0; i < list.length; ++i) { 75 | var el = list[i], 76 | desc = el.querySelector('.description'), 77 | msg = el.querySelector('.messages > .result-message'); 78 | console.log(''); 79 | console.log(desc.innerText); 80 | console.log(msg.innerText); 81 | console.log(''); 82 | } 83 | return 1; 84 | } else { 85 | console.log(document.body.querySelector('.alert > .bar.passed').innerText); 86 | return 0; 87 | } 88 | }); 89 | phantom.exit(exitCode); 90 | }); 91 | } 92 | }); 93 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/run-qunit.js: -------------------------------------------------------------------------------- 1 | var system = require('system'); 2 | 3 | /** 4 | * Wait until the test condition is true or a timeout occurs. Useful for waiting 5 | * on a server response or for a ui change (fadeIn, etc.) to occur. 
6 | * 7 | * @param testFx javascript condition that evaluates to a boolean, 8 | * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or 9 | * as a callback function. 10 | * @param onReady what to do when testFx condition is fulfilled, 11 | * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or 12 | * as a callback function. 13 | * @param timeOutMillis the max amount of time to wait. If not specified, 3 sec is used. 14 | */ 15 | function waitFor(testFx, onReady, timeOutMillis) { 16 | var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3001, //< Default Max Timout is 3s 17 | start = new Date().getTime(), 18 | condition = false, 19 | interval = setInterval(function() { 20 | if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) { 21 | // If not time-out yet and condition not yet fulfilled 22 | condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code 23 | } else { 24 | if(!condition) { 25 | // If condition still not fulfilled (timeout but condition is 'false') 26 | console.log("'waitFor()' timeout"); 27 | phantom.exit(1); 28 | } else { 29 | // Condition fulfilled (timeout and/or condition is 'true') 30 | console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms."); 31 | typeof(onReady) === "string" ? eval(onReady) : onReady(); //< Do what it's supposed to do once the condition is fulfilled 32 | clearInterval(interval); //< Stop this interval 33 | } 34 | } 35 | }, 100); //< repeat check every 250ms 36 | }; 37 | 38 | 39 | if (system.args.length !== 2) { 40 | console.log('Usage: run-qunit.js URL'); 41 | phantom.exit(1); 42 | } 43 | 44 | var page = require('webpage').create(); 45 | 46 | // Route "console.log()" calls from within the Page context to the main Phantom context (i.e. 
current "this") 47 | page.onConsoleMessage = function(msg) { 48 | console.log(msg); 49 | }; 50 | 51 | page.open(system.args[1], function(status){ 52 | if (status !== "success") { 53 | console.log("Unable to access network"); 54 | phantom.exit(1); 55 | } else { 56 | waitFor(function(){ 57 | return page.evaluate(function(){ 58 | var el = document.getElementById('qunit-testresult'); 59 | if (el && el.innerText.match('completed')) { 60 | return true; 61 | } 62 | return false; 63 | }); 64 | }, function(){ 65 | var failedNum = page.evaluate(function(){ 66 | var el = document.getElementById('qunit-testresult'); 67 | console.log(el.innerText); 68 | try { 69 | return el.getElementsByClassName('failed')[0].innerHTML; 70 | } catch (e) { } 71 | return 10000; 72 | }); 73 | phantom.exit((parseInt(failedNum, 10) > 0) ? 1 : 0); 74 | }); 75 | } 76 | }); 77 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/scandir.js: -------------------------------------------------------------------------------- 1 | // List all the files in a Tree of Directories 2 | var system = require('system'); 3 | 4 | if (system.args.length !== 2) { 5 | console.log("Usage: phantomjs scandir.js DIRECTORY_TO_SCAN"); 6 | phantom.exit(1); 7 | } 8 | 9 | var scanDirectory = function (path) { 10 | var fs = require('fs'); 11 | if (fs.exists(path) && fs.isFile(path)) { 12 | console.log(path); 13 | } else if (fs.isDirectory(path)) { 14 | fs.list(path).forEach(function (e) { 15 | if ( e !== "." && e !== ".." ) { //< Avoid loops 16 | scanDirectory(path + '/' + e); 17 | } 18 | }); 19 | } 20 | }; 21 | scanDirectory(system.args[1]); 22 | phantom.exit(); 23 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/seasonfood.js: -------------------------------------------------------------------------------- 1 | // Show BBC seasonal food list. 
2 | 3 | var cbfunc = function (data) { 4 | var list = data.query.results.results.result, 5 | names = ['January', 'February', 'March', 6 | 'April', 'May', 'June', 7 | 'July', 'August', 'September', 8 | 'October', 'November', 'December']; 9 | list.forEach(function (item) { 10 | console.log([item.name.replace(/\s/ig, ' '), ':', 11 | names[item.atItsBestUntil], 'to', 12 | names[item.atItsBestFrom]].join(' ')); 13 | }); 14 | phantom.exit(); 15 | }; 16 | 17 | var el = document.createElement('script'); 18 | el.src = 'http://query.yahooapis.com/v1/public/yql?q=SELECT%20*%20FROM%20bbc.goodfood.seasonal%3B&format=json&diagnostics=true&env=store%3A%2F%2Fdatatables.org%2Falltableswithkeys&callback=cbfunc'; 19 | document.body.appendChild(el); 20 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/server.js: -------------------------------------------------------------------------------- 1 | var page = require('webpage').create(); 2 | var server = require('webserver').create(); 3 | var system = require('system'); 4 | var host, port; 5 | 6 | if (system.args.length !== 2) { 7 | console.log('Usage: server.js '); 8 | phantom.exit(1); 9 | } else { 10 | port = system.args[1]; 11 | var listening = server.listen(port, function (request, response) { 12 | console.log("GOT HTTP REQUEST"); 13 | console.log(JSON.stringify(request, null, 4)); 14 | 15 | // we set the headers here 16 | response.statusCode = 200; 17 | response.headers = {"Cache": "no-cache", "Content-Type": "text/html"}; 18 | // this is also possible: 19 | response.setHeader("foo", "bar"); 20 | // now we write the body 21 | // note: the headers above will now be sent implicitly 22 | response.write("YES!"); 23 | // note: response.write can be called multiple times 24 | response.write("

pretty cool :)"); 25 | response.close(); 26 | }); 27 | if (!listening) { 28 | console.log("could not create web server listening on port " + port); 29 | phantom.exit(); 30 | } 31 | var url = "http://localhost:" + port + "/foo/bar.php?asdf=true"; 32 | console.log("SENDING REQUEST TO:"); 33 | console.log(url); 34 | page.open(url, function (status) { 35 | if (status !== 'success') { 36 | console.log('FAIL to load the address'); 37 | } else { 38 | console.log("GOT REPLY FROM SERVER:"); 39 | console.log(page.content); 40 | } 41 | phantom.exit(); 42 | }); 43 | } 44 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/serverkeepalive.js: -------------------------------------------------------------------------------- 1 | var port, server, service, 2 | system = require('system'); 3 | 4 | if (system.args.length !== 2) { 5 | console.log('Usage: serverkeepalive.js '); 6 | phantom.exit(1); 7 | } else { 8 | port = system.args[1]; 9 | server = require('webserver').create(); 10 | 11 | service = server.listen(port, { keepAlive: true }, function (request, response) { 12 | console.log('Request at ' + new Date()); 13 | console.log(JSON.stringify(request, null, 4)); 14 | 15 | var body = JSON.stringify(request, null, 4); 16 | response.statusCode = 200; 17 | response.headers = { 18 | 'Cache': 'no-cache', 19 | 'Content-Type': 'text/plain', 20 | 'Connection': 'Keep-Alive', 21 | 'Keep-Alive': 'timeout=5, max=100', 22 | 'Content-Length': body.length 23 | }; 24 | response.write(body); 25 | response.close(); 26 | }); 27 | 28 | if (service) { 29 | console.log('Web server running on port ' + port); 30 | } else { 31 | console.log('Error: Could not create web server listening on port ' + port); 32 | phantom.exit(); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/simpleserver.js: 
-------------------------------------------------------------------------------- 1 | var port, server, service, 2 | system = require('system'); 3 | 4 | if (system.args.length !== 2) { 5 | console.log('Usage: simpleserver.js '); 6 | phantom.exit(1); 7 | } else { 8 | port = system.args[1]; 9 | server = require('webserver').create(); 10 | 11 | service = server.listen(port, function (request, response) { 12 | 13 | console.log('Request at ' + new Date()); 14 | console.log(JSON.stringify(request, null, 4)); 15 | 16 | response.statusCode = 200; 17 | response.headers = { 18 | 'Cache': 'no-cache', 19 | 'Content-Type': 'text/html' 20 | }; 21 | response.write(''); 22 | response.write(''); 23 | response.write('Hello, world!'); 24 | response.write(''); 25 | response.write(''); 26 | response.write('

This is from PhantomJS web server.

'); 27 | response.write('

Request data:

'); 28 | response.write('
');
29 |         response.write(JSON.stringify(request, null, 4));
30 |         response.write('
'); 31 | response.write(''); 32 | response.write(''); 33 | response.close(); 34 | }); 35 | 36 | if (service) { 37 | console.log('Web server running on port ' + port); 38 | } else { 39 | console.log('Error: Could not create web server listening on port ' + port); 40 | phantom.exit(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/sleepsort.js: -------------------------------------------------------------------------------- 1 | // sleepsort.js - Sort integers from the commandline in a very ridiculous way: leveraging timeouts :P 2 | var system = require('system'); 3 | 4 | function sleepSort(array, callback) { 5 | var sortedCount = 0, 6 | i, len; 7 | for ( i = 0, len = array.length; i < len; ++i ) { 8 | setTimeout((function(j){ 9 | return function() { 10 | console.log(array[j]); 11 | ++sortedCount; 12 | (len === sortedCount) && callback(); 13 | }; 14 | }(i)), array[i]); 15 | } 16 | } 17 | 18 | if ( system.args.length < 2 ) { 19 | console.log("Usage: phantomjs sleepsort.js PUT YOUR INTEGERS HERE SEPARATED BY SPACES"); 20 | phantom.exit(1); 21 | } else { 22 | sleepSort(system.args.slice(1), function() { 23 | phantom.exit(); 24 | }); 25 | } 26 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/stdin-stdout-stderr.js: -------------------------------------------------------------------------------- 1 | var system = require('system'); 2 | 3 | system.stdout.write('Hello, system.stdout.write!'); 4 | system.stdout.writeLine('\nHello, system.stdout.writeLine!'); 5 | 6 | system.stderr.write('Hello, system.stderr.write!'); 7 | system.stderr.writeLine('\nHello, system.stderr.writeLine!'); 8 | 9 | system.stdout.writeLine('system.stdin.readLine(): '); 10 | var line = system.stdin.readLine(); 11 | system.stdout.writeLine(JSON.stringify(line)); 12 | 13 | // This is essentially a `readAll` 14 | system.stdout.writeLine('system.stdin.read(5): 
(ctrl+D to end)'); 15 | var input = system.stdin.read(5); 16 | system.stdout.writeLine(JSON.stringify(input)); 17 | 18 | phantom.exit(0); 19 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/technews.js: -------------------------------------------------------------------------------- 1 | var page = require('webpage').create(); 2 | page.viewportSize = { width: 320, height: 480 }; 3 | page.open('http://news.google.com/news/i/section?&topic=t', function (status) { 4 | if (status !== 'success') { 5 | console.log('Unable to access the network!'); 6 | } else { 7 | page.evaluate(function () { 8 | var body = document.body; 9 | body.style.backgroundColor = '#fff'; 10 | body.querySelector('div#title-block').style.display = 'none'; 11 | body.querySelector('form#edition-picker-form').parentElement.parentElement.style.display = 'none'; 12 | }); 13 | page.render('technews.png'); 14 | } 15 | phantom.exit(); 16 | }); 17 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/tweets.js: -------------------------------------------------------------------------------- 1 | // Get twitter status for given account (or for the default one, "PhantomJS") 2 | 3 | var page = require('webpage').create(), 4 | system = require('system'), 5 | twitterId = "PhantomJS"; //< default value 6 | 7 | // Route "console.log()" calls from within the Page context to the main Phantom context (i.e. current "this") 8 | page.onConsoleMessage = function(msg) { 9 | console.log(msg); 10 | }; 11 | 12 | // Print usage message, if no twitter ID is passed 13 | if (system.args.length < 2) { 14 | console.log("Usage: tweets.js [twitter ID]"); 15 | } else { 16 | twitterId = system.args[1]; 17 | } 18 | 19 | // Heading 20 | console.log("*** Latest tweets from @" + twitterId + " ***\n"); 21 | 22 | // Open Twitter Mobile and, onPageLoad, do... 
23 | page.open(encodeURI("http://mobile.twitter.com/" + twitterId), function (status) { 24 | // Check for page load success 25 | if (status !== "success") { 26 | console.log("Unable to access network"); 27 | } else { 28 | // Execute some DOM inspection within the page context 29 | page.evaluate(function() { 30 | var list = document.querySelectorAll('div.tweet-text'); 31 | for (var i = 0; i < list.length; ++i) { 32 | console.log((i + 1) + ": " + list[i].innerText); 33 | } 34 | }); 35 | } 36 | phantom.exit(); 37 | }); 38 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/universe.js: -------------------------------------------------------------------------------- 1 | // This is to be used by "module.js" (and "module.coffee") example(s). 2 | // There should NOT be a "universe.coffee" as only 1 of the 2 would 3 | // ever be loaded unless the file extension was specified. 4 | 5 | exports.answer = 42; 6 | 7 | exports.start = function () { 8 | console.log('Starting the universe....'); 9 | } 10 | 11 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/unrandomize.js: -------------------------------------------------------------------------------- 1 | // Modify global object at the page initialization. 2 | // In this example, effectively Math.random() always returns 0.42. 
3 | 4 | var page = require('webpage').create(); 5 | 6 | page.onInitialized = function () { 7 | page.evaluate(function () { 8 | Math.random = function() { 9 | return 42 / 100; 10 | }; 11 | }); 12 | }; 13 | 14 | page.open('http://ariya.github.com/js/random/', function (status) { 15 | var result; 16 | if (status !== 'success') { 17 | console.log('Network error.'); 18 | } else { 19 | console.log(page.evaluate(function () { 20 | return document.getElementById('numbers').textContent; 21 | })); 22 | } 23 | phantom.exit(); 24 | }); 25 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/useragent.js: -------------------------------------------------------------------------------- 1 | var page = require('webpage').create(); 2 | console.log('The default user agent is ' + page.settings.userAgent); 3 | page.settings.userAgent = 'SpecialAgent'; 4 | page.open('http://www.httpuseragent.org', function (status) { 5 | if (status !== 'success') { 6 | console.log('Unable to access network'); 7 | } else { 8 | var ua = page.evaluate(function () { 9 | return document.getElementById('myagent').innerText; 10 | }); 11 | console.log(ua); 12 | } 13 | phantom.exit(); 14 | }); 15 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/version.js: -------------------------------------------------------------------------------- 1 | console.log('using PhantomJS version ' + 2 | phantom.version.major + '.' + 3 | phantom.version.minor + '.' + 4 | phantom.version.patch); 5 | phantom.exit(); 6 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/waitfor.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Wait until the test condition is true or a timeout occurs. Useful for waiting 3 | * on a server response or for a ui change (fadeIn, etc.) to occur. 
4 | * 5 | * @param testFx javascript condition that evaluates to a boolean, 6 | * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or 7 | * as a callback function. 8 | * @param onReady what to do when testFx condition is fulfilled, 9 | * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or 10 | * as a callback function. 11 | * @param timeOutMillis the max amount of time to wait. If not specified, 3 sec is used. 12 | */ 13 | function waitFor(testFx, onReady, timeOutMillis) { 14 | var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default Max Timout is 3s 15 | start = new Date().getTime(), 16 | condition = false, 17 | interval = setInterval(function() { 18 | if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) { 19 | // If not time-out yet and condition not yet fulfilled 20 | condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code 21 | } else { 22 | if(!condition) { 23 | // If condition still not fulfilled (timeout but condition is 'false') 24 | console.log("'waitFor()' timeout"); 25 | phantom.exit(1); 26 | } else { 27 | // Condition fulfilled (timeout and/or condition is 'true') 28 | console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms."); 29 | typeof(onReady) === "string" ? eval(onReady) : onReady(); //< Do what it's supposed to do once the condition is fulfilled 30 | clearInterval(interval); //< Stop this interval 31 | } 32 | } 33 | }, 250); //< repeat check every 250ms 34 | }; 35 | 36 | 37 | var page = require('webpage').create(); 38 | 39 | // Open Twitter on 'sencha' profile and, onPageLoad, do... 
40 | page.open("http://twitter.com/#!/sencha", function (status) { 41 | // Check for page load success 42 | if (status !== "success") { 43 | console.log("Unable to access network"); 44 | } else { 45 | // Wait for 'signin-dropdown' to be visible 46 | waitFor(function() { 47 | // Check in the page if a specific element is now visible 48 | return page.evaluate(function() { 49 | return $("#signin-dropdown").is(":visible"); 50 | }); 51 | }, function() { 52 | console.log("The sign-in dialog should be visible now."); 53 | phantom.exit(); 54 | }); 55 | } 56 | }); 57 | 58 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/walk_through_frames.js: -------------------------------------------------------------------------------- 1 | var p = require("webpage").create(); 2 | 3 | function pageTitle(page) { 4 | return page.evaluate(function(){ 5 | return window.document.title; 6 | }); 7 | } 8 | 9 | function setPageTitle(page, newTitle) { 10 | page.evaluate(function(newTitle){ 11 | window.document.title = newTitle; 12 | }, newTitle); 13 | } 14 | 15 | p.open("../test/webpage-spec-frames/index.html", function(status) { 16 | console.log("pageTitle(): " + pageTitle(p)); 17 | console.log("currentFrameName(): "+p.currentFrameName()); 18 | console.log("childFramesCount(): "+p.childFramesCount()); 19 | console.log("childFramesName(): "+p.childFramesName()); 20 | console.log("setPageTitle(CURRENT TITLE+'-visited')"); setPageTitle(p, pageTitle(p) + "-visited"); 21 | console.log(""); 22 | 23 | console.log("p.switchToChildFrame(\"frame1\"): "+p.switchToChildFrame("frame1")); 24 | console.log("pageTitle(): " + pageTitle(p)); 25 | console.log("currentFrameName(): "+p.currentFrameName()); 26 | console.log("childFramesCount(): "+p.childFramesCount()); 27 | console.log("childFramesName(): "+p.childFramesName()); 28 | console.log("setPageTitle(CURRENT TITLE+'-visited')"); setPageTitle(p, pageTitle(p) + "-visited"); 29 | console.log(""); 
30 | 31 | console.log("p.switchToChildFrame(\"frame1-2\"): "+p.switchToChildFrame("frame1-2")); 32 | console.log("pageTitle(): " + pageTitle(p)); 33 | console.log("currentFrameName(): "+p.currentFrameName()); 34 | console.log("childFramesCount(): "+p.childFramesCount()); 35 | console.log("childFramesName(): "+p.childFramesName()); 36 | console.log("setPageTitle(CURRENT TITLE+'-visited')"); setPageTitle(p, pageTitle(p) + "-visited"); 37 | console.log(""); 38 | 39 | console.log("p.switchToParentFrame(): "+p.switchToParentFrame()); 40 | console.log("pageTitle(): " + pageTitle(p)); 41 | console.log("currentFrameName(): "+p.currentFrameName()); 42 | console.log("childFramesCount(): "+p.childFramesCount()); 43 | console.log("childFramesName(): "+p.childFramesName()); 44 | console.log("setPageTitle(CURRENT TITLE+'-visited')"); setPageTitle(p, pageTitle(p) + "-visited"); 45 | console.log(""); 46 | 47 | console.log("p.switchToChildFrame(0): "+p.switchToChildFrame(0)); 48 | console.log("pageTitle(): " + pageTitle(p)); 49 | console.log("currentFrameName(): "+p.currentFrameName()); 50 | console.log("childFramesCount(): "+p.childFramesCount()); 51 | console.log("childFramesName(): "+p.childFramesName()); 52 | console.log("setPageTitle(CURRENT TITLE+'-visited')"); setPageTitle(p, pageTitle(p) + "-visited"); 53 | console.log(""); 54 | 55 | console.log("p.switchToMainFrame()"); p.switchToMainFrame(); 56 | console.log("pageTitle(): " + pageTitle(p)); 57 | console.log("currentFrameName(): "+p.currentFrameName()); 58 | console.log("childFramesCount(): "+p.childFramesCount()); 59 | console.log("childFramesName(): "+p.childFramesName()); 60 | console.log("setPageTitle(CURRENT TITLE+'-visited')"); setPageTitle(p, pageTitle(p) + "-visited"); 61 | console.log(""); 62 | 63 | console.log("p.switchToChildFrame(\"frame2\"): "+p.switchToChildFrame("frame2")); 64 | console.log("pageTitle(): " + pageTitle(p)); 65 | console.log("currentFrameName(): "+p.currentFrameName()); 66 | 
console.log("childFramesCount(): "+p.childFramesCount()); 67 | console.log("childFramesName(): "+p.childFramesName()); 68 | console.log("setPageTitle(CURRENT TITLE+'-visited')"); setPageTitle(p, pageTitle(p) + "-visited"); 69 | console.log(""); 70 | 71 | phantom.exit(); 72 | }); 73 | 74 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/examples/weather.js: -------------------------------------------------------------------------------- 1 | var page = require('webpage').create(), 2 | system = require('system'), 3 | city, 4 | url; 5 | 6 | city = 'Mountain View, California'; // default 7 | if (system.args.length > 1) { 8 | city = Array.prototype.slice.call(system.args, 1).join(' '); 9 | } 10 | url = encodeURI('http://api.openweathermap.org/data/2.1/find/name?q=' + city); 11 | 12 | console.log('Checking weather condition for', city, '...'); 13 | 14 | page.open(url, function(status) { 15 | var result, data; 16 | if (status !== 'success') { 17 | console.log('Error: Unable to access network!'); 18 | } else { 19 | result = page.evaluate(function () { 20 | return document.body.innerText; 21 | }); 22 | try { 23 | data = JSON.parse(result); 24 | data = data.list[0]; 25 | console.log(''); 26 | console.log('City:', data.name); 27 | console.log('Condition:', data.weather.map(function(entry) { 28 | return entry.main; 29 | }).join(', ')); 30 | console.log('Temperature:', Math.round(data.main.temp - 273.15), 'C'); 31 | console.log('Humidity:', Math.round(data.main.humidity), '%'); 32 | } catch (e) { 33 | console.log('Error:', e.toString()); 34 | } 35 | } 36 | phantom.exit(); 37 | }); 38 | -------------------------------------------------------------------------------- /6-Selenium/phantomjs/third-party.txt: -------------------------------------------------------------------------------- 1 | This document contains the list of Third Party Software included with 2 | PhantomJS, along with the license information. 
3 | 4 | Third Party Software may impose additional restrictions and it is the 5 | user's responsibility to ensure that they have met the licensing 6 | requirements of PhantomJS and the relevant license of the Third Party 7 | Software they are using. 8 | 9 | Qt - http://qt-project.org/ 10 | License: GNU Lesser General Public License (LGPL) version 2.1. 11 | Reference: http://qt-project.org/doc/qt-4.8/lgpl.html. 12 | 13 | WebKit - http://www.webkit.org/ 14 | License: GNU Lesser General Public License (LGPL) version 2.1 and BSD. 15 | Reference: http://www.webkit.org/coding/lgpl-license.html and 16 | http://www.webkit.org/coding/bsd-license.html. 17 | 18 | Mongoose - https://github.com/cesanta/mongoose 19 | License: MIT 20 | Reference: https://github.com/cesanta/mongoose/commit/abbf27338ef554cce0281ac157aa71a9c1b82a55 21 | 22 | Breakpad - http://code.google.com/p/google-breakpad/ 23 | License: BSD. 24 | Reference: http://code.google.com/p/google-breakpad/source/browse/trunk/COPYING. 25 | 26 | OpenSSL - http://www.openssl.org/ 27 | License: OpenSSL License, SSLeay License. 28 | Reference: http://www.openssl.org/source/license.html. 29 | 30 | Linenoise - https://github.com/tadmarshall/linenoise 31 | License: BSD. 32 | Reference: https://github.com/tadmarshall/linenoise/blob/master/linenoise.h. 33 | 34 | QCommandLine - http://xf.iksaif.net/dev/qcommandline.html 35 | License: GNU Lesser General Public License (LGPL) version 2.1. 
36 | Reference: http://dev.iksaif.net/projects/qcommandline/repository/revisions/master/entry/COPYING 37 | 38 | wkhtmltopdf - http://code.google.com/p/wkhtmltopdf/ 39 | License: GNU Lesser General Public License (LGPL) 40 | Reference: http://code.google.com/p/wkhtmltopdf/ 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Code samples for crawling the web with Python 2 | 3 | Requires BeautifulSoup, Requests, Scrapy --------------------------------------------------------------------------------