├── .DS_Store ├── .gitignore ├── README.md ├── demo.py ├── demo1.py ├── demo10.py ├── demo11.py ├── demo12.py ├── demo13.py ├── demo14.py ├── demo15.py ├── demo16.py ├── demo17.py ├── demo2.py ├── demo3.py ├── demo4.py ├── demo5.py ├── demo6.py ├── demo7.py ├── demo8.py ├── demo9.py ├── example.pdf ├── example.png ├── launch.py ├── launch_set_viewport.py ├── launch_taobao_fail.py ├── launch_userdata.py ├── launch_webdriver_detection.py ├── launch_with_dev.py ├── preview.pdf ├── requests_test.py └── start.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Python3WebSpider/PyppeteerTest/5e624cf1b88a36ae41d07b6d6e4cbbf4059c5c44/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyppeteerTest 2 | Pyppeteer Demo 3 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | 5 | async def main(): 6 | browser = await launch() 7 | page = await browser.newPage() 8 | await page.goto('http://quotes.toscrape.com/js/') 9 | await page.screenshot({'path': 'example.png'}) 10 | await page.pdf(path='example.pdf') 11 | dimensions = await page.evaluate('''() => { 12 | return { 13 | width: document.documentElement.clientWidth, 14 | height: document.documentElement.clientHeight, 15 | deviceScaleFactor: window.devicePixelRatio, 16 | } 17 | }''') 18 | 19 | print(dimensions) 20 | # >>> {'width': 800, 'height': 600, 'deviceScaleFactor': 1} 21 | await browser.close() 22 | 23 | 24 | 
asyncio.get_event_loop().run_until_complete(main()) 25 | -------------------------------------------------------------------------------- /demo1.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | from pyquery import PyQuery as pq 4 | 5 | 6 | async def main(): 7 | browser = await launch() 8 | page = await browser.newPage() 9 | await page.goto('https://dynamic2.scrape.cuiqingcai.com/') 10 | await page.waitForSelector('.item .name') 11 | doc = pq(await page.content()) 12 | names = [item.text() for item in doc('.item .name').items()] 13 | print('Names:', names) 14 | await browser.close() 15 | 16 | 17 | asyncio.get_event_loop().run_until_complete(main()) 18 | -------------------------------------------------------------------------------- /demo10.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | 5 | async def main(): 6 | browser = await launch(headless=False) 7 | page = await browser.newPage() 8 | await page.goto('https://www.baidu.com') 9 | page = await browser.newPage() 10 | await page.goto('https://www.bing.com') 11 | pages = await browser.pages() 12 | print('Pages:', pages) 13 | page1 = pages[1] 14 | await page1.bringToFront() 15 | await asyncio.sleep(100) 16 | 17 | 18 | asyncio.get_event_loop().run_until_complete(main()) 19 | -------------------------------------------------------------------------------- /demo11.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | from pyquery import PyQuery as pq 4 | 5 | 6 | async def main(): 7 | browser = await launch() 8 | page = await browser.newPage() 9 | await page.goto('https://dynamic2.scrape.cuiqingcai.com/') 10 | await browser.close() 11 | 12 | 13 | asyncio.get_event_loop().run_until_complete(main()) 14 | 
-------------------------------------------------------------------------------- /demo12.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | from pyquery import PyQuery as pq 4 | 5 | 6 | async def main(): 7 | browser = await launch() 8 | page = await browser.newPage() 9 | await page.goto('https://dynamic2.scrape.cuiqingcai.com/') 10 | await page.waitForSelector('.item .name') 11 | j_result1 = await page.J('.item .name') 12 | j_result2 = await page.querySelector('.item .name') 13 | jj_result1 = await page.JJ('.item .name') 14 | jj_result2 = await page.querySelectorAll('.item .name') 15 | print('J Result1:', j_result1) 16 | print('J Result2:', j_result2) 17 | print('JJ Result1:', jj_result1) 18 | print('JJ Result2:', jj_result2) 19 | await browser.close() 20 | 21 | 22 | asyncio.get_event_loop().run_until_complete(main()) 23 | -------------------------------------------------------------------------------- /demo13.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | 5 | async def main(): 6 | browser = await launch() 7 | page = await browser.newPage() 8 | await page.authenticate({'username': 'admin', 'password': 'admin'}) 9 | await page.goto('https://static3.scrape.cuiqingcai.com/') 10 | print(await page.content()) 11 | await browser.close() 12 | 13 | 14 | if __name__ == '__main__': 15 | asyncio.get_event_loop().run_until_complete(main()) 16 | -------------------------------------------------------------------------------- /demo14.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | from pyquery import PyQuery as pq 4 | 5 | 6 | async def main(): 7 | browser = await launch(headless=False) 8 | page = await browser.newPage() 9 | await page.goto('https://dynamic2.scrape.cuiqingcai.com/') 10 | await 
page.waitForSelector('.item .name') 11 | await page.click('.item .name', options={ 12 | 'button': 'right', 13 | 'clickCount': 1, # 1 or 2 14 | 'delay': 3000, # 毫秒 15 | }) 16 | await browser.close() 17 | 18 | 19 | asyncio.get_event_loop().run_until_complete(main()) 20 | -------------------------------------------------------------------------------- /demo15.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | from pyquery import PyQuery as pq 4 | 5 | 6 | async def main(): 7 | browser = await launch(headless=False) 8 | page = await browser.newPage() 9 | await page.goto('https://dynamic2.scrape.cuiqingcai.com/') 10 | print('HTML:', await page.content()) 11 | print('Cookies:', await page.cookies()) 12 | await browser.close() 13 | 14 | 15 | asyncio.get_event_loop().run_until_complete(main()) 16 | -------------------------------------------------------------------------------- /demo16.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | from pyquery import PyQuery as pq 4 | 5 | 6 | async def main(): 7 | browser = await launch(headless=False) 8 | page = await browser.newPage() 9 | await page.goto('https://dynamic1.scrape.cuiqingcai.com/') 10 | await page.goto('https://dynamic2.scrape.cuiqingcai.com/') 11 | # 后退 12 | await page.goBack() 13 | # 前进 14 | await page.goForward() 15 | # 刷新 16 | await page.reload() 17 | # 保存 PDF 18 | await page.pdf() 19 | # 截图 20 | await page.screenshot() 21 | # 设置页面 HTML 22 | await page.setContent('<h2>Hello World</h2>') 23 | # 设置 User-Agent 24 | await page.setUserAgent('Python') 25 | # 设置 Headers 26 | await page.setExtraHTTPHeaders(headers={}) 27 | # 关闭 28 | await page.close() 29 | await browser.close() 30 | 31 | 32 | asyncio.get_event_loop().run_until_complete(main()) 33 | -------------------------------------------------------------------------------- /demo17.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | from pyquery import PyQuery as pq 4 | 5 | 6 | async def main(): 7 | browser = await launch(headless=False) 8 | page = await browser.newPage() 9 | await page.goto('https://www.taobao.com') 10 | # Type 'iPad' into the element matching '#q' 11 | await page.type('#q', 'iPad') 12 | # 关闭 13 | await asyncio.sleep(10) 14 | await browser.close() 15 | 16 | 17 | asyncio.get_event_loop().run_until_complete(main()) 18 | -------------------------------------------------------------------------------- /demo2.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | width, height = 1366, 768 5 | 6 | 7 | async def main(): 8 | browser = await launch() 9 | page = await browser.newPage() 10 | await page.setViewport({'width': width, 'height': height}) 11 | await page.goto('https://dynamic2.scrape.cuiqingcai.com/') 12 | await page.waitForSelector('.item .name') 13 | await asyncio.sleep(2) 14 | await page.screenshot(path='example.png') 15 | dimensions = await page.evaluate('''() => { 16 | return { 17 | width: document.documentElement.clientWidth, 18 | height: document.documentElement.clientHeight, 19 | deviceScaleFactor: window.devicePixelRatio, 20 | } 21 | }''') 22 | 23 | print(dimensions) 24 | await browser.close() 25 | 26 | 27 | asyncio.get_event_loop().run_until_complete(main()) 28 | -------------------------------------------------------------------------------- /demo3.py: -------------------------------------------------------------------------------- 1
| import asyncio 2 | from pyppeteer import launch 3 | 4 | 5 | async def main(): 6 | await launch(headless=False) 7 | await asyncio.sleep(100) 8 | 9 | 10 | asyncio.get_event_loop().run_until_complete(main()) 11 | -------------------------------------------------------------------------------- /demo4.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | async def main(): 5 | browser = await launch(devtools=True) 6 | page = await browser.newPage() 7 | await page.goto('https://www.baidu.com') 8 | await asyncio.sleep(100) 9 | 10 | asyncio.get_event_loop().run_until_complete(main()) -------------------------------------------------------------------------------- /demo5.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | 5 | async def main(): 6 | browser = await launch(headless=False, args=['--disable-infobars']) 7 | page = await browser.newPage() 8 | await page.goto('https://antispider1.scrape.cuiqingcai.com/') 9 | await asyncio.sleep(100) 10 | 11 | 12 | asyncio.get_event_loop().run_until_complete(main()) 13 | -------------------------------------------------------------------------------- /demo6.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | 5 | async def main(): 6 | browser = await launch(headless=False, args=['--disable-infobars']) 7 | page = await browser.newPage() 8 | await page.evaluateOnNewDocument('Object.defineProperty(navigator, "webdriver", {get: () => undefined})') 9 | await page.goto('https://antispider1.scrape.cuiqingcai.com/') 10 | await asyncio.sleep(100) 11 | 12 | 13 | asyncio.get_event_loop().run_until_complete(main()) 14 | -------------------------------------------------------------------------------- /demo7.py: 
-------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | width, height = 1366, 768 5 | 6 | 7 | async def main(): 8 | browser = await launch(headless=False, args=['--disable-infobars', f'--window-size={width},{height}']) 9 | page = await browser.newPage() 10 | await page.setViewport({'width': width, 'height': height}) 11 | await page.evaluateOnNewDocument('Object.defineProperty(navigator, "webdriver", {get: () => undefined})') 12 | await page.goto('https://antispider1.scrape.cuiqingcai.com/') 13 | await asyncio.sleep(100) 14 | 15 | 16 | asyncio.get_event_loop().run_until_complete(main()) 17 | -------------------------------------------------------------------------------- /demo8.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | width, height = 1366, 768 5 | 6 | 7 | async def main(): 8 | browser = await launch(headless=False, userDataDir='./userdata', 9 | args=['--disable-infobars', f'--window-size={width},{height}']) 10 | page = await browser.newPage() 11 | await page.setViewport({'width': width, 'height': height}) 12 | await page.goto('https://www.taobao.com') 13 | await asyncio.sleep(100) 14 | 15 | 16 | asyncio.get_event_loop().run_until_complete(main()) 17 | -------------------------------------------------------------------------------- /demo9.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | width, height = 1200, 768 5 | 6 | async def main(): 7 | browser = await launch(headless=False, 8 | args=['--disable-infobars', f'--window-size={width},{height}']) 9 | context = await browser.createIncognitoBrowserContext() 10 | page = await context.newPage() 11 | await page.setViewport({'width': width, 'height': height}) 12 | await page.goto('https://www.baidu.com') 13 | await asyncio.sleep(100) 14 | 15 | 
16 | asyncio.get_event_loop().run_until_complete(main()) 17 | -------------------------------------------------------------------------------- /example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Python3WebSpider/PyppeteerTest/5e624cf1b88a36ae41d07b6d6e4cbbf4059c5c44/example.pdf -------------------------------------------------------------------------------- /example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Python3WebSpider/PyppeteerTest/5e624cf1b88a36ae41d07b6d6e4cbbf4059c5c44/example.png -------------------------------------------------------------------------------- /launch.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | 5 | async def main(): 6 | await launch(headless=False) 7 | await asyncio.sleep(100) 8 | 9 | 10 | asyncio.get_event_loop().run_until_complete(main()) 11 | -------------------------------------------------------------------------------- /launch_set_viewport.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | width, height = 1366, 768 5 | 6 | 7 | async def main(): 8 | browser = await launch(headless=False, 9 | args=[f'--window-size={width},{height}']) 10 | page = await browser.newPage() 11 | await page.setViewport({'width': width, 'height': height}) 12 | await page.goto('https://www.taobao.com') 13 | await asyncio.sleep(100) 14 | 15 | 16 | asyncio.get_event_loop().run_until_complete(main()) 17 | -------------------------------------------------------------------------------- /launch_taobao_fail.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | 5 | async def main(): 6 | browser = await launch(headless=False, 
args=['--no-sandbox']) 7 | page = await browser.newPage() 8 | await page.goto('https://www.taobao.com') 9 | await asyncio.sleep(100) 10 | 11 | 12 | asyncio.get_event_loop().run_until_complete(main()) 13 | -------------------------------------------------------------------------------- /launch_userdata.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | 5 | async def main(): 6 | browser = await launch(headless=False, userDataDir='./userdata', args=['--disable-infobars']) 7 | page = await browser.newPage() 8 | await page.goto('https://www.taobao.com') 9 | await asyncio.sleep(100) 10 | 11 | 12 | asyncio.get_event_loop().run_until_complete(main()) 13 | -------------------------------------------------------------------------------- /launch_webdriver_detection.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | width, height = 1366, 768 5 | 6 | 7 | async def main(): 8 | browser = await launch(headless=False, args=['--disable-infobars', f'--window-size={width},{height}']) 9 | page = await browser.newPage() 10 | await page.setViewport({'width': width, 'height': height}) 11 | 12 | await page.goto('https://login.taobao.com/member/login.jhtml?redirectURL=https://www.taobao.com/') 13 | 14 | await page.evaluate( 15 | '''() =>{ Object.defineProperties(navigator,{ webdriver:{ get: () => false } }) }''') 16 | await asyncio.sleep(100) 17 | 18 | 19 | asyncio.get_event_loop().run_until_complete(main()) 20 | -------------------------------------------------------------------------------- /launch_with_dev.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | 4 | 5 | async def main(): 6 | browser = await launch(headless=False, args=['--disable-infobars']) 7 | page = await browser.newPage() 8 | await 
page.goto('https://www.taobao.com') 9 | await asyncio.sleep(100) 10 | 11 | 12 | asyncio.get_event_loop().run_until_complete(main()) 13 | -------------------------------------------------------------------------------- /preview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Python3WebSpider/PyppeteerTest/5e624cf1b88a36ae41d07b6d6e4cbbf4059c5c44/preview.pdf -------------------------------------------------------------------------------- /requests_test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from pyquery import PyQuery as pq 3 | 4 | url = 'http://quotes.toscrape.com/js/' 5 | response = requests.get(url) 6 | doc = pq(response.text) 7 | print('Quotes:', doc('.quote').length) -------------------------------------------------------------------------------- /start.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pyppeteer import launch 3 | from pyquery import PyQuery as pq 4 | 5 | 6 | async def main(): 7 | browser = await launch() 8 | page = await browser.newPage() 9 | await page.goto('http://quotes.toscrape.com/js/') 10 | doc = pq(await page.content()) 11 | print('Quotes:', doc('.quote').length) 12 | await browser.close() 13 | 14 | 15 | asyncio.get_event_loop().run_until_complete(main()) 16 | --------------------------------------------------------------------------------