├── .DS_Store
├── .gitignore
├── README.md
├── demo.py
├── demo1.py
├── demo10.py
├── demo11.py
├── demo12.py
├── demo13.py
├── demo14.py
├── demo15.py
├── demo16.py
├── demo17.py
├── demo2.py
├── demo3.py
├── demo4.py
├── demo5.py
├── demo6.py
├── demo7.py
├── demo8.py
├── demo9.py
├── example.pdf
├── example.png
├── launch.py
├── launch_set_viewport.py
├── launch_taobao_fail.py
├── launch_userdata.py
├── launch_webdriver_detection.py
├── launch_with_dev.py
├── preview.pdf
├── requests_test.py
└── start.py
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Python3WebSpider/PyppeteerTest/5e624cf1b88a36ae41d07b6d6e4cbbf4059c5c44/.DS_Store
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.idea
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyppeteerTest
2 | Pyppeteer Demo
3 |
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
4 |
async def main():
    """Load the JS-rendered quotes page, save a screenshot and a PDF, and print the viewport.

    Writes ``example.png`` and ``example.pdf`` to the current directory and
    prints the dict returned by the in-page JavaScript.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto('http://quotes.toscrape.com/js/')
        await page.screenshot({'path': 'example.png'})
        await page.pdf(path='example.pdf')
        # Runs inside the page; the returned object comes back as a dict.
        dimensions = await page.evaluate('''() => {
            return {
                width: document.documentElement.clientWidth,
                height: document.documentElement.clientHeight,
                deviceScaleFactor: window.devicePixelRatio,
            }
        }''')

        print(dimensions)
        # >>> {'width': 800, 'height': 600, 'deviceScaleFactor': 1}
    finally:
        # Close the browser even when one of the steps above raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
25 |
--------------------------------------------------------------------------------
/demo1.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 | from pyquery import PyQuery as pq
4 |
5 |
async def main():
    """Open the dynamic demo site, wait for the item names to render, and print them."""
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto('https://dynamic2.scrape.cuiqingcai.com/')
        # Block until at least one '.item .name' node exists in the DOM.
        await page.waitForSelector('.item .name')
        doc = pq(await page.content())
        names = [item.text() for item in doc('.item .name').items()]
        print('Names:', names)
    finally:
        # Close the browser even when navigation or the wait raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
18 |
--------------------------------------------------------------------------------
/demo10.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
4 |
async def main():
    """Open Baidu and Bing in two tabs, list the browser's pages, and focus one of them."""
    browser = await launch(headless=False)

    baidu_tab = await browser.newPage()
    await baidu_tab.goto('https://www.baidu.com')

    bing_tab = await browser.newPage()
    await bing_tab.goto('https://www.bing.com')

    open_pages = await browser.pages()
    print('Pages:', open_pages)

    # Index 1 of the list returned by browser.pages().
    # NOTE(review): presumably the Baidu tab, if the browser starts with an
    # initial blank page at index 0 -- confirm.
    await open_pages[1].bringToFront()

    # Keep the script running for 100 seconds.
    await asyncio.sleep(100)


asyncio.get_event_loop().run_until_complete(main())
19 |
--------------------------------------------------------------------------------
/demo11.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 | from pyquery import PyQuery as pq
4 |
5 |
async def main():
    """Launch a browser, open the dynamic demo site in a new page, and close the browser."""
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto('https://dynamic2.scrape.cuiqingcai.com/')
    finally:
        # Close the browser even when opening the page or navigating raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
14 |
--------------------------------------------------------------------------------
/demo12.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 | from pyquery import PyQuery as pq
4 |
5 |
async def main():
    """Print the results of the selector methods J, querySelector, JJ and querySelectorAll.

    All four calls use the same '.item .name' selector on the dynamic demo site.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto('https://dynamic2.scrape.cuiqingcai.com/')
        await page.waitForSelector('.item .name')
        j_result1 = await page.J('.item .name')
        j_result2 = await page.querySelector('.item .name')
        jj_result1 = await page.JJ('.item .name')
        jj_result2 = await page.querySelectorAll('.item .name')
        print('J Result1:', j_result1)
        print('J Result2:', j_result2)
        print('JJ Result1:', jj_result1)
        print('JJ Result2:', jj_result2)
    finally:
        # Close the browser even when a step above raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
23 |
--------------------------------------------------------------------------------
/demo13.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
4 |
async def main():
    """Set credentials with page.authenticate, open the static demo site, and print its HTML."""
    browser = await launch()
    try:
        page = await browser.newPage()
        # Credentials are registered before navigating.
        await page.authenticate({'username': 'admin', 'password': 'admin'})
        await page.goto('https://static3.scrape.cuiqingcai.com/')
        print(await page.content())
    finally:
        # Close the browser even when authentication or navigation raises.
        await browser.close()


if __name__ == '__main__':
    asyncio.get_event_loop().run_until_complete(main())
16 |
--------------------------------------------------------------------------------
/demo14.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 | from pyquery import PyQuery as pq
4 |
5 |
async def main():
    """Open the dynamic demo site in a visible browser and right-click the first item name."""
    browser = await launch(headless=False)
    try:
        page = await browser.newPage()
        await page.goto('https://dynamic2.scrape.cuiqingcai.com/')
        await page.waitForSelector('.item .name')
        await page.click('.item .name', options={
            'button': 'right',
            'clickCount': 1,  # 1 or 2
            'delay': 3000,  # milliseconds
        })
    finally:
        # Close the browser even when the wait or the click raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
20 |
--------------------------------------------------------------------------------
/demo15.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 | from pyquery import PyQuery as pq
4 |
5 |
async def main():
    """Open the dynamic demo site in a visible browser and print its HTML and cookies."""
    browser = await launch(headless=False)
    try:
        page = await browser.newPage()
        await page.goto('https://dynamic2.scrape.cuiqingcai.com/')
        print('HTML:', await page.content())
        print('Cookies:', await page.cookies())
    finally:
        # Close the browser even when navigation raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
16 |
--------------------------------------------------------------------------------
/demo16.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 | from pyquery import PyQuery as pq
4 |
5 |
async def main():
    """Walk through common Page operations one after another in a visible browser.

    Covers history navigation, reload, PDF and screenshot rendering, setting
    the page content, the User-Agent, extra HTTP headers, and closing the page.
    """
    browser = await launch(headless=False)
    try:
        page = await browser.newPage()
        await page.goto('https://dynamic1.scrape.cuiqingcai.com/')
        await page.goto('https://dynamic2.scrape.cuiqingcai.com/')
        # Go back in history
        await page.goBack()
        # Go forward in history
        await page.goForward()
        # Reload the page
        await page.reload()
        # Render a PDF (no path is given and the return value is discarded)
        await page.pdf()
        # Take a screenshot (no path is given and the return value is discarded)
        await page.screenshot()
        # Set the page HTML. The literal must stay on one line: a single-quoted
        # string broken across lines is a syntax error.
        # NOTE(review): the <h2> wrapper is a reconstruction of markup that was
        # missing around the text -- confirm the intended tag.
        await page.setContent('<h2>Hello World</h2>')
        # Set the User-Agent
        await page.setUserAgent('Python')
        # Set extra HTTP headers
        await page.setExtraHTTPHeaders(headers={})
        # Close the page
        await page.close()
    finally:
        # Close the browser even when one of the steps above raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
33 |
--------------------------------------------------------------------------------
/demo17.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 | from pyquery import PyQuery as pq
4 |
5 |
async def main():
    """Open Taobao in a visible browser and type 'iPad' into the element matching '#q'."""
    browser = await launch(headless=False)
    try:
        page = await browser.newPage()
        await page.goto('https://www.taobao.com')
        # Type the text into the element matching '#q'
        # (presumably the search box -- confirm against the page).
        await page.type('#q', 'iPad')
        # Pause for 10 seconds before closing.
        await asyncio.sleep(10)
    finally:
        # Close the browser even when navigation or typing raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
18 |
--------------------------------------------------------------------------------
/demo2.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
# Viewport size passed to page.setViewport below.
width, height = 1366, 768


async def main():
    """Screenshot the dynamic demo site at a fixed viewport and print the measured dimensions.

    Writes ``example.png`` to the current directory.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.setViewport({'width': width, 'height': height})
        await page.goto('https://dynamic2.scrape.cuiqingcai.com/')
        await page.waitForSelector('.item .name')
        # Extra 2-second pause after the selector appears, before the screenshot.
        await asyncio.sleep(2)
        await page.screenshot(path='example.png')
        dimensions = await page.evaluate('''() => {
            return {
                width: document.documentElement.clientWidth,
                height: document.documentElement.clientHeight,
                deviceScaleFactor: window.devicePixelRatio,
            }
        }''')

        print(dimensions)
    finally:
        # Close the browser even when one of the steps above raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
28 |
--------------------------------------------------------------------------------
/demo3.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
4 |
async def main():
    """Launch a browser with headless=False and keep the script running for 100 seconds."""
    launch_options = {'headless': False}
    await launch(**launch_options)
    await asyncio.sleep(100)


asyncio.get_event_loop().run_until_complete(main())
11 |
--------------------------------------------------------------------------------
/demo4.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
async def main():
    """Launch a browser with devtools=True, open Baidu, and wait 100 seconds."""
    chrome = await launch(devtools=True)
    tab = await chrome.newPage()
    await tab.goto('https://www.baidu.com')
    # Keep the script running for 100 seconds.
    await asyncio.sleep(100)


asyncio.get_event_loop().run_until_complete(main())
--------------------------------------------------------------------------------
/demo5.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
4 |
async def main():
    """Launch a visible browser with --disable-infobars and open the antispider demo site."""
    chrome_args = ['--disable-infobars']
    chrome = await launch(headless=False, args=chrome_args)
    tab = await chrome.newPage()
    await tab.goto('https://antispider1.scrape.cuiqingcai.com/')
    # Keep the script running for 100 seconds.
    await asyncio.sleep(100)


asyncio.get_event_loop().run_until_complete(main())
13 |
--------------------------------------------------------------------------------
/demo6.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
4 |
async def main():
    """Open the antispider demo site with navigator.webdriver overridden to return undefined."""
    chrome = await launch(headless=False, args=['--disable-infobars'])
    tab = await chrome.newPage()

    # Registered before navigation via evaluateOnNewDocument.
    hide_webdriver_js = 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
    await tab.evaluateOnNewDocument(hide_webdriver_js)

    await tab.goto('https://antispider1.scrape.cuiqingcai.com/')
    # Keep the script running for 100 seconds.
    await asyncio.sleep(100)


asyncio.get_event_loop().run_until_complete(main())
14 |
--------------------------------------------------------------------------------
/demo7.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
# Used for both the --window-size argument and the page viewport.
width = 1366
height = 768


async def main():
    """Open the antispider demo site with a fixed window/viewport size and webdriver hidden."""
    chrome_args = ['--disable-infobars', f'--window-size={width},{height}']
    chrome = await launch(headless=False, args=chrome_args)
    tab = await chrome.newPage()

    viewport = {'width': width, 'height': height}
    await tab.setViewport(viewport)

    # Registered before navigation via evaluateOnNewDocument.
    hide_webdriver_js = 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
    await tab.evaluateOnNewDocument(hide_webdriver_js)

    await tab.goto('https://antispider1.scrape.cuiqingcai.com/')
    # Keep the script running for 100 seconds.
    await asyncio.sleep(100)


asyncio.get_event_loop().run_until_complete(main())
17 |
--------------------------------------------------------------------------------
/demo8.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
# Used for both the --window-size argument and the page viewport.
width = 1366
height = 768


async def main():
    """Open Taobao in a visible browser launched with userDataDir='./userdata'."""
    chrome_args = ['--disable-infobars', f'--window-size={width},{height}']
    chrome = await launch(headless=False, userDataDir='./userdata', args=chrome_args)
    tab = await chrome.newPage()

    viewport = {'width': width, 'height': height}
    await tab.setViewport(viewport)

    await tab.goto('https://www.taobao.com')
    # Keep the script running for 100 seconds.
    await asyncio.sleep(100)


asyncio.get_event_loop().run_until_complete(main())
17 |
--------------------------------------------------------------------------------
/demo9.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
# Used for both the --window-size argument and the page viewport.
width = 1200
height = 768


async def main():
    """Open Baidu in a page that belongs to a context from createIncognitoBrowserContext."""
    chrome_args = ['--disable-infobars', f'--window-size={width},{height}']
    chrome = await launch(headless=False, args=chrome_args)

    # The page is created from this context rather than from the browser itself.
    incognito = await chrome.createIncognitoBrowserContext()
    tab = await incognito.newPage()

    viewport = {'width': width, 'height': height}
    await tab.setViewport(viewport)

    await tab.goto('https://www.baidu.com')
    # Keep the script running for 100 seconds.
    await asyncio.sleep(100)


asyncio.get_event_loop().run_until_complete(main())
17 |
--------------------------------------------------------------------------------
/example.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Python3WebSpider/PyppeteerTest/5e624cf1b88a36ae41d07b6d6e4cbbf4059c5c44/example.pdf
--------------------------------------------------------------------------------
/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Python3WebSpider/PyppeteerTest/5e624cf1b88a36ae41d07b6d6e4cbbf4059c5c44/example.png
--------------------------------------------------------------------------------
/launch.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
4 |
async def main():
    """Launch a browser with headless=False and keep the script running for 100 seconds."""
    launch_options = {'headless': False}
    await launch(**launch_options)
    await asyncio.sleep(100)


asyncio.get_event_loop().run_until_complete(main())
11 |
--------------------------------------------------------------------------------
/launch_set_viewport.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
# Used for both the --window-size argument and the page viewport.
width = 1366
height = 768


async def main():
    """Open Taobao in a visible browser whose window and viewport share one size."""
    chrome_args = [f'--window-size={width},{height}']
    chrome = await launch(headless=False, args=chrome_args)
    tab = await chrome.newPage()

    viewport = {'width': width, 'height': height}
    await tab.setViewport(viewport)

    await tab.goto('https://www.taobao.com')
    # Keep the script running for 100 seconds.
    await asyncio.sleep(100)


asyncio.get_event_loop().run_until_complete(main())
17 |
--------------------------------------------------------------------------------
/launch_taobao_fail.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
4 |
async def main():
    """Launch a visible browser with --no-sandbox, open Taobao, and wait 100 seconds."""
    chrome_args = ['--no-sandbox']
    chrome = await launch(headless=False, args=chrome_args)
    tab = await chrome.newPage()
    await tab.goto('https://www.taobao.com')
    # Keep the script running for 100 seconds.
    await asyncio.sleep(100)


asyncio.get_event_loop().run_until_complete(main())
13 |
--------------------------------------------------------------------------------
/launch_userdata.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
4 |
async def main():
    """Open Taobao in a visible browser launched with userDataDir='./userdata'."""
    chrome_args = ['--disable-infobars']
    chrome = await launch(headless=False, userDataDir='./userdata', args=chrome_args)
    tab = await chrome.newPage()
    await tab.goto('https://www.taobao.com')
    # Keep the script running for 100 seconds.
    await asyncio.sleep(100)


asyncio.get_event_loop().run_until_complete(main())
13 |
--------------------------------------------------------------------------------
/launch_webdriver_detection.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
# Used for both the --window-size argument and the page viewport.
width, height = 1366, 768


async def main():
    """Open the Taobao login page with navigator.webdriver overridden to return false.

    The override is registered with evaluateOnNewDocument before navigating.
    Calling page.evaluate after goto() patches the document only once it has
    already loaded.
    """
    browser = await launch(headless=False, args=['--disable-infobars', f'--window-size={width},{height}'])
    page = await browser.newPage()
    await page.setViewport({'width': width, 'height': height})

    # Register the override first so it is in place for the navigation below.
    await page.evaluateOnNewDocument(
        '''() =>{ Object.defineProperties(navigator,{ webdriver:{ get: () => false } }) }''')

    await page.goto('https://login.taobao.com/member/login.jhtml?redirectURL=https://www.taobao.com/')

    # Keep the script running for 100 seconds.
    await asyncio.sleep(100)


asyncio.get_event_loop().run_until_complete(main())
20 |
--------------------------------------------------------------------------------
/launch_with_dev.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 |
4 |
async def main():
    """Launch a visible browser with --disable-infobars, open Taobao, and wait 100 seconds."""
    chrome_args = ['--disable-infobars']
    chrome = await launch(headless=False, args=chrome_args)
    tab = await chrome.newPage()
    await tab.goto('https://www.taobao.com')
    # Keep the script running for 100 seconds.
    await asyncio.sleep(100)


asyncio.get_event_loop().run_until_complete(main())
13 |
--------------------------------------------------------------------------------
/preview.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Python3WebSpider/PyppeteerTest/5e624cf1b88a36ae41d07b6d6e4cbbf4059c5c44/preview.pdf
--------------------------------------------------------------------------------
/requests_test.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from pyquery import PyQuery as pq
3 |
# Fetch the page with plain HTTP (no browser) and count the '.quote' nodes
# present in the returned HTML.
url = 'http://quotes.toscrape.com/js/'
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of counting nodes in an error page.
response.raise_for_status()
doc = pq(response.text)
print('Quotes:', doc('.quote').length)
--------------------------------------------------------------------------------
/start.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pyppeteer import launch
3 | from pyquery import PyQuery as pq
4 |
5 |
async def main():
    """Load the JS-rendered quotes page in a browser and print how many '.quote' nodes it has."""
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto('http://quotes.toscrape.com/js/')
        doc = pq(await page.content())
        print('Quotes:', doc('.quote').length)
    finally:
        # Close the browser even when navigation or parsing raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
16 |
--------------------------------------------------------------------------------