├── README.md
└── scrape_video.py


/README.md:
--------------------------------------------------------------------------------
 1 | # TikTok Video Scraper w/o watermark
 2 | 
 3 | This is the code that follows the tutorial [here](https://www.youtube.com/watch?v=UsT11sOD1JA)
 4 | 
 5 | Common errors / issues:
 6 | 1. There is a TikTok captcha
 7 | - Increase the page timeout, solve the captcha manually, and then let the script continue on its own
 8 | 2. If you get an error during step 4 for downloading the video
 9 | - Make sure to check your cookies / headers 
10 | 
11 | 
12 | If you get stuck and need help, make sure to join the [discord](https://beacons.ai/codewithvincent), and drop your question in the questions channel.
13 | 


--------------------------------------------------------------------------------
/scrape_video.py:
--------------------------------------------------------------------------------
import os
import re
import shutil
import time
from urllib.request import urlopen

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
 6 | 
 7 | def downloadVideo(link, id):
 8 |     print(f"Downloading video {id} from: {link}")
 9 |     cookies = {
10 |         # Please get this data from the console network activity tool
11 |         # This is explained in the video :)
12 |     }
13 | 
14 |     headers = {
15 |         # Please get this data from the console network activity tool
16 |         # This is explained in the video :)
17 |     }
18 | 
19 |     params = {
20 |         'url': 'dl',
21 |     }
22 | 
23 |     data = {
24 |         'id': link,
25 |         'locale': 'en',
26 |         'tt': '', # NOTE: This value gets changed, please use the value that you get when you copy the curl command from the network console
27 |     }
28 |     
29 |     print("STEP 4: Getting the download link")
30 |     print("If this step fails, PLEASE read the steps above")
31 |     response = requests.post('https://ssstik.io/abc', params=params, cookies=cookies, headers=headers, data=data)
32 |     downloadSoup = BeautifulSoup(response.text, "html.parser")
33 | 
34 |     downloadLink = downloadSoup.a["href"]
35 |     videoTitle = downloadSoup.p.getText().strip()
36 | 
37 |     print("STEP 5: Saving the video :)")
38 |     mp4File = urlopen(downloadLink)
39 |     # Feel free to change the download directory
40 |     with open(f"videos/{id}-{videoTitle}.mp4", "wb") as output:
41 |         while True:
42 |             data = mp4File.read(4096)
43 |             if data:
44 |                 output.write(data)
45 |             else:
46 |                 break
47 | 
# Script body: open a TikTok profile in Chrome, scroll through the page,
# collect the video links and download each one with downloadVideo().

print("STEP 1: Open Chrome browser")
options = Options()
options.add_argument("start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
driver = webdriver.Chrome(options=options)

# The browser is only needed to collect the links; always shut it down,
# even if navigation or scripting fails, so no Chrome process is left behind.
try:
    # Change the tiktok link
    driver.get("https://www.tiktok.com/@papayaho.cat")

    # IF YOU GET A TIKTOK CAPTCHA, CHANGE THE TIMEOUT HERE
    # to 60 seconds, just enough time for you to complete the captcha yourself.
    time.sleep(1)

    scroll_pause_time = 1
    screen_height = driver.execute_script("return window.screen.height;")
    i = 1

    print("STEP 2: Scrolling page")
    # Scroll one screen height at a time, pausing after each step, until the
    # next scroll target lies beyond the document's current scroll height.
    while True:
        driver.execute_script(f"window.scrollTo(0, {screen_height}*{i});")
        i += 1
        time.sleep(scroll_pause_time)
        scroll_height = driver.execute_script("return document.body.scrollHeight;")
        if screen_height * i > scroll_height:
            break

    # this class may change, so make sure to inspect the page and find the correct class
    className = "tiktok-1s72ajp-DivWrapper"

    # getElementsByClassName returns an HTMLCollection, which has no standard
    # forEach; convert it to an array first. Wrappers without an <a> are
    # skipped instead of aborting the whole collection. The class name is
    # passed as a script argument rather than spliced into the JS source.
    script = (
        "return Array.from(document.getElementsByClassName(arguments[0]))"
        ".map(item => item.querySelector('a'))"
        ".filter(anchor => anchor !== null)"
        ".map(anchor => anchor.href);"
    )

    urlsToDownload = driver.execute_script(script, className)
finally:
    driver.quit()

print(f"STEP 3: Time to download {len(urlsToDownload)} videos")
for index, url in enumerate(urlsToDownload):
    print(f"Downloading video: {index}")
    downloadVideo(url, index)
    # Pause between requests to the download service.
    time.sleep(10)
90 | 


--------------------------------------------------------------------------------