"""TikTok Video Scraper w/o watermark.

Repository layout:

    ├── README.md
    └── scrape_video.py

README.md
---------
This is the code that follows the tutorial at
https://www.youtube.com/watch?v=UsT11sOD1JA

Common errors / issues:

1. There is a TikTok captcha
   - Increase the page timeout, manually solve the captcha, and then let
     the script run by itself.
2. You get an error during step 4 (downloading the video)
   - Make sure to check your cookies / headers.

If you get stuck and need help, make sure to join the discord
(https://beacons.ai/codewithvincent) and drop your question in the
questions channel.
"""

import os
import shutil
import time
from urllib.request import urlopen

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Change the tiktok link
PROFILE_URL = "https://www.tiktok.com/@papayaho.cat"

# This class may change, so make sure to inspect the page and find the
# correct class.
VIDEO_CLASS_NAME = "tiktok-1s72ajp-DivWrapper"

# Feel free to change the download directory
DOWNLOAD_DIR = "videos"

# Characters that are not allowed in file names on at least one major OS.
_INVALID_FILENAME_CHARS = '<>:"/\\|?*'


def _safe_filename(title, max_length=150):
    """Return *title* with unsafe file-name characters replaced by ``_``.

    The result is also cut to *max_length* characters so that a long video
    caption cannot exceed the file system's file-name length limit.
    """
    cleaned = "".join(
        "_" if ch in _INVALID_FILENAME_CHARS or ord(ch) < 32 else ch
        for ch in title
    )
    return cleaned[:max_length].strip()


def downloadVideo(link, id):
    """Download one TikTok video through ssstik.io into ``DOWNLOAD_DIR``.

    Args:
        link: URL of the TikTok video page.
        id: Value used as the file-name prefix (the caller passes the video's
            index). The name shadows the builtin but is kept so existing
            keyword callers keep working.

    Raises:
        requests.HTTPError: ssstik.io answered with an error status.
        RuntimeError: the response did not contain a download link, which
            usually means the cookies / headers / ``tt`` value are stale.
    """
    print(f"Downloading video {id} from: {link}")
    cookies = {
        # Please get this data from the console network activity tool
        # This is explained in the video :)
    }

    headers = {
        # Please get this data from the console network activity tool
        # This is explained in the video :)
    }

    params = {
        'url': 'dl',
    }

    data = {
        'id': link,
        'locale': 'en',
        # NOTE: This value gets changed, please use the value that you get
        # when you copy the curl command from the network console
        'tt': '',
    }

    print("STEP 4: Getting the download link")
    print("If this step fails, PLEASE read the steps above")
    response = requests.post(
        'https://ssstik.io/abc',
        params=params,
        cookies=cookies,
        headers=headers,
        data=data,
        timeout=30,  # without a timeout a stalled connection hangs forever
    )
    response.raise_for_status()
    download_soup = BeautifulSoup(response.text, "html.parser")

    anchor = download_soup.a
    if anchor is None or not anchor.get("href"):
        raise RuntimeError(
            "No download link found in the ssstik.io response; "
            "check your cookies / headers / 'tt' value"
        )
    download_link = anchor["href"]

    title_tag = download_soup.p
    video_title = title_tag.getText().strip() if title_tag is not None else ""

    print("STEP 5: Saving the video :)")
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    target_path = os.path.join(
        DOWNLOAD_DIR, f"{id}-{_safe_filename(video_title)}.mp4"
    )
    # Both the HTTP response and the output file are closed even on error.
    with urlopen(download_link) as mp4_file, open(target_path, "wb") as output:
        shutil.copyfileobj(mp4_file, output)


def _collect_video_urls():
    """Open the profile page in Chrome, scroll it, and return the video URLs."""
    print("STEP 1: Open Chrome browser")
    options = Options()
    options.add_argument("start-maximized")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(PROFILE_URL)

        # IF YOU GET A TIKTOK CAPTCHA, CHANGE THE TIMEOUT HERE
        # to 60 seconds, just enough time for you to complete the captcha
        # yourself.
        time.sleep(1)

        scroll_pause_time = 1
        screen_height = driver.execute_script("return window.screen.height;")
        i = 1

        print("STEP 2: Scrolling page")
        while True:
            driver.execute_script(f"window.scrollTo(0, {screen_height}*{i});")
            i += 1
            time.sleep(scroll_pause_time)
            scroll_height = driver.execute_script(
                "return document.body.scrollHeight;"
            )
            if screen_height * i > scroll_height:
                break

        # getElementsByClassName returns an HTMLCollection, which has no
        # forEach; convert it to an array first. Wrappers without a link are
        # skipped instead of throwing on `.href`.
        script = (
            "return Array.from(document.getElementsByClassName(arguments[0]))"
            ".map(item => item.querySelector('a'))"
            ".filter(anchor => anchor !== null)"
            ".map(anchor => anchor.href);"
        )
        return driver.execute_script(script, VIDEO_CLASS_NAME)
    finally:
        # The browser is only needed to collect the links.
        driver.quit()


def main():
    """Collect every video URL on the profile page and download each one."""
    urls_to_download = _collect_video_urls()

    print(f"STEP 3: Time to download {len(urls_to_download)} videos")
    for index, url in enumerate(urls_to_download):
        print(f"Downloading video: {index}")
        downloadVideo(url, index)
        # Pause between downloads.
        time.sleep(10)


if __name__ == "__main__":
    main()