├── tests ├── __init__.py ├── test_user.py ├── test_utils.py ├── test_captcha.py └── captcha_examples.json ├── examples ├── __init__.py ├── check_tool.py ├── ms_token_example.py ├── hashtag_example.py ├── video_bytes_example .py ├── comments_example.py ├── video_example.py ├── network_info_example.py └── user_example.py ├── .gitattributes ├── pytok ├── __init__.py ├── api │ ├── trending.py │ ├── __init__.py │ ├── sound.py │ ├── search.py │ ├── hashtag.py │ ├── user.py │ ├── base.py │ └── video.py ├── exceptions.py ├── helpers.py ├── tiktok.py ├── captcha_solver.py └── utils.py ├── requirements.txt ├── .gitignore ├── CITATION.cff ├── setup.py └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /pytok/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | .. 
include:: ../README.md 3 | """ 4 | __docformat__ = "restructuredtext" 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | playwright 3 | pyvirtualdisplay 4 | opencv-python 5 | brotli 6 | pandas 7 | tqdm 8 | patchright 9 | pyclick 10 | TikTokApi -------------------------------------------------------------------------------- /examples/check_tool.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from pytok.tiktok import PyTok 5 | 6 | async def main(): 7 | async with PyTok(browser="chromium") as api: 8 | await api._page.goto("https://www.browserscan.net/") 9 | pass 10 | 11 | if __name__ == "__main__": 12 | asyncio.run(main()) 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/* 2 | bmp.log 3 | geckodriver.log 4 | server.log 5 | browsermob-proxy/* 6 | myScripts/* 7 | test.py 8 | debug.log 9 | res.html 10 | tmp/* 11 | dist/* 12 | *.egg-info 13 | tmp/ 14 | tmp 15 | .pytest_cache/* 16 | test.mp4 17 | test.txt 18 | .pytest_cache/* 19 | tests/__pycache__/* 20 | *.pyc 21 | acrawl.js 22 | test2.py 23 | build 24 | MANIFEST 25 | src 26 | .vscode 27 | .env 28 | tests/data -------------------------------------------------------------------------------- /examples/ms_token_example.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from pytok.tiktok import PyTok 5 | 6 | async def main(): 7 | async with PyTok(headless=True) as api: 8 | user = api.user(username="therock") 9 | # get random user to load page 10 | user_data = await user.info() 11 | ms_tokens = await api.get_ms_tokens() 12 | print(ms_tokens) 13 | 14 | if __name__ == "__main__": 15 
| asyncio.run(main()) 16 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - family-names: "Steel" 5 | given-names: "Ben" 6 | orcid: "https://orcid.org/0009-0006-3845-1394" 7 | - family-names: "Abrahams" 8 | given-names: "Alexei" 9 | orcid: "https://orcid.org/0000-0002-6547-072X" 10 | title: "PyTok" 11 | version: 0.1.0 12 | doi: 10.5281/zenodo.12802714 13 | date-released: 2024-07-23 14 | url: "https://github.com/networkdynamics/pytok" -------------------------------------------------------------------------------- /tests/test_user.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from pytok.tiktok import PyTok 5 | 6 | # username = "brianjordanalvarez" 7 | username = 'marierenaudstab' 8 | 9 | 10 | async def test_user_videos(): 11 | async with PyTok(headless=True) as api: 12 | user = api.user(username=username) 13 | user_data = await user.info() 14 | count = 0 15 | async for video in api.user(username=username).videos(count=100): 16 | count += 1 17 | 18 | assert count >= 120 19 | 20 | 21 | if __name__ == '__main__': 22 | asyncio.run(test_user_videos()) -------------------------------------------------------------------------------- /examples/hashtag_example.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from pytok.tiktok import PyTok 5 | 6 | hashtag_name = 'fyp' 7 | 8 | async def main(): 9 | async with PyTok(manual_captcha_solves=True) as api: 10 | hashtag = api.hashtag(name=hashtag_name) 11 | 12 | videos = [] 13 | async for video in hashtag.videos(count=1000): 14 | video_info = await video.info() 15 | videos.append(video_info) 16 | 17 | with open("out.json", "w") as out_file: 18 | 
json.dump(videos, out_file) 19 | 20 | if __name__ == "__main__": 21 | asyncio.run(main()) -------------------------------------------------------------------------------- /examples/video_bytes_example .py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from pytok.tiktok import PyTok 5 | 6 | username = 'therock' 7 | id = '7296444945991224622' 8 | 9 | async def main(): 10 | async with PyTok() as api: 11 | video = api.video(username=username, id=id) 12 | 13 | # Bytes of the TikTok video 14 | video_data = await video.info() 15 | video_bytes = await video.bytes() 16 | 17 | with open("out.json", "w") as out_file: 18 | json.dump(video_data, out_file) 19 | 20 | with open("out.mp4", "wb") as out_file: 21 | out_file.write(video_bytes) 22 | 23 | if __name__ == "__main__": 24 | asyncio.run(main()) 25 | 26 | -------------------------------------------------------------------------------- /examples/comments_example.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from pytok.tiktok import PyTok 5 | 6 | videos = [ 7 | { 8 | 'id': '7058106162235100462', 9 | 'author': { 10 | 'uniqueId': 'charlesmcbryde' 11 | } 12 | } 13 | ] 14 | 15 | async def main(): 16 | async with PyTok(headless=False) as api: 17 | for video in videos: 18 | comments = [] 19 | async for comment in api.video(id=video['id'], username=video['author']['uniqueId']).comments(count=1000): 20 | comments.append(comment) 21 | 22 | assert len(comments) > 0, "No comments found" 23 | with open("out.json", "w") as f: 24 | json.dump(comments, f) 25 | 26 | if __name__ == "__main__": 27 | asyncio.run(main()) 28 | -------------------------------------------------------------------------------- /pytok/api/trending.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | from urllib.parse 
import urlencode 5 | 6 | from .video import Video 7 | from .sound import Sound 8 | from .user import User 9 | from .hashtag import Hashtag 10 | 11 | from typing import TYPE_CHECKING, Iterator 12 | 13 | if TYPE_CHECKING: 14 | from ..tiktok import PyTok 15 | 16 | 17 | class Trending: 18 | """Contains static methods related to trending.""" 19 | 20 | parent: PyTok 21 | 22 | @staticmethod 23 | def videos(count=30, **kwargs) -> Iterator[Video]: 24 | """ 25 | Returns Videos that are trending on TikTok. 26 | 27 | - Parameters: 28 | - count (int): The amount of videos you want returned. 29 | """ 30 | 31 | raise NotImplementedError() 32 | -------------------------------------------------------------------------------- /examples/video_example.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from pytok.tiktok import PyTok 5 | 6 | username = 'therock' 7 | id = '7296444945991224622' 8 | 9 | async def main(): 10 | async with PyTok() as api: 11 | video = api.video(username=username, id=id) 12 | 13 | # Bytes of the TikTok video 14 | video_data = await video.info() 15 | related_videos = [] 16 | async for related_video in video.related_videos(): 17 | related_videos.append(related_video) 18 | 19 | with open("out.json", "w") as out_file: 20 | json.dump(video_data, out_file) 21 | 22 | with open("related.json", "w") as out_file: 23 | json.dump(list(related_videos), out_file) 24 | 25 | if __name__ == "__main__": 26 | asyncio.run(main()) 27 | 28 | -------------------------------------------------------------------------------- /examples/network_info_example.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from pytok.tiktok import PyTok 5 | 6 | username = 'therock' 7 | id = '7296444945991224622' 8 | 9 | async def main(): 10 | async with PyTok() as api: 11 | video = api.video(username=username, id=id) 12 | 13 | # Bytes of the TikTok 
video 14 | video_data = await video.info() 15 | network_data = await video.network_info() 16 | bytes_network_data = await video.bytes_network_info() 17 | 18 | all_data = { 19 | "video_data": video_data, 20 | "network_data": network_data, 21 | "bytes_network_data": bytes_network_data 22 | } 23 | 24 | with open("out.json", "w") as out_file: 25 | json.dump(all_data, out_file) 26 | 27 | if __name__ == "__main__": 28 | asyncio.run(main()) 29 | 30 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from pytok import utils 6 | 7 | @pytest.mark.parametrize("json_file_path", [os.path.join(".", "tests", "data", "20230915-200856_error_videos.json")]) 8 | def test_get_video_df(json_file_path): 9 | csv_file_path = json_file_path.replace(".json", ".csv") 10 | video_df = utils.try_load_video_df_from_file(csv_file_path, file_paths=[json_file_path]) 11 | 12 | assert video_df is not None 13 | assert len(video_df) > 0 14 | 15 | @pytest.mark.parametrize("json_file_path", [os.path.join(".", "tests", "data", "20230915-200856_error_users.json")]) 16 | def test_get_user_df(json_file_path): 17 | csv_file_path = json_file_path.replace(".json", ".csv") 18 | user_df = utils.try_load_user_df_from_file(csv_file_path, file_paths=[json_file_path]) 19 | 20 | assert user_df is not None 21 | assert len(user_df) > 0 22 | 23 | if __name__ == "__main__": 24 | pytest.main([__file__]) -------------------------------------------------------------------------------- /examples/user_example.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import logging 4 | 5 | from pytok.tiktok import PyTok 6 | 7 | # Enable debug logging 8 | logging.basicConfig( 9 | level=logging.DEBUG, 10 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 11 | ) 12 | 13 | async 
def main(): 14 | users = ['therock'] 15 | async with PyTok(logging_level=logging.DEBUG, manual_captcha_solves=True, log_captcha_solves=True) as api: 16 | for username in users: 17 | user = api.user(username=username) 18 | user_data = await user.info() 19 | 20 | videos = [] 21 | videos_bytes = [] 22 | async for video in user.videos(): 23 | video_data = await video.info() 24 | videos.append(video_data) 25 | 26 | assert len(videos) > 0, "No videos found" 27 | with open("out.json", "w") as f: 28 | json.dump(videos, f) 29 | 30 | if __name__ == "__main__": 31 | asyncio.run(main()) 32 | -------------------------------------------------------------------------------- /pytok/api/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains classes that all represent different types of data sent back by the TikTok servers. 3 | 4 | The files within this module correspond to what type of object is described and all have different methods associated with them. 5 | 6 | 7 | ### How To Interpret TikTok Data 8 | There are quite a few ambiguous keys in the JSON that TikTok returns so here's a section that tries to document some of them. 9 | 10 | **Note**: These are incomplete; if you get confused about something, feel free to add it here as a PR once you figure it out. 11 | 12 | | JSON Key | Description | 13 | |------------------|-------------| 14 | | createTime | The [unix epoch](https://docs.python.org/3/library/datetime.html#datetime.date.fromtimestamp) of creation, all other time fields are also unix epochs. | 15 | | secUid & (userId or id) | Two different unique attributes that are used in conjunction to reference a specific account, so if you're storing users somewhere in a database, you should store both secUid & userId. | 16 | | id | A unique attribute used to reference a non-user object like video, hashtag, etc. | 17 | | diggCount | The likes for a specific video.
| 18 | | digged | Used to check if the current user has liked/digged a video, this will always be false since this package doesn't support logged-in user functions. | 19 | """ 20 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | import os.path 3 | import setuptools 4 | 5 | with open("README.md", "r", encoding="utf-8") as fh: 6 | long_description = fh.read() 7 | 8 | setuptools.setup( 9 | name="pytok", 10 | packages=setuptools.find_packages(), 11 | version="0.0.1", 12 | license="MIT", 13 | description="Playwright based version of The Unofficial TikTok API Wrapper in Python 3.", 14 | author="Ben Steel", 15 | author_email="bendavidsteel@gmail.com", 16 | url="https://github.com/networkdynamics/pytok", 17 | long_description=long_description, 18 | long_description_content_type="text/markdown", 19 | keywords=["tiktok", "python3", "api", "unofficial", "tiktok-api", "tiktok api"], 20 | install_requires=["requests", "playwright", "undetected_playwright", "pyvirtualdisplay", "tqdm", "opencv-python", "brotli", "patchright", "pyclick", "TikTokApi"], 21 | classifiers=[ 22 | "Development Status :: 3 - Alpha", 23 | "Intended Audience :: Developers", 24 | "Topic :: Software Development :: Build Tools", 25 | "License :: OSI Approved :: MIT License", 26 | "Programming Language :: Python :: 3.7", 27 | "Programming Language :: Python :: 3.8", 28 | "Programming Language :: Python :: 3.9", 29 | "Programming Language :: Python :: 3.10", 30 | ], 31 | ) 32 | -------------------------------------------------------------------------------- /pytok/exceptions.py: -------------------------------------------------------------------------------- 1 | class TikTokException(Exception): 2 | """Generic exception that all other TikTok errors are children of.""" 3 | 4 | def __init__(self, *args, **kwargs): 5 | super().__init__(*args, 
**kwargs) 6 | 7 | 8 | class CaptchaException(TikTokException): 9 | """TikTok is showing captcha""" 10 | 11 | 12 | class NotFoundException(TikTokException): 13 | """TikTok indicated that this object does not exist.""" 14 | 15 | 16 | class EmptyResponseException(TikTokException): 17 | """TikTok sent back an empty response.""" 18 | 19 | 20 | class SoundRemovedException(TikTokException): 21 | """This TikTok sound has no id from being removed by TikTok.""" 22 | 23 | 24 | class InvalidJSONException(TikTokException): 25 | """TikTok returned invalid JSON.""" 26 | 27 | 28 | class NotAvailableException(TikTokException): 29 | """The requested object is not available in this region.""" 30 | 31 | class NoContentException(TikTokException): 32 | """TikTok returned no content""" 33 | 34 | class TimeoutException(TikTokException): 35 | """Timed out trying to get content from TikTok""" 36 | 37 | class ApiFailedException(TikTokException): 38 | """TikTok API is failing""" 39 | 40 | class FewerVideosThanExpectedException(TikTokException): 41 | """TikTok is returning fewer videos for this user than their metadata led us to expect""" 42 | 43 | class AccountPrivateException(TikTokException): 44 | """This TikTok account is private and cannot be scraped""" -------------------------------------------------------------------------------- /tests/test_captcha.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | import os 4 | 5 | import cv2 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | 9 | from pytok import captcha_solver 10 | 11 | def main(): 12 | this_dir_path = os.path.dirname(os.path.realpath(__file__)) 13 | with open(os.path.join(this_dir_path, 'captcha_examples.json'), 'r') as f: 14 | data = json.load(f) 15 | for type, examples in data.items(): 16 | for example in examples: 17 | puzzle_b64 = example['puzzle'].strip("b'") 18 | piece_b64 = example['piece'].strip("b'") 19 | 20 | best_angle = 
captcha_solver.whirl_solver(puzzle_b64, piece_b64) 21 | puzzle, piece, puzzle_edge, piece_edge = captcha_solver._get_images_and_edges(puzzle_b64, piece_b64) 22 | 23 | solved_puzzle = puzzle.copy() 24 | puzzle_r = (piece.shape[0] / 2) - 1 25 | for y in range(solved_puzzle.shape[1]): 26 | for x in range(solved_puzzle.shape[0]): 27 | if (x - solved_puzzle.shape[0] / 2) ** 2 + (y - solved_puzzle.shape[1] / 2) ** 2 < puzzle_r ** 2: 28 | theta = np.arctan2(y - solved_puzzle.shape[1] / 2, x - solved_puzzle.shape[0] / 2) 29 | theta -= (best_angle / piece_edge.shape[0]) * 2 * np.pi 30 | r = np.sqrt((x - solved_puzzle.shape[0] / 2) ** 2 + (y - solved_puzzle.shape[1] / 2) ** 2) 31 | solved_puzzle[x, y] = piece[int(piece.shape[0] / 2 + r * np.cos(theta)), int(piece.shape[1] / 2 + r * np.sin(theta))] 32 | 33 | matches = np.zeros(puzzle_edge.shape[0]) 34 | for angle in range(puzzle_edge.shape[0]): 35 | match = np.sum(puzzle_edge * np.roll(piece_edge, angle, axis=0)) 36 | matches[angle] = match 37 | 38 | # save the best match 39 | fig, ax = plt.subplots(nrows=7) 40 | ax[0].imshow(puzzle) 41 | ax[1].imshow(piece) 42 | ax[2].imshow(solved_puzzle) 43 | ax[3].imshow(np.repeat(puzzle_edge[np.newaxis, :, :] / 255, 50, axis=0)) 44 | ax[4].imshow(np.repeat(piece_edge[np.newaxis, :, :] / 255, 50, axis=0)) 45 | ax[5].imshow(np.repeat(np.roll(piece_edge / 255, best_angle, axis=0)[np.newaxis, :, :], 50, axis=0)) 46 | ax[6].plot(matches) 47 | plt.show() 48 | 49 | 50 | if __name__ == '__main__': 51 | main() -------------------------------------------------------------------------------- /pytok/helpers.py: -------------------------------------------------------------------------------- 1 | import re 2 | from urllib import parse as url_parsers 3 | 4 | import requests 5 | 6 | from .exceptions import * 7 | 8 | 9 | def extract_tag_contents(html): 10 | if isinstance(html, bytes): 11 | html = html.decode("utf-8") 12 | data_json_match = re.search(r"""")[0] 28 | return j_raw 29 | else: 30 | sigi_json = 
re.search('