├── tests
├── __init__.py
├── test_user.py
├── test_utils.py
├── test_captcha.py
└── captcha_examples.json
├── examples
├── __init__.py
├── check_tool.py
├── ms_token_example.py
├── hashtag_example.py
├── video_bytes_example .py
├── comments_example.py
├── video_example.py
├── network_info_example.py
└── user_example.py
├── .gitattributes
├── pytok
├── __init__.py
├── api
│ ├── trending.py
│ ├── __init__.py
│ ├── sound.py
│ ├── search.py
│ ├── hashtag.py
│ ├── user.py
│ ├── base.py
│ └── video.py
├── exceptions.py
├── helpers.py
├── tiktok.py
├── captcha_solver.py
└── utils.py
├── requirements.txt
├── .gitignore
├── CITATION.cff
├── setup.py
└── README.md
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/pytok/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. include:: ../README.md
3 | """
4 | __docformat__ = "restructuredtext"
5 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | playwright
3 | pyvirtualdisplay
4 | opencv-python
5 | brotli
6 | pandas
7 | tqdm
8 | patchright
9 | pyclick
10 | TikTokApi
--------------------------------------------------------------------------------
/examples/check_tool.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json
3 |
4 | from pytok.tiktok import PyTok
5 |
async def main():
    """Start a Chromium-backed PyTok session and open browserscan.net.

    Nothing is returned or written; the session is closed when the
    ``async with`` block exits.
    """
    target_url = "https://www.browserscan.net/"
    async with PyTok(browser="chromium") as api:
        # Navigates via the session's private `_page` attribute.
        page = api._page
        await page.goto(target_url)


if __name__ == "__main__":
    asyncio.run(main())
13 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/*
2 | bmp.log
3 | geckodriver.log
4 | server.log
5 | browsermob-proxy/*
6 | myScripts/*
7 | test.py
8 | debug.log
9 | res.html
10 | tmp/*
11 | dist/*
12 | *.egg-info
13 | tmp/
14 | tmp
15 | .pytest_cache/*
16 | test.mp4
17 | test.txt
18 | .pytest_cache/*
19 | tests/__pycache__/*
20 | *.pyc
21 | acrawl.js
22 | test2.py
23 | build
24 | MANIFEST
25 | src
26 | .vscode
27 | .env
28 | tests/data
--------------------------------------------------------------------------------
/examples/ms_token_example.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json
3 |
4 | from pytok.tiktok import PyTok
5 |
async def main():
    """Print whatever ``api.get_ms_tokens()`` returns after loading a user."""
    async with PyTok(headless=True) as api:
        # Any user will do: the info request is only made so that a page
        # has been loaded before the tokens are requested.
        await api.user(username="therock").info()
        print(await api.get_ms_tokens())


if __name__ == "__main__":
    asyncio.run(main())
16 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: "If you use this software, please cite it as below."
3 | authors:
4 | - family-names: "Steel"
5 | given-names: "Ben"
6 | orcid: "https://orcid.org/0009-0006-3845-1394"
7 | - family-names: "Abrahams"
8 | given-names: "Alexei"
9 | orcid: "https://orcid.org/0000-0002-6547-072X"
10 | title: "PyTok"
11 | version: 0.1.0
12 | doi: 10.5281/zenodo.12802714
13 | date-released: 2024-07-23
14 | url: "https://github.com/networkdynamics/pytok"
--------------------------------------------------------------------------------
/tests/test_user.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import os
3 |
4 | from pytok.tiktok import PyTok
5 |
6 | # username = "brianjordanalvarez"
username = 'marierenaudstab'


async def test_user_videos():
    """Count the videos yielded for ``username`` and check the total."""
    async with PyTok(headless=True) as api:
        # The profile info is requested first; its result is not used.
        await api.user(username=username).info()

        # A fresh user object is created for the iteration, as before.
        yielded = [
            video
            async for video in api.user(username=username).videos(count=100)
        ]
        count = len(yielded)

        # NOTE(review): the iterator is asked for count=100 but at least 120
        # items are required here -- confirm that videos() may yield more
        # than `count`, otherwise this assertion can never pass.
        assert count >= 120


if __name__ == '__main__':
    asyncio.run(test_user_videos())
--------------------------------------------------------------------------------
/examples/hashtag_example.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json
3 |
4 | from pytok.tiktok import PyTok
5 |
hashtag_name = 'fyp'


async def main():
    """Dump the info of videos found under ``hashtag_name`` to out.json.

    The hashtag's video iterator is called with ``count=1000`` and
    ``info()`` is awaited for every video it yields.
    """
    async with PyTok(manual_captcha_solves=True) as api:
        tag = api.hashtag(name=hashtag_name)
        videos = [await video.info() async for video in tag.videos(count=1000)]

        with open("out.json", "w") as out_file:
            json.dump(videos, out_file)


if __name__ == "__main__":
    asyncio.run(main())
--------------------------------------------------------------------------------
/examples/video_bytes_example .py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json
3 |
4 | from pytok.tiktok import PyTok
5 |
username = 'therock'
id = '7296444945991224622'


async def main():
    """Save one video's info to out.json and its bytes to out.mp4."""
    async with PyTok() as api:
        video = api.video(username=username, id=id)

        # Both requests complete before either file is written.
        info = await video.info()
        payload = await video.bytes()

        with open("out.json", "w") as info_file:
            json.dump(info, info_file)

        # Binary mode: the payload is written as raw bytes.
        with open("out.mp4", "wb") as media_file:
            media_file.write(payload)


if __name__ == "__main__":
    asyncio.run(main())
25 |
26 |
--------------------------------------------------------------------------------
/examples/comments_example.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json
3 |
4 | from pytok.tiktok import PyTok
5 |
videos = [
    {
        'id': '7058106162235100462',
        'author': {
            'uniqueId': 'charlesmcbryde'
        }
    }
]


async def main():
    """Collect the comments of each entry in ``videos`` into out.json.

    Each entry must provide ``id`` and ``author.uniqueId``. The output file
    is rewritten for every entry, so with several entries only the last
    one's comments remain.
    """
    async with PyTok(headless=False) as api:
        for entry in videos:
            target = api.video(id=entry['id'], username=entry['author']['uniqueId'])
            comments = [comment async for comment in target.comments(count=1000)]

            assert len(comments) > 0, "No comments found"
            with open("out.json", "w") as f:
                json.dump(comments, f)


if __name__ == "__main__":
    asyncio.run(main())
28 |
--------------------------------------------------------------------------------
/pytok/api/trending.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import logging
4 | from urllib.parse import urlencode
5 |
6 | from .video import Video
7 | from .sound import Sound
8 | from .user import User
9 | from .hashtag import Hashtag
10 |
11 | from typing import TYPE_CHECKING, Iterator
12 |
13 | if TYPE_CHECKING:
14 | from ..tiktok import PyTok
15 |
16 |
class Trending:
    """Namespace of static methods related to trending content."""

    # Annotation only; no value is assigned in this class body.
    parent: PyTok

    @staticmethod
    def videos(count=30, **kwargs) -> Iterator[Video]:
        """
        Intended to return the Videos that are trending on TikTok.

        - Parameters:
            - count (int): The amount of videos you want returned.

        - Raises:
            - NotImplementedError: always; this method has no implementation.
        """
        raise NotImplementedError
32 |
--------------------------------------------------------------------------------
/examples/video_example.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json
3 |
4 | from pytok.tiktok import PyTok
5 |
username = 'therock'
id = '7296444945991224622'


async def main():
    """Save one video's info to out.json and its related videos to related.json."""
    async with PyTok() as api:
        video = api.video(username=username, id=id)

        # Info is requested first, then the related-videos iterator is drained.
        info = await video.info()
        related = [item async for item in video.related_videos()]

        with open("out.json", "w") as info_file:
            json.dump(info, info_file)

        with open("related.json", "w") as related_file:
            json.dump(related, related_file)


if __name__ == "__main__":
    asyncio.run(main())
27 |
28 |
--------------------------------------------------------------------------------
/examples/network_info_example.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json
3 |
4 | from pytok.tiktok import PyTok
5 |
username = 'therock'
id = '7296444945991224622'


async def main():
    """Save one video's info and both network-info results to out.json."""
    async with PyTok() as api:
        video = api.video(username=username, id=id)

        # Dict values are evaluated top to bottom, so the three requests
        # are awaited in this order: info, network_info, bytes_network_info.
        all_data = {
            "video_data": await video.info(),
            "network_data": await video.network_info(),
            "bytes_network_data": await video.bytes_network_info(),
        }

        with open("out.json", "w") as out_file:
            json.dump(all_data, out_file)


if __name__ == "__main__":
    asyncio.run(main())
29 |
30 |
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 |
5 | from pytok import utils
6 |
@pytest.mark.parametrize(
    "json_file_path",
    [os.path.join(".", "tests", "data", "20230915-200856_error_videos.json")],
)
def test_get_video_df(json_file_path):
    """Check that the video loader returns a non-empty result for the fixture.

    The first argument handed to the loader is the JSON path with ".json"
    replaced by ".csv"; the JSON path itself is passed via ``file_paths``.
    """
    csv_path = json_file_path.replace(".json", ".csv")
    loaded = utils.try_load_video_df_from_file(csv_path, file_paths=[json_file_path])

    assert loaded is not None
    assert len(loaded) > 0
14 |
@pytest.mark.parametrize(
    "json_file_path",
    [os.path.join(".", "tests", "data", "20230915-200856_error_users.json")],
)
def test_get_user_df(json_file_path):
    """Check that the user loader returns a non-empty result for the fixture.

    The first argument handed to the loader is the JSON path with ".json"
    replaced by ".csv"; the JSON path itself is passed via ``file_paths``.
    """
    csv_path = json_file_path.replace(".json", ".csv")
    loaded = utils.try_load_user_df_from_file(csv_path, file_paths=[json_file_path])

    assert loaded is not None
    assert len(loaded) > 0


if __name__ == "__main__":
    pytest.main([__file__])
--------------------------------------------------------------------------------
/examples/user_example.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json
3 | import logging
4 |
5 | from pytok.tiktok import PyTok
6 |
# Send DEBUG-level records (and above) to the root logger's default handler.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)


async def main():
    """Dump the info of each listed user's videos to out.json.

    The output file is rewritten for every user, so with several users only
    the last one's videos remain.
    """
    users = ['therock']
    async with PyTok(logging_level=logging.DEBUG, manual_captcha_solves=True, log_captcha_solves=True) as api:
        for username in users:
            user = api.user(username=username)
            # The profile info is requested before iterating; its result
            # is not used here.
            await user.info()

            videos = [await video.info() async for video in user.videos()]

            assert len(videos) > 0, "No videos found"
            with open("out.json", "w") as f:
                json.dump(videos, f)


if __name__ == "__main__":
    asyncio.run(main())
32 |
--------------------------------------------------------------------------------
/pytok/api/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This module contains classes that all represent different types of data sent back by the TikTok servers.
3 |
4 | The files within this module correspond to the type of object they describe, and each has different methods associated with it.
5 |
6 |
7 | ### How To Interpret TikTok Data
8 | There are quite a few ambiguous keys in the JSON that TikTok returns, so here's a section that tries to document some of them.
9 |
10 | **Note**: This list is incomplete. If you get confused about something, feel free to add it here via a PR once you figure it out.
11 |
12 | | JSON Key | Description |
13 | |------------------|-------------|
14 | | createTime | The [unix epoch](https://docs.python.org/3/library/datetime.html#datetime.date.fromtimestamp) of creation, all other time fields are also unix epochs. |
15 | | secUid & (userId or id) | Two different unique attributes that are used in conjunction to reference a specific account, so if you're storing users somewhere in a database, you should store both secUid & userId. |
16 | | id | A unique attribute used to reference a non-user object like video, hashtag, etc |
17 | | diggCount | The likes for a specific video. |
18 | | digged | Used to check if the current user has liked/digged a video, this will always be false since this package doesn't support logged-in user functions. |
19 | """
20 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | import os.path
3 | import setuptools
4 |
# The README doubles as the package's long description.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

KEYWORDS = ["tiktok", "python3", "api", "unofficial", "tiktok-api", "tiktok api"]

# NOTE(review): this list and requirements.txt disagree --
# "undetected_playwright" appears only here and "pandas" only in
# requirements.txt. Confirm which set is intended.
INSTALL_REQUIRES = [
    "requests",
    "playwright",
    "undetected_playwright",
    "pyvirtualdisplay",
    "tqdm",
    "opencv-python",
    "brotli",
    "patchright",
    "pyclick",
    "TikTokApi",
]

CLASSIFIERS = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Topic :: Software Development :: Build Tools",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3.7",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
]

setuptools.setup(
    name="pytok",
    packages=setuptools.find_packages(),
    # NOTE(review): CITATION.cff declares version 0.1.0 -- confirm which
    # version number is current.
    version="0.0.1",
    license="MIT",
    description="Playwright based version of The Unofficial TikTok API Wrapper in Python 3.",
    author="Ben Steel",
    author_email="bendavidsteel@gmail.com",
    url="https://github.com/networkdynamics/pytok",
    long_description=long_description,
    long_description_content_type="text/markdown",
    keywords=KEYWORDS,
    install_requires=INSTALL_REQUIRES,
    classifiers=CLASSIFIERS,
)
32 |
--------------------------------------------------------------------------------
/pytok/exceptions.py:
--------------------------------------------------------------------------------
class TikTokException(Exception):
    """Common base class: every other exception in this module derives from it."""

    def __init__(self, *args, **kwargs):
        # Straight pass-through of all arguments to Exception.
        super().__init__(*args, **kwargs)


class CaptchaException(TikTokException):
    """Raised when TikTok is showing a captcha."""


class NotFoundException(TikTokException):
    """Raised when TikTok indicates that the object does not exist."""


class EmptyResponseException(TikTokException):
    """Raised when TikTok sends back an empty response."""


class SoundRemovedException(TikTokException):
    """Raised when a sound has no id because TikTok removed it."""


class InvalidJSONException(TikTokException):
    """Raised when TikTok returns invalid JSON."""


class NotAvailableException(TikTokException):
    """Raised when the requested object is not available in this region."""


class NoContentException(TikTokException):
    """Raised when TikTok returns no content."""


class TimeoutException(TikTokException):
    """Raised when getting content from TikTok timed out."""


class ApiFailedException(TikTokException):
    """Raised when the TikTok API is failing."""


class FewerVideosThanExpectedException(TikTokException):
    """Raised when TikTok returns fewer videos for a user than their metadata suggested."""


class AccountPrivateException(TikTokException):
    """Raised when the TikTok account is private and cannot be scraped."""
--------------------------------------------------------------------------------
/tests/test_captcha.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import json
3 | import os
4 |
5 | import cv2
6 | import matplotlib.pyplot as plt
7 | import numpy as np
8 |
9 | from pytok import captcha_solver
10 |
def _strip_bytes_repr(text):
    """Return *text* without a surrounding ``b'...'`` wrapper, if present."""
    if text.startswith("b'"):
        text = text[2:]
    if text.endswith("'"):
        text = text[:-1]
    return text


def main():
    """Run the whirl solver on every stored example and plot the outcome.

    Examples are read from ``captcha_examples.json`` next to this file. For
    each one, the solver's answer is used to paint the rotated piece into a
    copy of the puzzle, the edge-match score is computed for every possible
    shift, and seven diagnostic plots are shown.
    """
    this_dir_path = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(this_dir_path, 'captcha_examples.json'), 'r') as f:
        data = json.load(f)

    # Only the example lists are needed; the keys are not used.
    for examples in data.values():
        for example in examples:
            # str.strip("b'") removes ANY leading/trailing "b" or "'"
            # characters, which would also eat a payload that starts or
            # ends with "b". Remove only the literal wrapper instead.
            puzzle_b64 = _strip_bytes_repr(example['puzzle'])
            piece_b64 = _strip_bytes_repr(example['piece'])

            best_angle = captcha_solver.whirl_solver(puzzle_b64, piece_b64)
            puzzle, piece, puzzle_edge, piece_edge = captcha_solver._get_images_and_edges(puzzle_b64, piece_b64)

            solved_puzzle = puzzle.copy()
            center_x = solved_puzzle.shape[0] / 2
            center_y = solved_puzzle.shape[1] / 2
            puzzle_r = (piece.shape[0] / 2) - 1
            # best_angle is a shift along axis 0 of piece_edge; convert it
            # to radians, treating the full axis length as one turn.
            rotation = (best_angle / piece_edge.shape[0]) * 2 * np.pi
            for y in range(solved_puzzle.shape[1]):
                for x in range(solved_puzzle.shape[0]):
                    dist_sq = (x - center_x) ** 2 + (y - center_y) ** 2
                    # Only pixels inside the central disc are replaced.
                    if dist_sq < puzzle_r ** 2:
                        theta = np.arctan2(y - center_y, x - center_x)
                        theta -= rotation
                        r = np.sqrt(dist_sq)
                        solved_puzzle[x, y] = piece[
                            int(piece.shape[0] / 2 + r * np.cos(theta)),
                            int(piece.shape[1] / 2 + r * np.sin(theta)),
                        ]

            # Match score for every possible shift of the piece edge.
            matches = np.zeros(puzzle_edge.shape[0])
            for angle in range(puzzle_edge.shape[0]):
                match = np.sum(puzzle_edge * np.roll(piece_edge, angle, axis=0))
                matches[angle] = match

            # Show the inputs, the reconstruction, the edge strips (each
            # repeated 50 times along a new leading axis so it is visible)
            # and the score curve.
            fig, ax = plt.subplots(nrows=7)
            ax[0].imshow(puzzle)
            ax[1].imshow(piece)
            ax[2].imshow(solved_puzzle)
            ax[3].imshow(np.repeat(puzzle_edge[np.newaxis, :, :] / 255, 50, axis=0))
            ax[4].imshow(np.repeat(piece_edge[np.newaxis, :, :] / 255, 50, axis=0))
            ax[5].imshow(np.repeat(np.roll(piece_edge / 255, best_angle, axis=0)[np.newaxis, :, :], 50, axis=0))
            ax[6].plot(matches)
            plt.show()


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/pytok/helpers.py:
--------------------------------------------------------------------------------
1 | import re
2 | from urllib import parse as url_parsers
3 |
4 | import requests
5 |
6 | from .exceptions import *
7 |
8 |
9 | def extract_tag_contents(html):
10 | if isinstance(html, bytes):
11 | html = html.decode("utf-8")
12 | data_json_match = re.search(r"""")[0]
28 | return j_raw
29 | else:
30 | sigi_json = re.search('