├── .gitattributes
├── .gitignore
├── CITATION.cff
├── README.md
├── examples
    ├── __init__.py
    ├── check_tool.py
    ├── comments_example.py
    ├── hashtag_example.py
    ├── ms_token_example.py
    ├── network_info_example.py
    ├── user_example.py
    └── video_example.py
├── pytok
    ├── __init__.py
    ├── api
    │   ├── __init__.py
    │   ├── base.py
    │   ├── hashtag.py
    │   ├── search.py
    │   ├── sound.py
    │   ├── trending.py
    │   ├── user.py
    │   └── video.py
    ├── captcha_solver.py
    ├── exceptions.py
    ├── helpers.py
    ├── tiktok.py
    └── utils.py
├── requirements.txt
├── setup.py
└── tests
    ├── __init__.py
    ├── captcha_examples.json
    ├── test_captcha.py
    ├── test_user.py
    └── test_utils.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__/*
 2 | bmp.log
 3 | geckodriver.log
 4 | server.log
 5 | browsermob-proxy/*
 6 | myScripts/*
 7 | test.py
 8 | debug.log
 9 | res.html
10 | tmp/*
11 | dist/*
12 | *.egg-info
13 | tmp/
14 | tmp
15 | .pytest_cache/*
16 | test.mp4
17 | test.txt
18 | .pytest_cache/*
19 | tests/__pycache__/*
20 | *.pyc
21 | acrawl.js
22 | test2.py
23 | build
24 | MANIFEST
25 | src
26 | .vscode
27 | .env
28 | tests/data


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | message: "If you use this software, please cite it as below."
 3 | authors:
 4 | - family-names: "Steel"
 5 |   given-names: "Ben"
 6 |   orcid: "https://orcid.org/0009-0006-3845-1394"
 7 | - family-names: "Abrahams"
 8 |   given-names: "Alexei"
 9 |   orcid: "https://orcid.org/0000-0002-6547-072X"
10 | title: "PyTok"
11 | version: 0.1.0
12 | doi: 10.5281/zenodo.12802714
13 | date-released: 2024-07-23
14 | url: "https://github.com/networkdynamics/pytok"


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | [![DOI](https://zenodo.org/badge/555492190.svg)](https://zenodo.org/doi/10.5281/zenodo.12802713)
  3 | 
  4 | # pytok
  5 | 
  6 | This is a Playwright based version of David Teacher's unofficial api wrapper for TikTok.com in python. It re-implements a currently limited set of the features of the original library, with a shifted focus on using browser automation to allow automatic captcha solves with a hopefully minor trade-off in performance.
  7 | 
  8 | ## Installation
  9 | 
 10 | ```bash
 11 | pip install git+https://github.com/networkdynamics/pytok.git@master
 12 | ```
 13 | 
 14 | ## Quick Start Guide
 15 | 
 16 | Here's a quick bit of code to get the profile data and videos of a particular user on TikTok. There are more examples in the [examples](https://github.com/networkdynamics/pytok/tree/master/examples) directory.
 17 | 
 18 | ```py
 19 | import asyncio
 20 | 
 21 | from pytok.tiktok import PyTok
 22 | 
 23 | async def main():
 24 |     async with PyTok() as api:
 25 |         user = api.user(username="therock")
 26 |         user_data = await user.info()
 27 |         print(user_data)
 28 | 
 29 |         videos = []
 30 |         async for video in user.videos():
 31 |             video_data = video.info()
 32 |             print(video_data)
 33 | 
 34 | if __name__ == "__main__":
 35 |     asyncio.run(main())
 36 | ```
 37 | 
 38 | 
 39 | Please note pulling data from TikTok takes a while! We recommend leaving the scripts running on a server for a while for them to finish downloading everything. Feel free to play around with the delay constants to either speed up the process or avoid TikTok rate limiting, like so: `PyTok(request_delay=10)`
 40 | 
 41 | Please do not hesitate to make an issue in this repo to get our help with this!
 42 | 
 43 | ## Citation
 44 | 
 45 | If you use this library in your research, please cite it using the following BibTeX entry:
 46 | 
 47 | ```bibtex
 48 | @software{ben_steel_2024_12802714,
 49 |   author       = {Ben Steel and
 50 |                   Alexei Abrahams},
 51 |   title        = {{networkdynamics/pytok: Initial working version of 
 52 |                    library}},
 53 |   month        = jul,
 54 |   year         = 2024,
 55 |   publisher    = {Zenodo},
 56 |   version      = {v0.1.0},
 57 |   doi          = {10.5281/zenodo.12802714},
 58 |   url          = {https://doi.org/10.5281/zenodo.12802714}
 59 | }
 60 | ```
 61 | 
 62 | ## Format and Schema
 63 | 
 64 | The JSONable dictionary returned by the `info()` methods contains all of the data that the TikTok API returns. We have provided helper functions to parse that data into Pandas DataFrames, `utils.get_comment_df()`, `utils.get_video_df()` and `utils.get_user_df()` for the data from comments, videos, and users respectively.
 65 | 
 66 | The video dataframe will contain the following columns:
 67 | |Field name | Description |
 68 | |----------|----------|
 69 | |`video_id`| Unique video ID |
 70 | |`createtime`| UTC datetime of video creation time in YYYY-MM-DD HH:MM:SS format |
 71 | |`author_name`| Unique author name |
 72 | |`author_id`| Unique author ID |
 73 | |`desc`| The full video description from the author |
 74 | |`hashtags`| A list of hashtags used in the video description |
 75 | |`share_video_id`| If the video is sharing another video, this is the video ID of that original video, else empty |
 76 | |`share_video_user_id`| If the video is sharing another video, this is the user ID of the author of that video, else empty |
 77 | |`share_video_user_name`| If the video is sharing another video, this is the user name of the author of that video, else empty |
 78 | |`share_type`| If the video is sharing another video, this is the type of the share, stitch, duet etc. |
 79 | |`mentions`| A list of users mentioned in the video description, if any |
 80 | |`digg_count`| The number of likes on the video |
 81 | |`share_count`| The number of times the video was shared |
 82 | |`comment_count`| The number of comments on the video |
 83 | |`play_count`| The number of times the video was played |
 84 | 
 85 | The comment dataframe will contain the following columns:
 86 | |Field name | Description |
 87 | |----------|-----------|
 88 | |`comment_id`| Unique comment ID |
 89 | |`createtime`| UTC datetime of comment creation time in YYYY-MM-DD HH:MM:SS format |
 90 | |`author_name`| Unique author name |
 91 | |`author_id`| Unique author ID |
 92 | |`text`| Text of the comment |
 93 | |`mentions`| A list of users that are tagged in the comment |
 94 | |`video_id`| The ID of the video the comment is on |
 95 | |`comment_language`| The language of the comment, as predicted by the TikTok API |
 96 | |`digg_count`| The number of likes the comment got |
 97 | |`reply_comment_id`| If the comment is replying to another comment, this is the ID of that comment |
 98 | 
 99 | The user dataframe will contain the following columns:
100 | |Field name | Description |
101 | |----------|-----------|
102 | |`id`| Unique author ID |
103 | |`unique_id`| Unique user name |
104 | |`nickname`| Display user name, changeable |
105 | |`signature`| Short user description |
106 | |`verified`| Whether or not the user is verified |
107 | |`num_following`| How many other accounts the user is following |
108 | |`num_followers`| How many followers the user has |
109 | |`num_videos`| How many videos the user has made |
110 | |`num_likes`| How many total likes the user has had |
111 | |`createtime`| When the user account was made. This is derived from the `id` field, and can occasionally be incorrect with a very low unix epoch such as 1971 |
112 | 
113 | 


--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/networkdynamics/pytok/c1b8704be711f647d2a222bfeef9ea2b6a325375/examples/__init__.py


--------------------------------------------------------------------------------
/examples/check_tool.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import json
 3 | 
 4 | from pytok.tiktok import PyTok
 5 | 
 6 | async def main():
 7 |     async with PyTok(browser="chromium") as api:
 8 |         await api._page.goto("https://www.browserscan.net/")
 9 |         pass
10 | 
11 | if __name__ == "__main__":
12 |     asyncio.run(main())
13 | 


--------------------------------------------------------------------------------
/examples/comments_example.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import json
 3 | 
 4 | from pytok.tiktok import PyTok
 5 | 
 6 | videos = [
 7 |     {
 8 |         'id': '7058106162235100462',
 9 |         'author': {
10 |             'uniqueId': 'charlesmcbryde'
11 |         }
12 |     }
13 | ]
14 | 
15 | async def main():
16 |     async with PyTok(headless=False) as api:
17 |         for video in videos:
18 |             comments = []
19 |             async for comment in api.video(id=video['id'], username=video['author']['uniqueId']).comments(count=1000):
20 |                 comments.append(comment)
21 | 
22 |             assert len(comments) > 0, "No comments found"
23 |             with open("out.json", "w") as f:
24 |                 json.dump(comments, f)
25 | 
26 | if __name__ == "__main__":
27 |     asyncio.run(main())
28 | 


--------------------------------------------------------------------------------
/examples/hashtag_example.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import json
 3 | 
 4 | from pytok.tiktok import PyTok
 5 | 
 6 | hashtag_name = 'fyp'
 7 | 
 8 | async def main():
 9 |     async with PyTok(manual_captcha_solves=True) as api:
10 |         hashtag = api.hashtag(name=hashtag_name)
11 | 
12 |         videos = []
13 |         async for video in hashtag.videos(count=1000):
14 |             video_info = await video.info()
15 |             videos.append(video_info)
16 | 
17 |         with open("out.json", "w") as out_file:
18 |             json.dump(videos, out_file)
19 | 
20 | if __name__ == "__main__":
21 |     asyncio.run(main())


--------------------------------------------------------------------------------
/examples/ms_token_example.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import json
 3 | 
 4 | from pytok.tiktok import PyTok
 5 | 
 6 | async def main():
 7 |     async with PyTok(headless=True) as api:
 8 |         user = api.user(username="therock")
 9 |         # get random user to load page
10 |         user_data = await user.info()
11 |         ms_tokens = await api.get_ms_tokens()
12 |         print(ms_tokens)
13 | 
14 | if __name__ == "__main__":
15 |     asyncio.run(main())
16 | 


--------------------------------------------------------------------------------
/examples/network_info_example.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import json
 3 | 
 4 | from pytok.tiktok import PyTok
 5 | 
 6 | username = 'therock'
 7 | id = '7296444945991224622'
 8 | 
 9 | async def main():
10 |     async with PyTok() as api:
11 |         video = api.video(username=username, id=id)
12 | 
13 |         # Bytes of the TikTok video
14 |         video_data = await video.info()
15 |         network_data = await video.network_info()
16 |         bytes_network_data = await video.bytes_network_info()
17 | 
18 |         all_data = {
19 |             "video_data": video_data,
20 |             "network_data": network_data,
21 |             "bytes_network_data": bytes_network_data
22 |         }
23 | 
24 |         with open("out.json", "w") as out_file:
25 |             json.dump(all_data, out_file)
26 | 
27 | if __name__ == "__main__":
28 |     asyncio.run(main())
29 | 
30 | 


--------------------------------------------------------------------------------
/examples/user_example.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import json
 3 | 
 4 | from pytok.tiktok import PyTok
 5 | 
 6 | async def main():
 7 |     users = ['therock']
 8 |     async with PyTok(manual_captcha_solves=True, log_captcha_solves=True) as api:
 9 |         for username in users:
10 |             user = api.user(username=username)
11 |             user_data = await user.info()
12 | 
13 |             videos = []
14 |             videos_bytes = []
15 |             async for video in user.videos():
16 |                 video_data = await video.info()
17 |                 videos.append(video_data)
18 | 
19 |             assert len(videos) > 0, "No videos found"
20 |             with open("out.json", "w") as f:
21 |                 json.dump(videos, f)
22 | 
23 | if __name__ == "__main__":
24 |     asyncio.run(main())
25 | 


--------------------------------------------------------------------------------
/examples/video_example.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import json
 3 | 
 4 | from pytok.tiktok import PyTok
 5 | 
 6 | username = 'therock'
 7 | id = '7296444945991224622'
 8 | 
 9 | async def main():
10 |     async with PyTok() as api:
11 |         video = api.video(username=username, id=id)
12 | 
13 |         # Bytes of the TikTok video
14 |         video_data = await video.info()
15 |         related_videos = []
16 |         async for related_video in video.related_videos():
17 |             related_videos.append(related_video)
18 |         video_bytes = await video.bytes()
19 | 
20 |         with open("out.json", "w") as out_file:
21 |             json.dump(video_data, out_file)
22 | 
23 |         with open("related.json", "w") as out_file:
24 |             json.dump(list(related_videos), out_file)
25 | 
26 |         with open("out.mp4", "wb") as out_file:
27 |             out_file.write(video_bytes)
28 | 
29 | if __name__ == "__main__":
30 |     asyncio.run(main())
31 | 
32 | 


--------------------------------------------------------------------------------
/pytok/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. include:: ../README.md
3 | """
4 | __docformat__ = "restructuredtext"
5 | 


--------------------------------------------------------------------------------
/pytok/api/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This module contains classes that all represent different types of data sent back by the TikTok servers.
 3 | 
 4 | The files within this module correspond to what type of object is described and all have different methods associated with them.
 5 | 
 6 | 
 7 | ### How To Interpret TikTok Data
 8 | There are quite a few ambiguous keys in the JSON that TikTok returns so here's a section that tries to document some of them.
 9 | 
10 | **Note**: These are incomplete, if you get confused about something feel free to add it here as a PR once you figure it out.
11 | 
12 | | JSON Key         | Description |
13 | |------------------|-------------|
14 | | createTime | The [unix epoch](https://docs.python.org/3/library/datetime.html#datetime.date.fromtimestamp) of creation, all other time fields are also unix epochs.  |
15 | | secUid & (userId or id) | Two different unique attributes that are used in conjunction to reference a specific account, so if you're storing users somewhere in a database, you should store both secUid & userId. |
16 | | id | A unique attribute used to reference a non-user object like video, hashtag, etc |
17 | | diggCount | The likes for a specific video. |
18 | | digged | Used to check if the current user has liked/digged a video, this will always be false since this package doesn't support logged-in user functions. |
19 | """
20 | 


--------------------------------------------------------------------------------
/pytok/api/base.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | from datetime import datetime
  3 | import random
  4 | 
  5 | from patchright.async_api import expect, Page
  6 | 
  7 | from .. import exceptions, captcha_solver
  8 | 
# Default wait in seconds; callers multiply it by 1000 when passing it as a
# millisecond timeout.
TOK_DELAY = 20
# Not referenced in the visible part of this module — presumably a captcha
# wait; TODO confirm its usage and units.
CAPTCHA_DELAY = 999999
 11 | 
 12 | 
 13 | def get_login_close_element(page):
 14 |     return page.get_by_text("Continue as guest", exact=True) \
 15 |         .or_(page.get_by_text("Continue without login", exact=True))
 16 | 
 17 | 
 18 | def get_captcha_element(page):
 19 |     return page.locator('Rotate the shapes') \
 20 |         .or_(page.get_by_text('Verify to continue:', exact=True)) \
 21 |         .or_(page.get_by_text('Click on the shapes with the same size', exact=True)) \
 22 |         .or_(page.get_by_text('Drag the slider to fit the puzzle', exact=True).first)
 23 | 
 24 | 
 25 | class Base:
 26 | 
 27 |     async def check_initial_call(self, url):
 28 |         event = await self.wait_for_requests(url)
 29 |         response = await event.value.response()
 30 |         if response.status >= 300:
 31 |             raise exceptions.NotAvailableException("Content is not available")
 32 | 
 33 |     async def wait_for_content_or_captcha(self, content_tag):
 34 |         page = self.parent._page
 35 | 
 36 |         content_element = page.locator(content_tag).first
 37 |         # content_element = page.get_by_text('Videos', exact=True)
 38 |         captcha_element = get_captcha_element(page)
 39 | 
 40 |         try:
 41 |             await expect(content_element.or_(captcha_element)).to_be_visible(timeout=TOK_DELAY * 1000)
 42 | 
 43 |         except TimeoutError as e:
 44 |             raise exceptions.TimeoutException(str(e))
 45 | 
 46 |         captcha_visible = await captcha_element.is_visible()
 47 |         if captcha_visible:
 48 |             await self.solve_captcha()
 49 |             asyncio.sleep(1)
 50 |             await expect(content_element).to_be_visible(timeout=TOK_DELAY * 1000)
 51 | 
 52 |         return content_element
 53 |     
 54 |     async def wait_for_content_or_unavailable(self, content_tag, unavailable_text, no_content_text=None):
 55 |         page: Page = self.parent._page
 56 |         content_element = page.locator(content_tag).first
 57 |         captcha_element = get_captcha_element(page)
 58 |         unavailable_element = page.get_by_text(unavailable_text, exact=True)
 59 | 
 60 |         # try:
 61 |         expected_elements = content_element.or_(captcha_element).or_(unavailable_element)
 62 | 
 63 |         def add_no_content_text(expected_es, text):
 64 |             if no_content_text:
 65 |                 if isinstance(no_content_text, list):
 66 |                     for text in no_content_text:
 67 |                         expected_es = expected_es.or_(page.get_by_text(text, exact=True))
 68 |                 elif isinstance(no_content_text, str):
 69 |                     expected_es = expected_es.or_(page.get_by_text(no_content_text, exact=True))
 70 |             return expected_es
 71 |         expected_elements = add_no_content_text(expected_elements, no_content_text)
 72 | 
 73 |         await self.check_and_resolve_refresh_button()
 74 |         await self.check_and_resolve_login_popup()
 75 | 
 76 |         self.parent.logger.debug(f"Checking for '{unavailable_text}'")
 77 |         if await unavailable_element.is_visible():
 78 |             raise exceptions.NotAvailableException(f"Content is not available with message: '{unavailable_text}'")
 79 |         
 80 |         if no_content_text:
 81 |             if isinstance(no_content_text, list):
 82 |                 for text in no_content_text:
 83 |                     no_content_element = page.get_by_text(text, exact=True)
 84 |                     if await no_content_element.is_visible():
 85 |                         raise exceptions.NoContentException(f"Content is not available with message: '{text}'")
 86 |                     else:
 87 |                         self.parent.logger.debug(f"Could not find text '{text}'")
 88 |             elif isinstance(no_content_text, str):
 89 |                 no_content_element = page.get_by_text(no_content_text, exact=True)
 90 |                 if await no_content_element.is_visible():
 91 |                     raise exceptions.NoContentException(f"Content is not available with message: '{no_content_text}'")
 92 |                 else:
 93 |                     self.parent.logger.debug(f"Could not find text '{text}'")
 94 | 
 95 |         max_tries = 10
 96 |         tries = 0
 97 |         self.parent.logger.debug("Waiting for main content to become visible")
 98 |         while not (await content_element.is_visible()) and tries < max_tries:
 99 |             await asyncio.sleep(0.5)
100 |             await self.check_and_resolve_refresh_button()
101 |             tries += 1
102 |         
103 |         if tries >= max_tries:
104 |             # try some other behaviour
105 |             url = page.url
106 |             await page.goto("https://www.tiktok.com")
107 |             await asyncio.sleep(5)
108 |             await page.goto(url)
109 | 
110 |         return content_element
111 | 
112 |     async def check_and_resolve_refresh_button(self):
113 |         page: Page = self.parent._page
114 |         refresh_button = page.get_by_text('Refresh')
115 |         self.parent.logger.debug("Checking for refresh button")
116 |         if await refresh_button.is_visible():
117 |             self.parent.logger.debug("Refresh button found, clicking")
118 |             await refresh_button.click()
119 |             await asyncio.sleep(1)
120 | 
121 |     async def check_and_resolve_login_popup(self):
122 |         page: Page = self.parent._page
123 |         login_popup = page.get_by_text('Log in to TikTok')
124 |         self.parent.logger.debug("Checking for login to TikTok pop up")
125 |         if await login_popup.is_visible():
126 |             self.parent.logger.debug("Login prompt found, checking for close button")
127 |             login_close = page.locator('[data-e2e="modal-close-inner-button"]')
128 |             if await login_close.is_visible():
129 |                 await login_close.click()
130 |                 await asyncio.sleep(1)
131 |             else:
132 |                 raise exceptions.NotAvailableException(f"Content is not available with message: 'Log in to TikTok'")
133 | 
134 | 
135 |     async def wait_for_content_or_unavailable_or_captcha(self, content_tag, unavailable_text, no_content_text=None):
136 |         page: Page = self.parent._page
137 |         content_element = page.locator(content_tag).first
138 |         captcha_element = get_captcha_element(page)
139 |         unavailable_element = page.get_by_text(unavailable_text, exact=True)
140 | 
141 |         # try:
142 |         expected_elements = content_element.or_(captcha_element).or_(unavailable_element)
143 | 
144 |         def add_no_content_text(expected_es, text):
145 |             if no_content_text:
146 |                 if isinstance(no_content_text, list):
147 |                     for text in no_content_text:
148 |                         expected_es = expected_es.or_(page.get_by_text(text, exact=True))
149 |                 elif isinstance(no_content_text, str):
150 |                     expected_es = expected_es.or_(page.get_by_text(no_content_text, exact=True))
151 |             return expected_es
152 |         expected_elements = add_no_content_text(expected_elements, no_content_text)
153 | 
154 |         await self.check_and_resolve_refresh_button()
155 |         await self.check_and_resolve_login_popup()
156 | 
157 |         # await expect(expected_elements).to_be_visible(
158 |         #     timeout=TOK_DELAY * 1000)
159 | 
160 |         self.parent.logger.debug("Checking for captcha")
161 |         if await captcha_element.is_visible():
162 |             self.parent.logger.debug("Captcha found")
163 |             await self.solve_captcha()
164 |             await asyncio.sleep(1)
165 |             if await captcha_element.is_visible():
166 |                 raise exceptions.CaptchaException("Captcha is still visible after solving")
167 |             expected_elements = content_element.or_(unavailable_element)
168 |             expected_elements = add_no_content_text(expected_elements, no_content_text)
169 |             await expect(expected_elements).to_be_visible(
170 |                 timeout=TOK_DELAY * 1000)  # waits TOK_DELAY seconds and launches new browser instance
171 | 
172 |         # check after resolving captcha
173 |         await self.check_and_resolve_refresh_button()
174 |         await self.check_and_resolve_login_popup()
175 | 
176 |         self.parent.logger.debug(f"Checking for '{unavailable_text}'")
177 |         if await unavailable_element.is_visible():
178 |             raise exceptions.NotAvailableException(f"Content is not available with message: '{unavailable_text}'")
179 |         
180 |         if no_content_text:
181 |             if isinstance(no_content_text, list):
182 |                 for text in no_content_text:
183 |                     no_content_element = page.get_by_text(text, exact=True)
184 |                     if await no_content_element.is_visible():
185 |                         raise exceptions.NoContentException(f"Content is not available with message: '{text}'")
186 |                     else:
187 |                         self.parent.logger.debug(f"Could not find text '{text}'")
188 |             elif isinstance(no_content_text, str):
189 |                 no_content_element = page.get_by_text(no_content_text, exact=True)
190 |                 if await no_content_element.is_visible():
191 |                     raise exceptions.NoContentException(f"Content is not available with message: '{no_content_text}'")
192 |                 else:
193 |                     self.parent.logger.debug(f"Could not find text '{text}'")
194 | 
195 |         max_tries = 10
196 |         tries = 0
197 |         self.parent.logger.debug("Waiting for main content to become visible")
198 |         content_is_visible = await content_element.is_visible()
199 |         while not content_is_visible and tries < max_tries:
200 |             await asyncio.sleep(1)
201 |             await self.check_and_resolve_refresh_button()
202 |             tries += 1
203 |             content_is_visible = await content_element.is_visible()
204 |         
205 |         if tries >= max_tries:
206 |             pass
207 |             # raise exceptions.TimeoutException(f"Content is not available for unknown reason")
208 | 
209 |         return content_element
210 | 
211 |     async def check_for_unavailable_or_captcha(self, unavailable_text):
212 |         page = self.parent._page
213 |         captcha_element = get_captcha_element(page)
214 |         unavailable_element = page.get_by_text(unavailable_text, exact=True)
215 | 
216 |         captcha_visible = await captcha_element.is_visible()
217 |         if captcha_visible:
218 |             num_tries = 0
219 |             max_tries = 3
220 |             captcha_exceptions = []
221 |             while num_tries < max_tries:
222 |                 num_tries += 1
223 |                 try:
224 |                     await self.solve_captcha()
225 |                     await asyncio.sleep(1)
226 |                     captcha_is_visible = await captcha_element.is_visible()
227 |                     if captcha_is_visible:
228 |                         captcha_exceptions.append(exceptions.CaptchaException("Captcha is still visible after solving"))
229 |                         continue
230 |                     else:
231 |                         break
232 |                 except Exception as e:
233 |                     captcha_exceptions.append(e)
234 |             else:
235 |                 print(
236 |                     f"Failed to solve captcha after {max_tries} tries with errors: {captcha_exceptions}, continuing anyway...")
237 | 
238 |         login_element = get_login_close_element(page)
239 |         login_visible = await login_element.is_visible()
240 |         if login_visible:
241 |             try:
242 |                 login_close = get_login_close_element(page)
243 |                 login_close_visible = await login_close.is_visible()
244 |                 if login_close_visible:
245 |                     await login_close.click()
246 |             except Exception as e:
247 |                 print(f"Failed to close login with error: {e}, continuing anyway...")
248 | 
249 |         if await unavailable_element.is_visible():
250 |             raise exceptions.NotAvailableException(f"Content is not available with message: '{unavailable_text}'")
251 | 
252 |     async def check_for_unavailable(self, unavailable_text):
253 |         page = self.parent._page
254 |         unavailable_element = page.get_by_text(unavailable_text, exact=True)
255 |         if await unavailable_element.is_visible():
256 |             raise exceptions.NotAvailableException(f"Content is not available with message: '{unavailable_text}'")
257 | 
258 |     async def check_for_reload_button(self):
259 |         page = self.parent._page
260 |         reload_button = page.get_by_text('Refresh', exact=True)
261 |         if await reload_button.is_visible():
262 |             await reload_button.click()
263 | 
264 |     async def wait_for_requests(self, api_path, timeout=TOK_DELAY):
265 |         page = self.parent._page
266 |         try:
267 |             async with page.expect_request(api_path, timeout=timeout * 1000) as first:
268 |                 return await first.value
269 |         except Exception as e:
270 |             raise exceptions.TimeoutException(str(e))
271 | 
272 |     def get_requests(self, api_path):
273 |         """searches a list of all requests thus far issued by the Playwright browser instance"""
274 |         return [request for request in self.parent._requests if api_path in request.url]
275 | 
276 |     def get_responses(self, api_path):
277 |         return [response for response in self.parent._responses if api_path in response.url]
278 | 
279 |     async def get_response_body(self, response):
280 |         return await response.body()
281 | 
282 |     async def scroll_to_bottom(self, speed=4):
283 |         page = self.parent._page
284 |         current_scroll_position = await page.evaluate(
285 |             "() => document.documentElement.scrollTop || document.body.scrollTop;")
286 |         new_height = current_scroll_position + 1
287 |         while current_scroll_position <= new_height:
288 |             current_scroll_position += speed + random.randint(-speed, speed)
289 |             await page.evaluate(f"() => window.scrollTo(0, {current_scroll_position});")
290 |             new_height = await page.evaluate("() => document.body.scrollHeight;")
291 | 
292 |     async def scroll_to(self, position, speed=5):
293 |         page = self.parent._page
294 |         current_scroll_position = await page.evaluate(
295 |             "() => document.documentElement.scrollTop || document.body.scrollTop;")
296 |         new_height = current_scroll_position + 1
297 |         while current_scroll_position <= new_height:
298 |             current_scroll_position += speed + random.randint(-speed, speed)
299 |             await page.evaluate(f"() => window.scrollTo(0, {current_scroll_position});")
300 |             new_height = await page.evaluate("() => document.body.scrollHeight;")
301 |             if current_scroll_position > position:
302 |                 break
303 | 
304 |     async def slight_scroll_up(self, speed=4):
305 |         page = self.parent._page
306 |         desired_scroll = -500
307 |         current_scroll = 0
308 |         while current_scroll > desired_scroll:
309 |             current_scroll -= speed + random.randint(-speed, speed)
310 |             await page.evaluate(f"() => window.scrollBy(0, {-speed});")
311 | 
312 |     async def scroll_down(self, amount, speed=4):
313 |         page = self.parent._page
314 |         
315 |         current_scroll_position = await page.evaluate(
316 |             "() => document.documentElement.scrollTop || document.body.scrollTop;")
317 |         desired_position = current_scroll_position + amount
318 |         while current_scroll_position < desired_position:
319 |             scroll_amount = speed + random.randint(-speed, speed) * 0.5
320 |             await page.evaluate(f"() => window.scrollBy(0, {scroll_amount});")
321 |             new_scroll_position = await page.evaluate(
322 |             "() => document.documentElement.scrollTop || document.body.scrollTop;")
323 |             if new_scroll_position > current_scroll_position:
324 |                 current_scroll_position = new_scroll_position
325 |             else:
326 |                 # we hit the bottom
327 |                 break
328 | 
329 |     async def wait_until_not_skeleton_or_captcha(self, skeleton_tag):
330 |         page = self.parent._page
331 |         content = page.locator(f'[data-e2e={skeleton_tag}]')
332 |         try:
333 |             await expect(content).not_to_be_visible()
334 |         except TimeoutError as e:
335 |             captcha_element = get_captcha_element(page)
336 |             if await captcha_element.is_visible():
337 |                 await self.solve_captcha()
338 |                 asyncio.sleep(1)
339 |             else:
340 |                 raise exceptions.TimeoutException(str(e))
341 | 
342 |     async def check_and_wait_for_captcha(self):
343 |         page = self.parent._page
344 |         captcha_element = get_captcha_element(page)
345 |         captcha_visible = await captcha_element.is_visible()
346 |         if captcha_visible:
347 |             await self.solve_captcha()
348 |             await asyncio.sleep(1)
349 | 
350 |     async def check_and_close_signin(self):
351 |         page = self.parent._page
352 |         signin_element = get_login_close_element(page)
353 |         signin_visible = await signin_element.is_visible()
354 |         if signin_visible:
355 |             await signin_element.click()
356 | 
357 |     async def solve_captcha(self):
358 |         if self.parent._manual_captcha_solves:
359 |             input("Press Enter to continue after solving CAPTCHA:")
360 |             await asyncio.sleep(1)
361 |             if self.parent._log_captcha_solves:
362 |                 request = self.get_requests('/captcha/verify')[0]
363 |                 body = request.post_data
364 |                 with open(f"manual_captcha_{datetime.now().isoformat()}.json", "w") as f:
365 |                     f.write(body)
366 |             return
367 |         """
368 |         this method not only calculates the CAPTCHA solution but also POSTs it to TikTok's server.
369 |         """
370 |         # get captcha data
371 |         request = self.get_requests('/captcha/get')[0]
372 |         captcha_response = await request.response()
373 |         if captcha_response is not None:
374 |             captcha_json = await captcha_response.json()
375 |         else:
376 |             raise exceptions.EmptyResponseException
377 | 
378 |         if 'mode' in captcha_json['data']:
379 |             captcha_data = captcha_json['data']
380 |         elif 'challenges' in captcha_json['data']:
381 |             captcha_data = captcha_json['data']['challenges'][0]
382 |         captcha_type = captcha_data['mode']
383 |         if captcha_type not in ['slide', 'whirl']:
384 |             raise exceptions.CaptchaException(f"Unsupported captcha type: {captcha_type}")
385 | 
386 |         """
387 |         captcha_data['question']['url1'] is a URL from TikTok's content delivery network. If you copy-paste it into your
388 |         web browser, you should GET the puzzle image. puzzle_response is the full response from the server, and
389 |         puzzle is the image itself, returned as a sequence of bytes.
390 |         """
391 |         puzzle_req = self.get_requests(captcha_data['question']['url1'])[0]
392 |         puzzle_response = await puzzle_req.response()
393 |         puzzle = await puzzle_response.body()
394 | 
395 |         if not puzzle:
396 |             raise exceptions.CaptchaException("Puzzle was not found in response")
397 | 
398 |         """
399 |         captcha_data['question']['url2'] is a URL from TikTok's content delivery network. If you copy-paste it into your
400 |         web browser, you should GET the puzzle piece that has to be moved to the correct position in the puzzle. 
401 |         piece_response: the full Playwright/HTTP response object
402 |         piece: the image of the puzzle piece, returned as a sequence of bytes
403 |         """
404 |         piece_req = self.get_requests(captcha_data['question']['url2'])[0]
405 |         piece_response = await piece_req.response()
406 |         piece = await piece_response.body()
407 | 
408 |         if not piece:
409 |             raise exceptions.CaptchaException("Piece was not found in response")
410 | 
411 |         """
412 |         -at this point in the code you have the puzzle image (puzzle) and the piece image (piece)
413 |         -now a local CAPTCHA solver will decide how to place the piece in the puzzle
414 |         -finally, the solution will be POSTed to TikTok, and the server's response will be obtained
415 |         """
416 |         solve = await captcha_solver.CaptchaSolver(captcha_response, puzzle, piece).solve_captcha()
417 | 
418 |         page = self.parent._page
419 |         drag = page.locator('css=div.secsdk-captcha-drag-icon').first
420 |         bar = page.locator('css=div.captcha_verify_slide--slidebar').first
421 |         
422 |         drag_bounding_box = await drag.bounding_box()
423 |         bar_bounding_box = await bar.bounding_box()
424 | 
425 |         drag_centre = {
426 |             'x': drag_bounding_box['x'] + drag_bounding_box['width'] / 2,
427 |             'y': drag_bounding_box['y'] + drag_bounding_box['height'] / 2
428 |         }
429 | 
430 |         bar_effective_width = bar_bounding_box['width'] - drag_bounding_box['width']
431 |         distance_to_drag = bar_effective_width * solve['maxloc']
432 | 
433 |         from pyclick import HumanCurve
434 | 
435 |         curve_kwargs = {
436 |             'knotsCount': 7, 
437 |             'distortionMean': 14.3, 
438 |             'distortionStdev': 22.7, 
439 |             'distortionFrequency': 0.8, 
440 |             'targetPoints': 500
441 |         }
442 |         points = HumanCurve(
443 |             [0, 0], 
444 |             [int(drag_centre['x']), int(drag_centre['y'])],
445 |             **curve_kwargs
446 |         ).points
447 |         for point in points:
448 |             await page.mouse.move(point[0], point[1])
449 |         await page.mouse.down()
450 |         points = HumanCurve(
451 |             [int(drag_centre['x']), int(drag_centre['y'])], 
452 |             [int(drag_centre['x'] + distance_to_drag), int(drag_centre['y'])],
453 |             **curve_kwargs
454 |         ).points
455 |         for point in points:
456 |             await page.mouse.move(point[0], point[1])
457 |         await page.mouse.up()
458 | 
459 |         if self.parent._log_captcha_solves:
460 |             await asyncio.sleep(1)
461 |             request = self.get_requests('/captcha/verify')[0]
462 |             body = request.post_data
463 |             with open(f"automated_captcha_{datetime.now().isoformat()}.json", "w") as f:
464 |                 f.write(body)
465 | 
466 | 


--------------------------------------------------------------------------------
/pytok/api/hashtag.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import json
  4 | import urllib.parse
  5 | 
  6 | from typing import TYPE_CHECKING, ClassVar, Iterator, Optional
  7 | 
  8 | import requests
  9 | 
 10 | if TYPE_CHECKING:
 11 |     from ..tiktok import PyTok
 12 |     from .video import Video
 13 | 
 14 | from .base import Base
 15 | from ..helpers import edit_url, extract_tag_contents
 16 | from ..exceptions import *
 17 | 
 18 | 
 19 | class Hashtag(Base):
 20 |     """
 21 |     A TikTok Hashtag/Challenge.
 22 | 
 23 |     Example Usage
 24 |     ```py
 25 |     hashtag = api.hashtag(name='funny')
 26 |     ```
 27 |     """
 28 | 
 29 |     parent: ClassVar[PyTok]
 30 | 
 31 |     id: Optional[str]
 32 |     """The ID of the hashtag"""
 33 |     name: Optional[str]
 34 |     """The name of the hashtag (omiting the #)"""
 35 |     as_dict: dict
 36 |     """The raw data associated with this hashtag."""
 37 | 
 38 |     def __init__(
 39 |         self,
 40 |         name: Optional[str] = None,
 41 |         id: Optional[str] = None,
 42 |         data: Optional[dict] = None,
 43 |     ):
 44 |         """
 45 |         You must provide the name or id of the hashtag.
 46 |         """
 47 |         self.name = name
 48 |         self.id = id
 49 | 
 50 |         if data is not None:
 51 |             self.as_dict = data
 52 |             self.__extract_from_data()
 53 |         else:
 54 |             self.as_dict = None
 55 | 
 56 |     async def info(self, **kwargs) -> dict:
 57 |         """
 58 |         Returns TikTok's dictionary representation of the hashtag object.
 59 |         """
 60 |         if self.as_dict is None:
 61 |             return await self.info_full(**kwargs)
 62 |         return self.as_dict
 63 | 
 64 |     async def info_full(self, **kwargs) -> dict:
 65 |         """
 66 |         Returns all information sent by TikTok related to this hashtag.
 67 | 
 68 |         Example Usage
 69 |         ```py
 70 |         hashtag_data = api.hashtag(name='funny').info_full()
 71 |         ```
 72 |         """
 73 |         page = self.parent._page
 74 | 
 75 |         url = f"https://www.tiktok.com/tag/{self.name}"
 76 |         await page.goto(url)
 77 | 
 78 |         await self.wait_for_content_or_unavailable_or_captcha('[data-e2e=challenge-item]', 'Not available')
 79 |         await self.check_and_close_signin()
 80 | 
 81 |         challenge_responses = self.get_responses("api/challenge/detail")
 82 |         challenge_responses = [request for request in challenge_responses if f"challengeName={urllib.parse.quote_plus(self.name)}" in request.url]
 83 |         if len(challenge_responses) == 0:
 84 |             raise ApiFailedException("Failed to get challenge request")
 85 |         else:
 86 |             challenge_response = challenge_responses[0]
 87 | 
 88 |         rep_body = await self.get_response_body(challenge_response)
 89 |         rep_d = json.loads(rep_body.decode('utf-8'))
 90 | 
 91 |         if 'challengeInfo' not in rep_d:
 92 |             raise ApiFailedException("Failed to get challengeInfo from response")
 93 | 
 94 |         self.as_dict = rep_d['challengeInfo']
 95 |         return self.as_dict
 96 | 
 97 |     async def videos(self, count=30, offset=0, **kwargs) -> Iterator[Video]:
 98 |         """Returns a dictionary listing TikToks with a specific hashtag.
 99 | 
100 |         - Parameters:
101 |             - count (int): The amount of videos you want returned.
102 |             - offset (int): The the offset of videos from 0 you want to get.
103 | 
104 |         Example Usage
105 |         ```py
106 |         for video in api.hashtag(name='funny').videos():
107 |             # do something
108 |         ```
109 |         """
110 |         await self.info()
111 | 
112 |         try:
113 |             async for video in self._get_videos_api(count, offset, **kwargs):
114 |                 yield video
115 |         except ApiFailedException:
116 |             async for video in self._get_videos_scraping(count, offset, **kwargs):
117 |                 yield video
118 | 
119 | 
120 |     async def _get_videos_scraping(self, count=30, offset=0, **kwargs):
121 |         processed_urls = []
122 |         amount_yielded = 0
123 |         pull_method = 'browser'
124 |         tries = 0
125 |         MAX_TRIES = 5
126 |         data_request_path = "api/challenge/item_list"
127 | 
128 |         while amount_yielded < count:
129 |             await self.parent.request_delay()
130 | 
131 |             search_requests = self.get_requests(data_request_path)
132 |             search_requests = [response for response in search_requests if f"challengeID={self.as_dict['challenge']['id']}" in response.url]
133 |             search_requests = [request for request in search_requests if request.url not in processed_urls]
134 |             for request in search_requests:
135 |                 processed_urls.append(request.url)
136 |                 response = await request.response()
137 |                 try:
138 |                     body = await self.get_response_body(response)
139 |                     res = json.loads(body)
140 |                 except:
141 |                     continue
142 |                 if res.get('type') == 'verify':
143 |                     # this is the captcha denied response
144 |                     continue
145 | 
146 |                 videos = res.get("itemList", [])
147 |                 amount_yielded += len(videos)
148 |                 for video in videos:
149 |                     yield self.parent.video(data=video)
150 | 
151 |                 if not res.get("hasMore", False):
152 |                     self.parent.logger.info(
153 |                         "TikTok isn't sending more TikToks beyond this point."
154 |                     )
155 |                     return
156 | 
157 |             for _ in range(tries):
158 |                 await self.slight_scroll_up()
159 |                 await self.scroll_to_bottom()
160 |                 await self.parent.request_delay()
161 |             
162 |                 search_requests = self.get_requests(data_request_path)
163 |                 search_requests = [request for request in search_requests if request.url not in processed_urls]
164 | 
165 |             if len(search_requests) == 0:
166 |                 tries += 1
167 |                 if tries > MAX_TRIES:
168 |                     raise
169 |                 continue
170 |                 
171 | 
172 |     async def _get_videos_api(self, count=30, offset=0, **kwargs):
173 |         responses = self.get_responses("api/challenge/item_list")
174 |         responses = [response for response in responses if f"challengeID={self.as_dict['challenge']['id']}" in response.url]
175 | 
176 |         amount_yielded = 0
177 |         cursor = 0
178 |         while amount_yielded < count:
179 |             for response in responses:
180 |                 next_url = edit_url(response.url, {"cursor": cursor})
181 |                 cookies = await self.parent._context.cookies()
182 |                 cookies = {cookie['name']: cookie['value'] for cookie in cookies}
183 |                 r = requests.get(next_url, headers=response.headers, cookies=cookies)
184 |                 try:
185 |                     res = r.json()
186 |                 except json.decoder.JSONDecodeError:
187 |                     raise ApiFailedException("Failed to decode JSON from TikTok API response")
188 | 
189 |                 cursor = res["cursor"]
190 |                 videos = res.get("itemList", [])
191 | 
192 |                 amount_yielded += len(videos)
193 |                 for video in videos:
194 |                     yield self.parent.video(data=video)
195 | 
196 |                 # if not res.get("hasMore", False):
197 |                 #     self.parent.logger.info(
198 |                 #         "TikTok isn't sending more TikToks beyond this point."
199 |                 #     )
200 |                 #     return
201 | 
202 |     def __extract_from_data(self):
203 |         data = self.as_dict
204 |         keys = data.keys()
205 | 
206 |         if "title" in keys:
207 |             self.id = data["id"]
208 |             self.name = data["title"]
209 | 
210 |         if None in (self.name, self.id):
211 |             Hashtag.parent.logger.error(
212 |                 f"Failed to create Hashtag with data: {data}\nwhich has keys {data.keys()}"
213 |             )
214 | 
215 |     def __repr__(self):
216 |         return self.__str__()
217 | 
218 |     def __str__(self):
219 |         return f"PyTok.hashtag(id='{self.id}', name='{self.name}')"
220 | 
221 |     def __getattr__(self, name):
222 |         # TODO: Maybe switch to using @property instead
223 |         if name in ["id", "name", "as_dict"]:
224 |             self.as_dict = self.info()
225 |             self.__extract_from_data()
226 |             return self.__getattribute__(name)
227 | 
228 |         raise AttributeError(f"{name} doesn't exist on PyTok.api.Hashtag")
229 | 


--------------------------------------------------------------------------------
/pytok/api/search.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import json
  4 | import time
  5 | from typing import TYPE_CHECKING, Iterator, Type
  6 | from urllib.parse import urlencode
  7 | import re
  8 | 
  9 | from .user import User
 10 | from .hashtag import Hashtag
 11 | from .video import Video
 12 | from .base import Base
 13 | from ..exceptions import *
 14 | 
 15 | if TYPE_CHECKING:
 16 |     from ..tiktok import PyTok
 17 | 
 18 | import requests
 19 | from playwright.async_api import TimeoutError
 20 | 
 21 | class Search(Base):
 22 |     """Contains static methods about searching."""
 23 | 
 24 |     parent: PyTok
 25 | 
 26 |     def __init__(self, search_term):
 27 |         self.search_term = search_term
 28 | 
 29 |     def videos(self, count=28, offset=0, **kwargs) -> Iterator[Video]:
 30 |         """
 31 |         Searches for Videos
 32 | 
 33 |         - Parameters:
 34 |             - search_term (str): The phrase you want to search for.
 35 |             - count (int): The amount of videos you want returned.
 36 |             - offset (int): The offset of videos from your data you want returned.
 37 | 
 38 |         Example Usage
 39 |         ```py
 40 |         for video in api.search.videos('therock'):
 41 |             # do something
 42 |         ```
 43 |         """
 44 |         return self.search_type(
 45 |             "item", count=count, offset=offset, **kwargs
 46 |         )
 47 | 
 48 |     def users(self, count=28, offset=0, **kwargs) -> Iterator[User]:
 49 |         """
 50 |         Searches for users using an alternate endpoint than Search.users
 51 | 
 52 |         - Parameters:
 53 |             - search_term (str): The phrase you want to search for.
 54 |             - count (int): The amount of videos you want returned.
 55 | 
 56 |         Example Usage
 57 |         ```py
 58 |         for user in api.search.users_alternate('therock'):
 59 |             # do something
 60 |         ```
 61 |         """
 62 |         return self.search_type(
 63 |             "user", count=count, offset=offset, **kwargs
 64 |         )
 65 | 
 66 |     async def search_type(self, obj_type, count=28, offset=0, **kwargs) -> Iterator:
 67 |         """
 68 |         Searches for users using an alternate endpoint than Search.users
 69 | 
 70 |         - Parameters:
 71 |             - search_term (str): The phrase you want to search for.
 72 |             - count (int): The amount of videos you want returned.
 73 |             - obj_type (str): user | item
 74 | 
 75 |         Just use .video & .users
 76 |         ```
 77 |         """
 78 | 
 79 |         if obj_type == "user":
 80 |             subdomain = "www"
 81 |             subpath = "user"
 82 |         elif obj_type == "item":
 83 |             subdomain = "us"
 84 |             subpath = "video"
 85 |         else:
 86 |             raise TypeError("invalid obj_type")
 87 | 
 88 |         page = self.parent._page
 89 | 
 90 |         url = f"https://{subdomain}.tiktok.com/search/{subpath}?q={self.search_term}"
 91 |         await page.goto(url)
 92 | 
 93 |         await self.wait_for_content_or_captcha('search_video-item')
 94 | 
 95 |         processed_urls = []
 96 |         amount_yielded = 0
 97 |         pull_method = 'browser'
 98 |         
 99 |         path = f"api/search/{obj_type}"
100 | 
101 |         while amount_yielded < count:
102 |             await self.parent.request_delay()
103 | 
104 |             if pull_method == 'browser':
105 |                 search_requests = self.get_requests(path)
106 |                 search_requests = [request for request in search_requests if request.url not in processed_urls]
107 |                 for request in search_requests:
108 |                     processed_urls.append(request.url)
109 |                     body = await self.get_response_body(request)
110 |                     res = json.loads(body)
111 |                     if res.get('type') == 'verify':
112 |                         # this is the captcha denied response
113 |                         continue
114 | 
115 |                     # When I move to 3.10+ support make this a match switch.
116 |                     if obj_type == "user":
117 |                         for result in res.get("user_list", []):
118 |                             yield User(data=result)
119 |                             amount_yielded += 1
120 | 
121 |                     if obj_type == "item":
122 |                         for result in res.get("item_list", []):
123 |                             yield Video(data=result)
124 |                             amount_yielded += 1
125 | 
126 |                     if res.get("has_more", 0) == 0:
127 |                         Search.parent.logger.info(
128 |                             "TikTok is not sending videos beyond this point."
129 |                         )
130 |                         return
131 | 
132 |                 try:
133 |                     load_more_button = self.wait_for_content_or_captcha('search-load-more')
134 |                 except TimeoutError:
135 |                     return
136 | 
137 |                 load_more_button.click()
138 | 
139 |                 self.wait_until_not_skeleton_or_captcha('video-skeleton-container')
140 | 
141 |             
142 |             elif pull_method == 'requests':
143 |                 cursor = res["cursor"]
144 |                 next_url = re.sub("offset=([0-9]+)", f"offset={cursor}", request.url)
145 |                 cookies = self.parent._context.cookies()
146 |                 cookies = {cookie['name']: cookie['value'] for cookie in cookies}
147 |                 r = requests.get(next_url, headers=request.headers, cookies=cookies)
148 |                 res = r.json()
149 | 
150 |                 if res.get('type') == 'verify':
151 |                     pull_method = 'browser'
152 |                     continue
153 | 
154 |                 if obj_type == "user":
155 |                     for result in res.get("user_list", []):
156 |                         yield User(data=result)
157 |                         amount_yielded += 1
158 | 
159 |                 if obj_type == "item":
160 |                     for result in res.get("item_list", []):
161 |                         yield Video(data=result)
162 |                         amount_yielded += 1
163 | 
164 |                 if res.get("has_more", 0) == 0:
165 |                     self.parent.logger.info(
166 |                         "TikTok is not sending videos beyond this point."
167 |                     )
168 |                     return
169 | 


--------------------------------------------------------------------------------
/pytok/api/sound.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | from os import path
  3 | 
  4 | import json
  5 | 
  6 | from urllib.parse import quote, urlencode
  7 | 
  8 | from ..helpers import extract_tag_contents
  9 | from ..exceptions import *
 10 | 
 11 | from typing import TYPE_CHECKING, ClassVar, Iterator, Optional
 12 | 
 13 | if TYPE_CHECKING:
 14 |     from ..tiktok import PyTok
 15 |     from .user import User
 16 |     from .video import Video
 17 | 
 18 | 
 19 | class Sound:
 20 |     """
 21 |     A TikTok Sound/Music/Song.
 22 | 
 23 |     Example Usage
 24 |     ```py
 25 |     song = api.song(id='7016547803243022337')
 26 |     ```
 27 |     """
 28 | 
 29 |     parent: ClassVar[PyTok]
 30 | 
 31 |     id: str
 32 |     """TikTok's ID for the sound"""
 33 |     title: Optional[str]
 34 |     """The title of the song."""
 35 |     author: Optional[User]
 36 |     """The author of the song (if it exists)"""
 37 | 
 38 |     def __init__(self, id: Optional[str] = None, data: Optional[str] = None):
 39 |         """
 40 |         You must provide the id of the sound or it will not work.
 41 |         """
 42 |         if data is not None:
 43 |             self.as_dict = data
 44 |             self.__extract_from_data()
 45 |         elif id is None:
 46 |             raise TypeError("You must provide id parameter.")
 47 |         else:
 48 |             self.id = id
 49 | 
 50 |     def info(self, use_html=False, **kwargs) -> dict:
 51 |         """
 52 |         Returns a dictionary of TikTok's Sound/Music object.
 53 | 
 54 |         - Parameters:
 55 |             - use_html (bool): If you want to perform an HTML request or not.
 56 |                 Defaults to False to use an API call, which shouldn't get detected
 57 |                 as often as an HTML request.
 58 | 
 59 | 
 60 |         Example Usage
 61 |         ```py
 62 |         sound_data = api.sound(id='7016547803243022337').info()
 63 |         ```
 64 |         """
 65 |         raise NotImplementedError()
 66 | 
 67 |     def info_full(self, **kwargs) -> dict:
 68 |         """
 69 |         Returns all the data associated with a TikTok Sound.
 70 | 
 71 |         This makes an API request, there is no HTML request option, as such
 72 |         with Sound.info()
 73 | 
 74 |         Example Usage
 75 |         ```py
 76 |         sound_data = api.sound(id='7016547803243022337').info_full()
 77 |         ```
 78 |         """
 79 |         raise NotImplementedError()
 80 | 
 81 |     def videos(self, count=30, offset=0, **kwargs) -> Iterator[Video]:
 82 |         """
 83 |         Returns Video objects of videos created with this sound.
 84 | 
 85 |         - Parameters:
 86 |             - count (int): The amount of videos you want returned.
 87 |             - offset (int): The offset of videos you want returned.
 88 | 
 89 |         Example Usage
 90 |         ```py
 91 |         for video in api.sound(id='7016547803243022337').videos():
 92 |             # do something
 93 |         ```
 94 |         """
 95 |         raise NotImplementedError()
 96 | 
 97 |     def __extract_from_data(self):
 98 |         data = self.as_dict
 99 |         keys = data.keys()
100 | 
101 |         self.id = data.get("id")
102 |         self.title = data.get("title")
103 | 
104 |         if data.get("authorName") is not None:
105 |             self.author = self.parent.user(username=data["authorName"])
106 | 
107 |         if self.id is None:
108 |             Sound.parent.logger.error(
109 |                 f"Failed to create Sound with data: {data}\nwhich has keys {data.keys()}"
110 |             )
111 | 
112 |     def __ensure_valid(self):
113 |         if self.id == "":
114 |             raise SoundRemovedException("This sound has been removed!")
115 | 
116 |     def __repr__(self):
117 |         return self.__str__()
118 | 
119 |     def __str__(self):
120 |         return f"PyTok.sound(id='{self.id}')"
121 | 
122 | 


--------------------------------------------------------------------------------
/pytok/api/trending.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import logging
 4 | from urllib.parse import urlencode
 5 | 
 6 | from .video import Video
 7 | from .sound import Sound
 8 | from .user import User
 9 | from .hashtag import Hashtag
10 | 
11 | from typing import TYPE_CHECKING, Iterator
12 | 
13 | if TYPE_CHECKING:
14 |     from ..tiktok import PyTok
15 | 
16 | 
class Trending:
    """Namespace for trending-content lookups; its only method is not implemented yet."""

    parent: PyTok

    @staticmethod
    def videos(count=30, **kwargs) -> Iterator[Video]:
        """
        Intended to return the Videos that are trending on TikTok.

        - Parameters:
            - count (int): The amount of videos you want returned.

        - Raises:
            - NotImplementedError: always.
        """
        raise NotImplementedError
32 | 


--------------------------------------------------------------------------------
/pytok/api/user.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | 
  4 | import asyncio
  5 | import json
  6 | import re
  7 | from urllib.parse import urlencode, urlparse
  8 | 
  9 | from patchright.async_api import TimeoutError as PlaywrightTimeoutError
 10 | import requests
 11 | 
 12 | from ..exceptions import *
 13 | from ..helpers import extract_tag_contents, edit_url
 14 | 
 15 | from typing import TYPE_CHECKING, ClassVar, Iterator, Optional
 16 | 
 17 | if TYPE_CHECKING:
 18 |     from ..tiktok import PyTok
 19 |     from .video import Video
 20 | 
 21 | from .base import Base
 22 | 
 23 | 
 24 | class User(Base):
 25 |     """
 26 |     A TikTok User.
 27 | 
 28 |     Example Usage
 29 |     ```py
 30 |     user = api.user(username='therock')
 31 |     # or
 32 |     user_id = '5831967'
 33 |     sec_uid = 'MS4wLjABAAAA-VASjiXTh7wDDyXvjk10VFhMWUAoxr8bgfO1kAL1-9s'
 34 |     user = api.user(user_id=user_id, sec_uid=sec_uid)
 35 |     ```
 36 | 
 37 |     """
 38 | 
 39 |     parent: ClassVar[PyTok]
 40 | 
 41 |     user_id: str
 42 |     """The user ID of the user."""
 43 |     sec_uid: str
 44 |     """The sec UID of the user."""
 45 |     username: str
 46 |     """The username of the user."""
 47 |     as_dict: dict
 48 |     """The raw data associated with this user."""
 49 | 
 50 |     def __init__(
 51 |             self,
 52 |             username: Optional[str] = None,
 53 |             user_id: Optional[str] = None,
 54 |             sec_uid: Optional[str] = None,
 55 |             data: Optional[dict] = None,
 56 |     ):
 57 |         """
 58 |         You must provide the username or (user_id and sec_uid) otherwise this
 59 |         will not function correctly.
 60 |         """
 61 |         self.__update_id_sec_uid_username(user_id, sec_uid, username)
 62 |         if data is not None:
 63 |             self.as_dict = data
 64 |             self.__extract_from_data()
 65 |         else:
 66 |             self.as_dict = {}
 67 | 
 68 |     def info(self, **kwargs):
 69 |         """
 70 |         Returns a dictionary of TikTok's User object
 71 | 
 72 |         Example Usage
 73 |         ```py
 74 |         user_data = api.user(username='therock').info()
 75 |         ```
 76 |         """
 77 |         return self.info_full(**kwargs)
 78 | 
 79 |     async def info_full(self, **kwargs) -> dict:
 80 |         """
 81 |         Returns a dictionary of information associated with this User.
 82 |         Includes statistics about this user.
 83 | 
 84 |         Example Usage
 85 |         ```py
 86 |         user_data = api.user(username='therock').info_full()
 87 |         ```
 88 |         """
 89 | 
 90 |         # TODO: Find the one using only user_id & sec_uid
 91 |         if not self.username:
 92 |             raise TypeError(
 93 |                 "You must provide the username when creating this class to use this method."
 94 |             )
 95 | 
 96 |         url = f"https://www.tiktok.com/@{self.username}?lang=en"
 97 | 
 98 |         page = self.parent._page
 99 |         
100 |         self.parent.logger.debug(f"Loading page: {url}")
101 |         if page.url != url:
102 |             try:
103 |                 async with page.expect_request(url) as event:
104 |                     await page.goto(url, timeout=60 * 1000)
105 |                     request = await event.value
106 |                     response = await request.response()
107 |                     if response.status >= 300:
108 |                         raise NotAvailableException("Content is not available")
109 |             except PlaywrightTimeoutError:
110 |                 raise TimeoutException("Page load timed out")
111 | 
112 |         # try:
113 |         await self.wait_for_content_or_unavailable_or_captcha('[data-e2e="user-post-item"]',
114 |                                                             "Couldn't find this account",
115 |                                                             no_content_text=["No content", "This account is private"])
116 |         # resolve any remaining issues
117 |         await asyncio.sleep(0.5)
118 |         await self.wait_for_content_or_unavailable_or_captcha('[data-e2e="user-post-item"]',
119 |                                                             "Couldn't find this account",
120 |                                                             no_content_text=["No content", "This account is private"])
121 | 
122 |         data_responses = self.get_responses('api/user/detail')
123 | 
124 |         if len(data_responses) > 0:
125 |             data_response = data_responses[-1]
126 |             data = await data_response.json()
127 |             user_info = data["userInfo"]
128 |             user = user_info["user"] | user_info["stats"]
129 |             self.as_dict = user
130 |             self.__extract_from_data()
131 |             return user
132 |         else:
133 |             # get initial html data
134 |             html_body = await page.content()
135 |             
136 |             tag_contents = extract_tag_contents(html_body)
137 |             self.initial_json = json.loads(tag_contents)
138 | 
139 |             if 'UserModule' in self.initial_json:
140 |                 user = self.initial_json["UserModule"]["users"][self.username] | self.initial_json["UserModule"]["stats"][self.username]
141 |             elif '__DEFAULT_SCOPE__' in self.initial_json:
142 |                 user_detail = self.initial_json['__DEFAULT_SCOPE__']['webapp.user-detail']
143 |                 if user_detail['statusCode'] != 0:
144 |                     raise InvalidJSONException("Failed to find user data in HTML")
145 |                 user_info = user_detail['userInfo']
146 |                 user = user_info['user'] | user_info['stats']
147 |             else:
148 |                 raise InvalidJSONException("Failed to find user data in HTML")
149 | 
150 |             self.as_dict = user
151 |             self.__extract_from_data()
152 |             return user
153 | 
154 |     async def videos(self, get_bytes=False, count=None, batch_size=100, **kwargs) -> Iterator[Video]:
155 |         """
156 |         Returns an iterator yielding Video objects.
157 | 
158 |         - Parameters:
159 |             - count (int): The amount of videos you want returned.
160 |             - cursor (int): The unix epoch to get uploaded videos since.
161 | 
162 |         Example Usage
163 |         ```py
164 |         user = api.user(username='therock')
165 |         for video in user.videos(count=100):
166 |             # do something
167 |         ```
168 |         """
169 |         if self.as_dict and self.as_dict['videoCount'] == 0:
170 |             return
171 |         
172 |         try:
173 |             videos, finished, cursor = await self._get_initial_videos(count, get_bytes)
174 |             for video in videos:
175 |                 yield video
176 | 
177 |             if finished or count and len(videos) >= count:
178 |                 return
179 | 
180 |             async for video in self._get_videos_api(count, cursor, get_bytes, **kwargs):
181 |                 yield video
182 |         except ApiFailedException:
183 |             async for video in self._get_videos_scraping(count, get_bytes):
184 |                 yield video
185 |         except Exception as ex:
186 |             raise
187 | 
188 |     async def _get_videos_api(self, count, cursor, get_bytes, **kwargs) -> Iterator[Video]:
189 |         # requesting videos via the api in the context of the browser session makes tiktok kill the session
190 |         # using requests instead
191 |         amount_yielded = 0
192 | 
193 |         data_request = self.parent.request_cache['videos']
194 | 
195 |         all_cookies = await self.parent._context.cookies()
196 |         verify_cookies = [cookie for cookie in all_cookies if cookie['name'] == 's_v_web_id']
197 |         if not verify_cookies:
198 |             raise ApiFailedException("Failed to get videos from API without verify cookies")
199 |         verify_fp = verify_cookies[0]['value']
200 | 
201 |         while (count is None or amount_yielded < count):
202 |             next_url = edit_url(
203 |                 data_request.url, 
204 |                 {
205 |                     'cursor': cursor, 
206 |                     'id': self.user_id, 
207 |                     'secUid': self.sec_uid,
208 |                     'needPinnedItemIds': True,
209 |                     'post_item_list_request_type': 0,
210 |                     'verifyFp': verify_fp
211 |                 }
212 |             )
213 |             headers = {
214 |                 'accept': '*/*',
215 |                 'accept-encoding': 'gzip, deflate, br, zstd',
216 |                 'accept-language': 'en-GB,en;q=0.9',
217 |                 'priority': 'u=1, i',
218 |                 'referer': f'https://www.tiktok.com/@{self.username}?lang=en',
219 |                 'sec-ch-ua': '"Not;A=Brand";v="24", "Chromium";v="128"',
220 |                 'sec-ch-ua-mobile': '?0',
221 |                 'sec-ch-ua-platform': '"Windows"',
222 |                 'sec-fetch-dest': 'empty',
223 |                 'sec-fetch-mode': 'cors',
224 |                 'sec-fetch-site': 'same-origin',
225 |                 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.6613.18 Safari/537.36'
226 |             }
227 |             cookies = await self.parent._context.cookies()
228 |             cookies = {cookie['name']: cookie['value'] for cookie in cookies}
229 |             r = requests.get(next_url, headers=headers, cookies=cookies)
230 | 
231 |             if r.status_code != 200:
232 |                 raise ApiFailedException(f"Failed to get videos from API with status code {r.status_code}")
233 |             if not r.content:
234 |                 raise ApiFailedException(f"Failed to get videos from API with empty response")
235 | 
236 |             res = r.json()
237 | 
238 |             if res.get('type') == 'verify':
239 |                 raise ApiFailedException("TikTok API is asking for verification")
240 | 
241 |             videos = res.get('itemList', [])
242 |             cursor = int(res['cursor'])
243 | 
244 |             if videos:
245 |                 amount_yielded += len(videos)
246 |                 for video in videos:
247 |                     yield self.parent.video(data=video)
248 | 
249 |             has_more = res.get("hasMore")
250 |             if not has_more:
251 |                 self.parent.logger.info(
252 |                     "TikTok isn't sending more TikToks beyond this point."
253 |                 )
254 |                 return
255 | 
256 |             await self.parent.request_delay()
257 |         
258 | 
259 |     async def _get_videos_scraping(self, count, get_bytes):
260 |         page = self.parent._page
261 | 
262 |         url = f"https://www.tiktok.com/@{self.username}"
263 |         if url not in page.url:
264 |             await page.goto(url)
265 |             await self.check_initial_call(url)
266 |         await self.wait_for_content_or_unavailable_or_captcha('[data-e2e=user-post-item]', "This account is private")
267 | 
268 |         video_pull_method = 'scroll'
269 |         if video_pull_method == 'scroll':
270 |             async for video in self._get_videos_scroll(count, get_bytes):
271 |                 yield video
272 |         elif video_pull_method == 'individual':
273 |             async for video in self._get_videos_individual(count, get_bytes):
274 |                 yield video
275 | 
    async def _get_videos_individual(self, count, get_bytes):
        """
        Collects video data by opening a post and stepping through the
        video browser with the ArrowDown key.

        NOTE(review): this method looks unfinished. It contains no `yield`,
        so it is a plain coroutine, while `_get_videos_scraping` iterates it
        with `async for`; `count` and `get_bytes` are not used; `still_more`
        is never set to False, so the loop only ends via an exception; and
        `all_videos` is never returned.
        """
        page = self.parent._page

        # Open the post item on the profile grid.
        await page.locator("[data-e2e=user-post-item]").click()

        # NOTE(review): the result is not awaited, unlike the other wait_for_*
        # calls in this class - confirm whether this should be awaited.
        self.wait_for_content_or_captcha('browse-video')

        still_more = True
        all_videos = []

        while still_more:
            # Look up the captured request for the page currently shown.
            html_req_path = page.url
            initial_html_request = self.get_requests(html_req_path)[0]
            # NOTE(review): get_response_body is awaited in _get_videos_scroll;
            # here its result is used directly - confirm.
            html_body = self.get_response_body(initial_html_request)
            tag_contents = extract_tag_contents(html_body)
            res = json.loads(tag_contents)

            all_videos += res['itemList']

            if still_more:
                # Advance to the next video in the browse view.
                await page.locator("[data-e2e=browse-video]").press('ArrowDown')
297 | 
298 |     async def _load_each_video(self, videos):
299 |         page = self.parent._page
300 | 
301 |         # get description elements with identifiable links
302 |         desc_elements_locator = page.locator("[data-e2e=user-post-item-desc]")
303 |         desc_elements_count = await desc_elements_locator.count()
304 | 
305 |         video_elements = []
306 |         for video in videos:
307 |             found = False
308 |             for i in range(desc_elements_count):
309 |                 desc_element = desc_elements_locator.nth(i)
310 |                 inner_html = await desc_element.inner_html()
311 |                 match = re.search(r'href="https:\/\/www\.tiktok\.com\/@[^\/]+\/video\/([0-9]+)"', inner_html)
312 |                 if not match:
313 |                     continue
314 |                 video_id = match.group(1)
315 |                 if video['id'] == video_id:
316 |                     # get sibling element of video element
317 |                     video_element = page.locator(f"xpath=//a[contains(@href, '{video['id']}')]/../..").first
318 |                     video_elements.append((video, video_element))
319 |                     found = True
320 |                     break
321 | 
322 |             if not found:
323 |                 pass
324 |                 # TODO: log this
325 |                 # raise Exception(f"Could not find video element for video {video['id']}")
326 | 
327 |         for video, element in video_elements:
328 |             await element.scroll_into_view_if_needed()
329 |             await element.hover()
330 |             try:
331 |                 play_path = urlparse(video['video']['playAddr']).path
332 |             except KeyError:
333 |                 print(f"Missing JSON attributes for video: {video['id']}")
334 |                 continue
335 | 
336 |             try:
337 |                 requests = self.get_requests(play_path)
338 |                 resp = await requests[0].response()
339 |             except Exception as ex:
340 |                 print(f"Failed to load video file for video: {video['id']}")
341 | 
342 |             await self.parent.request_delay()
343 | 
344 |     async def _get_initial_videos(self, count, get_bytes):
345 |         all_videos = []
346 |         finished = False
347 | 
348 |         cursor = 0
349 |         video_responses = self.get_responses('api/post/item_list')
350 |         video_responses = [res for res in video_responses if f"secUid={self.sec_uid}" in res.url]
351 |         for video_response in video_responses:
352 |             try:
353 |                 if len(video_response._body) == 0:
354 |                     continue
355 |                 video_data = await video_response.json()
356 |                 if video_data.get('itemList'):
357 |                     videos = video_data['itemList']
358 |                     video_objs = [self.parent.video(data=video) for video in videos]
359 |                     all_videos += video_objs
360 |                 finished = not video_data.get('hasMore', False)
361 |                 cursor = video_data.get('cursor', 0)
362 |             except Exception as ex:
363 |                 pass
364 | 
365 |         if len(video_responses) == 0:
366 |             raise ApiFailedException("Failed to get videos from API")
367 | 
368 |         self.parent.request_cache['videos'] = video_responses[-1]
369 |         return all_videos, finished, cursor
370 | 
371 |     async def _get_videos_scroll(self, count, get_bytes):
372 | 
373 |         data_request_path = "api/post/item_list"
374 |         data_urls = []
375 |         tries = 1
376 |         amount_yielded = 0
377 |         MAX_TRIES = 10
378 | 
379 |         cursors = []
380 |         while tries <= MAX_TRIES:
381 |             await self.check_and_wait_for_captcha()
382 |             await self.parent.request_delay()
383 |             await self.slight_scroll_up()
384 |             await self.parent.request_delay()
385 |             await self.scroll_down(30000, speed=12)
386 | 
387 |             data_requests = [req for req in self.get_requests(data_request_path) if req.url not in data_urls]
388 |             data_requests = [res for res in data_requests if f"secUid={self.sec_uid}" in res.url]
389 | 
390 |             if not data_requests:
391 |                 tries += 1
392 |                 if tries > MAX_TRIES:
393 |                     raise EmptyResponseException('TikTok backend broke')
394 |                 continue
395 | 
396 |             for data_request in data_requests:
397 |                 data_urls.append(data_request.url)
398 |                 try:
399 |                     data_response = await data_request.response()
400 |                     res_body = await self.get_response_body(data_response)
401 |                 except Exception as ex:
402 |                     continue
403 | 
404 |                 if not res_body:
405 |                     continue
406 | 
407 |                 self.parent.request_cache['videos'] = data_request
408 | 
409 |                 res = json.loads(res_body)
410 |                 videos = res.get("itemList", [])
411 |                 cursors.append(int(res['cursor']))
412 | 
413 |                 if get_bytes:
414 |                     await self._load_each_video(videos)
415 | 
416 |                 amount_yielded += len(videos)
417 |                 video_objs = [self.parent.video(data=video) for video in videos]
418 | 
419 |                 for video in video_objs:
420 |                     yield video
421 | 
422 |                 if count and amount_yielded >= count:
423 |                     return
424 | 
425 |                 has_more = res.get("hasMore", False)
426 |                 if not has_more:
427 |                     User.parent.logger.info(
428 |                         "TikTok isn't sending more TikToks beyond this point."
429 |                     )
430 |                     return
431 | 
432 |         return
433 | 
434 |     def liked(self, count: int = 30, cursor: int = 0, **kwargs) -> Iterator[Video]:
435 |         """
436 |         Returns a dictionary listing TikToks that a given a user has liked.
437 | 
438 |         **Note**: The user's likes must be **public** (which is not the default option)
439 | 
440 |         - Parameters:
441 |             - count (int): The amount of videos you want returned.
442 |             - cursor (int): The unix epoch to get uploaded videos since.
443 | 
444 |         Example Usage
445 |         ```py
446 |         for liked_video in api.user(username='public_likes'):
447 |             # do something
448 |         ```
449 |         """
450 |         processed = User.parent._process_kwargs(kwargs)
451 |         kwargs["custom_device_id"] = processed.device_id
452 | 
453 |         amount_yielded = 0
454 |         first = True
455 | 
456 |         if self.user_id is None and self.sec_uid is None:
457 |             self.__find_attributes()
458 | 
459 |         while amount_yielded < count:
460 |             query = {
461 |                 "count": 30,
462 |                 "id": self.user_id,
463 |                 "type": 2,
464 |                 "secUid": self.sec_uid,
465 |                 "cursor": cursor,
466 |                 "sourceType": 9,
467 |                 "appId": 1233,
468 |                 "region": processed.region,
469 |                 "priority_region": processed.region,
470 |                 "language": processed.language,
471 |             }
472 |             path = "api/favorite/item_list/?{}&{}".format(
473 |                 User.parent._add_url_params(), urlencode(query)
474 |             )
475 | 
476 |             res = self.parent.get_data(path, **kwargs)
477 | 
478 |             if "itemList" not in res.keys():
479 |                 if first:
480 |                     User.parent.logger.error("User's likes are most likely private")
481 |                 return
482 | 
483 |             videos = res.get("itemList", [])
484 |             amount_yielded += len(videos)
485 |             for video in videos:
486 |                 amount_yielded += 1
487 |                 yield self.parent.video(data=video)
488 | 
489 |             if not res.get("hasMore", False) and not first:
490 |                 User.parent.logger.info(
491 |                     "TikTok isn't sending more TikToks beyond this point."
492 |                 )
493 |                 return
494 | 
495 |             cursor = res["cursor"]
496 |             first = False
497 | 
498 |     def __extract_from_data(self):
499 |         data = self.as_dict
500 |         keys = data.keys()
501 | 
502 |         if "user_info" in keys:
503 |             self.__update_id_sec_uid_username(
504 |                 data["user_info"]["uid"],
505 |                 data["user_info"]["sec_uid"],
506 |                 data["user_info"]["unique_id"],
507 |             )
508 |         elif "uniqueId" in keys:
509 |             self.__update_id_sec_uid_username(
510 |                 data["id"], data["secUid"], data["uniqueId"]
511 |             )
512 | 
513 |         if None in (self.username, self.user_id, self.sec_uid):
514 |             User.parent.logger.error(
515 |                 f"Failed to create User with data: {data}\nwhich has keys {data.keys()}"
516 |             )
517 | 
518 |     def __update_id_sec_uid_username(self, id, sec_uid, username):
519 |         self.user_id = id
520 |         self.sec_uid = sec_uid
521 |         self.username = username
522 | 
523 |     def __find_attributes(self) -> None:
524 |         # It is more efficient to check search first, since self.user_object() makes HTML request.
525 |         found = False
526 |         for u in self.parent.search.users(self.username):
527 |             if u.username == self.username:
528 |                 found = True
529 |                 self.__update_id_sec_uid_username(u.user_id, u.sec_uid, u.username)
530 |                 break
531 | 
532 |         if not found:
533 |             user_object = self.info()
534 |             self.__update_id_sec_uid_username(
535 |                 user_object["id"], user_object["secUid"], user_object["uniqueId"]
536 |             )
537 | 
538 |     def __repr__(self):
539 |         return self.__str__()
540 | 
541 |     def __str__(self):
542 |         return f"PyTok.user(username='{self.username}', user_id='{self.user_id}', sec_uid='{self.sec_uid}')"
543 | 
544 | 


--------------------------------------------------------------------------------
/pytok/api/video.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import asyncio
  4 | from datetime import datetime
  5 | import json
  6 | from urllib import parse as url_parsers
  7 | from typing import TYPE_CHECKING, ClassVar, Optional
  8 | 
  9 | import brotli
 10 | import requests
 11 | from playwright.async_api import TimeoutError as PlaywrightTimeoutError
 12 | 
 13 | if TYPE_CHECKING:
 14 |     from ..tiktok import PyTok
 15 |     from .user import User
 16 |     from .sound import Sound
 17 |     from .hashtag import Hashtag
 18 | 
 19 | from .base import Base
 20 | from ..helpers import extract_tag_contents, edit_url, extract_video_id_from_url, extract_user_id_from_url
 21 | from .. import exceptions
 22 | 
 23 | class Counter:
 24 |     def __init__(self):
 25 |         self._counter = 0
 26 | 
 27 |     def add(self, n):
 28 |         self._counter += n
 29 | 
 30 |     def get(self):
 31 |         return self._counter
 32 | 
 33 | class Video(Base):
 34 |     """
 35 |     A TikTok Video class
 36 | 
 37 |     Example Usage
 38 |     ```py
 39 |     video = api.video(id='7041997751718137094')
 40 |     ```
 41 |     """
 42 | 
 43 |     parent: ClassVar[PyTok]
 44 | 
 45 |     id: Optional[str]
 46 |     """TikTok's ID of the Video"""
 47 |     create_time: Optional[datetime]
 48 |     """The creation time of the Video"""
 49 |     stats: Optional[dict]
 50 |     """TikTok's stats of the Video"""
 51 |     author: Optional[User]
 52 |     """The User who created the Video"""
 53 |     sound: Optional[Sound]
 54 |     """The Sound that is associated with the Video"""
 55 |     hashtags: Optional[list[Hashtag]]
 56 |     """A List of Hashtags on the Video"""
 57 |     as_dict: dict
 58 |     """The raw data associated with this Video."""
 59 | 
 60 |     def __init__(
 61 |             self,
 62 |             id: Optional[str] = None,
 63 |             username: Optional[str] = None,
 64 |             url: Optional[str] = None,
 65 |             data: Optional[dict] = None,
 66 |     ):
 67 |         """
 68 |         You must provide the id or a valid url, else this will fail.
 69 |         """
 70 |         self.id = id
 71 |         self.username = username
 72 |         if data is not None:
 73 |             self.as_dict = data
 74 |             self.__extract_from_data()
 75 |         elif url is not None:
 76 |             self.id = extract_video_id_from_url(url)
 77 |             self.username = extract_user_id_from_url(url)
 78 | 
 79 |         if self.id is None and url is None:
 80 |             raise TypeError("You must provide id or url parameter.")
 81 | 
 82 |     async def info(self, **kwargs) -> dict:
 83 |         """
 84 |         Returns a dictionary of all data associated with a TikTok Video.
 85 | 
 86 |         Example Usage
 87 |         ```py
 88 |         video_data = api.video(id='7041997751718137094').info()
 89 |         ```
 90 |         """
 91 |         if not hasattr(self, 'as_dict'):
 92 |             url = self._get_url()
 93 |             page = self.parent._page
 94 |             if page.url != url:
 95 |                 await self.view()
 96 | 
 97 |             await self.check_and_resolve_login_popup()
 98 | 
 99 |             # get initial html data
100 |             initial_html_response = self.get_responses(url)[-1]
101 |             html_body = await self.get_response_body(initial_html_response)
102 |             contents = extract_tag_contents(html_body)
103 |             res = json.loads(contents)
104 | 
105 |             video_detail = res['__DEFAULT_SCOPE__']['webapp.video-detail']
106 |             if video_detail['statusCode'] != 0:
107 |                 raise exceptions.NotAvailableException(
108 |                     f"Content is not available with status message: {video_detail['statusMsg']}")
109 |             video_data = video_detail['itemInfo']['itemStruct']
110 |             self.as_dict = video_data
111 |         else:
112 |             video_data = self.as_dict
113 | 
114 |         return video_data
115 | 
116 |     async def network_info(self, **kwargs) -> dict:
117 |         """
118 |         Returns a dictionary of all network data associated with a TikTok Video.
119 | 
120 |         Example Usage
121 |         ```py
122 |         video_data = api.video(id='7041997751718137094').network_data()
123 |         ```
124 |         """
125 |         url = self._get_url()
126 |         page = self.parent._page
127 |         if page.url != url:
128 |             await self.view()
129 |         initial_html_response = self.get_responses(url)[-1]
130 |         network_info = {}
131 |         network_info['server_addr'] = await initial_html_response.server_addr()
132 |         network_info['headers'] = await initial_html_response.all_headers()
133 |         return network_info
134 | 
135 |     async def bytes_network_info(self, **kwargs) -> dict:
136 |         """
137 |         Returns a dictionary of all network data associated with a TikTok Video.
138 | 
139 |         Example Usage
140 |         ```py
141 |         video_data = api.video(id='7041997751718137094').bytes_network_data()
142 |         ```
143 |         """
144 |         play_path = url_parsers.urlparse(self.as_dict['video']['playAddr']).path
145 |         reqs = self.get_requests(play_path)
146 |         if len(reqs) == 0:
147 |             # TODO load page and pull
148 |             raise Exception("No requests found for video")
149 |         for req in reqs:
150 |             try:
151 |                 res = await req.response()
152 |                 network_info = {}
153 |                 network_info['server_addr'] = await res.server_addr()
154 |                 network_info['headers'] = await res.all_headers()
155 |                 return network_info
156 |             except Exception:
157 |                 continue
158 |         else:
159 |             raise Exception("Failed to get video bytes")
160 | 
161 |     def _get_url(self) -> str:
162 |         if self.username is not None:
163 |             return f"https://www.tiktok.com/@{self.username}/video/{self.id}"
164 |         else:
165 |             # will autoresolve to correct username
166 |             return f"https://www.tiktok.com/@user/video/{self.id}"
167 | 
168 |     async def view(self, **kwargs) -> None:
169 |         """
170 |         Opens the TikTok Video in your default browser.
171 | 
172 |         Example Usage
173 |         ```py
174 |         api.video(id='7041997751718137094').view()
175 |         ```
176 |         """
177 |         page = self.parent._page
178 |         url = self._get_url()
179 |         try:
180 |             async with page.expect_request(url) as event:
181 |                 await page.goto(url)
182 |                 request = await event.value
183 |                 response = await request.response()
184 |                 if response.status >= 300:
185 |                     raise exceptions.NotAvailableException("Content is not available")
186 |             # no need to check for captcha, because video data is in the html regardless
187 |             await self.wait_for_content_or_unavailable('[id="main-content-video_detail"]', 'Video currently unavailable')
188 |         except PlaywrightTimeoutError as e:
189 |             raise exceptions.TimeoutException(str(e))
190 |         
191 |     async def _related_videos(self, counter, count=20):
192 |         data_request_path = "api/related/item_list"
193 |         data_requests = self.get_requests(data_request_path)
194 |         for req in data_requests:
195 |             # parse params from url
196 |             url_parsed = url_parsers.urlparse(req.url)
197 |             params = url_parsers.parse_qs(url_parsed.query)
198 |             if params['itemID'][0] != self.id:
199 |                 continue
200 |             res = await req.response()
201 |             if res is None:
202 |                 continue
203 |             body = await res.body()
204 |             if len(body) == 0:
205 |                 continue
206 |             d = await res.json()
207 |             for v in d.get('itemList', []):
208 |                 yield v
209 |                 counter.add(1)
210 |             if counter.get() >= count:
211 |                 break
212 | 
213 |     async def related_videos(self, count=20) -> list[dict]:
214 |         """
215 |         Returns a list of related
216 |         TikTok Videos to the current Video.
217 |         
218 |         """
219 |         counter = Counter()
220 |         async for video in self._related_videos(counter, count=count):
221 |             yield video
222 | 
223 |         # get via scroll
224 |         # solve captcha if necessary
225 |         if counter.get() == 0:
226 |             await self.check_and_wait_for_captcha()
227 |             await self.parent._page.reload()
228 |             await asyncio.sleep(5)
229 |             async for video in self._related_videos(counter, count=count):
230 |                 yield video
231 | 
232 |     async def bytes(self, **kwargs) -> bytes:
233 |         """
234 |         Returns the bytes of a TikTok Video.
235 | 
236 |         Example Usage
237 |         ```py
238 |         video_bytes = api.video(id='7041997751718137094').bytes()
239 | 
240 |         # Saving The Video
241 |         with open('saved_video.mp4', 'wb') as output:
242 |             output.write(video_bytes)
243 |         ```
244 |         """
245 |         bytes_url = self.as_dict['video']['playAddr']
246 |         if len(bytes_url) == 0:
247 |             raise exceptions.NotAvailableException("Post does not have a video")
248 |         play_path = url_parsers.urlparse(bytes_url).path
249 |         resps = self.get_responses(play_path)
250 |         if len(resps) > 0:
251 |             for res in resps:
252 |                 if hasattr(res, '_body'):
253 |                     if len(res._body) > 0:
254 |                         return res._body
255 |         # if we don't have the bytes in the response, we need to get it from the server
256 | 
257 |         # send the request ourselves
258 |         try:
259 |             return await asyncio.wait_for(self._request_bytes(bytes_url), timeout=10)
260 |         except TimeoutError:
261 |             raise exceptions.TimeoutException("Failed to get video bytes in time")
262 | 
263 |     async def _request_bytes(self, bytes_url):
264 |         bytes_headers = {
265 |             'sec-ch-ua': '"HeadlessChrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"', 
266 |             'referer': 'https://www.tiktok.com/', 
267 |             'accept-encoding': 'identity;q=1, *;q=0', 
268 |             'sec-ch-ua-mobile': '?0', 
269 |             'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.6312.4 Safari/537.36', 
270 |             'range': 'bytes=0-', 
271 |             'sec-ch-ua-platform': '"Windows"'
272 |         }
273 |         cookies = await self.parent._context.cookies()
274 |         cookies = {cookie['name']: cookie['value'] for cookie in cookies}
275 |         r = requests.get(bytes_url, headers=bytes_headers, cookies=cookies)
276 |         if r.content is not None or len(r.content) > 0:
277 |             return r.content
278 |         raise Exception("Failed to get video bytes")
279 | 
280 |     async def _get_comments_and_req(self, count):
281 |         # get request
282 |         data_request_path = "api/comment/list"
283 |         data_responses = self.get_responses(data_request_path)
284 | 
285 |         amount_yielded = 0
286 |         all_comments = []
287 |         processed_urls = []
288 | 
289 |         valid_data_request = None
290 |         for data_response in data_responses:
291 |             try:
292 |                 res = await data_response.json()
293 | 
294 |                 self.parent.request_cache['comments'] = data_response.request
295 | 
296 |                 processed_urls.append(data_response.url)
297 | 
298 |                 comments = res.get("comments", [])
299 | 
300 |                 amount_yielded += len(comments)
301 |                 all_comments += comments
302 | 
303 |                 if amount_yielded > count:
304 |                     return all_comments, processed_urls, True
305 | 
306 |                 has_more = res.get("has_more")
307 |                 if has_more != 1:
308 |                     self.parent.logger.info(
309 |                         "TikTok isn't sending more TikToks beyond this point."
310 |                     )
311 |                     return all_comments, processed_urls, True
312 |             except Exception:
313 |                 pass
314 | 
315 |         return all_comments, processed_urls, False
316 | 
317 |     async def _get_comment_replies(self, comment, batch_size):
318 |         if 'comments' not in self.parent.request_cache:
319 |             return
320 |         data_request = self.parent.request_cache['comments']
321 |         num_already_fetched = len(
322 |             comment.get('reply_comment', []) if comment.get('reply_comment', []) is not None else [])
323 |         num_comments_to_fetch = comment['reply_comment_total'] - num_already_fetched
324 |         while num_comments_to_fetch > 0:
325 | 
326 |             url_parsed = url_parsers.urlparse(data_request.url)
327 |             params = url_parsers.parse_qs(url_parsed.query)
328 |             params['cursor'] = num_already_fetched
329 |             del params['aweme_id']
330 |             params['count'] = min(num_comments_to_fetch, batch_size)
331 |             params['item_id'] = comment['aweme_id']
332 |             params['comment_id'] = comment['cid']
333 |             params['focus_state'] = 'true'
334 |             url_path = url_parsed.path.replace("api/comment/list", "api/comment/list/reply")
335 |             next_url = f"{url_parsed.scheme}://{url_parsed.netloc}{url_path}?{url_parsers.urlencode(params, doseq=True)}"
336 |             cookies = await self.parent._context.cookies()
337 |             cookies = {cookie['name']: cookie['value'] for cookie in cookies}
338 |             r = requests.get(next_url, headers=data_request.headers, cookies=cookies)
339 |             res = r.json()
340 | 
341 |             reply_comments = res.get("comments", [])
342 | 
343 |             if reply_comments:
344 |                 comment['reply_comment'] = comment['reply_comment'] + reply_comments if comment[
345 |                     'reply_comment'] else reply_comments
346 | 
347 |             has_more = res.get("has_more")
348 |             if has_more != 1:
349 |                 self.parent.logger.info(
350 |                     "TikTok isn't sending more TikToks beyond this point."
351 |                 )
352 |                 break
353 | 
354 |             await self.parent.request_delay()
355 | 
356 |             num_already_fetched = len(comment['reply_comment'])
357 |             num_comments_to_fetch = comment['reply_comment_total'] - num_already_fetched
358 | 
359 |     async def comments(self, count=200, batch_size=100):
360 |         if self.id and self.username:
361 |             await self.view()
362 |             await self.wait_for_content_or_unavailable_or_captcha('css=[data-e2e=comment-level-1]',
363 |                                                                   'Be the first to comment!')
364 |             # TODO allow multi layer comment fetch
365 | 
366 |             amount_yielded = 0
367 |             all_comments, processed_urls, finished = await self._get_comments_and_req(count)
368 | 
369 |             for comment in all_comments:
370 |                 await self._get_comment_replies(comment, batch_size)
371 | 
372 |             amount_yielded += len(all_comments)
373 |             for comment in all_comments:
374 |                 yield comment
375 | 
376 |             if finished:
377 |                 return
378 | 
379 |             # so that we don't re-yield any comments previously yielded
380 |             comment_ids = set(comment['cid'] for comment in all_comments)
381 |             try:
382 |                 async for comment in self._get_api_comments(count, batch_size, comment_ids):
383 |                     yield comment
384 |             except exceptions.ApiFailedException as e:
385 |                 async for comment in self._get_scroll_comments(count, amount_yielded, processed_urls):
386 |                     yield comment
387 |         else:
388 |             # if we only have the video id, we need to entirely rely on the api
389 |             async for comment in self._get_api_comments(count, batch_size, set()):
390 |                 yield comment
391 | 
392 |     async def _get_scroll_comments(self, count, amount_yielded, processed_urls):
393 |         page = self.parent._page
394 |         if page.url != self._get_url():
395 |             await self.view()
396 |         tries = 0
397 | 
398 |         data_request_path = "api/comment/list"
399 |         while amount_yielded < count:
400 |             # scroll down to induce request
401 |             await self.scroll_to(10000)
402 |             await self.slight_scroll_up()
403 |             await self.check_and_wait_for_captcha()
404 |             await self.check_and_close_signin()
405 | 
406 |             data_responses = self.get_responses(data_request_path)
407 |             data_responses = [data_response for data_response in data_responses if
408 |                               data_response.url not in processed_urls]
409 | 
410 |             if len(data_responses) == 0:
411 |                 if tries > 5:
412 |                     print(f"Not sending anymore!")
413 |                     break
414 |                 tries += 1
415 | 
416 |             for data_response in data_responses:
417 |                 try:
418 |                     res = await data_response.json()
419 | 
420 |                     processed_urls.append(data_response.url)
421 | 
422 |                     comments = res.get("comments", [])
423 | 
424 |                     for comment in comments:
425 |                         await self._get_comment_replies(comment, 100)
426 | 
427 |                     amount_yielded += len(comments)
428 |                     for comment in comments:
429 |                         yield comment
430 | 
431 |                     if amount_yielded > count:
432 |                         return
433 | 
434 |                     has_more = res.get("has_more")
435 |                     if has_more != 1:
436 |                         self.parent.logger.info(
437 |                             "TikTok isn't sending more TikToks beyond this point."
438 |                         )
439 |                         return
440 |                 except Exception as e:
441 |                     processed_urls.append(data_response.url)
442 | 
443 |     async def _get_comments_via_requests(self, count, cursor, data_request):
444 |         ms_tokens = await self.parent.get_ms_tokens()
445 |         next_url = edit_url(data_request.url, {'count': count, 'cursor': cursor, 'aweme_id': self.id})
446 |         cookies = await self.parent._context.cookies()
447 |         cookies = {cookie['name']: cookie['value'] for cookie in cookies}
448 |         headers = await data_request.all_headers()
449 |         headers = {k: v for k, v in headers.items() if not k.startswith(':')}
450 |         headers['referer'] = None
451 |         r = requests.get(next_url, headers=headers, cookies=cookies)
452 | 
453 |         if r.status_code != 200:
454 |             raise Exception(f"Failed to get comments with status code {r.status_code}")
455 | 
456 |         if len(r.content) == 0:
457 |             print("Failed to comments from API, switching to scroll")
458 |             raise exceptions.ApiFailedException("No content in response")
459 | 
460 |         try:
461 |             res = r.json()
462 |         except Exception:
463 |             res = json.loads(brotli.decompress(r.content).decode())
464 | 
465 |         return res
466 | 
467 |     async def _get_api_comments(self, count, batch_size, comment_ids):
468 | 
469 |         data_request = self.parent.request_cache['comments']
470 | 
471 |         try:
472 |             amount_yielded = 0
473 |             cursor = 0
474 |             while amount_yielded < count:
475 |                 # try directly requesting through browser
476 |                 url = edit_url(data_request.url,
477 |                                {'count': 20, 'cursor': cursor, 'aweme_id': self.id})  # , 'msToken': ms_tokens[-1]})
478 |                 page = self.parent._page
479 |                 async with page.expect_request(url) as event:
480 |                     await page.goto(url)
481 |                     request = await event.value
482 |                     response = await request.response()
483 |                     if response.status >= 300:
484 |                         raise exceptions.NotAvailableException("Content is not available")
485 | 
486 |                 if response.status != 200:
487 |                     raise Exception(f"Failed to get comments with status code {response.status}")
488 | 
489 |                 content = await response.body()
490 |                 if len(content) == 0:
491 |                     raise Exception("No content in response")
492 | 
493 |                 res = await response.json()
494 |                 cursor = res.get("cursor", 0)
495 | 
496 |                 comments = res.get("comments", [])
497 |                 amount_yielded += len(comments)
498 |                 for comment in comments:
499 |                     if comment['cid'] not in comment_ids:
500 |                         try:
501 |                             await self._get_comment_replies(comment, batch_size)
502 |                         except Exception:
503 |                             pass
504 |                         yield comment
505 |         except Exception as e:
506 |             try:
507 |                 # try getting all at once
508 |                 retries = 5
509 |                 for _ in range(retries):
510 |                     try:
511 |                         cursor = '0'
512 |                         res = await self._get_comments_via_requests(count, cursor, data_request)
513 | 
514 |                         comments = res.get("comments", [])
515 |                         for comment in comments:
516 |                             if comment['cid'] not in comment_ids:
517 |                                 try:
518 |                                     await self._get_comment_replies(comment, batch_size)
519 |                                 except Exception:
520 |                                     pass
521 |                                 yield comment
522 | 
523 |                         return
524 |                     except Exception as e:
525 |                         pass
526 |                 else:
527 |                     print("Failed to get all comments at once")
528 |                     print("Trying batched...")
529 |                     raise Exception("Failed to get comments")
530 |             except Exception as e:
531 | 
532 |                 amount_yielded = len(comment_ids)
533 |                 cursor = 0
534 |                 while amount_yielded < count:
535 |                     res = await self._get_comments_via_requests(20, cursor, data_request)
536 | 
537 |                     if res.get('type') == 'verify':
538 |                         # force new request for cache
539 |                         self._get_comments_and_req()
540 | 
541 |                     cursor = res.get("cursor", 0)
542 |                     comments = res.get("comments", [])
543 | 
544 |                     if comments:
545 |                         for comment in comments:
546 |                             await self._get_comment_replies(comment, batch_size)
547 | 
548 |                         amount_yielded += len(comments)
549 |                         for comment in comments:
550 |                             yield comment
551 | 
552 |                     has_more = res.get("has_more")
553 |                     if has_more != 1:
554 |                         self.parent.logger.info(
555 |                             "TikTok isn't sending more TikToks beyond this point."
556 |                         )
557 |                         return
558 | 
559 |                     await self.parent.request_delay()
560 | 
561 |     def __extract_from_data(self) -> None:
562 |         data = self.as_dict
563 |         keys = data.keys()
564 | 
565 |         if "author" in keys:
566 |             self.id = data["id"]
567 |             self.username = data["author"]["uniqueId"]
568 |             self.create_time = datetime.fromtimestamp(int(data["createTime"]))
569 |             self.stats = data["stats"]
570 |             self.author = self.parent.user(data=data["author"])
571 |             self.sound = self.parent.sound(data=data["music"])
572 | 
573 |             self.hashtags = [
574 |                 self.parent.hashtag(data=hashtag)
575 |                 for hashtag in data.get("challenges", [])
576 |             ]
577 | 
578 |         if self.id is None:
579 |             Video.parent.logger.error(
580 |                 f"Failed to create Video with data: {data}\nwhich has keys {data.keys()}"
581 |             )
582 | 
583 |     def __repr__(self):
584 |         return self.__str__()
585 | 
586 |     def __str__(self):
587 |         return f"PyTok.video(id='{self.id}')"
588 | 
589 |     # def __getattr__(self, name):
590 |     #     # Handle author, sound, hashtags, as_dict
591 |     #     if name in ["author", "sound", "hashtags", "stats", "create_time", "as_dict"]:
592 |     #         self.as_dict = self.info()
593 |     #         self.__extract_from_data()
594 |     #         return self.__getattribute__(name)
595 | 
596 |     #     raise AttributeError(f"{name} doesn't exist on PyTok.api.Video")
597 | 


--------------------------------------------------------------------------------
/pytok/captcha_solver.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import random
  3 | from urllib.parse import urlparse
  4 | 
  5 | import cv2
  6 | import base64
  7 | import numpy as np
  8 | import requests
  9 | 
 10 | 
 11 | class CaptchaSolver:
 12 |     def __init__(self, response, puzzle, piece):
 13 |         self._request = response.request
 14 |         self._response = response
 15 |         self._client = requests.Session()
 16 |         self._puzzle = base64.b64encode(puzzle)
 17 |         self._piece = base64.b64encode(piece)
 18 | 
 19 |     def _host(self):
 20 |         return urlparse(self._request.url).netloc
 21 | 
 22 |     def _params(self):
 23 |         return urlparse(self._request.url).query
 24 | 
 25 |     def _headers(self) -> dict:
 26 |         return self._request.headers
 27 | 
 28 |     async def _get_challenge(self) -> dict:
 29 |         return await self._response.json()
 30 | 
 31 |     async def _solve_captcha(self) -> dict:
 32 |         if self._mode == "slide":
 33 |             solver = PuzzleSolver(self._puzzle, self._piece)
 34 |             maxloc = solver.get_position()
 35 |         elif self._mode == "whirl":
 36 |             maxloc = whirl_solver(self._puzzle, self._piece)
 37 |         randlength = round(
 38 |             random.random() * (100 - 50) + 50
 39 |         )
 40 |         await asyncio.sleep(1)  # don't remove delay or it will fail
 41 |         return {
 42 |             "maxloc": maxloc,
 43 |             "randlenght": randlength
 44 |         }
 45 | 
 46 |     def _post_captcha(self, solve: dict) -> dict:
 47 |         params = self._params()
 48 | 
 49 |         body = {
 50 |             "id": solve["id"],
 51 |             "mode": self._mode
 52 |         }
 53 |         if self._mode == "slide":
 54 |             body.update({
 55 |                 "modified_img_width": 552,
 56 |                 "reply": list(
 57 |                     {
 58 |                         "relative_time": i * solve["randlenght"],
 59 |                         "x": round(
 60 |                             solve["maxloc"] / (solve["randlenght"] / (i + 1))
 61 |                         ),
 62 |                         "y": solve["tip"],
 63 |                     }
 64 |                     for i in range(
 65 |                         solve["randlenght"]
 66 |                     )
 67 |                 ),
 68 |             })
 69 |         elif self._mode == "whirl":
 70 |             body.update({
 71 |                 "modified_img_width": 340,
 72 |                 "drag_width": 271,
 73 |                 "reply": list(
 74 |                     {
 75 |                         "relative_time": i * solve["randlenght"],
 76 |                         "x": round(
 77 |                             271 * solve["maxloc"] / (solve["randlenght"] / (i + 1))
 78 |                         ),
 79 |                         "y": solve["tip"],
 80 |                     }
 81 |                     for i in range(
 82 |                         solve["randlenght"]
 83 |                     )
 84 |                 ),
 85 |             })
 86 | 
 87 |         host = self._host()
 88 |         headers = self._headers()
 89 | 
 90 |         resp = self._client.post(
 91 |             url=(
 92 |                     "https://"
 93 |                     + host
 94 |                     + "/captcha/verify?"
 95 |                     + params
 96 |             ),
 97 |             headers=headers.update(
 98 |                 {
 99 |                     "content-type": "application/json"
100 |                 }
101 |             ),
102 |             json=body
103 |         )
104 | 
105 |         if resp.status_code != 200:
106 |             raise Exception("Captcha was not solved")
107 |         else:
108 |             # status code was 200, but perhaps the response was to say that the CAPTCHA failed.
109 |             if resp.json()['code'] >= 500:
110 |                 raise Exception(f"CAPTCHA server responded 200 but said: {resp.json()['message']}")
111 | 
112 |         return resp.json()
113 | 
114 |     async def solve_captcha(self):
115 |         # this method is called
116 |         captcha_challenge = await self._get_challenge()
117 | 
118 |         if 'mode' in captcha_challenge["data"]:
119 |             captcha_challenge = captcha_challenge["data"]
120 |         elif 'challenges' in captcha_challenge["data"]:
121 |             captcha_challenge = captcha_challenge["data"]["challenges"][0]
122 |         captcha_id = captcha_challenge["id"]
123 |         self._mode = captcha_challenge["mode"]
124 | 
125 |         solve = await self._solve_captcha()
126 | 
127 |         solve['id'] = captcha_id
128 |         if captcha_challenge["mode"] == "slide":
129 |             tip_y = captcha_challenge["question"]["tip_y"]
130 |             solve['tip'] = tip_y
131 |         elif captcha_challenge["mode"] == "whirl":
132 |             solve['tip'] = 0
133 |         return solve
134 | 
135 | 
class PuzzleSolver:
    """Locate the horizontal offset of a slide-CAPTCHA piece via template matching.

    Both images are given base64-encoded, reduced to their Sobel edge maps and
    compared with OpenCV's normalised cross-correlation.

    NOTE(review): ``_background_preprocessing`` decodes ``self.piece`` and
    ``_piece_preprocessing`` decodes ``self.puzzle``, so the names look swapped
    relative to the constructor arguments - confirm the argument order used by
    callers before changing anything here.
    """

    def __init__(self, base64puzzle, base64piece):
        """Keep the two base64-encoded images for later processing."""
        self.puzzle = base64puzzle
        self.piece = base64piece

    def get_position(self):
        """Return the x coordinate of the best template match."""
        search_image = self._background_preprocessing()
        template = self._piece_preprocessing()
        correlation = cv2.matchTemplate(search_image, template, cv2.TM_CCOEFF_NORMED)
        _, _, _, best_location = cv2.minMaxLoc(correlation)
        return best_location[0]

    def _background_preprocessing(self):
        """Return the edge map of the image stored in ``self.piece``."""
        return self._sobel_operator(self._img_to_grayscale(self.piece))

    def _piece_preprocessing(self):
        """Return the edge map of the image stored in ``self.puzzle``."""
        return self._sobel_operator(self._img_to_grayscale(self.puzzle))

    def _sobel_operator(self, img):
        """Blur ``img``, convert it to grayscale and return its Sobel gradient magnitude.

        The x and y gradients are converted to absolute 8-bit values and
        blended with equal weights.
        """
        sobel_options = dict(ksize=3, scale=1, delta=0, borderType=cv2.BORDER_DEFAULT)
        depth = cv2.CV_16S

        blurred = cv2.GaussianBlur(img, (3, 3), 0)
        gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)

        horizontal = cv2.Sobel(gray, depth, 1, 0, **sobel_options)
        vertical = cv2.Sobel(gray, depth, 0, 1, **sobel_options)

        return cv2.addWeighted(
            cv2.convertScaleAbs(horizontal), 0.5,
            cv2.convertScaleAbs(vertical), 0.5,
            0,
        )

    def _img_to_grayscale(self, img):
        """Decode a base64 image into an OpenCV colour array.

        Despite the name, decoding uses ``cv2.IMREAD_COLOR``; the grayscale
        conversion happens in ``_sobel_operator``.
        """
        encoded = self._string_to_image(img)
        return cv2.imdecode(encoded, cv2.IMREAD_COLOR)

    def _string_to_image(self, base64_string):
        """Return the base64-decoded bytes as a ``uint8`` NumPy array."""
        raw = base64.b64decode(base64_string)
        return np.frombuffer(raw, dtype="uint8")
206 | 
207 | 
def _get_images_and_edges(b64_puzzle, b64_piece, resolution=300):
    """Decode both whirl-CAPTCHA images and sample the pixels along their shared rim.

    The puzzle is sampled on a circle one pixel larger than half the piece's
    height, the piece on a circle one pixel smaller, both centred on the
    respective image.

    :param b64_puzzle: Base64-encoded puzzle image.
    :param b64_piece: Base64-encoded piece image.
    :param resolution: Number of samples taken around each circle.
    :return: ``(puzzle, piece, puzzle_edge, piece_edge)``; the edge arrays
        have shape ``(resolution, 3)``.
    """

    def _decode(b64_image):
        buffer = np.frombuffer(base64.b64decode(b64_image), dtype="uint8")
        return cv2.imdecode(buffer, cv2.IMREAD_COLOR)

    puzzle = _decode(b64_puzzle)
    piece = _decode(b64_piece)
    half_piece = piece.shape[0] / 2

    # get inner edge of puzzle
    radius = half_piece + 1
    puzzle_edge = np.zeros((resolution, 3))
    for sample, theta in enumerate(np.linspace(0, 2 * np.pi, resolution)):
        row = int(puzzle.shape[0] / 2 + radius * np.cos(theta))
        col = int(puzzle.shape[1] / 2 + radius * np.sin(theta))
        puzzle_edge[sample] = puzzle[row, col]

    # get outer edge of piece
    radius = half_piece - 1
    last_row = piece.shape[0] - 1
    last_col = piece.shape[1] - 1
    piece_edge = np.zeros((resolution, 3))
    for sample, theta in enumerate(np.linspace(0, 2 * np.pi, resolution)):
        # clamp so the sample never falls outside the piece image
        row = min(int(piece.shape[0] / 2 + radius * np.cos(theta)), last_row)
        col = min(int(piece.shape[1] / 2 + radius * np.sin(theta)), last_col)
        piece_edge[sample] = piece[row, col]

    return puzzle, piece, puzzle_edge, piece_edge
229 | 
230 | 
def whirl_solver(b64_puzzle, b64_piece):
    """Return the rotation, as a fraction of a full turn, that best aligns the piece.

    The piece's rim samples are rolled through every offset and scored by the
    sum of element-wise products with the puzzle's rim samples; the first
    offset with the highest positive score wins.

    :param b64_puzzle: Base64-encoded puzzle image.
    :param b64_piece: Base64-encoded piece image.
    :return: ``(resolution - best_offset) / resolution`` with 300 samples.
    """
    resolution = 300
    _, _, puzzle_edge, piece_edge = _get_images_and_edges(b64_puzzle, b64_piece, resolution=resolution)

    # find the best match
    scores = [
        np.sum(puzzle_edge * np.roll(piece_edge, offset, axis=0))
        for offset in range(resolution)
    ]
    top_score = max(scores)
    # offset 0 is kept when no score is strictly positive
    best_angle = scores.index(top_score) if top_score > 0 else 0

    return (resolution - best_angle) / resolution


--------------------------------------------------------------------------------
/pytok/exceptions.py:
--------------------------------------------------------------------------------
 1 | class TikTokException(Exception):
 2 |     """Generic exception that all other TikTok errors are children of."""
 3 | 
 4 |     def __init__(self, *args, **kwargs):
 5 |         super().__init__(*args, **kwargs)
 6 | 
 7 | 
class CaptchaException(TikTokException):
    """Signals that TikTok is showing a captcha."""
10 | 
11 | 
class NotFoundException(TikTokException):
    """Signals that TikTok indicated the requested object does not exist."""
14 | 
15 | 
class EmptyResponseException(TikTokException):
    """Signals that TikTok sent back an empty response."""
18 | 
19 | 
class SoundRemovedException(TikTokException):
    """Signals that a TikTok sound has no id because TikTok removed it."""
22 | 
23 | 
class InvalidJSONException(TikTokException):
    """Signals that TikTok returned invalid JSON."""
26 | 
27 | 
class NotAvailableException(TikTokException):
    """Signals that the requested object is not available, e.g. in this region."""
30 | 
class NoContentException(TikTokException):
    """Signals that TikTok returned no content."""
33 | 
class TimeoutException(TikTokException):
    """Signals that fetching content from TikTok timed out."""
36 | 
class ApiFailedException(TikTokException):
    """Signals that the TikTok API is failing."""
39 | 
class FewerVideosThanExpectedException(TikTokException):
    """Signals that TikTok returned fewer videos for a user than their metadata led us to expect."""
42 | 
class AccountPrivateException(TikTokException):
    """Signals that a TikTok account is private and cannot be scraped."""


--------------------------------------------------------------------------------
/pytok/helpers.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from urllib import parse as url_parsers
 3 | 
 4 | import requests
 5 | 
 6 | from .exceptions import *
 7 | 
 8 | 
 9 | def extract_tag_contents(html):
10 |     if isinstance(html, bytes):
11 |         html = html.decode("utf-8")
12 |     data_json_match = re.search(r"""<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" type="application\/json">([^\>]+)<\/script>""", html)
13 |     if data_json_match:
14 |         return data_json_match.group(1)
15 |     else:
16 |         next_json = re.search(
17 |             r"id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P<next_data>[^<]+)",
18 |             html,
19 |         )
20 |         if next_json:
21 |             nonce_start = '<head nonce="'
22 |             nonce_end = '">'
23 |             nonce = html.split(nonce_start)[1].split(nonce_end)[0]
24 |             j_raw = html.split(
25 |                 '<script id="__NEXT_DATA__" type="application/json" nonce="%s" crossorigin="anonymous">'
26 |                 % nonce
27 |             )[1].split("</script>")[0]
28 |             return j_raw
29 |         else:
30 |             sigi_json = re.search('<script id="SIGI_STATE" type="application\/json">(.*?)<\/script>', html)
31 |             #sigi_json = re.search(
32 |                 #r'>\s*window\[[\'"]SIGI_STATE[\'"]\]\s*=\s*(?P<sigi_state>{.+});', html
33 |             #)
34 |             if sigi_json:
35 |                 return sigi_json.group(1)
36 |             else:
37 |                 raise NotAvailableException("Could not find the tag contents")
38 |                 # not a reliable way to check for captchas
39 |                 # raise CaptchaException(
40 |                 #    "TikTok blocks this request displaying a Captcha \nTip: Consider using a proxy or a custom_verify_fp as method parameters"
41 |                 # )
42 | 
43 | 
def extract_video_id_from_url(url):
    """Return the video id contained in a TikTok video URL.

    Redirects are followed with a HEAD request before the URL is inspected.

    :param url: A TikTok video URL (possibly a redirecting one).
    :return: The text between ``/video/`` and the query string.
    :raises TypeError: If the resolved URL has no ``@`` or no ``/video/`` part.
    """
    resolved = requests.head(url=url, allow_redirects=True).url
    if "@" not in resolved or "/video/" not in resolved:
        raise TypeError(
            "URL format not supported. Below is an example of a supported url.\n"
            "https://www.tiktok.com/@therock/video/6829267836783971589"
        )
    after_video = resolved.split("/video/")[1]
    return after_video.split("?")[0]
53 | 
def extract_user_id_from_url(url):
    """Return the user name contained in a TikTok video URL.

    Redirects are followed with a HEAD request before the URL is inspected.

    :param url: A TikTok video URL (possibly a redirecting one).
    :return: The text between the ``@`` and ``/video/``.
    :raises TypeError: If the resolved URL has no ``@`` or no ``/video/`` part.
    """
    resolved = requests.head(url=url, allow_redirects=True).url
    if "@" not in resolved or "/video/" not in resolved:
        raise TypeError(
            "URL format not supported. Below is an example of a supported url.\n"
            "https://www.tiktok.com/@therock/video/6829267836783971589"
        )
    before_video = resolved.split("/video/")[0]
    return before_video.split("@")[1]
63 | 
def add_if_not_replace(text, pat, replace, add):
    """Replace every match of ``pat`` in ``text``, or append ``add`` if there is none.

    :param text: Input string.
    :param pat: Regular expression to look for.
    :param replace: Replacement used when ``pat`` matches.
    :param add: Suffix appended when ``pat`` does not match.
    :return: The resulting string.
    """
    replaced, hits = re.subn(pat, replace, text)
    return replaced if hits else text + add
70 |     
def edit_url(url, new_params):
    """Return ``url`` with the query parameters in ``new_params`` set or replaced.

    Existing parameters keep their position, blank values are preserved and
    new parameters are appended. Only scheme, host, path and query are kept.

    :param url: URL to edit.
    :param new_params: Mapping of parameter name to its new single value.
    :return: The rebuilt URL.
    """
    parts = url_parsers.urlparse(url)
    query = url_parsers.parse_qs(parts.query, keep_blank_values=True)
    query.update({name: [value] for name, value in new_params.items()})
    # url encode params chosen to match the tiktok url encoding method
    encoded = url_parsers.urlencode(query, doseq=True, safe='=', quote_via=url_parsers.quote)
    return f"{parts.scheme}://{parts.netloc}{parts.path}?{encoded}"
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/pytok/tiktok.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | import re
  4 | import time
  5 | from typing import Optional
  6 | 
  7 | from patchright.async_api import async_playwright
  8 | 
  9 | from .api.sound import Sound
 10 | from .api.user import User
 11 | from .api.search import Search
 12 | from .api.hashtag import Hashtag
 13 | from .api.video import Video
 14 | from .api.trending import Trending
 15 | 
 16 | from .exceptions import *
 17 | from .utils import LOGGER_NAME
 18 | from .captcha_solver import CaptchaSolver
 19 | from dataclasses import dataclass
 20 | 
 21 | os.environ["no_proxy"] = "127.0.0.1,localhost"
 22 | 
 23 | BASE_URL = "https://m.tiktok.com/"
 24 | DESKTOP_BASE_URL = "https://www.tiktok.com/"
 25 | 
 26 | 
 27 | class PyTok:
 28 |     _is_context_manager = False
 29 |     user = User
 30 |     search = Search
 31 |     sound = Sound
 32 |     hashtag = Hashtag
 33 |     video = Video
 34 |     trending = Trending
 35 |     logger = logging.getLogger(LOGGER_NAME)
 36 | 
 37 |     def __init__(
 38 |             self,
 39 |             logging_level: int = logging.WARNING,
 40 |             request_delay: Optional[int] = 0,
 41 |             headless: Optional[bool] = False,
 42 |             browser: Optional[str] = "chromium",
 43 |             manual_captcha_solves: Optional[bool] = False,
 44 |             log_captcha_solves: Optional[bool] = False,
 45 |     ):
 46 |         """The PyTok class. Used to interact with TikTok. This is a singleton
 47 |             class to prevent issues from arising with playwright
 48 | 
 49 |         ##### Parameters
 50 |         * logging_level: The logging level you want the program to run at, optional
 51 |             These are the standard python logging module's levels.
 52 | 
 53 |         * request_delay: The amount of time in seconds to wait before making a request, optional
 54 |             This is used to throttle your own requests as you may end up making too
 55 |             many requests to TikTok for your IP.
 56 | 
 57 |         * **kwargs
 58 |             Parameters that are passed on to basically every module and methods
 59 |             that interact with this main class. These may or may not be documented
 60 |             in other places.
 61 |         """
 62 | 
 63 |         self._headless = headless
 64 |         self._request_delay = request_delay
 65 |         self._browser = browser
 66 |         self._manual_captcha_solves = manual_captcha_solves
 67 |         self._log_captcha_solves = log_captcha_solves
 68 | 
 69 |         self.logger.setLevel(logging_level)
 70 | 
 71 |         # Add classes from the api folder
 72 |         User.parent = self
 73 |         Search.parent = self
 74 |         Sound.parent = self
 75 |         Hashtag.parent = self
 76 |         Video.parent = self
 77 |         Trending.parent = self
 78 | 
 79 |         self.request_cache = {}
 80 | 
 81 |         if self._headless:
 82 |             from pyvirtualdisplay import Display
 83 |             self._display = Display()
 84 |             self._display.start()
 85 | 
 86 |         # options = uc.ChromeOptions()
 87 |         # options.add_argument('--ignore-ssl-errors=yes')
 88 |         # options.add_argument('--ignore-certificate-errors')
 89 |         # # options.page_load_strategy = 'eager'
 90 | 
 91 |     async def __aenter__(self):
 92 |         self._playwright = await async_playwright().start()
 93 |         fingerprint_options = {}
 94 |         if self._browser == "firefox":
 95 |             self._browser = await self._playwright.firefox.launch(headless=self._headless)
 96 |             # fingerprint_options['browser'] = [ForgeBrowser("firefox")]
 97 |         elif self._browser == "chromium":
 98 |             self._browser = await self._playwright.chromium.launch(
 99 |                 # user_data_dir="...",
100 |                 channel="chrome",
101 |                 headless=False,
102 |                 # no_viewport=True,
103 |                 # do NOT add custom browser headers or user_agent
104 |             )
105 |             # fingerprint_options['browser'] = 'chrome'
106 |         else:
107 |             raise Exception("Browser not supported")
108 |         # self._context = await AsyncNewContext(self._browser, fingerprint_options=fingerprint_options)
109 |         # device_config = self._playwright.devices['Desktop Chrome']
110 |         self._context = await self._browser.new_context()
111 |         self._page = await self._context.new_page()
112 | 
113 |         # move mouse to 0, 0 to have known mouse start position
114 |         await self._page.mouse.move(0, 0)
115 | 
116 |         self._requests = []
117 |         self._responses = []
118 | 
119 |         self._page.on("request", lambda request: self._requests.append(request))
120 | 
121 |         async def save_responses_and_body(response):
122 |             self._responses.append(response)
123 |             try:
124 |                 response._body = await response.body()
125 |             except Exception:
126 |                 pass
127 | 
128 |         self._page.on("response", save_responses_and_body)
129 | 
130 |         self._user_agent = await self._page.evaluate("() => navigator.userAgent")
131 |         self._is_context_manager = True
132 |         return self
133 | 
134 |     async def request_delay(self):
135 |         if self._request_delay is not None:
136 |             await self._page.wait_for_timeout(self._request_delay * 1000)
137 | 
138 |     def __del__(self):
139 |         """A basic cleanup method, called automatically from the code"""
140 |         if not self._is_context_manager:
141 |             self.logger.debug(
142 |                 "PyTok was shutdown improperlly. Ensure the instance is terminated with .shutdown()"
143 |             )
144 |             self.shutdown()
145 |         return
146 | 
147 |     #
148 |     # PRIVATE METHODS
149 |     #
150 | 
151 |     def r1(self, pattern, text):
152 |         m = re.search(pattern, text)
153 |         if m:
154 |             return m.group(1)
155 | 
156 |     async def shutdown(self) -> None:
157 |         try:
158 |             await self._context.close()
159 |             await self._browser.close()
160 |             await self._playwright.stop()
161 |         except Exception:
162 |             pass
163 |         finally:
164 |             if self._headless:
165 |                 display = getattr(self, "_display", None)
166 |                 if display:
167 |                     display.stop()
168 | 
169 |     async def __aexit__(self, type, value, traceback):
170 |         await self.shutdown()
171 | 
172 |     async def get_ms_tokens(self):
173 |         all_cookies = await self._context.cookies()
174 |         cookie_name = 'msToken'
175 |         cookies = []
176 |         for cookie in all_cookies:
177 |             if cookie["name"] == cookie_name and cookie['secure']:
178 |                 cookies.append(cookie['value'])
179 |         if len(cookies) == 0:
180 |             raise Exception(f"Could not find {cookie_name} cookie")
181 |         return cookies
182 | 


--------------------------------------------------------------------------------
/pytok/utils.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | import json
  3 | import os
  4 | import re
  5 | 
  6 | import pandas as pd
  7 | import tqdm
  8 | 
  9 | LOGGER_NAME: str = "PyTok"
 10 | 
 11 | 
 12 | def update_if_not_none(dict1, dict2):
 13 |     dict1.update((k, v) for k, v in dict2.items() if v is not None)
 14 |     return dict1
 15 | 
 16 | 
 17 | def _get_comment_features(comment):
 18 |     comment_user = comment['user']
 19 |     if isinstance(comment_user, str):
 20 |         raise ValueError()
 21 |     elif isinstance(comment_user, dict):
 22 |         if 'unique_id' in comment_user:
 23 |             author_id = comment_user['uid']
 24 |             author_name = comment_user['unique_id']
 25 |         elif 'uniqueId' in comment_user:
 26 |             author_id = comment_user['id']
 27 |             author_name = comment_user['uniqueId']
 28 |         else:
 29 |             author_name = ''
 30 |             author_id = comment_user['uid']
 31 |     else:
 32 |         raise ValueError()
 33 | 
 34 |     mentioned_users = [info['user_id'] for info in comment['text_extra'] if 'user_id' in info and info['user_id'] != '']
 35 | 
 36 |     return author_id, author_name, mentioned_users
 37 | 
 38 | 
 39 | def load_comment_df_from_files(file_paths):
 40 |     comments = []
 41 |     for file_path in tqdm.tqdm(file_paths):
 42 | 
 43 |         if not os.path.exists(file_path):
 44 |             continue
 45 | 
 46 |         with open(file_path, 'r') as f:
 47 |             comments = json.load(f)
 48 |         comments.extend(comments)
 49 | 
 50 |     return get_comment_df(comments)
 51 | 
 52 | 
 53 | def get_comment_df(comments):
 54 |     comments_data = []
 55 |     for comment in comments:
 56 | 
 57 |         try:
 58 |             author_id, author_name, mentioned_users = _get_comment_features(comment)
 59 |         except ValueError:
 60 |             continue
 61 | 
 62 |         comment_replies = comment.get('reply_comment', None)
 63 |         if comment_replies:
 64 |             for reply_comment in comment_replies:
 65 |                 try:
 66 |                     reply_author_id, reply_author_name, reply_mentioned_users = _get_comment_features(reply_comment)
 67 |                 except ValueError:
 68 |                     continue
 69 | 
 70 |                 comments_data.append((
 71 |                     reply_comment['cid'],
 72 |                     datetime.utcfromtimestamp(reply_comment['create_time']),
 73 |                     reply_author_name,
 74 |                     reply_author_id,
 75 |                     reply_comment['text'],
 76 |                     reply_mentioned_users,
 77 |                     reply_comment['aweme_id'],
 78 |                     reply_comment['comment_language'],
 79 |                     reply_comment['digg_count'],
 80 |                     comment['cid']
 81 |                 ))
 82 | 
 83 |         comments_data.append((
 84 |             comment['cid'],
 85 |             datetime.utcfromtimestamp(comment['create_time']),
 86 |             author_name,
 87 |             author_id,
 88 |             comment['text'],
 89 |             mentioned_users,
 90 |             comment['aweme_id'],
 91 |             comment['comment_language'],
 92 |             comment['digg_count'],
 93 |             None
 94 |         ))
 95 | 
 96 |     comment_df = pd.DataFrame(comments_data,
 97 |                               columns=['comment_id', 'createtime', 'author_name', 'author_id', 'text', 'mentions',
 98 |                                        'video_id', 'comment_language', 'like_count', 'reply_comment_id'])
 99 |     comment_df = comment_df.drop_duplicates('comment_id')
100 |     comment_df = comment_df[comment_df['text'].notna()]
101 |     comment_df = comment_df[comment_df['video_id'].notna()]
102 |     comment_df = comment_df[comment_df['mentions'].notna()]
103 |     comment_df['text'] = comment_df['text'].str.replace(r'\n', ' ', regex=True)
104 |     comment_df['text'] = comment_df['text'].str.replace(r'\r', ' ', regex=True)
105 |     return comment_df
106 | 
107 | 
def try_load_comment_df_from_file(file_path, file_paths=[]):
    """Load the comment DataFrame cached at ``file_path``, creating it if absent.

    ``file_path`` must end in ``.csv`` or ``.parquet.gzip``.  When the file
    exists it is read, its id/text columns are cast to ``str``, ``mentions``
    is parsed with ``_str_to_list`` and ``createtime`` is converted to
    ``datetime64[ns]``.  Otherwise the frame is built from ``file_paths`` via
    ``load_comment_df_from_files`` and written to ``file_path``; a
    ``ValueError`` is raised when ``file_paths`` is empty.
    """
    is_csv = file_path.endswith('.csv')
    is_parquet = file_path.endswith('.parquet.gzip')
    assert is_parquet or is_csv, "File path must be a parquet or csv file"

    if not os.path.exists(file_path):
        if not file_paths:
            raise ValueError(
                f"Parquet file: {file_path} does not exist, and no file paths provided to generate dataframe")

        comment_df = load_comment_df_from_files(file_paths)

        if is_csv:
            comment_df.to_csv(file_path, index=False)
        elif is_parquet:
            comment_df.to_parquet(file_path, compression='gzip', index=False)
        return comment_df

    if is_csv:
        comment_df = pd.read_csv(file_path)
    elif is_parquet:
        comment_df = pd.read_parquet(file_path)

    str_columns = ['comment_id', 'author_name', 'author_id', 'text', 'video_id', 'comment_language',
                   'reply_comment_id']
    comment_df[str_columns] = comment_df[str_columns].astype(str)
    comment_df['mentions'] = comment_df['mentions'].apply(_str_to_list)
    comment_df['createtime'] = pd.to_datetime(comment_df['createtime'])
    comment_df['createtime'] = comment_df['createtime'].astype('datetime64[ns]')
    return comment_df
136 | 
137 | 
def _str_to_list(stri):
    """Parse the string form of a list of quoted strings back into a list.

    ``"['a', 'b']"`` becomes ``['a', 'b']``: the outer brackets are removed,
    the rest is split on commas, and the first and last character (the quote)
    of every stripped item is dropped.  A one-element list such as ``"['a']"``
    contains no comma; it used to be returned as ``[]`` and is now parsed as
    ``['a']``.  ``"[]"`` and text that is not bracketed still give ``[]``.
    """
    if ',' not in stri:
        text = stri.strip()
        if not (text.startswith('[') and text.endswith(']')):
            return []
        item = text[1:-1].strip()
        # Fewer than two characters cannot hold a quoted element, so this is
        # the empty list.
        return [item[1:-1]] if len(item) >= 2 else []
    return [word.strip()[1:-1] for word in stri[1:-1].split(',')]
142 | 
143 | 
def try_load_video_df_from_file(file_path, file_paths=[]):
    """Load the video DataFrame cached at ``file_path``, creating it if absent.

    ``file_path`` must end in ``.csv`` or ``.parquet.gzip``.  When the file
    exists it is read, its id/text columns are cast to ``str``, ``createtime``
    is parsed as datetime and ``mentions`` / ``hashtags`` are parsed with
    ``_str_to_list``.  Otherwise every JSON file in ``file_paths`` (holding
    either a list of videos or a single video dict) is loaded, converted with
    ``get_video_df`` and the result is written to ``file_path``.

    Raises ``ValueError`` when the cache is missing and ``file_paths`` is
    empty, or when a JSON file holds neither a list nor a dict.
    """
    assert file_path.endswith('.parquet.gzip') or file_path.endswith('.csv'), "File path must be a parquet or csv file"
    if os.path.exists(file_path):
        if file_path.endswith('.csv'):
            video_df = pd.read_csv(file_path)
        elif file_path.endswith('.parquet.gzip'):
            video_df = pd.read_parquet(file_path)

        str_columns = ['video_id', 'author_name', 'author_id', 'desc', 'share_video_id', 'share_video_user_id',
                       'share_type']
        video_df[str_columns] = video_df[str_columns].astype(str)
        video_df['createtime'] = pd.to_datetime(video_df['createtime'])
        video_df['mentions'] = video_df['mentions'].apply(_str_to_list)
        video_df['hashtags'] = video_df['hashtags'].apply(_str_to_list)
        return video_df

    if not file_paths:
        raise ValueError(f"File: {file_path} does not exist, and no file paths provided to generate dataframe")

    videos = []
    # The loop variable must not be called ``file_path``: shadowing the
    # parameter made the save step below look at the last input JSON path
    # instead of the requested cache path.
    for json_path in file_paths:
        with open(json_path, 'r') as f:
            file_data = json.load(f)

        if isinstance(file_data, list):
            videos += file_data
        elif isinstance(file_data, dict):
            videos.append(file_data)
        else:
            raise ValueError()

    video_df = get_video_df(videos)
    if file_path.endswith('.csv'):
        video_df.to_csv(file_path, index=False)
    elif file_path.endswith('.parquet.gzip'):
        video_df.to_parquet(file_path, compression='gzip', index=False)
    return video_df
183 | 
184 | 
def extract_video_features(video):
    """Turn one raw video dict into the flat tuple of features used by ``get_video_df``.

    Reads ``id``, ``createTime``, ``author``, ``desc``, ``stats`` and, when
    present, ``textExtra``, ``duetInfo``, ``duetFromId`` and ``aigcLabelType``.

    Returns a 16-tuple: id, creation time (naive UTC datetime), author
    uniqueId, author id, description, hashtags, share_video_id,
    share_video_user_id, share_video_user_name, share_type, mentioned user
    ids, digg count, share count, comment count, play count, aigcLabelType
    (``None`` when absent).

    Raises ``ValueError`` when ``duetInfo.duetFromId`` disagrees with the
    shared video id derived from the mentions.
    """
    # get text extra relating to user names
    video_mentions = [extra for extra in video.get('textExtra', []) if
                      extra.get('userId', None) and extra['userId'] != '0']

    # get all hashtags used in the description
    hashtags = [extra['hashtagName'] for extra in video.get('textExtra', []) if extra.get('hashtagName', None)]

    # get all reply types
    # The description must start with "#<type> <word> @<user>"; group 1 (the
    # hashtag text) becomes the share type.
    match = re.search(r"^\#([^# ]+) [^@# ]+ @([^ ]+)", video['desc'])
    if match and len(video_mentions) > 0:
        # if there are multiple mentions we get the first
        if video_mentions[0]['awemeId'] != '':
            share_video_id = video_mentions[0]['awemeId']
        elif video.get('duetInfo', None) and video['duetInfo']['duetFromId'] != '0':
            share_video_id = video['duetInfo']['duetFromId']
        else:
            # no way to get shared video id
            share_video_id = None

        share_video_user_id = video_mentions[0]['userId']
        share_video_user_name = video_mentions[0]['userUniqueId'] if 'userUniqueId' in video_mentions[
            0].keys() else None
        share_type = match.group(1)

        # the first mention is the shared video's author, not an ordinary mention
        video_mentions = video_mentions[1:]
    else:
        share_video_id = None
        share_video_user_id = None
        share_video_user_name = None
        share_type = None

    # get duets that we didn't get with the regex
    # NOTE(review): the condition tests the top-level 'duetFromId' key while the
    # body reads video['duetInfo']['duetFromId'] - confirm both keys are present
    # whenever this branch is taken.
    if video.get('duetFromId', None) and video['duetFromId'] != '0' and not share_video_id:
        duet_infos = [mention for mention in video_mentions if mention['awemeId'] == video['duetInfo']['duetFromId']]
        # sometimes the awemeId is missing
        if duet_infos:
            duet_info = duet_infos[0]
            share_video_id = duet_info['awemeId']
        else:
            # NOTE(review): raises IndexError when there are no mentions left - confirm
            # this cannot happen for duets.
            duet_info = video_mentions[0]
            share_video_id = video['duetInfo']['duetFromId']

        share_video_user_id = duet_info['userId']
        share_video_user_name = duet_info['userUniqueId']
        share_type = 'duet'

        # drop mentions that point at the duetted video
        video_mentions = [mention for mention in video_mentions if
                          mention['awemeId'] != video['duetInfo']['duetFromId']]

    # get user mentions
    mentions = []
    if len(video_mentions) > 0:
        mentions = [mention['userId'] for mention in video_mentions]

    # sanity check: a duet source id, when present, must equal the shared video id
    if video.get('duetInfo', None) and video['duetInfo']['duetFromId'] != '0' and share_video_id and video['duetInfo'][
        'duetFromId'] != share_video_id:
        raise ValueError("Comment metadata is mismatched")

    vid_features = (
        video['id'],
        datetime.utcfromtimestamp(int(video['createTime'])),
        video['author']['uniqueId'],
        video['author']['id'],
        video['desc'],
        hashtags,
        share_video_id,
        share_video_user_id,
        share_video_user_name,
        share_type,
        mentions,
        video['stats']['diggCount'],
        video['stats']['shareCount'],
        video['stats']['commentCount'],
        video['stats']['playCount'],
        video['aigcLabelType'] if 'aigcLabelType' in video else None
    )
    return vid_features
263 | 
264 | 
def get_video_df(videos):
    """Build the video DataFrame: one row per video, via ``extract_video_features``."""
    column_names = [
        'video_id', 'createtime', 'author_name', 'author_id', 'desc', 'hashtags',
        'share_video_id', 'share_video_user_id', 'share_video_user_name', 'share_type', 'mentions',
        'digg_count', 'share_count', 'comment_count', 'view_count', 'ai_gc_label_type'
    ]
    rows = [extract_video_features(video) for video in videos]
    return pd.DataFrame(rows, columns=column_names)
278 | 
279 | 
def try_load_user_df_from_file(file_path, file_paths=[]):
    """Load the user DataFrame cached at ``file_path``, creating it if absent.

    ``file_path`` must end in ``.csv`` or ``.parquet.gzip``.  When the file
    exists it is read, ``id`` is cast to ``str``, the ``num_*`` columns to
    nullable ``Int64`` and ``createtime`` is parsed as datetime.  Otherwise
    every existing JSON file in ``file_paths`` (each holding a list of
    entities) is loaded, converted with ``get_user_df`` and the result is
    written to ``file_path``.

    Raises ``ValueError`` when the cache is missing and ``file_paths`` is
    empty, or when a JSON file does not hold a list.
    """
    assert file_path.endswith('.parquet.gzip') or file_path.endswith('.csv'), "File path must be a parquet or csv file"

    if os.path.exists(file_path):
        if file_path.endswith('.csv'):
            user_df = pd.read_csv(file_path)
        elif file_path.endswith('.parquet.gzip'):
            user_df = pd.read_parquet(file_path)

        user_df['id'] = user_df['id'].astype(str)
        for count_column in ('num_following', 'num_followers', 'num_videos', 'num_likes'):
            user_df[count_column] = user_df[count_column].astype('Int64')
        user_df['createtime'] = pd.to_datetime(user_df['createtime'])
        return user_df

    if not file_paths:
        raise ValueError(f"File: {file_path} does not exist, and no file paths provided to generate dataframe")

    entities = []
    # The loop variable must not be called ``file_path``: shadowing the
    # parameter made the save step below look at the last input JSON path
    # instead of the requested cache path.
    for json_path in tqdm.tqdm(file_paths):
        if not os.path.exists(json_path):
            continue

        with open(json_path, 'r') as f:
            file_data = json.load(f)

        if isinstance(file_data, list):
            entities += file_data
        else:
            raise ValueError()

    user_df = get_user_df(entities)
    # protect against people with \r as nickname, how dare they
    if file_path.endswith('.csv'):
        user_df.to_csv(file_path, index=False, lineterminator="\r\n")
    elif file_path.endswith('.parquet.gzip'):
        user_df.to_parquet(file_path, compression='gzip', index=False)
    return user_df
321 | 
322 | 
def get_user_df(entities):
    """Collect the users found in ``entities`` into one DataFrame.

    Four entity shapes are recognised, tested in this order:

    * ``{'user': <dict or str>, 'stats': ...}`` - ``stats`` (optional) is
      merged into a dict user; a str user is recorded by name only,
    * ``{'author': ..., 'authorStats': ...}``,
    * a dict that itself contains ``followerCount``,
    * ``{'userInfo': {'user': ..., 'stats': ...}}``.

    Users are keyed by their unique name; when the same user is seen again,
    the non-``None`` values of the later record overwrite the earlier ones.
    The result has the columns ``id``, ``unique_id``, ``nickname``,
    ``signature``, ``verified``, ``num_following``, ``num_followers``,
    ``num_videos``, ``num_likes`` and ``createtime`` (derived from the upper
    bits of the id).

    Raises ``ValueError`` for an unrecognised entity or when no user is found.
    """
    users = {}

    def _merge(user_id, user_info):
        # Record a new user, or fold the non-None values into the known one.
        if user_id in users:
            users[user_id] = update_if_not_none(users[user_id], user_info)
        else:
            users[user_id] = user_info

    for entity in entities:
        if 'user' in entity:
            user_info = entity['user']
            if isinstance(user_info, dict):
                # Only dict users can absorb stats; this used to run before the
                # type check and failed with AttributeError on str users.
                if 'stats' in entity:
                    user_info.update(entity['stats'])
                if 'unique_id' in user_info:
                    _merge(user_info['unique_id'], user_info)
                elif 'uniqueId' in user_info:
                    _merge(user_info['uniqueId'], user_info)
                # a dict without any unique name cannot be keyed and is skipped

            elif isinstance(user_info, str):
                # The name itself is the key.  The previous code stored the
                # record under ``user_id``, which is unbound or left over from
                # an earlier entity at this point.
                if user_info not in users:
                    users[user_info] = {'unique_id': user_info}

        elif 'author' in entity:
            user_info = entity['author'] | entity['authorStats']
            _merge(user_info['uniqueId'], user_info)

        elif 'followerCount' in entity:
            _merge(entity['uniqueId'], entity)

        elif 'userInfo' in entity:
            user_info = entity['userInfo']['user']
            user_info.update(entity['userInfo']['stats'])
            _merge(user_info['uniqueId'], user_info)

        else:
            raise ValueError("Unknown entity type")

    if len(users) == 0:
        raise ValueError("No users found in entities")

    user_df = pd.DataFrame(list(users.values()))

    # Unify the two spellings of the name / id columns; either one of each
    # pair may be absent depending on which entity shapes were seen.
    if 'unique_id' in user_df.columns:
        if 'uniqueId' in user_df.columns:
            user_df['uniqueId'] = user_df['unique_id'].combine_first(user_df['uniqueId'])
        else:
            user_df['uniqueId'] = user_df['unique_id']
        user_df = user_df.drop(columns=['unique_id'])

    user_df = user_df.drop_duplicates('uniqueId')

    if 'uid' in user_df.columns:
        if 'id' in user_df.columns:
            user_df['id'] = user_df['id'].combine_first(user_df['uid'])
        else:
            user_df['id'] = user_df['uid']
        user_df = user_df.drop(columns=['uid'])

    # thank you dfir!!! https://dfir.blog/tinkering-with-tiktok-timestamps/
    # the bits above the low 32 of the id are read as a unix timestamp
    user_df.loc[user_df['id'].notna(), 'createtime'] = user_df.loc[user_df['id'].notna(), 'id'].apply(
        lambda x: datetime.utcfromtimestamp(int(x) >> 32))
    user_df['createtime'] = pd.to_datetime(user_df['createtime'], utc=True)
    count_columns = ['followingCount', 'followerCount', 'videoCount', 'diggCount']
    user_df[count_columns] = user_df[count_columns].astype('Int64')
    # excluding because it messes up the csv and its not accessible anyway
    if 'avatar_thumb' in user_df.columns:
        user_df = user_df.drop(columns=['avatar_thumb'])
    user_df = user_df[
        ['id', 'uniqueId', 'nickname', 'signature', 'verified', 'followingCount', 'followerCount', 'videoCount',
         'diggCount', 'createtime']]
    user_df = user_df.rename(columns={
        'uniqueId': 'unique_id',
        'followingCount': 'num_following',
        'followerCount': 'num_followers',
        'videoCount': 'num_videos',
        'diggCount': 'num_likes'
    })

    return user_df


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | playwright
3 | pyvirtualdisplay
4 | opencv-python
5 | brotli
6 | pandas
7 | tqdm
8 | patchright
9 | pyclick


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from distutils.core import setup
 2 | import os.path
 3 | import setuptools
 4 | 
 5 | with open("README.md", "r", encoding="utf-8") as fh:
 6 |     long_description = fh.read()
 7 | 
 8 | setuptools.setup(
 9 |     name="pytok",
10 |     packages=setuptools.find_packages(),
11 |     version="0.0.1",
12 |     license="MIT",
13 |     description="Playwright based version of The Unofficial TikTok API Wrapper in Python 3.",
14 |     author="Ben Steel",
15 |     author_email="bendavidsteel@gmail.com",
16 |     url="https://github.com/networkdynamics/pytok",
17 |     long_description=long_description,
18 |     long_description_content_type="text/markdown",
19 |     keywords=["tiktok", "python3", "api", "unofficial", "tiktok-api", "tiktok api"],
20 |     install_requires=["requests", "playwright", "undetected_playwright", "pyvirtualdisplay", "tqdm", "opencv-python", "brotli", "patchright", "pyclick"],
21 |     classifiers=[
22 |         "Development Status :: 3 - Alpha",
23 |         "Intended Audience :: Developers",
24 |         "Topic :: Software Development :: Build Tools",
25 |         "License :: OSI Approved :: MIT License",
26 |         "Programming Language :: Python :: 3.7",
27 |         "Programming Language :: Python :: 3.8",
28 |         "Programming Language :: Python :: 3.9",
29 |         "Programming Language :: Python :: 3.10",
30 |     ],
31 | )
32 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/networkdynamics/pytok/c1b8704be711f647d2a222bfeef9ea2b6a325375/tests/__init__.py


--------------------------------------------------------------------------------
/tests/captcha_examples.json:
--------------------------------------------------------------------------------
1 | {
2 |     "whirl": [
3 |         {
4 |             "puzzle": "b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAFbAVsDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD3+iiigAooooAKKKKACiiigAooooAKKKKACigkAZJwKrvdxr935j7UAWKRmVRliAPc1nvdyvwPlHt1qLOTk8mnYVy+13EvQlvoKhe+P8CDr1NU5GKRswRnIGdq4yaUinYNSwbuVjwQPoKY08rHJdvwOKjFBNAhTPL/AM9G/M0CaQnl2/Om8Gjco70AS+bJ/wA9G/OmtcsnWVv++qheVcHkVl3T5Y4fApNjNM6ywiV2JjJGdrAZHsccUz/hI8EN5aOpz93g1zVwQQfmJNJBIiRgCs3LU05ep06eK7HKrMksbE4JABA9/X9K0rTVbC+O23uo3bOAucMe/Q8159NGZXLBTj6U1ImQ5FTzu5XIrHp9FcDZ6nqNqV+zs2wfwNyvXPTt+Fbtl4kdiq3ttsz1kiOR1/untj3NWpohwZ0NFQ291BdpvgkVwOuOo+o6ipqokKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiio5Z0iHPLdhQBISAMk4AqtLeIuQnzH17VTkuXmkdCGVRg9ODn+fSmBec96dmJskkleXljx6dqjFOpKoQHrS9qSigBcHrSGlpD0oAAaDyKaAc0+kBCVk3t02YGMZzn/DpWdPFdtL8jYWtekOACcc0NIZgz292i53kmqC295M3L4Fbd000jbEXj1psNnMByeD1rNxuykyjHpfH72Qn2FWI7G3j5C5+tX1syepqdLNVp8iFzGPKnGFTP4VAID1ZK6B4VUdqyZ2KyvlgVJ+UAYx9eeaTRUWLEqhckVE78424qRWBXJNV5mUE5NIa3GrM8UgeNyrDuDg1s2viEqAtyu8f3l4P5dP5Vz7SLmkDA9Km9irX3O8t7qC6TdBKrgdcdR9R2qauDhuZreQSQyMjDuDW/p3iGOXEV5iN8ACTsx9/T+XXpVqSZDibtFFFUSFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRQSAMk4AqhcXRcFU4T17mgCSe8CjEWCe7VTyScnrTKcKokU0CjtRTAWkpaSgQUo600Agk7iQeg9KWgNxaSloxQMKKKKAG85p3am4IYYAwevrTqSARQuelPptLmgY4GlJ4po5pwpiKs7YBzmsW5OX4rflj3Csm7g
C9azkiosqwq5ODgipntcrk0kHy9elTvKChwOPWpRRQe1ANC269BzUU11hiAeahhuJVkJxkVF1cuzsWJAqHB4qpPcBPr7UXRlnkGOBSraqwBc5qW29i0lbUt6T4juLF1iZPNti2Sv8S+u3+eP5Zrtra6hvIRLbyB0zjOMYP0NcLHHEmcAVbtr6Wwm327ZBHzKw4NXC63Imk9jtaKq2F/FqEHmR8MOHQnlT/nvVqtTIKKKKACiiigAooooAKKKKACiiigAoJAGScAUVnXdx5jbEb5B19zQAl1dNIdqD5B+tQDJHNFKvTmqJEpRSGgdaYDhQKKWgBskixIXc4UYGcfhRnJpSOD601aAH0YpKWgAopqb8tvCgZ+XBzke/vTZJlj+8aAJO1FVWvoweoqNtSjXvSugLrAlSA20+opeKzDqauCV7HFQSamccUnIEr6o2sj1pGkQdxWGNRkK9Dn6U0vcyjhWpcw7G39ojUckVG978y7Cmzndzz7YrIW1u5BgjAo+yXUfGMijmY0i8+pqGwapT3qtIO4qBLSbH74BmyeQMDGeKSWAIvFRJspJXJHuQ/C1CZJEHXNQpkPxVxod8fHBFTqy7WM11BcsxyamV1VcBeafHa5c7jn2q6IYlXgAVKiynJGS0shb5VP5VYhjmfqMVOUABYDI9hUm4iIMoqkhNortaybgQwFRSgwrktk+lXYN05IyBiorZN+pNHKQQOgocewubuQ2VzqK3n2m2VvMByS/RvUH1ruLG9S+g3hdki8SRk8qf6j3rPhiRRgAVC8UlveLcwvtYDBHYj0NXGPKiJTUuh0FFMilSaMOhyP5U+qJCiiigAooooAKKKKACiioriYQRluNx+6D3oAgvbjaDEvUjk56VQFISSck5J70oqkIdThTeopwpiEIpFGKcelN70APooooAQ8imr1p9Mxg+1ADhS0gpQCTgdaACs3UYZpM7AOvGK2o7R2GWO3+dWUtol/h3H/a5pMLHHR6dcyTBAHLbd3CkjH1q8nh6d8FyQD79K6gAAYAwKKiyLuYC+HMdXU/ianj0ONOyVsUUxGYukqpzhKk+wlcYUH6Gr9FArGZKPIBLRkAdTjj86pPfQBsFxmugqtcafZ3W4zW8bM2MtjDce45obY7I56e7izgMKzLi6TzQCeK1L3wjIw3Wd+4IAwswzk5/vDpx7VgT6BqET7ZwQfXqD+NZScuxtGMO5feW1EQG5Rjn3rPm1Da21WyKP7GlbCs5qeHw+oOWYmpfO9kWvZrdkSagqIcgkkcYPeltb1MP5mc+9XTo0CAE1BNbQR9AM0/eW5DcXsRHVsRMvlkcnFVTeTshVelSGLceBxViCAKORSu2xpJIz0mu42LKTk0tlPPDf/aJskGtZUUKAeT6kdaR4lbGGK4OTjHPtVJW6k6Pc04datiAN4B96sSXcN1EVWQc9wawWtYm7DNQtYFWzG5X6Gq5mTyI3tNY6fdGXzmkRhtZSf1+v/166cEMoZSCDyCO9eepZXB5EzVveHr2W3kNjcuCrHMbMeQf7v49fr9aE/IGvM6WiiirICiiigAooooACQASTgDvWRcTmeYkH5RwtW7+fZGIgfmfr9P8/wBazSdvOCeccCmhN2HUooFApiHZAxkjnpThUbFRjdjrx9aetMQ6mkU7IGOetIaVwFopBS0xhSHpSgEnA5NXYLUL80gBPYUgRWhtZJcE/Kvc9z9KvRQpCPlHPcnqakopXKCiiikAUUUUAFFFFABRRRQAUUUUAFIyq6lWAIPYjNLRQBmXel7hutuG/uE9fpWFLcGByjqyuOoYYNdhVS+06C/iKyKA/wDDIByP/re1JgclLdPKMIKiW3Z2y5NaU9ibFgsoHI4YdDUOeeKhruWn2KzQCPoKaM9qtM2AcgMeccdBj+fWoQdvXAoGncYEY8npS7T2prXKDIHJqF7ps/dpXRSTJgCOpqaMBuM81Q3tKRgkc5+tSIZInyBkUJiZdaRouNlUHuZDcAplSDk
EcYq4tyGGGGD71E7LvLBRih7aMSOv0u9F9Zo5YGVRiQd8+v49au1yOlaklpeqHIWKT5XJPA9D+H8s111aJ3IasFFFFMQUhIAJJwB1Jpaq38m232AnL8fh3oAzp5PPnaQ9Og9hURdY2Bd1VWIVc9Sx/wAin7SFz2oBqifUcDg0opm47qcKQC08VE6syEI+xj0bGcVKKoBaKKKBCZGaUDJwBk0wKwYfOSOeoHc/0rQtYdoEjdSOB6UtgWo63txGNzcv/Kp6KKksKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigCK4t47qExSrlT+YPqK5u6s5rOQoVDA/dfsRXU1FcwLcwNG3Gehx0NJq407HHMkpzniqroejHNaFyz29w8Eijep5wc1nXEpJIzWTsaq4+KKPIyKshYO4FT6b5Bt/mxn3ptw0HmEIAfpVJaEttsrO0UfQKKa13EE6j2xSzNGF5UVTkmiUjAH4Ck3YpRuPa7DHhf0qcRGSEsD16DFVxcxMAAOfpVoXbJFjA/KhPuDRHHpvmRknOa67R7priwVZGzNF8jknk+h69x39c1ySapsBBz+VX9F1dF1FY3OFm+Tv17f4fjTTitiZKT3OuooorQzCsq9l33BA6Jx+PetR2CIzHooyawSSzZJyT1zTQmO3BuKDjsKZkKxGKdnNAhRThSCnCgBQKcKFGelL2xTAKKKdjJwBzTESW0PmPuI+Ufr7VoUyKMRxhe/f60+pZaCiiikAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQBj6/YNc2v2mFGaeEcKoyXXuPw6j8fWuMs9TtLlsblOehz1r0uvm/4g2N74H8bym1Lrpt7m5tgBhVz95BgADa3YZwpTPWuLExqRftKfzR6GBpwrt0pb9D2C2igeMsjZI6jNDmJcjjNeceE/FhnuFWWTCTL0z0Nd5HcRSOCGBLcdaijjIzVpaMWIwk6MrMWUhl+4cepqoIVL5x+FajpmMj1qBIAuSe1dTRzJsriMLj5asqquuMc08hAM5piXCszKFYFTg5GM8A8evWqSJ1ZGtsm45FPFuEkV0yrA5BHBBpXfB4NPicN1OTRoGp19tOtzbRzLwGHT0PcVLWXoTt9leJ33Mjk5AwAD0HX2rUrQh7la/bFqRj7xA/r/Ssdc7zmtLUX+ZEGeBk+lZ46mmSI+C4HOcZ6cfnSjijNNZgB70CsyUUb1HUiqctwNvBINUJrg881lOqomkabZt/aI1HLCoTfR5IDD8a5ua6k3bVJJoTzmyXO0VzPFvojdYfq2dD/aQU9j9Kv6XJ9qkZ+Nqds9+1cn5iR9SWNdlosPk6ZExPMo8w46DI4/TFaUqspysyKlNRVzQooorpMQooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK4X4seGE8R+CbmVAgvdOBuoHbA4UfOmcE4K54GMsq5OBXdUUpK6sXTqOnNTXQ+TNH0bUgUkiLAjnoRj867vRV1S2lWSUtJjgg+la+qzadoGvy6RM3zqR5eT95SMjsOcEZ96sxR4HmQnKZ4r5qrCcp+9uj6OeJ547aM3Le582AMQc9CKkLEjgVShmUY7H0q1HE2z75fknJxn6cV7GHq8y5XueLVhyu6A5NIE75oPBpY23PtrpMAK+tBBA+UY96nlgAj3DrVNZMHDGm9BLU1tBmKamFIz5ilevTv/Suqrh7W5FvdxTDIVWBO3rjv+ldxVRJkZd8xa5IP8IAqpICF3Dt1qxdMWuHJ9cflURG5SD3qyDJGpIUBcqHx8wU5GfrUEl3nJDVR1BIrS6KR5Iz09KqtdAKTnpXDUrSTaZ1wpJ6l6S969eKrSXsYZQzYLHAyeprMur9FAK5HHJJ61TW+38hwCelckqje51RpI3hIq/do831NZqXYPVqcblWOQfwpJj5TStwbm5jgCjdIwRSTgZPAr0pVVEVEUKqjAUDAArzvwwftHi
C2BjLqu5zkZC4U4P54/HFei13YRe65HHiX7yQUUUV1nMFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQB4H8edNEHiLTNTKqI7i1eJTHw3mofvNx6OoH09hWV4M8S389nb203zlW2s7cEivUvi5p0l74Xtpre2Ek9vdBhIEyUUqwPPYE7fxxXlGmxyWrRiWM+cfnPavIzGqorlS1PZwFHmjzt6bWPS4G3xKzjD9CPSrsE2H21zunXks+WcMM9z3rTjcBySa5qNV6NCqUt0zSJO7PSmKm055oYnapx1FPjbIwRXsxd9TzZKwkk7sNuaZFbbz3q2bddpYGokmCMB0q7dyb9h/2TC4rsoHaW3ikbG5kDHHTJFcm0wK5rpdMlM2nQuQOmOPY4/pVIhlG4/4+Hx/eNMHSnzczScY+Y8fjUeatIg5rXwiXQKjlhk1z00nJIGeCDmuk8RQyu8fkIDI3HJqvYeHC+HuTj1FcValKc9DtpVIxhqcrLbyzIAvJxioE0u7UgpEzkHsK9Pi0y1hQKIl474qZbeJD8qKPwpLB92V9ctsjgLfSprgkylYdvVe9b9l4etGQfvjn1NVdaSSC/LJ909cU23mlSMspMgPoelQoxhJpq5TlKcbp2Ot0DSIrC9eaMkkxlcn6g/0roq5PwtfSXN/LE7/AHYidpzk8jn/AD611ldlJxcfdOOopKXvBRRRWhAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFAGT4mUN4eugf9j/0MV5lN5QbiNSw74r0/wARgHQLvLBeFOTn+8OOK8a1XW4LDK8Gc/cTH3ucV5GYfxF6HqYCLlFpdzdgHTkfSrKx5cFmrndHubu8bzXOAT90V1BhbapripS5tjpqx5XZmhZgOm0846Grfkrjjr61SslKBiau+cF6mvaoO8Fc8qsvfdhXLbduB9aqmPnPFWDKr96gHzyYHSt2ZIeDhevQV1Gi/wDIJg/4F/6Ea5d0A4rpdKkePTYVEEjAZ5BXB5PqaaEyvO/+mOm1uSx3Y4GD0/Woj1qef/Xyf7x/nUJHpVkGfdgPcIf7tXYx8oqhOf3+Per6MNo5pDJe1NNAkU9GFRtKv978qLiMzVrZnKuoHXkEcGqsWmFSJUBX1APFac0iFeWz9ahh1GG2Ty8gAdOawnCLd2bRlK1kXfD8Hl6i7lMN5RG7HuK6Wua0fVVl1JIeCZAyjkduc8fSulrSFraESbbuwoooqyQooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKAMXxaSvhi9I64T/0Na+ZPEOpfZ/EQaSE7YgAxVj83vzX0v4vZv8AhH5IVXJmdUz/AHcHdn/x39a8O1/wc+rapBJC2EOBN7gV52I5Pb+/tY9LCynGi+Te5t+E3MtopMTgNhgT0wfeutDbpMDkdqoW1qlpaRQRhVjUbcA88VqRIFjDEYPauGjTV+WJtXnd8zJ1YKmKjLFjTghK5xTBXsRjypJHmt3dxjylfrU9q4Az1zVVwWkq1EqhevFNXuD2HsJJWJHQdK6vR8/2VDnr83/oRrlPPZHVVQFcHJzyPTj866zSG36XC3ru/wDQjWiIkiC8/wCPp/w/lVSaVYYy7EAD1q/fg+ardiuKxNZB/s5zjOOaqTsrkJXdjEl1MvKzlCpDdD3qR9W3YB4NZLSmRAR1qBmYNXmuvO+53qhHsaU2pskgKNkd6rvrN0GyJM9gD6VSlV5FBUkDv70oiYAEiodWT6mipwXQuSX91Kq/vAp+lUZ4JpW3CXBqxGmRjGPrUiw4fed3IxjPH5VLk3uUko7EWjebp2s2lz5m1UlG9sZwh4bj6E165XlaxHO7bXpWmXP2vTLactuZoxuOMfMOD+ua6sG7XicuK1sy1RRRXacgUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQ
AUUUUAFFFFAHlXxi8ayeHzp+mWbRmeVWnlV1PyrnahB98P+X0rzK08c6hcTRw28Q+c/MxHWrHxYNxr3xGuxDIZ0g2WsI2bfLCj5h0BPzmTk568cYrR8M+DpRJFui5UA5xXjYuUJS01Z9JhoU6NBc6W1zr9GtbyVEnuP4ulb8kL+YMA9KuxWflwqmBgDircax4+bGa6sLhPZLU8eviPaMygNq9DmoxE55C1fuHjVjjGKWKWIxmuyxzXIrS0VuXHNQ3aiKTC9O4FJLcujnZxVYztnc/f3pNoaTEYsTwK7qw/wCQda/9ck/kK4iNjPMkcYy7kKo967+nDUUirfLmJWxyD+lYeqDOnT84wtdFMnmQuo6kcfWsC/DNYz7FDNsOATgH8avdWIW6OOjhchdynnnNWRaqTkgZq/BD5yDrhRU62wzyOPSvP9g3qjsdYy/sZb+GpF0/P3sVvLajA6fSmGxZs8jHpWn1axHtzF+zxqcd6esa/d4/KtNrIKfWpI7BSc8VHsJXH7VGSYCwA4/Cui8NMYo5rY4HPmLx17H+lNSxjB+7mrVvAlu6uo+YVtToOMuYznVUlY1qKQEEAjoaWukxCiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAqrqNy1np1xcJt8xEOzcCQW6KDjtnFWq5/X5/NkS2XkR/M317fp/OplezsVG19TiNK8L21tIs0iKzju1dRE1vaL8uCT1IFUucnrTGfBrmpUY0laKOirVnVd5M0Zb0FflGKpGZ2P3iPpTQNw60hTjitm2zK1hzjIySTTEfb0pcE/WhYskDqTQAgbc/IqVreNk3d6idPSl3vt2k0X7gWdEhWTWYV2FlUljjtgcH88V2tc74ZtcPNdHPTy154Pc/0roquCsiJPUKx7qHa0kRGFPT6VsVSv48hZAOnBqiWYsNusKlVHA4zUyxrjpUjDjrTQaLIVxw4GKMkGjrQRkUxDXIwaZDOhyCwBHvWbqOoNBKYQO3UVgPeE3ShC5U/eNZTqcprCm5HbC7hhBLyjrnk1n3niOCL5IfnfpXLT3m8FVz16moYomc7q56mJltE3hhklqdz4c11r2WS0uABIMtGQRgjuPr3/AD9K6OvMLdZIJkkhB8xGDKQM4Ir0awujd2ccjrtk2jeuMYPfHtWmHqOSsyK0FF3RZoooroMAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiikZlRSzEKoGSScACgCG8uks7ZpnGccBc4yfSuUaYMzO/LsSSfc03W9W+13OIzmFOE4xn1NZRuHbpxWcpmkYNmhI2RxiqTtlqsRpIybiKgeCTdxUNFqxOsg2DC4IGCc9aA9MjhZRzUmz0oVwY1sgg8c88GlGTSFR1pRIsfWn6iHKSp3AkMOQR61GwYckY5xzUck5YnbUumWb3uowxEbl3BpOoG0denT0/GlfULHY6Ram006NGXa7fOw56n/62KvUUVujEKZLGJY2Q9xT6KAMJ1KsVIwQcGmVf1CLDCUdG4P1qjTEKDTqaBThQIydUhzOkgRScYDE96xJ4vIJMq4LeldTewGaAgcMORXPvb3F1P5ZVsDuawrJ9Dek7bleG1RogPLyxrWsdHJw0mFWtKwsUt4hkZb1q6FUkZAOORRCgt5BOtLaJDFZQRYKxjI7mrcchhfKj2Ipnc0ldCSWxg22aiOsihlPFOrMimaKTIHB6+9aEcqyjKn6j0qSh9FFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRQSAMngUAFc9rt686G1h/1Wfnb+9jsPb/AD9dO6ukK7Vche5Hf2rn9QuU+6v6UpbAtyh9mG0bqjESqe2BT3kd+BR5LEeprJmyZZSeMLgcn0qKSeNevX0pscRUHJwarSwlpCc8UOTsCSJDcqx4pDIzNhRzTEhUHJOavWoTccLk0rXBuxTkjnAyRgVF5DHkknNadxcRk7V59arCQMcCnyoFJka
RFBwK6Xw5Y+VE924+aT5E/wB0Hn9f5e9ZFrH9puY7dfvOcZ9B3P5V2SIscaogwqgAD2qoxREmOooorQgKKKKAGSxiWJkPQisWRDHIyN1BxW7VO+t9y+ao5H3uOooEzLUv5jKVGwAENnknnIx+X51IKaOtLVAO7UKgHOBzQOlHvSEOHBp4puKdTAKMd6QnFANMBacjtG25Tg02igC/FcrJhW+Vu3vU9ZNSx3EkfGdy+hqbDuaNFQx3Mbjk7T6GpqQwooooAKKKKACiiigAooooAKKhnu7e2H76VVOM46n8utY11rsrAi2hKL/ffGfy6fzoA2Lq8gs4w0z4z91RyW+lYF3q89wMKPLj/u5z+ZrIlknmlLne7HqScmpUtrqfA+6Km47F2CF5+XkJ9qnlsoxHnAzUlrayRIATRcbkXnpVW0JMxYCM8AU8Ic81BLe7cgLUS3znkLzWd0XystPCznAPFQvalRyab9sl3dqR5JJerflS0KSZHsVTycDB96rs7/NgEAdDnrVsx8E9aFiDccUmiivEPkOelLCjb8Y4Jq4LZRV3TLBbm8HXYnLeh9v8+9NRE5GjoWn+QjXTj55Fwo54XP8AXj/JrZpAAoAAAA4AFLWqVjNu4UUUUCCiiigAooooAyL63eBgYVDKxH3mxgZ57Ht09f1qGtuSNZYyjdDWRNC0Emw89wfUVSYmNoFFAoQh4NOqFiVUsATgdB1NSKT3piFPNHSikJoGKKWkHSloAB156UUUUAFKJJYx+7fafcZH5UxmYOgCFgTycj5eKdmgGiyt44+8oPH0qUXkZxkMPWqNFKwXNH7TD/f/AENH2mH+/wDoazqKLDuaH2qH+/8AoaRruIdCW+gqhRRYLlmW+bBEUYz2LH+lVXmupc7piqnsgxj8etLTNn70vubkYxnj8vWiyAYlpEo6VIbeMj7opskuwcVB58rdFNAiYQRJngU1p44vSqjtcM3PApfID4BJNQUTPqSgYWqNzPJMpGcCr0dkmPu02e3whwBTaGmjn/s+Xwzk1ZW2QDvUF0zRS4BApFnfHL1ldI01HSJt5FLG7Y6U0uWyOSKnhwVx0NC1C5Xe5IbABpFncdgKsG13MWANRPbMTwdtFmFyWFp7mVYowWdjgAd67Kxs0sbYRISxzlmPc1U0fSRYIZZDmdxg4PCj0rUq4ruRJ3CiiiqJCiiigAooooAKKKKACo54FnTa3XsfSpKKAMaSNonKOMH+dNFa80KzJtbr2PpWXJG0TlWHI/WmhDSccUZpuOacKoQ4U0uPNCYbJBbO04/Pp36U+jtQACigUd6ADDZJJGOwxS9aKKBLQKQ57UtFJjCikpaYgooooAQ0Cg0CgYtFFJQAFQetKAqjoKbSspYYFICKV0FRGZRg5AFMktGZslzUbWigckmpZSLa3CEffFQzTKynBJ+lJDAOgAxVpbbA6CmI5u6iMkmcGljt1HUVtT26Lz3rOlgEjHDVnyl82gmyML2qJiUOVFPG2Hg80ySYtwBigEKsxOATXR6Xpnlhbi4X5+qof4fc+/8AL+UOjaOYtt1dL+86oh/h9z7/AMvr03KaQNhRRRVEhRRRQAUUUUAFFFFABRRRQAUUUUAFNkjWVCrDj+VOooAypbd4GGeVPQimdK2CARgjIqjPZEZaLkd1PX8KpMVitQKQjBwRg+lGaYhelJ3ozSBhv255xnFAXHUvem859qdSAKSl7U2mAtKKaSccUopALSA5ozSKAvAAAznimIU0UpoFAwpKWigBMU4VFJKqdTUAvY1B3ODzSuBZaNm6UhgG3k1WOpxjo2ahl1EMO/ByMHFTdD1NOHYnpUd1exxKfmArEl1CQAkNimQx/aV3yEt9aV+iHYS5u5LglYifQVHBLLGCr8t71JJthZcYAB54q5Y2EmoHeq7Y/wDnow4PPb171NtSiksUlzcpEgJdugFdRp2lR2X7xgGm6ZHQfSrNpZw2UZSJepyWPU1YqkhNhRRRTEFFFFABRRRQAUU
UUAFFFFABRRRQAUUUUAFFFFABRRRQBFLbpKDkYb+8KpS20keTjcvqK0qKdxWMal7VpS20cmTja3qKqSWkicgbh7U7hYhFLSfdJBGDSZpiHU00tJQAUopKBQAUUUUAIJFZ2QZ3KATx6/8A6qUnAzS9hSHkEUAZs+qpFN5feoZNSfblRUsmkrLcGRqsLp8Q5IyamzHoY4uLi4zuUgUhsppfu5FdAttEg4UVIqKOgpcvcdzn49OljHOTTnspmHXFb7ICKWO2MmNqnHqelHKguc/aacXlIkJNb0GnqEwq1ah05EffIcn+6OlXQABgDAoSsDM1dEtXlSSdd5Q7lUEgZ9/X6HitJVVFCqAqgYAAwAKWigLhRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFADXjSQYdQartYoR8rsOc881aooCxnmzlUcAN9DULI6feRhzjJFa1FO4rGPRWsyIxyyKT7imNbQsclB+HFO4WMw0VpfZYf7n6mmtbQhkATq2DyfQ0XCxn9qB1rS+yw/3P1NH2SD+5+pouFjOpQCxwASfQVp+VH/AM80/wC+RT6VwsZi20zYITAPqcVYSzP8bfgtW6KLhYiSCNMELkjualoopDCiiigAooooAKKKKACiiigAooooAKKKKACiiigD/9k='",
5 |             "piece": "b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCADTANMDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD3+iiigAooooAKKKKACiqt5qFtYrmeUBsZCDlj+H4delZkmtvLnykKJ2zyT/hRcLG4zKoyxAqpLfBB+7iZznucCsYauobDE596tJexyDORRoGo2fUNQd9kYSIZJBVc8ehJ471Xea/kYF7hx2+U7f5VeWRH6YpSq+lOwrlQR3LLzdT/AISH/Gq7QXqf6u8nUZJxvPU1p9KKOVMLmbbHUUlPm3MjL7sa11uphglgfYiosD0op2Atpe9N6fUip4545BwcH0NZuDjPaiiwXNais2OeSLhTx6HpVuO6jfhvlPv0pWC5PRRRSGFFFFABRRRQAUUUUAFFFFABRRSMyopZiFUDJJOABQAtY19rQUmK0wzZIMhGR+Hr/nrVDVNZ+0MYoJCkI4JBwX/+t7ViSX8MLgbuaxnWjHdmkaUpbI0BbtPNukdnc9WY5Na0NoAlY1reB/mUj61oR6rHny1O9/RauEotXRMoyTsyaSxRuqA1EunKDxkCpRfL3yDUyXSnoQavQgIoPLGKmNMWXcelPPSqQhtFFFMYUUUUCCiiigAooooAmhuWjwD8y/yq8jrIgZTxWXTkkaNsqce3rSaHc1KKZFKsq5HXuPSn1IwooooAKKKKACiiigAryLxl8SovtUlpayKtpG5XehDGYg9Rjjb6fn9GfGjxtc2cK+GNIZxcyqJLyVHXAjOcREdQW4Y9PlwOQxx4TDLqLzujRlzNhH3IDxuB4P8ADyByMenQmuLEt1PcjKy6nq4PDxilUqxbvtp+J6O/jZy6mWRlUjIUcmlPjC1xl5tufXJriXvI0vpLoWRit4gECE7gWAx3rZ8H2NtqV99ovNOaWMtwWB2/l3Fea8NbVnpc1Fxukav/AAlesahdJa6NbyTbzjcqnA/GvWPDGnvptjm8m827k5c9h7Co9Pght0VIIY4lHQIuBWqgDMAFz616eHoxhr1PGxWIdRcsVZFqSMOMgA1Gg2NylSrgYUZz/KiRSiM+GbaCcKMk/SuxanCSrIgFPFwmOtILdSvvUT2wJ4piLAkVuhp1V4oChzmrOKYCUUUUxBnnFFFFJAFFFFMAooooAcjtGwZTzWjFIJUDD8R6VmVJDKYpM87T1FJoaNKigEEZHIoqRhRRRQAVU1O+TTrCS4bG
4DCA92PT/H6A1brhPFmoveaqllAxMNufn2n7znr3xwOPUHdWOIq+zg2tzWjT5526HM3vhzR9TuZ7i5tUaad2kkcDBZmOSePc1mr4FsopEFq8iHcWAzntxXVi1jWTgvsI4J5OanwkWCyEEDjPFeV7P+Y9NYiaVos801Pw9Nc6jb2C2iCxjGZZifvN3Na+oakdMtoLQLCptU8uIx9NvrnvXW+RArYjQnJ5GR0PU81yfi3w20kBaxJzj52/p9Kd3GGhpGcak4xnsZNr4/ltJPKnVWA6Nu5rrdH8bWtyA8kyoTwAa8efSFNw0ccjyOOpxjFNAW1nSF7rL/3Y2AAPux4ohUkvgZ21cHQmvM+jbLXLO4Y7ZUOPetaO7hdA4ZQmcbietfO2k+IYbSYo+7eDgZcED/Gu5s9R+0WEt59oSOCOPezs2AD6VtDHVL8rgeZXy1Q1T0PVFmSQna4KjuKfxjgV5Rpni1iDsk3gj5SG79q6nT9eupABcOFz3HNdNLHU56PRnJVwNSm2dfnHWjNUILkbcvKGz6GrUdxE/CuCfSuxSTONpokooBBzil6CqAYEIkZ9zHIA2noMZ5/X9BTqXNFACUUdaXFAhKKM8470Ur3GFFGKCKYi3aS/8sz9RVustSUYMvUVpqwdQw6GpY0LRRRSGQ3lytpZzXDYIjQtgnGT2Gfc8V53AwkvIo2bLyv87McknvXWeKLrybWCIFgzybuOhCjofxI/KuX01BNrcLYX5AzcjviuKs+erGFzrorlpuRd8S63/wAI5pMt7BZefFCuWRDggetec3PxMg1do/KidFUfM/bJ7Cum+JHiaLQNM8s2T3DXKmNQnQHua8Oe/aW0/dNtjdtxjC4xXROKknF7HPGTi7no1t4ykfUzDIuVyBlDmuh/4SGN0aMDzZX++qjoK8z8P3cVpYXNz9nE0wXYgJ4XP8Rrt/CUqSSWqytGHlU59TiuGcfZr3ep6ELT1l0ItS0E6xaTS2IKMM4hX5dx968x1HTr22naOa1licHGGQ17rcX1jZ3jMo2sBh2xgD6mq02q2Uqlw8cn0IIrkjNQuz0aOInHTlujwmOxvC2BbzsxwBhD1rVex1GVvskU1z9jAXzN+V3N3AB7A5Festc29wuCsezHTHesm7nhgyAAq/Ss54x/ZSNYzc3ZoydFit43ES7lij4y/Wuwju44YlDEevNefz6nbRXXmCTnPStU3AuGWUznyCAQO/0rmTlFc1jWvRcmmzsU1HzV+ViQ3ep47+SFwftDYI+4SPb8e361zlrqEEceNwVQCevNW47uC6OYZFDjoH4zW8K04q6epwyoK+qOottdlU7S345rVg1kvnkNXGorgF2g2YHG05yas21xsfkshNdcMZUj8RyTw0Hqjtk1BGHIxUqXSM3JwBXNRXxCKGG/8OlTM0hi3wncO4zXYsU2rrU5Hh0dAbuMHg5p32uI/wAQrlvtkigEqcemakjlEi5U49aFjddhPDWR1CTxyZCuDg4NP4PeuTkuHjwFYmnw6lOp+9WqxceqI+rysdXSdTWKuqS7ORzUD6jdtnBArR4iCVyFRkdCTVuzfKMmenI5riJby7BBSQkY78c1a0fULpdXt/OkYxs2wgd88D9SKj6zFu1ivYSSudvRRRW5kef+LNVjfxC1qrOfs0Sq4IOFZvm4/Ar/AJFU1RS4uYD0ABweaxtWuZLvxLqlzOiq32p4RsBwVQ7FP1woz71asZWSPejA4P3a8OdW9WV+57HsrU427GrNeaZeW22/gZ2ToSmRXkd14cSK4uf3LbJJC0YHAUEk/wD1q9VR4pwVIAY881E2mq8gbb/DxWksTVaVjGFGnFu55Zpvhq4gs5op5ViM0e4Hdz7Cu38KW8bSxZjjzAhDbRgg+lat1pskm6V2DNg5Y9TTLi8axjiks7UM87eUx6YIHU05VHKLk+hpFL4Y9St4g1Syt9IvS0XmyYKiJV3MT9K8at9P12aYvbwTxAnoTtFe6SQ28UYuLswx
zP1OcZqo4sXUFJEY4J+XqK5vayh0Wp00pKK0ueZQWPiaBFLuxH90EGq12NSupPJP2neBypI5P4V6DeX1nGhAKsw4AzXKalqQgLGGEtIehHasFUblolc9OjOTWxzEFmVnb7X5g287B1OKW61W4kuMxI8cCjCLg4AFXG1drkxpNa5kXjcowW+tW7fKljLHcwRgFW2AE8jpyK6nKzvNGsnIm0G2v9ZmUYaCBQGeVugHtWxquo2GjTqtr+9k2jJcHjIBGB7jnNY32h57RrTTJ9loABKS+HfPYe1VxoVy8ymKynNuoLs2OG+lZtU3pazM+Vud5vTsdZp2rXl2iSiXGfU1tpqIXCTOrN7D+tcdbW16jk7QiEccgYFXbdgAWLHb2J4BI9K5G2noZ1KUG9DsYr+DaGjLgj7wrTtdSOcsoZMdRXIRzmRgd7CTAxWjaTybwdxPr71pCu4vQ4alBNanUPtkYOhO3rgdqqTs8DlgvGB7UlpeRt8qttY8bTV9XTbtdgG9G6V6EWqiucLTg7FOO5jnJQH25qVbQ5yrg/WiXTYZm82GUJJ3VOhojKQgoZWZ89NuKOWSfvCbT+EniimdgDjjvUcsqq2xW3N7Un2p0bAyFpYzCXDYw/qK0TjsRZrUgYymQ5D4Ixt29/WkEN4rB445AwOQRwQa3Lacf3lY+4rVgkRxyoFdEaEZa8xlKtKPQ14ZRNBHKFZQ6hgG6jI70UkB3QIfbFFdhynlGpop13UNoB/0qXOPXeabEmx+hrjNT8VTWPjPWLWCMSpDfTRlHc7jtcj7xzk8dTWxF4ts3h3zRyxN6FCf1FfOzi1N33Po/YzUVbVWOnWS3Qh3OGHWh9ZgiWPhirdx6Vy7+LNL2FlkLEcHg1UvfFthFBCxDYYHHHvQqkloiFhZSeqZ3Fzfwmzk2H5m6Z7Vzeq6zDp9jGqyr555ALcZ9K53/hM4boNbqcF+Fz6+n41w+qau9zM2/wCYKSBWsXUm7JWNqeDUf4mli7r3iLV7y8dblmiGMCN16e4z/OsdNa1K3X93dYHT3qKe9aZUVzuCjAY9QPSoVs3nBZBwP1rrjCKXvI0qK+lP8NC+NY1Rhuk2tHnBcrkD6kU1dWluyIjDuc9CnWlthcrbi1RWkiJBePGc11mmeGhM73mmxALHCS0MjEOj47eozUT9itlqVBV6dnKVl5/5lPToM/Z5SIlWNT5js3Tnv711gstO1HyX/e+Wi7cREBZGxnnHtXHrpupyukU9uVKKE5UkkV1kUAttFWCQeWuTk8A59gK4JySd73N6ybtrqU5NG09JWktLwQlgSIzjHX/CtabR7mJhFBdXEkJVSu0jBBGSKoJbWUmFZQvPT2rWgurayi8uHDZXG3ArD2ilvoZVHNWtr6mff2VxbxrAsfk7ursC7H+lFnpNx5QMjMYR1Zh/IVrQymcYdyoHOWpk1xGhVDM0megXpUNp6rYlTmvd6gJLaCN7YRsWwOWyK0tMmtRGVfdk8YHaqkEsjNmSCMp23jccVdAjVfltk59DitqVr3Oaq9LFlbWGSYmMMhHIIPWiW4ntYQZZoXjPeTgj6VEZXt7NpIY1eQH5Yi+AfxrnLr+1r9iLiwUKWyBGc7a6rqMdNznjFzlq9Dqk1qwReLoKxHVafDqkEyjyrkzuDn5gBXFzaacrIu5QB0fjFaOnbFz5SHcBU+2nexUqEOW6Z1UmotH8pijGTkhutQRzjzvMEu7I5ULgA1mJco7kS79w6ZNWFkjUggjB7itfaSepjyKOhqw3BySTgVbhvH9ePWsQSgHAOasrOoUA5qoVCJQTPQ9IkMulwuTknd/6EaKpaBbRz6LbyM0wJ3fdmdR949gcUV6sHJxTPOmkpNHy34vEi+OvELxFlI1O5yR/11apLLxFe2QjW8tY3jf7rMuCfxr0X4j+GrpvG+oaiUBjuRE6d8qI1Q/qprzrX9EnW2WRCzRr0XP3a8qdWlKq6c0fSxvOjF03rZaHRi9s7u18waaYyf7q
5zVG5FnPFEio7NkgL0Ark9P1e8sCE859o7ZrpItTtr1UmljZrjfjgc/XNYVKDhqa0al/IhudFDHdFCFIGepzWNqNsJZ1fyWVzxIPU+tekWd3bRsIbkBHYfKH4zWfrH2BSJoIk8xD8yMetKnVnHVle0UpcskcHp+ktcT+WRz0INdTp2gSWc56nHA9x9Kpz61YRzFUiaJ88NnpUD+INRWb9xMHjA6sM1VT21VdjVNR92CO1GmQTyfaNscEuACQMA+/FNkju7OXzklCDjlRnNc4niG/ukRCUyox8iYzV211a4W7UvGfLC7SSTge9c0qTWrepHLU2ZqyeJL5YsFLd2UYBkORisa81pXuQ8gVnK8hFIWM+1Wn8me4LrESDzk1oW2jRTLnyAc9SRUe0TdpaglTp+9axStmt7pd0UytkDdvbBzjnp75q7FMkO5Y40LDoR3pL7wvaQxGdJFtW/vZwDTrLTJZIipmBbs6nINTKnbYn2sJLcss5uE2ssUZx93dT9PtbVsqVJJOdwPeqo0+aJyrSJLsOSO4+tX4b3y/3aW6tJjhQwFSoOTsZzkox91m0kEhjVAUwOh71ZW2QjYxAb2rMtr45/eIqY6nPSrDXwkQtHNlDxuUV6FPD31Z5k6tnZFk6cEkLNK2Bz97ipYEgMpeN1Z9vQc4rhtc1l4mxHfKZw3ELgMCK3/CmovPN50ypgpg7RjNdVPDq1zCpWa0Nt7ZLsbJYwR6nqKyX0m4s5jtAkXs3eunhCON3cmrvkK8eWAwKcaCnqL28oaHFyWTzKBt3OOhJx+FZ01pPbvsxIB3ycgfjXdzWsaDfg49MVV+x28xOz8Qehpyw6asOGJaOQjNwhDCTI7g1eS8cAb0BHqK2ZfD0bBpEfY45C9jWWdMuS21SoYnkHpWDw84s29vCSuek+FpBL4btHXod/8A6G1FX9Ohe20u0gkGJI4URvqFANFexBWikzypu8m0c94ztA8MN15W7AKM2fxAx/31XnJSxuy0B+Rj/CRXsmqWi3lhJGQSwG5ceo/zj8a8U8TWTLcnyxsnXJQjjcK8fMaNpc/c9TL6l1yXOS1rwrbtO/ksInH8B6fhVXSbe201xDeRsrE/LKvI/KteHUFv4xBcErcr91j1+hqlcQyTNs2EEHg9s1w+0mlySeh7kVf4txb3Tvt7GQMZYyMAqc/iKyGsrhcoyee6fIC5YEDseD1rp9AuI9PmMNygHmnABHeuok0yFh5hRQM7sEZBrSDkkuV3MpVlB2kjzKTRjeTxyvERuGHAHetW28NRfeVGA6YFdxa2FpcszIYSc87G6f4VqRWUdsACgHuBkGiSqvS9kZyxqXwo4qz0AQcrG2O4x1q7/Y0s/Hl7VrrJpbaFcmSMDGSduMVjXWtadDIc3DzMo3eXF6VH1eUnZO5i8ZL4noMtdDt7Zd0uOPWsXXfGUWklrayjDSY++eAKh1XxTHd77G2heJpkIjmZujeleci2urm9aN0lllBO5VGW4rvw2Bs+aZwV8bzKyd2aN3rV9fymWe5diemG4FX9H8Q3lrmE75I8feGTsycAn8cD8aZZeENXvIFl+zNDGSMtJwcH2rvdN8NaT4dUNt+1XZH32GcfT0rsrOlGFmclJVpSujIitfEd/AJruZrO2B4A++4/wrMsLu1tNRnLCV3zhSHzxzkH15xz7VveILrUL2EoH2Q9DGvGfqa46S1NnMjKjYbnIHQ1531mLuont4fAuSvNnXz3FzLZPtTyA68OQTxXIx3d21yLSa6mFsG4IYjNdRZ31xNp671YIh2kkcGoXsbWcMuCyo25HZcH8smsHiGtGbQowg3dGbb+HTPqCyRoZHB3ZJ4IrvNGtnspWGwLkbiPTNYjskVqslp8sq/eNNt764hdXWViD1ye9aU8Q4qzdznrUFU1SSPSbJFOM81riSGGE5wT9a4nStakmdY5PmY+jAD9a34LuCU8sM+ma9ChOLjoeVWpyjLUku9VRCAyunYHGRVO7v41wElV
+5AFXptMFzHhc8nPWoV8NSMCciqmquyRMPZ2uzNi1Vo4OoZu4JrV0d01G9gwmSXG5Se3f9Kzbvw/cwZbaW+lbHgzTXF5PdyBgIhtQEEAsepz7Dt/tVFN1OdRki6ns+RyidtRRRXonCFeeeLtB82R0jwCfmQ+3+eK9DrP1e1Fxab8fNFlvw7/AOfasa9JVYcrNaNV05cyPB7vwtql7dgquCnAY8frWhbeE9QSNUlVWI75r0PzIkOAoqWOZG6IM1wrLote9I73mdTZI86j8MXDTrJPCdkZyM9jW0xKxmMryvBrr3dWUqYsg1kz6SWctGcKT3HSplgvZr3NRLGuo/f0OcfTYi2YVEbtyQON1Mit7q2PyzvjupOa0p7WVRmXKsDgGq7SyIfm+bFctSm1qdMKl1YqXOmreowmeQg9MNWM3hKBGEkEsqup45rp1uVJJIxUu+GRRjANEas4bSsTKEXujhp/BonYqz4UtlSOq1vadokFk5GxWZsZkK8sfWtlxx/Wm+Zggbaudac1aTJhShB3iivLFcAFGJ8sYwBUckRkj+UfN1rbimXgMSMgjI9KetpE3Ss3Tcuo1U5ehyN0kuQ8wL54OaqT6elwgiAATqTjkV2sumRyjB6VU/sYRsG3cdKxlRnfQ6IYmKOMexaEtCkm5O+O9S28Cgbf0ro59MxKdqZ7fWq8mlSeYrKCMdawdOVzf26aMprbyzgL8pHIqJ7NjFsAAYHIOOoroRYseoOe1TJZRE4cY9a1jBvQzdZIwrayk8vKuVYVcebULRd5A6cDrmtVYDC3ADKRjpUMsILYAI+tdEE4o55VOZ6kNhr1/E3mfd4xg9K2rPxYxfbMWA9VrJW1J7cgZxilay+UPtHNdNOtVjszCdOnLodha+IrWdgrFiP9quqtoVgiwq7Sx3MPeuA8J6OLvVxcOD5VthyM9W/hHX15/D3r0WvSoTlOPNI8+tCMJWiFFFFbGQUUUUAcpq2lm2uzJGD5MhyOOFPp/n+lQW8QGBjv1rrp4I7iIxyLkH9D6isVrZ4LkR7V2c5J6n0IpKIXZLDAhHSpTbJ6U6NcLxSkmrsSV5LKGQEMoIPrWTd+GYJsmNtpPatzNITUSpxlui4zlHZnHT+GrmKPaqh+ex7VmTaNfx/dicfhmvQ80vFc8sFTZ0Rxc4nm/k30GAYmbPUFTShrgSYlspMf3lFejlEPVQfwo8uM/wAC/lWTwC6Mv66+qOCGNudrflSrdFWwqMa7OTTraQ5KDNRtpNqRwmDU/U5LZj+tRe6ObSV2IG05NXEtnkwCvNa0emRIxLAMO3tVuOFE5Vea1hhf5jKWI7GD/Zrq5ZiSDjAx0oms2jGSmQR1xXQ4FBUEYI4rR4SHQj6xLqcp5ZHRBgVFJEHbG0jPoOK6aSxiY5VcU3+z4iOmKx+qM0+sHKmAqwxjA65FIUDHnFdFLpZwSm0n0NUpbEKSCuDWUqEo7mka0WzLChTmlSBrqZIIULyOcKoq79kUMA+ce1dJomjR2aC5lQm4Ycbh9we3uf8A63rSp0XJ2HOqoq5f06wj06yS2jOccs2MFj3P+e2KtUUV6SSSsjibu7sKKKKYgooooAKingWePaeGH3W9KlooA52XU0srhra6/dyDkZ6MPUHuKmTULeUfLIv51oajpdpqkIjuo923OxgcMpPof8jgVxN5oF3pjEsxaLdhJF7/AIdqlykulzSMYSW9mdWsqN0YU7INcPA9/wCZtSQ496unUb+1GZORQqqfQHSa2Z1eKSN0kUsjBgCVyPUHB/UVzcPiB8jevFX4tdgfqcVanFkODW6NekJxVNNTtn6OKsCaNxkMKZBJk0ZpFIPQilNMAzRmkzziilcApSaSimAueKSiigEBOBn0pAFdQSMg+opyqWYKoyTV2C2CAM4y/X6UnYFe5Da6fGjCV1BYcqD2q9RRUlBRRRQAUUUUAFFFFABRRRQAUhAZSrAEEYIPelooAy5tFgLb4VCn+7WdeaSZFKHINdLS
MoYYYZFFkBxS6K0PGMiq82luCSEI+lds9op+6cexqs9s46x5HTjmp5EPnZwZtpQ+0Kw9zSst3C+RKdpAwo65+v5V2ht4jkFRmq02l28siyFFLrnacdM0lTsV7Tuc6l5fwJuJJHvU8Wuz4+ZM1rS6YHj2g1QOkPGcKMinaS2FdMdF4gQ8MpFTx61E8gXpk96y5tMePJCk/SoobV3nVCpwTzxS5pp2C0WtDqoZhMgZTxUtNtbVkiRY0OMdccVbSzY8udvsOa1M7Fap47WR+W+Ue/WrccEcX3Rz6nrUlK47DI4liGFH1PrT6KKQwooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKAEZVcYZQR7iozbxMclB+HFFFAB9mi/ufqaalvEyklO57n1oooADaQHrGD+Jp6wQo25YkVh3CgGiigCSiiigAooooAKKKKACiiigAooooAKKKKAP//Z'"
6 |         }
7 |     ]   
8 | }


--------------------------------------------------------------------------------
/tests/test_captcha.py:
--------------------------------------------------------------------------------
 1 | import base64
 2 | import json
 3 | import os
 4 | 
 5 | import cv2
 6 | import matplotlib.pyplot as plt
 7 | import numpy as np
 8 | 
 9 | from pytok import captcha_solver
10 | 
11 | def main():
12 |     this_dir_path = os.path.dirname(os.path.realpath(__file__))
13 |     with open(os.path.join(this_dir_path, 'captcha_examples.json'), 'r') as f:
14 |         data = json.load(f)
15 |     for type, examples in data.items():
16 |         for example in examples:
17 |             puzzle_b64 = example['puzzle'].strip("b'")
18 |             piece_b64 = example['piece'].strip("b'")
19 | 
20 |             best_angle = captcha_solver.whirl_solver(puzzle_b64, piece_b64)
21 |             puzzle, piece, puzzle_edge, piece_edge = captcha_solver._get_images_and_edges(puzzle_b64, piece_b64)
22 | 
23 |             solved_puzzle = puzzle.copy()
24 |             puzzle_r = (piece.shape[0] / 2) - 1
25 |             for y in range(solved_puzzle.shape[1]):
26 |                 for x in range(solved_puzzle.shape[0]):
27 |                     if (x - solved_puzzle.shape[0] / 2) ** 2 + (y - solved_puzzle.shape[1] / 2) ** 2 < puzzle_r ** 2:
28 |                         theta = np.arctan2(y - solved_puzzle.shape[1] / 2, x - solved_puzzle.shape[0] / 2)
29 |                         theta -= (best_angle / piece_edge.shape[0]) * 2 * np.pi
30 |                         r = np.sqrt((x - solved_puzzle.shape[0] / 2) ** 2 + (y - solved_puzzle.shape[1] / 2) ** 2)
31 |                         solved_puzzle[x, y] = piece[int(piece.shape[0] / 2 + r * np.cos(theta)), int(piece.shape[1] / 2 + r * np.sin(theta))]
32 | 
33 |             matches = np.zeros(puzzle_edge.shape[0])
34 |             for angle in range(puzzle_edge.shape[0]):
35 |                 match = np.sum(puzzle_edge * np.roll(piece_edge, angle, axis=0))
36 |                 matches[angle] = match
37 | 
38 |             # save the best match
39 |             fig, ax = plt.subplots(nrows=7)
40 |             ax[0].imshow(puzzle)
41 |             ax[1].imshow(piece)
42 |             ax[2].imshow(solved_puzzle)
43 |             ax[3].imshow(np.repeat(puzzle_edge[np.newaxis, :, :] / 255, 50, axis=0))
44 |             ax[4].imshow(np.repeat(piece_edge[np.newaxis, :, :] / 255, 50, axis=0))
45 |             ax[5].imshow(np.repeat(np.roll(piece_edge / 255, best_angle, axis=0)[np.newaxis, :, :], 50, axis=0))
46 |             ax[6].plot(matches)
47 |             plt.show()
48 |             
49 | 
50 | if __name__ == '__main__':
51 |     main()


--------------------------------------------------------------------------------
/tests/test_user.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import os
 3 | 
 4 | from pytok.tiktok import PyTok
 5 | 
 6 | # Account handle the test below queries (alternative: "brianjordanalvarez").
 7 | username = 'marierenaudstab'
 8 | 
 9 | 
10 | async def test_user_videos():
11 |     async with PyTok(headless=True) as api:
12 |         user = api.user(username=username)
13 |         user_data = await user.info()
14 |         count = 0
15 |         async for video in api.user(username=username).videos(count=100):
16 |             count += 1
17 | 
18 |         assert count >= 120
19 | 
20 | 
21 | if __name__ == '__main__':  # run the coroutine when executed as a script
22 |     asyncio.run(test_user_videos())


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import pytest
 4 | 
 5 | from pytok import utils
 6 | 
 7 | @pytest.mark.parametrize("json_file_path", [os.path.join(".", "tests", "data", "20230915-200856_error_videos.json")])
 8 | def test_get_video_df(json_file_path):
 9 |     csv_file_path = json_file_path.replace(".json", ".csv")
10 |     video_df = utils.try_load_video_df_from_file(csv_file_path, file_paths=[json_file_path])
11 | 
12 |     assert video_df is not None
13 |     assert len(video_df) > 0
14 | 
15 | @pytest.mark.parametrize("json_file_path", [os.path.join(".", "tests", "data", "20230915-200856_error_users.json")])
16 | def test_get_user_df(json_file_path):
17 |     csv_file_path = json_file_path.replace(".json", ".csv")
18 |     user_df = utils.try_load_user_df_from_file(csv_file_path, file_paths=[json_file_path])
19 | 
20 |     assert user_df is not None
21 |     assert len(user_df) > 0
22 | 
23 | if __name__ == "__main__":  # when run as a script, hand this file to pytest
24 |     pytest.main([__file__])


--------------------------------------------------------------------------------