├── .editorconfig
├── .env.example
├── .gitignore
├── .styleci.yml
├── CHANGELOG.md
├── app
├── Console
│ ├── Commands
│ │ └── .gitkeep
│ └── Kernel.php
├── Events
│ └── Event.php
├── Exceptions
│ └── Handler.php
├── Http
│ ├── Controllers
│ │ ├── Controller.php
│ │ └── UserController.php
│ └── Response
│ │ └── FractalResponse.php
├── Jobs
│ ├── HandleDatabaseOperations.php
│ └── Job.php
├── Models
│ ├── User.php
│ └── Video.php
├── Providers
│ ├── AppServiceProvider.php
│ ├── AuthServiceProvider.php
│ ├── EventServiceProvider.php
│ └── FractalServiceProvider.php
├── Services
│ └── TikTokScraperService.php
└── Transformers
│ ├── UserTransformer.php
│ └── VideoTransformer.php
├── artisan
├── bootstrap
└── app.php
├── composer.json
├── composer.lock
├── config
├── testing
│ └── database.php
└── tiktok.php
├── database
├── factories
│ └── ModelFactory.php
├── migrations
│ ├── .gitkeep
│ ├── 2019_08_23_121212_create_users_table.php
│ ├── 2019_08_23_122708_create_videos_table.php
│ └── 2019_08_25_224149_create_jobs_table.php
└── seeds
│ └── DatabaseSeeder.php
├── phpunit.xml
├── public
├── .htaccess
└── index.php
├── readme.md
├── resources
└── views
│ └── .gitkeep
├── routes
└── web.php
├── storage
├── app
│ └── .gitignore
├── framework
│ ├── cache
│ │ ├── .gitignore
│ │ └── data
│ │ │ └── .gitignore
│ └── views
│ │ └── .gitignore
└── logs
│ └── .gitignore
└── tests
├── TestCase.php
└── app
├── Http
└── Controllers
│ └── UserControllerTest.php
├── Jobs
└── HandleDatabaseJobTest.php
└── Services
└── TikTokScraperTest.php
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | charset = utf-8
5 | end_of_line = lf
6 | insert_final_newline = true
7 | indent_style = space
8 | indent_size = 4
9 | trim_trailing_whitespace = true
10 |
11 | [*.md]
12 | trim_trailing_whitespace = false
13 |
14 | [*.yml]
15 | indent_size = 2
16 |
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | APP_NAME=Lumen
2 | APP_ENV=local
3 | APP_KEY=base64:rWx76IEPTYSH//teKhwR+1jQijBftek8TMO75jsQxIQ=
4 | APP_DEBUG=false
5 | APP_URL=http://localhost
6 | APP_TIMEZONE=UTC
7 |
8 | LOG_CHANNEL=stack
9 | LOG_SLACK_WEBHOOK_URL=
10 |
11 | DB_DEFAULT=mysql
12 | DB_CONNECTION=mysql
13 | DB_HOST=127.0.0.1
14 | DB_PORT=3306
15 | DB_DATABASE=tiktok
16 | DB_USERNAME=root
17 | DB_PASSWORD=io28
18 |
19 | CACHE_DRIVER=file
20 | QUEUE_CONNECTION=database
21 |
22 | TIkTOK_URL=https://www.tiktok.com
23 | SCRIPT_USER_PATH=/@:uniqueId
24 | SCRIPT_VIDEO_PATH=/@:uniqueId/video/:id
25 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /vendor
2 | /.idea
3 | Homestead.json
4 | Homestead.yaml
5 | .env
6 |
--------------------------------------------------------------------------------
/.styleci.yml:
--------------------------------------------------------------------------------
1 | php:
2 | preset: laravel
3 | disabled:
4 | - unused_use
5 | js: true
6 | css: true
7 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Release Notes
2 |
3 | ## [v2.0.0 (2020-03-19)]
4 |
5 | ### Changed
6 | - Document structure used to get user details. Info for multiple users can now be fetched in one request
7 | - composer update done!
8 |
9 | ### Not released
10 | - Updated user videos. TikTok made a major change in the way videos are displayed
11 | (now using api - that requires unique per session signatures). Workaround in progress...
12 |
--------------------------------------------------------------------------------
/app/Console/Commands/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moseseth/tiktok-scraper/7c580031e4ea73c3c4839990cc2970d6dcea7f18/app/Console/Commands/.gitkeep
--------------------------------------------------------------------------------
/app/Console/Kernel.php:
--------------------------------------------------------------------------------
1 | getResponse()) {
74 | $status = Response::HTTP_BAD_REQUEST;
75 | $exception = new \Dotenv\Exception\ValidationException('HTTP_BAD_REQUEST',
76 | $status, $exception);
77 | }
78 |
79 | return response()->json([
80 | 'success' => false,
81 | 'message' => $exception->getMessage()],
82 | $status);
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/app/Http/Controllers/Controller.php:
--------------------------------------------------------------------------------
1 | fractal = $fractal;
24 | $this->tikTokScraperService = $tikTokScraperService;
25 | }
26 |
27 | /**
28 | * Delegates to $this->fractal->item() and returns the resulting array.
29 | * @param mixed $data
30 | * @param TransformerAbstract $transformer
31 | * @param string|null $resourceKey defaults to null; presumably a string key — TODO confirm
32 | */
33 | public function item($data, TransformerAbstract $transformer, $resourceKey = null): array
34 | {
35 | return $this->fractal->item($data, $transformer, $resourceKey);
36 | }
37 |
38 | /**
39 | * Delegates to $this->fractal->collection() and returns the resulting array.
40 | * @param mixed $data
41 | * @param TransformerAbstract $transformer
42 | * @param string|null $resourceKey defaults to null; presumably a string key — TODO confirm
43 | */
44 | public function collection($data, TransformerAbstract $transformer, $resourceKey = null): array
45 | {
46 | return $this->fractal->collection($data, $transformer, $resourceKey);
47 | }
48 |
49 | }
50 |
--------------------------------------------------------------------------------
/app/Http/Controllers/UserController.php:
--------------------------------------------------------------------------------
1 | query('id')));
25 | $userData = $this->tikTokScraperService->extractUsers($userIds);
26 |
27 | dispatch(new HandleDatabaseOperations($userData, null));
28 |
29 | if (is_array($userData)) {
30 | $userData = User::hydrate($userData);
31 | }
32 |
33 | return $this->collection($userData, new UserTransformer, 'users');
34 | }
35 |
36 | /**
37 |  * Returns the transformed videos of one user, selected by the comma-separated `id` query parameter.
38 |  * The scraped user and video data are also dispatched to HandleDatabaseOperations.
39 |  * @param Request $request
40 |  * @param string $id user identifier taken from the route
41 |  * @return array
42 |  */
43 | public function getUserVideos(Request $request, string $id): array
44 | {
45 |     $userData = $this->tikTokScraperService->extractUsers([$id]);
46 |
47 |     // Default to '' so explode() never receives null when the `id` parameter is absent.
48 |     $videoIds = array_filter(explode(',', $request->query('id', '')));
49 |     $videoData = $this->tikTokScraperService->extractVideos($id, $videoIds);
50 |
51 |     dispatch(new HandleDatabaseOperations($userData, $videoData, true));
52 |
53 |     $videos = is_array($videoData) ? Video::hydrate($videoData) : $videoData;
54 |     return $this->collection($videos, new VideoTransformer, 'videos');
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/app/Http/Response/FractalResponse.php:
--------------------------------------------------------------------------------
1 | manager = $manager;
27 | $this->serializer = $serializer;
28 | $this->manager->setSerializer($serializer);
29 | }
30 |
31 | // Wraps $data in an Item resource and returns its array form.
32 | public function item($data, TransformerAbstract $transformer, $resourceKey = null)
33 | {
34 |     $resource = new Item($data, $transformer, $resourceKey);
35 |     return $this->createDataArray($resource);
36 | }
37 |
38 | // Wraps $data in a Collection resource and returns its array form.
39 | public function collection($data, TransformerAbstract $transformer, $resourceKey = null)
40 | {
41 |     $resource = new Collection($data, $transformer, $resourceKey);
42 |     return $this->createDataArray($resource);
43 | }
44 |
45 | private function createDataArray(ResourceInterface $resource)
46 | {
47 | return $this->manager->createData($resource)->toArray();
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/app/Jobs/HandleDatabaseOperations.php:
--------------------------------------------------------------------------------
1 | users = $users;
25 | $this->videos = $videos;
26 | $this->exclude = $exclude;
27 | }
28 |
29 | /**
30 | * Execute the job: create or update the users held by this job.
31 | * NOTE(review): $this->videos is assigned on this job but not read here — confirm that is intended.
32 | * @return void
33 | */
34 | public function handle()
35 | {
36 | $this->createOrUpdateUsers($this->users, $this->exclude);
37 | }
38 |
39 | /**
40 |  * Upserts each user by short_name, then upserts the videos found under its 'videos' key.
41 |  * Everything runs inside one DB::transaction call that is given 3 attempts.
42 |  *
43 |  * @param array $users
44 |  * @param bool $exclude captured by the closure but not used
45 |  */
46 | private function createOrUpdateUsers(array $users, bool $exclude)
47 | {
48 |     DB::transaction(function () use ($users, $exclude) {
49 |         foreach ($users as $attributes) {
50 |             $persisted = User::updateOrCreate(['short_name' => $attributes['short_name']], $attributes);
51 |             $this->createOrUpdateVideos($attributes['videos'] ?? [], $persisted->id);
52 |         }
53 |     }, 3);
54 | }
55 |
56 |
57 | /**
58 |  * Upserts every video by video_id, writing $userId to its user_id.
59 |  * @param array $videos rows that each contain a 'video_id' key
60 |  */
61 | private function createOrUpdateVideos(array $videos, $userId)
62 | {
63 |     DB::transaction(function () use ($videos, $userId) {
64 |         foreach ($videos as $row) {
65 |             $lookup = ['video_id' => $row['video_id']];
66 |             $values = array_diff_key($row, $lookup);
67 |             $values['user_id'] = $userId;
68 |             Video::updateOrCreate($lookup, $values);
69 |         }
70 |     }, 3);
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/app/Jobs/Job.php:
--------------------------------------------------------------------------------
1 | hasMany('App\Models\Video')->select('video_id', 'user_id');
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/app/Models/Video.php:
--------------------------------------------------------------------------------
1 | belongsTo('App\Models\User');
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/app/Providers/AppServiceProvider.php:
--------------------------------------------------------------------------------
1 | app['auth']->viaRequest('api', function ($request) {
34 | if ($request->input('api_token')) {
35 | return User::where('api_token', $request->input('api_token'))->first();
36 | }
37 | });
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/app/Providers/EventServiceProvider.php:
--------------------------------------------------------------------------------
1 | [
16 | 'App\Listeners\ExampleListener',
17 | ],
18 | ];
19 | }
20 |
--------------------------------------------------------------------------------
/app/Providers/FractalServiceProvider.php:
--------------------------------------------------------------------------------
1 | app->bind(
16 | 'League\Fractal\Serializer\SerializerAbstract',
17 | 'League\Fractal\Serializer\DataArraySerializer'
18 | );
19 |
20 | $this->app->bind(FractalResponse::class, function ($app) {
21 | $manager = new Manager();
22 | $serializer = $app['League\Fractal\Serializer\SerializerAbstract'];
23 |
24 | return new FractalResponse($manager, $serializer);
25 | });
26 |
27 | $this->app->alias(FractalResponse::class, 'fractal');
28 | }
29 |
30 | }
31 |
--------------------------------------------------------------------------------
/app/Services/TikTokScraperService.php:
--------------------------------------------------------------------------------
1 | client = new Client([
22 | 'base_uri' => config('tiktok.base_url'),
23 | 'headers' => [
24 | 'User-Agent' => UserAgent::random()
25 | ]
26 | ]);
27 | }
28 |
29 | /**
30 |  * Requests "/<id>" for every given id and returns the user details collected on this service.
31 |  *
32 |  * @param array $userIds
33 |  * @return array
34 |  */
35 | public function extractUsers(array $userIds)
36 | {
37 |     $buildRequests = function ($ids) {
38 |         foreach ($ids as $id) {
39 |             yield $id => function () use ($id) {
40 |                 return $this->client->getAsync("/$id");
41 |             };
42 |         }
43 |     };
44 |     $this->scrapeTikTok(config('tiktok.script_user_path'), $buildRequests, $userIds);
45 |     return $this->userDetails;
46 | }
47 |
48 |
49 | /**
50 |  * Requests "/<userId>/video/<id>" for each id whose integer value is non-zero and returns the collected video details.
51 |  * @param string $userId
52 |  * @param array $videoIds
53 |  * @return array
54 |  */
55 | public function extractVideos(string $userId, array $videoIds)
56 | {
57 |     $buildRequests = function ($ids) use ($userId) {
58 |         foreach ($ids as $id) {
59 |             if ((int)$id == 0) {
60 |                 continue;
61 |             }
62 |             yield $id => function () use ($userId, $id) {
63 |                 return $this->client->getAsync("/$userId/video/$id");
64 |             };
65 |         }
66 |     };
67 |     $this->scrapeTikTok(config('tiktok.script_video_path'), $buildRequests, $videoIds);
68 |     return $this->videoDetails;
69 | }
70 |
71 | /**
72 |  * Runs the generated requests through a pool (concurrency 10) and collects user/video details from 200 responses.
73 |  *
74 |  * @param string $tiktokpath captured by the response handler, which currently reads the 'props' key instead
75 |  * @param Closure $requests receives $data and yields request factories
76 |  * @param array $data ids handed to $requests
77 |  */
78 | private function scrapeTikTok(string $tiktokpath, Closure $requests, array $data)
79 | {
80 |     $pool = new Pool($this->client, $requests($data), [
81 |         'concurrency' => 10,
82 |         'fulfilled' => function (Response $response) use ($tiktokpath) {
83 |             if ($response->getStatusCode() != 200) {
84 |                 return;
85 |             }
86 |             $scripts = (new Crawler((string)$response->getBody()))->filter('script')
87 |                 ->reduce(function (Crawler $node, $i) {
88 |                     return strpos((string)$node->attr('id'), '__NEXT_DATA__') !== false;
89 |                 });
90 |             // text() throws on an empty node list, so pages without the data script are skipped.
91 |             if ($scripts->count() === 0) {
92 |                 return;
93 |             }
94 |             $decoded = json_decode($scripts->text(), true);
95 |             [$userData, $videoListInPartial, $enhancedVideoData] = $this->getDataFromResult('props',
96 |                 is_array($decoded) ? $decoded : []);
97 |             if (!empty($userData)) {
98 |                 $user = $this->getUser($userData);
99 |                 $this->userDetails[] = array_merge($user, ['videos' => $this->getUserVideos($videoListInPartial)]);
100 |             }
101 |             if (!empty($enhancedVideoData['uniqueId']) && !empty($enhancedVideoData['itemInfos'])) {
102 |                 $this->videoDetails[] = $this->getVideo($enhancedVideoData);
103 |             }
104 |         },
105 |         'rejected' => function ($reason) {
106 |             Log::error('Promise Rejection', ['[tiktok scraper]' => $reason]);
107 |         }
108 |     ]);
109 |     $pool->promise()->wait();
110 | }
111 |
112 |
113 | /**
114 |  * Pulls the user data, the partial video list and the video data out of a decoded page document.
115 |  *
116 |  * @param string $tiktokpath top-level key of $result to read from
117 |  * @param array $result decoded JSON document
118 |  * @return array [userData, videoListInPartial, enhancedVideoData]; all three are null when the key is absent
119 |  */
120 | function getDataFromResult(string $tiktokpath, array $result): array
121 | {
122 |     if (!array_key_exists($tiktokpath, $result)) {
123 |         return [null, null, null];
124 |     }
125 |
126 |     $section = $result[$tiktokpath];
127 |     $meta = [
128 |         'uniqueId' => $section['uniqueId'] ?? null,
129 |         'origin' => $section['$origin'] ?? null,
130 |         'pageUrl' => $section['$pageUrl'] ?? null
131 |     ];
132 |
133 |     // Keys of 'videoData' are merged over the metadata keys.
134 |     $video = array_merge($meta, $section['videoData'] ?? []);
135 |     return [$section['pageProps']['userData'] ?? null, $section['itemList'] ?? null, $video];
136 | }
137 |
138 | /**
139 |  * Flattens the partial video list found on a user page into video rows.
140 |  * @param array|null $videoListInPartial
141 |  * @return array rows whose ids/duration are null when they cannot be derived
142 |  */
143 | function getUserVideos($videoListInPartial): array
144 | {
145 |     $videos = [];
146 |     foreach ($videoListInPartial ?: [] as $video) {
147 |         $url = $video['url'] ?? null;
148 |         // Segments of e.g. https://host/@user/video/123 are [https:, host, @user, video, 123].
149 |         $parts = filter_var($url, FILTER_VALIDATE_URL) ? preg_split('[/]', $url, -1, PREG_SPLIT_NO_EMPTY) : [];
150 |         $videos[] = [
151 |             // Test for presence first: a cast binds tighter than ??, so "(int)$x[4] ?? null" never yields null.
152 |             'video_id' => isset($parts[4]) ? (int)$parts[4] : null,
153 |             // NOTE(review): segment 2 looks like "@name", which casts to 0 — confirm the intended 'uid' type.
154 |             'uid' => isset($parts[2]) ? (int)$parts[2] : null,
155 |             'url' => $url,
156 |             'background_image' => $video['thumbnailUrl'][0] ?? null,
157 |             'content_url' => $video['contentUrl'] ?? null,
158 |             'duration_in_second' => isset($video['duration']) ? $this->ISO8601ToSeconds($video['duration']) : null,
159 |             'description' => $video['name'] ?? null,
160 |             'comment_count' => $video['commentCount'] ?? 0,
161 |             'like_count' => $video['interactionCount'] ?? 0
162 |         ];
163 |     }
164 |     return $videos;
165 | }
166 |
167 | /**
168 |  * Maps scraped user data to a user row; optional fields fall back to null, false or 0.
169 |  * @param array $userData must contain uniqueId and nickName
170 |  */
171 | function getUser(array $userData): array
172 | {
173 |     return [
174 |         'short_name' => $userData['uniqueId'],
175 |         'full_name' => $userData['nickName'],
176 |         'is_verified' => $userData['verified'] ?? false,
177 |         'biography' => $userData['signature'] ?? null,
178 |         'avatar' => $userData['coversMedium'][0] ?? null,
179 |         'following_count' => $userData['following'] ?? 0,
180 |         'fan_count' => $userData['fans'] ?? 0,
181 |         'heart_count' => $userData['heart'] ?? 0,
182 |         'video_count' => $userData['video'] ?? 0
183 |     ];
184 | }
185 |
186 | /**
187 |  * Builds a video row from video-page data; fields missing from $video become null (counts become 0).
188 |  * Presence is tested explicitly because casts and "." bind tighter than ??.
189 |  */
190 | function getVideo(array $video): array
191 | {
192 |     return [
193 |         'video_id' => isset($video['itemInfos']['id']) ? (int)$video['itemInfos']['id'] : null,
194 |         'uid' => $video['uniqueId'] ?? null,
195 |         'url' => isset($video['origin'], $video['pageUrl']) ? $video['origin'] . $video['pageUrl'] : null,
196 |         'background_image' => $video['itemInfos']['covers'][0] ?? null,
197 |         'content_url' => $video['itemInfos']['video']['urls'][0] ?? null,
198 |         'duration_in_second' => $video['itemInfos']['video']['videoMeta']['duration'] ?? null,
199 |         'sound_name' => $video['musicInfos']['musicName'] ?? null,
200 |         'description' => $video['itemInfos']['text'] ?? null,
201 |         'comment_count' => $video['itemInfos']['commentCount'] ?? 0,
202 |         'like_count' => $video['itemInfos']['diggCount'] ?? 0
203 |     ];
204 | }
205 |
206 |
207 | /**
208 |  * Converts an ISO 8601 duration such as "PT1M30S" to whole seconds.
209 |  * Only the day, hour, minute and second parts are counted.
210 |  *
211 |  * @param string $ISO8601
212 |  * @return int seconds, or 0 (after logging a warning) when the string cannot be parsed
213 |  */
214 | function ISO8601ToSeconds(string $ISO8601): int
215 | {
216 |     try {
217 |         $duration = new \DateInterval($ISO8601);
218 |     } catch (\Exception $e) {
219 |         Log::warning('[ISO8601ToSeconds]', ['message' => $e->getMessage()]);
220 |         return 0;
221 |     }
222 |
223 |     return $duration->s + 60 * ($duration->i + 60 * ($duration->h + 24 * $duration->d));
224 | }
225 |
226 | }
227 |
--------------------------------------------------------------------------------
/app/Transformers/UserTransformer.php:
--------------------------------------------------------------------------------
1 | '@' . $user->short_name,
19 | 'full_name' => $user->full_name,
20 | 'is_verified' => (boolean)$user->is_verified,
21 | 'bio' => $user->biography,
22 | 'thumbnail_image' => $user->avatar,
23 | 'total' => [
24 | 'fans' => $user->fan_count,
25 | 'hearts' => (int) $user->heart_count,
26 | 'followings' => $user->following_count,
27 | 'videos' => $user->video_count
28 | ]
29 | ];
30 | }
31 |
32 | // Include for a user's videos, rendered through VideoTransformer(true).
33 | public function includeVideo(User $user)
34 | {
35 |     $videos = $user->videos;
36 |     // Plain arrays are hydrated into Video models before being transformed.
37 |     if (is_array($videos)) {
38 |         $videos = Video::hydrate($videos);
39 |     }
40 |     return $this->collection($videos, new VideoTransformer(true));
41 | }
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/app/Transformers/VideoTransformer.php:
--------------------------------------------------------------------------------
1 | shorten = $shorten;
16 | }
17 |
18 | /**
19 |  * Maps a Video to its API representation.
20 |  * When $this->shorten is truthy only video_id is exposed.
21 |  */
22 | public function transform(Video $video)
23 | {
24 |     if ($this->shorten) {
25 |         return ['video_id' => $video->video_id];
26 |     }
27 |
28 |     $totals = ['likes' => $video->like_count, 'comments' => $video->comment_count];
29 |     return [
30 |         'url' => $video->url,
31 |         'upload_data' => $video->content_url,
32 |         'duration' => $video->duration_in_second,
33 |         'sound' => $video->sound_name,
34 |         'description' => $video->description,
35 |         'thumbnail_image' => $video->background_image,
36 |         'total' => $totals
37 |     ];
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/artisan:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env php
2 | make(
32 | 'Illuminate\Contracts\Console\Kernel'
33 | );
34 |
35 | exit($kernel->handle(new ArgvInput, new ConsoleOutput));
36 |
--------------------------------------------------------------------------------
/bootstrap/app.php:
--------------------------------------------------------------------------------
1 | bootstrap();
11 |
12 | /*
13 | |--------------------------------------------------------------------------
14 | | Create The Application
15 | |--------------------------------------------------------------------------
16 | |
17 | | Here we will load the environment and create the application instance
18 | | that serves as the central piece of this framework. We'll use this
19 | | application as an "IoC" container and router for this framework.
20 | |
21 | */
22 |
23 | $app = new Laravel\Lumen\Application(
24 | dirname(__DIR__)
25 | );
26 |
27 | $app->withFacades();
28 |
29 | $app->withEloquent();
30 |
31 | /*
32 | |--------------------------------------------------------------------------
33 | | Register Container Bindings
34 | |--------------------------------------------------------------------------
35 | |
36 | | Now we will register a few bindings in the service container. We will
37 | | register the exception handler and the console kernel. You may add
38 | | your own bindings here if you like or you can make another file.
39 | |
40 | */
41 |
42 | $app->singleton(
43 | Illuminate\Contracts\Debug\ExceptionHandler::class,
44 | App\Exceptions\Handler::class
45 | );
46 |
47 | $app->singleton(
48 | Illuminate\Contracts\Console\Kernel::class,
49 | App\Console\Kernel::class
50 | );
51 |
52 | /*
53 | |--------------------------------------------------------------------------
54 | | Register Middleware
55 | |--------------------------------------------------------------------------
56 | |
57 | | Next, we will register the middleware with the application. These can
58 | | be global middleware that run before and after each request into a
59 | | route or middleware that'll be assigned to some specific routes.
60 | |
61 | */
62 |
63 | // $app->middleware([
64 | // App\Http\Middleware\ExampleMiddleware::class
65 | // ]);
66 |
67 | // $app->routeMiddleware([
68 | // 'auth' => App\Http\Middleware\Authenticate::class,
69 | // ]);
70 |
71 | /*
72 | |--------------------------------------------------------------------------
73 | | Register Service Providers
74 | |--------------------------------------------------------------------------
75 | |
76 | | Here we will register all of the application's service providers which
77 | | are used to bind services into the container. Service providers are
78 | | totally optional, so you are not required to uncomment this line.
79 | |
80 | */
81 |
82 | // $app->register(App\Providers\AppServiceProvider::class);
83 | // $app->register(App\Providers\AuthServiceProvider::class);
84 | // $app->register(App\Providers\EventServiceProvider::class);
85 |
86 | $app->register(LumenGeneratorServiceProvider::class);
87 | $app->register(FractalServiceProvider::class);
88 |
89 | $app->configure('tiktok');
90 |
91 | /*
92 | |--------------------------------------------------------------------------
93 | | Load The Application Routes
94 | |--------------------------------------------------------------------------
95 | |
96 | | Next we will include the routes file so that they can all be added to
97 | | the application. This will provide all of the URLs the application
98 | | can respond to, as well as the controllers that may handle them.
99 | |
100 | */
101 |
102 | $app->router->group([
103 | 'namespace' => 'App\Http\Controllers',
104 | ], function ($router) {
105 | require __DIR__ . '/../routes/web.php';
106 | });
107 |
108 | return $app;
109 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "laravel/lumen",
3 | "description": "The Laravel Lumen Framework.",
4 | "keywords": [
5 | "framework",
6 | "laravel",
7 | "lumen"
8 | ],
9 | "license": "MIT",
10 | "type": "project",
11 | "require": {
12 | "php": ">=7.1.3",
13 | "ext-json": "*",
14 | "campo/random-user-agent": "^1.3",
15 | "flipbox/lumen-generator": "^5.6",
16 | "guzzlehttp/guzzle": "^6.5",
17 | "laravel/lumen-framework": "5.8.*",
18 | "spatie/laravel-fractal": "^5.5",
19 | "symfony/css-selector": "^4.3",
20 | "symfony/dom-crawler": "^4.3"
21 | },
22 | "require-dev": {
23 | "fzaninotto/faker": "^1.4",
24 | "phpunit/phpunit": "^7.0",
25 | "mockery/mockery": "^1.0"
26 | },
27 | "autoload": {
28 | "classmap": [
29 | "database/seeds",
30 | "database/factories"
31 | ],
32 | "psr-4": {
33 | "App\\": "app/"
34 | }
35 | },
36 | "autoload-dev": {
37 | "classmap": [
38 | "tests/"
39 | ]
40 | },
41 | "scripts": {
42 | "post-root-package-install": [
43 | "@php -r \"file_exists('.env') || copy('.env.example', '.env');\""
44 | ],
45 | "test": [
46 | "phpunit"
47 | ]
48 | },
49 | "config": {
50 | "preferred-install": "dist",
51 | "sort-packages": true,
52 | "optimize-autoloader": true
53 | },
54 | "minimum-stability": "dev",
55 | "prefer-stable": true
56 | }
57 |
--------------------------------------------------------------------------------
/config/testing/database.php:
--------------------------------------------------------------------------------
1 | 'sqlite',
5 | 'connections' => [
6 | 'sqlite' => [
7 | 'driver' => 'sqlite',
8 | 'database' => ':memory:',
9 | 'prefix' => '',
10 | ],
11 | ],
12 | ];
13 |
--------------------------------------------------------------------------------
/config/tiktok.php:
--------------------------------------------------------------------------------
1 | env('TIkTOK_URL', 'https://www.tiktok.com'),
4 | 'script_user_path' => env('SCRIPT_USER_PATH', '/@:uniqueId'),
5 | 'script_video_path' => env('SCRIPT_VIDEO_PATH', '/@:uniqueId/video/:id')
6 | ];
7 |
--------------------------------------------------------------------------------
/database/factories/ModelFactory.php:
--------------------------------------------------------------------------------
1 | bigIncrements('id');
18 | $table->string('short_name')->unique();
19 | $table->string('full_name');
20 | $table->boolean('is_verified')->default(false);
21 | $table->string('biography')->nullable();
22 | $table->string('avatar')->nullable();
23 | $table->unsignedBigInteger('fan_count')->default(0);
24 | $table->unsignedBigInteger('heart_count')->default(0);
25 | $table->unsignedBigInteger('following_count')->default(0);
26 | $table->unsignedBigInteger('video_count')->default(0);
27 | $table->timestamps();
28 | });
29 | }
30 |
31 | /**
32 | * Reverse the migrations.
33 | *
34 | * @return void
35 | */
36 | public function down()
37 | {
38 | Schema::dropIfExists('users');
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/database/migrations/2019_08_23_122708_create_videos_table.php:
--------------------------------------------------------------------------------
1 | bigIncrements('id');
18 | $table->string('video_id')->unique();
19 | $table->bigInteger('user_id')->unsigned();
20 | $table->foreign('user_id')->references('id')->on('users');
21 | $table->string('url');
22 | $table->string('background_image');
23 | $table->text('content_url');
24 | $table->unsignedInteger('duration_in_second');
25 | $table->string('description');
26 | $table->string('sound_name')->nullable();
27 | $table->unsignedBigInteger('like_count')->default(0);
28 | $table->unsignedBigInteger('comment_count')->default(0);
29 | $table->timestamps();
30 | });
31 | }
32 |
33 | /**
34 | * Reverse the migrations.
35 | *
36 | * @return void
37 | */
38 | public function down()
39 | {
40 | Schema::dropIfExists('videos');
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/database/migrations/2019_08_25_224149_create_jobs_table.php:
--------------------------------------------------------------------------------
1 | bigIncrements('id');
18 | $table->string('queue')->index();
19 | $table->longText('payload');
20 | $table->unsignedTinyInteger('attempts');
21 | $table->unsignedInteger('reserved_at')->nullable();
22 | $table->unsignedInteger('available_at');
23 | $table->unsignedInteger('created_at');
24 | });
25 | }
26 |
27 | /**
28 | * Reverse the migrations.
29 | *
30 | * @return void
31 | */
32 | public function down()
33 | {
34 | Schema::dropIfExists('jobs');
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/database/seeds/DatabaseSeeder.php:
--------------------------------------------------------------------------------
1 |
2 |
11 |
12 |
13 | ./tests
14 |
15 |
16 |
17 |
18 | ./app
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/public/.htaccess:
--------------------------------------------------------------------------------
1 |
2 |
3 | Options -MultiViews -Indexes
4 |
5 |
6 | RewriteEngine On
7 |
8 | # Handle Authorization Header
9 | RewriteCond %{HTTP:Authorization} .
10 | RewriteRule .* - [E=HTTP_AUTHORIZATION:%{HTTP:Authorization}]
11 |
12 | # Redirect Trailing Slashes If Not A Folder...
13 | RewriteCond %{REQUEST_FILENAME} !-d
14 | RewriteCond %{REQUEST_URI} (.+)/$
15 | RewriteRule ^ %1 [L,R=301]
16 |
17 | # Handle Front Controller...
18 | RewriteCond %{REQUEST_FILENAME} !-d
19 | RewriteCond %{REQUEST_FILENAME} !-f
20 | RewriteRule ^ index.php [L]
21 |
22 |
--------------------------------------------------------------------------------
/public/index.php:
--------------------------------------------------------------------------------
1 | run();
29 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | ### TikTok Scraper™
2 |
3 | This project uses DomCrawler and concurrent requests to retrieve data from the TikTok website. Once the promises are
4 | resolved, it parses user- and video-related information and stores it in the database for later consumption.
5 |
6 | In order not to slow down the scraping process, all database-related operations run in the background as jobs.
7 |
8 | ##### Installation:
9 |
10 | ```
11 | git clone https://github.com/moseseth/tiktok-scraper.git
12 |
13 | cd tiktok-scraper
14 |
15 | cp .env.example .env // update DB_USERNAME , DB_PASSWORD & CREATE 'tiktok' database
16 |
17 | composer install
18 |
19 | php artisan migrate
20 |
21 | php artisan serve
22 |
23 | php artisan queue:work --tries=3
24 | ```
25 |
26 | ##### Run Test:
27 | ```
28 | composer run-script test
29 | ```
30 |
31 | ##### Usage Example:
32 | * http://localhost:8000/api/users?id=@wilczewska,@realmadrid
33 | * http://localhost:8000/api/users/@wilczewska/videos?id=6727979845919214853,6722754487129246982
34 |
35 |
36 | ### Ongoing updates:
37 | https://github.com/moseseth/tiktok-scraper/blob/master/CHANGELOG.md
38 |
39 |
40 | Glossary : `#PHP 7.2, #MySQL, #Queue, #Guzzle 6.0, #Lumen`
41 |
--------------------------------------------------------------------------------
/resources/views/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moseseth/tiktok-scraper/7c580031e4ea73c3c4839990cc2970d6dcea7f18/resources/views/.gitkeep
--------------------------------------------------------------------------------
/routes/web.php:
--------------------------------------------------------------------------------
1 | get('/', function () use ($router) {
15 | return $router->app->version();
16 | });
17 |
18 | $router->group(['prefix' => 'api'], function () use ($router) {
19 | $router->get('users', 'UserController@getUsersProfile');
20 | $router->get('users/{id}/videos', 'UserController@getUserVideos');
21 | });
22 |
--------------------------------------------------------------------------------
/storage/app/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/storage/framework/cache/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !data/
3 | !.gitignore
4 |
--------------------------------------------------------------------------------
/storage/framework/cache/data/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/storage/framework/views/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/storage/logs/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/tests/TestCase.php:
--------------------------------------------------------------------------------
1 | get('/');
16 |
17 | $this->assertEquals(
18 | $this->app->version(), $this->response->getContent()
19 | );
20 | }
21 |
22 |     /**
23 |      * Requests the users endpoint for @arsenal and expects HTTP 200 with the user shape below.
24 |      */
25 |     public function testGetUsersProfileShouldReturnUserData()
26 |     {
27 |         $userShape = [
28 |             "user_id",
29 |             "full_name",
30 |             "is_verified",
31 |             "bio",
32 |             "thumbnail_image",
33 |             "total" => [
34 |                 "fans",
35 |                 "hearts",
36 |                 "followings",
37 |                 "videos",
38 |             ],
39 |             "video" => ["data" => ['*' => ["video_id"]]],
40 |         ];
41 |
42 |         $this->get('/api/users?id=@arsenal');
43 |
44 |         $this->seeStatusCode(200);
45 |         $this->seeJsonStructure(["data" => ['*' => $userShape]]);
46 |     }
47 |
48 |     // Requests the users endpoint with id=111 and expects HTTP 200 plus a "data" key.
49 |     public function testGetUsersProfileShouldReturnNullWithInvalidOrNonExistingId()
50 |     {
51 |         $expectedStructure = ["data" => []];
52 |
53 |         $this->get('/api/users?id=111');
54 |         $this->seeStatusCode(200);
55 |         $this->seeJsonStructure($expectedStructure);
56 |     }
57 |
58 |     // Requests @lorengray's videos with id=xxx and expects HTTP 200 plus a "data" key.
59 |     public function testGetUserVideoShouldReturnEmptyOnNonExistingId()
60 |     {
61 |         $expectedStructure = ["data" => []];
62 |
63 |         $this->get('/api/users/@lorengray/videos?id=xxx')
64 |             ->seeStatusCode(200)
65 |             ->seeJsonStructure($expectedStructure);
66 |     }
67 |
68 |     /**
69 |      * Requests a single video of @wilczewska and expects HTTP 200 with the video shape below.
70 |      */
71 |     public function testGetUserVideoShouldReturnVideoData()
72 |     {
73 |         $videoShape = [
74 |             'url',
75 |             'upload_data',
76 |             'duration',
77 |             'sound',
78 |             'description',
79 |             'thumbnail_image',
80 |             'total' => ['likes', 'comments'],
81 |         ];
82 |
83 |         $this->get('/api/users/@wilczewska/videos?id=6728320568954768646');
84 |
85 |         $this->seeStatusCode(200);
86 |         $this->seeJsonStructure(["data" => ['*' => $videoShape]]);
87 |     }
88 | }
89 |
--------------------------------------------------------------------------------
/tests/app/Jobs/HandleDatabaseJobTest.php:
--------------------------------------------------------------------------------
1 | faker = Faker::create();
23 | $this->expectsJobs(HandleDatabaseOperations::class);
24 |
25 | $this->users = [
26 | 'short_name' => 'realmadrid',
27 | 'full_name' => $this->faker->name,
28 | 'is_verified' => $this->faker->boolean,
29 | 'biography' => $this->faker->text,
30 | 'avatar' => $this->faker->imageUrl(),
31 | 'following_count' => $this->faker->numberBetween(10, 300),
32 | 'fan_count' => $this->faker->numberBetween(10, 300),
33 | 'heart_count' => $this->faker->numberBetween(10, 300),
34 | 'video_count' => $this->faker->numberBetween(10, 300),
35 | 'video' => []
36 | ];
37 | }
38 |
39 |     // Dispatches the job with user data only, upserts that user and checks the `users` row.
40 |     public function testHandlerDispatchedFromUserRoute()
41 |     {
42 |         dispatch(new HandleDatabaseOperations($this->users, null));
43 |
44 |         $this->beginDatabaseTransaction();
45 |         $lookup = ['short_name' => $this->users['short_name']];
46 |         User::updateOrCreate($lookup, $this->users);
47 |
48 |         $this->seeInDatabase('users', ['short_name' => 'realmadrid']);
49 |     }
50 |
51 |     // Dispatches the job with a user and a video payload, then upserts the user and
52 |     // the video directly and checks that the video row is present in `videos`.
53 |     public function testHandlerDispatchedFromVideoRoute()
54 |     {
55 |         $videos = [
56 |             'video_id' => 6728320568954768646,
57 |             'uid' => 'realmadrid',
58 |             'url' => $this->faker->url,
59 |             'background_image' => $this->faker->imageUrl(),
60 |             'content_url' => $this->faker->url, // image() would download a file and return its path
61 |             'duration_in_second' => $this->faker->time('s'),
62 |             'sound_name' => $this->faker->sentence,
63 |             'description' => $this->faker->sentence,
64 |             'comment_count' => $this->faker->randomDigit,
65 |             'like_count' => $this->faker->randomDigit
66 |         ];
67 |
68 |         dispatch(new HandleDatabaseOperations($this->users, $videos, true));
69 |
70 |         $this->beginDatabaseTransaction();
71 |         $user = User::updateOrCreate(['short_name' => $videos['uid']], $this->users);
72 |         $videos['user_id'] = $user->id;
73 |         Video::updateOrCreate(['video_id' => $videos['video_id']], $videos);
74 |
75 |         $this->seeInDatabase('videos', ['video_id' => $videos['video_id']]);
76 |     }
77 | }
78 |
--------------------------------------------------------------------------------
/tests/app/Services/TikTokScraperTest.php:
--------------------------------------------------------------------------------
1 | scraperService = new TikTokScraperService();
17 | }
18 |
19 | public function testISO8601ToSecondsShouldReturnValidConversion() // 'PT30S' must convert to 30
20 | {
21 | $this->assertEquals(30, $this->scraperService->ISO8601ToSeconds('PT30S')); // PT30S: ISO 8601 duration of 30 seconds
22 | }
23 |
24 | public function testISO8601ToSecondsShouldReturnZeroWithInvalidParameter() // non-duration input must yield 0
25 | {
26 | $this->assertEquals(0, $this->scraperService->ISO8601ToSeconds('834784')); // '834784' is not an ISO 8601 duration string
27 | }
28 |
29 | public function testAttributes() { // the service class must declare both detail attributes
30 | $this->assertClassHasAttribute('userDetails', TikTokScraperService::class); // checks the class declares userDetails
31 | $this->assertClassHasAttribute('videoDetails', TikTokScraperService::class); // checks the class declares videoDetails
32 | }
33 | }
34 |
--------------------------------------------------------------------------------