├── .editorconfig ├── .env.example ├── .gitignore ├── .styleci.yml ├── CHANGELOG.md ├── app ├── Console │ ├── Commands │ │ └── .gitkeep │ └── Kernel.php ├── Events │ └── Event.php ├── Exceptions │ └── Handler.php ├── Http │ ├── Controllers │ │ ├── Controller.php │ │ └── UserController.php │ └── Response │ │ └── FractalResponse.php ├── Jobs │ ├── HandleDatabaseOperations.php │ └── Job.php ├── Models │ ├── User.php │ └── Video.php ├── Providers │ ├── AppServiceProvider.php │ ├── AuthServiceProvider.php │ ├── EventServiceProvider.php │ └── FractalServiceProvider.php ├── Services │ └── TikTokScraperService.php └── Transformers │ ├── UserTransformer.php │ └── VideoTransformer.php ├── artisan ├── bootstrap └── app.php ├── composer.json ├── composer.lock ├── config ├── testing │ └── database.php └── tiktok.php ├── database ├── factories │ └── ModelFactory.php ├── migrations │ ├── .gitkeep │ ├── 2019_08_23_121212_create_users_table.php │ ├── 2019_08_23_122708_create_videos_table.php │ └── 2019_08_25_224149_create_jobs_table.php └── seeds │ └── DatabaseSeeder.php ├── phpunit.xml ├── public ├── .htaccess └── index.php ├── readme.md ├── resources └── views │ └── .gitkeep ├── routes └── web.php ├── storage ├── app │ └── .gitignore ├── framework │ ├── cache │ │ ├── .gitignore │ │ └── data │ │ │ └── .gitignore │ └── views │ │ └── .gitignore └── logs │ └── .gitignore └── tests ├── TestCase.php └── app ├── Http └── Controllers │ └── UserControllerTest.php ├── Jobs └── HandleDatabaseJobTest.php └── Services └── TikTokScraperTest.php /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | insert_final_newline = true 7 | indent_style = space 8 | indent_size = 4 9 | trim_trailing_whitespace = true 10 | 11 | [*.md] 12 | trim_trailing_whitespace = false 13 | 14 | [*.yml] 15 | indent_size = 2 16 | 
-------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | APP_NAME=Lumen 2 | APP_ENV=local 3 | APP_KEY=base64:rWx76IEPTYSH//teKhwR+1jQijBftek8TMO75jsQxIQ= 4 | APP_DEBUG=false 5 | APP_URL=http://localhost 6 | APP_TIMEZONE=UTC 7 | 8 | LOG_CHANNEL=stack 9 | LOG_SLACK_WEBHOOK_URL= 10 | 11 | DB_DEFAULT=mysql 12 | DB_CONNECTION=mysql 13 | DB_HOST=127.0.0.1 14 | DB_PORT=3306 15 | DB_DATABASE=tiktok 16 | DB_USERNAME=root 17 | DB_PASSWORD=io28 18 | 19 | CACHE_DRIVER=file 20 | QUEUE_CONNECTION=database 21 | 22 | TIkTOK_URL=https://www.tiktok.com 23 | SCRIPT_USER_PATH=/@:uniqueId 24 | SCRIPT_VIDEO_PATH=/@:uniqueId/video/:id 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /vendor 2 | /.idea 3 | Homestead.json 4 | Homestead.yaml 5 | .env 6 | -------------------------------------------------------------------------------- /.styleci.yml: -------------------------------------------------------------------------------- 1 | php: 2 | preset: laravel 3 | disabled: 4 | - unused_use 5 | js: true 6 | css: true 7 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Release Notes 2 | 3 | ## [v2.0.0 (2020-03-19)] 4 | 5 | ### Changed 6 | - Document structure used to get user details. Info for multiple users can now be retrieved 7 | - Ran composer update 8 | 9 | ### Not released 10 | - Updated user videos. TikTok made a major change in the way videos are displayed 11 | (they are now loaded through an API that requires unique per-session signatures). Workaround in progress... 
12 | -------------------------------------------------------------------------------- /app/Console/Commands/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moseseth/tiktok-scraper/7c580031e4ea73c3c4839990cc2970d6dcea7f18/app/Console/Commands/.gitkeep -------------------------------------------------------------------------------- /app/Console/Kernel.php: -------------------------------------------------------------------------------- 1 | getResponse()) { 74 | $status = Response::HTTP_BAD_REQUEST; 75 | $exception = new \Dotenv\Exception\ValidationException('HTTP_BAD_REQUEST', 76 | $status, $exception); 77 | } 78 | 79 | return response()->json([ 80 | 'success' => false, 81 | 'message' => $exception->getMessage()], 82 | $status); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /app/Http/Controllers/Controller.php: -------------------------------------------------------------------------------- 1 | fractal = $fractal; 24 | $this->tikTokScraperService = $tikTokScraperService; 25 | } 26 | 27 | /** 28 | * @param $data 29 | * @param TransformerAbstract $transformer 30 | * @param null $resourceKey 31 | * @return array 32 | */ 33 | public function item($data, TransformerAbstract $transformer, $resourceKey = null): array 34 | { 35 | return $this->fractal->item($data, $transformer, $resourceKey); 36 | } 37 | 38 | /** 39 | * @param $data 40 | * @param TransformerAbstract $transformer 41 | * @param null $resourceKey 42 | * @return array 43 | */ 44 | public function collection($data, TransformerAbstract $transformer, $resourceKey = null): array 45 | { 46 | return $this->fractal->collection($data, $transformer, $resourceKey); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /app/Http/Controllers/UserController.php: -------------------------------------------------------------------------------- 1 | 
query('id'))); 25 | $userData = $this->tikTokScraperService->extractUsers($userIds); 26 | 27 | dispatch(new HandleDatabaseOperations($userData, null)); 28 | 29 | if (is_array($userData)) { 30 | $userData = User::hydrate($userData); 31 | } 32 | 33 | return $this->collection($userData, new UserTransformer, 'users'); 34 | } 35 | 36 | /** 37 | * @param Request $request 38 | * @param $id 39 | * @return array 40 | */ 41 | public function getUserVideos(Request $request, string $id): array 42 | { 43 | $userData = $this->tikTokScraperService->extractUsers([$id]); 44 | 45 | $videoIds = array_filter(explode(',', $request->query('id'))); 46 | $videoData = $this->tikTokScraperService->extractVideos($id, $videoIds); 47 | 48 | dispatch(new HandleDatabaseOperations($userData, $videoData, true)); 49 | 50 | if (is_array($videoData)) { 51 | $videoData = Video::hydrate($videoData); 52 | } 53 | 54 | return $this->collection($videoData, new VideoTransformer, 'videos'); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /app/Http/Response/FractalResponse.php: -------------------------------------------------------------------------------- 1 | manager = $manager; 27 | $this->serializer = $serializer; 28 | $this->manager->setSerializer($serializer); 29 | } 30 | 31 | public function item($data, TransformerAbstract $transformer, $resourceKey = null) 32 | { 33 | return $this->createDataArray( 34 | new Item($data, $transformer, $resourceKey) 35 | ); 36 | } 37 | 38 | public function collection($data, TransformerAbstract $transformer, $resourceKey = null) 39 | { 40 | return $this->createDataArray( 41 | new Collection($data, $transformer, $resourceKey) 42 | ); 43 | } 44 | 45 | private function createDataArray(ResourceInterface $resource) 46 | { 47 | return $this->manager->createData($resource)->toArray(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /app/Jobs/HandleDatabaseOperations.php: 
-------------------------------------------------------------------------------- 1 | users = $users; 25 | $this->videos = $videos; 26 | $this->exclude = $exclude; 27 | } 28 | 29 | /** 30 | * Execute the job. 31 | * 32 | * @return void 33 | */ 34 | public function handle() 35 | { 36 | $this->createOrUpdateUsers($this->users, $this->exclude); 37 | } 38 | 39 | /** 40 | * @param array $users 41 | * @param $exclude 42 | */ 43 | private function createOrUpdateUsers(array $users, bool $exclude) 44 | { 45 | DB::transaction(function () use ($users, $exclude) { 46 | foreach ($users as $user) { 47 | $user_id = ['short_name' => $user['short_name']]; 48 | $updatedUser = User::updateOrCreate($user_id, $user); 49 | 50 | $videos = $user['videos'] ?? []; 51 | $this->createOrUpdateVideos($videos, $updatedUser->id); 52 | } 53 | }, 3); 54 | } 55 | 56 | 57 | /** 58 | * @param $userId 59 | * @param array $videos 60 | */ 61 | private function createOrUpdateVideos(array $videos, $userId) 62 | { 63 | DB::transaction(function () use ($videos, $userId) { 64 | foreach ($videos as $video) { 65 | $video['user_id'] = $userId; 66 | $video_id = ['video_id' => $video['video_id']]; 67 | unset($video['video_id']); 68 | Video::updateOrCreate($video_id, $video); 69 | } 70 | }, 3); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /app/Jobs/Job.php: -------------------------------------------------------------------------------- 1 | hasMany('App\Models\Video')->select('video_id', 'user_id'); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /app/Models/Video.php: -------------------------------------------------------------------------------- 1 | belongsTo('App\Models\User'); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /app/Providers/AppServiceProvider.php: -------------------------------------------------------------------------------- 1 | 
app['auth']->viaRequest('api', function ($request) { 34 | if ($request->input('api_token')) { 35 | return User::where('api_token', $request->input('api_token'))->first(); 36 | } 37 | }); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /app/Providers/EventServiceProvider.php: -------------------------------------------------------------------------------- 1 | [ 16 | 'App\Listeners\ExampleListener', 17 | ], 18 | ]; 19 | } 20 | -------------------------------------------------------------------------------- /app/Providers/FractalServiceProvider.php: -------------------------------------------------------------------------------- 1 | app->bind( 16 | 'League\Fractal\Serializer\SerializerAbstract', 17 | 'League\Fractal\Serializer\DataArraySerializer' 18 | ); 19 | 20 | $this->app->bind(FractalResponse::class, function ($app) { 21 | $manager = new Manager(); 22 | $serializer = $app['League\Fractal\Serializer\SerializerAbstract']; 23 | 24 | return new FractalResponse($manager, $serializer); 25 | }); 26 | 27 | $this->app->alias(FractalResponse::class, 'fractal'); 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /app/Services/TikTokScraperService.php: -------------------------------------------------------------------------------- 1 | client = new Client([ 22 | 'base_uri' => config('tiktok.base_url'), 23 | 'headers' => [ 24 | 'User-Agent' => UserAgent::random() 25 | ] 26 | ]); 27 | } 28 | 29 | /** 30 | * @param array $userIds 31 | * @return array 32 | */ 33 | public function extractUsers(array $userIds) 34 | { 35 | $requests = function ($userIds) { 36 | foreach ($userIds as $userId) { 37 | yield $userId => function () use ($userId) { 38 | return $this->client->getAsync("/$userId"); 39 | }; 40 | } 41 | }; 42 | 43 | $this->scrapeTikTok(config('tiktok.script_user_path'), $requests, $userIds); 44 | 45 | return $this->userDetails; 46 | } 47 | 48 | 49 | /** 50 | * @param 
string $userId 51 | * @param array $videoIds 52 | * @return array 53 | */ 54 | public function extractVideos(string $userId, array $videoIds) 55 | { 56 | $requests = function ($videoIds) use ($userId) { 57 | foreach ($videoIds as $videoId) { 58 | if ((int)$videoId != 0) { 59 | yield $videoId => function () use ($userId, $videoId) { 60 | return $this->client->getAsync("/$userId/video/$videoId"); 61 | }; 62 | } 63 | } 64 | }; 65 | 66 | $this->scrapeTikTok(config('tiktok.script_video_path'), $requests, $videoIds); 67 | 68 | return $this->videoDetails; 69 | } 70 | 71 | /** 72 | * @param string $tiktokpath 73 | * @param Closure $requests 74 | * @param array $data 75 | */ 76 | private function scrapeTikTok(string $tiktokpath, Closure $requests, array $data) 77 | { 78 | $pool = new Pool($this->client, $requests($data), [ 79 | 'concurrency' => 10, 80 | 'fulfilled' => function (Response $response) use ($tiktokpath) { 81 | if ($response->getStatusCode() == 200) { 82 | $crawler = new Crawler((string)$response->getBody()); 83 | 84 | $scrappedData = $crawler->filter('script')->reduce(function (Crawler $node, $i) { 85 | return strpos($node->attr('id'), '__NEXT_DATA__') !== false; 86 | })->text(); 87 | $result = json_decode($scrappedData, true) ?? 
[]; 88 | 89 | [$userData, $videoListInPartial, $enhancedVideoData] = $this->getDataFromResult('props', 90 | $result); 91 | 92 | if (!empty($userData)) { 93 | $user = $this->getUser($userData); 94 | $videos = $this->getUserVideos($videoListInPartial); 95 | $this->userDetails[] = array_merge($user, ['videos' => $videos]); 96 | } 97 | 98 | if (!empty($enhancedVideoData['uniqueId']) && !empty($enhancedVideoData['itemInfos'])) { 99 | $this->videoDetails[] = $this->getVideo($enhancedVideoData); 100 | } 101 | } 102 | }, 103 | 'rejected' => function ($reason) { 104 | Log::error('Promise Rejection', ['[tiktok scraper]' => $reason]); 105 | } 106 | ]); 107 | 108 | $promise = $pool->promise(); 109 | $promise->wait(); 110 | } 111 | 112 | 113 | /** 114 | * @param string $tiktokpath 115 | * @param array $result 116 | * @return array 117 | */ 118 | function getDataFromResult(string $tiktokpath, array $result): array 119 | { 120 | $userData = null; 121 | $videoListInPartial = null; 122 | $enhancedVideoData = null; 123 | 124 | if (array_key_exists($tiktokpath, $result)) { 125 | $userData = $result[$tiktokpath]['pageProps']['userData'] ?? null; 126 | $videoListInPartial = $result[$tiktokpath]['itemList'] ?? null; 127 | $metaData = [ 128 | 'uniqueId' => $result[$tiktokpath]['uniqueId'] ?? null, 129 | 'origin' => $result[$tiktokpath]['$origin'] ?? null, 130 | 'pageUrl' => $result[$tiktokpath]['$pageUrl'] ?? null 131 | ]; 132 | $videoData = $result[$tiktokpath]['videoData'] ?? []; 133 | $enhancedVideoData = array_merge($metaData, $videoData) ?? 
null; 134 | } 135 | return [$userData, $videoListInPartial, $enhancedVideoData]; 136 | } 137 | 138 | /** 139 | * @param $videoListInPartial 140 | * @return array 141 | */ 142 | function getUserVideos($videoListInPartial): array 143 | { 144 | $videos = array(); 145 | if (!empty($videoListInPartial)) { 146 | foreach ($videoListInPartial as $video) { 147 | $isValidUrl = filter_var($video['url'], FILTER_VALIDATE_URL); 148 | $splicedVideoUrl = $isValidUrl ? preg_split('[/]', $video['url'], 149 | -1, PREG_SPLIT_NO_EMPTY) : []; 150 | 151 | $videos[] = [ 152 | 'video_id' => (int)$splicedVideoUrl[4] ?? null, 153 | 'uid' => (int)$splicedVideoUrl[2] ?? null, 154 | 'url' => $video['url'] ?? null, 155 | 'background_image' => $video['thumbnailUrl'][0] ?? null, 156 | 'content_url' => $video['contentUrl'] ?? null, 157 | 'duration_in_second' => $this->ISO8601ToSeconds($video['duration']) ?? null, 158 | 'description' => $video['name'] ?? null, 159 | 'comment_count' => $video['commentCount'] ?? 0, 160 | 'like_count' => $video['interactionCount'] ?? 0 161 | ]; 162 | } 163 | } 164 | return $videos; 165 | } 166 | 167 | /** 168 | * @param $userData 169 | * @return array 170 | */ 171 | function getUser(array $userData): array 172 | { 173 | return [ 174 | 'short_name' => $userData['uniqueId'], 175 | 'full_name' => $userData['nickName'], 176 | 'is_verified' => $userData['verified'], 177 | 'biography' => $userData['signature'], 178 | 'avatar' => $userData['coversMedium'][0], 179 | 'following_count' => $userData['following'] ?? 0, 180 | 'fan_count' => $userData['fans'] ?? 0, 181 | 'heart_count' => $userData['heart'] ?? 0, 182 | 'video_count' => $userData['video'] ?? 0 183 | ]; 184 | } 185 | 186 | /** 187 | * @param $video 188 | * @return array 189 | */ 190 | function getVideo(array $video): array 191 | { 192 | return [ 193 | 'video_id' => (int)$video['itemInfos']['id'] ?? null, 194 | 'uid' => $video['uniqueId'] ?? null, 195 | 'url' => $video['origin'] . $video['pageUrl'] ?? 
null, 196 | 'background_image' => $video['itemInfos']['covers'][0] ?? null, 197 | 'content_url' => $video['itemInfos']['video']['urls'][0] ?? null, 198 | 'duration_in_second' => $video['itemInfos']['video']['videoMeta']['duration'] ?? null, 199 | 'sound_name' => $video['musicInfos']['musicName'] ?? null, 200 | 'description' => $video['itemInfos']['text'] ?? null, 201 | 'comment_count' => $video['itemInfos']['commentCount'] ?? 0, 202 | 'like_count' => $video['itemInfos']['diggCount'] ?? 0 203 | ]; 204 | } 205 | 206 | 207 | /** 208 | * @param string $ISO8601 209 | * @return int 210 | */ 211 | function ISO8601ToSeconds(string $ISO8601): int 212 | { 213 | try { 214 | $interval = new \DateInterval($ISO8601); 215 | return ($interval->d * 24 * 60 * 60) + 216 | ($interval->h * 60 * 60) + 217 | ($interval->i * 60) + 218 | $interval->s; 219 | } catch (\Exception $e) { 220 | Log::warning('[ISO8601ToSeconds]', ['message' => $e->getMessage()]); 221 | } 222 | 223 | return 0; 224 | } 225 | 226 | } 227 | -------------------------------------------------------------------------------- /app/Transformers/UserTransformer.php: -------------------------------------------------------------------------------- 1 | '@' . 
$user->short_name, 19 | 'full_name' => $user->full_name, 20 | 'is_verified' => (boolean)$user->is_verified, 21 | 'bio' => $user->biography, 22 | 'thumbnail_image' => $user->avatar, 23 | 'total' => [ 24 | 'fans' => $user->fan_count, 25 | 'hearts' => (int) $user->heart_count, 26 | 'followings' => $user->following_count, 27 | 'videos' => $user->video_count 28 | ] 29 | ]; 30 | } 31 | 32 | public function includeVideo(User $user) 33 | { 34 | $video = $user->videos; 35 | 36 | if (is_array($video)) { 37 | $video = Video::hydrate($video); 38 | } 39 | 40 | return $this->collection($video, new VideoTransformer(true)); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /app/Transformers/VideoTransformer.php: -------------------------------------------------------------------------------- 1 | shorten = $shorten; 16 | } 17 | 18 | public function transform(Video $video) 19 | { 20 | if ($this->shorten) { 21 | return [ 22 | 'video_id' => $video->video_id 23 | ]; 24 | } 25 | 26 | return [ 27 | 'url' => $video->url, 28 | 'upload_data' => $video->content_url, 29 | 'duration' => $video->duration_in_second, 30 | 'sound' => $video->sound_name, 31 | 'description' => $video->description, 32 | 'thumbnail_image' => $video->background_image, 33 | 'total' => [ 34 | 'likes' => $video->like_count, 35 | 'comments' => $video->comment_count, 36 | ] 37 | ]; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /artisan: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | make( 32 | 'Illuminate\Contracts\Console\Kernel' 33 | ); 34 | 35 | exit($kernel->handle(new ArgvInput, new ConsoleOutput)); 36 | -------------------------------------------------------------------------------- /bootstrap/app.php: -------------------------------------------------------------------------------- 1 | bootstrap(); 11 | 12 | /* 13 | 
|-------------------------------------------------------------------------- 14 | | Create The Application 15 | |-------------------------------------------------------------------------- 16 | | 17 | | Here we will load the environment and create the application instance 18 | | that serves as the central piece of this framework. We'll use this 19 | | application as an "IoC" container and router for this framework. 20 | | 21 | */ 22 | 23 | $app = new Laravel\Lumen\Application( 24 | dirname(__DIR__) 25 | ); 26 | 27 | $app->withFacades(); 28 | 29 | $app->withEloquent(); 30 | 31 | /* 32 | |-------------------------------------------------------------------------- 33 | | Register Container Bindings 34 | |-------------------------------------------------------------------------- 35 | | 36 | | Now we will register a few bindings in the service container. We will 37 | | register the exception handler and the console kernel. You may add 38 | | your own bindings here if you like or you can make another file. 39 | | 40 | */ 41 | 42 | $app->singleton( 43 | Illuminate\Contracts\Debug\ExceptionHandler::class, 44 | App\Exceptions\Handler::class 45 | ); 46 | 47 | $app->singleton( 48 | Illuminate\Contracts\Console\Kernel::class, 49 | App\Console\Kernel::class 50 | ); 51 | 52 | /* 53 | |-------------------------------------------------------------------------- 54 | | Register Middleware 55 | |-------------------------------------------------------------------------- 56 | | 57 | | Next, we will register the middleware with the application. These can 58 | | be global middleware that run before and after each request into a 59 | | route or middleware that'll be assigned to some specific routes. 
60 | | 61 | */ 62 | 63 | // $app->middleware([ 64 | // App\Http\Middleware\ExampleMiddleware::class 65 | // ]); 66 | 67 | // $app->routeMiddleware([ 68 | // 'auth' => App\Http\Middleware\Authenticate::class, 69 | // ]); 70 | 71 | /* 72 | |-------------------------------------------------------------------------- 73 | | Register Service Providers 74 | |-------------------------------------------------------------------------- 75 | | 76 | | Here we will register all of the application's service providers which 77 | | are used to bind services into the container. Service providers are 78 | | totally optional, so you are not required to uncomment this line. 79 | | 80 | */ 81 | 82 | // $app->register(App\Providers\AppServiceProvider::class); 83 | // $app->register(App\Providers\AuthServiceProvider::class); 84 | // $app->register(App\Providers\EventServiceProvider::class); 85 | 86 | $app->register(LumenGeneratorServiceProvider::class); 87 | $app->register(FractalServiceProvider::class); 88 | 89 | $app->configure('tiktok'); 90 | 91 | /* 92 | |-------------------------------------------------------------------------- 93 | | Load The Application Routes 94 | |-------------------------------------------------------------------------- 95 | | 96 | | Next we will include the routes file so that they can all be added to 97 | | the application. This will provide all of the URLs the application 98 | | can respond to, as well as the controllers that may handle them. 99 | | 100 | */ 101 | 102 | $app->router->group([ 103 | 'namespace' => 'App\Http\Controllers', 104 | ], function ($router) { 105 | require __DIR__ . 
'/../routes/web.php'; 106 | }); 107 | 108 | return $app; 109 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "laravel/lumen", 3 | "description": "The Laravel Lumen Framework.", 4 | "keywords": [ 5 | "framework", 6 | "laravel", 7 | "lumen" 8 | ], 9 | "license": "MIT", 10 | "type": "project", 11 | "require": { 12 | "php": ">=7.1.3", 13 | "ext-json": "*", 14 | "campo/random-user-agent": "^1.3", 15 | "flipbox/lumen-generator": "^5.6", 16 | "guzzlehttp/guzzle": "^6.5", 17 | "laravel/lumen-framework": "5.8.*", 18 | "spatie/laravel-fractal": "^5.5", 19 | "symfony/css-selector": "^4.3", 20 | "symfony/dom-crawler": "^4.3" 21 | }, 22 | "require-dev": { 23 | "fzaninotto/faker": "^1.4", 24 | "phpunit/phpunit": "^7.0", 25 | "mockery/mockery": "^1.0" 26 | }, 27 | "autoload": { 28 | "classmap": [ 29 | "database/seeds", 30 | "database/factories" 31 | ], 32 | "psr-4": { 33 | "App\\": "app/" 34 | } 35 | }, 36 | "autoload-dev": { 37 | "classmap": [ 38 | "tests/" 39 | ] 40 | }, 41 | "scripts": { 42 | "post-root-package-install": [ 43 | "@php -r \"file_exists('.env') || copy('.env.example', '.env');\"" 44 | ], 45 | "test": [ 46 | "phpunit" 47 | ] 48 | }, 49 | "config": { 50 | "preferred-install": "dist", 51 | "sort-packages": true, 52 | "optimize-autoloader": true 53 | }, 54 | "minimum-stability": "dev", 55 | "prefer-stable": true 56 | } 57 | -------------------------------------------------------------------------------- /config/testing/database.php: -------------------------------------------------------------------------------- 1 | 'sqlite', 5 | 'connections' => [ 6 | 'sqlite' => [ 7 | 'driver' => 'sqlite', 8 | 'database' => ':memory:', 9 | 'prefix' => '', 10 | ], 11 | ], 12 | ]; 13 | -------------------------------------------------------------------------------- /config/tiktok.php: 
-------------------------------------------------------------------------------- 1 | env('TIkTOK_URL', 'https://www.tiktok.com'), 4 | 'script_user_path' => env('SCRIPT_USER_PATH', '/@:uniqueId'), 5 | 'script_video_path' => env('SCRIPT_VIDEO_PATH', '/@:uniqueId/video/:id') 6 | ]; 7 | -------------------------------------------------------------------------------- /database/factories/ModelFactory.php: -------------------------------------------------------------------------------- 1 | bigIncrements('id'); 18 | $table->string('short_name')->unique(); 19 | $table->string('full_name'); 20 | $table->boolean('is_verified')->default(false); 21 | $table->string('biography')->nullable(); 22 | $table->string('avatar')->nullable(); 23 | $table->unsignedBigInteger('fan_count')->default(0); 24 | $table->unsignedBigInteger('heart_count')->default(0); 25 | $table->unsignedBigInteger('following_count')->default(0); 26 | $table->unsignedBigInteger('video_count')->default(0); 27 | $table->timestamps(); 28 | }); 29 | } 30 | 31 | /** 32 | * Reverse the migrations. 
33 | * 34 | * @return void 35 | */ 36 | public function down() 37 | { 38 | Schema::dropIfExists('users'); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /database/migrations/2019_08_23_122708_create_videos_table.php: -------------------------------------------------------------------------------- 1 | bigIncrements('id'); 18 | $table->string('video_id')->unique(); 19 | $table->bigInteger('user_id')->unsigned(); 20 | $table->foreign('user_id')->references('id')->on('users'); 21 | $table->string('url'); 22 | $table->string('background_image'); 23 | $table->text('content_url'); 24 | $table->unsignedInteger('duration_in_second'); 25 | $table->string('description'); 26 | $table->string('sound_name')->nullable(); 27 | $table->unsignedBigInteger('like_count')->default(0); 28 | $table->unsignedBigInteger('comment_count')->default(0); 29 | $table->timestamps(); 30 | }); 31 | } 32 | 33 | /** 34 | * Reverse the migrations. 35 | * 36 | * @return void 37 | */ 38 | public function down() 39 | { 40 | Schema::dropIfExists('videos'); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /database/migrations/2019_08_25_224149_create_jobs_table.php: -------------------------------------------------------------------------------- 1 | bigIncrements('id'); 18 | $table->string('queue')->index(); 19 | $table->longText('payload'); 20 | $table->unsignedTinyInteger('attempts'); 21 | $table->unsignedInteger('reserved_at')->nullable(); 22 | $table->unsignedInteger('available_at'); 23 | $table->unsignedInteger('created_at'); 24 | }); 25 | } 26 | 27 | /** 28 | * Reverse the migrations. 
29 | * 30 | * @return void 31 | */ 32 | public function down() 33 | { 34 | Schema::dropIfExists('jobs'); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /database/seeds/DatabaseSeeder.php: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 13 | ./tests 14 | 15 | 16 | 17 | 18 | ./app 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /public/.htaccess: -------------------------------------------------------------------------------- 1 | 2 | 3 | Options -MultiViews -Indexes 4 | 5 | 6 | RewriteEngine On 7 | 8 | # Handle Authorization Header 9 | RewriteCond %{HTTP:Authorization} . 10 | RewriteRule .* - [E=HTTP_AUTHORIZATION:%{HTTP:Authorization}] 11 | 12 | # Redirect Trailing Slashes If Not A Folder... 13 | RewriteCond %{REQUEST_FILENAME} !-d 14 | RewriteCond %{REQUEST_URI} (.+)/$ 15 | RewriteRule ^ %1 [L,R=301] 16 | 17 | # Handle Front Controller... 18 | RewriteCond %{REQUEST_FILENAME} !-d 19 | RewriteCond %{REQUEST_FILENAME} !-f 20 | RewriteRule ^ index.php [L] 21 | 22 | -------------------------------------------------------------------------------- /public/index.php: -------------------------------------------------------------------------------- 1 | run(); 29 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ### TikTok Scraper™ 2 | 3 | The following project implements domcrawler and concurrent data retrieval from the TikTok website. Once promises are 4 | resolved, it parses user and video related information and stores it in the database for later consumption. 5 | 6 | In order not to slow down the scraping process, all database-related operations run in the background as jobs. 
7 | 8 | ##### Installation: 9 | 10 | ``` 11 | git clone https://github.com/moseseth/tiktok-scraper.git 12 | 13 | cd tiktok-scraper 14 | 15 | cp .env.example .env // update DB_USERNAME, DB_PASSWORD & CREATE 'tiktok' database 16 | 17 | composer install 18 | 19 | php artisan migrate 20 | 21 | php artisan serve 22 | 23 | php artisan queue:work --tries=3 24 | ``` 25 | 26 | ##### Run Tests: 27 | ``` 28 | composer run-script test 29 | ``` 30 | 31 | ##### Usage Example: 32 | * http://localhost:8000/api/users?id=@wilczewska,@realmadrid 33 | * http://localhost:8000/api/users/@wilczewska/videos?id=6727979845919214853,6722754487129246982 34 | 35 | 36 | ### Ongoing updates: 37 | https://github.com/moseseth/tiktok-scraper/blob/master/CHANGELOG.md 38 | 39 | 40 | Glossary : `#PHP 7.2, #MySQL, #Queue, #Guzzle 6.0, #Lumen` 41 | -------------------------------------------------------------------------------- /resources/views/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moseseth/tiktok-scraper/7c580031e4ea73c3c4839990cc2970d6dcea7f18/resources/views/.gitkeep -------------------------------------------------------------------------------- /routes/web.php: -------------------------------------------------------------------------------- 1 | get('/', function () use ($router) { 15 | return $router->app->version(); 16 | }); 17 | 18 | $router->group(['prefix' => 'api'], function () use ($router) { 19 | $router->get('users', 'UserController@getUsersProfile'); 20 | $router->get('users/{id}/videos', 'UserController@getUserVideos'); 21 | }); 22 | -------------------------------------------------------------------------------- /storage/app/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /storage/framework/cache/.gitignore: 
-------------------------------------------------------------------------------- 1 | * 2 | !data/ 3 | !.gitignore 4 | -------------------------------------------------------------------------------- /storage/framework/cache/data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /storage/framework/views/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /storage/logs/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /tests/TestCase.php: -------------------------------------------------------------------------------- 1 | get('/'); 16 | 17 | $this->assertEquals( 18 | $this->app->version(), $this->response->getContent() 19 | ); 20 | } 21 | 22 | public function testGetUsersProfileShouldReturnUserData() 23 | { 24 | $this->get('/api/users?id=@arsenal'); 25 | 26 | $this->seeStatusCode(200); 27 | $this->seeJsonStructure(["data" => ['*' => 28 | ["user_id", 29 | "full_name", 30 | "is_verified", 31 | "bio", 32 | "thumbnail_image", 33 | "total" => [ 34 | "fans", 35 | "hearts", 36 | "followings", 37 | "videos" 38 | ], 39 | "video" => [ 40 | "data" => [ 41 | '*' => [ 42 | "video_id" 43 | ] 44 | ] 45 | ]]]]); 46 | } 47 | 48 | public function testGetUsersProfileShouldReturnNullWithInvalidOrNonExistingId() 49 | { 50 | $this->get('/api/users?id=111'); 51 | 52 | $this->seeStatusCode(200); 53 | $this->seeJsonStructure([ 54 | "data" => [] 55 | ]); 56 | } 57 | 58 | public function testGetUserVideoShouldReturnEmptyOnNonExistingId() 59 | { 60 | $this->get('/api/users/@lorengray/videos?id=xxx'); 61 | 62 | $this->seeStatusCode(200); 63 | 
$this->seeJsonStructure([ 64 | "data" => [] 65 | ]); 66 | } 67 | 68 | public function testGetUserVideoShouldReturnVideoData() 69 | { 70 | $this->get('/api/users/@wilczewska/videos?id=6728320568954768646'); 71 | 72 | $this->seeStatusCode(200); 73 | $this->seeJsonStructure([ 74 | "data" => ['*' => [ 75 | 'url', 76 | 'upload_data', 77 | 'duration', 78 | 'sound', 79 | 'description', 80 | 'thumbnail_image', 81 | 'total' => [ 82 | 'likes', 83 | 'comments', 84 | ] 85 | ]] 86 | ]); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /tests/app/Jobs/HandleDatabaseJobTest.php: -------------------------------------------------------------------------------- 1 | faker = Faker::create(); 23 | $this->expectsJobs(HandleDatabaseOperations::class); 24 | 25 | $this->users = [ 26 | 'short_name' => 'realmadrid', 27 | 'full_name' => $this->faker->name, 28 | 'is_verified' => $this->faker->boolean, 29 | 'biography' => $this->faker->text, 30 | 'avatar' => $this->faker->imageUrl(), 31 | 'following_count' => $this->faker->numberBetween(10, 300), 32 | 'fan_count' => $this->faker->numberBetween(10, 300), 33 | 'heart_count' => $this->faker->numberBetween(10, 300), 34 | 'video_count' => $this->faker->numberBetween(10, 300), 35 | 'video' => [] 36 | ]; 37 | } 38 | 39 | public function testHandlerDispatchedFromUserRoute() 40 | { 41 | dispatch(new HandleDatabaseOperations($this->users, null)); 42 | 43 | $this->beginDatabaseTransaction(); 44 | User::updateOrCreate(['short_name' => $this->users['short_name']], $this->users); 45 | 46 | $this->seeInDatabase('users', [ 47 | 'short_name' => 'realmadrid' 48 | ]); 49 | } 50 | 51 | public function testHandlerDispatchedFromVideoRoute() 52 | { 53 | $videos = [ 54 | 'video_id' => 6728320568954768646, 55 | 'uid' => 'realmadrid', 56 | 'url' => $this->faker->url, 57 | 'background_image' => $this->faker->imageUrl(), 58 | 'content_url' => $this->faker->image(), 59 | 'duration_in_second' => $this->faker->time('s'), 60 
| 'sound_name' => $this->faker->sentence, 61 | 'description' => $this->faker->sentence, 62 | 'comment_count' => $this->faker->randomDigit, 63 | 'like_count' => $this->faker->randomDigit 64 | ]; 65 | 66 | dispatch(new HandleDatabaseOperations($this->users, $videos, true)); 67 | 68 | $this->beginDatabaseTransaction(); 69 | $user = User::updateOrCreate(['short_name' => $videos['uid']], $this->users); 70 | $videos['user_id'] = $user->id; 71 | Video::updateOrCreate(['video_id' => $videos['video_id']], $videos); 72 | 73 | $this->seeInDatabase('videos', [ 74 | 'video_id' => 6728320568954768646 75 | ]); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /tests/app/Services/TikTokScraperTest.php: -------------------------------------------------------------------------------- 1 | scraperService = new TikTokScraperService(); 17 | } 18 | 19 | public function testISO8601ToSecondsShouldReturnValidConversion() 20 | { 21 | $this->assertEquals(30, $this->scraperService->ISO8601ToSeconds('PT30S')); 22 | } 23 | 24 | public function testISO8601ToSecondsShouldReturnZeroWithInvalidParameter() 25 | { 26 | $this->assertEquals(0, $this->scraperService->ISO8601ToSeconds('834784')); 27 | } 28 | 29 | public function testAttributes() { 30 | $this->assertClassHasAttribute('userDetails', TikTokScraperService::class); 31 | $this->assertClassHasAttribute('videoDetails', TikTokScraperService::class); 32 | } 33 | } 34 | --------------------------------------------------------------------------------