├── CHANGELOG.md
├── LICENSE
├── README.md
├── composer.json
├── composer.lock
├── composer.phar
├── crawlbase-api.php
├── src
├── base-api.php
├── crawling-api.php
├── leads-api.php
├── scraper-api.php
├── screenshots-api.php
└── storage-api.php
├── test.php
└── vendor
├── autoload.php
└── composer
├── ClassLoader.php
├── InstalledVersions.php
├── LICENSE
├── autoload_classmap.php
├── autoload_namespaces.php
├── autoload_psr4.php
├── autoload_real.php
├── autoload_static.php
├── installed.json
├── installed.php
└── platform_check.php
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Library changelog
2 |
3 | Only major versions or things to communicate are written here. Minor changes or bug fixes might not appear in the changelog.
4 |
5 | ## 3.0.0
6 |
7 | Adds Screenshots API and Storage API.
8 | We have refactored the base class to allow for future development. There shouldn't be any breaking changes, but we have decided to release a new major version so that you are aware of the change and can report anything that breaks in your case.
9 |
10 | ## 2.0.0
11 |
12 | Version 2 deprecates the usage of CrawlbaseAPI (it is still usable, but will be removed in a future version) in favour of Crawlbase\CrawlingAPI. Please test the upgrade before deploying to production.
13 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Crawlbase API PHP class
2 |
3 | A lightweight, dependency-free PHP class that acts as a wrapper for the Crawlbase API.
4 |
5 | ## Installing
6 |
7 | Choose a way of installing:
8 |
9 | - Use [Packagist](https://packagist.org/packages/crawlbase/crawlbase) PHP package manager.
10 | - Download the project from GitHub and save it into your project so that you can require it with `require_once('crawlbase-php/src/[class].php')`.
11 |
12 | ## Crawling API
13 |
14 | First initialize the CrawlingAPI class. You can [get your free token here](https://crawlbase.com/signup?signup=github).
15 |
16 | ```php
17 | $api = new Crawlbase\CrawlingAPI(['token' => 'YOUR_TOKEN']);
18 | ```
19 |
20 | ### GET requests
21 |
22 | Pass the url that you want to scrape plus any options from the ones available in the [API documentation](https://crawlbase.com/docs/crawling-api/).
23 |
24 | ```php
25 | $api->get(string $url, array $options = []);
26 | ```
27 |
28 | Example:
29 |
30 | ```php
31 | $response = $api->get('https://www.facebook.com/britneyspears');
32 | if ($response->statusCode === 200) {
33 | echo $response->body;
34 | }
35 | ```
36 |
37 | You can pass any options from Crawlbase API.
38 |
39 | Example:
40 |
41 | ```php
42 | $response = $api->get('https://www.reddit.com/r/pics/comments/5bx4bx/thanks_obama/', [
43 | 'user_agent' => 'Mozilla/5.0 (Windows NT 6.2; rv:20.0) Gecko/20121202 Firefox/30.0',
44 | 'format' => 'json'
45 | ]);
46 | if ($response->statusCode === 200) {
47 | echo $response->body;
48 | }
49 | ```
50 |
51 | Optionally set the [store](https://crawlbase.com/docs/crawling-api/parameters/#store) parameter to `true` to store a copy of the API response in the [Crawlbase Cloud Storage](https://crawlbase.com/dashboard/storage).
52 |
53 | Example:
54 |
55 | ```php
56 | $response = $api->get('https://www.reddit.com/r/pics/comments/5bx4bx/thanks_obama/', [
57 | 'store' => true
58 | ]);
59 |
60 | if ($response->statusCode === 200) {
61 | echo 'storage url: ' . $response->headers->storage_url . PHP_EOL;
62 | }
63 | ```
64 |
65 | ### POST requests
66 |
67 | Pass the url that you want to scrape, the data that you want to send, which can be either an array or a string (for example, a JSON-encoded string), plus any options from the ones available in the [API documentation](https://crawlbase.com/docs/crawling-api/).
68 |
69 | ```php
70 | $api->post(string $url, array or string $data, array $options = []);
71 | ```
72 |
73 | Example:
74 |
75 | ```php
76 | $response = $api->post('https://producthunt.com/search', ['text' => 'example search']);
77 | if ($response->statusCode === 200) {
78 | echo $response->body;
79 | }
80 | ```
81 |
82 | You can send the data as `application/json` instead of `x-www-form-urlencoded` by setting option `post_content_type` as json.
83 |
84 | ```php
85 | $response = $api->post('https://httpbin.org/post', json_encode(['some_json' => 'with some value']), ['post_content_type' => 'json']);
86 | if ($response->statusCode === 200) {
87 | echo $response->body;
88 | }
89 | ```
90 |
91 | ### PUT requests
92 |
93 | Pass the url that you want to scrape, the data that you want to send, which can be either an array or a string (for example, a JSON-encoded string), plus any options from the ones available in the [API documentation](https://crawlbase.com/docs/crawling-api/).
94 |
95 | ```php
96 | $api->put(string $url, array or string $data, array $options = []);
97 | ```
98 |
99 | Example:
100 |
101 | ```php
102 | $response = $api->put('https://producthunt.com/search', ['text' => 'example search']);
103 | if ($response->statusCode === 200) {
104 | echo $response->body;
105 | }
106 | ```
107 |
108 | ### Javascript requests
109 |
110 | If you need to scrape a website built with JavaScript (React, Angular, Vue, etc.), you just need to pass your JavaScript token and use the same calls. Note that only `->get` is available for JavaScript tokens, not `->post`.
111 |
112 | ```php
113 | $api = new Crawlbase\CrawlingAPI(['token' => 'YOUR_JAVASCRIPT_TOKEN']);
114 | ```
115 |
116 | ```php
117 | $response = $api->get('https://www.nfl.com');
118 | if ($response->statusCode === 200) {
119 | echo $response->body;
120 | }
121 | ```
122 |
123 | In the same way, you can pass additional JavaScript options.
124 |
125 | ```php
126 | $response = $api->get('https://www.freelancer.com', ['page_wait' => 5000]);
127 | if ($response->statusCode === 200) {
128 | echo $response->body;
129 | }
130 | ```
131 |
132 | ## Original status
133 |
134 | You can always get the original status and the Crawlbase status from the response. Read the [Crawlbase documentation](https://crawlbase.com/docs/crawling-api/) to learn more about those statuses.
135 |
136 | ```php
137 | $response = $api->get('https://craiglist.com');
138 | echo $response->headers->original_status . PHP_EOL;
139 | echo $response->headers->pc_status . PHP_EOL;
140 | ```
141 |
142 | ## Scraper API
143 |
144 | First initialize the ScraperAPI class. You can [get your free token here](https://crawlbase.com/signup?signup=github). Please note that only some websites are supported, check the [API documentation](https://crawlbase.com/docs/scraper-api/) for more information.
145 |
146 | ```php
147 | $api = new Crawlbase\ScraperAPI(['token' => 'YOUR_TOKEN']);
148 | ```
149 |
150 | Pass the url that you want to scrape plus any options from the ones available in the [API documentation](https://crawlbase.com/docs/scraper-api/).
151 |
152 | Example:
153 |
154 | ```php
155 | $response = $api->get('https://www.amazon.com/DualSense-Wireless-Controller-PlayStation-5/dp/B08FC6C75Y/');
156 | echo 'status code: ' . $response->statusCode . PHP_EOL;
157 | if ($response->statusCode === 200) {
158 | var_dump($response->json); // Will print scraped Amazon details
159 | }
160 | ```
161 |
162 | ## Leads API
163 |
164 | First initialize the LeadsAPI class. You can [get your free token here](https://crawlbase.com/signup?signup=github).
165 |
166 | ```php
167 | $api = new Crawlbase\LeadsAPI(['token' => 'YOUR_TOKEN']);
168 | ```
169 |
170 | Pass the domain where you want to search for leads.
171 |
172 | Example:
173 |
174 | ```php
175 | $response = $api->getFromDomain('target.com');
176 | if ($response->statusCode === 200) {
177 | foreach ($response->json->leads as $key => $lead) {
178 | echo $lead->email . PHP_EOL;
179 | }
180 | }
181 | ```
182 |
183 | ## Screenshots API usage
184 |
185 | Initialize with your Screenshots API token and call the `get` method.
186 |
187 | ```php
188 | $api = new Crawlbase\ScreenshotsAPI(['token' => 'YOUR_TOKEN']);
189 | $response = $api->get('https://www.apple.com');
190 | echo 'success: ' . $response->headers->success . PHP_EOL;
191 | echo 'remaining requests: ' . $response->headers->remaining_requests . PHP_EOL;
192 | file_put_contents('apple.jpg', $response->body);
193 | ```
194 |
195 | or you can specify a callback that automatically saves the file to the temporary folder
196 |
197 | ```php
198 | $api = new Crawlbase\ScreenshotsAPI(['token' => 'YOUR_TOKEN']);
199 | $response = $api->get('https://www.apple.com', [
200 | 'callback' => function($filepath) {
201 | echo 'filepath: ' . $filepath . PHP_EOL;
202 | }
203 | ]);
204 | echo 'success: ' . $response->headers->success . PHP_EOL;
205 | echo 'remaining requests: ' . $response->headers->remaining_requests . PHP_EOL;
206 | ```
207 |
208 | or specifying a file path via `saveToPath` option
209 |
210 | ```php
211 | $api = new Crawlbase\ScreenshotsAPI(['token' => 'YOUR_TOKEN']);
212 | $response = $api->get('https://www.apple.com', [
213 | 'saveToPath' => 'apple.jpg',
214 | 'callback' => function($filepath) {
215 | echo 'filepath: ' . $filepath . PHP_EOL;
216 | }
217 | ]);
218 | echo 'success: ' . $response->headers->success . PHP_EOL;
219 | echo 'remaining requests: ' . $response->headers->remaining_requests . PHP_EOL;
220 | ```
221 |
222 | Note that the `$api->get($url, $options)` method accepts an [options](https://crawlbase.com/docs/screenshots-api/parameters) parameter.
223 |
224 | ## Storage API usage
225 |
226 | Initialize the Storage API using your private token.
227 |
228 | ```php
229 | $api = new Crawlbase\StorageAPI(['token' => 'YOUR_TOKEN']);
230 | ```
231 |
232 | Pass the [url](https://crawlbase.com/docs/storage-api/parameters/#url) that you want to get from [Crawlbase Storage](https://crawlbase.com/dashboard/storage).
233 |
234 | ```php
235 | $response = $api->get('https://www.apple.com');
236 |
237 | echo 'status code: ' . $response->statusCode . PHP_EOL;
238 | if ($response->statusCode === 200) {
239 | echo 'body: ' . $response->body . PHP_EOL;
240 | echo 'original status: ' . $response->headers->original_status . PHP_EOL;
241 | echo 'crawlbase status: ' . $response->headers->pc_status . PHP_EOL;
242 | echo 'rid: ' . $response->headers->rid . PHP_EOL;
243 | echo 'url: ' . $response->headers->url . PHP_EOL;
244 | echo 'stored date: ' . $response->headers->stored_at . PHP_EOL;
245 | }
246 | ```
247 |
248 | or you can use the [RID](https://crawlbase.com/docs/storage-api/parameters/#rid)
249 |
250 | ```php
251 | $response = $api->get('RID_REPLACE');
252 |
253 | echo 'status code: ' . $response->statusCode . PHP_EOL;
254 | if ($response->statusCode === 200) {
255 | echo 'body: ' . $response->body . PHP_EOL;
256 | echo 'original status: ' . $response->headers->original_status . PHP_EOL;
257 | echo 'crawlbase status: ' . $response->headers->pc_status . PHP_EOL;
258 | echo 'rid: ' . $response->headers->rid . PHP_EOL;
259 | echo 'url: ' . $response->headers->url . PHP_EOL;
260 | echo 'stored date: ' . $response->headers->stored_at . PHP_EOL;
261 | }
262 | ```
263 |
264 | Note: Either the RID or the URL must be sent. Each one is optional on its own, but it is mandatory to send one of the two.
265 |
266 | ### [Delete](https://crawlbase.com/docs/storage-api/delete/) request
267 |
268 | To delete a storage item from your storage area, use the correct RID
269 |
270 | ```php
271 | if ($api->delete('RID_REPLACE')) {
272 | echo 'delete success' . PHP_EOL;
273 | echo 'status code: ' . $api->response->statusCode . PHP_EOL;
274 | } else {
275 | echo 'delete failed' . PHP_EOL;
276 | echo 'status code: ' . $api->response->statusCode . PHP_EOL;
277 | }
278 | ```
279 |
280 | ### [Bulk](https://crawlbase.com/docs/storage-api/bulk/) request
281 |
282 | To do a bulk request with a list of RIDs, please send the list of rids as an array
283 |
284 | ```php
285 | $items = $api->bulk(['RID1', 'RID2', 'RID3', ...]);
286 | foreach ($items as $item) {
287 | echo 'body: ' . $item->body . PHP_EOL;
288 | echo 'stored at: ' . $item->stored_at . PHP_EOL;
289 | echo 'original status: ' . $item->original_status . PHP_EOL;
290 | echo 'crawlbase status: ' . $item->pc_status . PHP_EOL;
291 | echo 'rid: ' . $item->rid . PHP_EOL;
292 | echo 'url: ' . $item->url . PHP_EOL;
293 | echo PHP_EOL;
294 | }
295 | ```
296 |
297 | ### [RIDs](https://crawlbase.com/docs/storage-api/rids) request
298 |
299 | To request a bulk list of RIDs from your storage area
300 |
301 | ```php
302 | $rids = $api->rids();
303 | foreach ($rids as $rid) {
304 | echo $rid . PHP_EOL;
305 | }
306 | ```
307 |
308 | You can also specify a limit as a parameter
309 |
310 | ```php
311 | $rids = $api->rids(10);
312 | ```
313 |
314 | ### [Total Count](https://crawlbase.com/docs/storage-api/total_count)
315 |
316 | To get the total number of documents in your storage area
317 |
318 | ```php
319 | $totalCount = $api->totalCount();
320 | echo 'total count: ' . $totalCount . PHP_EOL;
321 | ```
322 |
323 | If you have questions or need help using the library, please open an issue or [contact us](https://crawlbase.com/contact).
324 |
325 | ---
326 |
327 | Copyright 2025 Crawlbase
328 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "crawlbase/crawlbase",
3 | "description": "A lightweight, dependency free PHP class that acts as wrapper for Crawlbase API",
4 | "keywords": [
5 | "crawlbase",
6 | "scraping",
7 | "scraping api",
8 | "scraper",
9 | "scraper api",
10 | "crawler",
11 | "crawler api",
12 | "crawling",
13 | "crawling api",
14 | "leads",
15 | "leads api"
16 | ],
17 | "homepage": "https://github.com/crawlbase-source/crawlbase-php",
18 | "license": "Apache-2.0",
19 | "authors": [
20 | {
21 | "name": "Crawlbase",
22 | "email": "info@crawlbase.com",
23 | "homepage": "https://crawlbase.com"
24 | }
25 | ],
26 | "autoload": {
27 | "classmap": [
28 | "crawlbase-api.php",
29 | "src/crawling-api.php",
30 | "src/scraper-api.php"
31 | ]
32 | },
33 | "require": {
34 | "php": ">=5.4.0"
35 | },
36 | "scripts": {
37 | "test": "php test.php"
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/composer.lock:
--------------------------------------------------------------------------------
1 | {
2 | "_readme": [
3 | "This file locks the dependencies of your project to a known state",
4 | "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
5 | "This file is @generated automatically"
6 | ],
7 | "content-hash": "f1eea2db399ab6f8e7576dd763b5ebdd",
8 | "packages": [],
9 | "packages-dev": [],
10 | "aliases": [],
11 | "minimum-stability": "stable",
12 | "stability-flags": [],
13 | "prefer-stable": false,
14 | "prefer-lowest": false,
15 | "platform": {
16 | "php": ">=5.4.0"
17 | },
18 | "platform-dev": [],
19 | "plugin-api-version": "2.0.0"
20 | }
21 |
--------------------------------------------------------------------------------
/composer.phar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crawlbase/crawlbase-php/ee4ec17aa220c2f0758c62a39ab4e6eeac325bc1/composer.phar
--------------------------------------------------------------------------------
/crawlbase-api.php:
--------------------------------------------------------------------------------
1 | token = $options['token'];
31 |
32 | $this->apiBaseUrl = isset($options['apiBaseUrl']) ? $options['apiBaseUrl'] : static::PUBLIC_API_URL;
33 | unset($options['apiBaseUrl']);
34 |
35 | $this->options = $options;
36 |
37 | $this->setEndpoint();
38 | }
39 |
/**
 * Read-only magic accessor for a whitelisted set of properties.
 *
 * Only 'response' and 'token' are exposed, and only when the property
 * actually exists on this object. Any other name yields null.
 *
 * @param string $property Name of the property being read.
 * @return mixed The property value, or null when access is not permitted.
 */
public function __get($property) {
    $readable = array('response', 'token');

    if (!in_array($property, $readable, true)) {
        return null;
    }
    if (!property_exists($this, $property)) {
        return null;
    }

    return $this->$property;
}
46 |
/**
 * Rebuilds $this->endPointUrl from the API base URL, a path and the token.
 *
 * @param string|null $newBasePath Path to use instead of $this->basePath;
 *                                 null keeps the instance's own base path.
 * @return void
 */
protected function setEndpoint($newBasePath = null) {
    if ($newBasePath === null) {
        $path = $this->basePath;
    } else {
        $path = $newBasePath;
    }

    // The token always becomes the first query parameter; buildURL() appends the rest with '&'.
    $this->endPointUrl = $this->apiBaseUrl . $path . '?token=' . $this->token;
}
51 |
/**
 * Sends one HTTP request to the configured endpoint and records the outcome
 * in $this->response.
 *
 * The response is assembled as an array ('headers', 'body', 'statusCode' and,
 * for JSON replies, 'json') and is cast to an object before being returned.
 *
 * @param array $options Query parameters for the call. Two keys are special:
 *                       'method' selects POST, PUT or DELETE (for any other
 *                       value no method option is set on the cURL handle), and
 *                       'beforeCurlExecCallback' is a callable that receives
 *                       the cURL handle right before it is executed.
 * @param mixed $data Request body; only attached for POST and PUT.
 * @return object The response object (the same value kept in $this->response).
 */
protected function request(array $options = array(), $data = null) {
    $this->response = array();
    $this->response['headers'] = array();

    // Extract the hook before the URL is built so it can never leak into the query string.
    $beforeCallback = null;
    if (array_key_exists('beforeCurlExecCallback', $options) && is_callable($options['beforeCurlExecCallback'])) {
        $beforeCallback = $options['beforeCurlExecCallback'];
    }
    unset($options['beforeCurlExecCallback']);

    // Read the method once; it may legitimately be absent (plain GET).
    $method = isset($options['method']) ? $options['method'] : null;

    $url = $this->buildURL($options);
    $curl = curl_init();

    curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); // Don't print the result
    curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, $this->timeout);
    curl_setopt($curl, CURLOPT_TIMEOUT, $this->timeout);
    curl_setopt($curl, CURLOPT_FAILONERROR, true);
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true); // Verify SSL connection
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2); // "" ""
    curl_setopt($curl, CURLOPT_HEADERFUNCTION, array($this, 'processResponseHeaders'));

    if ($this->advDebug) {
        curl_setopt($curl, CURLOPT_HEADER, true); // Display headers
        curl_setopt($curl, CURLINFO_HEADER_OUT, true); // Display output headers
        curl_setopt($curl, CURLOPT_VERBOSE, true); // Display communication with server
    }

    if ($method === 'POST') {
        curl_setopt($curl, CURLOPT_POST, true);
    } else if ($method === 'PUT') {
        // CURLOPT_PUT is the file-upload mode (it expects CURLOPT_INFILE); a PUT whose
        // body comes from CURLOPT_POSTFIELDS needs the verb set as a custom request.
        curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'PUT');
    } else if ($method === 'DELETE') {
        curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'DELETE');
    }

    if (!is_null($data) && ($method === 'POST' || $method === 'PUT')) {
        curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
    }

    try {
        if ($beforeCallback !== null) {
            $beforeCallback($curl);
        }

        $this->response['body'] = curl_exec($curl);
        $this->response['statusCode'] = curl_getinfo($curl, CURLINFO_HTTP_CODE);

        // Header names arrive lowercase over HTTP/2, so look the name up case-insensitively.
        $contentType = '';
        foreach ($this->response['headers'] as $name => $value) {
            if (strcasecmp(trim((string) $name), 'Content-Type') === 0) {
                $contentType = $value;
            }
        }
        $jsonRequested = !empty($options['format']) && $options['format'] === 'json';

        if ($contentType === 'application/json; charset=utf-8' || $jsonRequested) {
            $this->parseJsonResponse();
        }

        if ($this->debug || $this->advDebug) {
            $info = curl_getinfo($curl);
            echo '
';
            print_r($info);
            echo '
';
            if ($info['http_code'] == 0) {
                echo '
cURL error num: ' . curl_errno($curl);
                echo '
cURL error: ' . curl_error($curl);
            }
            echo '
Sent info:
';
            print_r($data);
            echo '
';
        }
    } catch (\Exception $ex) {
        // Fully qualified so the handler also matches when this class lives in a namespace.
        if ($this->debug || $this->advDebug) {
            echo '
cURL error num: ' . curl_errno($curl);
            echo '
cURL error: ' . curl_error($curl);
        }
        echo 'Error on cURL';
        $this->response = null;
    }

    curl_close($curl);

    // Cast to object for easier access
    $this->response = (object) $this->response;
    if (isset($this->response->headers)) {
        $this->response->headers = (object) $this->response->headers;
    }

    return $this->response;
}
138 |
/**
 * Builds the full request URL: the prepared endpoint plus the given options
 * as URL-encoded query parameters.
 *
 * @param array $options Request options; 'method' is excluded from the query.
 * @return string Endpoint URL followed by '&' and the encoded options.
 */
private function buildURL(array $options) {
    // $options is a by-value copy, so dropping 'method' here does not affect the caller.
    unset($options['method']);
    $query = http_build_query($options);

    return $this->endPointUrl . '&' . $query;
}
146 |
/**
 * cURL header callback: stores one response header line in
 * $this->response['headers'].
 *
 * Lines without a colon (the HTTP status line and the blank line that ends
 * the header section) are not headers and are ignored. Purely numeric values
 * are cast to int.
 *
 * @param mixed $curl The cURL handle (unused).
 * @param string $header Raw header line as delivered by cURL.
 * @return int Byte length of $header, reported back to cURL.
 */
private function processResponseHeaders($curl, $header) {
    // Split on the first colon only, so values that contain ':' (URLs, dates) stay intact.
    $parts = explode(':', $header, 2);

    if (count($parts) === 2) {
        $headerName = trim($parts[0]);
        $value = trim($parts[1]);
        if (is_numeric($value)) {
            $value = (int) $value;
        }
        if ($headerName !== '') {
            $this->response['headers'][$headerName] = $value;
        }
    }

    // The full length must be returned even for ignored lines.
    return strlen($header);
}
159 |
/**
 * Decodes the JSON response body and copies selected fields into the response.
 *
 * When the decoded document has a non-empty original_status, the fields
 * original_status, pc_status and url are copied into the headers; a non-empty
 * remaining_requests is copied as well. $this->response['json'] receives the
 * document's body member when that is non-empty, otherwise the whole document.
 *
 * @return void
 */
protected function parseJsonResponse() {
    $decoded = json_decode($this->response['body']);

    if (!empty($decoded->original_status)) {
        foreach (array('original_status', 'pc_status', 'url') as $field) {
            $this->response['headers'][$field] = $decoded->$field;
        }
    }

    if (!empty($decoded->remaining_requests)) {
        $this->response['headers']['remaining_requests'] = $decoded->remaining_requests;
    }

    $this->response['json'] = empty($decoded->body) ? $decoded : $decoded->body;
}
176 |
177 | }
178 |
--------------------------------------------------------------------------------
/src/crawling-api.php:
--------------------------------------------------------------------------------
1 | sanitizeStoreParam($options);
21 | return $this->request($options);
22 | }
23 |
/**
 * Sends a request with a body for the given URL.
 *
 * @param mixed $url     Target URL; stored in the options under 'url'.
 * @param mixed $data    Request body. Arrays are encoded with http_build_query();
 *                       any other value is passed through unchanged.
 * @param array $options Extra request options. 'method' defaults to 'POST' when not set.
 * @return mixed Whatever request() returns.
 * @throws \Exception When $url is null.
 */
public function post($url, $data, array $options = array()) {
    if ($url === null) {
        throw new \Exception('Url must be provided');
    }

    $options['url'] = $url;
    $this->sanitizeStoreParam($options);
    $options['method'] = isset($options['method']) ? $options['method'] : 'POST';

    $payload = is_array($data) ? http_build_query($data) : $data;

    return $this->request($options, $payload);
}
38 |
/**
 * Sends a PUT request with a body for the given URL.
 *
 * Forces the 'method' option to 'PUT' and delegates to post(), which handles the
 * URL check and body encoding.
 *
 * @param mixed $url     Target URL.
 * @param mixed $data    Request body; see post() for how arrays are encoded.
 * @param array $options Extra request options; any 'method' entry is overwritten.
 * @return mixed Whatever post() returns.
 * @throws \Exception When $url is null (raised by post()).
 */
public function put($url, $data, array $options = array()) {
    $options['method'] = 'PUT';

    // Argument order must match post($url, $data, $options); swapping the last two
    // would send the options as the body and lose the PUT method.
    return $this->post($url, $data, $options);
}
43 |
/**
 * Replaces a boolean true 'store' option with the string 'true', in place.
 *
 * Any other value (including a missing key) is left untouched.
 *
 * @param array $options Request options, modified by reference.
 * @return void
 */
private function sanitizeStoreParam(&$options) {
    if (!isset($options['store'])) {
        return;
    }

    if (true === $options['store']) {
        $options['store'] = 'true';
    }
}
49 | }
50 |
--------------------------------------------------------------------------------
/src/leads-api.php:
--------------------------------------------------------------------------------
1 | request($options);
23 | }
24 |
25 | }
26 |
--------------------------------------------------------------------------------
/src/scraper-api.php:
--------------------------------------------------------------------------------
1 | request($options);
20 | }
21 |
/**
 * Always fails: this class does not offer POST.
 *
 * @param mixed $url     Unused.
 * @param mixed $data    Unused.
 * @param array $options Unused.
 * @throws \Exception Always.
 */
public function post($url, $data, array $options = array()) {
    $reason = 'POST is not supported on the Scraper API';

    throw new \Exception($reason);
}
25 |
/**
 * Always fails: this class does not offer PUT.
 *
 * @param mixed $url     Unused.
 * @param mixed $data    Unused.
 * @param array $options Unused.
 * @throws \Exception Always.
 */
public function put($url, $data, array $options = array()) {
    $reason = 'PUT is not supported on the Scraper API';

    throw new \Exception($reason);
}
29 |
30 | }
31 |
--------------------------------------------------------------------------------
/src/screenshots-api.php:
--------------------------------------------------------------------------------
1 | request($options);
33 | if ($callback) {
34 | $filename = ($saveToPath !== null) ? $saveToPath : $this->generateFilename();
35 | file_put_contents($filename, $response->body);
36 | $callback($filename);
37 | }
38 | return $response;
39 | }
40 |
/**
 * Produces a unique '.jpg' path inside the system temporary directory.
 *
 * tempnam() is used only to obtain a unique base name. It also creates an empty
 * placeholder file under that base name, which is removed here so that no stray
 * file is left behind for every generated name.
 *
 * @return string Path of the form "<unique temp name>.jpg"; the file itself is not created.
 * @throws \Exception When a temporary name cannot be generated.
 */
private function generateFilename() {
    $tempName = tempnam(sys_get_temp_dir(), 'crawlbase-screenshot-');
    if ($tempName === false) {
        // Without this check the method would return the meaningless relative path ".jpg".
        throw new \Exception('Unable to generate a temporary file name for the screenshot');
    }

    // Drop the extension-less placeholder; only the ".jpg" path is handed to the caller.
    if (is_file($tempName)) {
        unlink($tempName);
    }

    return $tempName . '.jpg';
}
45 | }
46 |
--------------------------------------------------------------------------------
/src/storage-api.php:
--------------------------------------------------------------------------------
1 | setEndpoint('storage');
25 | return $this->request($options);
26 | }
27 |
/**
 * Issues a DELETE request against the 'storage' endpoint for the given RID.
 *
 * @param mixed $rid     Identifier sent as the 'rid' option; must not be empty().
 * @param array $options Extra request options; 'rid' and 'method' are overwritten.
 * @return bool True when the response status code is exactly 200.
 * @throws \Exception When $rid is empty.
 */
public function delete($rid, array $options = array()) {
    if (empty($rid)) {
        throw new \Exception('One or more RIDs are required');
    }

    $this->setEndpoint('storage');

    $options['rid'] = $rid;
    $options['method'] = 'DELETE';
    $this->request($options);

    $deleted = (200 === $this->response->statusCode);

    return $deleted;
}
39 |
/**
 * Posts a list of RIDs, JSON-encoded as {"rids": [...]}, to the 'storage/bulk' endpoint.
 *
 * A callback is placed in the options under 'beforeCurlExecCallback' that sets the
 * Content-Type header to application/json on the cURL handle it is given
 * (presumably invoked by request() before executing — confirm there).
 *
 * @param array $rids    RIDs to send; must contain at least one element.
 * @param array $options Extra request options; 'method' and 'beforeCurlExecCallback'
 *                       are overwritten.
 * @return mixed The 'json' member of the response.
 * @throws \Exception When $rids is empty.
 */
public function bulk(array $rids, array $options = array()) {
    if (empty($rids)) {
        throw new \Exception('One or more RIDs are required');
    }

    $options['method'] = 'POST';
    $options['beforeCurlExecCallback'] = function($curl) {
        $jsonHeader = array('Content-Type: application/json');
        curl_setopt($curl, CURLOPT_HTTPHEADER, $jsonHeader);
    };

    $payload = json_encode(array('rids' => $rids));

    $this->setEndpoint('storage/bulk');
    $this->request($options, $payload);

    return $this->response->json;
}
56 |
/**
 * Requests the 'storage/rids' endpoint and returns the decoded result.
 *
 * @param int   $limit   Sent as the 'limit' option only when greater than -1;
 *                       the default of -1 sends no limit.
 * @param array $options Extra request options.
 * @return mixed The 'json' member of the response.
 */
public function rids($limit = -1, array $options = array()) {
    $limitRequested = (-1 < $limit);
    if ($limitRequested) {
        $options['limit'] = $limit;
    }

    $this->setEndpoint('storage/rids');
    $this->request($options);

    $decoded = $this->response->json;

    return $decoded;
}
65 |
/**
 * Requests the 'storage/total_count' endpoint and returns its totalCount value.
 *
 * @param array $options Extra request options.
 * @return mixed The 'totalCount' member of the decoded JSON response.
 */
public function totalCount(array $options = array()) {
    $this->setEndpoint('storage/total_count');
    $this->request($options);

    $decoded = $this->response->json;

    return $decoded->totalCount;
}
71 | }
72 |
--------------------------------------------------------------------------------
/test.php:
--------------------------------------------------------------------------------
1 | statusCode === 200) {
11 | echo "Test passed\n";
12 | } else {
13 | echo "Test failed, expected statusCode 200 but got: " . $response->statusCode;
14 | exit(0);
15 | }
16 | }
17 |
// --- Crawling API, normal token ---------------------------------------------
$crawler = new Crawlbase\CrawlingAPI(['token' => $normalToken]);

// Plain GET.
processResponse($crawler->get('http://httpbin.org/anything?hello=world'));

// GET with a custom user agent option.
$userAgentOptions = ['user_agent' => 'Mozilla/5.0 (Windows NT 6.2; rv:20.0) Gecko/20121202 Firefox/20.0'];
processResponse($crawler->get('http://httpbin.org/anything?useragent=test', $userAgentOptions));

// GET asking for the JSON format.
processResponse($crawler->get('http://httpbin.org/anything', ['format' => 'json']));

// POST with an array body.
processResponse($crawler->post('http://httpbin.org/post', ['hello' => 'post']));

// POST with a pre-encoded JSON body and an explicit content type.
$jsonBody = json_encode(['hello' => 'json']);
processResponse($crawler->post('http://httpbin.org/post', $jsonBody, ['post_content_type' => 'application/json']));

// PUT with an array body.
processResponse($crawler->put('http://httpbin.org/put', ['hello' => 'put']));

// --- Crawling API, JavaScript token -----------------------------------------
$jsCrawler = new Crawlbase\CrawlingAPI(['token' => $javascriptToken]);

processResponse($jsCrawler->get('http://httpbin.org/anything?hello=world'));

// --- Scraper API --------------------------------------------------------------
$scraper = new Crawlbase\ScraperAPI(['token' => $normalToken]);

processResponse($scraper->get('https://www.amazon.com/DualSense-Wireless-Controller-PlayStation-5/dp/B08FC6C75Y/'));

// --- Leads API ----------------------------------------------------------------
$leads = new Crawlbase\LeadsAPI(['token' => $normalToken]);

processResponse($leads->getFromDomain('amazon.com'));
43 |
--------------------------------------------------------------------------------
/vendor/autoload.php:
--------------------------------------------------------------------------------
1 |
7 | * Jordi Boggiano
8 | *
9 | * For the full copyright and license information, please view the LICENSE
10 | * file that was distributed with this source code.
11 | */
12 |
13 | namespace Composer\Autoload;
14 |
15 | /**
16 | * ClassLoader implements a PSR-0, PSR-4 and classmap class loader.
17 | *
18 | * $loader = new \Composer\Autoload\ClassLoader();
19 | *
20 | * // register classes with namespaces
21 | * $loader->add('Symfony\Component', __DIR__.'/component');
22 | * $loader->add('Symfony', __DIR__.'/framework');
23 | *
24 | * // activate the autoloader
25 | * $loader->register();
26 | *
27 | * // to enable searching the include path (eg. for PEAR packages)
28 | * $loader->setUseIncludePath(true);
29 | *
30 | * In this example, if you try to use a class in the Symfony\Component
31 | * namespace or one of its children (Symfony\Component\Console for instance),
32 | * the autoloader will first look for the class under the component/
33 | * directory, and it will then fallback to the framework/ directory if not
34 | * found before giving up.
35 | *
36 | * This class is loosely based on the Symfony UniversalClassLoader.
37 | *
38 | * @author Fabien Potencier
39 | * @author Jordi Boggiano
40 | * @see https://www.php-fig.org/psr/psr-0/
41 | * @see https://www.php-fig.org/psr/psr-4/
42 | */
class ClassLoader
{
    // PSR-4
    /** @var array First character of a prefix => (prefix => length of that prefix). */
    private $prefixLengthsPsr4 = array();
    /** @var array PSR-4 prefix => list of base directories. */
    private $prefixDirsPsr4 = array();
    /** @var array Directories tried for PSR-4 lookups after the registered prefixes. */
    private $fallbackDirsPsr4 = array();

    // PSR-0
    /** @var array First character of a prefix => (prefix => list of root directories). */
    private $prefixesPsr0 = array();
    /** @var array Directories tried for PSR-0 lookups after the registered prefixes. */
    private $fallbackDirsPsr0 = array();

    /** @var bool Whether findFileWithExtension() also resolves against the include path. */
    private $useIncludePath = false;
    /** @var array Class name => file path. */
    private $classMap = array();
    /** @var bool When true, findFile() returns false for anything not in the class map. */
    private $classMapAuthoritative = false;
    /** @var array Class name => true for classes findFile() failed to locate. */
    private $missingClasses = array();
    /** @var string|null Key prefix for APCu lookups, or null when APCu caching is off. */
    private $apcuPrefix;

    /**
     * Returns all registered PSR-0 prefixes merged into a single array.
     *
     * @return array Prefix => directories, or an empty array when none are registered.
     */
    public function getPrefixes()
    {
        if (!empty($this->prefixesPsr0)) {
            // The property is grouped by first character; merge the groups into one map.
            return call_user_func_array('array_merge', array_values($this->prefixesPsr0));
        }

        return array();
    }

    /**
     * @return array PSR-4 prefix => list of base directories.
     */
    public function getPrefixesPsr4()
    {
        return $this->prefixDirsPsr4;
    }

    /**
     * @return array The PSR-0 fallback directories.
     */
    public function getFallbackDirs()
    {
        return $this->fallbackDirsPsr0;
    }

    /**
     * @return array The PSR-4 fallback directories.
     */
    public function getFallbackDirsPsr4()
    {
        return $this->fallbackDirsPsr4;
    }

    /**
     * @return array Class name => file path.
     */
    public function getClassMap()
    {
        return $this->classMap;
    }

    /**
     * @param array $classMap Class to filename map
     */
    public function addClassMap(array $classMap)
    {
        if ($this->classMap) {
            $this->classMap = array_merge($this->classMap, $classMap);
        } else {
            $this->classMap = $classMap;
        }
    }

    /**
     * Registers a set of PSR-0 directories for a given prefix, either
     * appending or prepending to the ones previously set for this prefix.
     *
     * @param string       $prefix  The prefix
     * @param array|string $paths   The PSR-0 root directories
     * @param bool         $prepend Whether to prepend the directories
     */
    public function add($prefix, $paths, $prepend = false)
    {
        if (!$prefix) {
            // An empty prefix targets the fallback directory list.
            if ($prepend) {
                $this->fallbackDirsPsr0 = array_merge(
                    (array) $paths,
                    $this->fallbackDirsPsr0
                );
            } else {
                $this->fallbackDirsPsr0 = array_merge(
                    $this->fallbackDirsPsr0,
                    (array) $paths
                );
            }

            return;
        }

        $first = $prefix[0];
        if (!isset($this->prefixesPsr0[$first][$prefix])) {
            // First registration for this prefix: nothing to merge with.
            $this->prefixesPsr0[$first][$prefix] = (array) $paths;

            return;
        }
        if ($prepend) {
            $this->prefixesPsr0[$first][$prefix] = array_merge(
                (array) $paths,
                $this->prefixesPsr0[$first][$prefix]
            );
        } else {
            $this->prefixesPsr0[$first][$prefix] = array_merge(
                $this->prefixesPsr0[$first][$prefix],
                (array) $paths
            );
        }
    }

    /**
     * Registers a set of PSR-4 directories for a given namespace, either
     * appending or prepending to the ones previously set for this namespace.
     *
     * @param string       $prefix  The prefix/namespace, with trailing '\\'
     * @param array|string $paths   The PSR-4 base directories
     * @param bool         $prepend Whether to prepend the directories
     *
     * @throws \InvalidArgumentException
     */
    public function addPsr4($prefix, $paths, $prepend = false)
    {
        if (!$prefix) {
            // Register directories for the root namespace.
            if ($prepend) {
                $this->fallbackDirsPsr4 = array_merge(
                    (array) $paths,
                    $this->fallbackDirsPsr4
                );
            } else {
                $this->fallbackDirsPsr4 = array_merge(
                    $this->fallbackDirsPsr4,
                    (array) $paths
                );
            }
        } elseif (!isset($this->prefixDirsPsr4[$prefix])) {
            // Register directories for a new namespace.
            $length = strlen($prefix);
            if ('\\' !== $prefix[$length - 1]) {
                throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
            }
            $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
            $this->prefixDirsPsr4[$prefix] = (array) $paths;
        } elseif ($prepend) {
            // Prepend directories for an already registered namespace.
            $this->prefixDirsPsr4[$prefix] = array_merge(
                (array) $paths,
                $this->prefixDirsPsr4[$prefix]
            );
        } else {
            // Append directories for an already registered namespace.
            $this->prefixDirsPsr4[$prefix] = array_merge(
                $this->prefixDirsPsr4[$prefix],
                (array) $paths
            );
        }
    }

    /**
     * Registers a set of PSR-0 directories for a given prefix,
     * replacing any others previously set for this prefix.
     *
     * @param string       $prefix The prefix
     * @param array|string $paths  The PSR-0 base directories
     */
    public function set($prefix, $paths)
    {
        if (!$prefix) {
            $this->fallbackDirsPsr0 = (array) $paths;
        } else {
            $this->prefixesPsr0[$prefix[0]][$prefix] = (array) $paths;
        }
    }

    /**
     * Registers a set of PSR-4 directories for a given namespace,
     * replacing any others previously set for this namespace.
     *
     * @param string       $prefix The prefix/namespace, with trailing '\\'
     * @param array|string $paths  The PSR-4 base directories
     *
     * @throws \InvalidArgumentException
     */
    public function setPsr4($prefix, $paths)
    {
        if (!$prefix) {
            $this->fallbackDirsPsr4 = (array) $paths;
        } else {
            $length = strlen($prefix);
            if ('\\' !== $prefix[$length - 1]) {
                throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
            }
            $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
            $this->prefixDirsPsr4[$prefix] = (array) $paths;
        }
    }

    /**
     * Turns on searching the include path for class files.
     *
     * @param bool $useIncludePath
     */
    public function setUseIncludePath($useIncludePath)
    {
        $this->useIncludePath = $useIncludePath;
    }

    /**
     * Can be used to check if the autoloader uses the include path to check
     * for classes.
     *
     * @return bool
     */
    public function getUseIncludePath()
    {
        return $this->useIncludePath;
    }

    /**
     * Turns off searching the prefix and fallback directories for classes
     * that have not been registered with the class map.
     *
     * @param bool $classMapAuthoritative
     */
    public function setClassMapAuthoritative($classMapAuthoritative)
    {
        $this->classMapAuthoritative = $classMapAuthoritative;
    }

    /**
     * Should class lookup fail if not found in the current class map?
     *
     * @return bool
     */
    public function isClassMapAuthoritative()
    {
        return $this->classMapAuthoritative;
    }

    /**
     * APCu prefix to use to cache found/not-found classes, if the extension is enabled.
     *
     * @param string|null $apcuPrefix
     */
    public function setApcuPrefix($apcuPrefix)
    {
        // The prefix is kept only when apcu_fetch() exists and apc.enabled is truthy; otherwise null is stored.
        $this->apcuPrefix = function_exists('apcu_fetch') && filter_var(ini_get('apc.enabled'), FILTER_VALIDATE_BOOLEAN) ? $apcuPrefix : null;
    }

    /**
     * The APCu prefix in use, or null if APCu caching is not enabled.
     *
     * @return string|null
     */
    public function getApcuPrefix()
    {
        return $this->apcuPrefix;
    }

    /**
     * Registers this instance as an autoloader.
     *
     * @param bool $prepend Whether to prepend the autoloader or not
     */
    public function register($prepend = false)
    {
        spl_autoload_register(array($this, 'loadClass'), true, $prepend);
    }

    /**
     * Unregisters this instance as an autoloader.
     */
    public function unregister()
    {
        spl_autoload_unregister(array($this, 'loadClass'));
    }

    /**
     * Loads the given class or interface.
     *
     * @param  string    $class The name of the class
     * @return bool|null True if loaded, null otherwise
     */
    public function loadClass($class)
    {
        if ($file = $this->findFile($class)) {
            includeFile($file);

            return true;
        }
    }

    /**
     * Finds the path to the file where the class is defined.
     *
     * Lookup order: class map, then the "known missing" / authoritative short-circuit,
     * then the APCu cache (when a prefix is set), then the PSR-4/PSR-0 search.
     *
     * @param string $class The name of the class
     *
     * @return string|false The path if found, false otherwise
     */
    public function findFile($class)
    {
        // class map lookup
        if (isset($this->classMap[$class])) {
            return $this->classMap[$class];
        }
        if ($this->classMapAuthoritative || isset($this->missingClasses[$class])) {
            return false;
        }
        if (null !== $this->apcuPrefix) {
            $file = apcu_fetch($this->apcuPrefix.$class, $hit);
            if ($hit) {
                return $file;
            }
        }

        $file = $this->findFileWithExtension($class, '.php');

        // Search for Hack files if we are running on HHVM
        if (false === $file && defined('HHVM_VERSION')) {
            $file = $this->findFileWithExtension($class, '.hh');
        }

        if (null !== $this->apcuPrefix) {
            // The result is cached whether it is a path or false.
            apcu_add($this->apcuPrefix.$class, $file);
        }

        if (false === $file) {
            // Remember that this class does not exist.
            $this->missingClasses[$class] = true;
        }

        return $file;
    }

    /**
     * Searches the registered PSR-4 and PSR-0 locations for a class file.
     *
     * Tried in order: PSR-4 prefixes (longest matching namespace first), PSR-4 fallback
     * directories, PSR-0 prefixes, PSR-0 fallback directories, and finally the include
     * path when that option is enabled.
     *
     * @param string $class The name of the class
     * @param string $ext   File extension to append, including the leading dot
     *
     * @return string|false The first existing file path, or false when nothing matches
     */
    private function findFileWithExtension($class, $ext)
    {
        // PSR-4 lookup
        $logicalPathPsr4 = strtr($class, '\\', DIRECTORY_SEPARATOR) . $ext;

        $first = $class[0];
        if (isset($this->prefixLengthsPsr4[$first])) {
            $subPath = $class;
            // Strip one namespace segment per iteration, from the right.
            while (false !== $lastPos = strrpos($subPath, '\\')) {
                $subPath = substr($subPath, 0, $lastPos);
                $search = $subPath . '\\';
                if (isset($this->prefixDirsPsr4[$search])) {
                    $pathEnd = DIRECTORY_SEPARATOR . substr($logicalPathPsr4, $lastPos + 1);
                    foreach ($this->prefixDirsPsr4[$search] as $dir) {
                        if (file_exists($file = $dir . $pathEnd)) {
                            return $file;
                        }
                    }
                }
            }
        }

        // PSR-4 fallback dirs
        foreach ($this->fallbackDirsPsr4 as $dir) {
            if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr4)) {
                return $file;
            }
        }

        // PSR-0 lookup
        if (false !== $pos = strrpos($class, '\\')) {
            // namespaced class name
            $logicalPathPsr0 = substr($logicalPathPsr4, 0, $pos + 1)
                . strtr(substr($logicalPathPsr4, $pos + 1), '_', DIRECTORY_SEPARATOR);
        } else {
            // PEAR-like class name
            $logicalPathPsr0 = strtr($class, '_', DIRECTORY_SEPARATOR) . $ext;
        }

        if (isset($this->prefixesPsr0[$first])) {
            foreach ($this->prefixesPsr0[$first] as $prefix => $dirs) {
                if (0 === strpos($class, $prefix)) {
                    foreach ($dirs as $dir) {
                        if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
                            return $file;
                        }
                    }
                }
            }
        }

        // PSR-0 fallback dirs
        foreach ($this->fallbackDirsPsr0 as $dir) {
            if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
                return $file;
            }
        }

        // PSR-0 include paths.
        if ($this->useIncludePath && $file = stream_resolve_include_path($logicalPathPsr0)) {
            return $file;
        }

        return false;
    }
}
436 |
/**
 * Scope isolated include.
 *
 * Prevents access to $this/self from included files.
 *
 * Declared as a plain function, outside the class, so the included file runs in
 * this function's scope, where the only local variable is $file.
 *
 * @param string $file Path of the file to include
 */
function includeFile($file)
{
    include $file;
}
446 |
--------------------------------------------------------------------------------
/vendor/composer/InstalledVersions.php:
--------------------------------------------------------------------------------
1 |
26 | array (
27 | 'pretty_version' => 'dev-master',
28 | 'version' => 'dev-master',
29 | 'aliases' =>
30 | array (
31 | ),
32 | 'reference' => '60d523539ad0ba56c7ef94b02d0e666fe398f56e',
33 | 'name' => 'crawlbase/crawlbase',
34 | ),
35 | 'versions' =>
36 | array (
37 | 'crawlbase/crawlbase' =>
38 | array (
39 | 'pretty_version' => 'dev-master',
40 | 'version' => 'dev-master',
41 | 'aliases' =>
42 | array (
43 | ),
44 | 'reference' => '60d523539ad0ba56c7ef94b02d0e666fe398f56e',
45 | ),
46 | ),
47 | );
48 |
49 |
50 |
51 |
52 |
53 |
54 |
/**
 * Lists the names of all packages recorded in the installed data.
 *
 * @return array Keys of the 'versions' entry.
 */
public static function getInstalledPackages()
{
    $versions = self::$installed['versions'];

    return array_keys($versions);
}
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
/**
 * Tells whether a package has an entry in the installed data.
 *
 * @param string $packageName
 * @return bool True when the 'versions' entry contains $packageName.
 */
public static function isInstalled($packageName)
{
    $known = isset(self::$installed['versions'][$packageName]);

    return $known;
}
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
/**
 * Checks whether the version ranges recorded for a package match a constraint.
 *
 * @param VersionParser $parser      Parser used for both the constraint and the recorded ranges.
 * @param string        $packageName
 * @param mixed         $constraint  Constraint handed to the parser's parseConstraints().
 * @return mixed Result of matches() on the parsed recorded ranges.
 * @throws \OutOfBoundsException When the package is not installed (from getVersionRanges()).
 */
public static function satisfies(VersionParser $parser, $packageName, $constraint)
{
    $required = $parser->parseConstraints($constraint);
    $installedRanges = self::getVersionRanges($packageName);

    return $parser->parseConstraints($installedRanges)->matches($required);
}
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
/**
 * Builds a ' || '-separated string of every version recorded for a package.
 *
 * The pretty version comes first (when set), followed by the contents of the
 * 'aliases', 'replaced' and 'provided' entries, in that order, for each one present.
 *
 * @param string $packageName
 * @return string
 * @throws \OutOfBoundsException When the package is not installed.
 */
public static function getVersionRanges($packageName)
{
    if (!isset(self::$installed['versions'][$packageName])) {
        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    $package = self::$installed['versions'][$packageName];

    $ranges = isset($package['pretty_version']) ? array($package['pretty_version']) : array();
    foreach (array('aliases', 'replaced', 'provided') as $section) {
        if (array_key_exists($section, $package)) {
            $ranges = array_merge($ranges, $package[$section]);
        }
    }

    return implode(' || ', $ranges);
}
125 |
126 |
127 |
128 |
129 |
/**
 * Returns the 'version' recorded for a package.
 *
 * @param string $packageName
 * @return mixed|null The recorded version, or null when none is set.
 * @throws \OutOfBoundsException When the package is not installed.
 */
public static function getVersion($packageName)
{
    if (!isset(self::$installed['versions'][$packageName])) {
        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    $package = self::$installed['versions'][$packageName];

    return isset($package['version']) ? $package['version'] : null;
}
142 |
143 |
144 |
145 |
146 |
/**
 * Returns the 'pretty_version' recorded for a package.
 *
 * @param string $packageName
 * @return mixed|null The recorded pretty version, or null when none is set.
 * @throws \OutOfBoundsException When the package is not installed.
 */
public static function getPrettyVersion($packageName)
{
    if (!isset(self::$installed['versions'][$packageName])) {
        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    $package = self::$installed['versions'][$packageName];

    return isset($package['pretty_version']) ? $package['pretty_version'] : null;
}
159 |
160 |
161 |
162 |
163 |
/**
 * Returns the 'reference' recorded for a package.
 *
 * @param string $packageName
 * @return mixed|null The recorded reference, or null when none is set.
 * @throws \OutOfBoundsException When the package is not installed.
 */
public static function getReference($packageName)
{
    if (!isset(self::$installed['versions'][$packageName])) {
        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    $package = self::$installed['versions'][$packageName];

    return isset($package['reference']) ? $package['reference'] : null;
}
176 |
177 |
178 |
179 |
180 |
/**
 * Returns the 'root' entry of the installed data.
 *
 * @return mixed
 */
public static function getRootPackage()
{
    $root = self::$installed['root'];

    return $root;
}
185 |
186 |
187 |
188 |
189 |
190 |
191 |
/**
 * Returns the installed data exactly as stored.
 *
 * @return mixed
 */
public static function getRawData()
{
    $raw = self::$installed;

    return $raw;
}
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
/**
 * Replaces the stored installed data with the given value.
 *
 * @param mixed $data New installed data; stored without validation.
 * @return void
 */
public static function reload($data)
{
    $replacement = $data;

    self::$installed = $replacement;
}
219 | }
220 |
--------------------------------------------------------------------------------
/vendor/composer/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Copyright (c) Nils Adermann, Jordi Boggiano
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is furnished
9 | to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in all
12 | copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 |
22 |
--------------------------------------------------------------------------------
/vendor/composer/autoload_classmap.php:
--------------------------------------------------------------------------------
1 | $vendorDir . '/composer/InstalledVersions.php',
10 | 'CrawlbaseAPI' => $baseDir . '/crawlbase-api.php',
11 | 'Crawlbase\\CrawlingAPI' => $baseDir . '/src/crawling-api.php',
12 | );
13 |
--------------------------------------------------------------------------------
/vendor/composer/autoload_namespaces.php:
--------------------------------------------------------------------------------
1 | = 50600 && !defined('HHVM_VERSION') && (!function_exists('zend_loader_file_encoded') || !zend_loader_file_encoded());
32 | if ($useStaticLoader) {
33 | require __DIR__ . '/autoload_static.php';
34 |
35 | call_user_func(\Composer\Autoload\ComposerStaticInit0e5268951516f12545a1acb3acee14b4::getInitializer($loader));
36 | } else {
37 | $map = require __DIR__ . '/autoload_namespaces.php';
38 | foreach ($map as $namespace => $path) {
39 | $loader->set($namespace, $path);
40 | }
41 |
42 | $map = require __DIR__ . '/autoload_psr4.php';
43 | foreach ($map as $namespace => $path) {
44 | $loader->setPsr4($namespace, $path);
45 | }
46 |
47 | $classMap = require __DIR__ . '/autoload_classmap.php';
48 | if ($classMap) {
49 | $loader->addClassMap($classMap);
50 | }
51 | }
52 |
53 | $loader->register(true);
54 |
55 | return $loader;
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/vendor/composer/autoload_static.php:
--------------------------------------------------------------------------------
1 | __DIR__ . '/..' . '/composer/InstalledVersions.php',
11 | 'CrawlbaseAPI' => __DIR__ . '/../..' . '/crawlbase-api.php',
12 | 'Crawlbase\\CrawlingAPI' => __DIR__ . '/../..' . '/src/crawling-api.php',
13 | );
14 |
/**
 * Creates a closure that copies the static class map into the given loader.
 *
 * The closure is bound to the ClassLoader class scope so that it can assign
 * the loader's private classMap property.
 *
 * @param ClassLoader $loader Loader that will receive the class map.
 * @return \Closure
 */
public static function getInitializer(ClassLoader $loader)
{
    $initializer = function () use ($loader) {
        $loader->classMap = ComposerStaticInit0e5268951516f12545a1acb3acee14b4::$classMap;
    };

    return \Closure::bind($initializer, null, ClassLoader::class);
}
22 | }
23 |
--------------------------------------------------------------------------------
/vendor/composer/installed.json:
--------------------------------------------------------------------------------
1 | {
2 | "packages": [],
3 | "dev": true,
4 | "dev-package-names": []
5 | }
6 |
--------------------------------------------------------------------------------
/vendor/composer/installed.php:
--------------------------------------------------------------------------------
1 |
3 | array (
4 | 'pretty_version' => 'dev-master',
5 | 'version' => 'dev-master',
6 | 'aliases' =>
7 | array (
8 | ),
9 | 'reference' => '60d523539ad0ba56c7ef94b02d0e666fe398f56e',
10 | 'name' => 'crawlbase/crawlbase',
11 | ),
12 | 'versions' =>
13 | array (
14 | 'crawlbase/crawlbase' =>
15 | array (
16 | 'pretty_version' => 'dev-master',
17 | 'version' => 'dev-master',
18 | 'aliases' =>
19 | array (
20 | ),
21 | 'reference' => '60d523539ad0ba56c7ef94b02d0e666fe398f56e',
22 | ),
23 | ),
24 | );
25 |
--------------------------------------------------------------------------------
/vendor/composer/platform_check.php:
--------------------------------------------------------------------------------
1 | = 50400)) {
8 | $issues[] = 'Your Composer dependencies require a PHP version ">= 5.4.0". You are running ' . PHP_VERSION . '.';
9 | }
10 |
11 | if ($issues) {
12 | if (!headers_sent()) {
13 | header('HTTP/1.1 500 Internal Server Error');
14 | }
15 | if (!ini_get('display_errors')) {
16 | if (PHP_SAPI === 'cli' || PHP_SAPI === 'phpdbg') {
17 | fwrite(STDERR, 'Composer detected issues in your platform:' . PHP_EOL.PHP_EOL . implode(PHP_EOL, $issues) . PHP_EOL.PHP_EOL);
18 | } elseif (!headers_sent()) {
19 | echo 'Composer detected issues in your platform:' . PHP_EOL.PHP_EOL . str_replace('You are running '.PHP_VERSION.'.', '', implode(PHP_EOL, $issues)) . PHP_EOL.PHP_EOL;
20 | }
21 | }
22 | trigger_error(
23 | 'Composer detected issues in your platform: ' . implode(' ', $issues),
24 | E_USER_ERROR
25 | );
26 | }
27 |
--------------------------------------------------------------------------------