├── .github
    └── ISSUE_TEMPLATE
    │   ├── bug-report.md
    │   └── feature_request.md
├── .gitignore
├── .gitlab-ci.yml
├── .npmignore
├── .travis.yml
├── CONTRIBUTING.md
├── FAQ.md
├── LICENSE
├── README.md
├── assets
    ├── logo.png
    └── logo.svg
├── docs
    ├── .gitignore
    ├── Gemfile
    ├── Gemfile.lock
    ├── _config.yml
    ├── _layouts
    │   └── default.html
    ├── api-change.md
    ├── assets
    │   └── img
    │   │   └── logo.png
    ├── capture.webm
    ├── favicon.ico
    └── index.md
├── examples
    ├── README.md
    ├── complexity.ts
    ├── package-lock.json
    ├── package.json
    ├── server.ts
    ├── tsconfig.json
    └── tslint.json
├── index.ts
├── man
    └── instamancer.1
├── package-lock.json
├── package.json
├── plugins
    ├── README.md
    ├── index.ts
    ├── plugin.ts
    └── plugins
    │   ├── index.ts
    │   └── largeFirst.ts
├── src
    ├── api
    │   ├── api.ts
    │   ├── instagram.ts
    │   ├── postIdSet.ts
    │   ├── search.ts
    │   └── types.ts
    ├── cli.ts
    ├── getpool
    │   └── getPool.ts
    └── http
    │   ├── depot.ts
    │   ├── download.ts
    │   └── s3.ts
├── tests
    ├── __fixtures__
    │   ├── FakePage.ts
    │   └── QuickGraft.ts
    ├── server.ts
    ├── test.spec.ts
    └── tsconfig.json
├── tsconfig.json
├── tslint.json
└── utils
    └── validation-generator
        ├── .gitignore
        ├── README.md
        ├── generate.ts
        └── get-input.ts


/.github/ISSUE_TEMPLATE/bug-report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help Instamancer improve
 4 | title: "[BUG]"
 5 | labels: bug
 6 | assignees: ScriptSmith
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior.
15 | 
16 | - If the bug is related to the CLI, include the command you used.
17 | - If it's related to using the module, provide some sample code.
18 | - If it's related to the module itself, indicate the source of the problem.
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Output**
24 | If applicable, add Instamancer's output in a code block
25 | 
26 | ```
27 | here
28 | ```
29 | 
30 | **Setup (please complete the following information):**
31 |  - OS: [e.g. Arch Linux, MacOS]
32 |  - Instamancer version [e.g. v1.1.4]
33 |  - Node version [e.g. v11.6.0]
 34 |  - NPM version (if applicable) [e.g. 6.5.0]
35 | 
36 | **Additional context**
37 | Add any other context about the problem here.
38 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: "[FEATURE]"
 5 | labels: enhancement
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .idea/
 2 | node_modules/
 3 | examples/node_modules/
 4 | coverage/
 5 | downloads/
 6 | *.map
 7 | *.js
 8 | *.d.ts
 9 | *.tgz
10 | *.log
11 | *.csv
12 | 


--------------------------------------------------------------------------------
/.gitlab-ci.yml:
--------------------------------------------------------------------------------
 1 | default:
 2 |     image: node:latest
 3 |     variables:
 4 |         CI: 1
 5 |         NO_SANDBOX: 1
 6 |     before_script:
 7 |         - npm install -g codacy-coverage
 8 | 
 9 |         - apt-get update
10 |         - apt-get install -y wget gnupg
11 |         - wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -
12 |         - sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list'
13 |         - apt-get update
14 |         - apt-get install -y google-chrome-unstable fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-freefont-ttf libxss1 --no-install-recommends
15 |         - apt-get install -y xvfb
16 |         - rm -rf /var/lib/apt/lists/*
17 |     script:
18 |         - npm install
19 |         - npm run build -- --noEmit
20 |         - xvfb-run --server-args="-screen 0 1024x768x24" npm run test:ci
21 |     after_script:
22 |         - cat ./coverage/lcov.info | codacy-coverage --language=typescript;
23 |     artifacts:
24 |         paths:
25 |             - instamancer_tests.log
26 |         expire_in: 1 week


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
 1 | .idea/
 2 | .github/
 3 | docs/
 4 | assets/
 5 | coverage/
 6 | test*
 7 | .travis.yml
 8 | ts*.json
 9 | *.js.map
10 | *.log
11 | *.tgz
12 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: node_js
 2 | node_js:
 3 |   - "node"
 4 |   - "lts/*"
 5 | dist: bionic
 6 | addons:
 7 |   chrome: stable
 8 |   artifacts:
 9 |     paths:
10 |       - $(ls *.log | tr "\n" ":")
11 | services:
12 |   - xvfb
13 | before_install:
 14 |   # Enable user namespace cloning for Puppeteer
15 |   - sysctl kernel.unprivileged_userns_clone=1
 16 |   # Use the XVFB display for Puppeteer
17 |   - export DISPLAY=:99.0
18 | install:
19 |   - npm install -g codacy-coverage
20 | 
21 |   # Install instamancer and deps
22 |   - npm install
23 | script:
24 |   - npm run build -- --noEmit
25 |   - npm run test:ci
26 |   - if [[ $TRAVIS_PULL_REQUEST = "false" ]] ; then cat ./coverage/lcov.info | codacy-coverage --language=typescript; fi
27 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | Thanks! I'm glad for your interest in the project. Here are some guidelines:
 4 | 
 5 | ## Bugs
 6 | Create a [new issue](https://github.com/ScriptSmith/instamancer/issues/new) with the provided template and the `bug` label.
 7 | 
 8 | ## Feature / pull requests
 9 | Make sure you submit a new issue with the `enhancement` label before submitting a pull request. If you aren't sure whether a proposed change is possible / within the scope of the project, just ask.
10 | 
11 | ## Chat / Questions
12 | Pop in to the [Gitter](https://gitter.im/instamancer)


--------------------------------------------------------------------------------
/FAQ.md:
--------------------------------------------------------------------------------
  1 | # FAQ
  2 | ## Does it still work?
  3 | At the time of writing, Instamancer still works. It's possible that it will break when Instagram.com is updated, or Instagram tries to curb this method of scraping.
  4 | 
  5 | There is a daily CI job that tests whether Instamancer is working as expected. You can see the results here: [![Build Status](https://img.shields.io/gitlab/pipeline/scriptsmith/instamancer)](https://gitlab.com/ScriptSmith/instamancer/pipelines)
  6 | 
  7 | ## Is there a GUI?
  8 | No, Instamancer only works from the command-line. In the future, I might implement a GUI using [Carlo](https://github.com/GoogleChromeLabs/carlo) or something more lightweight.
  9 | 
 10 | There is an Instagram data exploration tool in development here: [https://github.com/andyepx/insta-explorer](https://github.com/andyepx/insta-explorer)
 11 | 
 12 | ## Do I need to log in?
 13 | No. Instamancer scrapes data that Instagram makes publicly available.
 14 | 
 15 | ## How quickly does it run?
 16 | It can process anywhere from 3-30 posts per second depending on configuration.
 17 | 
 18 | ## Can I make it run faster?
 19 | Running without the `--full` and `-d` arguments is faster.
 20 | 
 21 | Not using `--sync` and customising the `-k` option can make downloading files quicker.
 22 | 
 23 | Disabling grafting with `-g=false` will make the scraping quicker at the cost of not being able to access all posts (see [here](#what-happens-if-i-disable-grafting)).
 24 | 
 25 | Setting `--sleep` to a decimal number below 1 speeds up page interactions at the cost of stability, as it makes you more likely to be rate limited.
 26 | 
 27 | Scraping is not parallelisable (see [here](#can-i-run-multiple-instances-at-the-same-time-rather-than-batch-scraping)).
 28 | 
 29 | Using `--plugin LargeFirst` is as much as 5x faster, but may result in undefined behavior.
 30 | 
 31 | If you want something *really* fast, try [Instaphyte](https://github.com/ScriptSmith/instaphyte). It's as much as 12x faster.
 32 | 
 33 | ## Can I run multiple instances at the same time rather than batch scraping?
 34 | No. Instagram will probably rate-limit your IP address and then Instamancer will have to pause until the limit is lifted.
 35 | 
 36 | ## What happens if I disable grafting?
 37 | Chrome / Chromium will eventually decide that it doesn't want the page to consume any more resources and future requests to the API will be aborted. This usually happens between 5k-10k posts regardless of the memory available on the system. There doesn't seem to be any combination of Chrome flags to avoid this.
 38 | 
 39 | ## How far back can I scrape?
 40 | Seemingly as far as there are posts to scrape, but you can only reach old posts by scraping the most recent ones.
 41 | 
 42 | ## How many posts can I scrape from a given endpoint?
 43 | The most I've seen is more than 5 million.
 44 | 
 45 | ## How do I scrape the first posts on the page?
 46 | 
 47 | In the default configuration, Instamancer will skip the posts that are pre-loaded on the page. This is because it only retrieves posts generated from API requests, which aren't made for these posts.
 48 | 
 49 | If you would like to retrieve these posts, then you should use full mode: `--full` or `-f`.
 50 | 
 51 | This behavior may change in the future.
 52 | 
 53 | ## How do I use the `--bucket` flag and S3?
 54 | 1. Create an S3 bucket. Find help [here](https://docs.aws.amazon.com/AmazonS3/latest/gsg/CreatingABucket.html).
 55 | 2. Configure your AWS credentials. Find help [here](https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/loading-node-credentials-shared.html).
 56 |     1. Ensure you can write to S3 with the credentials you're using.
 57 | 3. Use instamancer like so:
 58 | 
 59 | ```
 60 | instamancer ... -d --bucket=BUCKET_NAME
 61 | ```
 62 | 
 63 | Where `BUCKET_NAME` is the name of the bucket.
 64 | 
 65 | Example:
 66 | 
 67 | ```
 68 | instamancer hashtag puppies -c10 -d --bucket=instagram-puppies
 69 | ```
 70 | 
 71 | 
 72 | ## How do I use the `--depot` flag and depot?
 73 | 1. Set up [depot](https://github.com/ScriptSmith/depot)
 74 |     1. Set up basic access authentication if you're using a public server
 75 | 2. Generate a UUIDv4
 76 | 3. Use instamancer like so:
 77 | 
 78 | ```
 79 | instamancer ... -d --depot=http://127.0.0.1:8080/jobs/UUID/
 80 | ```
 81 | 
 82 | Where `UUID` is the UUID you generated.
 83 | 
 84 | Example:
 85 | 
 86 | ```
 87 | instamancer hashtag puppies -c10 -d --depot=https://depot:password@depot-vlnbfvyaiq-uc.a.run.app/jobs/4cdc21fe-6b35-473a-b26e-66f62ad66c4c/
 88 | ```
 89 | 
 90 | You can use any server that accepts `PUT` requests.
 91 | 
 92 | 
 93 | ## What does a batchfile look like?
 94 | ```
 95 | hashtag spring -d --full
 96 | hashtag summer -f=data.json
 97 | user greg -c100
 98 | ```
 99 | 
100 | ## Why does the code have so many comments?
101 | Instamancer was originally part of another project written in Python that used the [Pyppeteer](https://github.com/miyakogi/pyppeteer) clone of Puppeteer. This version was too error-prone because of the complicated asyncio code and Pyppeteer's instability when communicating via websockets during long scraping jobs.
102 | 
103 | I decided to rewrite Instamancer in TypeScript in order to be more stable and in-sync with Puppeteer. It was the first time I'd written any serious TypeScript or 'modern' JavaScript (promises, async/await etc.), so the zealous commenting helped me learn, and allowed me to figure out bugs in my algorithm and the grafting process. The comments aren't a permanent fixture and may be removed in a future commit.
104 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Adam Smith
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <p align="center">
  2 | <img src="assets/logo.png" height="150" title="Icon made by Freepik (www.freepik.com) available at www.flaticon.com. CC 3.0 BY licensed (http://creativecommons.org/licenses/by/3.0/)">
  3 | </p>
  4 | 
  5 | <h1 align="center">Instamancer</h1>
  6 | 
  7 | [![Quality](https://img.shields.io/codacy/grade/98066a13fa444845aa3902d180581b86.svg)](https://app.codacy.com/project/ScriptSmith/instamancer/dashboard)
  8 | [![Coverage](https://img.shields.io/codacy/coverage/98066a13fa444845aa3902d180581b86.svg)](https://app.codacy.com/project/ScriptSmith/instamancer/dashboard)
  9 | [![Speed](https://firebasestorage.googleapis.com/v0/b/instagram-speed-test.appspot.com/o/instamancer.svg?alt=media&token=dcc3e623-ee88-4d74-ae86-2d969a1cd8ad)](https://scriptsmith.github.io/instagram-speed-test)
 10 | [![NPM](https://img.shields.io/npm/v/instamancer.svg)](https://www.npmjs.com/package/instamancer)
 11 | [![Dependencies](https://david-dm.org/scriptsmith/instamancer/status.svg)](https://david-dm.org/scriptsmith/instamancer)
 12 | [![Chat](https://img.shields.io/gitter/room/instamancer/instamancer.svg)](https://gitter.im/instamancer)
 13 | 
 14 | Scrape Instagram's API with Puppeteer.
 15 | 
 16 | ###### [Install](#Install) | [Usage](#Usage) | [Comparison](#Comparison) | [Website](https://scriptsmith.github.io/instamancer/) | [FAQ](FAQ.md) | [Examples](examples/README.md)
 17 | 
 18 | <hr/>
 19 | 
 20 | **Notice:** Instagram's Web UI and API now require users to be logged in to access hashtag and account endpoints through a browser. As Instamancer is designed to access publicly available data, it currently does not work as intended. Given that this change is unlikely to be reversed, Instamancer will remain unsupported and unmaintained indefinitely. Please use [this pinned issue](https://github.com/ScriptSmith/instamancer/issues/58) to discuss.
 21 | 
 22 | <hr/>
 23 | 
 24 | 
 25 | Instamancer is a new type of scraping tool that leverages Puppeteer's ability to intercept requests made by a webpage to an API.
 26 | 
 27 | Read more about how Instamancer works [here](https://scriptsmith.github.io/instamancer/).
 28 | 
 29 | ### Features
 30 | - Scrape hashtags, users' posts, and individual posts
 31 | - Download images, albums, and videos
 32 | - Output JSON, CSV
 33 | - Batch scraping
 34 | - Search hashtags, users, and locations
 35 | - API response validation
 36 | - Upload files to [S3](https://github.com/ScriptSmith/instamancer/blob/master/FAQ.md#how-do-i-use-the---bucket-flag-and-s3) and [depot](https://github.com/ScriptSmith/instamancer/blob/master/FAQ.md#how-do-i-use-the---depot-flag-and-depot)
 37 | - [Plugins](plugins)
 38 | 
 39 | ### Data
 40 | Metadata that Instamancer is able to gather from posts:
 41 | 
 42 | - Text
 43 | - Timestamps
 44 | - Tagged users
 45 | - Accessibility captions
 46 | - Like counts
 47 | - Comment counts
 48 | - Images (Thumbnails, Dimensions, URLs)
 49 | - Videos (URL, View count, Duration)
 50 | - Comments (Timestamp, Text, Like count, User)
 51 | - User (Username, Full name, Profile picture, Profile privacy)
 52 | - Location (Name, Street, Zip code, City, Region, Country)
 53 | - Sponsored status
 54 | - Gating information
 55 | - Fact checking information
 56 | 
 57 | ## Install
 58 | 
 59 | #### Linux
 60 | Enable user namespace cloning:
 61 | ```
 62 | sysctl -w kernel.unprivileged_userns_clone=1
 63 | ```
 64 | 
 65 | Or run without a sandbox:
 66 | 
 67 | ```
 68 | # WARNING: unsafe
 69 | export NO_SANDBOX=true
 70 | ```
 71 | 
 72 | See [Puppeteer troubleshooting](https://github.com/GoogleChrome/puppeteer/blob/master/docs/troubleshooting.md#chrome-headless-fails-due-to-sandbox-issues)
 73 | 
 74 | #### Without downloading chromium
 75 | If you wish to install Instamancer without downloading Chromium, set the `PUPPETEER_SKIP_CHROMIUM_DOWNLOAD` environment variable before installation:
 76 | 
 77 | ```
 78 | export PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
 79 | ```
 80 | 
 81 | ### From NPM
 82 | 
 83 | ```
 84 | npm install -g instamancer
 85 | ```
 86 | 
 87 | If you're using root to install globally, use the following command to install the Puppeteer dependency
 88 | 
 89 | ```
 90 | sudo npm install -g instamancer --unsafe-perm=true
 91 | ```
 92 | 
 93 | ### From NPX
 94 | 
 95 | ```
 96 | npx instamancer
 97 | ```
 98 | 
 99 | ### From this repository
100 | ```
101 | git clone https://github.com/ScriptSmith/instamancer.git
102 | cd instamancer
103 | npm install
104 | npm run build
105 | npm install -g
106 | ```
107 | 
108 | ## Usage
109 | 
110 | ### Command Line
111 | ```
112 | $ instamancer
113 | Usage: instamancer <command> [options]
114 | 
115 | Commands:
116 |   instamancer hashtag [id]       Scrape a hashtag
117 |   instamancer user [id]          Scrape a users posts
118 |   instamancer post [ids]         Scrape a comma-separated list of posts
119 |   instamancer search [query]     Perform a search of users, tags and places
120 |   instamancer batch [batchfile]  Read newline-separated arguments from a file
121 | 
122 | Configuration
123 |   --count, -c    Number of posts to download (0 for all)   [number] [default: 0]
124 |   --full, -f     Retrieve full post data              [boolean] [default: false]
125 |   --sleep, -s    Seconds to sleep between interactions     [number] [default: 2]
126 |   --graft, -g    Enable grafting                       [boolean] [default: true]
127 |   --browser, -b  Browser path. Defaults to the puppeteer version        [string]
128 |   --sameBrowser  Use a single browser when grafting   [boolean] [default: false]
129 | 
130 | Download
131 |   --download, -d      Save images from posts          [boolean] [default: false]
132 |   --downdir           Download path       [default: "downloads/[endpoint]/[id]"]
133 |   --video, -v         Download videos (requires full) [boolean] [default: false]
134 |   --sync              Force download between requests [boolean] [default: false]
135 |   --threads, -k       Parallel download / depot threads    [number] [default: 4]
136 |   --waitDownload, -w  Download media after scraping   [boolean] [default: false]
137 | 
138 | Upload
139 |   --bucket  Upload files to an AWS S3 bucket                            [string]
140 |   --depot   Upload files to a URL with a PUT request (depot)            [string]
141 | 
142 | Output
143 |   --file, -o       Output filename. '-' for stdout    [string] [default: "[id]"]
144 |   --type, -t       Filetype   [choices: "csv", "json", "both"] [default: "json"]
145 |   --mediaPath, -m  Add filepaths to _mediaPath        [boolean] [default: false]
146 | 
147 | Display
148 |   --visible    Show browser on the screen             [boolean] [default: false]
149 |   --quiet, -q  Disable progress output                [boolean] [default: false]
150 | 
151 | Logging
152 |   --logging, -l    [choices: "none", "error", "info", "debug"] [default: "none"]
153 |   --logfile      Log file name             [string] [default: "instamancer.log"]
154 | 
155 | Validation
156 |   --strict  Throw an error on response type mismatch  [boolean] [default: false]
157 | 
158 | Plugins
159 |   --plugin, -p  Use a plugin from the plugins directory    [array] [default: []]
160 | 
161 | Options:
162 |   --help     Show help                                                 [boolean]
163 |   --version  Show version number                                       [boolean]
164 | 
165 | Examples:
166 |   instamancer hashtag instagood -fvd        Download all the available posts,
167 |                                             and their media from #instagood
168 |   instamancer user arianagrande --type=csv  Download Ariana Grande's posts to a
169 |   --logging=info --visible                  CSV file with a non-headless
170 |                                             browser, and log all events
171 | 
172 | Source code available at https://github.com/ScriptSmith/instamancer
173 | 
174 | ```
175 | 
176 | ### Module
177 | 
178 | ES2018 TypeScript example:
179 | ```typescript
180 | import {createApi, IOptions} from "instamancer"
181 | 
182 | const options: IOptions = {
183 |     total: 10
184 | };
185 | const hashtag = createApi("hashtag", "beach", options);
186 | 
187 | (async () => {
188 |     for await (const post of hashtag.generator()) {
189 |         console.log(post);
190 |     }
191 | })();
192 | ```
193 | 
194 | #### Generator functions
195 | 
196 | ```typescript
197 | import {createApi} from "instamancer"
198 | 
199 | createApi("hashtag", id, options);
200 | createApi("user", id, options);
201 | createApi("post", ids, options);
202 | createApi("search", query, options);
203 | ```
204 | 
205 | #### Options
206 | ```typescript
207 | const options: Instamancer.IOptions = {
208 |     // Total posts to download. 0 for unlimited
209 |     total: number,
210 | 
211 |     // Run Chrome in headless mode
212 |     headless: boolean,
213 | 
214 |     // Logging events
215 |     logger: winston.Logger,
216 | 
217 |     // Run without output to stdout
218 |     silent: boolean,
219 | 
220 |     // Time to sleep between interactions with the page
221 |     sleepTime: number,
222 | 
223 |     // Throw an error if type validation has been failed
224 |     strict: boolean,
225 | 
226 |     // Time to sleep when rate-limited
227 |     hibernationTime: number,
228 | 
229 |     // Enable the grafting process
230 |     enableGrafting: boolean,
231 | 
232 |     // Extract the full amount of information from the API
233 |     fullAPI: boolean,
234 | 
235 |     // Use a proxy in Chrome to connect to Instagram
236 |     proxyURL: string,
237 | 
238 |     // Location of the chromium / chrome binary executable
239 |     executablePath: string,
240 | 
241 |     // Custom io-ts validator
242 |     validator: Type<unknown>,
243 | 
244 |     // Custom plugins
245 |     plugins: IPlugin[]
246 | }
247 | ```
248 | 
249 | ## Comparison
250 | 
251 | A comparison of Instagram scraping tools. Please suggest more tools and criteria through a pull request.
252 | 
253 | To see a speed comparison, visit [this page](https://scriptsmith.github.io/instagram-speed-test)
254 | 
255 | <table>
256 | <thead>
257 |     <tr>
258 |         <th>Tool</th>
259 |         <th>Hashtags</th>
260 |         <th>Users</th>
261 |         <th>Tagged posts</th>
262 |         <th>Locations</th>
263 |         <th>Posts</th>
264 |         <th>Stories</th>
265 |         <th>Login not required</th>
266 |         <th>Private feeds</th>
267 |         <th>Batch mode</th>
268 |         <th>Plugins</th>
269 |         <th>Command-line</th>
270 |         <th>Library/Module</th>
271 |         <th>Download media</th>
272 |         <th>Download metadata</th>
273 |         <th>Scraping method</th>
274 |         <th>Daily builds</th>
275 |         <th>Main language</th>
276 |         <th>Speed ____________________________</th>
277 |         <th>License ____________________________</th>
278 |         <th>Last commit ____________________________</th>
279 |         <th>Open Issues ____________________________</th>
280 |         <th>Closed Issues ____________________________</th>
281 |         <th>Build status ____________________________</th>
282 |         <th>Test coverage ____________________________</th>
283 |         <th>Code quality ____________________________</th>
284 |     </tr>
285 | </thead>
286 | <tbody>
287 |     <tr>
288 |         <td><a href="https://github.com/ScriptSmith/instamancer">Instamancer</a></td>
289 |         <td>:heavy_check_mark:</td>
290 |         <td>:heavy_check_mark:</td>
291 |         <td>:x:</td>
292 |         <td>:x:</td>
293 |         <td>:heavy_check_mark:</td>
294 |         <td>:x:</td>
295 |         <td>:heavy_check_mark:</td>
296 |         <td>:x:</td>
297 |         <td>:heavy_check_mark:</td>
298 |         <td>:heavy_check_mark:</td>
299 |         <td>:heavy_check_mark:</td>
300 |         <td>:heavy_check_mark:</td>
301 |         <td>:heavy_check_mark:</td>
302 |         <td>:heavy_check_mark:</td>
303 |         <td>Web API request interception</td>
304 |         <td>:heavy_check_mark:</td>
305 |         <td>Typescript</td>
306 |         <td><a href="https://scriptsmith.github.io/instagram-speed-test"><img src="https://firebasestorage.googleapis.com/v0/b/instagram-speed-test.appspot.com/o/instamancer.svg?alt=media&token=dcc3e623-ee88-4d74-ae86-2d969a1cd8ad"></a></td>
307 |         <td><img src="https://img.shields.io/github/license/scriptsmith/instamancer.svg"></td>
308 |         <td><img src="https://img.shields.io/github/last-commit/scriptsmith/instamancer.svg"></td>
309 |         <td><img src="https://img.shields.io/github/issues/scriptsmith/instamancer.svg"></td>
310 |         <td><img src="https://img.shields.io/github/issues-closed/scriptsmith/instamancer.svg"></td>
311 |         <td><img src="https://img.shields.io/gitlab/pipeline/scriptsmith/instamancer"></td>
312 |         <td><img src="https://img.shields.io/codacy/coverage/98066a13fa444845aa3902d180581b86.svg"></td>
313 |         <td><img src="https://img.shields.io/codacy/grade/98066a13fa444845aa3902d180581b86.svg"></td>
314 |     </tr>
315 |     <tr>
316 |         <td><a href="https://github.com/ScriptSmith/instaphyte">Instaphyte</a></td>
317 |         <td>:heavy_check_mark:</td>
318 |         <td>:x:</td>
319 |         <td>:x:</td>
320 |         <td>:x:</td>
321 |         <td>:x:</td>
322 |         <td>:x:</td>
323 |         <td>:heavy_check_mark:</td>
324 |         <td>:x:</td>
325 |         <td>:x:</td>
326 |         <td>:x:</td>
327 |         <td>:heavy_check_mark:</td>
328 |         <td>:heavy_check_mark:</td>
329 |         <td>:heavy_check_mark:</td>
330 |         <td>:heavy_check_mark:</td>
331 |         <td>Web API simulation</td>
332 |         <td>:heavy_check_mark:</td>
333 |         <td>Python</td>
334 |         <td><a href="https://scriptsmith.github.io/instagram-speed-test"><img src="https://firebasestorage.googleapis.com/v0/b/instagram-speed-test.appspot.com/o/instaphyte.svg?alt=media&token=8e16505d-1259-42a4-90bc-cf8a6d326f55"></a></td>
335 |         <td><img src="https://img.shields.io/github/license/scriptsmith/instaphyte.svg"></td>
336 |         <td><img src="https://img.shields.io/github/last-commit/scriptsmith/instaphyte.svg"></td>
337 |         <td><img src="https://img.shields.io/github/issues/scriptsmith/instaphyte.svg"></td>
338 |         <td><img src="https://img.shields.io/github/issues-closed/scriptsmith/instaphyte.svg"></td>
339 |         <td><img src="https://img.shields.io/travis/ScriptSmith/instaphyte.svg"></td>
340 |         <td><img src="https://img.shields.io/codacy/coverage/a2322f650025499bb8aee2368ca43207.svg"></td>
341 |         <td><img src="https://img.shields.io/codacy/grade/a2322f650025499bb8aee2368ca43207.svg"></td>
342 |     </tr>
343 |     <tr>
344 |         <td><a href="https://github.com/instaloader/instaloader">Instaloader</a></td>
345 |         <td>:heavy_check_mark:</td>
346 |         <td>:heavy_check_mark:</td>
347 |         <td>:heavy_check_mark:</td>
348 |         <td>:heavy_check_mark:</td>
349 |         <td>:heavy_check_mark:</td>
350 |         <td>:heavy_check_mark:</td>
351 |         <td>:heavy_check_mark:</td>
352 |         <td>:heavy_check_mark:</td>
353 |         <td>:x:</td>
354 |         <td>:x:</td>
355 |         <td>:heavy_check_mark:</td>
356 |         <td>:heavy_check_mark:</td>
357 |         <td>:heavy_check_mark:</td>
358 |         <td>:heavy_check_mark:</td>
359 |         <td>Web API simulation</td>
360 |         <td>:x:</td>
361 |         <td>Python</td>
362 |         <td><a href="https://scriptsmith.github.io/instagram-speed-test"><img src="https://firebasestorage.googleapis.com/v0/b/instagram-speed-test.appspot.com/o/instaloader.svg?alt=media&token=e7b05b24-6c96-43b7-9e5e-4951f7b1d9ba"></a></td>
363 |         <td><img src="https://img.shields.io/github/license/instaloader/instaloader.svg"></td>
364 |         <td><img src="https://img.shields.io/github/last-commit/instaloader/instaloader.svg"></td>
365 |         <td><img src="https://img.shields.io/github/issues/instaloader/instaloader.svg"></td>
366 |         <td><img src="https://img.shields.io/github/issues-closed/instaloader/instaloader.svg"></td>
367 |         <td><img src="https://img.shields.io/travis/instaloader/instaloader.svg"></td>
368 |         <td>:question:</td>
369 |         <td>:question:</td>
370 |     </tr>
371 |     <tr>
372 |         <td><a href="https://github.com/althonos/InstaLooter">Instalooter</a></td>
373 |         <td>:heavy_check_mark:</td>
374 |         <td>:heavy_check_mark:</td>
375 |         <td>:x:</td>
376 |         <td>:heavy_check_mark:</td>
377 |         <td>:heavy_check_mark:</td>
378 |         <td>:x:</td>
379 |         <td>:x:</td>
380 |         <td>:heavy_check_mark:</td>
381 |         <td>:heavy_check_mark:</td>
382 |         <td>:x:</td>
383 |         <td>:heavy_check_mark:</td>
384 |         <td>:heavy_check_mark:</td>
385 |         <td>:heavy_check_mark:</td>
386 |         <td>:heavy_check_mark:</td>
387 |         <td>Web API simulation</td>
388 |         <td>:x:</td>
389 |         <td>Python</td>
390 |         <td><a href="https://scriptsmith.github.io/instagram-speed-test"><img src="https://firebasestorage.googleapis.com/v0/b/instagram-speed-test.appspot.com/o/instalooter.svg?alt=media&token=ed9564f5-6011-4090-95e7-2b80e7f6e41f"></a></td>
391 |         <td><img src="https://img.shields.io/github/license/althonos/instalooter.svg"></td>
392 |         <td><img src="https://img.shields.io/github/last-commit/althonos/instalooter.svg"></td>
393 |         <td><img src="https://img.shields.io/github/issues/althonos/instalooter.svg"></td>
394 |         <td><img src="https://img.shields.io/github/issues-closed/althonos/instalooter.svg"></td>
395 |         <td><img src="https://img.shields.io/travis/althonos/InstaLooter.svg"></td>
396 |         <td><img src="https://img.shields.io/codecov/c/github/althonos/InstaLooter.svg"></td>
397 |         <td><img src="https://img.shields.io/codacy/grade/9b8c7da6887c4195b9e960cb04b59a91.svg"></td>
398 |     </tr>
399 |     <tr>
400 |         <td><a href="https://github.com/huaying/instagram-crawler">Instagram crawler</a></td>
401 |         <td>:heavy_check_mark:</td>
402 |         <td>:heavy_check_mark:</td>
403 |         <td>:x:</td>
404 |         <td>:x:</td>
405 |         <td>:heavy_check_mark:</td>
406 |         <td>:x:</td>
407 |         <td>:heavy_check_mark:</td>
408 |         <td>:x:</td>
409 |         <td>:x:</td>
410 |         <td>:x:</td>
411 |         <td>:heavy_check_mark:</td>
412 |         <td>:heavy_check_mark:</td>
413 |         <td>:x:</td>
414 |         <td>:heavy_check_mark:</td>
415 |         <td>Web DOM reading</td>
416 |         <td>:x:</td>
417 |         <td>Python</td>
418 |         <td>:question:</td>
419 |         <td><img src="https://img.shields.io/github/license/huaying/instagram-crawler.svg"></td>
420 |         <td><img src="https://img.shields.io/github/last-commit/huaying/instagram-crawler.svg"></td>
421 |         <td><img src="https://img.shields.io/github/issues/huaying/instagram-crawler.svg"></td>
422 |         <td><img src="https://img.shields.io/github/issues-closed/huaying/instagram-crawler.svg"></td>
423 |         <td><img src="https://img.shields.io/travis/huaying/instagram-crawler.svg"></td>
424 |         <td>:question:</td>
425 |         <td>:question:</td>
426 |     </tr>
427 |     <tr>
428 |         <td><a href="https://github.com/rarcega/instagram-scraper">Instagram Scraper</a></td>
429 |         <td>:heavy_check_mark:</td>
430 |         <td>:heavy_check_mark:</td>
431 |         <td>:heavy_check_mark:</td>
432 |         <td>:heavy_check_mark:</td>
433 |         <td>:x:</td>
434 |         <td>:heavy_check_mark:</td>
435 |         <td>:x:</td>
436 |         <td>:heavy_check_mark:</td>
437 |         <td>:x:</td>
438 |         <td>:x:</td>
439 |         <td>:heavy_check_mark:</td>
440 |         <td>:heavy_check_mark:</td>
441 |         <td>:heavy_check_mark:</td>
442 |         <td>:heavy_check_mark:</td>
443 |         <td>Web API simulation</td>
444 |         <td>:x:</td>
445 |         <td>Python</td>
446 |         <td><a href="https://scriptsmith.github.io/instagram-speed-test"><img src="https://firebasestorage.googleapis.com/v0/b/instagram-speed-test.appspot.com/o/instagram-scraper.svg?alt=media&token=ecdf626f-e3fd-4959-b047-5b13f244370b"></a></td>
447 |         <td><img src="https://img.shields.io/github/license/rarcega/instagram-scraper.svg"></td>
448 |         <td><img src="https://img.shields.io/github/last-commit/rarcega/instagram-scraper.svg"></td>
449 |         <td><img src="https://img.shields.io/github/issues/rarcega/instagram-scraper.svg"></td>
450 |         <td><img src="https://img.shields.io/github/issues-closed/rarcega/instagram-scraper.svg"></td>
451 |         <td><img src="https://img.shields.io/travis/rarcega/instagram-scraper.svg"></td>
452 |         <td>:question:</td>
453 |         <td>:question:</td>
454 |     </tr>
455 |     <tr>
456 |         <td><a href="https://github.com/ping/instagram_private_api">Instagram Private API</a></td>
457 |         <td>:heavy_check_mark:</td>
458 |         <td>:heavy_check_mark:</td>
459 |         <td>:heavy_check_mark:</td>
460 |         <td>:heavy_check_mark:</td>
461 |         <td>:heavy_check_mark:</td>
462 |         <td>:heavy_check_mark:</td>
463 |         <td>:heavy_check_mark:</td>
464 |         <td>:heavy_check_mark:</td>
465 |         <td>:x:</td>
466 |         <td>:x:</td>
467 |         <td>:x:</td>
468 |         <td>:heavy_check_mark:</td>
469 |         <td>:heavy_check_mark:</td>
470 |         <td>:heavy_check_mark:</td>
471 |         <td>App and Web API simulation</td>
472 |         <td>:x:</td>
473 |         <td>Python</td>
474 |         <td>:question:</td>
475 |         <td><img src="https://img.shields.io/github/license/ping/instagram_private_api.svg"></td>
476 |         <td><img src="https://img.shields.io/github/last-commit/ping/instagram_private_api.svg"></td>
477 |         <td><img src="https://img.shields.io/github/issues/ping/instagram_private_api.svg"></td>
478 |         <td><img src="https://img.shields.io/github/issues-closed/ping/instagram_private_api.svg"></td>
479 |         <td><img src="https://img.shields.io/travis/ping/instagram_private_api.svg"></td>
480 |         <td>:question:</td>
481 |         <td>:question:</td>
482 |     </tr>
483 |     <tr>
484 |         <td><a href="https://github.com/postaddictme/instagram-php-scraper">Instagram PHP Scraper</a></td>
485 |         <td>:heavy_check_mark:</td>
486 |         <td>:heavy_check_mark:</td>
487 |         <td>:x:</td>
488 |         <td>:heavy_check_mark:</td>
489 |         <td>:heavy_check_mark:</td>
490 |         <td>:x:</td>
491 |         <td>:heavy_check_mark:</td>
492 |         <td>:heavy_check_mark:</td>
493 |         <td>:x:</td>
494 |         <td>:x:</td>
495 |         <td>:x:</td>
496 |         <td>:heavy_check_mark:</td>
497 |         <td>:heavy_check_mark:</td>
498 |         <td>:heavy_check_mark:</td>
499 |         <td>Web API simulation</td>
500 |         <td>:x:</td>
501 |         <td>PHP</td>
502 |         <td>:question:</td>
503 |         <td><img src="https://img.shields.io/github/license/postaddictme/instagram-php-scraper.svg"></td>
504 |         <td><img src="https://img.shields.io/github/last-commit/postaddictme/instagram-php-scraper.svg"></td>
505 |         <td><img src="https://img.shields.io/github/issues/postaddictme/instagram-php-scraper.svg"></td>
506 |         <td><img src="https://img.shields.io/github/issues-closed/postaddictme/instagram-php-scraper.svg"></td>
507 |         <td>:question:</td>
508 |         <td>:question:</td>
509 |         <td>:question:</td>
510 |     </tr>
511 | </tbody>
512 | </table>
513 | 


--------------------------------------------------------------------------------
/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ScriptSmith/instamancer/1c63cad47886d0831ae6cc44812b72aefa7414a9/assets/logo.png


--------------------------------------------------------------------------------
/assets/logo.svg:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
  2 | <svg
  3 |    xmlns:osb="http://www.openswatchbook.org/uri/2009/osb"
  4 |    xmlns:dc="http://purl.org/dc/elements/1.1/"
  5 |    xmlns:cc="http://creativecommons.org/ns#"
  6 |    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  7 |    xmlns:svg="http://www.w3.org/2000/svg"
  8 |    xmlns="http://www.w3.org/2000/svg"
  9 |    xmlns:xlink="http://www.w3.org/1999/xlink"
 10 |    viewBox="0 0 186.8272 186.82657"
 11 |    height="186.82657"
 12 |    width="186.82719"
 13 |    id="svg869"
 14 |    version="1.1">
 15 |   <metadata
 16 |      id="metadata875">
 17 |   </metadata>
 18 |   <defs
 19 |      id="defs873">
 20 |     <linearGradient
 21 |        id="linearGradient7409">
 22 |       <stop
 23 |          id="stop7405"
 24 |          offset="0"
 25 |          style="stop-color:#ba2aaa;stop-opacity:1;" />
 26 |       <stop
 27 |          id="stop7407"
 28 |          offset="1"
 29 |          style="stop-color:#515ad4;stop-opacity:1" />
 30 |     </linearGradient>
 31 |     <linearGradient
 32 |        id="linearGradient7401">
 33 |       <stop
 34 |          id="stop7397"
 35 |          offset="0"
 36 |          style="stop-color:#fa7921;stop-opacity:1;" />
 37 |       <stop
 38 |          id="stop7399"
 39 |          offset="1"
 40 |          style="stop-color:#ba2aaa;stop-opacity:1" />
 41 |     </linearGradient>
 42 |     <linearGradient
 43 |        id="linearGradient7393">
 44 |       <stop
 45 |          id="stop7389"
 46 |          offset="0"
 47 |          style="stop-color:#fdc052;stop-opacity:1;" />
 48 |       <stop
 49 |          id="stop7391"
 50 |          offset="1"
 51 |          style="stop-color:#fa7921;stop-opacity:1" />
 52 |     </linearGradient>
 53 |     <linearGradient
 54 |        osb:paint="solid"
 55 |        id="linearGradient7370">
 56 |       <stop
 57 |          id="stop7368"
 58 |          offset="0"
 59 |          style="stop-color:#fdc052;stop-opacity:1;" />
 60 |     </linearGradient>
 61 |     <linearGradient
 62 |        gradientTransform="translate(-99.91973,-25.874928)"
 63 |        gradientUnits="userSpaceOnUse"
 64 |        y2="155.94446"
 65 |        x2="194.31883"
 66 |        y1="203.69716"
 67 |        x1="193.89247"
 68 |        id="linearGradient7395"
 69 |        xlink:href="#linearGradient7393" />
 70 |     <linearGradient
 71 |        gradientTransform="translate(-99.91973,-25.874928)"
 72 |        gradientUnits="userSpaceOnUse"
 73 |        y2="150.14113"
 74 |        x2="189.77151"
 75 |        y1="179.39778"
 76 |        x1="189.05211"
 77 |        id="linearGradient7403"
 78 |        xlink:href="#linearGradient7401" />
 79 |     <linearGradient
 80 |        gradientTransform="translate(-99.91973,-25.874928)"
 81 |        gradientUnits="userSpaceOnUse"
 82 |        y2="32.667362"
 83 |        x2="197.06355"
 84 |        y1="137.94281"
 85 |        x1="197.3024"
 86 |        id="linearGradient7411"
 87 |        xlink:href="#linearGradient7409" />
 88 |     <linearGradient
 89 |        gradientUnits="userSpaceOnUse"
 90 |        y2="9.9003134"
 91 |        x2="168.90088"
 92 |        y1="113.60381"
 93 |        x1="169.12015"
 94 |        id="linearGradient7446"
 95 |        xlink:href="#linearGradient7409" />
 96 |   </defs>
 97 |   <path
 98 |      id="path937"
 99 |      d="m 183.17822,151.22204 c 0,-20.06362 -40.18854,-43.59662 -89.76463,-43.59662 -49.57608,0 -89.76462,23.533 -89.76462,43.59662 0,9.60706 9.21792,17.34258 24.26279,22.7975 l 14.41627,-11.53415 v 15.68058 c 14.50038,3.2798 32.1038,5.02731 51.08556,5.02731 22.34138,0 42.77561,-2.41887 58.4847,-6.91166 l -3.38527,-10.88989 h 8.75752 v -6.90311 l 15.72478,8.59646 c 6.50115,-4.47284 10.1829,-9.79377 10.1829,-15.86304 z m 0,0"
100 |      style="fill:url(#linearGradient7395);fill-opacity:1;stroke:none;stroke-width:0.36489683;stroke-opacity:1" />
101 |   <path
102 |      id="path939"
103 |      d="m 51.39202,118.43834 c -1.84872,11.40588 -2.86074,19.63459 -2.86074,21.88526 0,10.03182 20.09499,18.16502 44.88231,18.16502 24.78733,0 44.88232,-8.1332 44.88232,-18.16502 0,-2.27918 -1.03625,-10.68036 -2.92915,-22.30717 -27.15489,10.03894 -55.14647,10.18006 -83.97474,0.42191 z m 0,0"
104 |      style="fill:url(#linearGradient7403);fill-opacity:1;stroke-width:0.36489683" />
105 |   <path
106 |      id="path941"
107 |      d="m 142.18291,8.756912 c -5.22971,-5.09145 -12.76853,-6.38285 -19.17418,-3.91837 -20.4157,5.95096 -65.48901,75.79307 -71.61671,113.599798 28.8297,9.75815 56.81985,9.61703 83.97474,-0.42191 -2.55856,-15.72334 0.84524,-34.438558 1.15598,-54.787258 4.11791,-0.44329 7.62149,-2.92203 9.47591,-6.40708 2.41032,1.64632 4.83489,3.25414 7.22239,4.77502 3.05887,-8.41544 8.37838,-14.01717 15.96282,-16.80094 -9.7895,-15.38126 -23.03269,-32.17506 -27.00095,-36.03926 z m 0,0"
108 |      style="fill:url(#linearGradient7411);fill-opacity:1;stroke-width:0.36489683" />
109 |   <path
110 |      id="path943"
111 |      d="m 169.18386,44.796172 c -7.58444,2.78519 -12.90395,8.3855 -15.96282,16.80094 12.10289,7.70987 23.23225,13.19188 26.30821,10.06174 3.10448,-3.16006 -2.51722,-14.56024 -10.34539,-26.86268 z m 0,0"
112 |      style="fill:url(#linearGradient7446);fill-opacity:1;stroke-width:0.36489683" />
113 |   <path
114 |      id="path945"
115 |      d="m 158.56764,118.67496 c -6.27024,-3.23846 -13.10778,-5.97377 -20.33302,-8.1774 -0.4604,-7.95219 0.16249,-16.692598 0.81816,-25.827848 0.43047,-5.98232 0.87233,-12.14138 1.05621,-18.50854 2.60132,-0.8424 4.93894,-2.34047 6.78337,-4.34741 1.49523,0.99634 2.95482,1.94707 4.36309,2.84363 6.84467,4.36166 12.59892,7.5203 17.10026,9.38897 3.98964,1.65629 10.23564,3.763 13.78199,0.15252 3.46224,-3.52496 1.47954,-9.64269 -0.31216,-13.93735 -1.91999,-4.60112 -5.13564,-10.458 -9.56001,-17.40957 -9.59423,-15.07481 -23.10396,-32.38175 -27.53119,-36.69209 -6.03363,-5.87541 -15.00638,-7.73696 -22.90155,-4.76361 -15.60362,4.71229 -36.90162,36.64932 -43.07351,46.35187 -14.28229,22.45398 -24.88141,45.13746 -29.34997,62.508538 -7.52599,2.24497 -14.64433,5.05725 -21.14976,8.41829 C 10.29978,127.95274 0,139.81474 0,151.22204 c 0,7.54738 4.62535,18.22061 26.66313,26.21129 1.19874,0.43474 2.5386,0.2138 3.53351,-0.58156 l 8.48243,-6.78622 v 8.099 c 0,1.69762 1.17879,3.16719 2.84078,3.54349 14.80113,3.34821 32.74521,5.11853 51.89374,5.11853 22.7861,0 43.35859,-2.43882 59.49102,-7.05277 1.91144,-0.54735 3.06456,-2.68399 2.47874,-4.56691 l -1.92142,-6.18187 h 3.80861 c 2.01549,0 3.64897,-1.62778 3.64897,-3.63329 v -0.764 l 10.31974,5.64165 c 1.21157,0.6628 2.69397,0.58725 3.83142,-0.19528 7.8011,-5.36797 11.75652,-11.70948 11.75652,-18.85063 0,-11.40873 -10.29978,-23.27073 -28.25955,-32.54851 z m -0.80676,-58.524608 c 2.31196,-4.94749 5.61028,-8.50096 9.99617,-10.76589 8.14747,13.22467 8.99271,18.10944 8.99984,19.54479 -1.41682,-0.0228 -6.19184,-0.88373 -18.99601,-8.7789 z m -72.83541,-8.51379 c 8.53516,-13.41851 18.08805,-27.30739 30.26506,-37.75399 6.88742,-5.90962 16.92921,-9.84224 24.44096,-2.5272 3.2969,3.20996 14.72843,17.54071 24.19579,31.88001 -5.26392,2.86787 -9.36473,7.20529 -12.24827,12.95669 -1.05621,-0.69986 -14.86527,-10.05319 -14.88523,-10.06745 -1.64773,-1.11179 -3.95257,-0.67135 -5.06722,0.96926 -1.11036,1.63206 -0.65282,3.9483 0.97354,5.04727 l 8.14603,5.49483 
c -1.28854,1.09184 -2.88781,1.79455 -4.61537,1.98128 -1.83161,0.1967 -3.22848,1.72185 -3.25557,3.55632 -0.11118,7.1896 -0.61433,14.19819 -1.10039,20.97729 -0.80106,11.14788 -1.56221,21.734168 -0.42904,31.436718 0.0542,0.46467 -16.09252,4.3417 -19.4279,4.88477 -12.51197,2.03829 -25.34323,1.95704 -37.85093,-0.0684 -1.58501,-0.25657 -18.66818,-3.65324 -18.50426,-4.43008 3.51641,-16.800928 14.54884,-41.049468 29.3628,-64.337298 z M 52.43254,137.10082 c 4.25191,1.57362 8.51379,2.91775 12.72008,4.00674 1.92854,0.50031 3.9483,-0.6799 4.45145,-2.59704 0.50744,-1.93994 -0.66422,-3.92692 -2.61271,-4.43292 -4.49137,-1.16311 -9.06113,-2.64123 -13.62092,-4.39445 0.26797,-1.90715 0.58583,-4.06232 0.95928,-6.47834 26.62179,8.17169 52.86016,8.06622 78.11786,-0.31358 0.39341,2.52862 0.73407,4.84913 1.02485,6.92306 -12.72435,5.05154 -25.68247,7.66854 -38.55791,7.78684 -1.97558,0.0185 -3.63899,1.70618 -3.61761,3.66607 0.0214,1.93282 1.71758,3.60051 3.64897,3.60051 h 0.0328 c 7.30649,-0.0656 14.68424,-0.88231 21.92802,-2.42599 5.82979,-1.24151 11.69808,-2.99473 17.50364,-5.20264 0.1568,1.4824 0.23661,2.53575 0.23661,3.08452 0,6.06784 -15.68628,14.53174 -41.23334,14.53174 -25.54706,0 -41.23334,-8.4639 -41.23334,-14.53174 0,-0.51028 0.0784,-1.58216 0.25229,-3.22278 z m 120.27485,25.68105 c 0,0 -13.66225,-7.46756 -13.68078,-7.47754 -2.38751,-1.30565 -5.40503,0.49461 -5.40503,3.18429 v 3.26982 c 0,0 -5.09431,0 -5.10856,0 -2.39321,0 -4.19061,2.43883 -3.48648,4.70803 l 2.26778,7.29508 c -14.91658,3.79864 -33.44222,5.79844 -53.88073,5.79844 -17.39674,0 -33.71874,-1.49094 -47.43659,-4.32317 0,0 0,-12.73005 0,-12.75143 0,-2.99045 -3.59052,-4.70946 -5.93527,-2.83223 l -12.78992,10.23279 c -8.22016,-3.22136 -19.95387,-9.43743 -19.95387,-18.66391 0,-8.34701 9.32055,-18.34747 24.32265,-26.09725 5.00451,-2.58563 10.39956,-4.8306 16.08112,-6.72066 -1.71615,10.64473 -2.8194,19.21553 -2.8194,21.91947 0,6.42846 5.4763,12.18556 15.42117,16.21083 8.90292,3.60336 20.66229,5.58749 33.11011,5.58749 
12.44783,0 24.20721,-1.98413 33.11012,-5.58749 9.94487,-4.02527 15.42117,-9.78237 15.42117,-16.21083 0,-2.67116 -1.12605,-11.40873 -2.81798,-21.91234 5.68014,1.89005 11.07662,4.12932 16.0797,6.71353 15.0021,7.74978 24.32265,17.75024 24.32265,26.09725 0,4.10509 -2.29201,7.9864 -6.82186,11.55983 z m 0,0"
116 |      style="stroke-width:0.36489683" />
117 |   <path
118 |      id="path947"
119 |      d="m 83.92628,140.3236 c 0,-2.00693 -1.63349,-3.63329 -3.64897,-3.63329 -2.01549,0 -3.64897,1.62636 -3.64897,3.63329 0,2.00551 1.63348,3.63187 3.64897,3.63187 2.01548,0 3.64897,-1.62636 3.64897,-3.63187 z m 0,0"
120 |      style="stroke-width:0.36489683" />
121 | </svg>
122 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _site/
2 | .jekyll-metadata


--------------------------------------------------------------------------------
/docs/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 | gem "github-pages", group: :jekyll_plugins


--------------------------------------------------------------------------------
/docs/Gemfile.lock:
--------------------------------------------------------------------------------
  1 | GEM
  2 |   remote: https://rubygems.org/
  3 |   specs:
  4 |     activesupport (4.2.11.1)
  5 |       i18n (~> 0.7)
  6 |       minitest (~> 5.1)
  7 |       thread_safe (~> 0.3, >= 0.3.4)
  8 |       tzinfo (~> 1.1)
  9 |     addressable (2.7.0)
 10 |       public_suffix (>= 2.0.2)
 11 |     coffee-script (2.4.1)
 12 |       coffee-script-source
 13 |       execjs
 14 |     coffee-script-source (1.11.1)
 15 |     colorator (1.1.0)
 16 |     commonmarker (0.17.13)
 17 |       ruby-enum (~> 0.5)
 18 |     concurrent-ruby (1.1.5)
 19 |     dnsruby (1.61.3)
 20 |       addressable (~> 2.5)
 21 |     em-websocket (0.5.1)
 22 |       eventmachine (>= 0.12.9)
 23 |       http_parser.rb (~> 0.6.0)
 24 |     ethon (0.12.0)
 25 |       ffi (>= 1.3.0)
 26 |     eventmachine (1.2.7)
 27 |     execjs (2.7.0)
 28 |     faraday (0.17.0)
 29 |       multipart-post (>= 1.2, < 3)
 30 |     ffi (1.11.1)
 31 |     forwardable-extended (2.6.0)
 32 |     gemoji (3.0.1)
 33 |     github-pages (201)
 34 |       activesupport (= 4.2.11.1)
 35 |       github-pages-health-check (= 1.16.1)
 36 |       jekyll (= 3.8.5)
 37 |       jekyll-avatar (= 0.6.0)
 38 |       jekyll-coffeescript (= 1.1.1)
 39 |       jekyll-commonmark-ghpages (= 0.1.6)
 40 |       jekyll-default-layout (= 0.1.4)
 41 |       jekyll-feed (= 0.11.0)
 42 |       jekyll-gist (= 1.5.0)
 43 |       jekyll-github-metadata (= 2.12.1)
 44 |       jekyll-mentions (= 1.4.1)
 45 |       jekyll-optional-front-matter (= 0.3.0)
 46 |       jekyll-paginate (= 1.1.0)
 47 |       jekyll-readme-index (= 0.2.0)
 48 |       jekyll-redirect-from (= 0.14.0)
 49 |       jekyll-relative-links (= 0.6.0)
 50 |       jekyll-remote-theme (= 0.4.0)
 51 |       jekyll-sass-converter (= 1.5.2)
 52 |       jekyll-seo-tag (= 2.5.0)
 53 |       jekyll-sitemap (= 1.2.0)
 54 |       jekyll-swiss (= 0.4.0)
 55 |       jekyll-theme-architect (= 0.1.1)
 56 |       jekyll-theme-cayman (= 0.1.1)
 57 |       jekyll-theme-dinky (= 0.1.1)
 58 |       jekyll-theme-hacker (= 0.1.1)
 59 |       jekyll-theme-leap-day (= 0.1.1)
 60 |       jekyll-theme-merlot (= 0.1.1)
 61 |       jekyll-theme-midnight (= 0.1.1)
 62 |       jekyll-theme-minimal (= 0.1.1)
 63 |       jekyll-theme-modernist (= 0.1.1)
 64 |       jekyll-theme-primer (= 0.5.3)
 65 |       jekyll-theme-slate (= 0.1.1)
 66 |       jekyll-theme-tactile (= 0.1.1)
 67 |       jekyll-theme-time-machine (= 0.1.1)
 68 |       jekyll-titles-from-headings (= 0.5.1)
 69 |       jemoji (= 0.10.2)
 70 |       kramdown (= 1.17.0)
 71 |       liquid (= 4.0.0)
 72 |       listen (= 3.1.5)
 73 |       mercenary (~> 0.3)
 74 |       minima (= 2.5.0)
 75 |       nokogiri (>= 1.10.4, < 2.0)
 76 |       rouge (= 3.11.0)
 77 |       terminal-table (~> 1.4)
 78 |     github-pages-health-check (1.16.1)
 79 |       addressable (~> 2.3)
 80 |       dnsruby (~> 1.60)
 81 |       octokit (~> 4.0)
 82 |       public_suffix (~> 3.0)
 83 |       typhoeus (~> 1.3)
 84 |     html-pipeline (2.12.0)
 85 |       activesupport (>= 2)
 86 |       nokogiri (>= 1.4)
 87 |     http_parser.rb (0.6.0)
 88 |     i18n (0.9.5)
 89 |       concurrent-ruby (~> 1.0)
 90 |     jekyll (3.8.5)
 91 |       addressable (~> 2.4)
 92 |       colorator (~> 1.0)
 93 |       em-websocket (~> 0.5)
 94 |       i18n (~> 0.7)
 95 |       jekyll-sass-converter (~> 1.0)
 96 |       jekyll-watch (~> 2.0)
 97 |       kramdown (~> 1.14)
 98 |       liquid (~> 4.0)
 99 |       mercenary (~> 0.3.3)
100 |       pathutil (~> 0.9)
101 |       rouge (>= 1.7, < 4)
102 |       safe_yaml (~> 1.0)
103 |     jekyll-avatar (0.6.0)
104 |       jekyll (~> 3.0)
105 |     jekyll-coffeescript (1.1.1)
106 |       coffee-script (~> 2.2)
107 |       coffee-script-source (~> 1.11.1)
108 |     jekyll-commonmark (1.3.1)
109 |       commonmarker (~> 0.14)
110 |       jekyll (>= 3.7, < 5.0)
111 |     jekyll-commonmark-ghpages (0.1.6)
112 |       commonmarker (~> 0.17.6)
113 |       jekyll-commonmark (~> 1.2)
114 |       rouge (>= 2.0, < 4.0)
115 |     jekyll-default-layout (0.1.4)
116 |       jekyll (~> 3.0)
117 |     jekyll-feed (0.11.0)
118 |       jekyll (~> 3.3)
119 |     jekyll-gist (1.5.0)
120 |       octokit (~> 4.2)
121 |     jekyll-github-metadata (2.12.1)
122 |       jekyll (~> 3.4)
123 |       octokit (~> 4.0, != 4.4.0)
124 |     jekyll-mentions (1.4.1)
125 |       html-pipeline (~> 2.3)
126 |       jekyll (~> 3.0)
127 |     jekyll-optional-front-matter (0.3.0)
128 |       jekyll (~> 3.0)
129 |     jekyll-paginate (1.1.0)
130 |     jekyll-readme-index (0.2.0)
131 |       jekyll (~> 3.0)
132 |     jekyll-redirect-from (0.14.0)
133 |       jekyll (~> 3.3)
134 |     jekyll-relative-links (0.6.0)
135 |       jekyll (~> 3.3)
136 |     jekyll-remote-theme (0.4.0)
137 |       addressable (~> 2.0)
138 |       jekyll (~> 3.5)
139 |       rubyzip (>= 1.2.1, < 3.0)
140 |     jekyll-sass-converter (1.5.2)
141 |       sass (~> 3.4)
142 |     jekyll-seo-tag (2.5.0)
143 |       jekyll (~> 3.3)
144 |     jekyll-sitemap (1.2.0)
145 |       jekyll (~> 3.3)
146 |     jekyll-swiss (0.4.0)
147 |     jekyll-theme-architect (0.1.1)
148 |       jekyll (~> 3.5)
149 |       jekyll-seo-tag (~> 2.0)
150 |     jekyll-theme-cayman (0.1.1)
151 |       jekyll (~> 3.5)
152 |       jekyll-seo-tag (~> 2.0)
153 |     jekyll-theme-dinky (0.1.1)
154 |       jekyll (~> 3.5)
155 |       jekyll-seo-tag (~> 2.0)
156 |     jekyll-theme-hacker (0.1.1)
157 |       jekyll (~> 3.5)
158 |       jekyll-seo-tag (~> 2.0)
159 |     jekyll-theme-leap-day (0.1.1)
160 |       jekyll (~> 3.5)
161 |       jekyll-seo-tag (~> 2.0)
162 |     jekyll-theme-merlot (0.1.1)
163 |       jekyll (~> 3.5)
164 |       jekyll-seo-tag (~> 2.0)
165 |     jekyll-theme-midnight (0.1.1)
166 |       jekyll (~> 3.5)
167 |       jekyll-seo-tag (~> 2.0)
168 |     jekyll-theme-minimal (0.1.1)
169 |       jekyll (~> 3.5)
170 |       jekyll-seo-tag (~> 2.0)
171 |     jekyll-theme-modernist (0.1.1)
172 |       jekyll (~> 3.5)
173 |       jekyll-seo-tag (~> 2.0)
174 |     jekyll-theme-primer (0.5.3)
175 |       jekyll (~> 3.5)
176 |       jekyll-github-metadata (~> 2.9)
177 |       jekyll-seo-tag (~> 2.0)
178 |     jekyll-theme-slate (0.1.1)
179 |       jekyll (~> 3.5)
180 |       jekyll-seo-tag (~> 2.0)
181 |     jekyll-theme-tactile (0.1.1)
182 |       jekyll (~> 3.5)
183 |       jekyll-seo-tag (~> 2.0)
184 |     jekyll-theme-time-machine (0.1.1)
185 |       jekyll (~> 3.5)
186 |       jekyll-seo-tag (~> 2.0)
187 |     jekyll-titles-from-headings (0.5.1)
188 |       jekyll (~> 3.3)
189 |     jekyll-watch (2.2.1)
190 |       listen (~> 3.0)
191 |     jemoji (0.10.2)
192 |       gemoji (~> 3.0)
193 |       html-pipeline (~> 2.2)
194 |       jekyll (~> 3.0)
195 |     kramdown (1.17.0)
196 |     liquid (4.0.0)
197 |     listen (3.1.5)
198 |       rb-fsevent (~> 0.9, >= 0.9.4)
199 |       rb-inotify (~> 0.9, >= 0.9.7)
200 |       ruby_dep (~> 1.2)
201 |     mercenary (0.3.6)
202 |     mini_portile2 (2.4.0)
203 |     minima (2.5.0)
204 |       jekyll (~> 3.5)
205 |       jekyll-feed (~> 0.9)
206 |       jekyll-seo-tag (~> 2.1)
207 |     minitest (5.12.2)
208 |     multipart-post (2.1.1)
209 |     nokogiri (1.10.8)
210 |       mini_portile2 (~> 2.4.0)
211 |     octokit (4.14.0)
212 |       sawyer (~> 0.8.0, >= 0.5.3)
213 |     pathutil (0.16.2)
214 |       forwardable-extended (~> 2.6)
215 |     public_suffix (3.1.1)
216 |     rb-fsevent (0.10.3)
217 |     rb-inotify (0.10.0)
218 |       ffi (~> 1.0)
219 |     rouge (3.11.0)
220 |     ruby-enum (0.7.2)
221 |       i18n
222 |     ruby_dep (1.5.0)
223 |     rubyzip (2.0.0)
224 |     safe_yaml (1.0.5)
225 |     sass (3.7.4)
226 |       sass-listen (>= 4.0.0)
227 |     sass-listen (4.0.0)
228 |       rb-inotify (>= 0.9.7, >= 0.9)
229 |     sawyer (0.8.2)
230 |       addressable (>= 2.3.5)
231 |       faraday (> 0.8, < 2.0)
232 |     terminal-table (1.8.0)
233 |       unicode-display_width (~> 1.1, >= 1.1.1)
234 |     thread_safe (0.3.6)
235 |     typhoeus (1.3.1)
236 |       ethon (>= 0.9.0)
237 |     tzinfo (1.2.5)
238 |       thread_safe (~> 0.1)
239 |     unicode-display_width (1.6.0)
240 | 
241 | PLATFORMS
242 |   ruby
243 | 
244 | DEPENDENCIES
245 |   github-pages
246 | 
247 | BUNDLED WITH
248 |    2.0.1
249 | 


--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | title: Instamancer
2 | logo: /assets/img/logo.png
3 | description: Scrape Instagram's API with Puppeteer.
4 | show_downloads: false
5 | theme: jekyll-theme-minimal
6 | repository: ScriptSmith/instamancer
7 | google_analytics: UA-79900226-3
8 | 


--------------------------------------------------------------------------------
/docs/_layouts/default.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="{{ site.lang | default: "en-US" }}">
 3 | <head>
 4 |     <meta charset="UTF-8">
 5 |     <meta http-equiv="X-UA-Compatible" content="IE=edge">
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1">
 7 | 
 8 |     {% if site.logo %}
 9 |     <link rel="shortcut icon" type="image/png" href="{{site.logo | relative_url}}"/>
10 |     {% endif %}
11 | 
12 |     {% seo %}
13 |     <link rel="stylesheet" href="{{ "/assets/css/style.css?v=" | append: site.github.build_revision | relative_url }}">
14 |     <!--[if lt IE 9]>
15 |     <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv.min.js"></script>
16 |     <![endif]-->
17 | </head>
18 | <body>
19 | <div class="wrapper">
20 |     <header>
21 |         <h1>
22 |             {% if site.logo %}
23 |             <img src="{{site.logo | relative_url}}" alt="Logo" height="28px"/>
24 |             {% endif %}
25 |             <a href="{{ "/" | absolute_url }}">{{ site.title | default: site.github.repository_name }}</a>
26 |         </h1>
27 | 
28 |         <p>{{ site.description | default: site.github.project_tagline }}</p>
29 | 
30 |         {% if site.github.is_project_page %}
31 |         <p class="view"><a href="{{ site.github.repository_url }}">View the Project on GitHub <small>{{ site.github.repository_nwo }}</small></a></p>
32 |         {% endif %}
33 | 
34 |         {% if site.github.is_user_page %}
35 |         <p class="view"><a href="{{ site.github.owner_url }}">View My GitHub Profile</a></p>
36 |         {% endif %}
37 | 
38 |         {% if site.show_downloads %}
39 |         <ul class="downloads">
40 |             <li><a href="{{ site.github.zip_url }}">Download <strong>ZIP File</strong></a></li>
41 |             <li><a href="{{ site.github.tar_url }}">Download <strong>TAR Ball</strong></a></li>
42 |             <li><a href="{{ site.github.repository_url }}">View On <strong>GitHub</strong></a></li>
43 |         </ul>
44 |         {% endif %}
45 |     </header>
46 |     <section>
47 | 
48 |         {{ content }}
49 | 
50 |     </section>
51 |     <footer>
52 |         {% if site.github.is_project_page %}
53 |         <p>This project is maintained by <a href="{{ site.github.owner_url }}">{{ site.github.owner_name }}</a></p>
54 |         {% endif %}
55 |         <p><small>Hosted on GitHub Pages &mdash; Theme by <a href="https://github.com/orderedlist">orderedlist</a></small></p>
56 |     </footer>
57 | </div>
58 | <script src="{{ "/assets/js/scale.fix.js" | relative_url }}"></script>
59 | {% if site.google_analytics %}
60 | <script>
61 | (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
62 |     (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
63 |     m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
64 | })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
65 | ga('create', '{{ site.google_analytics }}', 'auto');
66 | ga('send', 'pageview');
67 | </script>
68 | {% endif %}
69 | </body>
70 | </html>
71 | 


--------------------------------------------------------------------------------
/docs/api-change.md:
--------------------------------------------------------------------------------
 1 | # The Instagram API has changed
 2 | Because of the way Instamancer works, when Instagram changes the API for its web frontend, the data that Instamancer gathers will be affected.
 3 | 
 4 | If you see this warning, you can:
 5 | 
 6 | - Check for [updates](https://github.com/ScriptSmith/instamancer/releases). A new version of Instamancer may have been released.
 7 | - Look for reports in [open issues](https://github.com/ScriptSmith/instamancer/issues). Maybe someone else is having this problem, and is already working on a fix.
 8 | - Open a [new issue](https://github.com/ScriptSmith/instamancer/issues/new/choose) if you can't find an existing one.
 9 | - Create a fork of the repository and [fix the typings](https://github.com/ScriptSmith/instamancer/blob/master/utils/validation-generator/README.md#fix-typings) yourself.
10 | 


--------------------------------------------------------------------------------
/docs/assets/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ScriptSmith/instamancer/1c63cad47886d0831ae6cc44812b72aefa7414a9/docs/assets/img/logo.png


--------------------------------------------------------------------------------
/docs/capture.webm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ScriptSmith/instamancer/1c63cad47886d0831ae6cc44812b72aefa7414a9/docs/capture.webm


--------------------------------------------------------------------------------
/docs/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ScriptSmith/instamancer/1c63cad47886d0831ae6cc44812b72aefa7414a9/docs/favicon.ico


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
  1 | <video src="capture.webm" width="500px" autoplay loop></video>
  2 | 
  3 | <hr>
  4 | 
  5 | # About Instamancer
  6 | 
  7 | Instamancer is a scraping tool used in Instagram data mining and analysis projects.
  8 | 
  9 | Traditional Instagram scrapers either use a browser to access a web-page and read the DOM, or they manually reimplement the requests that browsers make to an API endpoint. This isn't ideal because:
 10 |  
 11 | 1. Reading the DOM ignores some information that's only stored in memory.
 12 | 2. Reimplementing requests requires the deciphering and reproduction of pagination and authentication mechanisms.
 13 | 3. Neither method easily tolerates changes to the front and back end.
 14 | 
 15 | Instamancer is unique because it doesn't read the DOM or reimplement requests. Using [Puppeteer](https://github.com/GoogleChrome/puppeteer/) it interacts with Instagram.com, then intercepts and saves the responses to requests that the page's JavaScript initiates. This means that it can retrieve the full amount of information from the API while tolerating failed requests and rate limits, without having to reimplement client-side code. This makes it much better at withstanding regular changes to the interface and API.
 16 | 
 17 | As browsers become more and more like black boxes, this new scraping method will become increasingly relevant.
 18 | 
 19 | Instamancer also comes with some clever tricks:
 20 | 
 21 | - Because using a browser consumes lots of memory in large scraping jobs, Instamancer employs a new scraping technique called *grafting*. It intercepts and saves the URL and headers of each request, and then after a certain number of interactions with the page it will restart the browser and navigate back to the same page. Once the page initiates the first request to the API, its URL and headers are swapped on-the-fly with the most recently saved ones. The scraping continues without incident because the response from the API is in the correct form despite being for the incorrect data.
 22 | - Requests from pages for media and other non-API urls are intercepted and aborted to speed up scraping and conserve resources.
 23 | - Instagram sends limited information through its feed API. To get extra information like the location, tagged users, and comments, Instamancer can open new tabs for each post that it scrapes, and then read the metadata from memory.
 24 | 
 25 | # Installation
 26 | 
 27 | To get started with Instamancer, follow the installation instructions [here](https://github.com/ScriptSmith/instamancer#Install)
 28 | 
 29 | # Output
 30 | 
 31 | ## Metadata
 32 | 
 33 | Instamancer outputs metadata into JSON and CSV files.
 34 | 
 35 | Here's a sample of output without `--full` mode:
 36 | 
 37 | ```json
 38 | [
 39 |   {
 40 |     "node": {
 41 |       "comments_disabled": false,
 42 |       "__typename": "GraphImage",
 43 |       "id": "1953636359851103977",
 44 |       "edge_media_to_caption": {
 45 |         "edges": [
 46 |           {
 47 |             "node": {
 48 |               "text": "Love my #dogs"
 49 |             }
 50 |           }
 51 |         ]
 52 |       },
 53 |       "shortcode": "BsrrAClca9F",
 54 |       "edge_media_to_comment": {
 55 |         "count": 1
 56 |       },
 57 |       "taken_at_timestamp": 1547102918,
 58 |       "dimensions": {
 59 |         "height": 1350,
 60 |         "width": 1080
 61 |       },
 62 |       "display_url": "https://instagram.fbne3-1.fna.fbcdn.net/vp/5edccf8779ca7659a5ee7bb3e5bb0ec4/5CD38B5F/t51.2885-15/e35/49522041_130894740706474_725467490028727537_n.jpg?_nc_ht=instagram.fbne3-1.fna.fbcdn.net",
 63 |       "edge_liked_by": {
 64 |         "count": 3
 65 |       },
 66 |       "edge_media_preview_like": {
 67 |         "count": 3
 68 |       },
 69 |       "owner": {
 70 |         "id": "1838071775"
 71 |       },
 72 |       "thumbnail_src": "https://instagram.fbne3-1.fna.fbcdn.net/vp/5d074edce4bd1bdb02cadb670dd62571/5CBF791C/t51.2885-15/sh0.08/e35/c0.135.1080.1080/s640x640/49522041_130894740706474_725467490028727537_n.jpg?_nc_ht=instagram.fbne3-1.fna.fbcdn.net",
 73 |       "thumbnail_resources": [
 74 |         {
 75 |           "src": "https://instagram.fbne3-1.fna.fbcdn.net/vp/418024ac735200f61193e0de0bc2b79f/5CC9DD07/t51.2885-15/e35/c0.135.1080.1080/s150x150/49522041_130894740706474_725467490028727537_n.jpg?_nc_ht=instagram.fbne3-1.fna.fbcdn.net",
 76 |           "config_width": 150,
 77 |           "config_height": 150
 78 |         },
 79 |         {
 80 |           "src": "https://instagram.fbne3-1.fna.fbcdn.net/vp/ca0843efc1fa41da05f401d1d2d99c80/5CC6C84D/t51.2885-15/e35/c0.135.1080.1080/s240x240/49522041_130894740706474_725467490028727537_n.jpg?_nc_ht=instagram.fbne3-1.fna.fbcdn.net",
 81 |           "config_width": 240,
 82 |           "config_height": 240
 83 |         },
 84 |         {
 85 |           "src": "https://instagram.fbne3-1.fna.fbcdn.net/vp/5560c9aa0cbaf43d93b9f57da63f46ae/5CD068F7/t51.2885-15/e35/c0.135.1080.1080/s320x320/49522041_130894740706474_725467490028727537_n.jpg?_nc_ht=instagram.fbne3-1.fna.fbcdn.net",
 86 |           "config_width": 320,
 87 |           "config_height": 320
 88 |         },
 89 |         {
 90 |           "src": "https://instagram.fbne3-1.fna.fbcdn.net/vp/1842510041138b9f71cba3a7e7991f47/5CCEDFAD/t51.2885-15/e35/c0.135.1080.1080/s480x480/49522041_130894740706474_725467490028727537_n.jpg?_nc_ht=instagram.fbne3-1.fna.fbcdn.net",
 91 |           "config_width": 480,
 92 |           "config_height": 480
 93 |         },
 94 |         {
 95 |           "src": "https://instagram.fbne3-1.fna.fbcdn.net/vp/5d074edce4bd1bdb02cadb670dd62571/5CBF791C/t51.2885-15/sh0.08/e35/c0.135.1080.1080/s640x640/49522041_130894740706474_725467490028727537_n.jpg?_nc_ht=instagram.fbne3-1.fna.fbcdn.net",
 96 |           "config_width": 640,
 97 |           "config_height": 640
 98 |         }
 99 |       ],
100 |       "is_video": false,
101 |       "accessibility_caption": "Image may contain: 1 person, dog, outdoor, closeup, water and nature"
102 |     }
103 |   }
104 | ]
105 | ```
106 | 
107 | And with `--full` mode:
108 | 
109 | ```json
110 | [
111 |   {
112 |     "shortcode_media": {
113 |       "__typename": "GraphImage",
114 |       "id": "1958565413572638000",
115 |       "shortcode": "BsHcdeHyEgY",
116 |       "dimensions": {
117 |         "height": 1349,
118 |         "width": 1080
119 |       },
120 |       "gating_info": null,
121 |       "media_preview": "ACEqQWKuuSmQWblCFPU8YPGPQc/WpoLWFSGRSpzwWJB7cH056eh9qvwlU3KeNrn8m+YfzxUMk8e4gfNn7wHT/wDXWd/IuwGMEkgsPXJ6H3zntR5Jzwcj04/nj/61EU6sflOT6Hr/APXHp3Hv0q0u05wMHuPShyt0CxT2D3/Sirm3/OKKXMPlM1sgEjq2B9akEIhXL/KfXGc/l/KoTcQHlWJOeBtPH/6qtI5ZQT0HXP8AP/69Uk3sK6RlzRuMMePccf59s81dUtcEGV9igAfLwT6kt2B9KDdI+VABx0yOPc89hWRK/mOxXkHpgZz/APWoa7iuafl2v/PWisjn+7RRYdyM8+/+f896lhmki+UHcB/CeR/n8cVWJI6U89BVElmS5aRQDg47AYHr+P0qMEkZ7UHq30/pSpSAb8vvRU2BRQM//9k=",
122 |       "display_url": "https://instagram.fbne3-1.fna.fbcdn.net/vp/ff493b6b24e6e2be7df1ec9644d5339c/5CD16638/t51.2885-15/e35/49472607_1820670783526329_6546442839896910927_n.jpg?_nc_ht=instagram.fbne3-1.fna.fbcdn.net",
123 |       "display_resources": [
124 |         {
125 |           "src": "https://instagram.fbne3-1.fna.fbcdn.net/vp/5150c80ce526c6f6bd4da78e4f57979f/5CBBD552/t51.2885-15/sh0.08/e35/p640x640/49472607_1820670783526329_6546442839896910927_n.jpg?_nc_ht=instagram.fbne3-1.fna.fbcdn.net",
126 |           "config_width": 640,
127 |           "config_height": 799
128 |         },
129 |         {
130 |           "src": "https://instagram.fbne3-1.fna.fbcdn.net/vp/54585546542f3fae7f25ab23d219fd75/5CB87296/t51.2885-15/sh0.08/e35/p750x750/49472607_1820670783526329_6546442839896910927_n.jpg?_nc_ht=instagram.fbne3-1.fna.fbcdn.net",
131 |           "config_width": 750,
132 |           "config_height": 937
133 |         },
134 |         {
135 |           "src": "https://instagram.fbne3-1.fna.fbcdn.net/vp/ff493b6b24e6e2be7df1ec9644d5339c/5CD16638/t51.2885-15/e35/49472607_1820670783526329_6546442839896910927_n.jpg?_nc_ht=instagram.fbne3-1.fna.fbcdn.net",
136 |           "config_width": 1080,
137 |           "config_height": 1349
138 |         }
139 |       ],
140 |       "accessibility_caption": "Image may contain: dog",
141 |       "is_video": false,
142 |       "should_log_client_event": false,
143 |       "tracking_token": "eyJ2ZXJzaW9uIjo1LCJwYXlsb2FkIjp7ImlzX2FdGGelBj5c190cmFjJa2VkIjpmYWxzZSwidXVpZCI6IjRlODVlYjAyYzdmYjRmMmViNWYwNzg1ODZlZjRhZTEwMTk1MzU2NDE4NDYzNTI2MzAwMCJ9LCJzaWduYXR1cmUiOiIifQ==",
144 |       "edge_media_to_tagged_user": {
145 |         "edges": []
146 |       },
147 |       "edge_media_to_caption": {
148 |         "edges": [
149 |           {
150 |             "node": {
151 |               "text": "Cool pic #dogs 👌🏻"
152 |             }
153 |           }
154 |         ]
155 |       },
156 |       "caption_is_edited": false,
157 |       "has_ranked_comments": false,
158 |       "edge_media_to_comment": {
159 |         "count": 0,
160 |         "page_info": {
161 |           "has_next_page": false,
162 |           "end_cursor": null
163 |         },
164 |         "edges": []
165 |       },
166 |       "comments_disabled": false,
167 |       "taken_at_timestamp": 1547103020,
168 |       "edge_media_preview_like": {
169 |         "count": 3,
170 |         "edges": []
171 |       },
172 |       "edge_media_to_sponsor_user": {
173 |         "edges": []
174 |       },
175 |       "location": null,
176 |       "viewer_has_liked": false,
177 |       "viewer_has_saved": false,
178 |       "viewer_has_saved_to_collection": false,
179 |       "viewer_in_photo_of_you": false,
180 |       "viewer_can_reshare": true,
181 |       "owner": {
182 |         "id": "7050323018",
183 |         "is_verified": false,
184 |         "profile_pic_url": "https://instagram.fbne3-1.fna.fbcdn.net/vp/0859933bacb7ef085efcd513c7336f21/5CCBC50C/t51.2885-19/s150x150/47446882_612896971943840_3814256767933636272_n.jpg?_nc_ht=instagram.fbne3-1.fna.fbcdn.net",
185 |         "username": "user.name",
186 |         "blocked_by_viewer": false,
187 |         "followed_by_viewer": false,
188 |         "full_name": "Full name",
189 |         "has_blocked_viewer": false,
190 |         "is_private": false,
191 |         "is_unpublished": false,
192 |         "requested_by_viewer": false
193 |       },
194 |       "is_ad": false,
195 |       "edge_web_media_to_related_media": {
196 |         "edges": []
197 |       }
198 |     }
199 |   }
200 | ]
201 | 
202 | ```
203 | 
204 | ## Media
205 | To download media as well as scrape metadata, include the `-d` flag. By default, Instamancer downloads the highest-quality image available for each post.
206 | 
207 | By enabling full mode with `--full`, all images in albums are downloaded as well. 
208 | 
209 | Videos are downloaded when the `--video` flag is used along with `--full`.
210 | 
211 | The default download location for media is `downloads/[endpoint]/[id]`. This can be changed with the `--downdir` flag.
212 | 


--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
 1 | # Examples
 2 | 
 3 | See the command-line interface in action [here](https://scriptsmith.github.io/instamancer), and instructions and examples [here](../README.md#command-line)
 4 | 
 5 | |Name|Description|
 6 | |---------------------------------------|----------------------------------------------------------|
 7 | |[Express server](server.ts)            |Express server acting as an API endpoint                  |
 8 | |[Page complexity plugin](complexity.ts)|Plugin that outputs the number of DOM elements on the page|
 9 | 
10 | Please suggest more examples with a pull request
11 | 


--------------------------------------------------------------------------------
/examples/complexity.ts:
--------------------------------------------------------------------------------
 1 | import * as instamancer from "instamancer";
 2 | import {Response} from "puppeteer";
 3 | 
 4 | /** Plugin that reports how many elements on the page match a selector. */
 5 | class Complexity<PostType> implements instamancer.IPlugin<PostType> {
 6 |     /** @param query Selector passed to `document.querySelectorAll`. */
 7 |     constructor(private query: string) {}
 8 | 
 9 |     /** Count the matching elements in the page and write the total to stdout. */
10 |     public async responseEvent(
11 |         this: instamancer.IPluginContext<Complexity<PostType>, PostType>,
12 |         res: Response,
13 |         data: {[key: string]: any},
14 |     ): Promise<void> {
15 |         const selector = this.plugin.query;
16 |         const matches = await this.state.page.evaluate(
17 |             (query) => document.querySelectorAll(query).length,
18 |             selector,
19 |         );
20 |         // Output looks like "div elements: 42".
21 |         process.stdout.write(`${selector} elements: ${matches}\n`);
22 |     }
23 | }
24 | // Build a "user" API for "therock" with three Complexity plugins (div, span, img).
25 | const user = instamancer.createApi("user", "therock", {
26 |     enableGrafting: false,
27 |     plugins: [
28 |         new Complexity("div"),
29 |         new Complexity("span"),
30 |         new Complexity("img"),
31 |     ],
32 |     silent: true,
33 |     total: 500,
34 | });
35 | // Drain the generator into an array, then print how many posts were collected.
36 | (async () => {
37 |     const posts: instamancer.TPost[] = [];
38 |     for await (const post of user.generator()) {
39 |         posts.push(post);
40 |     }
41 |     // NOTE(review): no trailing newline is written, and a rejection in this IIFE is unhandled.
42 |     process.stdout.write(`Total posts ${posts.length}`);
43 | })();
44 | 


--------------------------------------------------------------------------------
/examples/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "instamancer-examples",
 3 |     "version": "1.0.0",
 4 |     "description": "Examples of instamancer usage",
 5 |     "author": "ScriptSmith",
 6 |     "license": "MIT-0",
 7 |     "dependencies": {
 8 |         "express": "^4.17.1",
 9 |         "instamancer": "file:..",
10 |         "puppeteer": "^1.20.0"
11 |     },
12 |     "devDependencies": {
13 |         "@types/puppeteer": "^1.20.2"
14 |     }
15 | }
16 | 


--------------------------------------------------------------------------------
/examples/server.ts:
--------------------------------------------------------------------------------
 1 | import express from "express";
 2 | import * as instamancer from "instamancer";
 3 | 
 4 | const app = express();
 5 | const port = 3000;
 6 | 
 7 | /** Collect every post yielded for `tag` by a hashtag API created with `total: 5`. */
 8 | async function getPosts(tag: string): Promise<instamancer.TPost[]> {
 9 |     const collected = [];
10 |     const api = instamancer.createApi("hashtag", tag, {total: 5});
11 | 
12 |     // Drain the async generator into the array.
13 |     for await (const item of api.generator()) {
14 |         collected.push(item);
15 |     }
16 | 
17 |     return collected;
18 | }
19 | // Posts returned by the /cached route; stays empty until getCached() has completed.
20 | let cachedPosts: instamancer.TPost[] = [];
21 | // Replace the cache with the result of scraping the "puppies" hashtag.
22 | async function getCached() {
23 |     cachedPosts = await getPosts("puppies");
24 | }
25 | setTimeout(getCached, 3000); // runs once, 3000 ms after start-up; NOTE(review): a rejection from getCached is unhandled
26 | 
27 | // Return whatever `cachedPosts` currently holds.
28 | app.get("/cached", async (req, res) => {
29 |     res.json(cachedPosts);
30 | });
31 | 
32 | // Scrape on demand, e.g. GET /live?tag=puppies
33 | app.get("/live", async (req, res) => {
34 |     // The route declares no ":tag" segment, so req.params is always empty;
35 |     // the hashtag has to be read from the query string instead.
36 |     const tag = req.query.tag;
37 |     if (typeof tag !== "string" || tag.length === 0) {
38 |         res.status(400).json({error: "Missing 'tag' query parameter"});
39 |         return;
40 |     }
41 | 
42 |     try {
43 |         res.json(await getPosts(tag));
44 |     } catch (error) {
45 |         // Always answer; otherwise a failed scrape leaves the request hanging.
46 |         res.status(500).json({error: String(error)});
47 |     }
48 | });
49 | 
50 | app.listen(port, () =>
51 |     process.stdout.write(`Example app listening on port ${port}!\n`),
52 | );
53 | 


--------------------------------------------------------------------------------
/examples/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "compilerOptions": {
 3 |         /* Basic Options */
 4 |         // "incremental": true,                   /* Enable incremental compilation */
 5 |         "target": "es5" /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019' or 'ESNEXT'. */,
 6 |         "module": "commonjs" /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', or 'ESNext'. */,
 7 |         // "lib": [],                             /* Specify library files to be included in the compilation. */
 8 |         // "allowJs": true,                       /* Allow javascript files to be compiled. */
 9 |         // "checkJs": true,                       /* Report errors in .js files. */
10 |         // "jsx": "preserve",                     /* Specify JSX code generation: 'preserve', 'react-native', or 'react'. */
11 |         // "declaration": true,                   /* Generates corresponding '.d.ts' file. */
12 |         // "declarationMap": true,                /* Generates a sourcemap for each corresponding '.d.ts' file. */
13 |         // "sourceMap": true,                     /* Generates corresponding '.map' file. */
14 |         // "outFile": "./",                       /* Concatenate and emit output to single file. */
15 |         // "outDir": "./",                        /* Redirect output structure to the directory. */
16 |         // "rootDir": "./",                       /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */
17 |         // "composite": true,                     /* Enable project compilation */
18 |         // "tsBuildInfoFile": "./",               /* Specify file to store incremental compilation information */
19 |         // "removeComments": true,                /* Do not emit comments to output. */
20 |         // "noEmit": true,                        /* Do not emit outputs. */
21 |         // "importHelpers": true,                 /* Import emit helpers from 'tslib'. */
22 |         // "downlevelIteration": true,            /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */
23 |         // "isolatedModules": true,               /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */
24 | 
25 |         /* Strict Type-Checking Options */
26 |         "strict": true /* Enable all strict type-checking options. */,
27 |         // "noImplicitAny": true,                 /* Raise error on expressions and declarations with an implied 'any' type. */
28 |         // "strictNullChecks": true,              /* Enable strict null checks. */
29 |         // "strictFunctionTypes": true,           /* Enable strict checking of function types. */
30 |         // "strictBindCallApply": true,           /* Enable strict 'bind', 'call', and 'apply' methods on functions. */
31 |         // "strictPropertyInitialization": true,  /* Enable strict checking of property initialization in classes. */
32 |         // "noImplicitThis": true,                /* Raise error on 'this' expressions with an implied 'any' type. */
33 |         // "alwaysStrict": true,                  /* Parse in strict mode and emit "use strict" for each source file. */
34 | 
35 |         /* Additional Checks */
36 |         // "noUnusedLocals": true,                /* Report errors on unused locals. */
37 |         // "noUnusedParameters": true,            /* Report errors on unused parameters. */
38 |         // "noImplicitReturns": true,             /* Report error when not all code paths in function return a value. */
39 |         // "noFallthroughCasesInSwitch": true,    /* Report errors for fallthrough cases in switch statement. */
40 | 
41 |         /* Module Resolution Options */
42 |         // "moduleResolution": "node",            /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */
43 |         // "baseUrl": "./",                       /* Base directory to resolve non-absolute module names. */
44 |         // "paths": {},                           /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */
45 |         // "rootDirs": [],                        /* List of root folders whose combined content represents the structure of the project at runtime. */
46 |         // "typeRoots": [],                       /* List of folders to include type definitions from. */
47 |         // "types": [],                           /* Type declaration files to be included in compilation. */
48 |         // "allowSyntheticDefaultImports": true,  /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */
49 |         "esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */
50 |         // "preserveSymlinks": true,              /* Do not resolve the real path of symlinks. */
51 |         // "allowUmdGlobalAccess": true,          /* Allow accessing UMD globals from modules. */
52 | 
53 |         /* Source Map Options */
54 |         // "sourceRoot": "",                      /* Specify the location where debugger should locate TypeScript files instead of source locations. */
55 |         // "mapRoot": "",                         /* Specify the location where debugger should locate map files instead of generated locations. */
56 |         // "inlineSourceMap": true,               /* Emit a single file with source maps instead of having a separate file. */
57 |         // "inlineSources": true,                 /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */
58 | 
59 |         /* Experimental Options */
60 |         // "experimentalDecorators": true,        /* Enables experimental support for ES7 decorators. */
61 |         // "emitDecoratorMetadata": true,         /* Enables experimental support for emitting type metadata for decorators. */
62 |     }
63 | }
64 | 


--------------------------------------------------------------------------------
/examples/tslint.json:
--------------------------------------------------------------------------------
1 | {
2 |     "defaultSeverity": "error",
3 |     "extends": ["tslint:recommended"],
4 |     "jsRules": {},
5 |     "rules": {},
6 |     "rulesDirectory": []
7 | }
8 | 


--------------------------------------------------------------------------------
/index.ts:
--------------------------------------------------------------------------------
 1 | import {createApi} from "./src/api/api";
 2 | 
 3 | export {
 4 |     Hashtag,
 5 |     Post,
 6 |     User,
 7 |     IOptions,
 8 |     createApi,
 9 |     IOptionsCommon,
10 |     IOptionsFullApi,
11 |     IOptionsRegular,
12 | } from "./src/api/api";
13 | export {Instagram} from "./src/api/instagram";
14 | export {TSearchResult, ISearchOptions} from "./src/api/search";
15 | export {TPost, TSinglePost, TFullApiPost} from "./src/api/types";
16 | 
17 | export * from "./plugins";
18 | 


--------------------------------------------------------------------------------
/man/instamancer.1:
--------------------------------------------------------------------------------
 1 | .\" Manpage for instamancer.
 2 | .TH Instamancer 1
 3 | .SH NAME
 4 | instamancer \- Scrape Instagram's API with Puppeteer
 5 | .SH SYNOPSIS
 6 | .B instamancer
 7 | [\fIoptions\fR]
 8 | .IR command
 9 | .IR query
10 | .SH DESCRIPTION
11 | Instamancer is an Instagram scraper that uses Puppeteer to control a chromium / chrome browser instance and intercept requests made to APIs.
12 | 
13 | Instamancer scrapes hashtags, users, search results, and individual posts.
14 | 
15 | Both data and media can be scraped, and then saved to disk or uploaded to external object storage.
16 | 
17 | The plugin system can be used to extend instamancer and add other functionality.
18 | .SH OPTIONS
19 | .TP
20 | .BR \-h ", " \-\-help
21 | Show the list of options and examples
22 | .SH SEE ALSO
23 | The Instamancer project and further documentation can be accessed at https://github.com/ScriptSmith/instamancer
24 | .SH BUGS
25 | Please report bugs at
26 | https://github.com/ScriptSmith/instamancer/issues
27 | .SH AUTHOR
28 | Adam Smith https://github.com/ScriptSmith
29 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "name": "instamancer",
  3 |     "version": "3.3.1",
  4 |     "description": "Scrape the Instagram API with Puppeteer",
  5 |     "main": "index.js",
  6 |     "types": "index.d.ts",
  7 |     "bin": {
  8 |         "instamancer": "src/cli.js"
  9 |     },
 10 |     "man": [
 11 |         "./man/instamancer.1"
 12 |     ],
 13 |     "files": [
 14 |         "index.js",
 15 |         "index.d.ts",
 16 |         "src/**/*.js",
 17 |         "src/**/*.d.ts",
 18 |         "plugins/*.js",
 19 |         "plugins/*.d.ts",
 20 |         "plugins/**/*.js",
 21 |         "plugins/**/*.d.ts"
 22 |     ],
 23 |     "scripts": {
 24 |         "build": "tsc",
 25 |         "prepack": "tsc --declaration",
 26 |         "test": "jest --env=node",
 27 |         "test:ci": "jest --forceExit --env=node",
 28 |         "lint": "tslint -p tsconfig.json -p tests/tsconfig.json",
 29 |         "lint:fix": "npm run lint -- --fix",
 30 |         "prettier": "prettier --write \"{src,tests}/**/*.ts\"",
 31 |         "clean": "rimraf src/**/*{.js,.d.ts} src/*{.js,.d.ts} plugins/**/*{.js,.d.ts} plugins/*{.js,.d.ts} tests/**/*{.js,.d.ts} tests/*{.js,.d.ts} examples/*{.js,.d.ts} index{.js,.d.ts} *.log"
 32 |     },
 33 |     "author": "ScriptSmith",
 34 |     "license": "MIT",
 35 |     "keywords": [
 36 |         "instagram",
 37 |         "instagram api",
 38 |         "data mining",
 39 |         "scraping"
 40 |     ],
 41 |     "dependencies": {
 42 |         "await-lock": "^2.0.1",
 43 |         "aws-sdk": "^2.715.0",
 44 |         "axios": "^0.19.2",
 45 |         "chalk": "^4.1.0",
 46 |         "env-paths": "^2.2.0",
 47 |         "fp-ts": "^2.7.0",
 48 |         "io-ts": "^2.2.9",
 49 |         "io-ts-excess": "^1.0.1",
 50 |         "json2csv": "^5.0.1",
 51 |         "lodash": "^4.17.19",
 52 |         "puppeteer": "^5.2.0",
 53 |         "tmp": "^0.2.1",
 54 |         "uuid": "^8.2.0",
 55 |         "winston": "^3.3.3",
 56 |         "yargs": "^15.4.1"
 57 |     },
 58 |     "engines": {
 59 |         "node": ">=10.15.0"
 60 |     },
 61 |     "repository": {
 62 |         "type": "git",
 63 |         "url": "git@github.com:ScriptSmith/instamancer.git"
 64 |     },
 65 |     "devDependencies": {
 66 |         "@types/aws-sdk": "^2.7.0",
 67 |         "@types/concat-stream": "^1.6.0",
 68 |         "@types/express": "^4.17.7",
 69 |         "@types/jest": "^26.0.4",
 70 |         "@types/json2csv": "^5.0.1",
 71 |         "@types/node": "^14.0.23",
 72 |         "@types/tmp": "^0.2.0",
 73 |         "@types/uuid": "^8.0.0",
 74 |         "@types/yargs": "^15.0.5",
 75 |         "express": "^4.17.1",
 76 |         "husky": "^4.2.5",
 77 |         "jest": "^26.1.0",
 78 |         "lint-staged": "^10.2.11",
 79 |         "prettier": "^2.0.5",
 80 |         "rimraf": "^3.0.2",
 81 |         "transform-json-types": "^0.7.0",
 82 |         "ts-jest": "^26.1.3",
 83 |         "tslint": "^6.1.2",
 84 |         "typescript": "^3.9.7"
 85 |     },
 86 |     "jest": {
 87 |         "coverageDirectory": "./coverage/",
 88 |         "collectCoverage": true,
 89 |         "preset": "ts-jest",
 90 |         "transform": {
 91 |             "^.+\\.(ts|tsx)$": "ts-jest"
 92 |         }
 93 |     },
 94 |     "husky": {
 95 |         "hooks": {
 96 |             "pre-commit": "lint-staged && npm run lint"
 97 |         }
 98 |     },
 99 |     "prettier": {
100 |         "trailingComma": "all",
101 |         "arrowParens": "always",
102 |         "bracketSpacing": false,
103 |         "tabWidth": 4
104 |     },
105 |     "lint-staged": {
106 |         "*.json": [
107 |             "prettier --write",
108 |             "git add"
109 |         ],
110 |         "*.ts": [
111 |             "prettier --write",
112 |             "tslint --fix",
113 |             "git add"
114 |         ]
115 |     }
116 | }
117 | 


--------------------------------------------------------------------------------
/plugins/README.md:
--------------------------------------------------------------------------------
 1 | # Plugins
 2 | 
 3 | Plugins allow you to modify instamancer's functionality and behavior while gathering data.
 4 | 
 5 | The following internal plugins are included with instamancer (but not enabled by default):
 6 | 
 7 | |Plugin    |Description                                                        |
 8 | |----------|-------------------------------------------------------------------|
 9 | |LargeFirst|Increase the `first` parameter in API requests to ask for more data|
10 | 
11 | ## Using plugins with the CLI
12 | 
13 | Example:
14 | 
15 | ``` 
16 | instamancer hashtag puppies -c1000 --plugin LargeFirst --plugin MyPlugin
17 | ```
18 | 
19 | ## Using external plugins with the CLI
20 | 
21 | To install external plugins, you need to clone and install instamancer from source
22 | 
23 | Steps:
24 | 
25 | 1. Clone the instamancer repository
26 | 2. Install instamancer's dependencies
27 | 3. Install the plugin with npm / yarn
28 | 4. Add the plugin to `plugins/plugins/index.ts` 
29 | 
30 |     Example:
31 | 
32 |     ```typescript
33 |     export { MyPlugin } from "myplugin";
34 |     ```
35 | 
36 | 
37 | 5. Install instamancer 
38 |     1. You can skip this step if you want to run the CLI from source
39 | 6. Run the CLI with the plugin:
40 | 
41 | 
42 |     Example:
43 | 
44 |     ```
45 |     instamancer hashtag puppies -c100 --plugin MyPlugin
46 |     ```
47 | 
48 | 
49 | ## Using plugins with the module
50 | 
51 | Add the plugin to the `options` :
52 | 
53 | ``` typescript
54 | import * as instamancer from ".";
55 | 
56 | const options: instamancer.IOptions = {
57 |     plugins: [new instamancer.plugins.LargeFirst()],
58 |     silent: true,
59 |     total: 100,
60 | };
61 | const hashtag = instamancer.createApi("hashtag", "puppies", options);
62 | 
63 | (async () => {
64 |     for await (const post of hashtag.generator()) {
65 |         console.log(post);
66 |     }
67 | })();
68 | 
69 | ```
70 | 


--------------------------------------------------------------------------------
/plugins/index.ts:
--------------------------------------------------------------------------------
1 | import * as allPlugins from "./plugins";
2 | 
3 | export const plugins = allPlugins;
4 | export * from "./plugin";
5 | 


--------------------------------------------------------------------------------
/plugins/plugin.ts:
--------------------------------------------------------------------------------
 1 | import * as puppeteer from "puppeteer";
 2 | import {Instagram, TFullApiPost, TPost, TSearchResult, TSinglePost} from "..";
 3 | /** Union of the post and search-result types imported from the package root. */
 4 | export type DType = TPost | TSinglePost | TFullApiPost | TSearchResult;
 5 | /** Type of `this` inside plugin hooks: the plugin itself plus an `Instagram` instance. */
 6 | export interface IPluginContext<Plugin, PostType> {
 7 |     plugin: Plugin;
 8 |     state: Instagram<PostType>;
 9 | }
10 | /** Hooks a plugin may implement; all are optional and each declares `this` as an IPluginContext. */
11 | export interface IPlugin<PostType> {
12 |     constructionEvent?(this: IPluginContext<IPlugin<PostType>, PostType>): void; // the only hook that returns void rather than a Promise
13 |     /** Receives a puppeteer request and its overrides object (presumably before the request continues — confirm against the caller). */
14 |     requestEvent?(
15 |         this: IPluginContext<IPlugin<PostType>, PostType>,
16 |         req: puppeteer.Request,
17 |         overrides: puppeteer.Overrides,
18 |     ): Promise<void>;
19 |     /** Receives a puppeteer response and a key/value `data` object (presumably the parsed body — confirm against the caller). */
20 |     responseEvent?(
21 |         this: IPluginContext<IPlugin<PostType>, PostType>,
22 |         res: puppeteer.Response,
23 |         data: {[key: string]: any},
24 |     ): Promise<void>;
25 |     /** Receives a single `PostType` value. */
26 |     postPageEvent?(
27 |         this: IPluginContext<IPlugin<PostType>, PostType>,
28 |         data: PostType,
29 |     ): Promise<void>;
30 |     /** Takes no arguments besides the `this` context. */
31 |     graftingEvent?(
32 |         this: IPluginContext<IPlugin<PostType>, PostType>,
33 |     ): Promise<void>;
34 | }
35 | /** Names of the asynchronous plugin events. NOTE(review): `browser` has no matching `browserEvent` hook in IPlugin — confirm. */
36 | export enum AsyncPluginEvents {
37 |     browser,
38 |     grafting,
39 |     postPage,
40 |     request,
41 |     response,
42 | }
43 | /** String union of the AsyncPluginEvents member names. */
44 | export type AsyncPluginEventsType = keyof typeof AsyncPluginEvents;
45 | /** Names of the synchronous plugin events; `construction` matches IPlugin.constructionEvent, which returns void. */
46 | export enum SyncPluginEvents {
47 |     construction,
48 | }
49 | /** String union of the SyncPluginEvents member names. */
50 | export type SyncPluginEventsType = keyof typeof SyncPluginEvents;
51 | /** Every plugin event name, synchronous or asynchronous. */
52 | export type PluginEventsType = SyncPluginEventsType | AsyncPluginEventsType;
53 | 


--------------------------------------------------------------------------------
/plugins/plugins/index.ts:
--------------------------------------------------------------------------------
1 | export {LargeFirst} from "./largeFirst";
2 | 
3 | // Add your own plugins here
4 | 


--------------------------------------------------------------------------------
/plugins/plugins/largeFirst.ts:
--------------------------------------------------------------------------------
 1 | import {Overrides, Request} from "puppeteer";
 2 | import * as querystring from "querystring";
 3 | import {format as urlFormat, parse as urlParse} from "url";
 4 | import {IPlugin, IPluginContext} from "../plugin";
 5 | 
 6 | export class LargeFirst<PostType> implements IPlugin<PostType> {
 7 |     public constructionEvent(
 8 |         this: IPluginContext<IPlugin<PostType>, PostType>,
 9 |     ): void {
10 |         this.state.jumpSize = 150;
11 |     }
12 | 
13 |     public async requestEvent(req: Request, overrides: Overrides) {
14 |         const url = overrides["url"] ? overrides["url"] : req.url();
15 |         const parsedUrl = urlParse(url);
16 |         const query = querystring.parse(parsedUrl.query);
17 |         const variables = JSON.parse(query["variables"] as string);
18 | 
19 |         variables.first = 50;
20 | 
21 |         query.variables = JSON.stringify(variables);
22 |         parsedUrl.search = "?" + querystring.stringify(query);
23 |         overrides["url"] = urlFormat(parsedUrl);
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/src/api/api.ts:
--------------------------------------------------------------------------------
  1 | import {Type} from "io-ts";
  2 | import {Browser} from "puppeteer";
  3 | import * as winston from "winston";
  4 | import {DType, IPlugin} from "../../plugins";
  5 | import {Instagram} from "./instagram";
  6 | import {
  7 |     ISearchOptions,
  8 |     ISearchOptionsPlugins,
  9 |     Search,
 10 |     TSearchResult,
 11 | } from "./search";
 12 | import {
 13 |     FullApiPost,
 14 |     Post as PostValidator,
 15 |     SinglePost,
 16 |     TFullApiPost,
 17 |     TPost,
 18 |     TSinglePost,
 19 | } from "./types";
 20 | 
/**
 * Optional arguments for the API
 *
 * Defaults quoted below are the ones applied by Instagram.defaultOptions.
 */
export interface IOptionsCommon {
    // Total posts to download. 0 for unlimited (default 0)
    total?: number;

    // Run Chrome in headless mode (default true)
    headless?: boolean;

    // Logging events (defaults to a silent winston logger)
    logger?: winston.Logger;

    // Run without output to stdout (default true)
    silent?: boolean;

    // Time to sleep between interactions with the page (default 2)
    sleepTime?: number;

    // Throw an error if type validation fails
    strict?: boolean;

    // Time to sleep when rate-limited (default 60 * 20, i.e. 20 minutes)
    hibernationTime?: number;

    // Enable the grafting process (default true)
    enableGrafting?: boolean;

    // Use the same browser instance when grafting (default false)
    sameBrowser?: boolean;

    // Extract the full amount of information from the API (default false)
    fullAPI?: boolean;

    // Use a proxy in Chrome to connect to Instagram
    proxyURL?: string;

    // Location of the chromium / chrome binary executable
    executablePath?: string;

    // Custom io-ts validator, used instead of the built-in one when provided
    validator?: Type<unknown>;

    // Pass a puppeteer Browser instance from outside.
    // The API does not close a Browser passed in this way, so close it
    // yourself once it is no longer needed.
    browserInstance?: Browser;
}
 68 | 
// Options with the full API explicitly enabled
export interface IOptionsFullApi extends IOptionsCommon {
    fullAPI: true;
}

// Options with the full API disabled or left unspecified
export interface IOptionsRegular extends IOptionsCommon {
    fullAPI?: false;
}

// Full-API options that may also carry plugins
export interface IOptionsFullApiPlugins<PostType> extends IOptionsFullApi {
    plugins?: IPlugin<PostType>[];
}

// Regular options that may also carry plugins
export interface IOptionsRegularPlugins<PostType> extends IOptionsRegular {
    plugins?: IPlugin<PostType>[];
}

// Any of the option shapes accepted by the API classes
export type IOptions =
    | IOptionsFullApi
    | IOptionsRegular
    | IOptionsFullApiPlugins<DType>
    | IOptionsRegularPlugins<DType>;
 90 | 
 91 | /**
 92 |  * An Instagram post API wrapper
 93 |  */
 94 | export class Post extends Instagram<TSinglePost> {
 95 |     // Post ids
 96 |     private readonly ids: string[];
 97 | 
 98 |     constructor(ids: string[], options: IOptions = {}) {
 99 |         // fullAPI option makes no sense for Post class
100 |         // But usage with fullAPI option brings an extra post, because of scrapeDefaultPosts
101 |         // So we force it to be disabled
102 |         options.fullAPI = false;
103 |         super(
104 |             "https://instagram.com/p/[id]",
105 |             ids[0],
106 |             "",
107 |             "",
108 |             options,
109 |             SinglePost,
110 |         );
111 |         this.ids = ids;
112 |     }
113 | 
114 |     /**
115 |      * Get the post metadata
116 |      */
117 |     protected async getNext() {
118 |         for (const id of this.ids) {
119 |             this.id = id;
120 |             await this.postPage(id, 5);
121 |             await this.sleep(2);
122 |         }
123 |         this.finished = true;
124 |     }
125 | }
126 | 
/**
 * Choose the validator matching the requested level of detail:
 * the full API post validator when `fullAPI` is set, the regular one otherwise.
 */
const getPageValidator = (options: IOptions) => {
    if (options.fullAPI) {
        return FullApiPost;
    }
    return PostValidator;
};
129 | 
// API instances returned by createApi for "hashtag" / "user" without fullAPI
export type InstagramPostClass = Hashtag<TPost> | User<TPost>;
// API instances returned by createApi for "hashtag" / "user" with fullAPI
export type InstagramFullPostClass = Hashtag<TFullApiPost> | User<TFullApiPost>;
132 | 
/**
 * Create the API wrapper matching `type`.
 *
 * @param type kind of resource: "search", "post", "hashtag" or "user"
 * @param id search query, hashtag / user id, or (for "post") a list of post ids
 * @param options configuration details; may be omitted
 * @returns a Search, Post, Hashtag or User instance
 * @throws Error when `type` is not one of the supported kinds
 */
export function createApi(
    type: "search",
    query: string,
    options?: ISearchOptions | ISearchOptionsPlugins<TSearchResult>,
): Search;
export function createApi(type: "post", id: string[], options?: IOptions): Post;
export function createApi(
    type: "hashtag" | "user",
    id: string,
    options?: IOptionsRegular | IOptionsRegularPlugins<InstagramPostClass>,
): InstagramPostClass;
export function createApi(
    type: "hashtag" | "user",
    id: string,
    options?: IOptionsFullApi | IOptionsFullApiPlugins<InstagramFullPostClass>,
): InstagramFullPostClass;

export function createApi(
    type: "hashtag" | "user" | "post" | "search",
    id: string | string[],
    options?: IOptions,
): Post | InstagramPostClass | InstagramFullPostClass | Search {
    let ClassConstructor: typeof Hashtag | typeof User;
    switch (type) {
        case "search":
            return new Search(id as string, options as ISearchOptions);
        case "post":
            return new Post(id as string[], options);
        case "hashtag":
            ClassConstructor = Hashtag;
            break;
        case "user":
            ClassConstructor = User;
            break;
        default:
            // Reachable only from untyped callers; fail with a clear message
            // instead of trying to construct `undefined`
            throw new Error("Unknown API type: " + String(type));
    }
    // options is optional, so it must not be dereferenced unconditionally.
    // When omitted it is passed through and the constructors apply their
    // own defaults.
    if (options && options.fullAPI) {
        return new ClassConstructor<TFullApiPost>(id as string, options);
    }
    return new ClassConstructor<TPost>(id as string, options);
}
173 | 
/**
 * API wrapper for the posts of an Instagram hashtag
 */
export class Hashtag<T> extends Instagram<T> {
    /**
     * @param id the hashtag, substituted into the explore url
     * @param options configuration details
     */
    constructor(id: string, options: IOptions = {}) {
        // Where paging information and posts live in the API response
        const pageInfoPath = "data.hashtag.edge_hashtag_to_media.page_info";
        const edgesPath = "data.hashtag.edge_hashtag_to_media.edges";
        const validator = getPageValidator(options);

        super(
            "https://instagram.com/explore/tags/[id]",
            id,
            pageInfoPath,
            edgesPath,
            options,
            validator,
        );
    }
}
189 | 
/**
 * API wrapper for the posts of an Instagram user
 */
export class User<T> extends Instagram<T> {
    // Polls once a second for a div whose markup starts with
    // "Show More Posts from" and clicks its grandparent element.
    // Polling stops after the click, or once more than ten polls found nothing.
    defaultPageFunctions = [
        /* istanbul ignore next */
        () => {
            let polls = 0;
            const poller = setInterval(() => {
                const showMore = Array.from(
                    document.getElementsByTagName("div"),
                ).find((div) =>
                    div.innerHTML.startsWith("Show More Posts from"),
                );

                polls += 1;

                if (showMore !== undefined) {
                    showMore.parentElement.parentElement.click();
                    clearInterval(poller);
                    return;
                }
                if (polls > 10) {
                    clearInterval(poller);
                }
            }, 1000);
        },
    ];

    /**
     * @param id the user name, substituted into the profile url
     * @param options configuration details
     */
    constructor(id: string, options: IOptions = {}) {
        super(
            "https://instagram.com/[id]",
            id,
            "data.user.edge_owner_to_timeline_media.page_info",
            "data.user.edge_owner_to_timeline_media.edges",
            options,
            getPageValidator(options),
        );
    }
}
228 | 


--------------------------------------------------------------------------------
/src/api/instagram.ts:
--------------------------------------------------------------------------------
   1 | import AwaitLock from "await-lock";
   2 | import chalk from "chalk";
   3 | import {isLeft} from "fp-ts/lib/Either";
   4 | import {Type} from "io-ts";
   5 | import {PathReporter} from "io-ts/lib/PathReporter";
   6 | import {ThrowReporter} from "io-ts/lib/ThrowReporter";
   7 | import * as _ from "lodash/object";
   8 | import {
   9 |     Browser,
  10 |     Headers,
  11 |     launch,
  12 |     LaunchOptions,
  13 |     Page,
  14 |     Request,
  15 |     Response,
  16 | } from "puppeteer";
  17 | import * as winston from "winston";
  18 | import {
  19 |     AsyncPluginEventsType,
  20 |     IPlugin,
  21 |     IPluginContext,
  22 |     PluginEventsType,
  23 |     SyncPluginEvents,
  24 |     SyncPluginEventsType,
  25 | } from "../../plugins";
  26 | import {IOptions} from "./api";
  27 | import {PostIdSet} from "./postIdSet";
  28 | 
// One list of Promise-returning handlers per asynchronous event name
type AsyncPluginFunctions = {
    [key in AsyncPluginEventsType]: ((...args: any[]) => Promise<void>)[];
};
// One list of plain handlers per synchronous event name
type SyncPluginFunctions = {
    [key in SyncPluginEventsType]: ((...args: any[]) => void)[];
};
// Handler lists for every plugin event, synchronous and asynchronous
type PluginFunctions = AsyncPluginFunctions & SyncPluginFunctions;
  36 | 
  37 | /**
  38 |  * Instagram API wrapper
  39 |  */
  40 | export class Instagram<PostType> {
    /**
     * Apply defaults to undefined options
     *
     * The given object is modified in place and also returned. Only options
     * that are strictly `undefined` are filled in; `strict`, `proxyURL`,
     * `executablePath`, `validator` and `browserInstance` get no default.
     */
    private static defaultOptions(options: IOptions) {
        if (options.enableGrafting === undefined) {
            options.enableGrafting = true;
        }
        if (options.sameBrowser === undefined) {
            options.sameBrowser = false;
        }
        if (options.fullAPI === undefined) {
            options.fullAPI = false;
        }
        if (options.headless === undefined) {
            options.headless = true;
        }
        if (options.logger === undefined) {
            // A logger is only created when none was supplied; it is silent
            options.logger = winston.createLogger({
                silent: true,
            });
        }
        if (options.silent === undefined) {
            options.silent = true;
        }
        if (options.sleepTime === undefined) {
            options.sleepTime = 2;
        }
        if (options.hibernationTime === undefined) {
            // 20 minutes
            options.hibernationTime = 60 * 20;
        }
        if (options.total === undefined) {
            // 0 means unlimited
            options.total = 0;
        }
        return options;
    }
  76 | 
    // Resource identifier
    public id: string;
    // Endpoint url with "[id]" replaced by the identifier
    public url: string;

    // Iteration state
    public started: boolean = false;
    public paused: boolean = false;
    public finished: boolean = false;
    // Why collection finished; set together with `finished` by finish()
    public finishedReason: FinishedReasons;

    // Instagram URLs
    public catchURL: string = "https://www.instagram.com/graphql/query";
    public postURL: string = "https://www.instagram.com/p/";
    public defaultPostURL: string = "https://www.instagram.com/p/";

    // Number of jumps before grafting
    public jumpMod: number = 100;

    // Depth of jumps
    public jumpSize: number = 2;

    // Puppeteer resources
    // No initial value: undefined until a page has been constructed
    public page: Page;

    // Logging object
    public logger: winston.Logger;

    // Implementation-specific page functions
    public defaultPageFunctions: (() => void)[] = [];

    // Validations
    private readonly strict: boolean = false;
    private readonly validator: Type<unknown>;

    // Puppeteer state
    private browser: Browser;
    private browserDisconnected: boolean = true;
    // Browser supplied through the options; never closed by stop()
    private readonly browserInstance?: Browser;
    private readonly headless: boolean;

    // Array of scraped posts and lock
    private postBuffer: PostType[] = [];
    private postBufferLock: AwaitLock = new AwaitLock();

    // Request and Response buffers and locks
    private requestBuffer: Request[] = [];
    private requestBufferLock: AwaitLock = new AwaitLock();
    private responseBuffer: Response[] = [];
    private responseBufferLock: AwaitLock = new AwaitLock();

    // Get full amount of data from API
    private readonly fullAPI: boolean = false;
    // Pending post page visits started in fullAPI mode
    private pagePromises: Promise<void>[] = [];

    // Grafting state
    private readonly enableGrafting: boolean = true;
    private readonly sameBrowser: boolean = false;
    private graft: boolean = false;
    private graftURL: string = null;
    private graftHeaders: Headers = null;
    private foundGraft: boolean = false;

    // Hibernation due to rate limiting
    private hibernate: boolean = false;
    private readonly hibernationTime: number = 60 * 20; // 20 minutes

    // Number of jumps before exiting because lack of data
    private failedJumps: number = 20;
    // Set once a matching API response has been seen
    private responseFromAPI: boolean = false;

    // Strings denoting the access methods of API objects
    private readonly pageQuery: string;
    private readonly edgeQuery: string;

    // Cache of post ids
    private postIds: PostIdSet;

    // Iteration variables
    // Maximum number of posts to collect, 0 for unlimited
    private readonly total: number;
    private index: number = 0;
    private jumps: number = 0;

    // Number of times to attempt to visit url initially
    private readonly maxPageUrlAttempts = 3;
    private pageUrlAttempts = 0;
    // Number of retries given to each post page visit in fullAPI mode
    private postPageRetries = 5;

    // Output
    // The constructor overwrites this with options.silent
    private readonly silent: boolean = false;
    private writeLock: AwaitLock = new AwaitLock();

    // Sleep time remaining
    private sleepRemaining: number = 0;

    // Length of time to sleep for
    private readonly sleepTime: number = 2;

    // Proxy for Instagram connection
    private readonly proxyURL: string;

    // Location of chromium / chrome binary executable
    private readonly executablePath: string;

    // Plugins to be run
    // One handler list per event name
    private pluginFunctions: PluginFunctions = {
        browser: [],
        construction: [],
        grafting: [],
        postPage: [],
        request: [],
        response: [],
    };
 189 | 
 190 |     /**
 191 |      * Create API wrapper instance
 192 |      * @param endpoint the url for the type of resource to scrape
 193 |      * @param id the identifier for the resource
 194 |      * @param pageQuery the query to identify future pages in the nested API structure
 195 |      * @param edgeQuery the query to identify posts in the nested API structure
 196 |      * @param options configuration details
 197 |      * @param validator response type validator
 198 |      */
 199 |     constructor(
 200 |         endpoint: string,
 201 |         id: string,
 202 |         pageQuery: string,
 203 |         edgeQuery: string,
 204 |         options: IOptions = {},
 205 |         validator: Type<unknown>,
 206 |     ) {
 207 |         this.id = id;
 208 |         this.postIds = new PostIdSet();
 209 |         this.url = endpoint.replace("[id]", id);
 210 | 
 211 |         options = Instagram.defaultOptions(options);
 212 |         this.total = options.total;
 213 |         this.pageQuery = pageQuery;
 214 |         this.edgeQuery = edgeQuery;
 215 |         this.browserInstance = options.browserInstance;
 216 |         this.headless = options.headless;
 217 |         this.logger = options.logger;
 218 |         this.silent = options.silent;
 219 |         this.strict = options.strict;
 220 |         this.enableGrafting = options.enableGrafting;
 221 |         this.sameBrowser = options.sameBrowser;
 222 |         this.sleepTime = options.sleepTime;
 223 |         this.hibernationTime = options.hibernationTime;
 224 |         this.fullAPI = options.fullAPI;
 225 |         this.proxyURL = options.proxyURL;
 226 |         this.executablePath = options.executablePath;
 227 |         this.validator = options.validator || validator;
 228 | 
 229 |         this.addPlugins(options["plugins"]);
 230 |         this.executePlugins("construction");
 231 |     }
 232 | 
 233 |     /**
 234 |      * Toggle pausing data collection
 235 |      */
 236 |     public pause() {
 237 |         this.paused = !this.paused;
 238 |     }
 239 | 
    /**
     * Request prolonged pausing (hibernation)
     *
     * Despite its name this does not toggle: it only ever sets the
     * `hibernate` flag to true.
     */
    public toggleHibernation() {
        this.hibernate = true;
    }
 246 | 
 247 |     /**
 248 |      * Force the API to stop
 249 |      */
 250 |     public async forceStop(force?: boolean) {
 251 |         if (!force && !this.started) {
 252 |             return;
 253 |         }
 254 |         this.started = false;
 255 |         this.finish(FinishedReasons.FORCED_STOP);
 256 |         try {
 257 |             this.requestBufferLock.release();
 258 |             // tslint:disable-next-line: no-empty
 259 |         } catch (e) {}
 260 |         try {
 261 |             this.responseBufferLock.release();
 262 |             // tslint:disable-next-line: no-empty
 263 |         } catch (e) {}
 264 |         await this.stop();
 265 |     }
 266 | 
 267 |     /**
 268 |      * Generator of posts on page
 269 |      */
 270 |     public async *generator(): AsyncIterableIterator<PostType> {
 271 |         // Start if haven't done so already
 272 |         if (!this.started) {
 273 |             await this.start();
 274 |         }
 275 | 
 276 |         while (true) {
 277 |             // Get more posts
 278 |             await this.getNext();
 279 | 
 280 |             // Yield posts from buffer
 281 |             let post = await this.postPop();
 282 |             while (post) {
 283 |                 yield post;
 284 |                 post = await this.postPop();
 285 |             }
 286 | 
 287 |             // End loop when finished, check for pagePromises if fullAPI
 288 |             if (this.finished && this.pagePromises.length === 0) {
 289 |                 break;
 290 |             }
 291 |         }
 292 |         await this.stop();
 293 | 
 294 |         // Add newline to end of output
 295 |         if (!this.silent) {
 296 |             process.stdout.write("\n");
 297 |         }
 298 |     }
 299 | 
    /**
     * Construct page and add listeners
     *
     * Tries to construct the page up to `maxPageUrlAttempts` times. When
     * every attempt fails the API is force-stopped and an error is thrown.
     * Otherwise the "browser" plugins are run, the API is marked as started,
     * request interception and the page listeners are installed and, in
     * fullAPI mode, the default posts are scraped.
     *
     * @throws Error("Failed to visit URL") when the page cannot be constructed
     */
    public async start() {
        // Build page and visit url, retrying a limited number of times
        let pageConstructed: boolean;
        this.pageUrlAttempts = 0;
        while (this.pageUrlAttempts++ < this.maxPageUrlAttempts) {
            pageConstructed = await this.constructPage();
            if (pageConstructed) {
                break;
            }
        }
        if (!pageConstructed) {
            // force = true because `started` has not been set yet
            await this.forceStop(true);
            throw new Error("Failed to visit URL");
        }

        // Run "browser" plugins now that the page has been constructed
        await this.executePlugins("browser");

        this.started = true;

        // Add event listeners for requests and responses
        await this.page.setRequestInterception(true);
        this.page.on("request", (req) => this.interceptRequest(req));
        this.page.on("response", (res) => this.interceptResponse(res));
        this.page.on("requestfailed", (res) => this.interceptFailure(res));
        this.page.on("console", (message) =>
            this.logger.info("Console log", {message}),
        );

        // Ignore dialog boxes
        this.page.on("dialog", (dialog) => dialog.dismiss());

        // Log errors
        /* istanbul ignore next */
        this.page.on("error", (error) =>
            this.logger.error("Console error", {error}),
        );

        // Gather initial posts from web page
        if (this.fullAPI) {
            await this.scrapeDefaultPosts();
        }
    }
 345 | 
 346 |     /**
 347 |      * Match the url to the url used in API requests
 348 |      */
 349 |     public matchURL(url: string) {
 350 |         return url.startsWith(this.catchURL) && !url.includes("include_reel");
 351 |     }
 352 | 
 353 |     /**
 354 |      * Close the page and browser
 355 |      */
 356 |     protected async stop() {
 357 |         await this.progress(Progress.CLOSING);
 358 | 
 359 |         // Remove listeners
 360 |         if (!this.page.isClosed()) {
 361 |             this.page.removeAllListeners("request");
 362 |             this.page.removeAllListeners("response");
 363 |             this.page.removeAllListeners("requestfailed");
 364 |         }
 365 | 
 366 |         // Clear request buffers
 367 |         await this.requestBufferLock.acquireAsync();
 368 |         this.requestBuffer = [];
 369 |         this.requestBufferLock.release();
 370 | 
 371 |         // Clear response buffers
 372 |         await this.responseBufferLock.acquireAsync();
 373 |         this.responseBuffer = [];
 374 |         this.responseBufferLock.release();
 375 | 
 376 |         // Wait for pagePromises to empty
 377 |         while (true) {
 378 |             if (this.pagePromises.length === 0) {
 379 |                 break;
 380 |             } else {
 381 |                 /* istanbul ignore next */
 382 |                 await this.sleep(1);
 383 |             }
 384 |         }
 385 | 
 386 |         // Close page
 387 |         if (!this.page.isClosed()) {
 388 |             await this.page.close();
 389 |         }
 390 | 
 391 |         if (!this.browserDisconnected && !this.browserInstance) {
 392 |             await this.browser.close();
 393 |         }
 394 |     }
 395 | 
 396 |     /**
 397 |      * Finish retrieving data for the generator
 398 |      */
 399 |     protected finish(reason: FinishedReasons) {
 400 |         this.finished = true;
 401 |         this.finishedReason = reason;
 402 |         this.logger.info("Finished collecting", {reason});
 403 |     }
 404 | 
 405 |     /**
 406 |      * Process the requests in the request buffer
 407 |      */
 408 |     protected async processRequests() {
 409 |         await this.requestBufferLock.acquireAsync();
 410 | 
 411 |         let newApiRequest = false;
 412 |         for (const req of this.requestBuffer) {
 413 |             // Match url
 414 |             if (!this.matchURL(req.url())) {
 415 |                 continue;
 416 |             } else {
 417 |                 newApiRequest = true;
 418 |             }
 419 | 
 420 |             // Begin grafting if required, else continue the request
 421 |             if (this.graft) {
 422 |                 if (this.foundGraft === false) {
 423 |                     // Gather details
 424 |                     this.graftURL = req.url();
 425 |                     this.graftHeaders = req.headers();
 426 |                     this.foundGraft = true;
 427 | 
 428 |                     // Cancel request
 429 |                     await req.abort();
 430 |                 } else {
 431 |                     // Swap request
 432 |                     const overrides = {
 433 |                         headers: this.graftHeaders,
 434 |                         url: this.graftURL,
 435 |                     };
 436 |                     await this.executePlugins("request", req, overrides);
 437 |                     await req.continue(overrides);
 438 | 
 439 |                     // Reset grafting data
 440 |                     this.graft = false;
 441 |                     this.foundGraft = false;
 442 |                     this.graftURL = null;
 443 |                     this.graftHeaders = null;
 444 |                 }
 445 | 
 446 |                 // Stop reading requests
 447 |                 break;
 448 |             } else {
 449 |                 const overrides = {};
 450 |                 this.executePlugins("request", req, overrides);
 451 |                 await req.continue(overrides);
 452 |             }
 453 |         }
 454 | 
 455 |         // Clear buffer and release
 456 |         this.requestBuffer = [];
 457 |         this.requestBufferLock.release();
 458 | 
 459 |         if (this.foundGraft && newApiRequest) {
 460 |             // Restart browser and page, clearing all buffers
 461 |             await this.stop();
 462 |             await this.start();
 463 |         }
 464 |     }
 465 | 
 466 |     /**
 467 |      * Process the responses in the response buffer
 468 |      */
 469 |     protected async processResponses() {
 470 |         await this.responseBufferLock.acquireAsync();
 471 | 
 472 |         for (const res of this.responseBuffer) {
 473 |             // Match url
 474 |             if (!this.matchURL(res.url())) {
 475 |                 continue;
 476 |             }
 477 | 
 478 |             // Acknowledge receipt of response
 479 |             this.responseFromAPI = true;
 480 | 
 481 |             // Get JSON data
 482 |             let data: unknown;
 483 |             try {
 484 |                 data = await res.json();
 485 |                 if (typeof data !== "object") {
 486 |                     this.logger.error("Response data is not an object", {data});
 487 |                     continue;
 488 |                 }
 489 |             } catch (error) {
 490 |                 this.logger.error("Error processing response JSON", {
 491 |                     data,
 492 |                     error,
 493 |                 });
 494 |                 continue;
 495 |             }
 496 | 
 497 |             // Emit event
 498 |             this.executePlugins("response", res, data);
 499 | 
 500 |             // Check for rate limiting
 501 |             if (data && "status" in data && data["status"] === "fail") {
 502 |                 this.logger.info("Rate limited");
 503 |                 this.hibernate = true;
 504 |                 continue;
 505 |             }
 506 | 
 507 |             // Check for next page
 508 |             if (
 509 |                 !(
 510 |                     _.get(data, this.pageQuery + ".has_next_page", false) &&
 511 |                     _.get(data, this.pageQuery + ".end_cursor", false)
 512 |                 )
 513 |             ) {
 514 |                 this.logger.info("No posts remaining", {data});
 515 |                 this.finish(FinishedReasons.API_FINISHED);
 516 |             }
 517 | 
 518 |             await this.processResponseData(data);
 519 |         }
 520 | 
 521 |         // Clear buffer and release
 522 |         this.responseBuffer = [];
 523 |         this.responseBufferLock.release();
 524 |     }
 525 | 
 526 |     protected async processResponseData(data: unknown) {
 527 |         // Get posts
 528 |         const posts = _.get(data, this.edgeQuery, []);
 529 |         for (const post of posts) {
 530 |             const postId = post["node"]["id"];
 531 | 
 532 |             // Check it hasn't already been cached
 533 |             const contains = this.postIds.add(postId);
 534 |             if (contains) {
 535 |                 this.logger.info("Duplicate id found", {postId});
 536 |                 continue;
 537 |             }
 538 | 
 539 |             // Add to postBuffer
 540 |             if (this.index < this.total || this.total === 0) {
 541 |                 this.index++;
 542 |                 if (this.fullAPI) {
 543 |                     this.pagePromises.push(
 544 |                         this.postPage(
 545 |                             post["node"]["shortcode"],
 546 |                             this.postPageRetries,
 547 |                         ),
 548 |                     );
 549 |                 } else {
 550 |                     await this.addToPostBuffer(post);
 551 |                 }
 552 |             } else {
 553 |                 this.finish(FinishedReasons.TOTAL_REACHED_API);
 554 |                 break;
 555 |             }
 556 |         }
 557 |     }
 558 | 
    /**
     * Open a post in a new page, then extract its metadata
     *
     * Failures to navigate, evaluate or parse are handed to
     * handlePostPageError, which retries while retries remain. On success
     * the parsed data is passed to the "postPage" plugins and added to the
     * post buffer.
     *
     * @param post shortcode of the post, appended to `postURL`
     * @param retries number of further attempts allowed after a failure
     */
    protected async postPage(post: string, retries: number) {
        // Create page
        const postPage = await this.browser.newPage();
        await postPage.setRequestInterception(true);
        // Only let through requests for the post itself; abort everything else
        postPage.on("request", async (req) => {
            if (!req.url().includes("/p/" + post)) {
                await req.abort();
            } else {
                await req.continue();
            }
        });
        postPage.on("requestfailed", async (req) => this.interceptFailure(req));

        // Visit post and read state
        let parsed;
        try {
            await postPage.goto(this.postURL + post + "/");
        } catch (error) {
            await this.handlePostPageError(
                postPage,
                error,
                "Couldn't navigate to page",
                post,
                retries,
            );
            return;
        }

        // Load data from memory
        let data;
        try {
            /* istanbul ignore next */
            data = await postPage.evaluate(async () => {
                // Wait for _sharedData value to be set
                // Polls every two seconds and gives up after a few polls; if
                // the value is still missing, the access below throws and
                // the surrounding catch handles it
                await new Promise((resolve) => {
                    let i = 0;
                    const findSharedData = setInterval(() => {
                        if (window["_sharedData"] !== undefined || i++ > 5) {
                            resolve();
                            clearInterval(findSharedData);
                        }
                    }, 2000);
                });

                // Serialised in the page and parsed again further below
                return JSON.stringify(
                    window["_sharedData"].entry_data.PostPage[0].graphql,
                );
            });
        } catch (error) /* istanbul ignore next */ {
            await this.handlePostPageError(
                postPage,
                error,
                "Couldn't evaluate on page",
                post,
                retries,
            );
            return;
        }

        // Close page
        await postPage.close();

        // Parse data to PostType
        try {
            parsed = JSON.parse(data) as PostType;
        } catch (error) /* istanbul ignore next */ {
            // The page is already closed here; the handler checks for that
            await this.handlePostPageError(
                postPage,
                error,
                "Couldn't parse page data",
                post,
                retries,
            );
            return;
        }

        await this.executePlugins("postPage", parsed);
        await this.addToPostBuffer(parsed);
    }
 641 | 
 642 |     private async handlePostPageError(
 643 |         page: Page,
 644 |         error: Error,
 645 |         message: string,
 646 |         post: string,
 647 |         retries: number,
 648 |     ) {
 649 |         // Log error and wait
 650 |         this.logger.error(message, {error});
 651 |         await this.progress(Progress.ABORTED);
 652 |         await this.sleep(2);
 653 | 
 654 |         // Close existing attempt
 655 |         if (!page.isClosed()) {
 656 |             await page.close();
 657 |         }
 658 | 
 659 |         // Retry
 660 |         if (retries > 0) {
 661 |             await this.postPage(post, --retries);
 662 |         }
 663 |     }
 664 | 
 665 |     protected async validatePost(post: PostType) {
 666 |         const validationResult = this.validator.decode(post);
 667 |         if (this.strict) {
 668 |             ThrowReporter.report(validationResult);
 669 |             return;
 670 |         }
 671 |         if (isLeft(validationResult)) {
 672 |             const validationReporter = PathReporter.report(validationResult);
 673 |             this.logger.warn(
 674 |                 `
 675 |       Warning! The Instagram API has been changed since this version of instamancer was released.
 676 |       More info: https://scriptsmith.github.io/instamancer/api-change
 677 |       `,
 678 |                 {validationReporter, post},
 679 |             );
 680 |         }
 681 |     }
 682 | 
    /**
     * Stimulate the page until responses gathered.
     *
     * Each iteration processes buffered requests and responses, awaits any
     * outstanding post page promises, and then interacts with the page.
     * The loop ends when the instance is finished, when the page has made no
     * requests at all, or when the post buffer holds at least one post.
     */
    protected async getNext() {
        await this.progress(Progress.SCRAPING);
        while (true) {
            // Process results (if any)
            await this.processRequests();
            await this.processResponses();

            // Finish page promises
            if (this.pagePromises.length > 0) {
                await this.progress(Progress.BRANCHING);
                await Promise.all(this.pagePromises);
                this.pagePromises = [];
            }

            // Check if finished
            if (this.finished) {
                break;
            }

            // Pause if paused
            await this.waitResume();

            // Interact with page to stimulate request
            await this.jump();

            // Stop if no data is being gathered
            if (this.jumps === this.failedJumps) {
                if (this.fullAPI) {
                    // No break here: the loop exits via the `finished` check
                    // at the top of the next iteration
                    if (!this.responseFromAPI) {
                        this.finish(FinishedReasons.NO_RESPONSE);
                    }
                } else if (this.index === 0) {
                    this.finish(FinishedReasons.NO_INCREMENT);

                    // Capture the page's HTML (if it can be read) for the log
                    const pageContent = {content: ""};
                    try {
                        pageContent.content = await this.page.content();
                    } catch (e) {
                        // No content
                    }

                    this.logger.error(
                        "Page failed to make requests",
                        pageContent,
                    );
                    break;
                }
            }

            // Enable grafting if required (every `jumpMod` jumps)
            if (this.jumps % this.jumpMod === 0) {
                await this.initiateGraft();
            }

            // Sleep
            await this.sleep(this.sleepTime);

            // Hibernate if rate-limited
            if (this.hibernate) {
                await this.sleep(this.hibernationTime);
                this.hibernate = false;
            }

            // Break if posts in buffer (length is read while holding the lock)
            await this.postBufferLock.acquireAsync();
            const posts = this.postBuffer.length;
            this.postBufferLock.release();
            if (posts > 0) {
                break;
            }
        }
    }
 758 | 
 759 |     /**
 760 |      * Halt execution
 761 |      * @param time Seconds
 762 |      */
 763 |     protected async sleep(time: number) {
 764 |         for (let i = time; i > 0; i--) {
 765 |             this.sleepRemaining = i;
 766 |             await this.progress(Progress.SCRAPING);
 767 | 
 768 |             await new Promise((resolve) => {
 769 |                 setTimeout(resolve, i >= 1 ? 1000 : i * 1000);
 770 |             });
 771 |         }
 772 |         this.sleepRemaining = 0;
 773 |         await this.progress(Progress.SCRAPING);
 774 |     }
 775 | 
 776 |     /**
 777 |      * Create the browser and page, then visit the url
 778 |      */
 779 |     private async constructPage(): Promise<boolean> {
 780 |         // Browser args
 781 |         const args = [];
 782 |         /* istanbul ignore if */
 783 |         if (process.env.NO_SANDBOX) {
 784 |             args.push("--no-sandbox");
 785 |             args.push("--disable-setuid-sandbox");
 786 |         }
 787 |         if (this.proxyURL !== undefined) {
 788 |             args.push("--proxy-server=" + this.proxyURL);
 789 |         }
 790 | 
 791 |         // Browser launch options
 792 |         const options: LaunchOptions = {
 793 |             args,
 794 |             headless: this.headless,
 795 |         };
 796 |         if (this.executablePath !== undefined) {
 797 |             options.executablePath = this.executablePath;
 798 |         }
 799 | 
 800 |         // Launch browser
 801 |         if (this.browserInstance) {
 802 |             await this.progress(Progress.LAUNCHING);
 803 |             this.browser = this.browserInstance;
 804 |             this.browserDisconnected = !this.browser.isConnected();
 805 |             this.browser.on(
 806 |                 "disconnected",
 807 |                 () => (this.browserDisconnected = true),
 808 |             );
 809 |         } else if (!this.sameBrowser || (this.sameBrowser && !this.started)) {
 810 |             await this.progress(Progress.LAUNCHING);
 811 |             this.browser = await launch(options);
 812 |             this.browserDisconnected = false;
 813 |             this.browser.on(
 814 |                 "disconnected",
 815 |                 () => (this.browserDisconnected = true),
 816 |             );
 817 |         }
 818 | 
 819 |         // New page
 820 |         this.page = await this.browser.newPage();
 821 |         await this.progress(Progress.OPENING);
 822 | 
 823 |         // Attempt to visit URL
 824 |         try {
 825 |             await this.page.goto(this.url);
 826 | 
 827 |             // Check page loads
 828 |             /* istanbul ignore next */
 829 |             const pageLoaded = await this.page.evaluate(() => {
 830 |                 const headings = document.querySelectorAll("h2");
 831 |                 for (const heading of Array.from(headings)) {
 832 |                     if (
 833 |                         heading.innerHTML ===
 834 |                         "Sorry, this page isn't available."
 835 |                     ) {
 836 |                         return false;
 837 |                     }
 838 |                 }
 839 |                 return true;
 840 |             });
 841 |             if (!pageLoaded) {
 842 |                 await this.handleConstructionError(
 843 |                     "Page loaded with no content",
 844 |                     10,
 845 |                 );
 846 |                 return false;
 847 |             }
 848 | 
 849 |             // Run defaultPagePlugins
 850 |             for (const f of this.defaultPageFunctions) {
 851 |                 await this.page.evaluate(f);
 852 |             }
 853 | 
 854 |             // Fix issue with disabled scrolling
 855 |             /* istanbul ignore next */
 856 |             await this.page.evaluate(() => {
 857 |                 setInterval(() => {
 858 |                     try {
 859 |                         document.body.style.overflow = "";
 860 |                     } catch (error) {
 861 |                         this.logger.error("Failed to update style", {error});
 862 |                     }
 863 |                 }, 10000);
 864 |             });
 865 |         } catch (e) {
 866 |             await this.handleConstructionError(e, 60);
 867 |             return false;
 868 |         }
 869 |         return true;
 870 |     }
 871 | 
 872 |     /***
 873 |      * Handle errors that occur during page construction
 874 |      */
 875 |     private async handleConstructionError(error: string, timeout: number) {
 876 |         // Log error and wait
 877 |         this.logger.error("Construction error", {error, url: this.url});
 878 |         await this.progress(Progress.ABORTED);
 879 |         await this.sleep(timeout);
 880 | 
 881 |         // Close existing attempt
 882 |         if (!this.page.isClosed()) {
 883 |             await this.page.close();
 884 |         }
 885 |         await this.browser.close();
 886 |     }
 887 | 
 888 |     /**
 889 |      * Pause and wait until resumed
 890 |      */
 891 |     private async waitResume() {
 892 |         // Pause for 200 milliseconds
 893 |         function f() {
 894 |             return new Promise((resolve) => {
 895 |                 setTimeout(resolve, 200);
 896 |             });
 897 |         }
 898 | 
 899 |         // Pause until pause toggled
 900 |         while (this.paused === true) {
 901 |             await this.progress(Progress.PAUSED);
 902 |             await f();
 903 |         }
 904 |     }
 905 | 
 906 |     /**
 907 |      * Pop a post off the postBuffer (using locks). Returns null if no posts in buffer
 908 |      */
 909 |     private async postPop() {
 910 |         let post = null;
 911 |         await this.postBufferLock.acquireAsync();
 912 |         if (this.postBuffer.length > 0) {
 913 |             post = this.postBuffer.shift();
 914 |         }
 915 |         this.postBufferLock.release();
 916 |         return post;
 917 |     }
 918 | 
 919 |     /**
 920 |      * Print progress to stderr
 921 |      */
 922 |     private async progress(state: Progress) {
 923 |         // End if silent
 924 |         if (this.silent) {
 925 |             return;
 926 |         }
 927 | 
 928 |         // Lock
 929 |         await this.writeLock.acquireAsync();
 930 | 
 931 |         // Calculate total
 932 |         const total = this.total === 0 ? "Unlimited" : this.total;
 933 | 
 934 |         // Generate output string
 935 |         const idStr = chalk.bgYellow.black(` ${this.id} `);
 936 |         const totalStr = chalk.bgBlack(` Total: ${total} `);
 937 |         const stateStr = chalk.bgWhite.black(` State: ${state} `);
 938 |         const sleepStr = chalk.bgWhite.black(
 939 |             ` Sleeping: ${this.sleepRemaining} `,
 940 |         );
 941 |         const indexStr = chalk.bgWhite.black(` Scraped: ${this.index} `);
 942 | 
 943 |         this.logger.debug({
 944 |             id: this.id,
 945 |             index: this.index,
 946 |             sleepRemaining: this.sleepRemaining,
 947 |             state,
 948 |             total,
 949 |         });
 950 | 
 951 |         // Print output
 952 |         process.stderr.write(
 953 |             `\r${idStr}${totalStr}${stateStr}${sleepStr}${indexStr}\u001B[K`,
 954 |         );
 955 | 
 956 |         // Release
 957 |         this.writeLock.release();
 958 |     }
 959 | 
 960 |     /**
 961 |      * Add request to the request buffer
 962 |      */
 963 |     private async interceptRequest(req: Request) {
 964 |         await this.requestBufferLock.acquireAsync();
 965 |         this.requestBuffer.push(req);
 966 |         await this.requestBufferLock.release();
 967 |     }
 968 | 
 969 |     /**
 970 |      * Add the response to the response buffer
 971 |      */
 972 |     private async interceptResponse(res: Response) {
 973 |         await this.responseBufferLock.acquireAsync();
 974 |         this.responseBuffer.push(res);
 975 |         await this.responseBufferLock.release();
 976 |     }
 977 | 
 978 |     /**
 979 |      * Log failed requests
 980 |      */
 981 |     private async interceptFailure(req: Request) {
 982 |         this.logger.info("Failed request", {url: req.url()});
 983 |         await this.progress(Progress.ABORTED);
 984 |     }
 985 | 
 986 |     /**
 987 |      * Add post to buffer
 988 |      */
 989 |     private async addToPostBuffer(post: PostType) {
 990 |         await this.postBufferLock.acquireAsync();
 991 |         await this.validatePost(post);
 992 |         this.postBuffer.push(post);
 993 |         this.postBufferLock.release();
 994 |     }
 995 | 
 996 |     /**
 997 |      * Manipulate the page to stimulate a request
 998 |      */
 999 |     private async jump() {
1000 |         await this.page.keyboard.press("PageUp");
1001 |         const jumpSize = this.graft ? 1 : this.jumpSize;
1002 |         for (let i = 0; i < jumpSize; i++) {
1003 |             await this.page.keyboard.press("End");
1004 |         }
1005 | 
1006 |         // Move mouse randomly
1007 |         const width = this.page.viewport()["width"];
1008 |         const height = this.page.viewport()["height"];
1009 |         await this.page.mouse.move(
1010 |             Math.round(width * Math.random()),
1011 |             Math.round(height * Math.random()),
1012 |         );
1013 | 
1014 |         ++this.jumps;
1015 |     }
1016 | 
1017 |     /**
1018 |      * Clear request and response buffers
1019 |      */
1020 |     private async initiateGraft() {
1021 |         // Check if enabled
1022 |         if (!this.enableGrafting) {
1023 |             return;
1024 |         }
1025 | 
1026 |         await this.progress(Progress.GRAFTING);
1027 | 
1028 |         this.executePlugins("grafting");
1029 | 
1030 |         // Enable grafting
1031 |         this.graft = true;
1032 |     }
1033 | 
1034 |     /**
1035 |      * Read the posts that are pre-loaded on the page
1036 |      */
1037 |     private async scrapeDefaultPosts() {
1038 |         // Get shortcodes from page
1039 |         /* istanbul ignore next */
1040 |         const shortCodes = await this.page.evaluate((url) => {
1041 |             return Array.from(document.links)
1042 |                 .filter((link) => {
1043 |                     return (
1044 |                         link.href.startsWith(url) &&
1045 |                         link.href.split("/").length >= 2
1046 |                     );
1047 |                 })
1048 |                 .map((link) => {
1049 |                     const linkSplit = link.href.split("/");
1050 |                     return linkSplit[linkSplit.length - 2];
1051 |                 });
1052 |         }, this.defaultPostURL);
1053 | 
1054 |         // Add postPage promises
1055 |         for (const shortCode of shortCodes) {
1056 |             if (this.index < this.total || this.total === 0) {
1057 |                 this.index++;
1058 |                 this.pagePromises.push(
1059 |                     this.postPage(shortCode, this.postPageRetries),
1060 |                 );
1061 |             } else {
1062 |                 this.finish(FinishedReasons.TOTAL_REACHED_PAGE);
1063 |                 break;
1064 |             }
1065 |         }
1066 |     }
1067 | 
1068 |     private addPlugins(plugins: IPlugin<PostType>[]) {
1069 |         if (!plugins) {
1070 |             return;
1071 |         }
1072 | 
1073 |         for (const plugin of plugins) {
1074 |             for (const event of Object.keys(this.pluginFunctions)) {
1075 |                 const pluginEvent = plugin[event + "Event"];
1076 |                 if (pluginEvent) {
1077 |                     const context: IPluginContext<typeof plugin, PostType> = {
1078 |                         plugin,
1079 |                         state: this,
1080 |                     };
1081 | 
1082 |                     this.pluginFunctions[event].push(pluginEvent.bind(context));
1083 |                 }
1084 |             }
1085 |         }
1086 |     }
1087 | 
1088 |     private executePlugins(event: SyncPluginEventsType, ...args): void;
1089 |     private executePlugins(
1090 |         event: AsyncPluginEventsType,
1091 |         ...args
1092 |     ): Promise<unknown>;
1093 |     private executePlugins(event: PluginEventsType, ...args) {
1094 |         if (event in SyncPluginEvents) {
1095 |             for (const pluginFunction of this.pluginFunctions["construction"]) {
1096 |                 pluginFunction();
1097 |             }
1098 |             return;
1099 |         }
1100 | 
1101 |         return Promise.all(
1102 |             // @ts-ignore
1103 |             this.pluginFunctions[event].map((cb) => cb(...args)),
1104 |         );
1105 |     }
1106 | }
1107 | 
/**
 * The states of progress that the API can be in. Used to output status.
 * Each member's string value is the text written to the status line and to
 * the debug log by `progress`.
 */
enum Progress {
    // Reported before a browser is launched or a supplied one is adopted
    LAUNCHING = "Launching",
    // Reported once a new page has been created, before visiting the url
    OPENING = "Navigating",
    // Reported while gathering posts and while sleeping
    SCRAPING = "Scraping",
    // Reported while waiting for outstanding post page promises
    BRANCHING = "Branching",
    // Reported when grafting is initiated
    GRAFTING = "Grafting",
    CLOSING = "Closing",

    // Reported repeatedly while the instance is paused
    PAUSED = "Paused",
    // Reported after a failed request or a page error
    ABORTED = "Request aborted",
}
1122 | 
/**
 * Reasons why the collection finished. A member is passed to `finish` at the
 * point where collection stops.
 *
 * This is a numeric enum, so members take their values from declaration
 * order.
 */
enum FinishedReasons {
    // forceStop used
    FORCED_STOP,

    // API response doesn't contain next page
    API_FINISHED,

    // Total posts required have been collected from the API
    TOTAL_REACHED_API,

    // Total posts required have been collected from the default posts
    TOTAL_REACHED_PAGE,

    // No API response intercepted after interacting with page
    NO_RESPONSE,

    // Index hasn't increased after interacting with page
    NO_INCREMENT,
}
1145 | 


--------------------------------------------------------------------------------
/src/api/postIdSet.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * A set of post ids used to detect duplicates
 3 |  */
 4 | export class PostIdSet {
 5 |     private ids: Set<string> = new Set<string>();
 6 | 
 7 |     /**
 8 |      * Add a post id to the set.
 9 |      * @return true if the id was already in the set, false if not.
10 |      */
11 |     public add(id: string): boolean {
12 |         const contains = this.ids.has(id);
13 |         this.ids.add(id);
14 |         return contains;
15 |     }
16 | }
17 | 


--------------------------------------------------------------------------------
/src/api/search.ts:
--------------------------------------------------------------------------------
  1 | import * as t from "io-ts";
  2 | import {excess} from "io-ts-excess";
  3 | import {IPlugin} from "../../plugins";
  4 | import {IOptions} from "./api";
  5 | import {Instagram} from "./instagram";
  6 | 
/**
 * io-ts codec for one entry of the `users` array in a search result: a
 * position plus the user's details.
 * The `user` object is wrapped in `excess` (io-ts-excess), which is expected
 * to reject properties that are not listed here.
 */
export const Users = t.type({
    position: t.number,
    user: excess(
        t.type({
            full_name: t.string,
            account_badges: t.array(t.undefined),
            biography_product_mentions: t.array(t.undefined),
            has_anonymous_profile_picture: t.boolean,
            is_private: t.boolean,
            is_verified: t.boolean,
            latest_reel_media: t.number,
            mutual_followers_count: t.number,
            pk: t.string,
            profile_pic_id: t.union([t.string, t.undefined]),
            profile_pic_url: t.string,
            username: t.string,
        }),
    ),
});
 26 | 
/**
 * io-ts codec for one entry of the `places` array in a search result: a
 * position plus the place and its location.
 * Both the `place` and `location` objects are wrapped in `excess`.
 */
export const Places = t.type({
    place: excess(
        t.type({
            header_media: t.any,
            location: excess(
                t.type({
                    address: t.string,
                    city: t.string,
                    external_source: t.string,
                    facebook_places_id: t.number,
                    lat: t.union([t.undefined, t.number]),
                    lng: t.union([t.undefined, t.number]),
                    name: t.string,
                    pk: t.string,
                    short_name: t.string,
                }),
            ),
            media_bundles: t.UnknownArray,
            slug: t.string,
            subtitle: t.string,
            title: t.string,
        }),
    ),
    position: t.number,
});
 52 | 
/**
 * io-ts codec for one entry of the `hashtags` array in a search result: a
 * position plus the hashtag's details (wrapped in `excess`).
 */
export const Hashtags = t.type({
    hashtag: excess(
        t.type({
            id: t.string,
            media_count: t.number,
            name: t.string,
            profile_pic_url: t.string,
            search_result_subtitle: t.string,
            use_default_avatar: t.boolean,
        }),
    ),
    position: t.number,
});
 66 | 
/**
 * io-ts codec for a complete search result, combining the hashtag, place and
 * user entries. `Search` passes it to its base class as the validator.
 */
export const SearchResult = t.type({
    clear_client_cache: t.boolean,
    has_more: t.boolean,
    hashtags: t.array(Hashtags),
    places: t.array(Places),
    rank_token: t.string,
    status: t.string,
    users: t.array(Users),
});
 76 | 
/** Static TypeScript type derived from the `SearchResult` codec. */
export type TSearchResult = t.TypeOf<typeof SearchResult>;
 78 | 
/**
 * Options accepted by `Search`: every `IOptions` key except `total`,
 * `fullAPI`, `hibernationTime` and `sleepTime`.
 */
export type ISearchOptions = Pick<
    IOptions,
    Exclude<
        keyof IOptions,
        "total" | "fullAPI" | "hibernationTime" | "sleepTime"
    >
>;
 86 | 
/** `ISearchOptions` extended with an optional list of plugins. */
export interface ISearchOptionsPlugins<PostType> extends ISearchOptions {
    plugins?: IPlugin<PostType>[];
}
 90 | 
 91 | export class Search extends Instagram<TSearchResult> {
 92 |     public readonly catchURL = "https://www.instagram.com/web/";
 93 |     private searchResult: TSearchResult;
 94 |     private readonly searchQuery: string;
 95 |     private readonly inputElementQuery: string = "input[type='text']";
 96 | 
 97 |     constructor(query: string, options: ISearchOptions = {}) {
 98 |         super(
 99 |             "https://instagram.com/explore/tags/instagram",
100 |             "",
101 |             "",
102 |             "",
103 |             options,
104 |             SearchResult,
105 |         );
106 |         this.searchQuery = query;
107 |     }
108 | 
109 |     public async get() {
110 |         if (!this.started) {
111 |             await this.start();
112 |         }
113 |         try {
114 |             await this.page.waitForSelector(this.inputElementQuery, {
115 |                 timeout: 30000,
116 |             });
117 |         } catch {
118 |             // Timeout
119 |         }
120 |         await this.page.click(this.inputElementQuery);
121 | 
122 |         await this.page.keyboard.sendCharacter(this.searchQuery);
123 |         await this.page.waitForRequest((req) => this.matchURL(req.url()));
124 |         await this.processRequests();
125 |         await this.page.waitForResponse((res) => this.matchURL(res.url()));
126 |         await this.processResponses();
127 |         await this.stop();
128 |         return this.searchResult;
129 |     }
130 | 
131 |     public matchURL(url: string) {
132 |         return url.startsWith(this.catchURL);
133 |     }
134 | 
135 |     protected async processResponseData(data: TSearchResult) {
136 |         await this.validatePost(data);
137 |         this.searchResult = data;
138 |     }
139 | }
140 | 


--------------------------------------------------------------------------------
/src/api/types.ts:
--------------------------------------------------------------------------------
  1 | // tslint:disable: object-literal-sort-keys
  2 | import * as t from "io-ts";
  3 | import {excess} from "io-ts-excess";
  4 | 
/** io-ts codec for a post's location (see `PostNode.location`). */
export const Location = t.type({
    id: t.string,
    has_public_page: t.boolean,
    name: t.string,
    slug: t.string,
    address_json: t.union([t.string, t.undefined, t.null]),
});
 12 | 
/** io-ts codec for the owner of a `PostNode`; only the id is described. */
export const PostNodeOwner = t.type({
    id: t.string,
});
 16 | 
/** io-ts codec for the owner of a `CommentNode`. */
export const CommentNodeOwner = t.type({
    id: t.string,
    is_verified: t.boolean,
    profile_pic_url: t.string,
    username: t.string,
});
 23 | 
/**
 * io-ts codec for an owner record: the same fields as `CommentNodeOwner`
 * plus name, privacy and viewer-relationship flags.
 */
export const ShortcodeMediaOwner = t.type({
    id: t.string,
    is_verified: t.boolean,
    profile_pic_url: t.string,
    username: t.string,
    blocked_by_viewer: t.boolean,
    followed_by_viewer: t.boolean,
    full_name: t.string,
    has_blocked_viewer: t.boolean,
    is_private: t.boolean,
    is_unpublished: t.boolean,
    requested_by_viewer: t.boolean,
});
 37 | 
/**
 * io-ts codec for the pagination info attached to comment lists: whether a
 * next page exists and the end cursor (which may be null).
 */
export const PageInfo = t.type({
    has_next_page: t.boolean,
    end_cursor: t.union([t.string, t.null]),
});
 42 | 
/** io-ts codec for a height/width pair. */
export const Dimensions = t.type({
    height: t.number,
    width: t.number,
});
 47 | 
/** io-ts codec for an object holding a single numeric `count`. */
export const Counter = t.type({
    count: t.number,
});
 51 | 
/**
 * io-ts codec for gating information: button labels, a description, a
 * gating type and a title.
 */
export const GatingInfo = t.type({
    buttons: t.array(t.string),
    description: t.string,
    gating_type: t.string,
    title: t.string,
});
 58 | 
/**
 * io-ts codec for an array of display resources, each a source URL with its
 * configured width and height.
 */
export const DisplayResources = t.array(
    t.type({
        src: t.string,
        config_width: t.number,
        config_height: t.number,
    }),
);
 66 | 
/**
 * io-ts codec for a node inside `EdgeMediaToCaption`. Every field may be
 * undefined.
 */
export const EdgeMediaToCaptionNode = t.type({
    text: t.union([t.string, t.undefined]),
    shortcode: t.union([t.string, t.undefined]),
    is_video: t.union([t.boolean, t.undefined]),
    video_url: t.union([t.string, t.undefined]),
    display_resources: t.union([DisplayResources, t.undefined]),
});
 74 | 
/**
 * io-ts codec for a list of edges whose nodes are `EdgeMediaToCaptionNode`s.
 * Besides captions, it is reused in this file for the tagged-user and
 * sponsor-user edges.
 */
export const EdgeMediaToCaption = t.type({
    edges: t.array(
        t.type({
            node: EdgeMediaToCaptionNode,
        }),
    ),
});
 82 | 
/**
 * io-ts codec for a related profile: account details, a follower count and
 * a counted list of the profile's timeline media.
 */
export const RelatedProfile = t.type({
    id: t.string,
    full_name: t.string,
    is_private: t.boolean,
    is_verified: t.boolean,
    profile_pic_url: t.string,
    username: t.string,
    edge_followed_by: t.type({
        count: t.number,
    }),
    edge_owner_to_timeline_media: t.type({
        count: t.number,
        edges: t.array(
            t.type({
                node: t.type({
                    __typename: t.string,
                    id: t.string,
                    shortcode: t.string,
                    edge_media_preview_like: Counter,
                    edge_media_preview_comment: Counter,
                    thumbnail_src: t.string,
                    owner: t.type({
                        id: t.string,
                        username: t.string,
                    }),
                    gating_info: t.union([GatingInfo, t.null, t.undefined]),
                    is_video: t.boolean,
                    accessibility_caption: t.union([t.string, t.null]),
                }),
            }),
        ),
    }),
});
116 | 
/**
 * io-ts codec for a list of edges whose nodes are `RelatedProfile`s; a node
 * may also be undefined.
 */
export const EdgeRelatedProfiles = t.type({
    edges: t.array(
        t.type({
            node: t.union([t.undefined, RelatedProfile]),
        }),
    ),
});
124 | 
/**
 * io-ts codec for the child media of a post (see
 * `PostNode.edge_sidecar_to_children`). Not exported.
 */
const EdgeSidecarToChildren = t.type({
    edges: t.array(
        t.type({
            node: t.type({
                __typename: t.string,
                id: t.string,
                shortcode: t.union([t.string, t.undefined]),
                dimensions: Dimensions,
                gating_info: t.union([t.null, t.undefined]),
                fact_check_information: t.union([t.null, t.undefined]),
                media_preview: t.union([t.undefined, t.string, t.null]),
                display_url: t.string,
                display_resources: DisplayResources,
                accessibility_caption: t.union([t.string, t.undefined, t.null]),
                is_video: t.boolean,
                video_url: t.union([t.string, t.undefined]),
                tracking_token: t.string,
                edge_media_to_tagged_user: EdgeMediaToCaption,
            }),
        }),
    ),
});
147 | 
/**
 * io-ts codec for a post node. `Post` wraps it in `excess`.
 * Many fields are unions with undefined, so they may be absent.
 */
export const PostNode = t.type({
    __typename: t.union([t.string, t.undefined]),
    comments_disabled: t.boolean,
    location: t.union([t.null, t.undefined, Location]),
    id: t.string,
    edge_media_to_caption: EdgeMediaToCaption,
    shortcode: t.string,
    edge_media_to_comment: Counter,
    taken_at_timestamp: t.number,
    sensitivity_friction_info: t.union([GatingInfo, t.null, t.undefined]),
    media_overlay_info: t.union([t.null, t.undefined]),
    fact_check_information: t.union([t.null, t.undefined]),
    fact_check_overall_rating: t.union([t.undefined, t.null]),
    dimensions: Dimensions,
    display_url: t.string,
    edge_liked_by: t.union([Counter, t.undefined]),
    edge_media_preview_like: Counter,
    owner: PostNodeOwner,
    thumbnail_src: t.string,
    thumbnail_resources: t.union([DisplayResources, t.undefined]),
    is_video: t.boolean,
    accessibility_caption: t.union([t.string, t.undefined, t.null]),
    display_resources: t.union([DisplayResources, t.undefined]),
    should_log_client_event: t.union([t.undefined, t.boolean]),
    tracking_token: t.union([t.undefined, t.string]),
    edge_media_to_tagged_user: t.union([t.undefined, EdgeMediaToCaption]),
    edge_media_to_sponsor_user: t.union([t.undefined, EdgeMediaToCaption]),
    dash_info: t.union([
        t.undefined,
        t.type({
            is_dash_eligible: t.boolean,
            video_dash_manifest: t.null,
            number_of_qualities: t.number,
        }),
    ]),
    video_url: t.union([t.undefined, t.string]),
    video_view_count: t.union([t.undefined, t.number]),
    gating_info: t.union([t.null, t.undefined]),
    media_preview: t.union([t.undefined, t.string, t.null]),
    product_type: t.union([t.undefined, t.string]),
    viewer_has_liked: t.union([t.undefined, t.boolean]),
    viewer_has_saved: t.union([t.boolean, t.undefined]),
    viewer_has_saved_to_collection: t.union([t.boolean, t.undefined]),
    viewer_in_photo_of_you: t.union([t.boolean, t.undefined]),
    viewer_can_reshare: t.union([t.boolean, t.undefined]),
    edge_sidecar_to_children: t.union([EdgeSidecarToChildren, t.undefined]),
});
195 | 
/**
 * Runtime codec for a single comment.
 *
 * Every field is required; `owner` and `edge_liked_by` are validated by the
 * `CommentNodeOwner` and `Counter` codecs respectively.
 */
export const CommentNode = t.type({
    id: t.string,
    text: t.string,
    created_at: t.number,
    did_report_as_spam: t.boolean,
    owner: CommentNodeOwner,
    viewer_has_liked: t.boolean,
    edge_liked_by: Counter,
});
205 | 
/**
 * Runtime codec for a counted list of comment edges: a numeric `count` plus
 * an array of `{node: CommentNode}` wrappers.
 */
export const EdgeMediaPreviewComment = t.type({
    count: t.number,
    edges: t.array(
        t.type({
            node: CommentNode,
        }),
    ),
});
214 | 
/**
 * Runtime codec for a list of `{node: CommentNode}` edges. This shape carries
 * no `count` field.
 */
export const EdgeMediaHoistedComment = t.type({
    edges: t.array(
        t.type({
            node: CommentNode,
        }),
    ),
});
222 | 
/**
 * Runtime codec for a top-level comment: every `CommentNode` field plus an
 * `edge_threaded_comments` object holding a count, paging information and a
 * list of `{node: CommentNode}` edges.
 */
const EdgeMediaToParentCommentNode = t.intersection([
    CommentNode,
    t.type({
        edge_threaded_comments: t.type({
            count: t.number,
            page_info: PageInfo,
            edges: t.array(
                t.type({
                    node: CommentNode,
                }),
            ),
        }),
    }),
]);
237 | 
/**
 * Runtime codec for a post wrapper: an object whose `node` is a `PostNode`
 * passed through the `excess` helper.
 * NOTE(review): `excess` presumably rejects properties that the codec does
 * not declare - confirm against its definition.
 */
export const Post = t.type({
    node: excess(PostNode),
});
241 | 
/**
 * Runtime codec for a page of top-level comments: a numeric `count`, paging
 * information and a list of `{node: EdgeMediaToParentCommentNode}` edges.
 */
export const EdgeMediaToParentComment = t.type({
    count: t.number,
    page_info: PageInfo,
    edges: t.array(
        t.type({
            node: EdgeMediaToParentCommentNode,
        }),
    ),
});
251 | 
/**
 * Runtime codec for the `shortcode_media` object wrapped by `SinglePost` and
 * reused (with a different `location`) by `FullApiPost`.
 *
 * Fields written as a union containing `t.undefined` may be absent from the
 * validated object; fields that also list `t.null` may be present but null.
 * All other fields are required.
 */
export const ShortcodeMedia = t.type({
    __typename: t.string,
    id: t.string,
    shortcode: t.string,
    edge_media_to_comment: t.union([Counter, t.undefined]),
    thumbnail_src: t.union([t.undefined, t.string]),
    dimensions: Dimensions,
    gating_info: t.union([GatingInfo, t.null, t.undefined]),
    sensitivity_friction_info: t.union([GatingInfo, t.null, t.undefined]),
    fact_check_information: t.null,
    fact_check_overall_rating: t.union([t.undefined, t.null]),
    media_overlay_info: t.null,
    media_preview: t.union([t.string, t.null]),
    display_url: t.string,
    display_resources: DisplayResources,
    accessibility_caption: t.union([t.string, t.undefined, t.null]),
    is_video: t.boolean,
    should_log_client_event: t.union([t.boolean, t.undefined]),
    tracking_token: t.string,
    edge_media_to_tagged_user: EdgeMediaToCaption,
    edge_media_to_caption: EdgeMediaToCaption,
    caption_is_edited: t.boolean,
    has_ranked_comments: t.boolean,
    has_audio: t.union([t.boolean, t.undefined]),
    edge_media_to_parent_comment: t.union([
        EdgeMediaToParentComment,
        t.undefined,
    ]),
    edge_media_to_hoisted_comment: t.union([
        EdgeMediaHoistedComment,
        t.undefined,
    ]),
    edge_media_preview_comment: t.union([EdgeMediaPreviewComment, t.undefined]),
    edge_related_profiles: EdgeRelatedProfiles,
    comments_disabled: t.boolean,
    commenting_disabled_for_viewer: t.boolean,
    clips_music_attribution_info: t.union([t.null, t.undefined]),
    taken_at_timestamp: t.number,
    // NOTE(review): likes are validated with the comment-preview codec here;
    // confirm that this is intentional
    edge_media_preview_like: EdgeMediaPreviewComment,
    edge_media_to_sponsor_user: EdgeMediaToCaption,
    location: t.union([t.string, t.null]),
    viewer_has_liked: t.boolean,
    viewer_has_saved: t.boolean,
    viewer_has_saved_to_collection: t.boolean,
    viewer_in_photo_of_you: t.boolean,
    viewer_can_reshare: t.boolean,
    owner: ShortcodeMediaOwner,
    is_ad: t.boolean,
    edge_web_media_to_related_media: EdgeMediaToCaption,
    edge_sidecar_to_children: t.union([EdgeSidecarToChildren, t.undefined]),
    dash_info: t.union([
        t.undefined,
        t.type({
            is_dash_eligible: t.boolean,
            video_dash_manifest: t.null,
            number_of_qualities: t.number,
        }),
    ]),
    video_url: t.union([t.undefined, t.string]),
    video_view_count: t.union([t.undefined, t.number]),
    video_play_count: t.union([t.undefined, t.null, t.number]),
    encoding_status: t.union([t.undefined, t.string, t.null]),
    is_published: t.union([t.undefined, t.boolean]),
    product_type: t.union([t.undefined, t.string]),
    title: t.union([t.undefined, t.string, t.null]),
    video_duration: t.union([t.undefined, t.number]),
});
319 | 
/**
 * Runtime codec for a single-post response: an object whose
 * `shortcode_media` is a `ShortcodeMedia` passed through the `excess` helper.
 */
export const SinglePost = t.type({
    shortcode_media: excess(ShortcodeMedia),
});
323 | 
/**
 * Runtime codec for a post retrieved with full data. It copies every
 * `ShortcodeMedia` field but validates `location` as `Location | null`
 * instead of `string | null`.
 */
export const FullApiPost = t.type({
    shortcode_media: excess(
        t.type({
            ...ShortcodeMedia.props,
            // Overrides the `location` entry spread in from ShortcodeMedia
            location: t.union([Location, t.null]),
        }),
    ),
});
332 | 
333 | // tslint:enable: object-literal-sort-keys
334 | 
// Static types derived from the runtime codecs above

/** Static type of a value accepted by the `Post` codec. */
export type TPost = t.TypeOf<typeof Post>;

/** Static type of a value accepted by the `SinglePost` codec. */
export type TSinglePost = t.TypeOf<typeof SinglePost>;

/** Static type of a value accepted by the `FullApiPost` codec. */
export type TFullApiPost = t.TypeOf<typeof FullApiPost>;
340 | 


--------------------------------------------------------------------------------
/src/cli.ts:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env node
  2 | 
  3 | import * as aws from "aws-sdk";
  4 | import * as fs from "fs";
  5 | import * as readline from "readline";
  6 | import * as winston from "winston";
  7 | 
  8 | import * as path from "path";
  9 | import {v4 as uuid} from "uuid";
 10 | import * as plugins from "../plugins";
 11 | import {createApi, IOptions} from "./api/api";
 12 | import {TFullApiPost, TPost} from "./api/types";
 13 | import {GetPool} from "./getpool/getPool";
 14 | import * as depotUpload from "./http/depot";
 15 | import {download, toCSV, toJSON} from "./http/download";
 16 | import * as s3Upload from "./http/s3";
 17 | 
 18 | const getLogger = (args) => {
 19 |     const transports = [];
 20 |     if (args["logging"] !== "none") {
 21 |         transports.push(
 22 |             new winston.transports.File({
 23 |                 filename: args["logfile"],
 24 |                 level: args["logging"],
 25 |                 silent: args["logging"] === "none",
 26 |             }),
 27 |         );
 28 |     }
 29 |     return winston.createLogger({
 30 |         level: args["logging"],
 31 |         silent: args["logging"] === "none",
 32 |         transports,
 33 |     });
 34 | };
 35 | 
 36 | function getOptions(args, logger) {
 37 |     const options: IOptions = {
 38 |         enableGrafting: args["graft"],
 39 |         executablePath: args["browser"],
 40 |         fullAPI: args["full"],
 41 |         headless: !args["visible"],
 42 |         logger,
 43 |         plugins: [],
 44 |         sameBrowser: args["sameBrowser"],
 45 |         silent: args["quiet"],
 46 |         sleepTime: args["sleep"],
 47 |         strict: args["strict"],
 48 |         total: args["count"],
 49 |     };
 50 | 
 51 |     for (const pluginName of args["plugin"]) {
 52 |         if (plugins.plugins[pluginName]) {
 53 |             options.plugins.push(new plugins.plugins[pluginName]());
 54 |         } else {
 55 |             throw new Error("Couldn't find plugin " + pluginName);
 56 |         }
 57 |     }
 58 |     return options;
 59 | }
 60 | 
 61 | /**
 62 |  * Build argument parser
 63 |  */
 64 | function buildParser(args, callback) {
 65 |     /* tslint:disable:no-unused-expression */
 66 |     require("yargs")(args)
 67 |         .usage("Usage: $0 <command> [options]")
 68 |         .command("hashtag [id]", "Scrape a hashtag", {}, async (handleArgs) => {
 69 |             await spawn(handleArgs);
 70 |             callback();
 71 |         })
 72 |         .command(
 73 |             "user [id]",
 74 |             "Scrape a users posts",
 75 |             {},
 76 |             async (handleArgs) => {
 77 |                 await spawn(handleArgs);
 78 |                 callback();
 79 |             },
 80 |         )
 81 |         .command(
 82 |             "post [ids]",
 83 |             "Scrape a comma-separated list of posts",
 84 |             {},
 85 |             async (handleArgs) => {
 86 |                 await spawn(handleArgs);
 87 |                 callback();
 88 |             },
 89 |         )
 90 |         .command(
 91 |             "search [query]",
 92 |             "Perform a search of users, tags and places",
 93 |             {},
 94 |             async (handleArgs) => {
 95 |                 const logger = getLogger(handleArgs);
 96 |                 const options = getOptions(handleArgs, logger);
 97 |                 if (!handleArgs["query"]) {
 98 |                     throw new Error("query required");
 99 |                 }
100 |                 const search = createApi(
101 |                     "search",
102 |                     handleArgs["query"],
103 |                     options,
104 |                 );
105 |                 const result = await search.get();
106 |                 process.stdout.write("\n");
107 |                 process.stdout.write(JSON.stringify(result, null, 2));
108 |                 process.stdout.write("\n");
109 |                 callback();
110 |             },
111 |         )
112 |         .command(
113 |             "batch [batchfile]",
114 |             "Read newline-separated arguments from a file",
115 |             {},
116 |             () => {
117 |                 // A list of functions which create new Promises that are
118 |                 // resolved by buildParser when the spawn commands are
119 |                 // finished
120 |                 // See https://stackoverflow.com/a/45951080/7435520
121 |                 const functions = [];
122 | 
123 |                 // Read the list of commands from file
124 |                 readline
125 |                     .createInterface({
126 |                         crlfDelay: Infinity,
127 |                         input: fs.createReadStream(args[1]),
128 |                     })
129 |                     .on(
130 |                         "line",
131 |                         // For each line, create a new function which
132 |                         // creates a new promise to be resolved by
133 |                         // buildParser
134 |                         (line) => {
135 |                             if (line.length > 0 && line.charAt(0) !== "#") {
136 |                                 functions.push(
137 |                                     () =>
138 |                                         new Promise((res) =>
139 |                                             buildParser(line, res),
140 |                                         ),
141 |                                 );
142 |                             }
143 |                         },
144 |                     )
145 |                     .on(
146 |                         "close",
147 |                         // When all lines have been read, synchronously
148 |                         // execute the commands by waiting for their
149 |                         // promises to be resolved
150 |                         async () => {
151 |                             for (const f of functions) {
152 |                                 await f();
153 |                             }
154 |                             process.exit();
155 |                         },
156 |                     );
157 |             },
158 |         )
159 |         /* tslint:disable:object-literal-sort-keys */
160 |         .options({
161 |             count: {
162 |                 alias: "c",
163 |                 number: true,
164 |                 default: 0,
165 |                 describe: "Number of posts to download (0 for all)",
166 |                 group: "Configuration",
167 |             },
168 |             full: {
169 |                 alias: ["f"],
170 |                 boolean: true,
171 |                 default: false,
172 |                 describe: "Retrieve full post data",
173 |                 group: "Configuration",
174 |             },
175 |             sleep: {
176 |                 alias: ["s"],
177 |                 number: true,
178 |                 default: 2,
179 |                 describe: "Seconds to sleep between interactions",
180 |                 group: "Configuration",
181 |             },
182 |             graft: {
183 |                 alias: "g",
184 |                 boolean: true,
185 |                 default: true,
186 |                 describe: "Enable grafting",
187 |                 group: "Configuration",
188 |             },
189 |             browser: {
190 |                 alias: ["b"],
191 |                 string: true,
192 |                 default: undefined,
193 |                 describe: "Browser path. Defaults to the puppeteer version",
194 |                 group: "Configuration",
195 |             },
196 |             sameBrowser: {
197 |                 boolean: true,
198 |                 default: false,
199 |                 describe: "Use a single browser when grafting",
200 |                 group: "Configuration",
201 |             },
202 |             download: {
203 |                 alias: "d",
204 |                 boolean: true,
205 |                 default: false,
206 |                 describe: "Save images from posts",
207 |                 group: "Download",
208 |             },
209 |             downdir: {
210 |                 default: "downloads/[endpoint]/[id]",
211 |                 describe: "Download path",
212 |                 group: "Download",
213 |             },
214 |             video: {
215 |                 alias: "v",
216 |                 boolean: true,
217 |                 default: false,
218 |                 describe: "Download videos (requires full)",
219 |                 implies: "full",
220 |                 group: "Download",
221 |             },
222 |             sync: {
223 |                 boolean: true,
224 |                 default: false,
225 |                 describe: "Force download between requests",
226 |                 group: "Download",
227 |             },
228 |             threads: {
229 |                 alias: "k",
230 |                 number: true,
231 |                 default: 4,
232 |                 describe: "Parallel download / depot threads",
233 |                 group: "Download",
234 |             },
235 |             waitDownload: {
236 |                 alias: "w",
237 |                 boolean: true,
238 |                 default: false,
239 |                 describe: "Download media after scraping",
240 |                 group: "Download",
241 |             },
242 |             bucket: {
243 |                 string: true,
244 |                 default: undefined,
245 |                 describe: "Upload files to an AWS S3 bucket",
246 |                 group: "Upload",
247 |             },
248 |             depot: {
249 |                 string: true,
250 |                 default: undefined,
251 |                 describe: "Upload files to a URL with a PUT request (depot)",
252 |                 group: "Upload",
253 |             },
254 |             file: {
255 |                 alias: ["o"],
256 |                 string: true,
257 |                 default: "[id]",
258 |                 describe: "Output filename. '-' for stdout",
259 |                 group: "Output",
260 |             },
261 |             type: {
262 |                 alias: ["t"],
263 |                 default: "json",
264 |                 describe: "Filetype",
265 |                 choices: ["csv", "json", "both"],
266 |                 group: "Output",
267 |             },
268 |             mediaPath: {
269 |                 alias: ["m"],
270 |                 boolean: true,
271 |                 default: false,
272 |                 describe: "Add filepaths to _mediaPath",
273 |                 group: "Output",
274 |             },
275 |             visible: {
276 |                 boolean: true,
277 |                 default: false,
278 |                 describe: "Show browser on the screen",
279 |                 group: "Display",
280 |             },
281 |             quiet: {
282 |                 alias: ["q"],
283 |                 boolean: true,
284 |                 default: false,
285 |                 describe: "Disable progress output",
286 |                 group: "Display",
287 |             },
288 |             logging: {
289 |                 alias: ["l"],
290 |                 default: "none",
291 |                 choices: ["none", "error", "info", "debug"],
292 |                 group: "Logging",
293 |             },
294 |             logfile: {
295 |                 string: true,
296 |                 default: "instamancer.log",
297 |                 describe: "Log file name",
298 |                 group: "Logging",
299 |             },
300 |             strict: {
301 |                 boolean: true,
302 |                 default: false,
303 |                 describe: "Throw an error on response type mismatch",
304 |                 group: "Validation",
305 |             },
306 |             plugin: {
307 |                 alias: ["p"],
308 |                 array: true,
309 |                 default: [],
310 |                 describe: "Use a plugin from the plugins directory",
311 |                 group: "Plugins",
312 |             },
313 |         })
314 |         .demandCommand()
315 |         .example(
316 |             "$0 hashtag instagood -fvd",
317 |             "Download all the available posts, and their media from #instagood",
318 |         )
319 |         .example(
320 |             "$0 user arianagrande --type=csv --logging=info --visible",
321 |             "Download Ariana Grande's posts to a CSV file with a non-headless browser, and log all events",
322 |         )
323 |         .epilog(
324 |             "Source code available at https://github.com/ScriptSmith/instamancer",
325 |         )
326 |         .strict().argv;
327 |     /* tslint:enable:no-unused-expression */
328 | }
329 | 
/**
 * Spawn an instance of the API, scrape its posts and store the results.
 *
 * The endpoint is taken from the command name (`args["_"][0]`). Media found
 * in the posts is downloaded to disk, or uploaded to a depot URL or an S3
 * bucket when one is configured. Unless the output file is "-" (stdout), the
 * collected posts are written as CSV and / or JSON when scraping finishes.
 *
 * While scraping, pressing space calls the API's `pause()` and ctrl-c sends
 * SIGINT to this process.
 *
 * @param args parsed command line arguments
 * @throws Error when neither `id` nor `ids` was supplied
 */
async function spawn(args) {
    // Initiate logger
    const logger = getLogger(args);

    // Check id
    if (!(args["id"] || args["ids"])) {
        throw new Error("Id required");
    }

    // Pick endpoint
    const endpoint = args["_"][0];
    let ids;
    if (endpoint === "post") {
        ids = args["ids"].split(",");
        args["id"] = ids.length === 1 ? ids[0] : "posts";
        // Posts are always scraped with full data
        args["full"] = true;
    } else {
        ids = args["id"];
    }

    // Define options
    const options: IOptions = getOptions(args, logger);

    // Replace downdir
    const downdir = args["downdir"]
        .replace("[id]", args["id"])
        .replace("[endpoint]", endpoint);

    // Replace depot url
    let depotUrl = args["depot"];
    if (depotUrl && depotUrl.includes("[uuid]")) {
        depotUrl = depotUrl.replace("[uuid]", uuid());
        // Show the generated url so the upload can be located afterwards
        if (!args["quiet"]) {
            process.stdout.write(depotUrl + "\n");
        }
    }

    // Get s3 bucket
    const s3Bucket = args["bucket"];

    // Check if outputting to stdout
    const printOutput = args["file"] === "-";

    // Connect to object storage
    let downloadUpload;
    let toCSVFunc = toCSV;
    let toJSONFunc = toJSON;
    if (depotUrl) {
        // Depot
        const depotConfig = {
            directory: downdir,
            url: depotUrl,
            logger,
        };

        downloadUpload = depotUpload.depot.bind(depotConfig);
        toCSVFunc = depotUpload.toCSV.bind(depotConfig);
        toJSONFunc = depotUpload.toJSON.bind(depotConfig);
    } else if (s3Bucket) {
        // s3
        const s3Config = {
            bucket: s3Bucket,
            directory: downdir,
            s3: new aws.S3(),
            logger,
        };

        downloadUpload = s3Upload.s3.bind(s3Config);
        toCSVFunc = s3Upload.toCSV.bind(s3Config);
        toJSONFunc = s3Upload.toJSON.bind(s3Config);
    } else {
        // Download
        downloadUpload = download.bind({
            directory: downdir,
            logger,
        });
    }

    // Start API
    logger.info("Starting API at " + Date.now());
    const obj = createApi(endpoint, ids, options);
    await obj.start();

    // Start download pool
    const getPool = new GetPool(args["threads"], downloadUpload);

    // Pick between synchronous and parallel downloads
    const downloadFunction = args["sync"]
        ? downloadUpload
        : getPool.add.bind(getPool);

    // Add pause callback
    function handleKeypress(str, key) {
        if (key.name === "space") {
            obj.pause();
        } else if (key.name === "c" && key.ctrl) {
            process.stdout.write("\n");
            process.kill(process.pid, "SIGINT");
        }
    }

    process.stdin.on("keypress", handleKeypress);

    // Array of urls and filenames
    let downloadMedia: [string, string, FILETYPES][] = [];

    // Download posts
    const posts = [];
    for await (const post of obj.generator()) {
        // Add _mediaPath key
        if (args["mediaPath"]) {
            post["_mediaPath"] = [];
        }

        // Identify download urls
        if (args["download"] && ("node" in post || "shortcode_media" in post)) {
            // Check the scraping level
            if (args["full"]) {
                // Check if album
                const postObject = post as TFullApiPost;
                const children =
                    postObject.shortcode_media.edge_sidecar_to_children;
                if (children !== undefined) {
                    for (const child of children.edges) {
                        const shortcode = child.node.shortcode;

                        // Check if video
                        let mediaUrl: string;
                        let mediaType: FILETYPES;
                        if (child.node.is_video && args["video"]) {
                            mediaUrl = child.node.video_url;
                            mediaType = FILETYPES.VIDEO;
                        } else {
                            // Read the last resource without removing it:
                            // the post itself is output later and must
                            // keep all of its display resources
                            const resources = child.node.display_resources;
                            mediaUrl = resources[resources.length - 1].src;
                            mediaType = FILETYPES.IMAGE;
                        }
                        saveMediaMetadata(
                            post,
                            args,
                            downloadMedia,
                            downdir,
                            mediaUrl,
                            shortcode,
                            mediaType,
                        );
                    }
                } else {
                    const shortcode = postObject.shortcode_media.shortcode;

                    // Check if video
                    let mediaUrl: string;
                    let mediaType: FILETYPES;
                    if (postObject.shortcode_media.is_video && args["video"]) {
                        mediaUrl = postObject.shortcode_media.video_url;
                        mediaType = FILETYPES.VIDEO;
                    } else {
                        // Read the last resource without removing it (see
                        // the album branch)
                        const resources =
                            postObject.shortcode_media.display_resources;
                        mediaUrl = resources[resources.length - 1].src;
                        mediaType = FILETYPES.IMAGE;
                    }
                    saveMediaMetadata(
                        post,
                        args,
                        downloadMedia,
                        downdir,
                        mediaUrl,
                        shortcode,
                        mediaType,
                    );
                }
            } else {
                const postObject = post as TPost;
                saveMediaMetadata(
                    post,
                    args,
                    downloadMedia,
                    downdir,
                    postObject.node.thumbnail_src,
                    postObject.node.shortcode,
                    FILETYPES.IMAGE,
                );
            }
        }

        // Output if required
        if (printOutput) {
            process.stdout.write(JSON.stringify(post, null, 2) + "\n");
        } else {
            posts.push(post);
        }

        // Download the identified media
        if (!args["waitDownload"]) {
            for (const asset of downloadMedia) {
                await downloadFunction(...asset);
            }
            downloadMedia = [];
        }
    }

    // Download remaining media
    for (const asset of downloadMedia) {
        await downloadFunction(...asset);
    }

    // Close download pool
    await new Promise((resolve) => {
        getPool.close(resolve);
    });
    await Promise.all(getPool.promises);

    // Replace filename
    const filename = args["file"]
        .replace("[id]", args["id"])
        .replace("[endpoint]", endpoint);

    // Save file
    if (!printOutput) {
        // An extension is only appended when both types are written or when
        // the default filename is used
        if (args["type"] !== "json") {
            let saveFile = filename;
            if (args["type"] === "both" || args["file"] === "[id]") {
                saveFile += ".csv";
            }
            await toCSVFunc(posts, saveFile);
        }
        if (args["type"] !== "csv") {
            let saveFile = filename;
            if (args["type"] === "both" || args["file"] === "[id]") {
                saveFile += ".json";
            }
            await toJSONFunc(posts, saveFile);
        }
    }

    // Remove pause callback
    process.stdin.removeAllListeners("keypress");

    // Close logger
    logger.close();
}
573 | 
/**
 * Queue one media file for download and, when the mediaPath option is set,
 * record where it will be stored on the post.
 *
 * @param post post being processed; its `_mediaPath` array receives the path
 * @param args parsed command line arguments (reads `mediaPath` and `swift`)
 * @param downloadMedia queue which receives a `[url, shortcode, fileType]`
 * entry
 * @param downDir directory the media is stored in
 * @param url location of the media
 * @param shortcode shortcode of the media, used as its file name
 * @param fileType file extension of the media
 */
function saveMediaMetadata(
    post: object,
    args: object,
    downloadMedia: [string, string, FILETYPES][],
    downDir: string,
    url: string,
    shortcode: string,
    fileType: FILETYPES,
) {
    if (args["mediaPath"]) {
        const storedAt = path.join(downDir, `${shortcode}.${fileType}`);
        const scheme = args["swift"] ? "swift://" : "";
        post["_mediaPath"].push(scheme + storedAt);
    }

    const queued: [string, string, FILETYPES] = [url, shortcode, fileType];
    downloadMedia.push(queued);
}
590 | 
// Make stdin emit keypress events, reading keys one at a time when the
// stream supports raw mode
const keyInput = process.stdin;
readline.emitKeypressEvents(keyInput);
if ("setRawMode" in keyInput) {
    keyInput.setRawMode(true);
}

// Parse the command line and exit successfully once the command is done
const exitWhenDone = () => {
    process.exit(0);
};
buildParser(process.argv.slice(2), exitWhenDone);
601 | 
/**
 * Kinds of media that can be downloaded. Each value is the file extension
 * used when building the media's file name.
 */
enum FILETYPES {
    VIDEO = "mp4",
    IMAGE = "jpg",
}
606 | 


--------------------------------------------------------------------------------
/src/getpool/getPool.ts:
--------------------------------------------------------------------------------
  1 | import * as winston from "winston";
  2 | 
  3 | class GetJob {
  4 |     public finished: boolean = false;
  5 |     private readonly url: string;
  6 |     private readonly name: string;
  7 |     private readonly extension: string;
  8 |     private readonly downloadUpload: (
  9 |         url: string,
 10 |         name: string,
 11 |         extension: string,
 12 |     ) => Promise<void>;
 13 | 
 14 |     constructor(url: string, name: string, extension: string, downloadUpload) {
 15 |         this.url = url;
 16 |         this.name = name;
 17 |         this.extension = extension;
 18 |         this.downloadUpload = downloadUpload;
 19 |     }
 20 | 
 21 |     public async start() {
 22 |         await this.downloadUpload(this.url, this.name, this.extension);
 23 |         this.finished = true;
 24 |     }
 25 | }
 26 | 
 27 | /**
 28 |  * A pool of jobs that only executes k jobs 'simultaneously'
 29 |  */
 30 | export class GetPool {
 31 |     // Job promises
 32 |     public promises: Array<Promise<void>> = [];
 33 | 
 34 |     // Jobs that are currently being executed
 35 |     private runningJobs: GetJob[] = [];
 36 | 
 37 |     // Jobs that are yet to be executed
 38 |     private queuedJobs: GetJob[] = [];
 39 | 
 40 |     // Maximum number of jobs to be executed simultaneously
 41 |     private readonly maxConnections: number;
 42 | 
 43 |     // Looping interval executing promises
 44 |     private readonly loop;
 45 | 
 46 |     // Lock loop function execution
 47 |     private lock: boolean = false;
 48 | 
 49 |     // End-of-input signal triggered externally by close()
 50 |     private finished: boolean = false;
 51 | 
 52 |     // End-of-input resolve function
 53 |     private resolve: () => {};
 54 | 
 55 |     // Download / Upload function
 56 |     private readonly downloadUpload: (
 57 |         url: string,
 58 |         name: string,
 59 |         extension: string,
 60 |         directory: string,
 61 |         logger: winston.Logger,
 62 |     ) => Promise<void>;
 63 | 
 64 |     constructor(
 65 |         connections: number = 1,
 66 |         downloadUpload: (
 67 |             url: string,
 68 |             name: string,
 69 |             extension: string,
 70 |         ) => Promise<void>,
 71 |     ) {
 72 |         this.maxConnections = connections;
 73 |         this.loop = setInterval(() => {
 74 |             this.poolLoop.bind(this)();
 75 |         }, 100);
 76 |         this.downloadUpload = downloadUpload;
 77 |     }
 78 | 
 79 |     public add(url: string, name: string, extension: string) {
 80 |         this.queuedJobs.push(
 81 |             new GetJob(url, name, extension, this.downloadUpload),
 82 |         );
 83 |     }
 84 | 
 85 |     public close(resolve) {
 86 |         this.finished = true;
 87 |         this.resolve = resolve;
 88 |     }
 89 | 
 90 |     private poolLoop() {
 91 |         // Obtain lock or cancel
 92 |         if (this.lock) {
 93 |             return;
 94 |         } else {
 95 |             this.lock = true;
 96 |         }
 97 | 
 98 |         // Remove finished jobs
 99 |         for (let i = 0; i < this.runningJobs.length; i++) {
100 |             if (this.runningJobs[i].finished) {
101 |                 this.runningJobs.splice(i);
102 |                 i = 0;
103 |             }
104 |         }
105 | 
106 |         // Add new jobs to empty running slots
107 |         while (
108 |             this.queuedJobs.length > 0 &&
109 |             this.runningJobs.length < this.maxConnections
110 |         ) {
111 |             const job = this.queuedJobs.shift();
112 |             this.promises.push(job.start());
113 |             this.runningJobs.push(job);
114 |         }
115 | 
116 |         // End the interval when end-of-input signal given
117 |         if (
118 |             this.finished &&
119 |             this.queuedJobs.length === 0 &&
120 |             this.runningJobs.length === 0
121 |         ) {
122 |             clearInterval(this.loop);
123 |             this.resolve();
124 |         }
125 | 
126 |         // Release lock
127 |         this.lock = false;
128 |     }
129 | }
130 | 


--------------------------------------------------------------------------------
/src/http/depot.ts:
--------------------------------------------------------------------------------
  1 | import axios from "axios";
  2 | import * as fs from "fs";
  3 | import * as path from "path";
  4 | import * as tmp from "tmp";
  5 | import {resolve, URL} from "url";
  6 | import * as winston from "winston";
  7 | import * as download from "./download";
  8 | 
  9 | interface IUpload {
 10 |     url: string;
 11 |     directory: string;
 12 |     logger: winston.Logger;
 13 | }
 14 | 
 15 | export async function depot(
 16 |     this: IUpload,
 17 |     url: string,
 18 |     name: string,
 19 |     extension: string,
 20 | ) {
 21 |     try {
 22 |         // Axios download
 23 |         const downloadStream = await axios({
 24 |             method: "GET",
 25 |             responseType: "stream",
 26 |             url,
 27 |         });
 28 | 
 29 |         // Extract headers
 30 |         const contentType = downloadStream.headers["content-type"];
 31 |         const contentLength = downloadStream.headers["content-length"];
 32 | 
 33 |         // Upload path
 34 |         const filePath = path.join(this.directory, name + "." + extension);
 35 |         const uploadUrl = resolve(this.url, filePath);
 36 | 
 37 |         // Axios depot
 38 |         await axios({
 39 |             data: downloadStream.data,
 40 |             headers: {
 41 |                 "Content-Length": contentLength,
 42 |                 "Content-Type": contentType,
 43 |             },
 44 |             maxContentLength: Infinity,
 45 |             method: "PUT",
 46 |             ...authURL(uploadUrl),
 47 |         }).catch((error) => {
 48 |             this.logger.error(`Uploading ${url} failed`, error);
 49 |         });
 50 |     } catch (e) {
 51 |         this.logger.error(`Uploading ${url} failed`, e);
 52 |     }
 53 | }
 54 | 
 55 | function authURL(
 56 |     url: string,
 57 | ): {url: string; auth: {username: string; password: string}} {
 58 |     const components = new URL(url);
 59 |     const auth = {
 60 |         password: components.password,
 61 |         username: components.username,
 62 |     };
 63 |     components.username = "";
 64 |     components.password = "";
 65 | 
 66 |     return {
 67 |         auth,
 68 |         url: components.toString(),
 69 |     };
 70 | }
 71 | 
 72 | async function uploadFile(
 73 |     this: IUpload,
 74 |     posts: object[],
 75 |     filePath: string,
 76 |     fileFunc: (posts: object[], filePath: string) => Promise<void>,
 77 |     contentType: string,
 78 | ) {
 79 |     // Create tmp file
 80 |     const tmpFile = tmp.fileSync({keep: true});
 81 | 
 82 |     // Dump posts to file
 83 |     await fileFunc(posts, tmpFile.name);
 84 | 
 85 |     // Read file to a stream
 86 |     const fileStream = fs.createReadStream(tmpFile.name);
 87 |     const contentLength = fs.statSync(tmpFile.name).size;
 88 | 
 89 |     // Upload file
 90 |     const uploadUrl = resolve(this.url, filePath);
 91 |     await axios({
 92 |         data: fileStream,
 93 |         headers: {
 94 |             "Content-Length": contentLength,
 95 |             "Content-Type": contentType,
 96 |         },
 97 |         maxContentLength: Infinity,
 98 |         method: "PUT",
 99 |         url: uploadUrl,
100 |     });
101 | 
102 |     // Delete file
103 |     fs.unlinkSync(tmpFile.name);
104 | }
105 | 
/**
 * Dump the posts as CSV and upload the result to `filePath`
 */
export async function toCSV(this: IUpload, posts: object[], filePath: string) {
    await uploadFile.call(this, posts, filePath, download.toCSV, "text/csv");
}
113 | 
/**
 * Dump the posts as JSON and upload the result to `filePath`
 */
export async function toJSON(this: IUpload, posts: object[], filePath: string) {
    await uploadFile.call(this, posts, filePath, download.toJSON, "text/json");
}
121 | 


--------------------------------------------------------------------------------
/src/http/download.ts:
--------------------------------------------------------------------------------
 1 | import axios from "axios";
 2 | import * as fs from "fs";
 3 | import {Parser, transforms} from "json2csv";
 4 | import * as winston from "winston";
 5 | 
 6 | interface IDownload {
 7 |     directory: string;
 8 |     logger: winston.Logger;
 9 | }
10 | 
/**
 * Download a file into `this.directory`. Failures are logged, not thrown.
 * @param url The URL of the file
 * @param name The name used to identify the file
 * @param extension The file extension without the leading dot
 * (eg. "jpg" or "mp4"); the "." is added here
 */
export async function download(
    this: IDownload,
    url: string,
    name: string,
    extension: string,
) {
    // Best-effort directory creation: an error here is ignored, and a
    // missing directory surfaces below when the write stream fails to open
    await new Promise<void>((resolve) => {
        fs.mkdir(this.directory, {recursive: true}, () => resolve());
    });
    try {
        // Get data
        const response = await axios({
            method: "get",
            responseType: "stream",
            url,
        });

        // Write to file
        await new Promise<void>((resolve, reject) => {
            const stream = fs.createWriteStream(
                this.directory + "/" + name + "." + extension,
            );
            stream.on("finish", () => resolve());

            // Without these handlers a failed read or write would leave the
            // promise pending forever and stall the caller
            stream.on("error", reject);
            response.data.on("error", reject);

            // noinspection TypeScriptValidateJSTypes
            response.data.pipe(stream);
        });
    } catch (e) {
        this.logger.info(`Downloading ${url} failed`);
        this.logger.debug(e);
    }
}
48 | 
/**
 * Write the posts to `filePath` as CSV, flattening nested objects
 */
export async function toCSV(posts: object[], filePath: string) {
    const flattenNested = transforms.flatten();
    const csv = new Parser({transforms: [flattenNested]}).parse(posts);
    fs.writeFileSync(filePath, csv);
}
57 | 
/**
 * Write the posts to `filePath` as a JSON array, one post at a time
 */
export async function toJSON(posts: object[], filePath: string) {
    const append = (chunk: string) => fs.appendFileSync(filePath, chunk);

    // Start (or truncate) the file with the opening bracket
    fs.writeFileSync(filePath, "[");
    for (let index = 0; index < posts.length; index++) {
        // Every post after the first is preceded by a separator
        if (index > 0) {
            append(", ");
        }
        append(JSON.stringify(posts[index]));
    }
    append("]");
}
74 | 


--------------------------------------------------------------------------------
/src/http/s3.ts:
--------------------------------------------------------------------------------
  1 | import * as aws from "aws-sdk";
  2 | import axios from "axios";
  3 | import * as fs from "fs";
  4 | import * as tmp from "tmp";
  5 | import * as winston from "winston";
  6 | import * as download from "./download";
  7 | 
  8 | interface IUpload {
  9 |     bucket: string;
 10 |     directory: string;
 11 |     s3: aws.S3;
 12 |     logger: winston.Logger;
 13 | }
 14 | 
 15 | export async function s3(
 16 |     this: IUpload,
 17 |     url: string,
 18 |     name: string,
 19 |     extension: string,
 20 | ) {
 21 |     try {
 22 |         // Axios download
 23 |         const downloadStream = await axios({
 24 |             method: "GET",
 25 |             responseType: "stream",
 26 |             url,
 27 |         });
 28 | 
 29 |         // Extract headers
 30 |         const contentType = downloadStream.headers["content-type"];
 31 |         const contentLength = downloadStream.headers["content-length"];
 32 | 
 33 |         // s3 upload
 34 |         await new Promise((resolve) => {
 35 |             this.s3.upload(
 36 |                 {
 37 |                     Body: downloadStream.data,
 38 |                     Bucket: this.bucket,
 39 |                     ContentLength: contentLength,
 40 |                     ContentType: contentType,
 41 |                     Key: this.directory + "/" + name + "." + extension,
 42 |                 },
 43 |                 (err) => {
 44 |                     if (err !== null) {
 45 |                         this.logger.error(`Uploading ${url} failed`, err);
 46 |                     }
 47 |                     resolve();
 48 |                 },
 49 |             );
 50 |         });
 51 |     } catch (e) {
 52 |         this.logger.error(`Uploading ${url} failed`, e);
 53 |     }
 54 | }
 55 | 
 56 | async function uploadFile(
 57 |     this: IUpload,
 58 |     posts: object[],
 59 |     filePath: string,
 60 |     fileFunc: (posts: object[], filePath: string) => Promise<void>,
 61 |     contentType: string,
 62 | ) {
 63 |     // Create tmp file
 64 |     const tmpFile = tmp.fileSync({keep: true});
 65 | 
 66 |     // Dump posts to file
 67 |     await fileFunc(posts, tmpFile.name);
 68 | 
 69 |     // Read file to a stream
 70 |     const fileStream = fs.createReadStream(tmpFile.name);
 71 |     const contentLength = fs.statSync(tmpFile.name).size;
 72 | 
 73 |     // s3 upload
 74 |     await new Promise((resolve) => {
 75 |         this.s3.upload(
 76 |             {
 77 |                 Body: fileStream,
 78 |                 Bucket: this.bucket,
 79 |                 ContentLength: contentLength,
 80 |                 ContentType: contentType,
 81 |                 Key: filePath,
 82 |             },
 83 |             (err) => {
 84 |                 if (err !== null) {
 85 |                     this.logger.error(`Uploading ${filePath} failed`, err);
 86 |                 }
 87 |                 resolve();
 88 |             },
 89 |         );
 90 |     });
 91 | 
 92 |     // Delete file
 93 |     fs.unlinkSync(tmpFile.name);
 94 | }
 95 | 
 96 | /**
 97 |  * Upload list of posts to a CSV file
 98 |  */
 99 | export async function toCSV(this: IUpload, posts: object[], filePath: string) {
100 |     const uploader = uploadFile.bind(this);
101 |     await uploader(posts, filePath, download.toCSV, "text/csv");
102 | }
103 | 
/**
 * Dump the posts as JSON and upload the result under the key `filePath`
 */
export async function toJSON(this: IUpload, posts: object[], filePath: string) {
    await uploadFile.call(this, posts, filePath, download.toJSON, "text/json");
}
111 | 


--------------------------------------------------------------------------------
/tests/__fixtures__/FakePage.ts:
--------------------------------------------------------------------------------
 1 | import * as t from "io-ts";
 2 | import {IOptions} from "../../src/api/api";
 3 | import {Instagram} from "../../src/api/instagram";
 4 | 
 5 | export interface IFakePageOptions {
 6 |     // The path on the server
 7 |     path?: string;
 8 | 
 9 |     // The port the server is hosted on
10 |     port?: number;
11 | 
12 |     // The query to get API pages
13 |     pageQuery?: string;
14 | 
15 |     // The query to get posts
16 |     edgeQuery?: string;
17 | 
18 |     // The page to catch api requests on
19 |     catchPage?: string;
20 | 
21 |     // The page to visit posts
22 |     postPage?: string;
23 | 
24 |     // Regular API options
25 |     options?: IOptions;
26 | }
27 | 
// Minimal post shape accepted by the fixture: an object whose `node`
// carries a string `id`
const FakeNode = t.type({id: t.string});
const FakeValidator = t.type({node: FakeNode});
33 | 
/**
 * Instagram scraper pointed at a server on 127.0.0.1 instead of the real
 * site. It always runs with `silent: true` and force-stops itself 30 seconds
 * after construction.
 */
export class FakePage extends Instagram<t.TypeOf<typeof FakeValidator>> {
    constructor(options: IFakePageOptions = {path: "", port: 0}) {
        const {path, port, pageQuery, edgeQuery, catchPage, postPage} = options;

        // The optional path is appended directly after the port
        const baseURL = "http://127.0.0.1:" + port + (path ? path : "");

        // Caller-supplied API options, with silent forced on
        const apiOptions: IOptions = {...options.options, silent: true};

        super(baseURL, "", pageQuery, edgeQuery, apiOptions, FakeValidator);

        this.catchURL = baseURL + "/" + catchPage;
        this.postURL = baseURL + "/" + postPage;

        // Safety net: stop the scraper after 30 seconds regardless of progress
        setTimeout(async () => {
            await this.forceStop();
        }, 30000);
    }
}
62 | 


--------------------------------------------------------------------------------
/tests/__fixtures__/QuickGraft.ts:
--------------------------------------------------------------------------------
1 | import {Hashtag, IOptions} from "../../src/api/api";
2 | 
/**
 * Hashtag scraper fixture that overrides `jumpMod` with 2 after construction
 */
export class QuickGraft extends Hashtag<{}> {
    constructor(id: string, options: IOptions = {}) {
        super(id, options);

        const fixtureJumpMod = 2;
        this.jumpMod = fixtureJumpMod;
    }
}
9 | 


--------------------------------------------------------------------------------
/tests/server.ts:
--------------------------------------------------------------------------------
  1 | import express from "express";
  2 | import {AddressInfo} from "net";
  3 | 
  4 | const app = express();
  5 | 
  6 | app.get("/", (req, res) => {
  7 |     res.send(`
  8 |             <!DOCTYPE html>
  9 |             <script type="text/javascript">
 10 |                 const endpoints = [
 11 |                     "rate_limit",
 12 |                     "invalid_json",
 13 |                     "non_object",
 14 |                     "no_next_page",
 15 |                     "duplicate_ids",
 16 |                     "invalid_id"
 17 |                 ];
 18 |                 setInterval(() => {
 19 |                     for (const endpoint of endpoints) {
 20 |                         console.log("API request to " + endpoint);
 21 |                         const xhttp = new XMLHttpRequest();
 22 |                         xhttp.open("GET", endpoint, true);
 23 |                         xhttp.send();
 24 |                     }
 25 |                 }, 2000)
 26 |             </script>
 27 |         `);
 28 | });
 29 | 
 30 | app.get("/rate_limit", (req, res) => {
 31 |     res.send(
 32 |         JSON.stringify({
 33 |             status: "fail",
 34 |         }),
 35 |     );
 36 | });
 37 | 
 38 | app.get("/invalid_json", (req, res) => {
 39 |     res.send("invalid");
 40 | });
 41 | 
 42 | app.get("/non_object", (req, res) => {
 43 |     res.send("1");
 44 | });
 45 | 
 46 | app.get("/no_next_page", (req, res) => {
 47 |     res.send(
 48 |         JSON.stringify({
 49 |             data: {
 50 |                 end_cursor: "cursor",
 51 |                 has_next_page: false,
 52 |             },
 53 |         }),
 54 |     );
 55 | });
 56 | 
 57 | app.get("/duplicate_ids", (req, res) => {
 58 |     res.send(
 59 |         JSON.stringify({
 60 |             data: {
 61 |                 edges: [
 62 |                     {
 63 |                         node: {
 64 |                             id: "1",
 65 |                         },
 66 |                     },
 67 |                     {
 68 |                         node: {
 69 |                             id: "1",
 70 |                         },
 71 |                     },
 72 |                 ],
 73 |                 end_cursor: "cursor",
 74 |                 has_next_page: true,
 75 |             },
 76 |         }),
 77 |     );
 78 | });
 79 | 
 80 | app.get("/invalid_id", (req, res) => {
 81 |     res.send(
 82 |         JSON.stringify({
 83 |             data: {
 84 |                 edges: [
 85 |                     {
 86 |                         node: {
 87 |                             id: "badid",
 88 |                         },
 89 |                     },
 90 |                 ],
 91 |                 end_cursor: "cursor",
 92 |                 has_next_page: false,
 93 |             },
 94 |         }),
 95 |     );
 96 | });
 97 | 
 98 | app.get("/invalid_page", (req, res) => {
 99 |     res.send("<h2>Sorry, this page isn't available.</h2>");
100 | });
101 | 
102 | let listener;
103 | 
/**
 * Start the fake server on a port chosen by the OS (listen on port 0).
 * @returns The port the server is listening on
 */
export async function startServer(): Promise<number> {
    await new Promise<void>((resolve) => {
        listener = app.listen(0, () => resolve());
    });

    const address = listener.address() as AddressInfo;
    return address.port;
}
111 | 
/**
 * Close the listener created by startServer and wait for the close callback
 */
export async function stopServer() {
    await new Promise<void>((resolve) => {
        listener.close(() => resolve());
    });
}
117 | 


--------------------------------------------------------------------------------
/tests/test.spec.ts:
--------------------------------------------------------------------------------
  1 | import * as t from "io-ts";
  2 | import {launch, Overrides, Request} from "puppeteer";
  3 | import * as winston from "winston";
  4 | import {createApi, IPlugin} from "..";
  5 | import {plugins} from "..";
  6 | import {IPluginContext} from "../plugins";
  7 | import {IOptions, IOptionsFullApi} from "../src/api/api";
  8 | import {FakePage, IFakePageOptions} from "./__fixtures__/FakePage";
  9 | import {QuickGraft} from "./__fixtures__/QuickGraft";
 10 | import {startServer, stopServer} from "./server";
 11 | 
 12 | jest.setTimeout(8 * 60 * 1000);
 13 | /* tslint:disable:no-console */
 14 | 
 15 | const hashtags = ["beach", "gym", "puppies", "party", "throwback"];
 16 | const users = ["snoopdogg", "arianagrande", "bbc", "whitehouse", "australia"];
 17 | const posts = [
 18 |     "By54GDoHGzK",
 19 |     "Be3rTNplCHf",
 20 |     "BlBvw2_jBKp",
 21 |     "Bzi33wDnxOz",
 22 |     "BfzEfy-lK1N",
 23 |     "Bneu_dCHVdn",
 24 |     "Brx-adXA9C1",
 25 |     "Bz5flRagYQt",
 26 |     "BmRZH7NFwi6",
 27 |     "BpiIJCUnYwy",
 28 | ];
 29 | 
 30 | let smallSize = 10;
 31 | let mediumSize = 100;
 32 | let largeSize = 1000;
 33 | 
 34 | // Run faster unless executing in CI
 35 | if (!process.env.CI) {
 36 |     smallSize /= 10;
 37 |     mediumSize /= 10;
 38 |     largeSize /= 10;
 39 | }
 40 | 
 41 | const browserPath = process.env.CHROME
 42 |     ? process.env.CHROME
 43 |     : "/usr/bin/google-chrome";
 44 | 
 45 | // Name of an account with 0 posts to test graceful exit
 46 | const emptyAccountName = "emptyaccount";
 47 | 
 48 | const createLogger = () =>
 49 |     winston.createLogger({
 50 |         format: winston.format.json(),
 51 |         level: "debug",
 52 |         silent: false,
 53 |         transports: [
 54 |             new winston.transports.File({
 55 |                 filename: "instamancer_tests.log",
 56 |                 level: "debug",
 57 |             }),
 58 |             new winston.transports.Console({
 59 |                 level: "error",
 60 |             }),
 61 |         ],
 62 |     });
 63 | const testWrapperLogger = createLogger();
 64 | 
 65 | const libraryTestOptions: IOptions = {
 66 |     logger: createLogger(),
 67 |     silent: true,
 68 |     strict: true,
 69 |     total: 10,
 70 | };
 71 | 
 72 | /**
 73 |  * Used to debug stalled builds in travis
 74 |  * @param name Test name
 75 |  * @param callback Test function
 76 |  */
 77 | function testWrapper(name: string, callback: () => Promise<void>) {
 78 |     test(name, async () => {
 79 |         const logSignPost = `JEST: Testing ${name}`;
 80 |         if (process.env.CI) {
 81 |             console.log(logSignPost);
 82 |             testWrapperLogger.info(logSignPost);
 83 |         }
 84 | 
 85 |         await callback();
 86 |     });
 87 | }
 88 | 
 89 | describe("Library Classes", () => {
 90 |     const total = 10;
 91 |     const objects = {
 92 |         hashtag: createApi("hashtag", hashtags[0], libraryTestOptions),
 93 |         post: createApi("post", posts, libraryTestOptions),
 94 |         user: createApi("user", users[0], libraryTestOptions),
 95 |     };
 96 | 
 97 |     for (const [key, object] of Object.entries(objects)) {
 98 |         testWrapper(key, async () => {
 99 |             const scraped = [];
100 |             for await (const post of object.generator()) {
101 |                 expect(post).toBeDefined();
102 |                 scraped.push(post);
103 |             }
104 |             expect(scraped.length).toBe(total);
105 |         });
106 |     }
107 | });
108 | 
describe("Library Functions", () => {
    const total = 10;

    // Generators are created up front, while the suite is being defined
    const hashtagGenerator = createApi(
        "hashtag",
        hashtags[0],
        libraryTestOptions,
    ).generator();
    const postGenerator = createApi(
        "post",
        posts,
        libraryTestOptions,
    ).generator();
    const userGenerator = createApi(
        "user",
        users[0],
        libraryTestOptions,
    ).generator();

    const generators = {
        hashtag: hashtagGenerator,
        post: postGenerator,
        user: userGenerator,
    };

    for (const [endpointName, generator] of Object.entries(generators)) {
        testWrapper(endpointName, async () => {
            const scraped = [];
            for await (const post of generator) {
                expect(post).toBeDefined();
                scraped.push(post);
            }

            // The generator must stop at exactly the requested total
            expect(scraped).toHaveLength(total);
        });
    }
});
132 | 
describe("Full API", () => {
    const total = 10;

    // Same options as the library suites, with the full API switched on
    const fullApiOption: IOptionsFullApi = {
        ...libraryTestOptions,
        fullAPI: true,
    };

    // Generators are created up front, while the suite is being defined
    const hashtagGenerator = createApi(
        "hashtag",
        hashtags[0],
        fullApiOption,
    ).generator();
    const postGenerator = createApi("post", posts, fullApiOption).generator();
    const userGenerator = createApi("user", users[0], fullApiOption).generator();

    const generators = {
        hashtag: hashtagGenerator,
        post: postGenerator,
        user: userGenerator,
    };

    for (const [endpointName, generator] of Object.entries(generators)) {
        testWrapper(endpointName, async () => {
            const scraped = [];
            for await (const post of generator) {
                expect(post).toBeDefined();
                scraped.push(post);
            }

            // The generator must stop at exactly the requested total
            expect(scraped).toHaveLength(total);
        });
    }
});
156 | 
testWrapper("Account with < 10 photos", async () => {
    // This account is not well known and could be deleted at any moment.
    // If this test starts to fail, find another user with fewer than
    // 10 photos.
    const id = "zhiznizmelochei";
    const api = createApi("user", id, {
        ...libraryTestOptions,
        fullAPI: true,
    } as IOptionsFullApi);

    const scraped = [];
    for await (const post of api.generator()) {
        expect(post).toBeDefined();
        scraped.push(post);
    }

    expect(scraped.length).toBeGreaterThan(0);
    // If this user starts publishing new posts, a replacement account
    // is needed
    expect(scraped.length).toBeLessThan(10);
});
177 | 
describe("API limits", () => {
    // One entry per endpoint: the API to scrape, the candidate ids and the
    // post totals to request
    interface IApiTestConditions {
        api: "hashtag" | "user";
        ids: string[];
        sizes: number[];
    }

    const endpoints: IApiTestConditions[] = [
        {api: "hashtag", ids: hashtags, sizes: [largeSize]},
        {api: "user", ids: users, sizes: [mediumSize]},
    ];

    // Decide how many ids to test based on size
    const idCountFor = (size: number): number => {
        if (size === mediumSize) {
            return 3;
        }
        if (size === largeSize) {
            return 1;
        }
        return 5;
    };

    for (const {api: sourceApi, ids, sizes} of endpoints) {
        for (const size of sizes) {
            const sizeIds = ids.slice(0, idCountFor(size));

            for (const id of sizeIds) {
                testWrapper(`${sourceApi} ${id} ${size}`, async () => {
                    // Specify API options
                    const options: IOptions = {
                        enableGrafting: true,
                        fullAPI: false,
                        headless: true,
                        logger: createLogger(),
                        silent: false,
                        sleepTime: 2,
                        strict: true,
                        total: size,
                    };

                    // Create API
                    const api = createApi(sourceApi, id, options);

                    // Get posts, tracking the distinct ids seen
                    const scraped = [];
                    const postIds = new Set();
                    for await (const post of api.generator()) {
                        postIds.add(post.node.id);
                        scraped.push(post);
                    }

                    // Assert sizes
                    expect(scraped.length).toBe(size);

                    // Check duplicates: every post id must be distinct
                    expect(scraped.length).toBe(postIds.size);
                });
            }
        }
    }
});
248 | 
describe("API options", () => {
    const hashtagId = "vetinari";
    const total = 50;

    // [test name, options] pairs. The first entry sets no total and the
    // last one caps the total at 5; both are special-cased below.
    const optionsCollection: [string, IOptions][] = [
        ["No options", {}],
        ["Silence", {silent: true, total}],
        ["Sleep", {sleepTime: 5, total}],
        ["Headless", {headless: false, total}],
        ["Grafting", {enableGrafting: false, total}],
        ["Executable path", {executablePath: browserPath, total}],
        ["Full api", {fullAPI: true, total}],
        ["Limited full api", {fullAPI: true, total: 5}],
    ];
    const lastIndex = optionsCollection.length - 1;

    optionsCollection.forEach(([name, options], index) => {
        testWrapper(name, async () => {
            // @ts-ignore
            const tag = createApi("hashtag", hashtagId, options);

            const scraped = [];
            for await (const post of tag.generator()) {
                expect(post).toBeDefined();
                scraped.push(post);
            }

            if (index === 0) {
                // No total given: more than `total` posts must come back
                expect(scraped.length).toBeGreaterThan(total);
                return;
            }

            const expected = index === lastIndex ? 5 : total;
            expect(scraped.length).toBe(expected);
        });
    });
});
284 | 
describe("Unusual behavior", () => {
    // An account without posts must end the generator without yielding
    testWrapper("Empty page", async () => {
        const api = createApi("user", emptyAccountName, {});
        const userPosts = [];
        for await (const post of api.generator()) {
            userPosts.push(post);
        }
        expect(userPosts).toHaveLength(0);
    });

    // With grafting disabled the requested total must still be reached
    testWrapper("No grafting", async () => {
        const total = 100;
        const api = new QuickGraft(hashtags[0], {
            enableGrafting: false,
            total,
        });

        const scraped = [];
        for await (const post of api.generator()) {
            scraped.push(post);
        }

        expect(scraped).toHaveLength(total);
    });

    // pause() is called before iterating and again after 20 seconds
    testWrapper("Pausing", async () => {
        const api = createApi("hashtag", hashtags[0], {total: 100});
        const iterator = api.generator();

        api.pause();
        setTimeout(() => api.pause(), 20000);

        for await (const post of iterator) {
            expect(post).toBeDefined();
        }
    });

    // Hibernation is toggled after the first post has been received
    testWrapper("Hibernation", async () => {
        const api = createApi("hashtag", hashtags[0], {
            hibernationTime: 10,
            total: smallSize,
        } as IOptions);
        const iterator = api.generator();

        await iterator.next();
        api.toggleHibernation();

        for await (const post of iterator) {
            expect(post).toBeDefined();
        }
    });

    // A proxy on 127.0.0.1:9999 is configured; no posts may be produced
    testWrapper("Failed Page visit", async () => {
        const api = createApi("hashtag", hashtags[0], {
            proxyURL: "127.0.0.1:9999",
        } as IOptions);
        const scraped = [];

        try {
            for await (const post of api.generator()) {
                scraped.push(post);
            }
        } catch (e) {
            expect(e).toBeDefined();
        }

        expect(scraped).toHaveLength(0);
    });
});
357 | 
describe("Network and API issues", () => {
    /**
     * Start the fake server, scrape it with a FakePage built from `options`
     * until the generator ends or throws, then stop the scraper and server.
     * @param options FakePage options; `port` is overwritten with the port
     * the fake server is listening on
     */
    async function testOptions(options: IFakePageOptions) {
        options.port = await startServer();

        try {
            const api = new FakePage(options);
            const mock = jest.fn();

            try {
                for await (const post of api.generator()) {
                    mock(post);
                }
            } catch (e) {
                expect(e).toBeDefined();
            }
            await api.forceStop();
        } finally {
            // Always release the port, even when construction or forceStop
            // throws; otherwise the listener stays open after the test
            await stopServer();
        }
    }

    testWrapper("Rate limit", async () => {
        await testOptions({
            catchPage: "rate_limit",
            options: {hibernationTime: 10},
        });
    });

    testWrapper("Invalid JSON", async () => {
        await testOptions({catchPage: "invalid_json"});
    });

    testWrapper("Non object", async () => {
        await testOptions({catchPage: "non_object"});
    });

    testWrapper("No next page", async () => {
        await testOptions({catchPage: "no_next_page", pageQuery: "data"});
    });

    testWrapper("Duplicate post ids", async () => {
        await testOptions({
            catchPage: "duplicate_ids",
            edgeQuery: "data.edges",
            pageQuery: "data",
        });
    });

    testWrapper("Invalid post id", async () => {
        await testOptions({
            catchPage: "invalid_id",
            edgeQuery: "data.edges",
            options: {fullAPI: true, total: 1},
            pageQuery: "data",
        });
    });

    testWrapper("Invalid page", async () => {
        await testOptions({
            path: "/invalid_page",
        });
    });
});
418 | 
describe("Strict mode", () => {
    // Validator that demands a string `foo` property; passed to the API to
    // provoke a validation mismatch.
    const failingValidator = t.type({
        foo: t.string,
    });

    /**
     * Requests a single item from `iterator`. When the request rejects, the
     * rejection must be an Error whose message starts with "Invalid value".
     * When it resolves, no assertion runs here; callers rely on
     * `expect.hasAssertions()` to fail the test in that case.
     */
    const expectInvalidValueOnNext = async (iterator: {
        next(): Promise<unknown>;
    }) => {
        try {
            await iterator.next();
        } catch (e) {
            expect(e).toBeInstanceOf(Error);
            expect(e.message).toMatch(/^Invalid value/);
        }
    };

    testWrapper(
        "Should fire warning if strict is false and validations are different",
        async () => {
            const logger = createLogger();
            logger.warn = jest.fn();

            const api = createApi("hashtag", hashtags[0], {
                logger,
                strict: false,
                total: 1,
                validator: failingValidator,
            });

            // One warning is expected per yielded post.
            let yielded = 0;
            for await (const post of api.generator()) {
                yielded += 1;
                expect(logger.warn).toBeCalledTimes(yielded);
            }
        },
    );

    testWrapper(
        "Should not fire warning if strict is false and validations are ok",
        async () => {
            const logger = createLogger();
            logger.warn = jest.fn();

            const api = createApi("hashtag", hashtags[0], {
                logger,
                strict: false,
                total: 1,
            });

            for await (const post of api.generator()) {
                expect(logger.warn).toBeCalledTimes(0);
            }
        },
    );

    testWrapper(
        "Should throw validation error if strict is true and types are incorrect",
        async () => {
            expect.hasAssertions();
            const api = createApi("hashtag", hashtags[0], {
                strict: true,
                total: 1,
                validator: failingValidator,
            });

            await expectInvalidValueOnNext(api.generator());
        },
    );

    testWrapper(
        "Should throw validation error if strict is true and types are incorrect (Post)",
        async () => {
            expect.hasAssertions();
            const api = createApi("post", posts, {
                strict: true,
                total: 1,
                validator: failingValidator,
            });

            await expectInvalidValueOnNext(api.generator());
        },
    );

    testWrapper(
        "Should throw validation error if strict is true and types are incorrect (Full Mode)",
        async () => {
            expect.hasAssertions();
            const api = createApi("hashtag", hashtags[0], {
                fullAPI: true,
                strict: true,
                total: 1,
                validator: failingValidator,
            });

            await expectInvalidValueOnNext(api.generator());
        },
    );
});
519 | 
describe("Search", () => {
    // The first user returned for "therock" must be that account and carry
    // the basic profile fields.
    testWrapper("Search Result Users", async () => {
        const result = await createApi(
            "search",
            "therock",
            libraryTestOptions,
        ).get();
        expect(result.users.length).toBeGreaterThan(0);
        const user = result.users[0].user;
        expect(user.username).toBe("therock");
        expect(user.full_name).toBeTruthy();
        expect(user.profile_pic_url).toBeTruthy();
    });

    // The first hashtag returned for "nofilter" must be that hashtag and
    // expose a media count.
    testWrapper("Search Result Hashtags", async () => {
        const result = await createApi(
            "search",
            "nofilter",
            libraryTestOptions,
        ).get();
        expect(result.hashtags.length).toBeGreaterThan(0);
        const hashtag = result.hashtags[0].hashtag;
        expect(hashtag.media_count).not.toBeUndefined();
        expect(hashtag.name).toBe("nofilter");
    });

    // The first place returned for "New york" must have a matching title.
    testWrapper("Search Result Places", async () => {
        const result = await createApi(
            "search",
            "New york",
            libraryTestOptions,
        ).get();
        expect(result.places.length).toBeGreaterThan(0);
        const place = result.places[0].place;
        expect(place.title).toMatch(/New York/);
    });

    // With strict mode on and a validator requiring a string `foo`, get()
    // is expected to reject with an "Invalid value" error.
    testWrapper("Incorrect validation", async () => {
        const failingValidator = t.type({
            foo: t.string,
        });

        expect.hasAssertions();
        const search = createApi("search", "Doesn't matter", {
            strict: true,
            validator: failingValidator,
        });

        try {
            await search.get();
        } catch (e) {
            expect(e).toBeInstanceOf(Error);
            expect(e.message).toMatch(/^Invalid value/);
        } finally {
            // Stop the search even when an assertion above fails, so a
            // failing run does not leave it active.
            await search.forceStop();
        }
    });

    testWrapper("Search should fire only one network request", async () => {
        const searchRequestsSpy = jest.fn();

        // Plugin that counts the requests whose URL the API state matches.
        class RequestCounter<PostType> implements IPlugin<PostType> {
            public async requestEvent(
                this: IPluginContext<IPlugin<PostType>, PostType>,
                req: Request,
                overrides: Overrides,
            ) {
                if (this.state.matchURL(req.url())) {
                    searchRequestsSpy();
                }
            }
        }

        const search = createApi(
            "search",
            "A really long long long string to find something in Instagram",
            {
                plugins: [new RequestCounter()],
            },
        );

        await search.get();
        expect(searchRequestsSpy).toBeCalledTimes(1);
    });
});
604 | 
describe("Plugins", () => {
    // Runs a 100-post hashtag scrape once per export of the plugins module,
    // with only that plugin installed, and checks every post is yielded.
    testWrapper("Internal plugins", async () => {
        for (const pluginName of Object.keys(plugins)) {
            const PluginClass = plugins[pluginName];
            const options: IOptions = {
                plugins: [new PluginClass()],
                silent: true,
                total: 100,
            };

            const onPost = jest.fn();
            const api = createApi("hashtag", hashtags[0], options);
            for await (const post of api.generator()) {
                onPost(post);
            }

            expect(onPost).toBeCalledTimes(100);
        }
    });
});
627 | 
describe("Browser instance passed from outside", () => {
    // Launch options for the externally owned browser used by these tests.
    const browserOptions = {
        headless: true,
        args: ["--no-sandbox", "--disable-setuid-sandbox"],
    };

    // After one item is pulled, the supplied browser should hold exactly
    // two pages.
    testWrapper("Should re-use this browser instance", async () => {
        const browser = await launch(browserOptions);

        try {
            const hashtagGenerator = createApi("hashtag", hashtags[0], {
                browserInstance: browser,
            }).generator();
            await hashtagGenerator.next();

            const pages = await browser.pages();

            expect(pages.length).toBe(2);
        } finally {
            // Close the browser even when the scrape or an expectation
            // fails; otherwise the launched process outlives the test.
            await browser.close();
        }
    });

    // The API must leave a browser it did not launch connected.
    testWrapper("Should not close browser instance", async () => {
        const browser = await launch(browserOptions);

        try {
            const searchGenerator = createApi("search", "therock", {
                browserInstance: browser,
            }).generator();
            await searchGenerator.next();

            expect(browser.isConnected()).toBe(true);
        } finally {
            // Close the browser even when the scrape or an expectation
            // fails; otherwise the launched process outlives the test.
            await browser.close();
        }
    });
});
661 | 


--------------------------------------------------------------------------------
/tests/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "module": "commonjs",
 4 |     "target": "es2018",
 5 |     "noImplicitAny": false,
 6 |     "inlineSourceMap": true,
 7 |     "lib": ["dom", "es2018", "esnext.asynciterable"],
 8 |     "esModuleInterop": true,
 9 |     "resolveJsonModule": true
10 |   },
11 |   "compileOnSave": false
12 | }
13 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "compilerOptions": {
 3 |         "module": "commonjs",
 4 |         "target": "es2018",
 5 |         "noImplicitAny": false,
 6 |         "inlineSourceMap": true,
 7 |         "lib": ["dom", "es2018", "esnext.asynciterable"],
 8 |         "resolveJsonModule": true,
 9 |         "esModuleInterop": true
10 |     },
11 |     "include": ["*.ts", "!*.d.ts", "src/cli.ts"],
12 |     "exclude": ["node_modules", "tests/*", "examples/*"]
13 | }
14 | 


--------------------------------------------------------------------------------
/tslint.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "defaultSeverity": "error",
 3 |   "extends": [
 4 |     "tslint:recommended"
 5 |   ],
 6 |   "jsRules": {},
 7 |   "rules": {
 8 |     // Could eventually be re-enabled by removing Hashtag Location User classes
 9 |     "max-classes-per-file": [false],
10 | 
11 |     // This needs to be robust enough to support API changes without refactoring
12 |     "no-string-literal": false
13 |   },
14 |   "rulesDirectory": []
15 | }


--------------------------------------------------------------------------------
/utils/validation-generator/.gitignore:
--------------------------------------------------------------------------------
1 | /input.json
2 | /output.ts


--------------------------------------------------------------------------------
/utils/validation-generator/README.md:
--------------------------------------------------------------------------------
 1 | # API validation generator
 2 | 
 3 | > Warning! The output which we get from `transform-json-types` library is not perfect. `output.ts` needs to be checked after the automatic transformation.
 4 | 
 5 | This util is used to automatically generate [io-ts](https://github.com/gcanti/io-ts) runtime and type validations for an actual Instagram API. 
 6 | 
 7 | To generate these validations two steps are required:
 8 | 
 9 | *   Get an actual Instagram API response and save as json
10 | *   Get `io-ts` typings from it
11 | 
12 | ## Actual API response
13 | 
14 | `ts-node utils/validation-generator/get-input.ts` 
15 | 
16 | The script will save an actual API response for different endpoints in `input.json` file (gitignored)
17 | 
18 | ## Generate typings
19 | 
20 | > Warning! For some reason the generated typings come out slightly wrong: `Node3` must be replaced with `Node` inside the `Post` type to make them valid.
21 | 
22 | 1.  `ts-node utils/validation-generator/generate.ts` (The script will save typing to `output.ts` file.)
23 | 2.  Move all primitive types (those which do not use other types, like `ThumbnailResources`, `Owner` and others) to the top of the file, final types (like `Post`) to the bottom of the file, and fix all the block-scoped variable order errors manually.
24 | 3.  Write typing for FullApiPost (generally it is a SinglePost, but with location as an object)
25 | 4.  It is better to make the main type excessive by using [io-ts-excess](https://github.com/goooseman/io-ts-excess). Here's an example:
26 | ```typescript
27 | export const SinglePost = t.type({
28 |   shortcode_media: excess(ShortcodeMedia),
29 | });
30 | ```
31 | By making this type excessive, you will get a validation error if new properties appear in the API.
32 | 5.  Move `SearchResult`, `User`, `Places`, `Hashtags` types to `src/api/search.ts`
33 | 6.  Fix the rest of the typings
34 | 
35 | ## Fix typings
36 | 
37 | To quickly find all the typing errors in the project, you can run `npm test -- -t "Strict mode"` and `npm test -- -t "Full API"`.
38 | 
39 | You can get a lot of really verbose errors, like:
40 | 
41 | ``` typescript
42 |     Invalid value 
43 |     {"id":"219469050","has_public_page":true,"name":"Costa Nova, Aveiro, Portugal","slug":"costa-nova-aveiro-portugal","address_json":"{\"street_address\": \"\", \"zip_code\": \"\", \"city_name\": \"Costa Nova, Aveiro, Portugal\", \"region_name\": \"\", \"country_code\": \"PT\", \"exact_city_match\": true, \"exact_region_match\": false, \"exact_country_match\": false}"}
44 |     supplied to : 
45 |     { shortcode_media: { __typename: string, id: string, shortcode: string, dimensions: { height: number, width: number }, gating_info: (string | null), media_preview: (string | null), display_url: string, display_resources: Array<{ src: string, config_width: number, config_height: number }>, accessibility_caption: (string | undefined), is_video: boolean, should_log_client_event: boolean, tracking_token: string, edge_media_to_tagged_user: { edges: Array<{ node: { text: (string | undefined) } }> }, edge_media_to_caption: { edges: Array<{ node: { text: (string | undefined) } }> }, caption_is_edited: boolean, has_ranked_comments: boolean, edge_media_to_parent_comment: ({ count: number, page_info: { has_next_page: boolean, end_cursor: (string | null) }, edges: Array<{ node: ({ id: string, text: string, created_at: number, did_report_as_spam: boolean, owner: { id: string, is_verified: boolean, profile_pic_url: string, username: string }, viewer_has_liked: boolean, edge_liked_by: { count: number } } & { edge_threaded_comments: { count: number, page_info: { has_next_page: boolean, end_cursor: (string | null) }, edges: Array<{ node: { id: string, text: string, created_at: number, did_report_as_spam: boolean, owner: { id: string, is_verified: boolean, profile_pic_url: string, username: string }, viewer_has_liked: boolean, edge_liked_by: { count: number } } }> } }) }> } | undefined), edge_media_preview_comment: ({ count: number, edges: Array<{ node: { id: string, text: string, created_at: number, did_report_as_spam: boolean, owner: { id: string, is_verified: boolean, profile_pic_url: string, username: string }, viewer_has_liked: boolean, edge_liked_by: { count: number } } }> } | undefined), comments_disabled: boolean, taken_at_timestamp: number, edge_media_preview_like: { count: number, edges: Array<{ node: { id: string, text: string, created_at: number, did_report_as_spam: boolean, owner: { id: string, is_verified: boolean, profile_pic_url: string, username: string }, 
viewer_has_liked: boolean, edge_liked_by: { count: number } } }> }, edge_media_to_sponsor_user: { edges: Array<{ node: { text: (string | undefined) } }> }, location: (string | null), viewer_has_liked: boolean, viewer_has_saved: boolean, viewer_has_saved_to_collection: boolean, viewer_in_photo_of_you: boolean, viewer_can_reshare: boolean, owner: { id: string, is_verified: boolean, profile_pic_url: string, username: string, blocked_by_viewer: boolean, followed_by_viewer: boolean, full_name: string, has_blocked_viewer: boolean, is_private: boolean, is_unpublished: boolean, requested_by_viewer: boolean }, is_ad: boolean, edge_web_media_to_related_media: { edges: Array<{ node: { text: (string | undefined) } }> } } }
46 |     /shortcode_media: { __typename: string, id: string, shortcode: string, dimensions: { height: number, width: number }, gating_info: (string | null), media_preview: (string | null), display_url: string, display_resources: Array<{ src: string, config_width: number, config_height: number }>, accessibility_caption: (string | undefined), is_video: boolean, should_log_client_event: boolean, tracking_token: string, edge_media_to_tagged_user: { edges: Array<{ node: { text: (string | undefined) } }> }, edge_media_to_caption: { edges: Array<{ node: { text: (string | undefined) } }> }, caption_is_edited: boolean, has_ranked_comments: boolean, edge_media_to_parent_comment: ({ count: number, page_info: { has_next_page: boolean, end_cursor: (string | null) }, edges: Array<{ node: ({ id: string, text: string, created_at: number, did_report_as_spam: boolean, owner: { id: string, is_verified: boolean, profile_pic_url: string, username: string }, viewer_has_liked: boolean, edge_liked_by: { count: number } } & { edge_threaded_comments: { count: number, page_info: { has_next_page: boolean, end_cursor: (string | null) }, edges: Array<{ node: { id: string, text: string, created_at: number, did_report_as_spam: boolean, owner: { id: string, is_verified: boolean, profile_pic_url: string, username: string }, viewer_has_liked: boolean, edge_liked_by: { count: number } } }> } }) }> } | undefined), edge_media_preview_comment: ({ count: number, edges: Array<{ node: { id: string, text: string, created_at: number, did_report_as_spam: boolean, owner: { id: string, is_verified: boolean, profile_pic_url: string, username: string }, viewer_has_liked: boolean, edge_liked_by: { count: number } } }> } | undefined), comments_disabled: boolean, taken_at_timestamp: number, edge_media_preview_like: { count: number, edges: Array<{ node: { id: string, text: string, created_at: number, did_report_as_spam: boolean, owner: { id: string, is_verified: boolean, profile_pic_url: string, username: string }, 
viewer_has_liked: boolean, edge_liked_by: { count: number } } }> }, edge_media_to_sponsor_user: { edges: Array<{ node: { text: (string | undefined) } }> }, location: (string | null), viewer_has_liked: boolean, viewer_has_saved: boolean, viewer_has_saved_to_collection: boolean, viewer_in_photo_of_you: boolean, viewer_can_reshare: boolean, owner: { id: string, is_verified: boolean, profile_pic_url: string, username: string, blocked_by_viewer: boolean, followed_by_viewer: boolean, full_name: string, has_blocked_viewer: boolean, is_private: boolean, is_unpublished: boolean, requested_by_viewer: boolean }, is_ad: boolean, edge_web_media_to_related_media: { edges: Array<{ node: { text: (string | undefined) } }> } }
47 |     /location: (string | null)
48 |     /1: null
49 | ```
50 | 
51 | This looks scary, but let's make it simple. We need just two parts from the output. 
52 | 
53 | The first one is the text representation of the value, which validator could not validate. It is between `Invalid value` and `supplied to` strings.
54 | The second one is the type of value the validator expected; it can be found after the last or the second-to-last `/` sign.
55 | 
56 | In our case the validator expected `string` or `null`, but an object was received.
57 | 
58 | So we can fix the typing in the following way:
59 | 
60 | ``` typescript
61 | export const Location = t.type({
62 |   id: t.string,
63 |   has_public_page: t.boolean,
64 |   name: t.string,
65 |   slug: t.string,
66 |   address_json: t.string,
67 | });
68 | ...
69 |  location: t.union([t.string, t.null, Location])
70 | 
71 | ```
72 | 


--------------------------------------------------------------------------------
/utils/validation-generator/generate.ts:
--------------------------------------------------------------------------------
 1 | import {writeFileSync} from "fs";
 2 | import {dirname, join} from "path";
 3 | import transform from "transform-json-types";
 4 | // @ts-ignore
 5 | import * as json from "./input.json";
 6 | 
 7 | const getPath = () => join(dirname(__filename), "./output.ts");
 8 | 
 9 | const removeVarFromCode = (code: string, varName: string): string => {
10 |   const regexp = new RegExp(`\nconst ${varName} =[^;]+;\n`, "gm");
11 |   return code.replace(regexp, "");
12 | };
13 | 
/**
 * Appends an exported static type alias `T<typeName>`, derived from the
 * io-ts codec `typeName` via `t.TypeOf`, on a new line after `code`.
 */
const addTypeToCode = (code: string, typeName: string): string => {
  const typeAlias = `export type T${typeName} = t.TypeOf<typeof ${typeName}>;`;
  return [code, typeAlias, ""].join("\n");
};
17 | 
/**
 * Replaces every occurrence of `<varNameSingle>s` in `code` with
 * `<varNameSingle>`.
 *
 * The name is treated as literal text on both sides of the replacement, so
 * characters such as `$` are neither interpreted as regex syntax in the
 * search nor as substitution patterns in the result. Occurrences inside
 * longer identifiers (e.g. `SinglePosts`) are replaced as well.
 *
 * @param code          Source text to edit.
 * @param varNameSingle Singular form of the identifier.
 * @returns `code` with the plural form singularized.
 */
const singularizeVarNameInCode = (
  code: string,
  varNameSingle: string,
): string => {
  return code.split(`${varNameSingle}s`).join(varNameSingle);
};
25 | 
// Post-processing applied, in this exact order, to the io-ts code produced
// by transform-json-types. Later steps operate on the result of earlier
// ones, so the order must not change.
const postProcessingSteps: Array<(code: string) => string> = [
  // Header: io-ts import, then the tslint pragma in front of it.
  (code) => `import * as t from "io-ts";\n\n${code}`,
  (code) => `// tslint:disable: object-literal-sort-keys\n${code}`,
  // Footer: re-enable the tslint rule.
  (code) => `${code}// tslint:enable: object-literal-sort-keys\n`,
  // Drop the wrapper declarations.
  (code) => removeVarFromCode(code, "RootInterface"),
  (code) => removeVarFromCode(code, "Default"),
  // Export every declaration that starts a line.
  (code) => code.replace(/^const/gm, "export const"),
  (code) => code.replace(/t\.Array/gm, "t.UnknownArray"),
  (code) => code.replace(/\ string/gm, " t.string"), // Really weird
  (code) => code.replace(/t\.Integer/gm, "t.number"), // Integer does not have ts type
  (code) => singularizeVarNameInCode(code, "Post"),
  (code) => singularizeVarNameInCode(code, "SearchResult"),
  // Static type aliases for the main codecs.
  (code) => addTypeToCode(code, "Post"),
  (code) => addTypeToCode(code, "SinglePost"),
  (code) => addTypeToCode(code, "SearchResult"),
];

const output = postProcessingSteps.reduce(
  (code, applyStep) => applyStep(code),
  transform(json, {
    lang: "io-ts",
  }),
);

writeFileSync(getPath(), output, {
  encoding: "utf-8",
});
48 | 


--------------------------------------------------------------------------------
/utils/validation-generator/get-input.ts:
--------------------------------------------------------------------------------
 1 | import {writeFileSync} from "fs";
 2 | import {dirname, join} from "path";
 3 | import {createApi} from "../../";
 4 | 
 5 | const getPath = () => join(dirname(__filename), "./input.json");
 6 | 
 7 | const getResult = async () => {
 8 |   const posts = await getPosts({
 9 |     hashtagId: "beach",
10 |     userId: "snoopdogg",
11 |   });
12 | 
13 |   const singlePosts = await getSinglePosts({
14 |     postsIds: [
15 |       "BsOGulcndj-",
16 |       "Be3rTNplCHf",
17 |       "BlBvw2_jBKp",
18 |       "Bi-hISIghYe",
19 |       "BfzEfy-lK1N",
20 |       "Bneu_dCHVdn",
21 |       "Brx-adXA9C1",
22 |       "BlTYHvXFrvm",
23 |       "BmRZH7NFwi6",
24 |       "BpiIJCUnYwy",
25 |     ],
26 |   });
27 | 
28 |   const searchResults = await getSearch({
29 |     queries: ["beach", "nofilter", "donald"],
30 |   });
31 | 
32 |   return {
33 |     posts,
34 |     searchResults,
35 |     singlePosts,
36 |   };
37 | };
38 | 
/**
 * Runs one search per query, sequentially, and returns the responses in
 * query order.
 */
const getSearch = async ({queries}: {queries: string[]}) => {
  // All search objects are created up front; only the requests are serial.
  const searches = queries.map((query) => createApi("search", query, {}));
  const responses = [];
  for (const search of searches) {
    const response = await search.get();
    responses.push(response);
  }
  return responses;
};
47 | 
/**
 * Collects up to 10 posts from the given hashtag, then up to 10 from the
 * given user, into a single list.
 */
const getPosts = async ({
  hashtagId,
  userId,
}: {
  hashtagId: string;
  userId: string;
}) => {
  const options = {total: 10};
  const sources = [
    createApi("hashtag", hashtagId, options),
    createApi("user", userId, options),
  ];

  const collected = [];
  for (const source of sources) {
    const generator = source.generator();
    for await (const post of generator) {
      collected.push(post);
    }
  }
  return collected;
};
72 | 
/** Collects everything the "post" API yields for the given post ids. */
const getSinglePosts = async ({postsIds}: {postsIds: string[]}) => {
  const collected = [];
  const generator = createApi("post", postsIds, {}).generator();
  for await (const singlePost of generator) {
    collected.push(singlePost);
  }
  return collected;
};
81 | 
/**
 * Gathers the sample API responses and writes them, pretty-printed with a
 * two-space indent, to the path returned by `getPath()`.
 */
const run = async () => {
  const result = await getResult();
  const json = JSON.stringify(result, null, 2);
  writeFileSync(getPath(), json, {
    encoding: "utf-8",
  });
};

run().catch((error) => {
  // tslint:disable-next-line: no-console
  console.error(error);
  // Report the failure to the caller; logging alone would still exit with 0.
  process.exitCode = 1;
});
92 | 


--------------------------------------------------------------------------------