├── .flake8
├── .gitattributes
├── .github
│   ├── CODE_OF_CONDUCT.md
│   ├── CONTRIBUTING.md
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── workflows
│       └── codeql-analysis.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── docs
│   ├── Makefile
│   ├── conf.py
│   ├── index.rst
│   ├── instascrape.core.rst
│   ├── instascrape.exceptions.rst
│   ├── instascrape.rst
│   ├── instascrape.scrapers.rst
│   ├── make.bat
│   └── modules.rst
├── instascrape
│   ├── __init__.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── _mappings.py
│   │   ├── _static_scraper.py
│   │   └── json_algos.py
│   ├── exceptions
│   │   ├── __init__.py
│   │   └── exceptions.py
│   └── scrapers
│       ├── __init__.py
│       ├── comment.py
│       ├── hashtag.py
│       ├── igtv.py
│       ├── location.py
│       ├── post.py
│       ├── profile.py
│       ├── reel.py
│       └── scrape_tools.py
├── media
│   ├── 6x6scatter_matrix.png
│   ├── instascrape.gif
│   ├── likes_heatmap.png
│   ├── logo.png
│   ├── logopic.png
│   ├── realpython.png
│   ├── scatter_matrix.png
│   └── techprofiles.gif
├── pypi.bash
├── pyproject.toml
├── pytest.ini
├── requirements.txt
├── setup.py
├── tests
│   ├── __init__.py
│   └── scrapers
│       ├── __init__.py
│       ├── test_hashtag.py
│       ├── test_igtv.py
│       ├── test_location.py
│       ├── test_post.py
│       ├── test_profile.py
│       └── test_reel.py
└── tutorial
    ├── examples
    │   ├── DonaldTrump
    │   │   ├── Donald Trump.ipynb
    │   │   ├── donald_trump.csv
    │   │   └── plots
    │   │       ├── comments_per_post.png
    │   │       ├── hashtags.png
    │   │       ├── likes_per_post.png
    │   │       ├── likes_vs_comments.png
    │   │       ├── locations.png
    │   │       ├── views_and_likes_per_view.png
    │   │       └── views_per_video.png
    │   ├── JoeBiden
    │   │   ├── joebiden.csv
    │   │   ├── joebiden.png
    │   │   ├── joebiden.py
    │   │   └── joebiden_urls.txt
    │   ├── README.md
    │   ├── download_recent_photos
    │   │   ├── 2020-09-08 09h06m.png
    │   │   ├── 2020-09-09 10h24m.png
    │   │   ├── 2020-09-14 10h05m.png
    │   │   ├── 2020-09-17 17h49m.png
    │   │   ├── 2020-09-24 11h01m.png
    │   │   ├── 2020-09-25 10h18m.png
    │   │   ├── 2020-09-26 11h38m.png
    │   │   ├── 2020-09-27 09h27m.png
    │   │   ├── 2020-09-28 12h17m.png
    │   │   ├── 2020-10-14 12h36m.png
    │   │   ├── 2020-10-15 13h11m.png
    │   │   ├── 2020-10-16 14h39m.png
    │   │   └── download_recent_photos.ipynb
    │   ├── max_liked_post.ipynb
    │   └── simple_hashtag_comparison
    │       └── simple_hashtag_comparison.ipynb
    └── tutorial
        ├── Part 0 - Orientation.ipynb
        └── Part 1 - Intro to the API.ipynb
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude = media,docs
3 | ignore=E402,F401,F403,F405,F821
4 | max-line-length=120
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | tutorial/** linguist-language=Python
2 | tutorial/examples/** linguist-language=Python
3 |
--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at chris@christophergreening.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to instascrape
2 | We love developers and want to hear your input! Contributing to this project should be as easy and transparent as possible, whether it's:
3 |
4 | - Reporting a bug
5 | - Discussing the current state of the code
6 | - Submitting a fix
7 | - Proposing new features
8 | - Becoming a maintainer
9 | - etc.
10 |
11 | ## We develop with GitHub
12 | We use GitHub to host code, track issues and feature requests, and accept pull requests. Changes accepted to `master` are also uploaded to the instascrape PyPI package.
13 |
14 | ## We use the [GitHub flow](https://guides.github.com/introduction/flow/), so all code changes happen through pull requests
15 | Pull requests are the best way to propose changes to the codebase. We actively welcome your pull requests:
16 |
17 | 1. Fork the repo and create your branch from `master`.
18 | 2. If you've added code that should be tested, add tests.
19 | 3. If you've changed APIs, update the documentation.
20 | 4. Ensure the test suite passes.
21 | 5. Make sure your code lints.
22 | 6. Issue that pull request!
23 |
24 | ## Report bugs using GitHub's [issues](https://github.com/chris-greening/instascrape/issues)
25 | We use GitHub issues to track public bugs. Report a bug by [opening a new issue](https://github.com/chris-greening/instascrape/issues/new/choose).
26 |
27 | ## Write bug reports with detail, background, and sample code
28 |
29 | **Great Bug Reports** tend to have:
30 |
31 | - A quick summary and/or background
32 | - Steps to reproduce
33 |   - Be specific!
34 |   - Give sample code if you can.
35 | - What you expected would happen
36 | - What actually happens
37 | - Notes (possibly including why you think this might be happening, or stuff you tried that didn't work)
38 |
39 |
46 |
47 | ## Any contributions you make will be under the MIT Software License
48 | In short, when you submit code changes, your submissions are understood to be under the same [MIT License](http://choosealicense.com/licenses/mit/) that covers the project. Feel free to contact the maintainers if that's a concern.
49 |
50 | ## References
51 | This document was adapted from [briandk's](https://gist.github.com/briandk/3d2e8b3ec8daf5a27a62) CONTRIBUTING.md template.
52 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Desktop (please complete the following information):**
27 | - OS: [e.g. iOS]
28 | - Browser [e.g. chrome, safari]
29 | - Version [e.g. 22]
30 |
31 | **Smartphone (please complete the following information):**
32 | - Device: [e.g. iPhone6]
33 | - OS: [e.g. iOS8.1]
34 | - Browser [e.g. stock browser, safari]
35 | - Version [e.g. 22]
36 |
37 | **Additional context**
38 | Add any other context about the problem here.
39 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ## Description
2 | Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.
3 |
4 | Do not include any personal data.
5 |
6 | Fixes # (issue)
7 |
8 | ## Checklist
9 |
10 | * [ ] I followed the guidelines in our Contributing document
11 | * [ ] I added an explanation of my changes
12 | * [ ] I have written new tests for my changes, as applicable
13 | * [ ] I successfully ran tests with my changes locally
14 |
15 | ## Additional notes (optional)
16 |
17 |
--------------------------------------------------------------------------------
/.github/workflows/codeql-analysis.yml:
--------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | name: "CodeQL"
7 |
8 | on:
9 | push:
10 | branches: [master]
11 | pull_request:
12 | # The branches below must be a subset of the branches above
13 | branches: [master]
14 | schedule:
15 | - cron: '0 18 * * 3'
16 |
17 | jobs:
18 | analyze:
19 | name: Analyze
20 | runs-on: ubuntu-latest
21 |
22 | strategy:
23 | fail-fast: false
24 | matrix:
25 | # Override automatic language detection by changing the below list
26 | # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python']
27 | language: ['python']
28 | # Learn more...
29 | # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection
30 |
31 | steps:
32 | - name: Checkout repository
33 | uses: actions/checkout@v2
34 | with:
35 | # We must fetch at least the immediate parents so that if this is
36 | # a pull request then we can checkout the head.
37 | fetch-depth: 2
38 |
39 | # If this run was triggered by a pull request event, then checkout
40 | # the head of the pull request instead of the merge commit.
41 | - run: git checkout HEAD^2
42 | if: ${{ github.event_name == 'pull_request' }}
43 |
44 | # Initializes the CodeQL tools for scanning.
45 | - name: Initialize CodeQL
46 | uses: github/codeql-action/init@v1
47 | with:
48 | languages: ${{ matrix.language }}
49 | # If you wish to specify custom queries, you can do so here or in a config file.
50 | # By default, queries listed here will override any specified in a config file.
51 | # Prefix the list here with "+" to use these queries and those in the config file.
52 | # queries: ./path/to/local/query, your-org/your-repo/queries@main
53 |
54 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
55 | # If this step fails, then you should remove it and run the build manually (see below)
56 | - name: Autobuild
57 | uses: github/codeql-action/autobuild@v1
58 |
59 | # ℹ️ Command-line programs to run using the OS shell.
60 | # 📚 https://git.io/JvXDl
61 |
62 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
63 | # and modify them (or add more) to build your code if your project
64 | # uses a compiled language
65 |
66 | #- run: |
67 | # make bootstrap
68 | # make release
69 |
70 | - name: Perform CodeQL Analysis
71 | uses: github/codeql-action/analyze@v1
72 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | .idea/
132 |
133 | # Large file ignores
134 | tutorial/examples/Donald\ Trump/*.db*
135 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/timothycrosley/isort
3 | rev: 5.5.4
4 | hooks:
5 | - id: isort
6 | - repo: https://github.com/ambv/black
7 | rev: 20.8b1
8 | hooks:
9 | - id: black
10 | - repo: https://gitlab.com/pycqa/flake8
11 | rev: 3.8.4
12 | hooks:
13 | - id: flake8
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Christopher Greening
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # _instascrape_: powerful Instagram data scraping toolkit
7 |
8 | ## Note: This module is no longer actively maintained.
9 |
10 | ## DISCLAIMER:
11 |
12 | Instagram has become increasingly strict about scraping, and using this library can get you flagged for botting AND POSSIBLY HAVE YOUR INSTAGRAM ACCOUNT DISABLED. This is a research project and I am not responsible for how you use it. The library itself is designed to be responsible and respectful, but what you do with it is up to you; I claim no responsibility if your Instagram account is affected by how you use this library.
13 |
14 | [](https://www.python.org/downloads/release/python-360/)
15 | [](https://pepy.tech/project/insta-scrape)
16 | [](https://pypi.org/project/insta-scrape/)
17 | [](https://opensource.org/licenses/MIT)
18 |
19 | [](https://github.com/chris-greening/instascrape)
20 | [](https://github.com/chris-greening/instascrape/blob/master/requirements.txt)
21 | [](https://github.com/chris-greening/instascrape/issues)
22 |
23 | ## What is it?
24 | _instascrape_ is a lightweight Python package that provides an expressive and flexible API for scraping Instagram data. It is geared towards being a high-level building block in the data scientist's toolchain and can be seamlessly integrated and extended with industry-standard tools for web scraping, data science, and analysis.
25 |
26 |
27 |
28 | ## Key features
29 | Here are a few of the things that `instascrape` does well:
30 |
31 | * Powerful, object-oriented scraping tools for profiles, posts, hashtags, reels, and IGTV
32 | * Scrapes from a URL, raw HTML, a BeautifulSoup object, or a JSON dictionary
33 | * Download content to your computer as _png_, _jpg_, _mp4_, and _mp3_ (see the sketch below this list)
34 | * Dynamically retrieve HTML embed code for posts
35 | * Expressive and consistent API for concise and elegant code
36 | * Designed for seamless integration with [_Selenium_](https://selenium-python.readthedocs.io/), [_Pandas_](https://pandas.pydata.org/), and other industry standard tools for data collection and analysis
37 | * Lightweight; no boilerplate or configurations necessary
38 | * The only hard dependencies are [_Requests_](https://requests.readthedocs.io/en/master/) and [_Beautiful Soup_](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
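For the download and embed features above, here is a minimal sketch (the `download` and `embed` helper names and signatures are assumptions inferred from the feature list, not confirmed by this README):

```python
from instascrape import Post

google_post = Post("https://www.instagram.com/p/CG0UU3ylXnv/")
google_post.scrape()

# Save the post's media to disk (assumed helper; the extension picks the format)
google_post.download(fp="google_post.png")

# Dynamically retrieve the post's HTML embed code (assumed helper)
embed_html = google_post.embed()
```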
39 | ---
40 |
41 | ## Table of Contents
42 | * [Installation](#installation)
43 | * [Sample Usage](#sample-usage)
44 | * [Documentation](#documentation)
45 | * [Blog Posts](#blog-posts)
46 | * [Contributing](#contributing)
47 | * [Dependencies](#dependencies)
48 | * [License](#license)
49 | * [Support](#support)
50 |
51 | ---
52 |
53 | ## :computer: Installation
54 |
55 | ### Minimum Python version
56 |
57 | This library currently requires [Python 3.7](https://www.python.org/downloads/release/python-370/) or higher.
58 |
59 |
60 | ### pip
61 | Install from PyPI using
62 | ```shell
63 | $ pip3 install insta-scrape
64 | ```
65 | WARNING: make sure you install _insta-scrape_ and not a package with a similar name!
66 |
67 | ---
68 |
69 | ## :mag_right: Sample Usage
70 | All top-level, ready-to-use features can be imported using:
71 | ```python
72 | from instascrape import *
73 | ```
74 |
75 | _instascrape_ uses clean, consistent, and expressive syntax to make the developer experience as _painless_ as possible.
76 |
77 | ```python
78 | # Instantiate the scraper objects
79 | google = Profile('https://www.instagram.com/google/')
80 | google_post = Post('https://www.instagram.com/p/CG0UU3ylXnv/')
81 | google_hashtag = Hashtag('https://www.instagram.com/explore/tags/google/')
82 |
83 | # Scrape their respective data
84 | google.scrape()
85 | google_post.scrape()
86 | google_hashtag.scrape()
87 |
88 | print(google.followers)
89 | print(google_post['hashtags'])
90 | print(google_hashtag.amount_of_posts)
91 | >>> 12262794
92 | >>> ['growwithgoogle']
93 | >>> 9053408
94 | ```
95 |
96 | See the [Scraped data points](https://github.com/chris-greening/instascrape/wiki/Scraped-data-points) section of the [Wiki](https://github.com/chris-greening/instascrape/wiki) for a complete list of the scraped attributes provided by each scraper.
97 |
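Every scraper also hands its data back programmatically. Below is a minimal sketch built on the `to_dict` method defined in `instascrape/core/_static_scraper.py`; the [_Pandas_](https://pandas.pydata.org/) step is optional and assumes you have it installed:

```python
import pandas as pd

from instascrape import Profile

google = Profile("https://www.instagram.com/google/")
google.scrape()

# All scraped data points as a plain dictionary
data = google.to_dict()

# One-row DataFrame, ready for the usual pandas analysis workflow
df = pd.DataFrame([data])
```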
98 | ## :books: Documentation
99 | The official documentation can be found on [Read The Docs](https://instascrape.readthedocs.io/en/latest/index.html)
100 |
101 | ---
102 |
103 | ## :newspaper: Blog Posts
104 |
105 |
106 | Check out blog posts on the [official site](https://chris-greening.github.io/instascrape/blog/) or [DEV](https://dev.to/) for ideas and tutorials!
107 |
108 | - [Scrape data from Instagram with instascrape](https://dev.to/chrisgreening/scrape-data-from-instagram-with-instascrape-5e3e)
109 | - [Visualizing Instagram engagement with instascrape](https://dev.to/chrisgreening/visualizing-instagram-engagement-with-instascrape-326h)
110 | - [Exploratory data analysis of Instagram using instascrape and Python](https://dev.to/chrisgreening/exploratory-data-analysis-of-instagram-using-python-1o5c)
111 | - [Creating a scatter matrix of Instagram data using Python](https://dev.to/chrisgreening/visualizing-the-relationship-between-instagram-variables-using-python-55gg)
112 | - [Downloading an Instagram profile's recent photos using Python](https://dev.to/chrisgreening/downloading-an-instagram-profile-s-recent-photos-using-python-25b2)
113 | - [Scraping 25,000 data points from Joe Biden's Instagram using instascrape](https://dev.to/chrisgreening/scraping-25-000-data-points-from-joe-biden-s-instagram-using-instascrape-1026)
114 | - [Compare major tech Instagram page's with instascrape](https://dev.to/chrisgreening/compare-major-tech-instagram-page-s-with-instascrape-2419)
115 | - [Tracking an Instagram posts engagement in real time with instascrape](https://dev.to/chrisgreening/tracking-an-instagram-posts-engagement-in-real-time-with-instascrape-1m1j)
116 | - [Dynamically generate embeddable Instagram HTML with instascrape](https://dev.to/chrisgreening/dynamically-generate-embeddable-instagram-html-using-instascrape-3o4b)
117 | - [Scraping an Instagram location tag with instascrape](https://dev.to/chrisgreening/scraping-an-instagram-location-tag-with-instascrape-554f)
118 | - [Scraping Instagram reels with instascrape](https://dev.to/chrisgreening/scraping-instagram-reels-with-instascrape-3khb)
119 | - [Scraping IGTV data with instascrape](https://dev.to/chrisgreening/scraping-igtv-data-with-instascrape-595f)
120 | - [Scraping 10,000 data points from Donald Trump's Instagram with Python](https://dev.to/chrisgreening/scraping-10-000-data-points-from-donald-trump-s-instagram-page-with-python-2jcg)
121 | ---
122 |
123 | ## :pray: Contributing
124 | All contributions, bug reports, bug fixes, documentation improvements, enhancements, and ideas are welcome!
125 |
126 | Feel free to [open an Issue](https://github.com/chris-greening/instascrape/issues/new/choose), check out existing [Issues](https://github.com/chris-greening/instascrape/issues), or [start a discussion](https://github.com/chris-greening/instascrape/discussions).
127 |
128 | Beginners to open source are highly encouraged to participate and ask questions if you're unsure what to do/where to start :heart:
129 |
130 | ---
131 |
132 | ## :spider_web: Dependencies
133 |
134 | - [Requests](https://requests.readthedocs.io/en/master/)
135 | - [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
136 |
137 | ---
138 |
139 |
140 | ## :credit_card: License
141 | This library operates under the [MIT](LICENSE) license.
142 |
143 | ---
144 |
145 | ## :grey_question: Support
146 |
147 | Check out the [FAQ](https://github.com/chris-greening/instascrape/wiki/Frequently-Asked-Questions)
148 |
149 | Reach out to me if you want to connect or have any questions, and I will do my best to get back to you.
150 | * Email:
151 | * chris@christophergreening.com
152 | * Twitter:
153 | * [@ChrisGreening](https://twitter.com/ChrisGreening)
154 | * LinkedIn
155 | * [Chris Greening](https://www.linkedin.com/in/chris-greening-646411139/)
156 | * Personal contact form:
157 | * [www.christophergreening.com](https://www.christophergreening.com/contact)
158 | ---
159 |
160 |
161 |
162 |
163 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 |
16 | sys.path.insert(0, os.path.abspath(".."))
17 | # sys.path.insert(0, r'D:\Programming\pythonstuff\instascrape')
18 |
19 |
20 | # -- Project information -----------------------------------------------------
21 |
22 | project = "instascrape"
23 | copyright = "2020, Chris Greening"
24 | author = "Chris Greening"
25 |
26 | # The full version, including alpha/beta/rc tags
27 | release = "0.0.7"
28 |
29 |
30 | # -- General configuration ---------------------------------------------------
31 |
32 | # Add any Sphinx extension module names here, as strings. They can be
33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
34 | # ones.
35 | extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"]
36 |
37 | master_doc = "index"
38 |
39 | # Add any paths that contain templates here, relative to this directory.
40 | templates_path = ["_templates"]
41 |
42 | # List of patterns, relative to source directory, that match files and
43 | # directories to ignore when looking for source files.
44 | # This pattern also affects html_static_path and html_extra_path.
45 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
46 |
47 |
48 | # -- Options for HTML output -------------------------------------------------
49 |
50 | # The theme to use for HTML and HTML Help pages. See the documentation for
51 | # a list of builtin themes.
52 | #
53 | html_theme = "sphinx_rtd_theme"
54 |
55 | # Add any paths that contain custom static files (such as style sheets) here,
56 | # relative to this directory. They are copied after the builtin static files,
57 | # so a file named "default.css" will overwrite the builtin "default.css".
58 | html_static_path = ["_static"]
59 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. instascrape documentation master file, created by
2 | sphinx-quickstart on Sat Sep 26 16:24:31 2020.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to instascrape's documentation!
7 | =======================================
8 |
9 | .. toctree::
10 | :maxdepth: 2
11 | :caption: Contents:
12 |
13 | instascrape.scrapers
14 | instascrape.exceptions
15 |
16 | Indices and tables
17 | ==================
18 |
19 | * :ref:`genindex`
20 | * :ref:`modindex`
21 | * :ref:`search`
22 |
--------------------------------------------------------------------------------
/docs/instascrape.core.rst:
--------------------------------------------------------------------------------
1 | instascrape.core package
2 | ========================
3 |
4 | Module contents
5 | ---------------
6 |
7 | .. automodule:: instascrape.core
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
--------------------------------------------------------------------------------
/docs/instascrape.exceptions.rst:
--------------------------------------------------------------------------------
1 | instascrape.exceptions package
2 | ==============================
3 |
4 | Submodules
5 | ----------
6 |
7 | instascrape.exceptions.exceptions module
8 | ----------------------------------------
9 |
10 | .. automodule:: instascrape.exceptions.exceptions
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
15 |
16 | Module contents
17 | ---------------
18 |
19 | .. automodule:: instascrape.exceptions
20 | :members:
21 | :undoc-members:
22 | :show-inheritance:
23 |
--------------------------------------------------------------------------------
/docs/instascrape.rst:
--------------------------------------------------------------------------------
1 | instascrape package
2 | ===================
3 |
4 | Subpackages
5 | -----------
6 |
7 | .. toctree::
8 |
9 | instascrape.core
10 | instascrape.scrapers
11 |
12 | Submodules
13 | ----------
14 |
15 | instascrape.instascrape module
16 | ------------------------------
17 |
18 | .. automodule:: instascrape.instascrape
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
22 |
23 |
24 | Module contents
25 | ---------------
26 |
27 | .. automodule:: instascrape
28 | :members:
29 | :undoc-members:
30 | :show-inheritance:
31 |
--------------------------------------------------------------------------------
/docs/instascrape.scrapers.rst:
--------------------------------------------------------------------------------
1 | instascrape.scrapers package
2 | ============================
3 |
4 | Submodules
5 | ----------
6 |
7 | instascrape.scrapers.hashtag module
8 | -----------------------------------
9 |
10 | .. automodule:: instascrape.scrapers.hashtag
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 | :inherited-members:
15 |
16 | instascrape.scrapers.post module
17 | --------------------------------
18 |
19 | .. automodule:: instascrape.scrapers.post
20 | :members:
21 | :undoc-members:
22 | :show-inheritance:
23 | :inherited-members:
24 |
25 | instascrape.scrapers.profile module
26 | -----------------------------------
27 |
28 | .. automodule:: instascrape.scrapers.profile
29 | :members:
30 | :undoc-members:
31 | :show-inheritance:
32 | :inherited-members:
33 |
34 | instascrape.scrapers.reel module
35 | --------------------------------
36 |
37 | .. automodule:: instascrape.scrapers.reel
38 | :members:
39 | :undoc-members:
40 | :show-inheritance:
41 | :inherited-members:
42 |
43 | instascrape.scrapers.location module
44 | ------------------------------------
45 |
46 | .. automodule:: instascrape.scrapers.location
47 | :members:
48 | :undoc-members:
49 | :show-inheritance:
50 | :inherited-members:
51 |
52 | instascrape.scrapers.igtv module
53 | --------------------------------
54 |
55 | .. automodule:: instascrape.scrapers.igtv
56 | :members:
57 | :undoc-members:
58 | :show-inheritance:
59 | :inherited-members:
60 |
61 | instascrape.scrapers.scrape_tools module
62 | ----------------------------------------
63 |
64 | .. automodule:: instascrape.scrapers.scrape_tools
65 | :members:
66 | :undoc-members:
67 | :show-inheritance:
68 | :inherited-members:
69 |
70 | Module contents
71 | ---------------
72 |
73 | .. automodule:: instascrape.scrapers
74 | :members:
75 | :undoc-members:
76 | :show-inheritance:
77 | :inherited-members:
78 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/modules.rst:
--------------------------------------------------------------------------------
1 | instascrape
2 | ===========
3 |
4 | .. toctree::
5 | :maxdepth: 4
6 |
7 | instascrape
8 |
--------------------------------------------------------------------------------
/instascrape/__init__.py:
--------------------------------------------------------------------------------
1 | from instascrape.scrapers import *
--------------------------------------------------------------------------------
/instascrape/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/instascrape/core/__init__.py
--------------------------------------------------------------------------------
/instascrape/core/_mappings.py:
--------------------------------------------------------------------------------
1 | """
2 | Mappings that tell the _JsonEngine the user facing attribute names and the
3 | steps needed to get there in a JSON dictionary
4 | """
5 |
6 | from __future__ import annotations
7 |
8 | from abc import ABC
9 | from collections import deque
10 | from copy import deepcopy
11 | from typing import Dict, List, Union
12 |
13 | # pylint: disable=used-before-assignment
14 |
15 |
16 | MappingObject = Union["_PostMapping", "_ProfileMapping", "_HashtagMapping", "_LoginMapping"]
17 |
18 |
19 | class _GeneralMapping(ABC):
20 | """
21 | Maps the user interfacing attribute names with their keys as given in a JSON
22 | dict that has been flattened using
23 | instascrape.core._json_flattener.JsonFlattener
24 |
25 | Attributes
26 | ----------
27 | mapping : Dict[str, deque]
28 | Each key: val pair represents one data point and the directive for
29 | traversing a JSON dict and accessing that value
30 |
31 | Methods
32 | -------
33 |     return_mapping(keys: List[str] = None, exclude: List[str] = None) -> Dict[str, deque]
34 |         Interface for returning only the mapping directives specified in
35 |         a list of keys, optionally excluding some of them
36 |
37 | """
38 |
39 | mapping = {
40 | # "csrf_token": deque(["csrf_token"]),
41 | # "viewer_id": deque(["viewerId"]),
42 | # "country_code": deque(["country_code"]),
43 | # "language_code": deque(["language_code"]),
44 | # "locale": deque(["locale"]),
45 | # "device_id": deque(["device_id"]),
46 | # "browser_push_pub_key": deque(["browser_push_pub_key"]),
47 | # "key_id": deque(["key_id"]),
48 | # "public_key": deque(["public_key"]),
49 | # "version": deque(["version"]),
50 | # "is_dev": deque(["is_dev"]),
51 | # "rollout_hash": deque(["rollout_hash"]),
52 | # "bundle_variant": deque(["bundle_variant"]),
53 | # "frontend_dev": deque(["frontend_env"]),
54 | }
55 |
56 | @classmethod
57 | def return_mapping(cls, keys: List[str] = None, exclude: List[str] = None) -> Dict[str, deque]:
58 | """
59 | Return key-directive pairs specified by key names. If no keys are
60 | specified, return all
61 |
62 | Parameters
63 | ----------
64 | keys : List[str]
65 |         Keys that specify what directives to return; if empty, return all (minus any listed in exclude)
66 |
67 | Returns
68 | -------
69 | directive_dict : Dict[str, deque]
70 | Dictionary of keys and their directives
71 | """
72 | if keys is None:
73 | keys = []
74 | if exclude is None:
75 | exclude = []
76 | if isinstance(keys, str):
77 | keys = [keys]
78 | if isinstance(exclude, str):
79 | exclude = [exclude]
80 |
81 | if not keys:
82 | keys = list(cls.mapping)
83 | if exclude:
84 | keys = [key for key in keys if key not in exclude]
85 | directive_dict = {key: deepcopy(cls.mapping[key]) for key in keys}
86 | return directive_dict
87 |
88 |
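# Illustrative sketch (comment only): with the _PostMapping subclass below,
# _PostMapping.return_mapping(keys=["likes", "shortcode"]) returns
# {"likes": deque(["edge_media_preview_like_count"]),
#  "shortcode": deque(["shortcode"])},
# i.e. only the directives for the requested attributes, each deepcopied so
# callers are free to consume the deques.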
89 | class _PostMapping(_GeneralMapping):
90 | """Mapping specific to Instagram post pages"""
91 |
92 | mapping = _GeneralMapping.return_mapping().copy()
93 | mapping.update(
94 | {
95 | "id": deque(["id"]),
96 | "shortcode": deque(["shortcode"]),
97 | "height": deque(["height"]),
98 | "width": deque(["width"]),
99 | "gating_info": deque(["gating_info"]),
100 | "fact_check_overall_rating": deque(["fact_check_overall_rating"]),
101 | "fact_check_information": deque(["fact_check_information"]),
102 | "sensitivity_friction_info": deque(["sensitivity_friction_info"]),
103 | "media_overlay_info": deque(["media_overlay_info"]),
104 | "media_preview": deque(["media_preview"]),
105 | "display_url": deque(["display_url"]),
106 | "accessibility_caption": deque(["accessibility_caption"]),
107 | "is_video": deque(["is_video"]),
108 | "tracking_token": deque(["tracking_token"]),
109 | "tagged_users": deque(["edge_media_to_tagged_user"]),
110 | "caption": deque(["text"]),
111 | "caption_is_edited": deque(["caption_is_edited"]),
112 | "has_ranked_comments": deque(["has_ranked_comments"]),
113 | "comments": deque(["count"]),
114 | "comments_disabled": deque(["comments_disabled"]),
115 | "commenting_disabled_for_viewer": deque(["commenting_disabled_for_viewer"]),
116 | "timestamp": deque(["taken_at_timestamp"]),
117 | "likes": deque(["edge_media_preview_like_count"]),
118 | "location": deque(["name"]),
119 | "viewer_has_liked": deque(["viewer_has_liked"]),
120 | "viewer_has_saved": deque(["viewer_has_saved"]),
121 | "viewer_has_saved_to_collection": deque(["viewer_has_saved_to_collection"]),
122 | "viewer_in_photo_of_you": deque(["viewer_in_photo_of_you"]),
123 | "viewer_can_reshare": deque(["viewer_can_reshare"]),
124 | "video_url": deque(["video_url"]),
125 | "has_audio": deque(["has_audio"]),
126 | "video_view_count": deque(["video_view_count"]),
127 | "username": deque(["shortcode_media_owner_username"]),
128 | "full_name": deque(['owner_full_name']),
129 | }
130 | )
131 |
132 | @classmethod
133 | def post_from_profile_mapping(cls):
134 | """
135 | Return the mapping needed for parsing a post's JSON data from the JSON
136 | served back after requesting a Profile page.
137 | """
138 | return {
139 | "id": deque(["id"]),
140 | "shortcode": deque(["shortcode"]),
141 | "dimensions": deque(["dimensions"]),
142 | "display_url": deque(["display_url"]),
143 | "tagged_users": deque(["edge_media_to_tagged_user", "edges"]),
144 | "fact_check_overall_rating": deque(["fact_check_overall_rating"]),
145 | "fact_check_information": deque(["fact_check_information"]),
146 | "is_video": deque(["is_video"]),
147 | "accessibility_caption": deque(["accessibility_caption"]),
148 | "caption": deque(["edge_media_to_caption", "edges", 0, "node", "text"]),
149 | "comments": deque(["count"]),
150 | "comments_disabled": deque(["comments_disabled"]),
151 | "timestamp": deque(["taken_at_timestamp"]),
152 | "likes": deque(["edge_media_preview_like_count"]),
153 | "location": deque(["location"]),
154 | }
155 |
156 | @classmethod
157 | def post_from_hashtag_mapping(cls):
158 | """
159 | Return the mapping needed for parsing a post's JSON data from the JSON
160 | served back after requesting a Hashtag page.
161 | """
162 | return {
163 | "comments_disabled": deque(["comments_disabled"]),
164 | "id": deque(["id"]),
165 | "caption": deque(["edge_media_to_caption", "edges", 0, "node", "text"]),
166 | "shortcode": deque(["shortcode"]),
167 | "comments": deque(["edge_media_to_comment", "count"]),
168 | "upload_date": deque(["taken_at_timestamp"]),
169 | "dimensions": deque(["dimensions"]),
170 | "display_url": deque(["display_url"]),
171 | "likes": deque(["edge_media_preview_like", "count"]),
172 | "owner": deque(["owner", "id"]),
173 | "is_video": deque(["is_video"]),
174 | "accessibility_caption": deque(["accessibility_caption"]),
175 | }
176 |
177 |
178 | class _ReelMapping(_PostMapping):
179 | mapping = _PostMapping.return_mapping().copy()
180 | mapping.update(
181 | {
182 | "video_play_count": deque(["video_play_count"]),
183 | }
184 | )
185 |
186 |
187 | class _IGTVMapping(_PostMapping):
188 | mapping = _PostMapping.return_mapping().copy()
189 |
190 |
191 | class _ProfileMapping(_GeneralMapping):
192 | """Mapping specific to Instagram profile pages"""
193 |
194 | mapping = _GeneralMapping.return_mapping().copy()
195 | mapping.update(
196 | {
197 | "logging_page_id": deque(["logging_page_id"]),
198 | "show_suggested_profiles": deque(["show_suggested_profiles"]),
199 | "show_follow_dialog": deque(["show_follow_dialog"]),
200 | "biography": deque(["biography"]),
201 | "blocked_by_viewer": deque(["blocked_by_viewer"]),
202 | "restricted_by_viewer": deque(["restricted_by_viewer"]),
203 | "country_block": deque(["country_block"]),
204 | "external_url": deque(["external_url"]),
205 | "external_url_linkshimmed": deque(["external_url_linkshimmed"]),
206 | "followers": deque(["count"]),
207 | "followed_by_viewer": deque(["followed_by_viewer"]),
208 | "following": deque(["edge_follow_count"]),
209 | "follows_viewer": deque(["follows_viewer"]),
210 | "full_name": deque(["user_full_name"]),
211 | "has_ar_effects": deque(["has_ar_effects"]),
212 | "has_clips": deque(["has_clips"]),
213 | "has_guides": deque(["has_guides"]),
214 | "has_channel": deque(["has_channel"]),
215 | "has_blocked_viewer": deque(["has_blocked_viewer"]),
216 | "highlight_reel_count": deque(["highlight_reel_count"]),
217 | "has_requested_viewer": deque(["has_requested_viewer"]),
218 | "id": deque(["id"]),
219 | "is_business_account": deque(["is_business_account"]),
220 | "is_joined_recently": deque(["is_joined_recently"]),
221 | "business_category_name": deque(["business_category_name"]),
222 | "overall_category_name": deque(["overall_category_name"]),
223 | "category_enum": deque(["category_enum"]),
224 | "is_private": deque(["is_private"]),
225 | "is_verified": deque(["is_verified"]),
226 | "mutual_followers": deque(["edge_mutual_followed_by_count"]),
227 | "profile_pic_url": deque(["profile_pic_url"]),
228 | "profile_pic_url_hd": deque(["profile_pic_url_hd"]),
229 | "requested_by_viewer": deque(["requested_by_viewer"]),
230 | "username": deque(["user_username"]),
231 | "connected_fb_page": deque(["connected_fb_page"]),
232 | "posts": deque(["edge_owner_to_timeline_media_count"]),
233 | }
234 | )
235 |
236 |
237 | class _HashtagMapping(_GeneralMapping):
238 | """Mapping specific to Instagram hashtag pages"""
239 |
240 | mapping = _GeneralMapping.return_mapping().copy()
241 | mapping.update(
242 | {
243 | "id": deque(["id"]),
244 | "name": deque(["name"]),
245 | "allow_following": deque(["allow_following"]),
246 | "is_following": deque(["is_following"]),
247 | "is_top_media_only": deque(["is_top_media_only"]),
248 | "profile_pic_url": deque(["profile_pic_url"]),
249 | "amount_of_posts": deque(["count"]),
250 | }
251 | )
252 |
253 |
254 | class _LocationMapping(_GeneralMapping):
255 |     """Mapping specific to Instagram location pages"""
256 |
257 | mapping = _GeneralMapping.return_mapping().copy()
258 | mapping.update(
259 | {
260 | "id": deque(["id"]),
261 | "name": deque(["name"]),
262 | "has_public_page": deque(["has_public_page"]),
263 | "latitude": deque(["lat"]),
264 | "longitude": deque(["lng"]),
265 | "slug": deque(["slug"]),
266 | "blurb": deque(["blurb"]),
267 | "website": deque(["website"]),
268 | "phone": deque(["phone"]),
269 | "primary_alias_on_fb": deque(["primary_alias_on_fb"]),
270 |             "street_address": deque(["street_address"]),
271 | "zip_code": deque(["zip_code"]),
272 | "city_name": deque(["city_name"]),
273 | "region_name": deque(["region_name"]),
274 | "country_code": deque(["country_code"]),
275 | "amount_of_posts": deque(["count"]),
276 | }
277 | )
278 |
279 |
280 | class _LoginMapping(_GeneralMapping):
281 | """Mapping specific to Instagram login page"""
282 |
283 | mapping = _GeneralMapping.return_mapping().copy()
284 |
285 |
286 | class _HttpErrorMapping(_GeneralMapping):
287 |     """Mapping specific to Instagram HTTP error pages"""
288 |
289 | mapping = _GeneralMapping.return_mapping().copy()
290 |
291 |
292 | class _MetaMapping:
293 | """
294 | Map the string in the Instagram JSON that indicates the type of page the
295 | JSON was scraped from
296 |
297 | Attributes
298 | ----------
299 | str_to_mapper_obj : Dict[str, Any]
300 | Dictionary that maps the string name of the JSON type to the specific
301 | mapping object
302 |
303 | Methods
304 | -------
305 | get_mapper(page_type: str)
306 | Return the mapping object that correlates to the string
307 | """
308 |
309 | str_to_mapper_obj = {
310 | "ProfilePage": _ProfileMapping,
311 | "TagPage": _HashtagMapping,
312 | "PostPage": _PostMapping,
313 | "LoginAndSignupPage": _LoginMapping,
314 | "LocationsPage": _LocationMapping
315 | }
316 |
317 | @classmethod
318 | def get_mapper(cls, page_type: str) -> MappingObject:
319 | """
320 | Return the appropriate mapper that corresponds to the page_type as
321 | given in the requested Instagram JSON data
322 | """
323 | return cls.str_to_mapper_obj[page_type]
324 |
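# For example, _MetaMapping.get_mapper("ProfilePage") returns _ProfileMapping,
# per the str_to_mapper_obj table above.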
--------------------------------------------------------------------------------
/instascrape/core/_static_scraper.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import datetime
4 | import json
5 | import csv
6 | from abc import ABC, abstractmethod
7 | from typing import Union, Dict, List, Any
8 | import sys
9 | import os
10 | from collections import namedtuple, deque
11 | import warnings
12 |
13 | import requests
14 | from bs4 import BeautifulSoup
15 |
16 | from instascrape.scrapers.scrape_tools import parse_data_from_json, determine_json_type, flatten_dict, json_from_soup
17 | from instascrape.exceptions.exceptions import InstagramLoginRedirectError, MissingSessionIDWarning, MissingCookiesWarning
18 |
19 | # pylint: disable=no-member
20 |
21 | JSONDict = Dict[str, Any]
22 |
23 | class _StaticHtmlScraper(ABC):
24 | """
25 | Base class for all of the scrapers, handles general functionality that all
26 | scraper objects will have
27 | """
28 |
29 | # Keys that represent metadata attr that the user doesn't necessarily need
30 | # to worry about
31 | _METADATA_KEYS = [
32 | "json_dict",
33 | "url",
34 | "_json_scraper",
35 | "scrape_timestamp",
36 | "map_dict",
37 | "json_data",
38 | "json_flattener",
39 | "flat_json_dict",
40 | "soup",
41 | "html",
42 | "source",
43 | ]
44 | _ASSOCIATED_JSON_TYPE = None
45 |
46 | session = requests.Session()
47 |
48 | def __init__(self, source: Union[str, BeautifulSoup, JSONDict]) -> None:
49 | """
50 | Parameters
51 | ----------
52 | source : Union[str, BeautifulSoup, JSONDict]
53 | The given source for scraping the data from. Available sources are
54 | a URL, HTML, JSON dictionary, BeautifulSoup, etc.
55 | """
56 | self.source = source
57 |
58 | # Instance variables that are given values elsewhere
59 | self.url = None
60 | self.html = None
61 | self.soup = None
62 | self.json_dict = None
63 | self.flat_json_dict = None
64 | self.scrape_timestamp = None
65 |
66 | def __getitem__(self, key: str) -> Any:
67 | return getattr(self, key)
68 |
69 | def __repr__(self) -> str:
70 | return f"<{type(self).__name__}>"
71 |
72 | def scrape(
73 | self,
74 | mapping=None,
75 | keys: List[str] = None,
76 | exclude: List[str] = None,
77 | headers={
78 | "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57"
79 | },
80 | inplace=True,
81 | session=None,
82 | webdriver=None
83 | ) -> None:
84 | """
85 | Scrape data from the source
86 |
87 | Parameters
88 | ----------
89 | mapping : Dict[str, deque]
90 | Dictionary of parsing queue's that tell the JSON engine how to
91 | process the JSON data
92 | keys : List[str]
93 | List of strings that correspond to desired attributes for scraping
94 | exclude : List[str]
95 | List of strings that correspond to which attributes to exclude from
96 | being scraped
97 | headers : Dict[str, str]
98 | Dictionary of request headers to be passed on the GET request
99 | inplace : bool
100 | Determines if data modified inplace or return a new object with the
101 | scraped data
102 | session : requests.Session
103 | Session for making the GET request
104 | webdriver : selenium.webdriver.chrome.webdriver.WebDriver
105 | Webdriver for scraping the page, overrides any default or passed
106 | session
107 |
108 | Returns
109 | -------
110 | return_instance
111 | Optionally returns a scraped instance instead of modifying inplace
112 | if inplace arg is True
113 | """
114 |
115 | if mapping is None:
116 | mapping = self._Mapping.return_mapping(keys=keys, exclude=exclude)
117 | if session is None:
118 | session = self.session
119 | if webdriver is not None:
120 | session = webdriver
121 | if keys is None:
122 | keys = []
123 | if exclude is None:
124 | exclude = []
125 |
126 | if webdriver is None:
127 | try:
128 | if "sessionid" not in headers["cookie"]:
129 | warnings.warn(
130 | "Session ID not in cookies! It's recommended you pass a valid sessionid otherwise Instagram will likely redirect you to their login page.",
131 | MissingSessionIDWarning
132 | )
133 | except KeyError:
134 | warnings.warn(
135 | "Request header does not contain cookies! It's recommended you pass at least a valid sessionid otherwise Instagram will likely redirect you to their login page.",
136 | MissingCookiesWarning
137 | )
138 |
139 | # If the passed source was already an object, construct data from
140 | # source else parse it
141 | if isinstance(self.source, type(self)):
142 | scraped_dict = self.source.to_dict()
143 | else:
144 | return_data = self._get_json_from_source(self.source, headers=headers, session=session)
145 | flat_json_dict = flatten_dict(return_data["json_dict"])
146 |
147 | #HACK: patch mapping to fix the profile pic scrape when a sessionid is present
148 | try:
149 | if "sessionid" in headers["cookie"]:
150 | mapping["profile_pic_url"] = deque(["user_profile_pic_url"])
151 | mapping["profile_pic_url_hd"] = deque(["user_profile_pic_url_hd"])
152 | except KeyError:
153 | pass
154 |
155 | scraped_dict = parse_data_from_json(
156 | json_dict=flat_json_dict,
157 | map_dict=mapping,
158 | )
159 | return_data["scrape_timestamp"] = datetime.datetime.now()
160 | return_data["flat_json_dict"] = flat_json_dict
161 | return_instance = self._load_into_namespace(
162 | scraped_dict=scraped_dict,
163 | return_data=return_data,
164 | inplace=inplace
165 | )
166 | return None if return_instance is self else return_instance
167 |
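# Illustrative call (a sketch; the sessionid value is a placeholder you would
# copy from a logged-in browser session):
#
#   profile = Profile("https://www.instagram.com/google/")
#   profile.scrape(headers={
#       "user-agent": "Mozilla/5.0 ...",
#       "cookie": "sessionid=<your sessionid>",
#   })
#
# Passing the cookie avoids the MissingSessionIDWarning emitted above and
# makes an InstagramLoginRedirectError less likely.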
168 | def to_dict(self, metadata: bool = False) -> Dict[str, Any]:
169 | """
170 | Return a dictionary containing all of the data that has been scraped
171 |
172 | Parameters
173 | ----------
174 | metadata : bool
175 | Boolean value that determines if metadata specified in self._METADATA_KEYS
176 | will be included in the dictionary.
177 |
178 | Returns
179 | -------
180 | data_dict : Dict[str, Any]
181 | Dictionary containing the scraped data
182 | """
183 | data_dict = (
184 | {key: val for key, val in self.__dict__.items() if key not in self._METADATA_KEYS}
185 | if not metadata
186 | else self.__dict__
187 | )
188 | return data_dict
189 |
190 | def to_csv(self, fp: str) -> None:
191 | """
192 | Write scraped data to .csv at the given filepath
193 |
194 | Parameters
195 | ----------
196 | fp : str
197 | Filepath to write data to
198 | """
199 | with open(fp, "w", newline="", encoding="utf-8") as csv_file:
200 | writer = csv.writer(csv_file)
201 | for key, value in self.to_dict().items():
202 | writer.writerow([key, str(value)])
203 |
204 | def to_json(self, fp: str) -> None:
205 | """
206 | Write scraped data to .json file at the given filepath
207 |
208 | Parameters
209 | ----------
210 | fp : str
211 | Filepath to write data to
212 | """
213 | outdict = {key: str(val) for key, val in self.to_dict().items()}
214 | with open(fp, "w") as outjson:
215 | json.dump(outdict, outjson)
216 |
217 | @abstractmethod
218 | def _url_from_suburl(self, suburl: str) -> str:
219 | pass
220 |
221 | def _get_json_from_source(self, source: Any, headers: dict, session: requests.Session) -> JSONDict:
222 | """Parses the JSON data out from the source based on what type the source is"""
223 | initial_type = True
224 | return_data = {"source": self.source}
225 | if isinstance(source, str):
226 | source_type = self._determine_string_type(source)
227 | elif isinstance(source, dict):
228 | json_dict = source
229 | source_type = "json dict"
230 | elif isinstance(source, BeautifulSoup):
231 | source_type = "soup"
232 |
233 | if source_type == "suburl":
234 | if initial_type:
235 | suburl = self.source
236 | url = self._url_from_suburl(suburl=suburl)
237 | source_type = "url"
238 | initial_type = False
239 | return_data["url"] = url
240 |
241 | if source_type == "url":
242 | if initial_type:
243 | url = self.source
244 | html = self._html_from_url(url=url, headers=headers, session=session)
245 | source_type = "html"
246 | initial_type = False
247 | return_data["html"] = html
248 |
249 | if source_type == "html":
250 | if initial_type:
251 | html = self.source
252 | soup = self._soup_from_html(html)
253 | source_type = "soup"
254 | initial_type = False
255 | return_data["soup"] = soup
256 |
257 | if source_type == "soup":
258 | if initial_type:
259 | soup = self.source
260 | json_dict_arr = json_from_soup(soup)
261 | if len(json_dict_arr) == 1:
262 | json_dict = json_dict_arr[0]
263 | else:
264 | json_dict = json_dict_arr[1]
265 | self._validate_scrape(json_dict)
266 |
267 | return_data["json_dict"] = json_dict
268 |
269 | return return_data
270 |
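# The branches above form a cascade that progressively normalizes the source:
# a suburl becomes a URL, the URL becomes HTML, the HTML becomes soup, and
# the soup yields the JSON dict, so every source type exits the same way.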
271 | def _load_into_namespace(self, scraped_dict: dict, return_data, inplace) -> None:
272 | """Loop through the scraped dictionary and set them as instance attr"""
273 | instance = self if inplace else type(self)(return_data["source"])
274 | for key, val in scraped_dict.items():
275 | setattr(instance, key, val)
276 | for key, val in return_data.items():
277 | setattr(instance, key, val)
278 | return instance
279 |
280 |
281 | @staticmethod
282 | def _html_from_url(url: str, headers: dict, session: requests.Session) -> str:
283 | """Return HTML from requested URL"""
284 | if isinstance(session, requests.Session):
285 | response = session.get(url, headers=headers)
286 | page_source = response.text
287 | else:
288 | session.get(url)
289 | page_source = session.page_source
290 | return page_source
291 |
292 | @staticmethod
293 | def _soup_from_html(html: str) -> BeautifulSoup:
294 | """Return BeautifulSoup from source HTML"""
295 | return BeautifulSoup(html, features="html.parser")
296 |
297 |     def _validate_scrape(self, json_dict: JSONDict) -> None:
298 | """Raise exceptions if the scrape did not properly execute"""
299 | json_type = determine_json_type(json_dict)
300 | if json_type == "LoginAndSignupPage" and not type(self).__name__ == "LoginAndSignupPage":
301 | raise InstagramLoginRedirectError
302 | elif json_type == "HttpErrorPage" and not type(self).__name__ == "HttpErrorPage":
303 | source_str = self.url if hasattr(self, "url") else "Source"
304 | raise ValueError(f"{source_str} is not a valid Instagram page. Please provide a valid argument.")
305 |
306 | @staticmethod
307 | def _determine_string_type(string_data: str) -> str:
308 | """Match and return string representation of appropriate source"""
309 | string_type_map = [("https://", "url"), ("window._sharedData", "html"), ('{"config"', "JSON dict str")]
310 | for substr, str_type in string_type_map:
311 | if substr in string_data:
312 | #BUG: !DOCTYPE isnt returned in selenium source code, use