├── .flake8
├── .gitattributes
├── .github
│   ├── CODE_OF_CONDUCT.md
│   ├── CONTRIBUTING.md
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── workflows
│       └── codeql-analysis.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── docs
│   ├── Makefile
│   ├── conf.py
│   ├── index.rst
│   ├── instascrape.core.rst
│   ├── instascrape.exceptions.rst
│   ├── instascrape.rst
│   ├── instascrape.scrapers.rst
│   ├── make.bat
│   └── modules.rst
├── instascrape
│   ├── __init__.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── _mappings.py
│   │   ├── _static_scraper.py
│   │   └── json_algos.py
│   ├── exceptions
│   │   ├── __init__.py
│   │   └── exceptions.py
│   └── scrapers
│       ├── __init__.py
│       ├── comment.py
│       ├── hashtag.py
│       ├── igtv.py
│       ├── location.py
│       ├── post.py
│       ├── profile.py
│       ├── reel.py
│       └── scrape_tools.py
├── media
│   ├── 6x6scatter_matrix.png
│   ├── instascrape.gif
│   ├── likes_heatmap.png
│   ├── logo.png
│   ├── logopic.png
│   ├── realpython.png
│   ├── scatter_matrix.png
│   └── techprofiles.gif
├── pypi.bash
├── pyproject.toml
├── pytest.ini
├── requirements.txt
├── setup.py
├── tests
│   ├── __init__.py
│   └── scrapers
│       ├── __init__.py
│       ├── test_hashtag.py
│       ├── test_igtv.py
│       ├── test_location.py
│       ├── test_post.py
│       ├── test_profile.py
│       └── test_reel.py
└── tutorial
    ├── examples
    │   ├── DonaldTrump
    │   │   ├── Donald Trump.ipynb
    │   │   ├── donald_trump.csv
    │   │   └── plots
    │   │       ├── comments_per_post.png
    │   │       ├── hashtags.png
    │   │       ├── likes_per_post.png
    │   │       ├── likes_vs_comments.png
    │   │       ├── locations.png
    │   │       ├── views_and_likes_per_view.png
    │   │       └── views_per_video.png
    │   ├── JoeBiden
    │   │   ├── joebiden.csv
    │   │   ├── joebiden.png
    │   │   ├── joebiden.py
    │   │   └── joebiden_urls.txt
    │   ├── README.md
    │   ├── download_recent_photos
    │   │   ├── 2020-09-08 09h06m.png
    │   │   ├── 2020-09-09 10h24m.png
    │   │   ├── 2020-09-14 10h05m.png
    │   │   ├── 2020-09-17 17h49m.png
    │   │   ├── 2020-09-24 11h01m.png
    │   │   ├── 2020-09-25 10h18m.png
    │   │   ├── 2020-09-26 11h38m.png
    │   │   ├── 2020-09-27 09h27m.png
    │   │   ├── 2020-09-28 12h17m.png
    │   │   ├── 2020-10-14 12h36m.png
    │   │   ├── 2020-10-15 13h11m.png
    │   │   ├── 2020-10-16 14h39m.png
    │   │   └── download_recent_photos.ipynb
    │   ├── max_liked_post.ipynb
    │   └── simple_hashtag_comparison
    │       └── simple_hashtag_comparison.ipynb
    └── tutorial
        ├── Part 0 - Orientation.ipynb
        └── Part 1 - Intro to the API.ipynb
/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = media,docs 3 | ignore=E402,F401,F403,F405,F821 4 | max-line-length=120 -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | tutorial/** linguist-language=Python 2 | tutorial/examples/** linguist-language=Python 3 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at chris@christophergreening.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to instascrape 2 | We love developers and want to hear your input! 
Contributing to this project should be as easy and transparent as possible, whether it's: 3 | 4 | - Reporting a bug 5 | - Discussing the current state of the code 6 | - Submitting a fix 7 | - Proposing new features 8 | - Becoming a maintainer 9 | - etc. 10 | 11 | ## We develop with GitHub 12 | We use GitHub to host code, track issues and feature requests, as well as accept pull requests. Changes that have been accepted to `master` will also be uploaded to the instascrape PyPI package. 13 | 14 | ## We use the [GitHub flow](https://guides.github.com/introduction/flow/), so all code changes happen through pull requests 15 | Pull requests are the best way to propose changes to the codebase. We actively welcome your pull requests: 16 | 17 | 1. Fork the repo and create your branch from `master`. 18 | 2. If you've added code that should be tested, add tests. 19 | 3. If you've changed APIs, update the documentation. 20 | 4. Ensure the test suite passes. 21 | 5. Make sure your code lints. 22 | 6. Issue that pull request! 23 | 24 | ## Report bugs using Github's [issues](https://github.com/chris-greening/instascrape/issues) 25 | We use GitHub issues to track public bugs. Report a bug by [opening a new issue](https://github.com/chris-greening/instascrape/issues/new/choose). 26 | 27 | ## Write bug reports with detail, background, and sample code 28 | 29 | **Great Bug Reports** tend to have: 30 | 31 | - A quick summary and/or background 32 | - Steps to reproduce 33 | - Be specific! 34 | - Give sample code if you can. 35 | - What you expected would happen 36 | - What actually happens 37 | - Notes (possibly including why you think this might be happening, or stuff you tried that didn't work) 38 | 39 | 46 | 47 | ## Any contributions you make will be under the MIT Software License 48 | In short, when you submit code changes, your submissions are understood to be under the same [MIT License](http://choosealicense.com/licenses/mit/) that covers the project. Feel free to contact the maintainers if that's a concern. 49 | 50 | ## References 51 | This document was adapted from [briandk's](https://gist.github.com/briandk/3d2e8b3ec8daf5a27a62) CONTRIBUTING.md template. 52 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 
39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change. 3 | 4 | Do not include any personal data. 5 | 6 | Fixes # (issue) 7 | 8 | ## Checklist 9 | 10 | * [ ] I followed the guidelines in our Contributing document 11 | * [ ] I added an explanation of my changes 12 | * [ ] I have written new tests for my changes, as applicable 13 | * [ ] I successfully ran tests with my changes locally 14 | 15 | ## Additional notes (optional) 16 | 17 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | name: "CodeQL" 7 | 8 | on: 9 | push: 10 | branches: [master] 11 | pull_request: 12 | # The branches below must be a subset of the branches above 13 | branches: [master] 14 | schedule: 15 | - cron: '0 18 * * 3' 16 | 17 | jobs: 18 | analyze: 19 | name: Analyze 20 | runs-on: ubuntu-latest 21 | 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | # Override automatic language detection by changing the below list 26 | # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python'] 27 | language: ['python'] 28 | # Learn more... 29 | # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection 30 | 31 | steps: 32 | - name: Checkout repository 33 | uses: actions/checkout@v2 34 | with: 35 | # We must fetch at least the immediate parents so that if this is 36 | # a pull request then we can checkout the head. 37 | fetch-depth: 2 38 | 39 | # If this run was triggered by a pull request event, then checkout 40 | # the head of the pull request instead of the merge commit. 41 | - run: git checkout HEAD^2 42 | if: ${{ github.event_name == 'pull_request' }} 43 | 44 | # Initializes the CodeQL tools for scanning. 
45 | - name: Initialize CodeQL 46 | uses: github/codeql-action/init@v1 47 | with: 48 | languages: ${{ matrix.language }} 49 | # If you wish to specify custom queries, you can do so here or in a config file. 50 | # By default, queries listed here will override any specified in a config file. 51 | # Prefix the list here with "+" to use these queries and those in the config file. 52 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 53 | 54 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 55 | # If this step fails, then you should remove it and run the build manually (see below) 56 | - name: Autobuild 57 | uses: github/codeql-action/autobuild@v1 58 | 59 | # ℹ️ Command-line programs to run using the OS shell. 60 | # 📚 https://git.io/JvXDl 61 | 62 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 63 | # and modify them (or add more) to build your code if your project 64 | # uses a compiled language 65 | 66 | #- run: | 67 | # make bootstrap 68 | # make release 69 | 70 | - name: Perform CodeQL Analysis 71 | uses: github/codeql-action/analyze@v1 72 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .idea/ 132 | <<<<<<< HEAD 133 | 134 | # Large file ignores 135 | tutorial/examples/Donald\ Trump/*.db* 136 | ======= 137 | >>>>>>> aa8b12067d01e19fb94af91be18f49d8042feeac 138 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/timothycrosley/isort 3 | rev: 5.5.4 4 | hooks: 5 | - id: isort 6 | - repo: https://github.com/ambv/black 7 | rev: 20.8b1 8 | hooks: 9 | - id: black 10 | - repo: https://gitlab.com/pycqa/flake8 11 | rev: 3.8.4 12 | hooks: 13 | - id: flake8 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Christopher Greening 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |

3 | 4 |

5 | 6 | # _instascrape_: powerful Instagram data scraping toolkit 7 | 8 | ## Note: This module is no longer actively maintained. 9 | 10 | ## DISCLAIMER: 11 | 12 | Instagram has gotten increasingly strict with scraping and using this library can result in getting flagged for botting AND POSSIBLE DISABLING OF YOUR INSTAGRAM ACCOUNT. This is a research project and I am not responsible for how you use it. Independently, the library is designed to be responsible and respectful and it is up to you to decide what you do with it. I don't claim any responsibility if your Instagram account is affected by how you use this library. 13 | 14 | [![Version](https://img.shields.io/pypi/pyversions/insta-scrape)](https://www.python.org/downloads/release/python-360/) 15 | [![Downloads](https://pepy.tech/badge/insta-scrape)](https://pepy.tech/project/insta-scrape) 16 | [![Release](https://img.shields.io/pypi/v/insta-scrape)](https://pypi.org/project/insta-scrape/) 17 | [![License](http://img.shields.io/:license-mit-blue.svg?style=flat-square)](https://opensource.org/licenses/MIT) 18 | 19 | [![Activity](https://img.shields.io/github/last-commit/chris-greening/instascrape)](https://github.com/chris-greening/instascrape) 20 | [![Dependencies](https://img.shields.io/librariesio/github/chris-greening/instascrape)](https://github.com/chris-greening/instascrape/blob/master/requirements.txt) 21 | [![Issues](https://img.shields.io/github/issues/chris-greening/instascrape?style=flat)](https://github.com/chris-greening/instascrape/issues) 22 | 23 | ## What is it? 24 | _instascrape_ is a lightweight Python package that provides an expressive and flexible API for scraping Instagram data. It is geared towards being a high-level building block on the data scientist's toolchain and can be seamlessly integrated and extended with industry standard tools for web scraping, data science, and analysis. 25 | 26 | 27 | 28 | ## Key features 29 | Here are a few of the things that `instascrape` does well: 30 | 31 | * Powerful, object-oriented scraping tools for profiles, posts, hashtags, reels, and IGTV 32 | * Scrapes HTML, BeautifulSoup, and JSON 33 | * Download content to your computer as _png_, _jpg_, _mp4_, and _mp3_ 34 | * Dynamically retrieve HTML embed code for posts 35 | * Expressive and consistent API for concise and elegant code 36 | * Designed for seamless integration with [_Selenium_](https://selenium-python.readthedocs.io/), [_Pandas_](https://pandas.pydata.org/), and other industry standard tools for data collection and analysis 37 | * Lightweight; no boilerplate or configurations necessary 38 | * The only hard dependencies are [_Requests_](https://requests.readthedocs.io/en/master/) and [_Beautiful Soup_](https://www.crummy.com/software/BeautifulSoup/bs4/doc/) 39 | --- 40 | 41 | ## Table of Contents 42 | * [Installation](#installation) 43 | * [Sample Usage](#features) 44 | * [Documentation](#documentation) 45 | * [Blog Posts](#blog-posts) 46 | * [Contributing](#contributing) 47 | * [Dependencies](#dependencies) 48 | * [License](#license) 49 | * [Support](#support) 50 | 51 | --- 52 | 53 | ## :computer: Installation 54 | 55 | ### Minimum Python version 56 | 57 | This library currently requires [Python 3.7](https://www.python.org/downloads/release/python-370/) or higher. 58 | 59 | 60 | ### pip 61 | Install from PyPI using 62 | ```shell 63 | $ pip3 install insta-scrape 64 | ``` 65 | WARNING: make sure you install _insta-scrape_ and not a package with a similar name! 
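
A quick, optional sanity check after installing (a minimal sketch, not part of the package itself): the PyPI distribution is named `insta-scrape`, but the package is imported as `instascrape`. If the top-level scraper classes used in the sample usage below import cleanly, the right distribution is installed.

```python
# The PyPI distribution is "insta-scrape", but the import name is "instascrape"
from instascrape import Profile, Post, Hashtag

# Should print the three scraper classes without raising ImportError
print(Profile, Post, Hashtag)
```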
66 | 67 | --- 68 | 69 | ## :mag_right: Sample Usage 70 | All top-level, ready-to-use features can be imported using: 71 | ```python 72 | from instascrape import * 73 | ``` 74 | 75 | _instascrape_ uses clean, consistent, and expressive syntax to make the developer experience as _painless_ as possible. 76 | 77 | ```python 78 | # Instantiate the scraper objects 79 | google = Profile('https://www.instagram.com/google/') 80 | google_post = Post('https://www.instagram.com/p/CG0UU3ylXnv/') 81 | google_hashtag = Hashtag('https://www.instagram.com/explore/tags/google/') 82 | 83 | # Scrape their respective data 84 | google.scrape() 85 | google_post.scrape() 86 | google_hashtag.scrape() 87 | 88 | print(google.followers) 89 | print(google_post['hashtags']) 90 | print(google_hashtag.amount_of_posts) 91 | >>> 12262794 92 | >>> ['growwithgoogle'] 93 | >>> 9053408 94 | ``` 95 | 96 | See the [Scraped data points](https://github.com/chris-greening/instascrape/wiki/Scraped-data-points) section of the [Wiki](https://github.com/chris-greening/instascrape/wiki) for a complete list of the scraped attributes provided by each scraper. 97 | 98 | ## :books: Documentation 99 | The official documentation can be found on [Read The Docs](https://instascrape.readthedocs.io/en/latest/index.html) 100 | 101 | --- 102 | 103 | ## :newspaper: Blog Posts 104 | 105 | 106 | Check out blog posts on the [official site](https://chris-greening.github.io/instascrape/blog/) or [DEV](https://dev.to/) for ideas and tutorials! 107 | 108 | - [Scrape data from Instagram with instascrape](https://dev.to/chrisgreening/scrape-data-from-instagram-with-instascrape-5e3e) 109 | - [Visualizing Instagram engagement with instascrape](https://dev.to/chrisgreening/visualizing-instagram-engagement-with-instascrape-326h) 110 | - [Exploratory data analysis of Instagram using instascrape and Python](https://dev.to/chrisgreening/exploratory-data-analysis-of-instagram-using-python-1o5c) 111 | - [Creating a scatter matrix of Instagram data using Python](https://dev.to/chrisgreening/visualizing-the-relationship-between-instagram-variables-using-python-55gg) 112 | - [Downloading an Instagram profile's recent photos using Python](https://dev.to/chrisgreening/downloading-an-instagram-profile-s-recent-photos-using-python-25b2) 113 | - [Scraping 25,000 data points from Joe Biden's Instagram using instascrape](https://dev.to/chrisgreening/scraping-25-000-data-points-from-joe-biden-s-instagram-using-instascrape-1026) 114 | - [Compare major tech Instagram page's with instascrape](https://dev.to/chrisgreening/compare-major-tech-instagram-page-s-with-instascrape-2419) 115 | - [Tracking an Instagram posts engagement in real time with instascrape](https://dev.to/chrisgreening/tracking-an-instagram-posts-engagement-in-real-time-with-instascrape-1m1j) 116 | - [Dynamically generate embeddable Instagram HTML with instascrape](https://dev.to/chrisgreening/dynamically-generate-embeddable-instagram-html-using-instascrape-3o4b) 117 | - [Scraping an Instagram location tag with instascrape](https://dev.to/chrisgreening/scraping-an-instagram-location-tag-with-instascrape-554f) 118 | - [Scraping Instagram reels with instascrape](https://dev.to/chrisgreening/scraping-instagram-reels-with-instascrape-3khb) 119 | - [Scraping IGTV data with instascrape](https://dev.to/chrisgreening/scraping-igtv-data-with-instascrape-595f) 120 | - [Scraping 10,000 data points from Donald Trump's Instagram with 
Python](https://dev.to/chrisgreening/scraping-10-000-data-points-from-donald-trump-s-instagram-page-with-python-2jcg) 121 | --- 122 | 123 | ## :pray: Contributing 124 | All contributions, bug reports, bug fixes, documentation improvements, enhancements, and ideas are welcome! 125 | 126 | Feel free to [open an Issue](https://github.com/chris-greening/instascrape/issues/new/choose), check out existing [Issues](https://github.com/chris-greening/instascrape/issues), or [start a discussion](https://github.com/chris-greening/instascrape/discussions). 127 | 128 | Beginners to open source are highly encouraged to participate and ask questions if you're unsure what to do/where to start :heart: 129 | 130 | --- 131 | 132 | ## :spider_web: Dependencies 133 | 134 | - [Requests](https://requests.readthedocs.io/en/master/) 135 | - [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/) 136 | 137 | --- 138 | 139 | 140 | ## :credit_card: License 141 | This library operates under the [MIT](LICENSE) license. 142 | 143 | --- 144 | 145 | ## :grey_question: Support 146 | 147 | Check out the [FAQ](https://github.com/chris-greening/instascrape/wiki/Frequently-Asked-Questions) 148 | 149 | Reach out to me if you want to connect or have any questions and I will do my best to get back to you 150 | * Email: 151 | * chris@christophergreening.com 152 | * Twitter: 153 | * [@ChrisGreening](https://twitter.com/ChrisGreening) 154 | * LinkedIn 155 | * [Chris Greening](https://www.linkedin.com/in/chris-greening-646411139/) 156 | * Personal contact form: 157 | * [www.christophergreening.com](https://www.christophergreening.com/contact) 158 | --- 159 | 160 |

161 | 162 |

163 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | 16 | sys.path.insert(0, os.path.abspath("..")) 17 | # sys.path.insert(0, r'D:\Programming\pythonstuff\instascrape') 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = "instascrape" 23 | copyright = "2020, Chris Greening" 24 | author = "Chris Greening" 25 | 26 | # The full version, including alpha/beta/rc tags 27 | release = "0.0.7" 28 | 29 | 30 | # -- General configuration --------------------------------------------------- 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"] 36 | 37 | master_doc = "index" 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ["_templates"] 41 | 42 | # List of patterns, relative to source directory, that match files and 43 | # directories to ignore when looking for source files. 44 | # This pattern also affects html_static_path and html_extra_path. 45 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 46 | 47 | 48 | # -- Options for HTML output ------------------------------------------------- 49 | 50 | # The theme to use for HTML and HTML Help pages. See the documentation for 51 | # a list of builtin themes. 52 | # 53 | html_theme = "sphinx_rtd_theme" 54 | 55 | # Add any paths that contain custom static files (such as style sheets) here, 56 | # relative to this directory. They are copied after the builtin static files, 57 | # so a file named "default.css" will overwrite the builtin "default.css". 
58 | html_static_path = ["_static"] 59 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. instascrape documentation master file, created by 2 | sphinx-quickstart on Sat Sep 26 16:24:31 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to instascrape's documentation! 7 | ======================================= 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | instascrape.scrapers 14 | instascrape.exceptions 15 | 16 | Indices and tables 17 | ================== 18 | 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | -------------------------------------------------------------------------------- /docs/instascrape.core.rst: -------------------------------------------------------------------------------- 1 | instascrape.core package 2 | ======================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: instascrape.core 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/instascrape.exceptions.rst: -------------------------------------------------------------------------------- 1 | instascrape.exceptions package 2 | ============================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | instascrape.exceptions.exceptions module 8 | ---------------------------------------- 9 | 10 | .. automodule:: instascrape.exceptions.exceptions 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: instascrape.exceptions 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/instascrape.rst: -------------------------------------------------------------------------------- 1 | instascrape package 2 | =================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | instascrape.core 10 | instascrape.scrapers 11 | 12 | Submodules 13 | ---------- 14 | 15 | instascrape.instascrape module 16 | ------------------------------ 17 | 18 | .. automodule:: instascrape.instascrape 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: instascrape 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/instascrape.scrapers.rst: -------------------------------------------------------------------------------- 1 | instascrape.scrapers package 2 | ============================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | instascrape.scrapers.hashtag module 8 | ----------------------------------- 9 | 10 | .. automodule:: instascrape.scrapers.hashtag 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | :inherited-members: 15 | 16 | instascrape.scrapers.post module 17 | -------------------------------- 18 | 19 | .. automodule:: instascrape.scrapers.post 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | :inherited-members: 24 | 25 | instascrape.scrapers.profile module 26 | ----------------------------------- 27 | 28 | .. 
automodule:: instascrape.scrapers.profile 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | :inherited-members: 33 | 34 | instascrape.scrapers.reel module 35 | -------------------------------- 36 | 37 | .. automodule:: instascrape.scrapers.reel 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | :inherited-members: 42 | 43 | instascrape.scrapers.location module 44 | ------------------------------------ 45 | 46 | .. automodule:: instascrape.scrapers.location 47 | :members: 48 | :undoc-members: 49 | :show-inheritance: 50 | :inherited-members: 51 | 52 | instascrape.scrapers.igtv module 53 | -------------------------------- 54 | 55 | .. automodule:: instascrape.scrapers.igtv 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | :inherited-members: 60 | 61 | instascrape.scrapers.scrape_tools module 62 | -------------------------------- 63 | 64 | .. automodule:: instascrape.scrapers.scrape_tools 65 | :members: 66 | :undoc-members: 67 | :show-inheritance: 68 | :inherited-members: 69 | 70 | Module contents 71 | --------------- 72 | 73 | .. automodule:: instascrape.scrapers 74 | :members: 75 | :undoc-members: 76 | :show-inheritance: 77 | :inherited-members: 78 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | instascrape 2 | =========== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | instascrape 8 | -------------------------------------------------------------------------------- /instascrape/__init__.py: -------------------------------------------------------------------------------- 1 | from instascrape.scrapers import * -------------------------------------------------------------------------------- /instascrape/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/instascrape/core/__init__.py -------------------------------------------------------------------------------- /instascrape/core/_mappings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mappings that tell the _JsonEngine the user facing attribute names and the 3 | steps needed to get there in a JSON dictionary 4 | """ 5 | 6 | from __future__ import annotations 7 | 8 | from abc import ABC 9 | from collections import deque 10 | from copy import deepcopy 11 | from typing import Dict, List, Union 12 | 13 | # pylint: disable=used-before-assignment 14 | 15 | 16 | MappingObject = Union["_PostMapping", "_ProfileMapping", "_HashtagMapping", "_LoginMapping"] 17 | 18 | 19 | class _GeneralMapping(ABC): 20 | """ 21 | Maps the user interfacing attribute names with their keys as given in a JSON 22 | dict that has been flattened using 23 | instascrape.core._json_flattener.JsonFlattener 24 | 25 | Attributes 26 | ---------- 27 | mapping : Dict[str, deque] 28 | Each key: val pair represents one data point and the directive for 29 | traversing a JSON dict and accessing that value 30 | 31 | Methods 32 | ------- 33 | return_mapping(keys: List[str]=[]) -> Dict[str, deque] 34 | Interface for returning only mapping directives that are specified in 35 | a list of keys 36 | 37 | """ 38 | 39 | mapping = { 40 | # "csrf_token": deque(["csrf_token"]), 41 | # "viewer_id": deque(["viewerId"]), 42 | # "country_code": deque(["country_code"]), 43 | # "language_code": deque(["language_code"]), 44 | # "locale": deque(["locale"]), 45 | # "device_id": deque(["device_id"]), 46 | # "browser_push_pub_key": deque(["browser_push_pub_key"]), 47 | # "key_id": deque(["key_id"]), 48 | # "public_key": deque(["public_key"]), 49 | # "version": deque(["version"]), 50 | # "is_dev": deque(["is_dev"]), 51 | # "rollout_hash": deque(["rollout_hash"]), 52 | # "bundle_variant": deque(["bundle_variant"]), 53 | # "frontend_dev": deque(["frontend_env"]), 54 | } 55 | 56 | @classmethod 57 | def return_mapping(cls, keys: List[str] = None, exclude: List[str] = None) -> Dict[str, deque]: 58 | """ 59 | Return key-directive pairs specified by key names. 
If no keys are 60 | specified, return all 61 | 62 | Parameters 63 | ---------- 64 | keys : List[str] 65 | Keys that specify what directives to return 66 | 67 | Returns 68 | ------- 69 | directive_dict : Dict[str, deque] 70 | Dictionary of keys and their directives 71 | """ 72 | if keys is None: 73 | keys = [] 74 | if exclude is None: 75 | exclude = [] 76 | if isinstance(keys, str): 77 | keys = [keys] 78 | if isinstance(exclude, str): 79 | exclude = [exclude] 80 | 81 | if not keys: 82 | keys = list(cls.mapping) 83 | if exclude: 84 | keys = [key for key in keys if key not in exclude] 85 | directive_dict = {key: deepcopy(cls.mapping[key]) for key in keys} 86 | return directive_dict 87 | 88 | 89 | class _PostMapping(_GeneralMapping): 90 | """Mapping specific to Instagram post pages""" 91 | 92 | mapping = _GeneralMapping.return_mapping().copy() 93 | mapping.update( 94 | { 95 | "id": deque(["id"]), 96 | "shortcode": deque(["shortcode"]), 97 | "height": deque(["height"]), 98 | "width": deque(["width"]), 99 | "gating_info": deque(["gating_info"]), 100 | "fact_check_overall_rating": deque(["fact_check_overall_rating"]), 101 | "fact_check_information": deque(["fact_check_information"]), 102 | "sensitivity_friction_info": deque(["sensitivity_friction_info"]), 103 | "media_overlay_info": deque(["media_overlay_info"]), 104 | "media_preview": deque(["media_preview"]), 105 | "display_url": deque(["display_url"]), 106 | "accessibility_caption": deque(["accessibility_caption"]), 107 | "is_video": deque(["is_video"]), 108 | "tracking_token": deque(["tracking_token"]), 109 | "tagged_users": deque(["edge_media_to_tagged_user"]), 110 | "caption": deque(["text"]), 111 | "caption_is_edited": deque(["caption_is_edited"]), 112 | "has_ranked_comments": deque(["has_ranked_comments"]), 113 | "comments": deque(["count"]), 114 | "comments_disabled": deque(["comments_disabled"]), 115 | "commenting_disabled_for_viewer": deque(["commenting_disabled_for_viewer"]), 116 | "timestamp": deque(["taken_at_timestamp"]), 117 | "likes": deque(["edge_media_preview_like_count"]), 118 | "location": deque(["name"]), 119 | "viewer_has_liked": deque(["viewer_has_liked"]), 120 | "viewer_has_saved": deque(["viewer_has_saved"]), 121 | "viewer_has_saved_to_collection": deque(["viewer_has_saved_to_collection"]), 122 | "viewer_in_photo_of_you": deque(["viewer_in_photo_of_you"]), 123 | "viewer_can_reshare": deque(["viewer_can_reshare"]), 124 | "video_url": deque(["video_url"]), 125 | "has_audio": deque(["has_audio"]), 126 | "video_view_count": deque(["video_view_count"]), 127 | "username": deque(["shortcode_media_owner_username"]), 128 | "full_name": deque(['owner_full_name']), 129 | } 130 | ) 131 | 132 | @classmethod 133 | def post_from_profile_mapping(cls): 134 | """ 135 | Return the mapping needed for parsing a post's JSON data from the JSON 136 | served back after requesting a Profile page. 
137 | """ 138 | return { 139 | "id": deque(["id"]), 140 | "shortcode": deque(["shortcode"]), 141 | "dimensions": deque(["dimensions"]), 142 | "display_url": deque(["display_url"]), 143 | "tagged_users": deque(["edge_media_to_tagged_user", "edges"]), 144 | "fact_check_overall_rating": deque(["fact_check_overall_rating"]), 145 | "fact_check_information": deque(["fact_check_information"]), 146 | "is_video": deque(["is_video"]), 147 | "accessibility_caption": deque(["accessibility_caption"]), 148 | "caption": deque(["edge_media_to_caption", "edges", 0, "node", "text"]), 149 | "comments": deque(["count"]), 150 | "comments_disabled": deque(["comments_disabled"]), 151 | "timestamp": deque(["taken_at_timestamp"]), 152 | "likes": deque(["edge_media_preview_like_count"]), 153 | "location": deque(["location"]), 154 | } 155 | 156 | @classmethod 157 | def post_from_hashtag_mapping(cls): 158 | """ 159 | Return the mapping needed for parsing a post's JSON data from the JSON 160 | served back after requesting a Hashtag page. 161 | """ 162 | return { 163 | "comments_disabled": deque(["comments_disabled"]), 164 | "id": deque(["id"]), 165 | "caption": deque(["edge_media_to_caption", "edges", 0, "node", "text"]), 166 | "shortcode": deque(["shortcode"]), 167 | "comments": deque(["edge_media_to_comment", "count"]), 168 | "upload_date": deque(["taken_at_timestamp"]), 169 | "dimensions": deque(["dimensions"]), 170 | "display_url": deque(["display_url"]), 171 | "likes": deque(["edge_media_preview_like", "count"]), 172 | "owner": deque(["owner", "id"]), 173 | "is_video": deque(["is_video"]), 174 | "accessibility_caption": deque(["accessibility_caption"]), 175 | } 176 | 177 | 178 | class _ReelMapping(_PostMapping): 179 | mapping = _PostMapping.return_mapping().copy() 180 | mapping.update( 181 | { 182 | "video_play_count": deque(["video_play_count"]), 183 | } 184 | ) 185 | 186 | 187 | class _IGTVMapping(_PostMapping): 188 | mapping = _PostMapping.return_mapping().copy() 189 | 190 | 191 | class _ProfileMapping(_GeneralMapping): 192 | """Mapping specific to Instagram profile pages""" 193 | 194 | mapping = _GeneralMapping.return_mapping().copy() 195 | mapping.update( 196 | { 197 | "logging_page_id": deque(["logging_page_id"]), 198 | "show_suggested_profiles": deque(["show_suggested_profiles"]), 199 | "show_follow_dialog": deque(["show_follow_dialog"]), 200 | "biography": deque(["biography"]), 201 | "blocked_by_viewer": deque(["blocked_by_viewer"]), 202 | "restricted_by_viewer": deque(["restricted_by_viewer"]), 203 | "country_block": deque(["country_block"]), 204 | "external_url": deque(["external_url"]), 205 | "external_url_linkshimmed": deque(["external_url_linkshimmed"]), 206 | "followers": deque(["count"]), 207 | "followed_by_viewer": deque(["followed_by_viewer"]), 208 | "following": deque(["edge_follow_count"]), 209 | "follows_viewer": deque(["follows_viewer"]), 210 | "full_name": deque(["user_full_name"]), 211 | "has_ar_effects": deque(["has_ar_effects"]), 212 | "has_clips": deque(["has_clips"]), 213 | "has_guides": deque(["has_guides"]), 214 | "has_channel": deque(["has_channel"]), 215 | "has_blocked_viewer": deque(["has_blocked_viewer"]), 216 | "highlight_reel_count": deque(["highlight_reel_count"]), 217 | "has_requested_viewer": deque(["has_requested_viewer"]), 218 | "id": deque(["id"]), 219 | "is_business_account": deque(["is_business_account"]), 220 | "is_joined_recently": deque(["is_joined_recently"]), 221 | "business_category_name": deque(["business_category_name"]), 222 | "overall_category_name": 
deque(["overall_category_name"]), 223 | "category_enum": deque(["category_enum"]), 224 | "is_private": deque(["is_private"]), 225 | "is_verified": deque(["is_verified"]), 226 | "mutual_followers": deque(["edge_mutual_followed_by_count"]), 227 | "profile_pic_url": deque(["profile_pic_url"]), 228 | "profile_pic_url_hd": deque(["profile_pic_url_hd"]), 229 | "requested_by_viewer": deque(["requested_by_viewer"]), 230 | "username": deque(["user_username"]), 231 | "connected_fb_page": deque(["connected_fb_page"]), 232 | "posts": deque(["edge_owner_to_timeline_media_count"]), 233 | } 234 | ) 235 | 236 | 237 | class _HashtagMapping(_GeneralMapping): 238 | """Mapping specific to Instagram hashtag pages""" 239 | 240 | mapping = _GeneralMapping.return_mapping().copy() 241 | mapping.update( 242 | { 243 | "id": deque(["id"]), 244 | "name": deque(["name"]), 245 | "allow_following": deque(["allow_following"]), 246 | "is_following": deque(["is_following"]), 247 | "is_top_media_only": deque(["is_top_media_only"]), 248 | "profile_pic_url": deque(["profile_pic_url"]), 249 | "amount_of_posts": deque(["count"]), 250 | } 251 | ) 252 | 253 | 254 | class _LocationMapping(_GeneralMapping): 255 | """Mapping specific to Instagram profile pages""" 256 | 257 | mapping = _GeneralMapping.return_mapping().copy() 258 | mapping.update( 259 | { 260 | "id": deque(["id"]), 261 | "name": deque(["name"]), 262 | "has_public_page": deque(["has_public_page"]), 263 | "latitude": deque(["lat"]), 264 | "longitude": deque(["lng"]), 265 | "slug": deque(["slug"]), 266 | "blurb": deque(["blurb"]), 267 | "website": deque(["website"]), 268 | "phone": deque(["phone"]), 269 | "primary_alias_on_fb": deque(["primary_alias_on_fb"]), 270 | "stress_address": deque(["street_address"]), 271 | "zip_code": deque(["zip_code"]), 272 | "city_name": deque(["city_name"]), 273 | "region_name": deque(["region_name"]), 274 | "country_code": deque(["country_code"]), 275 | "amount_of_posts": deque(["count"]), 276 | } 277 | ) 278 | 279 | 280 | class _LoginMapping(_GeneralMapping): 281 | """Mapping specific to Instagram login page""" 282 | 283 | mapping = _GeneralMapping.return_mapping().copy() 284 | 285 | 286 | class _HttpErrorMapping(_GeneralMapping): 287 | """Mapping specific to Instagram login page""" 288 | 289 | mapping = _GeneralMapping.return_mapping().copy() 290 | 291 | 292 | class _MetaMapping: 293 | """ 294 | Map the string in the Instagram JSON that indicates the type of page the 295 | JSON was scraped from 296 | 297 | Attributes 298 | ---------- 299 | str_to_mapper_obj : Dict[str, Any] 300 | Dictionary that maps the string name of the JSON type to the specific 301 | mapping object 302 | 303 | Methods 304 | ------- 305 | get_mapper(page_type: str) 306 | Return the mapping object that correlates to the string 307 | """ 308 | 309 | str_to_mapper_obj = { 310 | "ProfilePage": _ProfileMapping, 311 | "TagPage": _HashtagMapping, 312 | "PostPage": _PostMapping, 313 | "LoginAndSignupPage": _LoginMapping, 314 | "LocationsPage": _LocationMapping 315 | } 316 | 317 | @classmethod 318 | def get_mapper(cls, page_type: str) -> MappingObject: 319 | """ 320 | Return the appropriate mapper that corresponds to the page_type as 321 | given in the requested Instagram JSON data 322 | """ 323 | return cls.str_to_mapper_obj[page_type] 324 | -------------------------------------------------------------------------------- /instascrape/core/_static_scraper.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 
| 3 | import datetime 4 | import json 5 | import csv 6 | from abc import ABC, abstractmethod 7 | from typing import Union, Dict, List, Any 8 | import sys 9 | import os 10 | from collections import namedtuple, deque 11 | import warnings 12 | 13 | import requests 14 | from bs4 import BeautifulSoup 15 | 16 | from instascrape.scrapers.scrape_tools import parse_data_from_json, determine_json_type, flatten_dict, json_from_soup 17 | from instascrape.exceptions.exceptions import InstagramLoginRedirectError, MissingSessionIDWarning, MissingCookiesWarning 18 | 19 | # pylint: disable=no-member 20 | 21 | JSONDict = Dict[str, Any] 22 | 23 | class _StaticHtmlScraper(ABC): 24 | """ 25 | Base class for all of the scrapers, handles general functionality that all 26 | scraper objects will have 27 | """ 28 | 29 | # Keys that represent metadata attr that the user doesn't necessarily need 30 | # to worry about 31 | _METADATA_KEYS = [ 32 | "json_dict", 33 | "url", 34 | "_json_scraper", 35 | "scrape_timestamp", 36 | "map_dict", 37 | "json_data", 38 | "json_flattener", 39 | "flat_json_dict", 40 | "soup", 41 | "html", 42 | "source", 43 | ] 44 | _ASSOCIATED_JSON_TYPE = None 45 | 46 | session = requests.Session() 47 | 48 | def __init__(self, source: Union[str, BeautifulSoup, JSONDict]) -> None: 49 | """ 50 | Parameters 51 | ---------- 52 | source : Union[str, BeautifulSoup, JSONDict] 53 | The given source for scraping the data from. Available sources are 54 | a URL, HTML, JSON dictionary, BeautifulSoup, etc. 55 | """ 56 | self.source = source 57 | 58 | # Instance variables that are given values elsewhere 59 | self.url = None 60 | self.html = None 61 | self.soup = None 62 | self.json_dict = None 63 | self.flat_json_dict = None 64 | self.scrape_timestamp = None 65 | 66 | def __getitem__(self, key: str) -> Any: 67 | return getattr(self, key) 68 | 69 | def __repr__(self) -> str: 70 | return f"<{type(self).__name__}>" 71 | 72 | def scrape( 73 | self, 74 | mapping=None, 75 | keys: List[str] = None, 76 | exclude: List[str] = None, 77 | headers={ 78 | "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57" 79 | }, 80 | inplace=True, 81 | session=None, 82 | webdriver=None 83 | ) -> None: 84 | """ 85 | Scrape data from the source 86 | 87 | Parameters 88 | ---------- 89 | mapping : Dict[str, deque] 90 | Dictionary of parsing queue's that tell the JSON engine how to 91 | process the JSON data 92 | keys : List[str] 93 | List of strings that correspond to desired attributes for scraping 94 | exclude : List[str] 95 | List of strings that correspond to which attributes to exclude from 96 | being scraped 97 | headers : Dict[str, str] 98 | Dictionary of request headers to be passed on the GET request 99 | inplace : bool 100 | Determines if data modified inplace or return a new object with the 101 | scraped data 102 | session : requests.Session 103 | Session for making the GET request 104 | webdriver : selenium.webdriver.chrome.webdriver.WebDriver 105 | Webdriver for scraping the page, overrides any default or passed 106 | session 107 | 108 | Returns 109 | ------- 110 | return_instance 111 | Optionally returns a scraped instance instead of modifying inplace 112 | if inplace arg is True 113 | """ 114 | 115 | if mapping is None: 116 | mapping = self._Mapping.return_mapping(keys=keys, exclude=exclude) 117 | if session is None: 118 | session = self.session 119 | if webdriver is not None: 120 | session = webdriver 121 | if keys is 
None: 122 | keys = [] 123 | if exclude is None: 124 | exclude = [] 125 | 126 | if webdriver is None: 127 | try: 128 | if "sessionid" not in headers["cookie"]: 129 | warnings.warn( 130 | "Session ID not in cookies! It's recommended you pass a valid sessionid otherwise Instagram will likely redirect you to their login page.", 131 | MissingSessionIDWarning 132 | ) 133 | except KeyError: 134 | warnings.warn( 135 | "Request header does not contain cookies! It's recommended you pass at least a valid sessionid otherwise Instagram will likely redirect you to their login page.", 136 | MissingCookiesWarning 137 | ) 138 | 139 | # If the passed source was already an object, construct data from 140 | # source else parse it 141 | if isinstance(self.source, type(self)): 142 | scraped_dict = self.source.to_dict() 143 | else: 144 | return_data = self._get_json_from_source(self.source, headers=headers, session=session) 145 | flat_json_dict = flatten_dict(return_data["json_dict"]) 146 | 147 | #HACK: patch mapping to fix the profile pic scrape when a sessionid is present 148 | try: 149 | if "sessionid" in headers["cookie"]: 150 | mapping["profile_pic_url"] = deque(["user_profile_pic_url"]) 151 | mapping["profile_pic_url_hd"] = deque(["user_profile_pic_url_hd"]) 152 | except KeyError: 153 | pass 154 | 155 | scraped_dict = parse_data_from_json( 156 | json_dict=flat_json_dict, 157 | map_dict=mapping, 158 | ) 159 | return_data["scrape_timestamp"] = datetime.datetime.now() 160 | return_data["flat_json_dict"] = flat_json_dict 161 | return_instance = self._load_into_namespace( 162 | scraped_dict=scraped_dict, 163 | return_data=return_data, 164 | inplace=inplace 165 | ) 166 | return None if return_instance is self else return_instance 167 | 168 | def to_dict(self, metadata: bool = False) -> Dict[str, Any]: 169 | """ 170 | Return a dictionary containing all of the data that has been scraped 171 | 172 | Parameters 173 | ---------- 174 | metadata : bool 175 | Boolean value that determines if metadata specified in self._METADATA_KEYS 176 | will be included in the dictionary. 
177 | 178 | Returns 179 | ------- 180 | data_dict : Dict[str, Any] 181 | Dictionary containing the scraped data 182 | """ 183 | data_dict = ( 184 | {key: val for key, val in self.__dict__.items() if key not in self._METADATA_KEYS} 185 | if not metadata 186 | else self.__dict__ 187 | ) 188 | return data_dict 189 | 190 | def to_csv(self, fp: str) -> None: 191 | """ 192 | Write scraped data to .csv at the given filepath 193 | 194 | Parameters 195 | ---------- 196 | fp : str 197 | Filepath to write data to 198 | """ 199 | with open(fp, "w", newline="", encoding="utf-8") as csv_file: 200 | writer = csv.writer(csv_file) 201 | for key, value in self.to_dict().items(): 202 | writer.writerow([key, str(value)]) 203 | 204 | def to_json(self, fp: str) -> None: 205 | """ 206 | Write scraped data to .json file at the given filepath 207 | 208 | Parameters 209 | ---------- 210 | fp : str 211 | Filepath to write data to 212 | """ 213 | outdict = {key: str(val) for key, val in self.to_dict().items()} 214 | with open(fp, "w") as outjson: 215 | json.dump(outdict, outjson) 216 | 217 | @abstractmethod 218 | def _url_from_suburl(self, suburl: str) -> str: 219 | pass 220 | 221 | def _get_json_from_source(self, source: Any, headers: dict, session: requests.Session) -> JSONDict: 222 | """Parses the JSON data out from the source based on what type the source is""" 223 | initial_type = True 224 | return_data = {"source": self.source} 225 | if isinstance(source, str): 226 | source_type = self._determine_string_type(source) 227 | elif isinstance(source, dict): 228 | json_dict = source 229 | source_type = "json dict" 230 | elif isinstance(source, BeautifulSoup): 231 | source_type = "soup" 232 | 233 | if source_type == "suburl": 234 | if initial_type: 235 | suburl = self.source 236 | url = self._url_from_suburl(suburl=suburl) 237 | source_type = "url" 238 | initial_type = False 239 | return_data["url"] = url 240 | 241 | if source_type == "url": 242 | if initial_type: 243 | url = self.source 244 | html = self._html_from_url(url=url, headers=headers, session=session) 245 | source_type = "html" 246 | initial_type = False 247 | return_data["html"] = html 248 | 249 | if source_type == "html": 250 | if initial_type: 251 | html = self.source 252 | soup = self._soup_from_html(html) 253 | source_type = "soup" 254 | initial_type = False 255 | return_data["soup"] = soup 256 | 257 | if source_type == "soup": 258 | if initial_type: 259 | soup = self.source 260 | json_dict_arr = json_from_soup(soup) 261 | if len(json_dict_arr) == 1: 262 | json_dict = json_dict_arr[0] 263 | else: 264 | json_dict = json_dict_arr[1] 265 | self._validate_scrape(json_dict) 266 | 267 | return_data["json_dict"] = json_dict 268 | 269 | return return_data 270 | 271 | def _load_into_namespace(self, scraped_dict: dict, return_data, inplace) -> None: 272 | """Loop through the scraped dictionary and set them as instance attr""" 273 | instance = self if inplace else type(self)(return_data["source"]) 274 | for key, val in scraped_dict.items(): 275 | setattr(instance, key, val) 276 | for key, val in return_data.items(): 277 | setattr(instance, key, val) 278 | return instance 279 | 280 | 281 | @staticmethod 282 | def _html_from_url(url: str, headers: dict, session: requests.Session) -> str: 283 | """Return HTML from requested URL""" 284 | if isinstance(session, requests.Session): 285 | response = session.get(url, headers=headers) 286 | page_source = response.text 287 | else: 288 | session.get(url) 289 | page_source = session.page_source 290 | return page_source 291 | 
292 | @staticmethod 293 | def _soup_from_html(html: str) -> BeautifulSoup: 294 | """Return BeautifulSoup from source HTML""" 295 | return BeautifulSoup(html, features="html.parser") 296 | 297 | def _validate_scrape(self, json_dict: str) -> JSONDict: 298 | """Raise exceptions if the scrape did not properly execute""" 299 | json_type = determine_json_type(json_dict) 300 | if json_type == "LoginAndSignupPage" and not type(self).__name__ == "LoginAndSignupPage": 301 | raise InstagramLoginRedirectError 302 | elif json_type == "HttpErrorPage" and not type(self).__name__ == "HttpErrorPage": 303 | source_str = self.url if hasattr(self, "url") else "Source" 304 | raise ValueError(f"{source_str} is not a valid Instagram page. Please provide a valid argument.") 305 | 306 | @staticmethod 307 | def _determine_string_type(string_data: str) -> str: 308 | """Match and return string representation of appropriate source""" 309 | string_type_map = [("https://", "url"), ("window._sharedData", "html"), ('{"config"', "JSON dict str")] 310 | for substr, str_type in string_type_map: 311 | if substr in string_data: 312 | #BUG: !DOCTYPE isnt returned in selenium source code, use as secondary identifier instead 313 | if substr == "https://" and "!DOCTYPE" in string_data: 314 | continue 315 | break 316 | else: 317 | str_type = "suburl" 318 | return str_type 319 | -------------------------------------------------------------------------------- /instascrape/core/json_algos.py: -------------------------------------------------------------------------------- 1 | """ 2 | Algorithms and implementations for working with/manipulating JSON data. NOT 3 | intended for top level use but instead imported for top level functions to leverage 4 | """ 5 | 6 | from collections import deque 7 | from typing import Any, Dict, List, Union 8 | 9 | from bs4 import BeautifulSoup 10 | 11 | JSONDict = Dict[str, Any] 12 | 13 | class _JSONTree: 14 | """Tree of linked lists that map out the JSON data""" 15 | 16 | def __init__(self, json_dict: JSONDict) -> None: 17 | self.json_dict = json_dict 18 | self.map_tree(self.json_dict) 19 | 20 | def map_tree(self, json_dict) -> None: 21 | """Map the entire JSON tree and get access to leaf _JSONNodes""" 22 | self.leaf_nodes = [] 23 | self.root_node = _JSONNode(json_data=json_dict, tree=self) 24 | 25 | class _JSONNode: 26 | """Representation of one step into a JSON Tree""" 27 | 28 | def __init__(self, json_data: Any, tree: _JSONTree, linked_list: deque = None, prior_keys: List[Union[str, int]] = None) -> None: 29 | self.json_data = json_data 30 | self.tree = tree 31 | 32 | self.linked_list = linked_list if linked_list is not None else deque([]) 33 | self.prior_keys = prior_keys if prior_keys is not None else [] 34 | 35 | self.dtype = type(self.json_data) 36 | 37 | self.nodes = [] 38 | 39 | # If the node is a leaf then it has no edges 40 | if self.is_leaf: 41 | self.json_data = {prior_keys[-1]: self.json_data} 42 | self.tree.leaf_nodes.append(self) 43 | 44 | else: 45 | self.get_edges() 46 | 47 | @property 48 | def is_leaf(self) -> bool: 49 | """ 50 | If the dtype of self.json_data is not a dict or a list then it must be 51 | a leaf node 52 | """ 53 | return self.dtype is not list and self.dtype is not dict 54 | 55 | def get_edges(self) -> None: 56 | """Get all edges connected to current _JSONNode""" 57 | if self.dtype is list: 58 | iter_arr = zip(range(len(self.json_data)), self.json_data) 59 | else: 60 | iter_arr = self.json_data.items() 61 | 62 | for key, value in iter_arr: 63 | next_linked_list = 
self.linked_list + deque([self]) 64 |             next_key = self.prior_keys + [key] 65 |             node = _JSONNode(value, self.tree, next_linked_list, next_key) 66 |             self.nodes.append(node) 67 | 68 |     def __repr__(self) -> str: 69 |         return str(self.json_data) 70 | 71 | 72 | def _parse_json_str(source: BeautifulSoup) -> List[str]: 73 |     """Return a list of parsed JSON strings from the BeautifulSoup's script tags""" 74 |     json_data = [] 75 |     json_scripts = [str(script) for script in source.find_all("script") if "config" in str(script)] 76 |     for script_tag in json_scripts: 77 |         left_index = script_tag.find("{") 78 |         right_index = script_tag.rfind("}") + 1 79 |         json_str = script_tag[left_index:right_index] 80 |         json_data.append(json_str) 81 |     return json_data 82 | -------------------------------------------------------------------------------- /instascrape/exceptions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/instascrape/exceptions/__init__.py -------------------------------------------------------------------------------- /instascrape/exceptions/exceptions.py: -------------------------------------------------------------------------------- 1 | class InstagramLoginRedirectError(Exception): 2 |     """ 3 |     Exception that indicates Instagram is redirecting away from the page 4 |     that should be getting scraped. Can be remedied by logging into Instagram. 5 |     """ 6 | 7 |     def __init__( 8 |         self, 9 |         message="Instagram is redirecting you to the login page instead of the page you are trying to scrape. This could be occurring because you made too many requests too quickly or are not logged into Instagram on your machine. Try passing a valid session ID to the scrape method as a cookie to bypass the login requirement", 10 |     ): 11 |         super().__init__(message) 12 | 13 | 14 | class WrongSourceError(Exception): 15 |     """ 16 |     Exception that indicates the user passed the wrong source type to the scraper. 17 |     An example is passing a URL for a hashtag page to a Profile. 
18 | """ 19 | 20 | def __init__(self, message="Wrong input source, use the correct class"): 21 | super().__init__(message) 22 | 23 | 24 | class MissingSessionIDWarning(UserWarning): 25 | pass 26 | 27 | class MissingCookiesWarning(UserWarning): 28 | pass -------------------------------------------------------------------------------- /instascrape/scrapers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Primary API scraper tools 3 | """ 4 | 5 | from instascrape.scrapers.hashtag import * 6 | from instascrape.scrapers.post import * 7 | from instascrape.scrapers.profile import * 8 | from instascrape.scrapers.comment import * 9 | from instascrape.scrapers.location import * 10 | from instascrape.scrapers.reel import * 11 | from instascrape.scrapers.igtv import * 12 | from instascrape.scrapers.scrape_tools import * -------------------------------------------------------------------------------- /instascrape/scrapers/comment.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parse data related to comments, including comments in a thread 3 | """ 4 | 5 | import datetime 6 | 7 | 8 | class Comment: 9 | """A single comment and its respective data""" 10 | 11 | # pylint: disable=too-many-instance-attributes, too-few-public-methods 12 | 13 | def __init__(self, comment_dict: dict) -> None: 14 | self.comment_dict = comment_dict["node"] 15 | 16 | self._parse_data() 17 | 18 | def __repr__(self) -> str: 19 | return f" None: 22 | self.text = self.comment_dict["text"] 23 | self.created_at = datetime.datetime.fromtimestamp(self.comment_dict["created_at"]) 24 | self.did_report_as_spam = self.comment_dict["did_report_as_spam"] 25 | self.is_verified = self.comment_dict["owner"]["is_verified"] 26 | self.profile_pic_url = self.comment_dict["owner"]["profile_pic_url"] 27 | self.username = self.comment_dict["owner"]["username"] 28 | self.viewer_has_liked = self.comment_dict["viewer_has_liked"] 29 | self.likes = self.comment_dict["edge_liked_by"]["count"] 30 | self.is_restricted_pending = self.comment_dict["is_restricted_pending"] 31 | 32 | try: 33 | comments = self.comment_dict["edge_threaded_comments"]["edges"] 34 | self.replies = [Comment(comment_dict) for comment_dict in comments] 35 | except KeyError: 36 | self.replies = [] 37 | -------------------------------------------------------------------------------- /instascrape/scrapers/hashtag.py: -------------------------------------------------------------------------------- 1 | """ 2 | Hashtag 3 | ------- 4 | Scrape data from a Hashtag page 5 | """ 6 | from __future__ import annotations 7 | 8 | from typing import List 9 | import time 10 | 11 | from instascrape.core._mappings import _HashtagMapping, _PostMapping 12 | from instascrape.core._static_scraper import _StaticHtmlScraper 13 | from instascrape.scrapers.post import Post 14 | 15 | class Hashtag(_StaticHtmlScraper): 16 | """Scraper for an Instagram hashtag page""" 17 | 18 | _Mapping = _HashtagMapping 19 | 20 | def get_recent_posts(self, amt: int = 71) -> List[Post]: 21 | """ 22 | Return a list of recent posts to the hasthag 23 | 24 | Parameters 25 | ---------- 26 | amt : int 27 | Amount of recent posts to return 28 | 29 | Returns 30 | ------- 31 | posts : List[Post] 32 | List containing the recent 12 posts and their available data 33 | """ 34 | posts = [] 35 | post_arr = self.json_dict["entry_data"]["TagPage"][0]["graphql"]["hashtag"]["edge_hashtag_to_media"]["edges"] 36 | amount_of_posts = len(post_arr) 37 | if 
amt > amount_of_posts: 38 | amt = amount_of_posts 39 | for post in post_arr[:amt]: 40 | json_dict = post["node"] 41 | mapping = _PostMapping.post_from_hashtag_mapping() 42 | post = Post(json_dict) 43 | post.scrape(mapping=mapping) 44 | posts.append(post) 45 | return posts 46 | 47 | def _url_from_suburl(self, suburl: str) -> str: 48 | return f"https://www.instagram.com/tags/{suburl}/" -------------------------------------------------------------------------------- /instascrape/scrapers/igtv.py: -------------------------------------------------------------------------------- 1 | from instascrape.scrapers.post import Post 2 | from instascrape.core._mappings import _IGTVMapping 3 | 4 | 5 | class IGTV(Post): 6 | """Scraper for an IGTV post""" 7 | 8 | _Mapping = _IGTVMapping 9 | 10 | @staticmethod 11 | def _url_from_suburl(suburl: str) -> str: 12 | return f"https://www.instagram.com/tv/{suburl}/" 13 | -------------------------------------------------------------------------------- /instascrape/scrapers/location.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import List 4 | 5 | from instascrape.core._mappings import _LocationMapping, _PostMapping 6 | from instascrape.core._static_scraper import _StaticHtmlScraper 7 | from instascrape.scrapers.post import Post 8 | 9 | 10 | class Location(_StaticHtmlScraper): 11 | """Scraper for an Instagram profile page""" 12 | 13 | _Mapping = _LocationMapping 14 | 15 | def get_recent_posts(self, amt: int = 24) -> List[Post]: 16 | """ 17 | Return a list of recent posts to the location 18 | 19 | Parameters 20 | ---------- 21 | amt : int 22 | Amount of recent posts to return 23 | 24 | Returns 25 | ------- 26 | posts : List[Post] 27 | List containing the recent 24 posts and their available data 28 | """ 29 | posts = [] 30 | post_arr = self.json_dict["entry_data"]["LocationsPage"][0]["graphql"]["location"]["edge_location_to_media"][ 31 | "edges" 32 | ] 33 | amount_of_posts = len(post_arr) 34 | if amt > amount_of_posts: 35 | amt = amount_of_posts 36 | for post in post_arr[:amt]: 37 | json_dict = post["node"] 38 | mapping = _PostMapping.post_from_hashtag_mapping() 39 | post = Post(json_dict) 40 | post.scrape(mapping=mapping) 41 | posts.append(post) 42 | return posts 43 | 44 | def _url_from_suburl(self, suburl): 45 | return f"https://www.instagram.com/explore/locations/{suburl}/" 46 | -------------------------------------------------------------------------------- /instascrape/scrapers/post.py: -------------------------------------------------------------------------------- 1 | """ 2 | Post 3 | ---- 4 | Scrape data from a Post page 5 | """ 6 | from __future__ import annotations 7 | 8 | import datetime 9 | from typing import List 10 | import re 11 | import shutil 12 | import pathlib 13 | import math 14 | 15 | import requests 16 | 17 | from instascrape.core._mappings import _PostMapping 18 | from instascrape.core._static_scraper import _StaticHtmlScraper 19 | from instascrape.scrapers.scrape_tools import parse_data_from_json 20 | from instascrape.scrapers.comment import Comment 21 | 22 | class Post(_StaticHtmlScraper): 23 | """Scraper for an Instagram post page""" 24 | 25 | _Mapping = _PostMapping 26 | SUPPORTED_DOWNLOAD_EXTENSIONS = [".mp3", ".mp4", ".png", ".jpg"] 27 | 28 | def scrape( 29 | self, 30 | mapping=None, 31 | keys: List[str] = None, 32 | exclude: List[str] = None, 33 | headers={ 34 | "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57" 35 | }, 36 | inplace=True, 37 | session=None, 38 | webdriver=None 39 | ) -> None: 40 | """ 41 | Scrape data from the source 42 | 43 | Parameters 44 | ---------- 45 | mapping : Dict[str, deque] 46 | Dictionary of parsing queue's that tell the JSON engine how to 47 | process the JSON data 48 | keys : List[str] 49 | List of strings that correspond to desired attributes for scraping 50 | exclude : List[str] 51 | List of strings that correspond to which attributes to exclude from 52 | being scraped 53 | headers : Dict[str, str] 54 | Dictionary of request headers to be passed on the GET request 55 | inplace : bool 56 | Determines if data modified inplace or return a new object with the 57 | scraped data 58 | session : requests.Session 59 | Session for making the GET request 60 | webdriver : selenium.webdriver.chrome.webdriver.WebDriver 61 | Webdriver for scraping the page, overrides any default or passed 62 | session 63 | 64 | Returns 65 | ------- 66 | return_instance 67 | Optionally returns a scraped instance instead of modifying inplace 68 | if inplace arg is True 69 | """ 70 | # pylint: disable=no-member, attribute-defined-outside-init 71 | if hasattr(self, "shortcode"): 72 | self.source = self.shortcode 73 | return_instance = super().scrape( 74 | mapping=mapping, 75 | keys=keys, 76 | exclude=exclude, 77 | headers=headers, 78 | inplace=inplace, 79 | session=session, 80 | webdriver=webdriver 81 | ) 82 | if return_instance is None: 83 | return_instance = self 84 | 85 | # HACK: This isn't a very clean solution and there is certainly a better 86 | # way to deal with returning a Post object with only partial data 87 | if hasattr(return_instance, "timestamp"): 88 | return_instance.upload_date = datetime.datetime.fromtimestamp(return_instance.timestamp) 89 | if hasattr(return_instance, "shortcode"): 90 | return_instance.url = self._url_from_suburl(return_instance.shortcode) 91 | 92 | if mapping is None: 93 | return_instance.tagged_users = return_instance._parse_tagged_users(return_instance.json_dict) 94 | return_instance.hashtags = return_instance._parse_hashtags(return_instance.caption) if isinstance(return_instance.caption, str) else float("nan") 95 | try: 96 | if math.isnan(return_instance.full_name): 97 | return_instance.full_name = return_instance.flat_json_dict["full_name"] 98 | except TypeError: 99 | pass 100 | return return_instance if return_instance is not self else None 101 | 102 | def download(self, fp: str) -> None: 103 | """ 104 | Download an image or video from a post to your local machine at the given filepath 105 | 106 | Parameters 107 | ---------- 108 | fp : str 109 | Filepath to download the image to 110 | """ 111 | # pylint: disable=no-member 112 | 113 | ext = pathlib.Path(fp).suffix 114 | if ext not in self.SUPPORTED_DOWNLOAD_EXTENSIONS: 115 | raise NameError( 116 | f"{ext} is not a supported file extension. 
Please use {', '.join(self.SUPPORTED_DOWNLOAD_EXTENSIONS)}" 117 | ) 118 | url = self.video_url if self.is_video else self.display_url 119 | 120 | resp = requests.get(url, stream=True) 121 | if not self.is_video: 122 | self._download_photo(fp, resp) 123 | else: 124 | self._download_video(fp, resp) 125 | 126 | def get_recent_comments(self) -> List[Comment]: 127 | """ 128 | Returns a list of Comment objects that contain data regarding 129 | some of the posts comments 130 | 131 | Returns 132 | ------- 133 | comments_arr : List[Comment] 134 | List of Comment objects 135 | """ 136 | list_of_dicts = self.json_dict["entry_data"]["PostPage"][0]["graphql"]["shortcode_media"][ 137 | "edge_media_to_parent_comment" 138 | ]["edges"] 139 | comments_arr = [Comment(comment_dict) for comment_dict in list_of_dicts] 140 | return comments_arr 141 | 142 | def embed(self) -> str: 143 | """ 144 | Return embeddable HTML str for this post 145 | 146 | Returns 147 | ------- 148 | html_template : str 149 | HTML string with embed markup for this Post 150 | """ 151 | html_template = f'
View this post on Instagram

A post shared by {self.full_name} (@{self.username})

' 152 | return html_template 153 | 154 | @staticmethod 155 | def _url_from_suburl(suburl: str) -> str: 156 | return f"https://www.instagram.com/p/{suburl}/" 157 | 158 | def _download_photo(self, fp: str, resp: requests.models.Response) -> None: 159 | with open(fp, "wb") as outfile: 160 | resp.raw.decode_content = True 161 | shutil.copyfileobj(resp.raw, outfile) 162 | 163 | def _download_video(self, fp: str, resp: requests.models.Response) -> None: 164 | """Write the media to file at given fp from the response""" 165 | with open(fp, "wb") as outfile: 166 | for chunk in resp.iter_content(chunk_size=1024): 167 | if chunk: 168 | outfile.write(chunk) 169 | outfile.flush() 170 | 171 | def _parse_tagged_users(self, json_dict: dict) -> List[str]: 172 | """Parse the tagged users from JSON dict containing the tagged users""" 173 | if "graphql" in json_dict: 174 | json_dict = [json_dict] 175 | json_dict = {"PostPage": json_dict} 176 | json_dict = {"entry_data": json_dict} 177 | tagged_arr = json_dict["entry_data"]["PostPage"][0]["graphql"]["shortcode_media"]["edge_media_to_tagged_user"][ 178 | "edges" 179 | ] 180 | return [node["node"]["user"]["username"] for node in tagged_arr] 181 | 182 | def _parse_hashtags(self, caption: str) -> List[str]: 183 | """Parse the hastags from the post's caption using regex""" 184 | pattern = r"#(\w+)" 185 | return re.findall(pattern, caption) 186 | -------------------------------------------------------------------------------- /instascrape/scrapers/profile.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import time 4 | 5 | from typing import List 6 | 7 | from bs4 import BeautifulSoup 8 | 9 | from instascrape.core._mappings import _PostMapping, _ProfileMapping 10 | from instascrape.core._static_scraper import _StaticHtmlScraper 11 | from instascrape.scrapers.post import Post 12 | 13 | class Profile(_StaticHtmlScraper): 14 | """Scraper for an Instagram profile page""" 15 | 16 | _Mapping = _ProfileMapping 17 | 18 | def get_recent_posts(self, amt: int = 12) -> List[Post]: 19 | """ 20 | Return a list of the profiles recent posts. Max available for return 21 | is 12. 22 | 23 | Parameters 24 | ---------- 25 | amt : int 26 | Amount of recent posts to return 27 | 28 | Returns 29 | ------- 30 | posts : List[Post] 31 | List containing the recent 12 posts and their available data 32 | """ 33 | if amt > 12: 34 | raise IndexError( 35 | f"{amt} is too large, 12 is max available posts. Getting more posts will require an out-of-the-box extension." 36 | ) 37 | posts = [] 38 | try: 39 | post_arr = self.json_dict["entry_data"]["ProfilePage"][0]["graphql"]["user"][ 40 | "edge_owner_to_timeline_media" 41 | ]["edges"] 42 | except TypeError: 43 | raise ValueError( 44 | "Can't return posts without first scraping the Profile. Call the scrape method on your object first." 
45 | ) 46 | 47 | for post in post_arr[:amt]: 48 | json_dict = post["node"] 49 | mapping = _PostMapping.post_from_profile_mapping() 50 | post = Post(json_dict) 51 | post.scrape(mapping=mapping) 52 | post.username = self.username 53 | post.full_name = self.full_name 54 | posts.append(post) 55 | return posts 56 | 57 | def get_posts(self, webdriver, amount=None, login_first=False, login_pause=60, max_failed_scroll=300, scrape=False, scrape_pause=5): 58 | """ 59 | Return Post objects from profile scraped using a webdriver (not included) 60 | 61 | Parameters 62 | ---------- 63 | webdriver : selenium.webdriver.chrome.webdriver.WebDriver 64 | Selenium webdriver for rendering JavaScript and loading dynamic 65 | content 66 | amount : int 67 | Amount of posts to return, default is all of them 68 | login_first : bool 69 | Start on login page to allow user to manually login to Instagram 70 | login_pause : int 71 | Length of time in seconds to pause before starting scrape 72 | max_failed_scroll : int 73 | Maximum amount of scroll attempts before stopping if scroll is stuck 74 | scrape : bool 75 | Scrape posts with the webdriver prior to returning 76 | scrape_pause : int 77 | Time in seconds between each scrape 78 | 79 | Returns 80 | ------- 81 | posts : List[Post] 82 | Post objects gathered from the profile page 83 | """ 84 | 85 | JS_SCROLL_SCRIPT = "window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;" 86 | JS_PAGE_LENGTH_SCRIPT = "var lenOfPage=document.body.scrollHeight; return lenOfPage;" 87 | 88 | # Determine how many posts are available on the page 89 | try: 90 | posts_len = self.posts 91 | if amount is None: 92 | amount = posts_len 93 | if amount > posts_len: 94 | raise ValueError(f"{amount} posts requested but {self.username} only has {posts_len} posts") 95 | except AttributeError: 96 | raise AttributeError(f"{type(self)} must be scraped first") 97 | 98 | # Manual login 99 | if login_first: 100 | webdriver.get("https://www.instagram.com") 101 | time.sleep(login_pause) 102 | 103 | # Get profile page 104 | webdriver.get(self.url) 105 | 106 | # Continuously scroll, collect HTML, and parse Post objects 107 | posts = [] 108 | shortcodes = [] 109 | scroll_attempts = 0 110 | last_position = webdriver.execute_script(JS_PAGE_LENGTH_SCRIPT) 111 | scrolling = True 112 | while scrolling: 113 | current_position = webdriver.execute_script(JS_SCROLL_SCRIPT) 114 | source_data = webdriver.page_source 115 | found_posts = self._separate_posts(source_data) 116 | 117 | # Append found posts into total posts 118 | for post in found_posts: 119 | if post.source not in shortcodes: 120 | shortcodes.append(post.source) 121 | posts.append(post) 122 | 123 | # If scroll is stuck and exceeds max allowed attempts, exit loop 124 | if current_position == last_position: 125 | scroll_attempts += 1 126 | if scroll_attempts > max_failed_scroll: 127 | scrolling = False 128 | else: 129 | scroll_attempts = 0 130 | last_position = current_position 131 | 132 | current_post_len = len(posts) 133 | if (current_post_len >= posts_len) or (current_post_len >= amount): 134 | break 135 | 136 | # Remove excess posts from right of list 137 | posts = posts[:amount] 138 | 139 | # If scrape arg is True, scrape all posts using webdriver 140 | scraped_posts = [] 141 | if scrape: 142 | for post in posts: 143 | scraped_posts.append(post.scrape(inplace=False, webdriver=webdriver)) 144 | time.sleep(scrape_pause) 145 | posts = scraped_posts 146 | 147 | return posts 148 | 149 | def _separate_posts(self, 
source_data): 150 |         """Separate the HTML and parse out BeautifulSoup for every post""" 151 |         post_soup = [] 152 | 153 |         soup = BeautifulSoup(source_data, features="lxml") 154 |         anchor_tags = soup.find_all("a") 155 |         post_tags = [tag for tag in anchor_tags if tag.find( 156 |             "div", {"class": "eLAPa"})] 157 | 158 |         #Filter new posts that have not been stored yet 159 |         new_posts = [tag for tag in post_tags if tag not in post_soup] 160 |         post_soup += new_posts 161 | 162 |         return self._create_post_objects(post_soup) 163 | 164 |     def _create_post_objects(self, post_soup): 165 |         """Create Post objects from the given shortcodes""" 166 |         posts = [] 167 |         for post in post_soup: 168 |             shortcode = post["href"].replace("/p/", "")[:-1] 169 |             posts.append(Post(shortcode)) 170 |         return posts 171 | 172 |     def _url_from_suburl(self, suburl): 173 |         return f"https://www.instagram.com/{suburl}/" 174 | -------------------------------------------------------------------------------- /instascrape/scrapers/reel.py: -------------------------------------------------------------------------------- 1 | from instascrape.scrapers.post import Post 2 | from instascrape.core._mappings import _ReelMapping 3 | 4 | 5 | class Reel(Post): 6 |     """Scraper for an Instagram reel""" 7 | 8 |     _Mapping = _ReelMapping 9 | 10 |     @staticmethod 11 |     def _url_from_suburl(suburl: str) -> str: 12 |         return f"https://www.instagram.com/reel/{suburl}/" 13 | -------------------------------------------------------------------------------- /instascrape/scrapers/scrape_tools.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | from typing import Any, Dict, Union, Callable, List 5 | from collections import deque 6 | import datetime 7 | from functools import partial 8 | import copy 9 | import time 10 | 11 | import requests 12 | from bs4 import BeautifulSoup 13 | 14 | from instascrape.core.json_algos import _JSONTree, _parse_json_str 15 | 16 | JSONDict = Dict[str, Any] 17 | 18 | def parse_data_from_json(json_dict, map_dict, default_value=float('nan')): 19 |     """ 20 |     Parse data from a JSON dictionary using a mapping dictionary that tells 21 |     the program how to parse the data 22 |     """ 23 |     return_data = {} 24 |     for key in map_dict: 25 |         steps_to_value = map_dict[key] 26 | 27 |         # Loop through all steps into the JSON dict that will give us our data 28 |         first_step = steps_to_value.popleft() 29 |         try: 30 |             value = json_dict[first_step] 31 |         except KeyError: 32 |             value = default_value 33 |         else: 34 |             for step in steps_to_value: 35 |                 value = value[step] 36 |         finally: 37 |             return_data[key] = value 38 |     return return_data 39 | 40 | def flatten_dict(json_dict: JSONDict) -> JSONDict: 41 |     """ 42 |     Returns a flattened dictionary of data 43 | 44 |     Parameters 45 |     ---------- 46 |     json_dict : dict 47 |         Input dictionary for flattening 48 | 49 |     Returns 50 |     ------- 51 |     flattened_dict : dict 52 |         Flattened dictionary 53 |     """ 54 |     json_tree = _JSONTree(json_dict) 55 |     flattened_dict = {} 56 |     for leaf_node in json_tree.leaf_nodes: 57 |         key_arr = deque([]) 58 |         for key in leaf_node.prior_keys[::-1]: 59 |             key_arr.appendleft(str(key)) 60 |             new_key = "_".join(key_arr) 61 |             if new_key not in flattened_dict: 62 |                 break 63 |         flattened_dict[new_key] = list(leaf_node.json_data.values())[0] 64 |     return flattened_dict 65 | 66 | def json_from_html(source: Union[str, "BeautifulSoup"], as_dict: bool = True, flatten=False) -> Union[JSONDict, str]: 67 |     """ 68 |     Return JSON data parsed 
from Instagram source HTML 69 | 70 | Parameters 71 | ---------- 72 | source : Union[str, BeautifulSoup] 73 | Instagram HTML source code to parse the JSON from 74 | as_dict : bool = True 75 | Return JSON as dict if True else return JSON as string 76 | flatten : bool 77 | Flatten the dictionary prior to returning it 78 | 79 | Returns 80 | ------- 81 | json_data : Union[JSONDict, str] 82 | Parsed JSON data from the HTML source as either a JSON-like dictionary 83 | or just the string serialization 84 | """ 85 | 86 | soup = BeautifulSoup(source, features="html.parser") 87 | json_data = json_from_soup(source=soup, as_dict=as_dict, flatten=flatten) 88 | return json_data 89 | 90 | def json_from_soup(source, as_dict: bool = True, flatten=False): 91 | json_data = _parse_json_str(source=source) 92 | 93 | if as_dict: 94 | json_data = [json.loads(json_str) for json_str in json_data] 95 | if flatten: 96 | json_data = [flatten_dict(json_dict) for json_dict in json_data] 97 | 98 | return json_data 99 | 100 | def determine_json_type(json_data: Union[JSONDict, str]) -> str: 101 | """ 102 | Return the type of Instagram page based on the JSON data parsed from source 103 | 104 | Parameters 105 | ---------- 106 | json_data: Union[JSONDict, str] 107 | JSON data that will be checked and parsed to determine what type of page 108 | the program is looking at (Profile, Post, Hashtag, etc) 109 | 110 | Returns 111 | ------- 112 | instagram_type : str 113 | Name of the type of page the program is currently parsing or looking at 114 | """ 115 | if not isinstance(json_data, dict): 116 | json_data = json.loads(json_data) 117 | try: 118 | instagram_type = list(json_data["entry_data"])[0] 119 | except KeyError: 120 | instagram_type = "Inconclusive" 121 | return instagram_type 122 | 123 | def json_from_url( 124 | url: str, 125 | as_dict: bool = True, 126 | headers={ 127 | "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57" 128 | }, 129 | flatten=False 130 | ) -> Union[JSONDict, str]: 131 | """ 132 | Return JSON data parsed from a provided Instagram URL 133 | 134 | Parameters 135 | ---------- 136 | url : str 137 | URL of the page to get the JSON data from 138 | as_dict : bool = True 139 | Return JSON as dict if True else return JSON as string 140 | headers : Dict[str, str] 141 | Dictionary of request headers to be passed on the GET request 142 | flatten : bool 143 | Flatten the dictionary prior to returning it 144 | 145 | Returns 146 | ------- 147 | json_data : Union[JSONDict, str] 148 | Parsed JSON data from the URL as either a JSON-like dictionary 149 | or just the string serialization 150 | """ 151 | source = requests.get(url, headers=headers).text 152 | return json_from_html(source, as_dict=as_dict, flatten=flatten) 153 | 154 | 155 | def scrape_posts( 156 | posts: List["Post"], 157 | session: requests.Session = None, 158 | webdriver: "selenium.webdriver.chrome.webdriver.WebDriver" = None, 159 | limit: Union[int, datetime.datetime] = None, 160 | headers: dict = { 161 | "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57" 162 | }, 163 | pause: int = 5, 164 | on_exception: str = "raise", 165 | silent: bool = True, 166 | inplace: bool = False 167 | ): 168 | 169 | # Default setup 170 | if not inplace: 171 | posts = copy.deepcopy(posts) 172 | if limit is None: 173 | limit = len(posts) 174 | 175 | 
scraped_posts = [] 176 | for i, post in enumerate(posts): 177 | temporary_post = copy.deepcopy(post) 178 | try: 179 | post.scrape(session=session, webdriver=webdriver, headers=headers) 180 | scraped_posts.append(post) 181 | except Exception as e: 182 | if on_exception == "raise": 183 | raise 184 | elif on_exception == "pass": 185 | if not silent: 186 | print(f"PASSING EXCEPTION: {e}") 187 | pass 188 | elif on_exception == "return": 189 | if not silent: 190 | print(f"{e}, RETURNING SCRAPED AND UNSCRAPED") 191 | break 192 | if not silent: 193 | output_str = f"{i}: {post.shortcode} - {post.upload_date}" 194 | print(output_str) 195 | if _stop_scraping(limit, post, i): 196 | break 197 | time.sleep(pause) 198 | 199 | unscraped_posts = list(set(posts) - set(scraped_posts)) 200 | if not isinstance(limit, int): 201 | scraped_posts.pop() 202 | unscraped_posts.insert(0, temporary_post) 203 | 204 | return scraped_posts, unscraped_posts if not inplace else None 205 | 206 | def _stop_scraping(limit, post, i): 207 | stop = False 208 | if isinstance(limit, int): 209 | if i == limit - 1: 210 | stop = True 211 | elif (isinstance(limit, datetime.datetime) or isinstance(limit, datetime.date)): 212 | if post.upload_date <= limit: 213 | stop = True 214 | return stop 215 | -------------------------------------------------------------------------------- /media/6x6scatter_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/media/6x6scatter_matrix.png -------------------------------------------------------------------------------- /media/instascrape.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/media/instascrape.gif -------------------------------------------------------------------------------- /media/likes_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/media/likes_heatmap.png -------------------------------------------------------------------------------- /media/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/media/logo.png -------------------------------------------------------------------------------- /media/logopic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/media/logopic.png -------------------------------------------------------------------------------- /media/realpython.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/media/realpython.png -------------------------------------------------------------------------------- /media/scatter_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/media/scatter_matrix.png -------------------------------------------------------------------------------- /media/techprofiles.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/media/techprofiles.gif -------------------------------------------------------------------------------- /pypi.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Automate the process of uploading package to PyPI 4 | 5 | #Delete older version build directories 6 | directories=("dist/" "build/" "insta_scrape.egg-info/") 7 | for d in ${directories[@]}; 8 | do 9 | if [ -d $d ]; 10 | then 11 | rm -rf $d 12 | echo "$d deleted!" 13 | fi 14 | done 15 | 16 | #Setup package and initiate upload to PyPI 17 | python3 setup.py sdist bdist_wheel 18 | twine upload dist/* 19 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length=120 -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | # pytest 2 | [pytest] 3 | markers = 4 | file_io: Involves file I/O 5 | 6 | addopts = --strict-markers 7 | 8 | filterwarnings = 9 | ignore::DeprecationWarning 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.9.3 2 | certifi==2020.6.20 3 | chardet==3.0.4 4 | idna==2.10 5 | lxml==4.6.2 6 | pytest==6.1.1 7 | requests==2.24.0 8 | soupsieve==2.0.1 9 | urllib3==1.25.10 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="insta-scrape", 8 | version="2.1.2", 9 | author="Chris Greening", 10 | author_email="chris@christophergreening.com", 11 | description="Super lightweight Instagram web scraper for data analysis", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/chris-greening/instascrape", 15 | packages=["instascrape", "instascrape.core", "instascrape.scrapers", "instascrape.exceptions"], 16 | install_requires=["requests", "beautifulsoup4"], 17 | classifiers=[ 18 | "Programming Language :: Python :: 3", 19 | "License :: OSI Approved :: MIT License", 20 | "Operating System :: OS Independent", 21 | ], 22 | python_requires=">=3.7", 23 | ) 24 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tests/__init__.py -------------------------------------------------------------------------------- /tests/scrapers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tests/scrapers/__init__.py -------------------------------------------------------------------------------- /tests/scrapers/test_hashtag.py: 
-------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | import os 4 | 5 | from bs4 import BeautifulSoup 6 | import pytest 7 | import requests 8 | 9 | from instascrape import Hashtag 10 | 11 | 12 | class TestHashtag: 13 | 14 | @pytest.fixture 15 | def url(self): 16 | return "https://www.instagram.com/tags/kotlin/" 17 | 18 | @pytest.fixture(scope="session") 19 | def headers(self): 20 | return {"User-Agent": "user-agent: Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57", 21 | "cookie": f"sessionid={os.environ.get('sessionid')};"} 22 | 23 | @pytest.fixture 24 | def get_request(self, url, headers): 25 | return requests.get(url, headers=headers) 26 | 27 | @pytest.fixture 28 | def page_instance(self, url, headers): 29 | random_hashtag = Hashtag(url) 30 | random_hashtag.scrape(headers=headers) 31 | return random_hashtag 32 | 33 | def test_from_html(self, get_request, page_instance): 34 | hashtag_html = get_request.text 35 | hashtag_obj = Hashtag(hashtag_html) 36 | hashtag_obj.scrape() 37 | assert hashtag_obj.amount_of_posts == page_instance.amount_of_posts 38 | 39 | def test_from_soup(self, get_request, page_instance): 40 | hashtag_html = get_request.text 41 | hashtag_soup = BeautifulSoup(hashtag_html, features='lxml') 42 | hashtag_obj = Hashtag(hashtag_soup) 43 | hashtag_obj.scrape() 44 | assert hashtag_obj.amount_of_posts == page_instance.amount_of_posts 45 | 46 | def test_to_dict(self, page_instance): 47 | assert isinstance(page_instance.to_dict(), dict) 48 | 49 | @pytest.mark.file_io 50 | def test_to_json(self, page_instance, tmpdir): 51 | file = tmpdir.join("data.json") 52 | page_instance.to_json(fp=str(file)) 53 | with open(str(file), "r") as injson: 54 | json_dict = json.load(injson) 55 | assert page_instance['name'] == json_dict['name'] 56 | 57 | @pytest.mark.file_io 58 | def test_to_csv(self, page_instance, tmpdir): 59 | 60 | # write to CSV 61 | file = tmpdir.join("data.csv") 62 | page_instance.to_csv(fp=str(file)) 63 | 64 | # reread the csv 65 | with open(str(file), mode="r") as infile: 66 | reader = csv.reader(infile) 67 | csv_dict = {row[0]: row[1] for row in reader} 68 | 69 | assert page_instance['name'] == csv_dict['name'] 70 | -------------------------------------------------------------------------------- /tests/scrapers/test_igtv.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import datetime 3 | import json 4 | import re 5 | import os 6 | 7 | import pytest 8 | from bs4 import BeautifulSoup 9 | import requests 10 | 11 | from instascrape import IGTV 12 | 13 | 14 | class TestIGTV: 15 | @pytest.fixture 16 | def url(self): 17 | return "https://www.instagram.com/tv/CIrIIMYl8VQ/" 18 | 19 | @pytest.fixture 20 | def get_request(self, url): 21 | return requests.get(url, headers={"User-Agent": "user-agent: Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57"}) 22 | 23 | @pytest.fixture 24 | def page_instance(self, url): 25 | random_google_igtv = IGTV(url) 26 | random_google_igtv.scrape() 27 | return random_google_igtv 28 | 29 | def test_from_html(self, get_request, page_instance): 30 | igtv_html = get_request.text 31 | igtv_obj = IGTV(igtv_html) 32 | igtv_obj.scrape() 33 | assert igtv_obj.likes == page_instance.likes 34 | 35 | def test_from_soup(self, get_request, page_instance): 
36 | igtv_html = get_request.text 37 | igtv_soup = BeautifulSoup(igtv_html, features='lxml') 38 | igtv_obj = IGTV(igtv_soup) 39 | igtv_obj.scrape() 40 | assert igtv_obj.likes == page_instance.likes 41 | 42 | def test_to_dict(self, page_instance): 43 | assert isinstance(page_instance.to_dict(), dict) 44 | 45 | def test_embed(self, page_instance): 46 | html_embed = page_instance.embed() 47 | embed_copied_from_instagram = '
View this post on Instagram

A post shared by Google (@google)

' 48 | assert html_embed == embed_copied_from_instagram 49 | 50 | @pytest.mark.file_io 51 | def test_to_json(self, page_instance, tmpdir): 52 | file = tmpdir.join("data.json") 53 | page_instance.to_json(fp=str(file)) 54 | with open(str(file), "r") as injson: 55 | json_dict = json.load(injson) 56 | assert page_instance['shortcode'] == json_dict['shortcode'] 57 | 58 | @pytest.mark.file_io 59 | def test_to_csv(self, page_instance, tmpdir): 60 | 61 | # write to CSV 62 | file = tmpdir.join("data.csv") 63 | page_instance.to_csv(fp=str(file)) 64 | 65 | # reread the csv 66 | with open(str(file), mode="r") as infile: 67 | reader = csv.reader(infile) 68 | csv_dict = {row[0]: row[1] for row in reader} 69 | 70 | assert page_instance['shortcode'] == csv_dict['shortcode'] 71 | 72 | @pytest.mark.file_io 73 | def test_download_photo(self, page_instance, tmpdir): 74 | 75 | # donwload photo 76 | file = tmpdir.join("image.jpg") 77 | page_instance.download(fp=str(file)) 78 | assert os.path.exists(file) 79 | -------------------------------------------------------------------------------- /tests/scrapers/test_location.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | import os 4 | 5 | from bs4 import BeautifulSoup 6 | import pytest 7 | import requests 8 | 9 | from instascrape import Location 10 | 11 | class TestHashtag: 12 | 13 | @pytest.fixture 14 | def url(self): 15 | return "https://www.instagram.com/explore/locations/212918601/grand-central-terminal/" 16 | 17 | @pytest.fixture(scope="session") 18 | def headers(self): 19 | return {"User-Agent": "user-agent: Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57", 20 | "cookie": f"sessionid={os.environ.get('sessionid')};"} 21 | 22 | @pytest.fixture 23 | def get_request(self, url, headers): 24 | return requests.get(url, headers=headers) 25 | 26 | @pytest.fixture 27 | def page_instance(self, url, headers): 28 | random_location = Location(url) 29 | random_location.scrape(headers=headers) 30 | return random_location 31 | 32 | def test_from_html(self, get_request, page_instance): 33 | location_html = get_request.text 34 | location_obj = Location(location_html) 35 | location_obj.scrape() 36 | assert location_obj.amount_of_posts == page_instance.amount_of_posts 37 | 38 | def test_from_soup(self, get_request, page_instance): 39 | location_html = get_request.text 40 | location_soup = BeautifulSoup(location_html, features='lxml') 41 | location_obj = Location(location_soup) 42 | location_obj.scrape() 43 | assert location_obj.amount_of_posts == page_instance.amount_of_posts 44 | 45 | def test_to_dict(self, page_instance): 46 | assert isinstance(page_instance.to_dict(), dict) 47 | 48 | @pytest.mark.file_io 49 | def test_to_json(self, page_instance, tmpdir): 50 | file = tmpdir.join("data.json") 51 | page_instance.to_json(fp=str(file)) 52 | with open(str(file), "r") as injson: 53 | json_dict = json.load(injson) 54 | assert page_instance['name'] == json_dict['name'] 55 | 56 | @pytest.mark.file_io 57 | def test_to_csv(self, page_instance, tmpdir): 58 | 59 | # write to CSV 60 | file = tmpdir.join("data.csv") 61 | page_instance.to_csv(fp=str(file)) 62 | 63 | # reread the csv 64 | with open(str(file), mode="r") as infile: 65 | reader = csv.reader(infile) 66 | csv_dict = {row[0]: row[1] for row in reader} 67 | 68 | assert page_instance['name'] == csv_dict['name'] 69 | 
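# A minimal sketch of how the header fixtures above expect credentials to be
# supplied (illustrative only; the "sessionid" environment variable name comes
# from the fixtures, everything else here is an assumption). With a valid
# session id exported in your shell, a Location page can be scraped without
# being redirected to the login page:

import os

from instascrape import Location

headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57",
    "cookie": f"sessionid={os.environ.get('sessionid')};",
}

grand_central = Location("https://www.instagram.com/explore/locations/212918601/grand-central-terminal/")
grand_central.scrape(headers=headers)
print(grand_central.amount_of_posts)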
-------------------------------------------------------------------------------- /tests/scrapers/test_post.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import datetime 3 | import json 4 | import re 5 | import os 6 | 7 | import pytest 8 | from bs4 import BeautifulSoup 9 | import requests 10 | 11 | from instascrape import Post 12 | 13 | 14 | class TestPost: 15 | @pytest.fixture 16 | def url(self): 17 | return "https://www.instagram.com/p/CJpBmOtAmNr/" 18 | 19 | @pytest.fixture(scope="session") 20 | def headers(self): 21 | return {"User-Agent": "user-agent: Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57", 22 | "cookie": f"sessionid={os.environ.get('sessionid')};"} 23 | 24 | @pytest.fixture 25 | def get_request(self, url, headers): 26 | return requests.get(url, headers=headers) 27 | 28 | @pytest.fixture 29 | def page_instance(self, url, headers): 30 | random_google_post = Post(url) 31 | random_google_post.scrape(headers=headers) 32 | return random_google_post 33 | 34 | def test_from_html(self, get_request, page_instance): 35 | post_html = get_request.text 36 | post_obj = Post(post_html) 37 | post_obj.scrape() 38 | assert post_obj.likes == page_instance.likes 39 | 40 | def test_from_soup(self, get_request, page_instance): 41 | post_html = get_request.text 42 | post_soup = BeautifulSoup(post_html, features='lxml') 43 | post_obj = Post(post_soup) 44 | post_obj.scrape() 45 | assert post_obj.likes == page_instance.likes 46 | 47 | def test_to_dict(self, page_instance): 48 | assert isinstance(page_instance.to_dict(), dict) 49 | 50 | def test_embed(self, page_instance): 51 | html_embed = page_instance.embed() 52 | embed_copied_from_instagram = '
View this post on Instagram

A post shared by Chris Greening (@chris_greening)

' 53 | assert html_embed == embed_copied_from_instagram 54 | 55 | @pytest.mark.file_io 56 | def test_to_json(self, page_instance, tmpdir): 57 | file = tmpdir.join("data.json") 58 | page_instance.to_json(fp=str(file)) 59 | with open(str(file), "r") as injson: 60 | json_dict = json.load(injson) 61 | assert page_instance['shortcode'] == json_dict['shortcode'] 62 | 63 | @pytest.mark.file_io 64 | def test_to_csv(self, page_instance, tmpdir): 65 | 66 | # write to CSV 67 | file = tmpdir.join("data.csv") 68 | page_instance.to_csv(fp=str(file)) 69 | 70 | # reread the csv 71 | with open(str(file), mode="r") as infile: 72 | reader = csv.reader(infile) 73 | csv_dict = {row[0]: row[1] for row in reader} 74 | 75 | assert page_instance['shortcode'] == csv_dict['shortcode'] 76 | 77 | @pytest.mark.file_io 78 | def test_download_photo(self, page_instance, tmpdir): 79 | 80 | # donwload photo 81 | file = tmpdir.join("image.jpg") 82 | page_instance.download(fp=str(file)) 83 | assert os.path.exists(file) 84 | -------------------------------------------------------------------------------- /tests/scrapers/test_profile.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | import os 4 | 5 | import pytest 6 | from bs4 import BeautifulSoup 7 | import requests 8 | 9 | from instascrape import Post, Profile 10 | 11 | 12 | class TestProfile: 13 | 14 | @pytest.fixture 15 | def url(self): 16 | return "https://www.instagram.com/chris_greening/" 17 | 18 | @pytest.fixture(scope="session") 19 | def headers(self): 20 | return {"User-Agent": "user-agent: Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57", 21 | "cookie": f"sessionid={os.environ.get('sessionid')};"} 22 | 23 | @pytest.fixture 24 | def get_request(self, url, headers): 25 | return requests.get(url, headers=headers) 26 | 27 | @pytest.fixture 28 | def page_instance(self, url, headers): 29 | random_profile = Profile(url) 30 | random_profile.scrape(headers=headers) 31 | return random_profile 32 | 33 | def test_from_html(self, get_request, page_instance): 34 | profile_html = get_request.text 35 | profile_obj = Profile(profile_html) 36 | profile_obj.scrape() 37 | assert profile_obj.followers == page_instance.followers 38 | 39 | def test_from_soup(self, get_request, page_instance): 40 | profile_html = get_request.text 41 | profile_soup = BeautifulSoup(profile_html, features='lxml') 42 | profile_obj = Profile(profile_soup) 43 | profile_obj.scrape() 44 | assert profile_obj.followers == page_instance.followers 45 | 46 | def test_to_dict(self, page_instance): 47 | assert isinstance(page_instance.to_dict(), dict) 48 | 49 | def test_get_recent_posts(self, page_instance): 50 | posts = page_instance.get_recent_posts(amt=6) 51 | assert len(posts) == 6 52 | assert all([type(post) is Post for post in posts]) 53 | assert all([hasattr(post, "id") for post in posts]) 54 | 55 | @pytest.mark.file_io 56 | def test_to_json(self, page_instance, tmpdir): 57 | file = tmpdir.join("data.json") 58 | page_instance.to_json(fp=str(file)) 59 | with open(str(file), "r") as injson: 60 | json_dict = json.load(injson) 61 | assert page_instance['username'] == json_dict['username'] 62 | 63 | @pytest.mark.file_io 64 | def test_to_csv(self, page_instance, tmpdir): 65 | 66 | # write to CSV 67 | file = tmpdir.join("data.csv") 68 | page_instance.to_csv(fp=str(file)) 69 | 70 | # reread the csv 71 | with open(str(file), mode="r") as infile: 72 | 
reader = csv.reader(infile) 73 | csv_dict = {row[0]: row[1] for row in reader} 74 | 75 | assert page_instance['username'] == csv_dict['username'] 76 | -------------------------------------------------------------------------------- /tests/scrapers/test_reel.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import datetime 3 | import json 4 | import re 5 | import os 6 | 7 | import pytest 8 | from bs4 import BeautifulSoup 9 | import requests 10 | 11 | from instascrape import Reel 12 | 13 | 14 | class TestReel: 15 | @pytest.fixture 16 | def url(self): 17 | return "https://www.instagram.com/reel/CIrJSrFFHM_/" 18 | 19 | @pytest.fixture 20 | def get_request(self, url): 21 | return requests.get(url, headers={"User-Agent": "user-agent: Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57"}) 22 | 23 | @pytest.fixture 24 | def page_instance(self, url): 25 | random_google_reel = Reel(url) 26 | random_google_reel.scrape() 27 | return random_google_reel 28 | 29 | def test_from_html(self, get_request, page_instance): 30 | reel_html = get_request.text 31 | reel_obj = Reel(reel_html) 32 | reel_obj.scrape() 33 | assert reel_obj.likes == page_instance.likes 34 | 35 | def test_from_soup(self, get_request, page_instance): 36 | reel_html = get_request.text 37 | reel_soup = BeautifulSoup(reel_html, features='lxml') 38 | reel_obj = Reel(reel_soup) 39 | reel_obj.scrape() 40 | assert reel_obj.likes == page_instance.likes 41 | 42 | def test_to_dict(self, page_instance): 43 | assert isinstance(page_instance.to_dict(), dict) 44 | 45 | def test_embed(self, page_instance): 46 | html_embed = page_instance.embed() 47 | embed_copied_from_instagram = '
View this post on Instagram

A post shared by Google (@google)

' 48 | assert html_embed == embed_copied_from_instagram 49 | 50 | @pytest.mark.file_io 51 | def test_to_json(self, page_instance, tmpdir): 52 | file = tmpdir.join("data.json") 53 | page_instance.to_json(fp=str(file)) 54 | with open(str(file), "r") as injson: 55 | json_dict = json.load(injson) 56 | assert page_instance['shortcode'] == json_dict['shortcode'] 57 | 58 | @pytest.mark.file_io 59 | def test_to_csv(self, page_instance, tmpdir): 60 | 61 | # write to CSV 62 | file = tmpdir.join("data.csv") 63 | page_instance.to_csv(fp=str(file)) 64 | 65 | # reread the csv 66 | with open(str(file), mode="r") as infile: 67 | reader = csv.reader(infile) 68 | csv_dict = {row[0]: row[1] for row in reader} 69 | 70 | assert page_instance['shortcode'] == csv_dict['shortcode'] 71 | 72 | @pytest.mark.file_io 73 | def test_download_photo(self, page_instance, tmpdir): 74 | 75 | # donwload photo 76 | file = tmpdir.join("image.jpg") 77 | page_instance.download(fp=str(file)) 78 | assert os.path.exists(file) 79 | -------------------------------------------------------------------------------- /tutorial/examples/DonaldTrump/plots/comments_per_post.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/DonaldTrump/plots/comments_per_post.png -------------------------------------------------------------------------------- /tutorial/examples/DonaldTrump/plots/hashtags.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/DonaldTrump/plots/hashtags.png -------------------------------------------------------------------------------- /tutorial/examples/DonaldTrump/plots/likes_per_post.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/DonaldTrump/plots/likes_per_post.png -------------------------------------------------------------------------------- /tutorial/examples/DonaldTrump/plots/likes_vs_comments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/DonaldTrump/plots/likes_vs_comments.png -------------------------------------------------------------------------------- /tutorial/examples/DonaldTrump/plots/locations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/DonaldTrump/plots/locations.png -------------------------------------------------------------------------------- /tutorial/examples/DonaldTrump/plots/views_and_likes_per_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/DonaldTrump/plots/views_and_likes_per_view.png -------------------------------------------------------------------------------- /tutorial/examples/DonaldTrump/plots/views_per_video.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/DonaldTrump/plots/views_per_video.png -------------------------------------------------------------------------------- /tutorial/examples/JoeBiden/joebiden.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/JoeBiden/joebiden.png -------------------------------------------------------------------------------- /tutorial/examples/JoeBiden/joebiden.py: -------------------------------------------------------------------------------- 1 | # See this tutorial to find your sessionid: 2 | # http://valvepress.com/how-to-get-instagram-session-cookie/ 3 | 4 | from selenium.webdriver import Chrome 5 | from instascrape import Profile, scrape_posts 6 | 7 | # Creating our webdriver 8 | webdriver = Chrome("path/to/chromedriver.exe") 9 | 10 | # Scraping Joe Biden's profile 11 | SESSIONID = 'ENTER_YOUR_SESSION_ID_HERE' 12 | headers = {"user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57", 13 | "cookie": f"sessionid={SESSIONID};"} 14 | joe = Profile("joebiden") 15 | joe.scrape(headers=headers) 16 | 17 | # Scraping the posts 18 | posts = joe.get_posts(webdriver=webdriver, login_first=True) 19 | scraped, unscraped = scrape_posts(posts, silent=False, headers=headers, pause=10) 20 | -------------------------------------------------------------------------------- /tutorial/examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | ## Note: Selenium is required for some of these examples but is not a required dependency for instascrape. 
To use those examples you will have to install and configure on your own 4 | -------------------------------------------------------------------------------- /tutorial/examples/download_recent_photos/2020-09-08 09h06m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/download_recent_photos/2020-09-08 09h06m.png -------------------------------------------------------------------------------- /tutorial/examples/download_recent_photos/2020-09-09 10h24m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/download_recent_photos/2020-09-09 10h24m.png -------------------------------------------------------------------------------- /tutorial/examples/download_recent_photos/2020-09-14 10h05m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/download_recent_photos/2020-09-14 10h05m.png -------------------------------------------------------------------------------- /tutorial/examples/download_recent_photos/2020-09-17 17h49m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/download_recent_photos/2020-09-17 17h49m.png -------------------------------------------------------------------------------- /tutorial/examples/download_recent_photos/2020-09-24 11h01m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/download_recent_photos/2020-09-24 11h01m.png -------------------------------------------------------------------------------- /tutorial/examples/download_recent_photos/2020-09-25 10h18m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/download_recent_photos/2020-09-25 10h18m.png -------------------------------------------------------------------------------- /tutorial/examples/download_recent_photos/2020-09-26 11h38m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/download_recent_photos/2020-09-26 11h38m.png -------------------------------------------------------------------------------- /tutorial/examples/download_recent_photos/2020-09-27 09h27m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/download_recent_photos/2020-09-27 09h27m.png -------------------------------------------------------------------------------- /tutorial/examples/download_recent_photos/2020-09-28 12h17m.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/download_recent_photos/2020-09-28 12h17m.png --------------------------------------------------------------------------------
/tutorial/examples/download_recent_photos/2020-10-14 12h36m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/download_recent_photos/2020-10-14 12h36m.png --------------------------------------------------------------------------------
/tutorial/examples/download_recent_photos/2020-10-15 13h11m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/download_recent_photos/2020-10-15 13h11m.png --------------------------------------------------------------------------------
/tutorial/examples/download_recent_photos/2020-10-16 14h39m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chris-greening/instascrape/a720355474b2a0506bdbe32dc67f19d464de3556/tutorial/examples/download_recent_photos/2020-10-16 14h39m.png --------------------------------------------------------------------------------
/tutorial/examples/download_recent_photos/download_recent_photos.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Downloading recent photos from a profile\n", 8 | "\n", 9 | "In this example, I'm going to show you how you can quickly download a user's most recently posted photos to your computer using `instascrape`!\n", 10 | "\n", 11 | "To start, we're going to instantiate an `instascrape.Profile` object with a given username (for this example, we'll use my profile [@chris_greening](https://www.instagram.com/chris_greening/)) and load the user's data." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 5, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import instascrape\n", 21 | "chris = instascrape.Profile('chris_greening')\n", 22 | "chris.scrape()" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "Now, to get the user's recent posts, we will use the `get_recent_posts` method." 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "chris_posts = chris.get_recent_posts()" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "And now for the main event! We're ready to download the images to our computer. All we have to do is pass `Post.download` a filepath and it will handle the rest, downloading the image to that filepath. In this case, we will create a filename `fname` that uses each post's upload date as its name." 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 7, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "for post in chris_posts: \n", 55 | "    fname = post.upload_date.strftime(\"%Y-%m-%d %Hh%Mm\")\n", 56 | "    post.download(f\"{fname}.png\") " 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "That's it! 
We have now downloaded all of the recent photos from @chris_greening to our computer" 64 | ] 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": "Python 3.7.4 64-bit ('base': conda)", 70 | "language": "python", 71 | "name": "python37464bitbaseconda5a7fa83a9e6c4e43a4941e0b76e31b98" 72 | }, 73 | "language_info": { 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.7.4" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 2 88 | } 89 | -------------------------------------------------------------------------------- /tutorial/examples/max_liked_post.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true, 7 | "pycharm": { 8 | "name": "#%% md\n" 9 | } 10 | }, 11 | "source": [ 12 | "#### Import library" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": { 19 | "pycharm": { 20 | "is_executing": false, 21 | "name": "#%%\n" 22 | } 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "from examples.profile_scrape.dynamic_profile import DynamicProfile\n", 27 | " " 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "#### Load a Profile (Accept the cookie warning when it pops out)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": { 41 | "pycharm": { 42 | "is_executing": false, 43 | "name": "#%%\n" 44 | } 45 | }, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "Read 0 posts\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "chris = DynamicProfile.from_username('chris_greening')\n", 57 | "chris.static_load()\n", 58 | "max_post_to_load = 10\n", 59 | "chris.dynamic_load(max_posts=max_post_to_load)\n" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "#### Get URL with max likes" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 3, 72 | "metadata": { 73 | "pycharm": { 74 | "is_executing": false, 75 | "name": "#%% \n" 76 | } 77 | }, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | "Likes: 388\n", 84 | "Posts: \n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "max_like = 0\n", 90 | "max_post = 0\n", 91 | "for p in chris.posts:\n", 92 | " if p.data_points[0].likes > max_like:\n", 93 | " max_like = p.data_points[0].likes\n", 94 | " max_post = p\n", 95 | "print('Likes: ', max_like)\n", 96 | "print('Posts: ', max_post)\n" 97 | ] 98 | } 99 | ], 100 | "metadata": { 101 | "kernelspec": { 102 | "display_name": "Python 3", 103 | "language": "python", 104 | "name": "python3" 105 | }, 106 | "language_info": { 107 | "codemirror_mode": { 108 | "name": "ipython", 109 | "version": 3 110 | }, 111 | "file_extension": ".py", 112 | "mimetype": "text/x-python", 113 | "name": "python", 114 | "nbconvert_exporter": "python", 115 | "pygments_lexer": "ipython3", 116 | "version": "3.7.4" 117 | }, 118 | "pycharm": { 119 | "stem_cell": { 120 | "cell_type": "raw", 121 | "metadata": { 122 | "collapsed": false 123 | }, 124 | "source": [] 125 | } 126 | } 127 | }, 128 | "nbformat": 4, 129 | "nbformat_minor": 1 130 | } 131 | -------------------------------------------------------------------------------- 
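The max_liked_post.ipynb example above imports a `DynamicProfile` helper from `examples.profile_scrape.dynamic_profile`, a module that is not part of the package layout shown in this repository. As a rough sketch of the same idea using only the `Profile` and `Post` scrapers documented in the other tutorials (assuming `get_recent_posts()` and the `likes` and `shortcode` attributes behave as they do there), one could write something like:

```python
# Sketch only: find the most-liked post among a profile's recent posts
# using the Profile/Post scrapers from the tutorials, not DynamicProfile.
from instascrape import Profile

profile = Profile("chris_greening")
profile.scrape()

# Post objects for the posts visible on the profile page
posts = profile.get_recent_posts()

# Scrape each post so attributes such as likes and upload_date are populated
for post in posts:
    post.scrape()

if posts:
    max_post = max(posts, key=lambda post: post.likes)
    print(f"Likes: {max_post.likes}")
    print(f"Shortcode: {max_post['shortcode']}")
```

When looping over many posts it is worth pausing between the per-post `scrape()` calls, in the spirit of the `pause` argument that joebiden.py passes to `scrape_posts`.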
/tutorial/examples/simple_hashtag_comparison/simple_hashtag_comparison.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Hashtag Growth Tracker \n", 8 | "The following is a simple example that uses instascrape to track the growth of two hashtags over a given period and compare them" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import time\n", 18 | "import datetime\n", 19 | "import sys \n", 20 | "import os \n", 21 | "\n", 22 | "import numpy as np\n", 23 | "import matplotlib.pyplot as plt \n", 24 | "import pandas as pd \n", 25 | "\n", 26 | "sys.path.insert(0, os.path.abspath('..'))\n", 27 | "from instascrape import Hashtag" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "We will start by running a loop and scraping data at random intervals until the predefined timeframe has elapsed." 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "#Metadata \n", 44 | "TOTAL_TIME = 60 #Total time to collect data, in seconds \n", 45 | "WAIT_TIME = 5 #Mean wait time for random normal distribution\n", 46 | "photography = Hashtag('photography') \n", 47 | "instagram = Hashtag('instagram')\n", 48 | " \n", 49 | "#Create a list of tuples containing data scraped at random \n", 50 | "# intervals during the time period \n", 51 | "current_time = datetime.datetime.now()\n", 52 | "end_time = current_time + datetime.timedelta(seconds=TOTAL_TIME)\n", 53 | "photography_data = []\n", 54 | "instagram_data = []\n", 55 | "while current_time < end_time:\n", 56 | "    #Wait for a normally randomized amount of time \n", 57 | "    rand_time = abs(np.random.normal(WAIT_TIME, 1.5))\n", 58 | "    time.sleep(rand_time)\n", 59 | "    \n", 60 | "    #Scrape data and append to respective lists \n", 61 | "    photography.scrape()\n", 62 | "    photography_data.append((datetime.datetime.now(), photography.amount_of_posts))\n", 63 | "    instagram.scrape()\n", 64 | "    instagram_data.append((datetime.datetime.now(), instagram.amount_of_posts))\n", 65 | "    \n", 66 | "    #Increment time \n", 67 | "    current_time = datetime.datetime.now()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "Now that the data is scraped, we can create some DataFrames to make manipulating and working with the data easier. " 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "columns = ['time', 'posts']\n", 84 | "photo_df = pd.DataFrame(photography_data, columns=columns)\n", 85 | "insta_df = pd.DataFrame(instagram_data, columns=columns)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "Let's prepare the datasets so that we can plot and see how they're growing. 
We want them both to start at 0, so we'll subtract each DataFrame's first value from all of its values" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 4, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "photo_df['posts'] -= photo_df['posts'].iloc[0]\n", 102 | "insta_df['posts'] -= insta_df['posts'].iloc[0]" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 6, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "data": { 112 | "image/png": "[base64-encoded PNG output omitted: line chart of new posts over time for #photography vs. #instagram]", 113 | "text/plain": [ 114 | "
" 115 | ] 116 | }, 117 | "metadata": { 118 | "needs_background": "light" 119 | }, 120 | "output_type": "display_data" 121 | } 122 | ], 123 | "source": [ 124 | "from pandas.plotting import register_matplotlib_converters\n", 125 | "plt.plot(photo_df['time'], photo_df['posts'], label='#photography')\n", 126 | "plt.plot(insta_df['time'], insta_df['posts'], label='#instagram')\n", 127 | "plt.xlabel('Time')\n", 128 | "plt.ylabel('Posts')\n", 129 | "plt.title('Time vs. Posts')\n", 130 | "plt.legend(loc=\"upper left\")\n", 131 | "plt.show()" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "And that's it! This is just a super small sampling of data and a single usecase of instascrape.Hashtag. If we wanted, we could focus on one hashtag and run a program for 24 hours straight to find the best time of day to post to that hashtag. We could compare 100 different hashtags and see which one's are growing the fastest. There are a ton of possibilities and instascrape.Hashtag is just a simple tool for abstracting out the actual scraping of the data so that you can focus on your algorithms and not on sifting through the HTML and JSON data!" 139 | ] 140 | } 141 | ], 142 | "metadata": { 143 | "kernelspec": { 144 | "display_name": "Python 3.7.4 64-bit ('Chris': virtualenv)", 145 | "language": "python", 146 | "name": "python37464bitchrisvirtualenv7257ff887f2f42e49b4f10d8b8979f3e" 147 | }, 148 | "language_info": { 149 | "codemirror_mode": { 150 | "name": "ipython", 151 | "version": 3 152 | }, 153 | "file_extension": ".py", 154 | "mimetype": "text/x-python", 155 | "name": "python", 156 | "nbconvert_exporter": "python", 157 | "pygments_lexer": "ipython3", 158 | "version": "3.7.4" 159 | } 160 | }, 161 | "nbformat": 4, 162 | "nbformat_minor": 2 163 | } 164 | -------------------------------------------------------------------------------- /tutorial/tutorial/Part 1 - Intro to the API.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Part 1 - Intro to the API\n", 8 | "\n", 9 | "In this section, we're going to get a quick feel for the API so you can get back to doing what you do best: programming. " 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Importing the library\n", 17 | "\n", 18 | "Most of what you'll need from the API is exposed by simply calling:" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import instascrape " 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## The three pillars of instascrape" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "instascrape's API is designed in such a way such that the scrapers also double as semantic representations of what it is they're scraping (this will make more sense in a second). The three workhorse scrapers you'll mostly be dealing with are the `Post`, `Profile`, and `Hashtag` objects. 
Let's take a look at how these work:" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "from instascrape import Post, Profile, Hashtag\n", 51 | "\n", 52 | "google_post = Post('CGQG3-hlcNQ')\n", 53 | "google = Profile('google')\n", 54 | "google_hashtag = Hashtag('google')" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "Awesome! We can see almost immediately what our intentions are with this script: \n", 62 | "- scrape one post\n", 63 | "- one profile\n", 64 | "- and one hashtag. \n", 65 | "\n", 66 | "Before we actually scrape our data though, let's build some contextual background knowledge on the API. \n", 67 | "\n", 68 | "Under the hood, `Post`, `Profile`, and `Hashtag` are sibling subclasses. Thus, once you're familiar with the methods and expected behaviors of one object, this will mostly translate to the others as well. There are only a handful of methods that are obvious and specific to their respective subclass as we will explore in later sections. \n" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## Scraping the data \n", 76 | "\n", 77 | "Let's now scrape some data!" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 3, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "google_post.scrape()\n", 87 | "google.scrape()\n", 88 | "google_hashtag.scrape()" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "And that's it! No, seriously. That's it. To access the scraped data now, we have a couple options. As we discussed earlier, the scrapers also serve as semantic representations of what it is they scraped, let's take a look at what this means in practice:" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "Google has 12,276,851 followers.\n", 108 | "Google is verified\n", 109 | "Google is a business account\n", 110 | "Google is following 31 accounts\n", 111 | "Google has 1,439 posts\n" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "# instascrape supports dot notation\n", 117 | "print(f\"Google has {google.followers:,} followers.\")\n", 118 | "if google.is_verified: \n", 119 | " print(\"Google is verified\")\n", 120 | "if google.is_business_account:\n", 121 | " print(\"Google is a business account\")\n", 122 | "\n", 123 | "# instascrape also supports bracket notation\n", 124 | "print(f\"Google is following {google['following']} accounts\")\n", 125 | "print(f\"Google has {google['posts']:,} posts\")" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "Concise and expressive syntax is what it's all about. Each subclass has their own unique attributes relevant to what it is they scraped. A `Profile` is obviously not going to have an amount of `likes` and a `Post` is not going to have `followers`. \n", 133 | "\n", 134 | "OKAY, now you're about to see a wall of code output. **_DO NOT PANIC_**. EVERYTHING IS FINE. OKAY? Let's take a peak at what the `Post` object offers us after scraping. 
" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 5, 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "name": "stdout", 144 | "output_type": "stream", 145 | "text": [ 146 | "{'csrf_token': 'aY0TLjFUrEAd4RvROeiaWYisEvouQU5E', 'viewer': None, 'viewer_id': None, 'country_code': 'US', 'language_code': 'en', 'locale': 'en_US', 'device_id': '8944C850-29C8-4AF7-B5E4-FF5423F02E1B', 'browser_push_pub_key': 'BIBn3E_rWTci8Xn6P9Xj3btShT85Wdtne0LtwNUyRQ5XjFNkuTq9j4MPAVLvAFhXrUU1A9UxyxBA7YIOjqDIDHI', 'key_id': '238', 'public_key': '68cfdea6eca263a4ace2853a8630039dc43a3a9febaa9e67822793196b475744', 'version': '10', 'is_dev': False, 'rollout_hash': '9fcc62e59353', 'bundle_variant': 'metro', 'frontend_dev': 'prod', 'id': '2418463234883699536', 'shortcode': 'CGQG3-hlcNQ', 'height': 1333, 'width': 750, 'gating_info': None, 'fact_check_overall_rating': None, 'fact_check_information': None, 'sensitivity_friction_info': None, 'media_overlay_info': None, 'media_preview': None, 'display_url': 'https://scontent-iad3-1.cdninstagram.com/v/t51.2885-15/e35/121225391_196671788493786_8291234317057247918_n.jpg?_nc_ht=scontent-iad3-1.cdninstagram.com&_nc_cat=106&_nc_ohc=lOyn-AmAha4AX8pZzcy&tp=1&oh=566c3450f4c31e47999ac0e26189baee&oe=5FCA03F1', 'accessibility_caption': None, 'is_video': True, 'tracking_token': 'eyJ2ZXJzaW9uIjo1LCJwYXlsb2FkIjp7ImlzX2FuYWx5dGljc190cmFja2VkIjp0cnVlLCJ1dWlkIjoiYjU2ODA2MTQyN2U0NGQ2NDk1NTAzMzQ2ZjBjY2UxNzUyNDE4NDYzMjM0ODgzNjk5NTM2In0sInNpZ25hdHVyZSI6IiJ9', 'tagged_users': [], 'caption': 'Sometimes the best moments in life are the ones where we receive little acts of kindness. Hit ▶️ on kindness this #NationalBullyingPreventionMonth with Interland, the online #BeInternetAwesome video game that shows #ItsCoolToBeKind. g.co/KindKingdom. #MySuperG', 'caption_is_edited': False, 'has_ranked_comments': False, 'comments': 215, 'comments_disabled': False, 'commenting_disabled_for_viewer': False, 'timestamp': 1602523317, 'likes': 17534, 'location': nan, 'viewer_has_liked': False, 'viewer_has_saved': False, 'viewer_has_saved_to_collection': False, 'viewer_in_photo_of_you': False, 'viewer_can_reshare': True, 'video_url': 'https://scontent-iad3-1.cdninstagram.com/v/t50.2886-16/121217884_199359144934914_151140672758339747_n.mp4?_nc_ht=scontent-iad3-1.cdninstagram.com&_nc_cat=106&_nc_ohc=ahcCSz9jx9AAX_rbC46&oe=5FC9F317&oh=2458a0cc02181c839f411c49bc97eb2c', 'has_audio': True, 'video_view_count': 88030, 'upload_date': datetime.datetime(2020, 10, 12, 13, 21, 57), 'hashtags': ['NationalBullyingPreventionMonth', 'BeInternetAwesome', 'ItsCoolToBeKind', 'MySuperG']}\n" 147 | ] 148 | } 149 | ], 150 | "source": [ 151 | "print(google_post.to_dict())" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "That there's 49 data points. Not bad for essentially three lines of code! I'll leave examining `Profile` and `Hashtag`'s available attributes as an exercise for the reader (the syntax is exactly the same). " 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "## Alright, that's kinda neat but what else can instascrape handle?\n", 166 | "\n", 167 | "While being able to pass just a shortcode/username/hashtag is pretty to look at, sometimes we'll want to scrape from a different source. instascrape puts the power in your hands with how you want to use it. 
\n", 168 | "\n", 169 | "Earlier, when we instantiated `google = Profile('google')` and called `google.scrape()`, we kicked off an assembly line that \n", 170 | "- creates the full URL for Google's Instagram profile \n", 171 | "- requests HTML from that URL \n", 172 | "- creates BeautifulSoup out of the HTML\n", 173 | "- parses the JSON data from the BeautifulSoup\n", 174 | "- converts the serialized JSON into a Python `dict`\n", 175 | "\n", 176 | "and finally parses data from that `dict` into the user friendly instance attributes that you will use. Internally, you can bypass any of these steps by passing an expected format in as an argument. \n", 177 | "\n", 178 | "This means that instascrape can scrape\n", 179 | "- shortcode/username/hashtag\n", 180 | "- full URL \n", 181 | "- HTML \n", 182 | "- BeautifulSoup\n", 183 | "- serialized JSON \n", 184 | "- JSON dictionary \n", 185 | "\n", 186 | "assuming it comes from a valid Instagram page. \n", 187 | "\n", 188 | "In practice, this allows you to do something like:" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 6, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "name": "stdout", 198 | "output_type": "stream", 199 | "text": [ 200 | "Google has 6 highlight reels.\n" 201 | ] 202 | } 203 | ], 204 | "source": [ 205 | "import requests\n", 206 | "from instascrape import Profile\n", 207 | "\n", 208 | "resp = requests.get(\"https://www.instagram.com/google/\")\n", 209 | "source_html = resp.text\n", 210 | "google = instascrape.Profile(source_html)\n", 211 | "google.scrape()\n", 212 | "print(f\"Google has {google.highlight_reel_count} highlight reels.\")" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "_Flexibility is key_. Some people want to autonomously scrape their business' Instagram once a day and log follower growth over time. Other people are asynchrounously scraping hundreds of posts a second from rotating proxy servers and don't want the restriction of sequential HTTP requests. instascrape hopefully has something for everyone <3" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "# Wrapping up \n", 227 | "\n", 228 | "If you understood most of this, then you're ready to get out there and start scraping some data! There is of course more to explore that we will do in later tutorials but for now, having a solid grasp of the API will certainly give you a legup in using instascrape effectively. " 229 | ] 230 | } 231 | ], 232 | "metadata": { 233 | "kernelspec": { 234 | "display_name": "Python 3", 235 | "language": "python", 236 | "name": "python3" 237 | }, 238 | "language_info": { 239 | "codemirror_mode": { 240 | "name": "ipython", 241 | "version": 3 242 | }, 243 | "file_extension": ".py", 244 | "mimetype": "text/x-python", 245 | "name": "python", 246 | "nbconvert_exporter": "python", 247 | "pygments_lexer": "ipython3", 248 | "version": "3.7.4" 249 | } 250 | }, 251 | "nbformat": 4, 252 | "nbformat_minor": 2 253 | } 254 | --------------------------------------------------------------------------------