├── insta_browser
    ├── db
    │   ├── __init__.py
    │   ├── sql
    │   │   ├── migrations
    │   │   │   └── 1517145035.sql
    │   │   └── init.sql
    │   └── browser_db.py
    ├── processors
    │   ├── __init__.py
    │   ├── not_feed_processor.py
    │   ├── feed_processor.py
    │   └── base_processor.py
    ├── version.py
    ├── __init__.py
    ├── logger.py
    ├── auth.py
    └── browser.py
├── setup.cfg
├── MANIFEST.in
├── requirements.txt
├── .travis.yml
├── setup.py
├── README.rst
├── README.md
└── .gitignore


/insta_browser/db/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/insta_browser/processors/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal = 1


--------------------------------------------------------------------------------
/insta_browser/version.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.8.6.1'
2 | 


--------------------------------------------------------------------------------
/insta_browser/__init__.py:
--------------------------------------------------------------------------------
1 | from insta_browser import browser
2 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.sql
2 | recursive-include insta_browser/ *.py *.sql


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | setuptools
2 | twine
3 | selenium
4 | wheel
5 | tqdm
6 | sphinx
7 | requests
8 | 


--------------------------------------------------------------------------------
/insta_browser/db/sql/migrations/1517145035.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE tmp_counters
2 | (
3 |     login VARCHAR(128) PRIMARY KEY,
4 |     updated_at DATE NOT NULL,
5 |     counters TEXT NOT NULL
6 | );


--------------------------------------------------------------------------------
/insta_browser/db/sql/init.sql:
--------------------------------------------------------------------------------
 1 | CREATE TABLE accounts
 2 | (
 3 |   id       INTEGER PRIMARY KEY,
 4 |   username TEXT
 5 | );
 6 | CREATE TABLE db_version
 7 | (
 8 |   version INTEGER PRIMARY KEY
 9 | );
10 | CREATE TABLE activity
11 | (
12 |   id         INTEGER PRIMARY KEY,
13 |   account_id INTEGER,
14 |   likes      INTEGER DEFAULT 0,
15 |   comments   INTEGER DEFAULT 0,
16 |   follows    INTEGER DEFAULT 0,
17 |   unfollows  INTEGER DEFAULT 0,
18 |   date       TEXT,
19 |   CONSTRAINT likes_accounts_id_fk FOREIGN KEY (account_id) REFERENCES accounts (id)
20 | );
21 | INSERT INTO db_version (version) VALUES (strftime('%s', 'now'));


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 | - 3.6
 4 | #addons:
 5 | #  apt:
 6 | #    packages:
 7 | #    - pandoc
 8 | deploy:
 9 | - provider: pypi
10 |   distributions: sdist bdist_wheel
11 |   user: al_kricha
12 |   password:
13 |     secure: Owo01QGwzEpVCbZM6ucoj97KpJ5vbtqC3kuSz9q/4Xvb+VxoeKci4oDuEQsba9AKJ7G2h6iCws8Yl0rPXYFkz7yBpenSlyEbpiJDyKtsWQDqNdWFaDJvp/vF6OgQE+gw8YXlwpTnmha8d4Y586k+d/Z6CWr8vZ2oZJen12iZbf8ESgY87UEl5Y883qJU4qNI3N0oe68pKNPyfcS4GoTQ6W9/fuEACDr1DHfJX3QKMnnwx4ZtWp/CKfyLyRoP1XJX8WH4W/ZgZyfnSKMmBC6W1Ve0K/1DnAvtsFMtWVqHhJUb+isKoZ+RhSfzdDIVPMf89G75t0kSWiURwGoED0ijAXkSq1GrzFEF/SByql7qcmZdOG/PvwAO5ZXsoSjEkPvSFrYhY9/iKTpG7iHwNBqVlxi9hColLf0mTkGlNZLgklS86JqfoH0/lb3glU+h7ALwyH9REbZRStTG6qx4Z30hlDBzjxX6GUzGEKFZu9JcVXjRNWPuNJWEo/y14840wbsngW8LHkVUO6T1+VwcIEqpiouGUj7UKEVSQH0SYTJxE7Gj+5PiRZgz4tu9GcmdRYeHFwNKPYUl1cwIKBrqMhpZYc/vIlAPMbTEvgtpUgdGNgObVSooH9rwvs0x0shRnA7O0edbrtosqP8sNvbAWur6w47FPXAT02QD1rxVJeCozVk=
14 |   on:
15 |     branch: master
16 |     tags: true
17 | script: nosetests
18 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | from insta_browser import version as v
 3 | 
 4 | 
 5 | description = open('README.rst').read()
 6 | 
 7 | version = v.__version__
 8 | 
 9 | setup(
10 |     name='insta_browser',
11 |     packages=['insta_browser'],
12 |     include_package_data=True,
13 |     version=version,
14 |     description='parsing/automation instagram.com',
15 |     long_description=description,
16 |     author='Aleksej Krichevsky',
17 |     author_email='krich.al.vl@gmail.com',
18 |     url='https://github.com/kricha/insta_browser',
19 |     download_url='https://github.com/kricha/insta_browser/archive/{}.tar.gz'.format(version),
20 |     keywords=['parsing', 'bot', 'instabot', 'automation', 'likes'],
21 |     license='MIT',
22 |     classifiers=[  # look here https://pypi.python.org/pypi?%3Aaction=list_classifiers
23 |         'Development Status :: 3 - Alpha',
24 |         'License :: OSI Approved :: MIT License',
25 |         'Natural Language :: English',
26 |         'Programming Language :: Python',
27 |         'Programming Language :: Python :: 3.6',
28 |         'Topic :: Internet :: WWW/HTTP :: Browsers',
29 |     ],
30 |     install_requires=[
31 |         'selenium',
32 |         'tqdm',
33 |         'requests'
34 |     ],
35 | )
36 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | Instabrowser
 2 | ============
 3 | 
 4 | `Build Status <https://travis-ci.org/kricha/insta_browser>`__
 5 | `PyPI <https://pypi.org/pypi/insta_browser>`__
 6 | 
 7 | | 💻 Library for instagram.com automation.
 8 | | ♥️ Like instagram feed, username profile, location, tag.
 9 | | 🤝 Auto-follow unknown users, during liking, from locations or tags.
10 | | 📊 Get statistics of any public account.
11 | 
12 | Requirements
13 | ~~~~~~~~~~~~
14 | 
15 | -  Python 3
16 | -  `ChromeDriver <https://sites.google.com/a/chromium.org/chromedriver/downloads>`__
17 |    for headless web-surfing
18 | 
19 | Examples
20 | ~~~~~~~~
21 | 
22 | -  Example of using the package to like a specific user's posts:
23 | 
24 |    .. code:: python
25 | 
26 |       import os
27 |       from insta_browser import browser
28 | 
29 |       br = browser.Browser(
30 |           debug=True,cookie_path=os.path.join('var', 'cookies'),
31 |           log_path=os.path.join('var', 'logs'),
32 |           db_path=os.path.join('var', 'db'),
33 |           exclude=os.path.join('var', 'exclude.txt'),
34 |           auto_follow=True
35 |       )
36 | 
37 |       try:
38 |           br.auth('YOUR_INSTA_LOGIN', 'YOUR_INSTA_PASSWORD')
39 |           br.process_user('al_kricha')
40 |           print(br.get_summary())
41 |       finally:
42 |           br.close_all()
43 | 
44 | Other examples can be seen in my repository:
45 | `insta_bot <https://github.com/kricha/insta_bot>`__
46 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Instabrowser
 2 | [![Build Status](https://travis-ci.org/kricha/insta_browser.svg?branch=master)](https://travis-ci.org/kricha/insta_browser)
 3 | [![PyPI](https://img.shields.io/pypi/v/insta_browser.svg)](https://pypi.org/pypi/insta_browser)
 4 | 
 5 | 💻 Library for instagram.com automation.  
 6 | ♥️ Like instagram feed, username profile, location, tag.  
 7 | 🤝 Auto-follow unknown users, during liking, from locations or tags.   
  8 | 📊 Get statistics of any public account.  
 9 | 
10 | ### Requirements
11 | * Python 3
12 | * [ChromeDriver](https://sites.google.com/a/chromium.org/chromedriver/downloads) for headless web-surfing
13 | 
14 | 
15 | ### Examples
16 | 
 17 | * Example of using the package to like a specific user's posts:
18 | 
19 |     ```python
20 |     import os
21 |     from insta_browser import browser
22 | 
23 |     br = browser.Browser(
24 |         debug=True,cookie_path=os.path.join('var', 'cookies'),
25 |         log_path=os.path.join('var', 'logs'),
26 |         db_path=os.path.join('var', 'db'),
27 |         exclude=os.path.join('var', 'exclude.txt'),
28 |         auto_follow=True
29 |     )
30 | 
31 |     try:
32 |         br.auth('YOUR_INSTA_LOGIN', 'YOUR_INSTA_PASSWORD')
33 |         br.process_user('al_kricha')
34 |         print(br.get_summary())
35 |     finally:
36 |         br.close_all()
37 | 
38 |     ```
39 | 
40 | Other examples can be seen in my repository: [insta_bot](https://github.com/kricha/insta_bot)
41 | 


--------------------------------------------------------------------------------
/insta_browser/logger.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | from .version import __version__
 3 | import tempfile
 4 | import os
 5 | 
 6 | 
 7 | class Logger:
 8 |     def __init__(self, log_path=tempfile.gettempdir(), debug=False):
 9 |         self.log_file = os.path.join(log_path, 'insta_browser_{}.log'.format(__version__))
10 |         self.screen_shot_path = os.path.join(log_path, 'screenshot')
11 |         self.debug = debug
12 | 
13 |     def log(self, text, force=False):
14 |         if self.debug or force:
15 |             print(text)
16 |         self.log_to_file(text)
17 | 
18 |     def log_to_file(self, text):
19 |         file = open(self.log_file, 'a')
20 |         log_date = datetime.now()
21 |         formatted_date = log_date.__format__("%d-%m-%Y %H:%M:%S")
22 |         file.write("[{}] {}\n".format(formatted_date, text))
23 | 
24 |     def save_screen_shot(self, browser, screen_shot_name=None):
25 |         """
26 |         Save screen shot and log it
27 |         :param browser:
28 |         :param screen_shot_name:
29 |         :return:
30 |         """
31 |         if screen_shot_name:
32 |             try:
33 |                 screenshot_real_path = os.path.join(self.screen_shot_path, screen_shot_name)
34 |                 browser.save_screenshot(screenshot_real_path)
35 |                 self.log_to_file('Saving screen shot to {}'.format(screenshot_real_path))
36 |                 return True
37 |             except:
38 |                 return False
39 | 


--------------------------------------------------------------------------------
/insta_browser/auth.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | import time
 3 | import tempfile
 4 | import os
 5 | import selenium.common.exceptions as excp
 6 | 
 7 | LOGIN_URL = 'https://instagram.com/accounts/login/'
 8 | 
 9 | 
def auth_with_cookies(browser, logger, login, cookie_path=tempfile.gettempdir()):
    """
    Authenticate to instagram.com with cookies

    Loads the pickled cookies stored as ``<login>.pkl`` in ``cookie_path``,
    adds them to the browser, refreshes the page and checks whether the
    session is authenticated. Any failure is logged and reported as an
    unsuccessful attempt, never raised.

    :param browser: WebDriver
    :param logger: object exposing ``log`` and ``save_screen_shot``
    :param login: account login, used as the cookie file name
    :param cookie_path: directory holding the cookie files; ``None`` falls
        back to the system temp directory
    :return: True when the cookies produced an authenticated session
    """
    if cookie_path is None:
        cookie_path = tempfile.gettempdir()
    logger.save_screen_shot(browser, 'login.png')
    try:
        logger.log('Trying to auth with cookies.')
        # SECURITY: pickle.load can execute arbitrary code, so the cookie
        # directory must only ever contain files written by this package.
        with open(os.path.join(cookie_path, login + '.pkl'), "rb") as cookie_file:
            cookies = pickle.load(cookie_file)
        for cookie in cookies:
            browser.add_cookie(cookie)
        browser.refresh()
        if check_if_user_authenticated(browser):
            logger.log("Successful authorization with cookies.")
            return True
    except Exception as err:
        # Cookie login is best-effort (missing or stale file, rejected
        # cookie); record the reason instead of swallowing it silently.
        logger.log('--->AuthWithCookies: {!r}'.format(err))

    logger.log("Unsuccessful authorization with cookies.")
    return False
34 | 
35 | 
def auth_with_credentials(browser, logger, login, password, cookie_path=tempfile.gettempdir()):
    """
    Authenticate to instagram.com by filling in the login form.

    Opens the login page if needed, submits the credentials and, when the
    session ends up authenticated, stores the ``sessionid`` cookie as
    ``<login>.pkl`` in ``cookie_path`` for later cookie logins.

    :param browser: WebDriver
    :param logger: object exposing ``log``
    :param login: account login
    :param password: account password
    :param cookie_path: directory for the cookie file; ``None`` falls back
        to the system temp directory
    :return: True when the login succeeded, False otherwise
    """
    if cookie_path is None:
        # Browser forwards its own ``cookie_path=None`` default.
        cookie_path = tempfile.gettempdir()
    if browser.current_url != LOGIN_URL:
        browser.get(LOGIN_URL)
    time.sleep(2)
    logger.log('Trying to auth with credentials.')
    login_field = browser.find_element_by_name("username")
    login_field.clear()
    logger.log("--->AuthWithCreds: filling username.")
    login_field.send_keys(login)
    password_field = browser.find_element_by_name("password")
    password_field.clear()
    logger.log("--->AuthWithCreds: filling password.")
    password_field.send_keys(password)
    submit = browser.find_element_by_css_selector("form button")
    logger.log("--->AuthWithCreds: submitting login form.")
    submit.submit()
    time.sleep(3)
    if not check_if_user_authenticated(browser):
        # Nothing is persisted for a failed login: there is no valid
        # session cookie to store.
        logger.log("Unsuccessful authorization with credentials.")
        return False
    logger.log("--->AuthWithCreds: saving cookies.")
    with open(os.path.join(cookie_path, login + '.pkl'), "wb") as cookie_file:
        pickle.dump([browser.get_cookie('sessionid')], cookie_file)
    logger.log("Successful authorization with credentials.")
    return True
60 | 
61 | 
def check_if_user_authenticated(browser):
    """
    Tell whether the current page contains an element matching ``.logged-in``.

    :param browser: WebDriver
    :return: True when the element is found, False when the lookup raises
        NoSuchElementException
    """
    try:
        browser.find_element_by_css_selector(".logged-in")
    except excp.NoSuchElementException:
        return False
    else:
        return True
68 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by .ignore support plugin (hsz.mobi)
  2 | ### JetBrains template
  3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
  4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
  5 | 
  6 | # User-specific stuff:
  7 | .idea/**/workspace.xml
  8 | .idea/**/tasks.xml
  9 | .idea/dictionaries
 10 | 
 11 | # Sensitive or high-churn files:
 12 | .idea/**/dataSources/
 13 | .idea/**/dataSources.ids
 14 | .idea/**/dataSources.xml
 15 | .idea/**/dataSources.local.xml
 16 | .idea/**/sqlDataSources.xml
 17 | .idea/**/dynamic.xml
 18 | .idea/**/uiDesigner.xml
 19 | 
 20 | # Gradle:
 21 | .idea/**/gradle.xml
 22 | .idea/**/libraries
 23 | 
 24 | # Mongo Explorer plugin:
 25 | .idea/**/mongoSettings.xml
 26 | 
 27 | ## File-based project format:
 28 | *.iws
 29 | 
 30 | ## Plugin-specific files:
 31 | 
 32 | # IntelliJ
 33 | /out/
 34 | 
 35 | # mpeltonen/sbt-idea plugin
 36 | .idea_modules/
 37 | 
 38 | # JIRA plugin
 39 | atlassian-ide-plugin.xml
 40 | 
 41 | # Crashlytics plugin (for Android Studio and IntelliJ)
 42 | com_crashlytics_export_strings.xml
 43 | crashlytics.properties
 44 | crashlytics-build.properties
 45 | fabric.properties
 46 | ### Python template
 47 | # Byte-compiled / optimized / DLL files
 48 | __pycache__/
 49 | *.py[cod]
 50 | *$py.class
 51 | 
 52 | # C extensions
 53 | *.so
 54 | 
 55 | # Distribution / packaging
 56 | .Python
 57 | env/
 58 | build/
 59 | develop-eggs/
 60 | dist/
 61 | downloads/
 62 | eggs/
 63 | .eggs/
 64 | lib/
 65 | lib64/
 66 | parts/
 67 | sdist/
 68 | var/
 69 | wheels/
 70 | *.egg-info/
 71 | .installed.cfg
 72 | *.egg
 73 | 
 74 | # PyInstaller
 75 | #  Usually these files are written by a python script from a template
 76 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 77 | *.manifest
 78 | *.spec
 79 | 
 80 | # Installer logs
 81 | pip-log.txt
 82 | pip-delete-this-directory.txt
 83 | 
 84 | # Unit test / coverage reports
 85 | htmlcov/
 86 | .tox/
 87 | .coverage
 88 | .coverage.*
 89 | .cache
 90 | nosetests.xml
 91 | coverage.xml
 92 | *,cover
 93 | .hypothesis/
 94 | 
 95 | # Translations
 96 | *.mo
 97 | *.pot
 98 | 
 99 | # Django stuff:
100 | *.log
101 | local_settings.py
102 | 
103 | # Flask stuff:
104 | instance/
105 | .webassets-cache
106 | 
107 | # Scrapy stuff:
108 | .scrapy
109 | 
110 | # Sphinx documentation
111 | docs/_build/
112 | 
113 | # PyBuilder
114 | target/
115 | 
116 | # Jupyter Notebook
117 | .ipynb_checkpoints
118 | 
119 | # pyenv
120 | .python-version
121 | 
122 | # celery beat schedule file
123 | celerybeat-schedule
124 | 
125 | # SageMath parsed files
126 | *.sage.py
127 | 
128 | # dotenv
129 | .env
130 | 
131 | # virtualenv
132 | .venv
133 | venv/
134 | ENV/
135 | 
136 | # Spyder project settings
137 | .spyderproject
138 | 
139 | # Rope project settings
140 | .ropeproject
141 | ### VirtualEnv template
142 | # Virtualenv
143 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
144 | [Bb]in
145 | [Ii]nclude
146 | [Ll]ib
147 | [Ll]ib64
148 | [Ll]ocal
149 | [Ss]cripts
150 | pyvenv.cfg
151 | pip-selfcheck.json
152 | .idea
153 | 


--------------------------------------------------------------------------------
/insta_browser/processors/not_feed_processor.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | 
  3 | import selenium.common.exceptions as excp
  4 | import time
  5 | import tqdm
  6 | from selenium.webdriver.common.action_chains import ActionChains
  7 | 
  8 | from .base_processor import BaseProcessor
  9 | 
 10 | TOP_POSTS_XPATH = '//*[@id="react-root"]/section/main/article/div[1]/div'
 11 | LATEST_POSTS_XPATH = '//*[@id="react-root"]/section/main/article/div[2]'
 12 | POST_CSS_SELECTOR = '#fb-root + div article'
 13 | 
 14 | 
 15 | class NotFeedProcessor(BaseProcessor):
 16 | 
 17 |     def like_user_profile(self, count):
 18 |         if count:
 19 |             posts_count = count
 20 |         else:
 21 |             posts_from_page = self.browser.find_element_by_css_selector("article header ul li span").text
 22 |             tmp_count = int(re.match('\d+', posts_from_page).group(0))
 23 |             posts_count = self.like_limit if tmp_count > self.like_limit else tmp_count
 24 |         self.browser.find_element_by_css_selector("article > div a").click()
 25 |         self.go_through_posts(posts_count)
 26 | 
 27 |     def like_top(self):
 28 |         self.get_like_limits(9)
 29 |         self.logger.log('Start processing top posts.')
 30 |         self.__get_posts_block(TOP_POSTS_XPATH)
 31 |         self.go_through_posts(self.count)
 32 | 
 33 |     def like_latest(self, count):
 34 |         self.get_like_limits(count)
 35 |         self.logger.log('Start processing latest posts.')
 36 |         self.__get_posts_block(LATEST_POSTS_XPATH)
 37 |         self.go_through_posts(self.count - 9)
 38 | 
 39 |     def go_through_posts(self, count):
 40 |         self.count = count
 41 |         time.sleep(.5)
 42 |         self.post_already_liked = 0
 43 |         progress = tqdm.tqdm(range(self.count))
 44 |         for i in progress:
 45 |             time.sleep(1)
 46 |             self.__like_post()
 47 |             if self.auto_follow:
 48 |                 if self.follow_user():
 49 |                     time.sleep(.5)
 50 |             if not self.__go_to_next_post():
 51 |                 progress.close()
 52 |                 break
 53 |             progress.update()
 54 | 
 55 |     def __like_post(self):
 56 |         """
 57 |         Like posts or skip
 58 | 
 59 |         :return:
 60 |         """
 61 |         if self.__is_not_liked_acc_post():
 62 |             self.heart.click()
 63 |             self.logger.log_to_file('--> like post {}'.format(self.browser.current_url))
 64 |             self.db.likes_increment()
 65 |             self.post_liked += 1
 66 |             self.post_already_liked = 0
 67 |             time.sleep(0.5)
 68 |         elif not self.heart and self.count > 9:
 69 |             self.post_already_liked += 1
 70 |             self.post_skipped += 1
 71 |         else:
 72 |             self.post_skipped += 1
 73 | 
 74 |     def __go_to_next_post(self):
 75 |         """
 76 |         Go to next post on non-feed page
 77 | 
 78 |         :return:
 79 |         """
 80 |         link = self.__has_next()
 81 |         if not link or self.post_already_liked > 4:
 82 |             return False
 83 |         else:
 84 |             link.click()
 85 |             return True
 86 | 
 87 |     def __get_posts_block(self, block_xpath):
 88 |         top_block = self.browser.find_element_by_xpath(block_xpath)
 89 |         post_link = top_block.find_element_by_css_selector("a")
 90 |         ActionChains(self.browser).move_to_element(post_link).click().perform()
 91 | 
 92 |     def __is_not_liked_acc_post(self):
 93 |         """
 94 |         Check if not feed post is liked
 95 | 
 96 |         :return: like WebElement if exist or False if not
 97 |         """
 98 |         self.heart = None
 99 |         try:
100 |             is_not_liked_span = self.browser.find_element_by_css_selector(".ptsdu")
101 |             self.heart = is_not_liked_span.find_element_by_xpath('..')
102 |             return True
103 |         except excp.NoSuchElementException:
104 |             return False
105 | 
106 |     def __has_next(self):
107 |         """
108 |         Check if page has nex link
109 | 
110 |         :return: next link WebElement if exist and False if not
111 |         """
112 |         try:
113 |             next_link = self.browser.find_element_by_css_selector(".HBoOv._1bdSS")
114 |             return next_link
115 |         except excp.NoSuchElementException:
116 |             return False
117 | 


--------------------------------------------------------------------------------
/insta_browser/browser.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from selenium import webdriver
  3 | from .logger import Logger
  4 | from .auth import *
  5 | from .processors.not_feed_processor import *
  6 | from .processors.feed_processor import *
  7 | from .db.browser_db import BrowserDB
  8 | import re
  9 | 
 10 | 
 11 | class Browser:
 12 |     login = ''
 13 |     summary = {}
 14 | 
 15 |     """
 16 |     :param chrome: is deprecated and will be removed in future versions
 17 |     """
 18 | 
 19 |     def __init__(self, debug=False, chrome=False, cookie_path=None, log_path=None, db_path=None,
 20 |                  exclude=None, auto_follow=False):
 21 |         options = webdriver.ChromeOptions()
 22 |         options.add_argument('headless')
 23 |         options.add_argument('--window-size=900,768')
 24 |         self.browser = webdriver.Chrome(chrome_options=options)
 25 |         self.browser.implicitly_wait(10)
 26 | 
 27 |         self.cookie_path = cookie_path
 28 |         self.exclude = exclude or []
 29 |         self.chrome = chrome
 30 |         self.logger = Logger(log_path, debug)
 31 |         self.db = BrowserDB(self.logger, db_path)
 32 |         self.auto_follow = auto_follow
 33 | 
 34 |     def auth(self, login, password):
 35 |         if not login:
 36 |             raise ValueError('Please provide login and password for Browser.auth method')
 37 |         self.db.detect_account(login)
 38 |         br = self.browser
 39 |         self.get("https://www.instagram.com/accounts/login/")
 40 |         time.sleep(1)
 41 |         if not auth_with_cookies(br, self.logger, login, self.cookie_path):
 42 |             auth_with_credentials(br, self.logger, login, password, self.cookie_path)
 43 |         self.login = login
 44 | 
 45 |     def get(self, url):
 46 |         self.browser.get(url)
 47 |         time.sleep(.5)
 48 |         self.logger.log(u'Open ' + self.browser.current_url)
 49 |         return self
 50 | 
 51 |     def close_all(self):
 52 |         self.logger.save_screen_shot(self.browser, 'exit.png')
 53 |         self.browser.close()
 54 |         self.browser.quit()
 55 |         self.logger.log(u'Browser process was ended')
 56 |         self.logger.log(u'')
 57 | 
 58 |     def get_summary(self):
 59 |         log = 'Feed scrolled down {scrolled} times, liked {liked} posts, skipped {skipped} posts,' \
 60 |               ' skipped excluded {excluded} posts'. \
 61 |             format(**self.summary)
 62 |         self.logger.log_to_file(log)
 63 |         return log
 64 | 
 65 |     def process_user(self, username, count=None):
 66 |         br = self.browser
 67 |         self.get("https://www.instagram.com/{}".format(username))
 68 |         self.logger.log("Start liking @{} profile {} posts".format(username, count))
 69 |         processor = NotFeedProcessor(db=self.db, br=br, lg=self.logger)
 70 |         processor.set_auto_follow(self.auto_follow)
 71 |         processor.like_user_profile(count)
 72 |         self.summary = processor.get_summary()
 73 | 
 74 |     def process_location(self, location, count=None):
 75 |         br = self.browser
 76 |         processed_location = re.sub('^(/?explore/locations/|/|/?locations/)', '', location)
 77 |         self.get("https://www.instagram.com/explore/locations/{}".format(processed_location))
 78 |         self.logger.log("Start liking top posts from {} location".format(processed_location))
 79 |         processor = NotFeedProcessor(db=self.db, br=br, lg=self.logger)
 80 |         processor.set_auto_follow(self.auto_follow)
 81 |         processor.like_top()
 82 |         processor.like_latest(count)
 83 |         self.summary = processor.get_summary()
 84 | 
 85 |     def process_tag(self, tag, count=None):
 86 |         br = self.browser
 87 |         self.get("https://www.instagram.com/explore/tags/{}".format(tag))
 88 |         self.logger.log("Start liking top posts from #{} tag".format(tag))
 89 |         processor = NotFeedProcessor(db=self.db, br=br, lg=self.logger)
 90 |         processor.set_auto_follow(self.auto_follow)
 91 |         processor.like_top()
 92 |         processor.like_latest(count)
 93 |         self.summary = processor.get_summary()
 94 | 
 95 |     def process_feed(self, count=None):
 96 |         br = self.browser
 97 |         self.get("https://instagram.com/")
 98 |         time.sleep(.5)
 99 |         processor = FeedProcessor(db=self.db, br=br, lg=self.logger)
100 |         processor.set_auto_follow(self.auto_follow)
101 |         processor.scroll_feed_to_last_not_liked_posts(count)
102 |         processor.process(self.exclude, self.login)
103 |         self.summary = processor.get_summary()
104 | 


--------------------------------------------------------------------------------
/insta_browser/processors/feed_processor.py:
--------------------------------------------------------------------------------
  1 | import selenium.common.exceptions as excp
  2 | import time
  3 | import tqdm
  4 | from selenium.webdriver.common.by import By
  5 | from selenium.webdriver.support import expected_conditions as EC
  6 | from selenium.webdriver.support.ui import WebDriverWait
  7 | 
  8 | from .base_processor import BaseProcessor
  9 | 
 10 | NOT_LIKED_CSS_CLASS = '.coreSpriteHeartOpen'
 11 | 
 12 | 
 13 | class FeedProcessor(BaseProcessor):
 14 |     posts_list = []
 15 |     posts_hash_list = []
 16 | 
 17 |     def scroll_feed_to_last_not_liked_posts(self, count):
 18 |         """
 19 |         Scroll down feed to last not liked post
 20 | 
 21 |         :return:
 22 |         """
 23 |         self.get_like_limits(count)
 24 |         self.logger.log('Start scrolling page.')
 25 |         while self.__is_last_post_in_feed_not_liked():
 26 |             self.__scroll_down()
 27 | 
 28 |     def __scroll_down(self):
 29 |         """
 30 |         Moving to footer and waiting for querying new posts
 31 | 
 32 |         :return:
 33 |         """
 34 |         last_post = WebDriverWait(self.browser, 10). \
 35 |             until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'article:last-child')))
 36 |         self.browser.execute_script("return arguments[0].scrollIntoView();", last_post)
 37 |         self.logger.log_to_file('---> scrolled down.')
 38 |         self.feed_scrolled_down += 1
 39 |         time.sleep(1)
 40 | 
 41 |     def __is_last_post_in_feed_not_liked(self):
 42 |         """
 43 |         Check last five posts if they are not liked
 44 | 
 45 |         :return: False if one of five posts wasn't liked, True if all five were liked
 46 |         """
 47 |         posts = self.browser.find_elements_by_tag_name('article')
 48 |         for post in posts:
 49 |             post_link = self._get_feed_post_link(post)
 50 |             if post_link not in self.posts_hash_list:
 51 |                 self.posts_hash_list.append(post_link)
 52 |                 self.posts_list.append({'pl': post_link, 'p': post})
 53 | 
 54 |         if 0 < len(self.posts_list) >= self.count:
 55 |             return False
 56 | 
 57 |         try:
 58 |             for i in range(5):
 59 |                 post = posts.pop()
 60 |                 post.find_element_by_css_selector(NOT_LIKED_CSS_CLASS)
 61 |             del posts
 62 |             return True
 63 |         except excp.NoSuchElementException:
 64 |             return False
 65 |         except IndexError:
 66 |             return True
 67 | 
 68 |     def process(self, exclude, login):
 69 |         """
 70 |         liking pre-processed posts. Moving to each post with ActionChains
 71 | 
 72 |         :param exclude:
 73 |         :param login:
 74 |         :param count:
 75 |         :return:
 76 |         """
 77 |         br = self.browser
 78 | 
 79 |         self.posts_list.reverse()
 80 | 
 81 |         progress = tqdm.tqdm(self.posts_list)
 82 |         for post in progress:
 83 |             real_time_posts = br.find_elements_by_tag_name('article')
 84 |             post_link = post.get('pl')
 85 |             filtered_posts = [p for p in real_time_posts if self._get_feed_post_link(p) == post_link]
 86 |             if filtered_posts.__len__():
 87 |                 real_post = filtered_posts.pop()
 88 |                 # scroll to real post in markup
 89 |                 heart = real_post.find_element_by_css_selector('div:nth-child(3) section a:first-child')
 90 |                 self.browser.execute_script("return arguments[0].scrollIntoView(false);", heart)
 91 |                 # getting need to process elements
 92 |                 author = real_post.find_element_by_css_selector('div:first-child .notranslate').text
 93 |                 heart_classes = heart.find_element_by_css_selector('span').get_attribute('class')
 94 |                 # check restrictions
 95 |                 is_not_liked = 'coreSpriteHeartOpen' in heart_classes
 96 |                 is_mine = author == login
 97 |                 need_to_exclude = author in exclude
 98 | 
 99 |                 if is_mine or not is_not_liked:
100 |                     self.post_skipped += 1
101 |                     pass
102 |                 elif need_to_exclude:
103 |                     self.post_skipped_excluded += 1
104 |                     pass
105 |                 else:
106 |                     # like this post
107 |                     time.sleep(.3)
108 |                     heart.click()
109 |                     time.sleep(.7)
110 |                     self.db.likes_increment()
111 |                     self.post_liked += 1
112 |                     log = '---> liked @{} post {}'.format(author, post_link)
113 |                     self.logger.log_to_file(log)
114 | 
115 |                 progress.update()
116 | 


--------------------------------------------------------------------------------
/insta_browser/processors/base_processor.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | import selenium.common.exceptions as excp
  3 | from selenium.webdriver.chrome.webdriver import WebDriver
  4 | from selenium.webdriver.remote.webelement import WebElement
  5 | 
  6 | from insta_browser.db.browser_db import BrowserDB
  7 | from ..logger import Logger
  8 | 
  9 | try:
 10 |     # For Python 3.0 and later
 11 |     from urllib.request import urlopen
 12 | except ImportError:
 13 |     # Fall back to Python 2's urllib2
 14 |     from urllib2 import urlopen
 15 | 
 16 | import json
 17 | 
 18 | 
 19 | class BaseProcessor:
 20 |     post_skipped_excluded = 0
 21 |     posts_count_to_like = 0
 22 |     feed_scrolled_down = 0
 23 |     post_already_liked = 0
 24 |     post_excluded = 0
 25 |     post_skipped = 0
 26 |     auto_follow = False
 27 |     like_limit = 416
 28 |     progress = None
 29 |     post_liked = 0
 30 |     heart = None
 31 |     count = 0
 32 |     hour_like_limit = 150
 33 | 
 34 |     def __init__(self, db, br, lg):
 35 |         self.db: BrowserDB = db
 36 |         self.browser: WebDriver = br
 37 |         self.logger: Logger = lg
 38 | 
 39 |     def get_summary(self):
 40 |         return {'liked': self.post_liked,
 41 |                 'skipped': self.post_skipped,
 42 |                 'excluded': self.post_skipped_excluded,
 43 |                 'already_liked': self.post_already_liked,
 44 |                 'scrolled': self.feed_scrolled_down}
 45 | 
 46 |     @staticmethod
 47 |     def _get_feed_post_link(post: WebElement):
 48 |         """
 49 |         Get link to post from post web-element from feed
 50 |         :param post: WebElement
 51 |         :return:
 52 |         """
 53 |         try:
 54 |             post_link = post.find_element_by_css_selector('div:nth-child(3) div:nth-child(4) a')
 55 |         except excp.NoSuchElementException:
 56 |             post_link = post.find_element_by_css_selector('div:nth-child(3) div:nth-child(3) a')
 57 |         return post_link.get_attribute('href')
 58 | 
 59 |     @staticmethod
 60 |     def _get_feed_post_media(post: WebElement):
 61 |         """
 62 |         Get link to post from post web-element from feed
 63 |         :param post: WebElement
 64 |         :return: str
 65 |         """
 66 |         try:
 67 |             image = post.find_element_by_css_selector('div:nth-child(2) img')
 68 |             return image.get_attribute('src')
 69 |         except excp.NoSuchElementException:
 70 |             pass
 71 | 
 72 |         try:
 73 |             video = post.find_element_by_tag_name('video')
 74 |             return video.get_attribute('src')
 75 |         except excp.NoSuchElementException:
 76 |             pass
 77 | 
 78 |         return False
 79 | 
 80 |     def follow_user(self) -> bool:
 81 |         """
 82 |         Follow user if need and could
 83 |         :return: bool
 84 |         """
 85 |         if self.__could_i_follow():
 86 |             # Second if, because we don't need to make http requests if user reaches follow limits
 87 |             if self.__do_i_need_to_follow_this_user():
 88 |                 try:
 89 |                     follow_button = self.browser.find_element_by_css_selector('._5f5mN')
 90 |                     follow_button.click()
 91 |                     self.db.follows_increment()
 92 |                     return True
 93 |                 except excp.NoSuchElementException:
 94 |                     self.logger.log('Cant find follow button.')
 95 | 
 96 |         return False
 97 | 
 98 |     def __could_i_follow(self) -> bool:
 99 |         """
100 |         Check if i could to follow more
101 |         :return: bool
102 |         """
103 |         counters = self.db.get_follow_limits_by_account()
104 |         return counters['daily'] < 1001 and counters['hourly'] < 76
105 | 
106 |     def __do_i_need_to_follow_this_user(self) -> bool:
107 |         """
108 |         Check if i need to follow current user
109 |         :return: bool
110 |         """
111 |         self.browser.implicitly_wait(1)
112 | 
113 |         try:
114 |             self.browser.find_element_by_css_selector('.qPANj')
115 |             return False
116 |         except excp.NoSuchElementException:
117 |             username = self.browser.find_element_by_css_selector('.notranslate').text
118 |             counters = self.__get_counters(username)
119 |             if not counters:
120 |                 user_link = 'https://www.instagram.com/{}/?__a=1'.format(username)
121 |                 response = urlopen(user_link)
122 |                 data = json.loads(response.read().decode('utf-8'))
123 |                 counters['followers'] = data['user']['followed_by']['count']
124 |                 counters['following'] = data['user']['follows']['count']
125 |                 counters['posts'] = data['user']['media']['count']
126 |                 need_to_be_followed = counters['posts'] > 10 and counters['following'] < 500 and counters[
127 |                     'followers'] < 1000
128 |                 if not need_to_be_followed:
129 |                     self.db.store_user_counters(username, counters)
130 |                 return need_to_be_followed
131 |             else:
132 |                 return False
133 | 
134 |     def __get_counters(self, login):
135 |         counters = self.db.get_user_counters(login)
136 |         today = datetime.date.today()
137 |         updated_at = datetime.datetime.strptime(counters['updated_at'], '%Y-%m-%d')
138 |         updated_at_date = datetime.date(year=updated_at.year, month=updated_at.month, day=updated_at.day)
139 |         if (today - updated_at_date).days > 31:
140 |             return {}
141 |         return counters['counters']
142 | 
143 |     # TODO: refactor this
144 |     def get_like_limits(self, count=None):
145 |         limits = self.db.get_like_limits_by_account()
146 |         today_likes = limits[0]
147 |         hours_left = limits[1]
148 |         hour_likes_by_activity = (self.hour_like_limit * 24 - today_likes) // hours_left
149 |         ll = None
150 |         if self.hour_like_limit <= hour_likes_by_activity < self.hour_like_limit * 2:
151 |             ll = hour_likes_by_activity
152 |         elif hour_likes_by_activity >= self.hour_like_limit * 2:
153 |             ll = self.hour_like_limit * 2
154 |         elif hour_likes_by_activity < self.hour_like_limit:
155 |             ll = hour_likes_by_activity
156 |         self.count = count if 0 < count < ll else ll
157 |         return self.count
158 | 
159 |     def set_auto_follow(self, flag: bool):
160 |         """
161 |         Enable or disable auto follow mode
162 |         :param flag:
163 |         :return:
164 |         """
165 |         self.auto_follow = flag
166 |         return self
167 | 


--------------------------------------------------------------------------------
/insta_browser/db/browser_db.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import tempfile
  3 | 
  4 | import json
  5 | import sqlite3
  6 | import datetime
  7 | 
  8 | 
  9 | 
 10 | class BrowserDB:
 11 |     user_counters_table = 'tmp_counters'
 12 |     sql_path = os.path.dirname(os.path.abspath(__file__))
 13 |     account_id = None
 14 | 
 15 |     def __init__(self, logger, db_path=tempfile.gettempdir()):
 16 |         self.logger = logger
 17 |         self.db = self.__connect_db(db_path)
 18 |         self.__init_db()
 19 | 
 20 |     def __connect_db(self, db_path):
 21 |         db_name = 'insta_browser.sqlite3'
 22 |         try:
 23 |             connect_path = os.path.join(db_path, db_name)
 24 |             db = sqlite3.connect(connect_path)
 25 |         except:
 26 |             connect_path = os.path.join(tempfile.gettempdir(), db_name)
 27 |             db = sqlite3.connect(connect_path)
 28 |         self.db_log('connected to {} database')
 29 |         return db
 30 | 
 31 |     def __init_db(self):
 32 |         db = self.db
 33 |         cursor = db.cursor()
 34 |         self.__create_update_db(self.__check_db_version())
 35 | 
 36 |     def __check_db_version(self):
 37 |         cur = self.db.cursor()
 38 |         q = 'SELECT version FROM db_version ORDER BY version DESC'
 39 |         self.db_log('query: {}'.format(q))
 40 |         try:
 41 |             cur.execute(q)
 42 |             ver = cur.fetchone()
 43 |         except sqlite3.OperationalError:
 44 |             ver = False
 45 |         self.db_log('result: {}'.format(ver))
 46 |         return ver
 47 | 
 48 |     def __create_update_db(self, version):
 49 |         db = self.db
 50 |         if not version:
 51 |             self.logger.log_to_file('')
 52 |             create_sql = open(os.path.join(self.sql_path, 'sql', 'init.sql'), 'r').read()
 53 |             db.cursor().executescript(create_sql)
 54 |             self.db_log('creating new db')
 55 |             version = (0,)
 56 | 
 57 |         migration_path = os.path.join(self.sql_path, 'sql', 'migrations')
 58 |         files = [f for f in os.listdir(migration_path) if
 59 |                  os.path.isfile(os.path.join(migration_path, f)) and int(f.replace('.sql', '')) > version[0]]
 60 |         files.sort(key=str.lower)
 61 |         for file in files:
 62 |             migration_sql = open(os.path.join(migration_path, file), 'r').read()
 63 |             db.cursor().executescript(migration_sql)
 64 |             self.db_log('migrate to {}'.format(file))
 65 |             db.cursor().execute("UPDATE db_version SET version={};".format(file.replace('.sql', '')))
 66 | 
 67 |     def get_user_counters(self, login):
 68 |         result = {'updated_at': (datetime.date.today() + datetime.timedelta(days=-40)).strftime("%Y-%m-%d")}
 69 |         query = 'SELECT * FROM {} WHERE login = ?'.format(self.user_counters_table)
 70 |         row = self.db.cursor().execute(query, [login]).fetchone()
 71 |         if row:
 72 |             result = {'updated_at': row[1], 'counters': json.loads(row[2])}
 73 |         return result
 74 | 
 75 |     def store_user_counters(self, login, counters):
 76 |         query = "REPLACE INTO {} (login, updated_at, counters) VALUES (?, strftime('%Y-%m-%d', 'now'), ?)".format(
 77 |             self.user_counters_table)
 78 |         self.db.cursor().execute(query, [login, json.dumps(counters)])
 79 | 
 80 |     def detect_account(self, login):
 81 |         cur = self.db.cursor()
 82 |         if not self.get_account_id(login):
 83 |             q = 'INSERT INTO accounts (username) VALUES (?)'
 84 |             p = [login]
 85 |             cur.execute(q, p)
 86 |             self.db.commit()
 87 |             self.db_log('query: {}, params: {}'.format(q, p))
 88 |             self.detect_account(login)
 89 | 
 90 |     def get_account_id(self, login):
 91 |         q = 'SELECT id FROM accounts WHERE username = :login;'
 92 |         params = {'login': login}
 93 |         row = self.db.cursor().execute(q, params).fetchone()
 94 |         self.db_log('query: {}, params: {}, result: {}'.format(q, params, row))
 95 |         if row:
 96 |             self.account_id = row[0]
 97 |         return self.account_id
 98 | 
 99 |     def get_like_limits_by_account(self):
100 |         cur = self.db.cursor()
101 |         row = cur.execute(SELECT_LIKE_LIMITS_QUERY, [self.account_id]).fetchone()
102 |         return row
103 | 
104 |     def get_follow_limits_by_account(self):
105 |         cur = self.db.cursor()
106 |         row1 = cur.execute(SELECT_FOLLOW_TODAYS_LIMITS_QUERY, [self.account_id]).fetchone()
107 |         row2 = cur.execute(SELECT_FOLLOW_HOURS_LIMITS_QUERY, [self.account_id]).fetchone()
108 |         return {'daily': row1[0], 'hourly': row2[0], 'hours_left': row1[1]}
109 | 
110 |     def likes_increment(self):
111 |         params = [self.account_id]
112 |         self.db.execute(INSERT_UPDATE_LIKES_QUERY, params)
113 |         self.db_log('query: {}, params: {}'.format(''.join(INSERT_UPDATE_LIKES_QUERY.splitlines()), params))
114 |         self.db.commit()
115 | 
116 |     def follows_increment(self):
117 |         params = [self.account_id]
118 |         self.db.execute(INSERT_UPDATE_FOLLOWS_QUERY, params)
119 |         self.db_log('query: {}, params: {}'.format(''.join(INSERT_UPDATE_FOLLOWS_QUERY.splitlines()), params))
120 |         self.db.commit()
121 | 
122 |     def db_log(self, text):
123 |         self.logger.log_to_file('[SQLITE] {}'.format(text))
124 | 
125 | 
# Like counters of one account for the current day.
# Bound parameter: account id.
# Result row: (`limit`, hours_left) where `limit` is the sum of `likes` over
# the activity rows dated between the start of today and one second before
# the start of tomorrow (0 if there are none), and hours_left is 24 minus the
# current hour.
# NOTE(review): SQLite evaluates 'now' in UTC unless the 'localtime' modifier
# is used - confirm UTC is the intended day boundary.
SELECT_LIKE_LIMITS_QUERY = '''
SELECT
  ifnull(sum(likes), 0)      AS `limit`,
  24 - strftime('%H', 'now') AS hours_left
FROM activity
WHERE account_id = ? AND
      datetime(date) BETWEEN datetime('now', 'start of day') AND datetime('now', 'start of day', '+1 day', '-1 second');
'''
134 | 
# Follow counters of one account for the current day.
# Bound parameter: account id.
# Result row: (`limit`, hours_left) where `limit` is the sum of `follows` over
# the activity rows dated between the start of today and one second before
# the start of tomorrow (0 if there are none), and hours_left is 24 minus the
# current hour.
SELECT_FOLLOW_TODAYS_LIMITS_QUERY = '''
SELECT
  ifnull(sum(follows), 0)      AS `limit`,
  24 - strftime('%H', 'now') AS hours_left
FROM activity
WHERE account_id = ? AND
      datetime(date) BETWEEN datetime('now', 'start of day') AND datetime('now', 'start of day', '+1 day', '-1 second');
'''
143 | 
# Follow counter of one account for the current hour.
# Bound parameter: account id.
# Result row: (`follows_in_hour`,) - the sum of `follows` over the activity
# rows dated between HH:00:00 and HH:59:59 of the current hour (0 if none).
SELECT_FOLLOW_HOURS_LIMITS_QUERY = '''
SELECT
  ifnull(sum(follows), 0)      AS `follows_in_hour`
FROM activity
WHERE account_id = ? AND
      datetime(date) BETWEEN (strftime('%Y-%m-%d %H', 'now') || ':00:00') AND (strftime('%Y-%m-%d %H', 'now') || ':59:59');
'''
151 | 
# Add one like to the activity row of the current hour.
# Bound parameter: account id.
# The CTE `new` holds the (account_id, date) pair with the date truncated to
# HH:00:00. The LEFT JOIN picks up the existing activity row for that pair,
# if any, and INSERT OR REPLACE writes it back with likes + 1 while the other
# counters are carried over unchanged.
# When no row exists yet, a.id is NULL and every counter starts from 0.
# NOTE(review): presumably `id` is an auto-assigned primary key, so a NULL id
# creates a new row - confirm against the schema in sql/init.sql.
INSERT_UPDATE_LIKES_QUERY = '''
WITH new (account_id, date) AS (VALUES (?, strftime('%Y-%m-%d %H', 'now') || ':00:00'))
INSERT OR REPLACE INTO activity (id, account_id, likes, comments, follows, unfollows, date)
  SELECT
    a.id,
    n.account_id,
    ifnull(a.likes, 0) + 1,
    ifnull(a.comments, 0),
    ifnull(a.follows, 0),
    ifnull(a.unfollows, 0),
    n.date
  FROM new n
    LEFT JOIN activity a ON a.account_id = n.account_id AND a.date = n.date;
'''
166 | 
# Add one follow to the activity row of the current hour.
# Bound parameter: account id.
# The CTE `new` holds the (account_id, date) pair with the date truncated to
# HH:00:00. The LEFT JOIN picks up the existing activity row for that pair,
# if any, and INSERT OR REPLACE writes it back with follows + 1 while the
# other counters are carried over unchanged.
# When no row exists yet, a.id is NULL and every counter starts from 0.
# NOTE(review): presumably `id` is an auto-assigned primary key, so a NULL id
# creates a new row - confirm against the schema in sql/init.sql.
INSERT_UPDATE_FOLLOWS_QUERY = '''
WITH new (account_id, date) AS (VALUES (?, strftime('%Y-%m-%d %H', 'now') || ':00:00'))
INSERT OR REPLACE INTO activity (id, account_id, likes, comments, follows, unfollows, date)
  SELECT
    a.id,
    n.account_id,
    ifnull(a.likes, 0),
    ifnull(a.comments, 0),
    ifnull(a.follows, 0) + 1,
    ifnull(a.unfollows, 0),
    n.date
  FROM new n
    LEFT JOIN activity a ON a.account_id = n.account_id AND a.date = n.date;
'''
181 | 


--------------------------------------------------------------------------------