├── insta_browser ├── db │ ├── __init__.py │ ├── sql │ │ ├── migrations │ │ │ └── 1517145035.sql │ │ └── init.sql │ └── browser_db.py ├── processors │ ├── __init__.py │ ├── not_feed_processor.py │ ├── feed_processor.py │ └── base_processor.py ├── version.py ├── __init__.py ├── logger.py ├── auth.py └── browser.py ├── setup.cfg ├── MANIFEST.in ├── requirements.txt ├── .travis.yml ├── setup.py ├── README.rst ├── README.md └── .gitignore /insta_browser/db/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /insta_browser/processors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 -------------------------------------------------------------------------------- /insta_browser/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.8.6.1' 2 | -------------------------------------------------------------------------------- /insta_browser/__init__.py: -------------------------------------------------------------------------------- 1 | from insta_browser import browser 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.sql 2 | recursive-include insta_browser/ *.py *.sql -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools 2 | twine 3 | selenium 4 | wheel 5 | tqdm 6 | sphinx 7 | requests 8 | 
-------------------------------------------------------------------------------- /insta_browser/db/sql/migrations/1517145035.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE tmp_counters 2 | ( 3 | login VARCHAR(128) PRIMARY KEY, 4 | updated_at DATE NOT NULL, 5 | counters TEXT NOT NULL 6 | ); -------------------------------------------------------------------------------- /insta_browser/db/sql/init.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE accounts 2 | ( 3 | id INTEGER PRIMARY KEY, 4 | username TEXT 5 | ); 6 | CREATE TABLE db_version 7 | ( 8 | version INTEGER PRIMARY KEY 9 | ); 10 | CREATE TABLE activity 11 | ( 12 | id INTEGER PRIMARY KEY, 13 | account_id INTEGER, 14 | likes INTEGER DEFAULT 0, 15 | comments INTEGER DEFAULT 0, 16 | follows INTEGER DEFAULT 0, 17 | unfollows INTEGER DEFAULT 0, 18 | date TEXT, 19 | CONSTRAINT likes_accounts_id_fk FOREIGN KEY (account_id) REFERENCES accounts (id) 20 | ); 21 | INSERT INTO db_version (version) VALUES (strftime('%s', 'now')); -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 3.6 4 | #addons: 5 | # apt: 6 | # packages: 7 | # - pandoc 8 | deploy: 9 | - provider: pypi 10 | distributions: sdist bdist_wheel 11 | user: al_kricha 12 | password: 13 | secure: 
"""Packaging script for the insta_browser distribution."""
import os
import re

from setuptools import find_packages
from setuptools import setup

_HERE = os.path.abspath(os.path.dirname(__file__))


def _read(*path_parts):
    """Return the UTF-8 text of a file located relative to this script."""
    # README.rst contains emoji, so the encoding must not depend on the locale.
    with open(os.path.join(_HERE, *path_parts), encoding='utf-8') as handle:
        return handle.read()


def _read_version():
    """
    Extract ``__version__`` from insta_browser/version.py without importing it.

    Importing the package would execute insta_browser/__init__.py, which imports
    the browser module and therefore selenium -- a dependency that is not
    installed yet while this script is running.
    """
    found = re.search(
        r"""^__version__\s*=\s*['"]([^'"]+)['"]""",
        _read('insta_browser', 'version.py'),
        re.MULTILINE,
    )
    if found is None:
        raise RuntimeError('Unable to find __version__ in insta_browser/version.py')
    return found.group(1)


description = _read('README.rst')

version = _read_version()

setup(
    name='insta_browser',
    # find_packages also picks up insta_browser.db and insta_browser.processors,
    # which a hard-coded ['insta_browser'] silently leaves out of the distribution.
    packages=find_packages(include=['insta_browser', 'insta_browser.*']),
    include_package_data=True,
    version=version,
    description='parsing/automation instagram.com',
    long_description=description,
    author='Aleksej Krichevsky',
    author_email='krich.al.vl@gmail.com',
    url='https://github.com/kricha/insta_browser',
    download_url='https://github.com/kricha/insta_browser/archive/{}.tar.gz'.format(version),
    keywords=['parsing', 'bot', 'instabot', 'automation', 'likes'],
    license='MIT',
    classifiers=[  # look here https://pypi.python.org/pypi?%3Aaction=list_classifiers
        'Development Status :: 3 - Alpha',
        'License :: OSI Approved :: MIT License',
        'Natural Language :: English',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3.6',
        'Topic :: Internet :: WWW/HTTP :: Browsers',
    ],
    install_requires=[
        'selenium',
        'tqdm',
        'requests',
    ],
)
class Logger:
    """Append timestamped messages to a log file and store browser screenshots."""

    def __init__(self, log_path=tempfile.gettempdir(), debug=False):
        """
        :param log_path: directory for the log file and the ``screenshot`` folder;
                         ``None`` falls back to the system temp directory
        :param debug: when true, every logged message is also printed
        """
        # Callers may pass None explicitly; os.path.join(None, ...) would raise TypeError.
        if log_path is None:
            log_path = tempfile.gettempdir()
        self.log_file = os.path.join(log_path, 'insta_browser_{}.log'.format(__version__))
        self.screen_shot_path = os.path.join(log_path, 'screenshot')
        self.debug = debug

    def log(self, text, force=False):
        """
        Write text to the log file, printing it too when debugging or forced

        :param text: message to log
        :param force: print the message even when debug is off
        :return:
        """
        if self.debug or force:
            print(text)
        self.log_to_file(text)

    def log_to_file(self, text):
        """
        Append one timestamped line to the log file

        :param text: message to log
        :return:
        """
        formatted_date = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
        # The context manager closes the handle; previously one handle leaked per call.
        with open(self.log_file, 'a') as log_file:
            log_file.write("[{}] {}\n".format(formatted_date, text))

    def save_screen_shot(self, browser, screen_shot_name=None):
        """
        Save screen shot and log it

        :param browser: object exposing ``save_screenshot(path)``
        :param screen_shot_name: file name inside the screenshot directory
        :return: True when saved and logged, False when no name was given or saving failed
        """
        if not screen_shot_name:
            return False
        screenshot_real_path = os.path.join(self.screen_shot_path, screen_shot_name)
        try:
            # The screenshot directory is not created anywhere else.
            os.makedirs(self.screen_shot_path, exist_ok=True)
            browser.save_screenshot(screenshot_real_path)
            self.log_to_file('Saving screen shot to {}'.format(screenshot_real_path))
        except Exception:
            # Screenshots are best effort, but KeyboardInterrupt/SystemExit must propagate.
            return False
        return True
password_field.send_keys(password) 49 | submit = browser.find_element_by_css_selector("form button") 50 | logger.log("--->AuthWithCreds: submitting login form.") 51 | submit.submit() 52 | time.sleep(3) 53 | logger.log("--->AuthWithCreds: saving cookies.") 54 | pickle.dump([browser.get_cookie('sessionid')], open(os.path.join(cookie_path, login + '.pkl'), "wb")) 55 | if check_if_user_authenticated(browser): 56 | logger.log("Successful authorization with credentials.") 57 | return True 58 | logger.log("Unsuccessful authorization with credentials.") 59 | return False 60 | 61 | 62 | def check_if_user_authenticated(browser): 63 | try: 64 | browser.find_element_by_css_selector(".logged-in") 65 | return True 66 | except excp.NoSuchElementException: 67 | return False 68 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### JetBrains template 3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 5 | 6 | # User-specific stuff: 7 | .idea/**/workspace.xml 8 | .idea/**/tasks.xml 9 | .idea/dictionaries 10 | 11 | # Sensitive or high-churn files: 12 | .idea/**/dataSources/ 13 | .idea/**/dataSources.ids 14 | .idea/**/dataSources.xml 15 | .idea/**/dataSources.local.xml 16 | .idea/**/sqlDataSources.xml 17 | .idea/**/dynamic.xml 18 | .idea/**/uiDesigner.xml 19 | 20 | # Gradle: 21 | .idea/**/gradle.xml 22 | .idea/**/libraries 23 | 24 | # Mongo Explorer plugin: 25 | .idea/**/mongoSettings.xml 26 | 27 | ## File-based project format: 28 | *.iws 29 | 30 | ## Plugin-specific files: 31 | 32 | # IntelliJ 33 | /out/ 34 | 35 | # mpeltonen/sbt-idea plugin 36 | .idea_modules/ 37 | 38 | # JIRA plugin 39 | atlassian-ide-plugin.xml 40 | 41 | # Crashlytics plugin (for Android Studio 
and IntelliJ) 42 | com_crashlytics_export_strings.xml 43 | crashlytics.properties 44 | crashlytics-build.properties 45 | fabric.properties 46 | ### Python template 47 | # Byte-compiled / optimized / DLL files 48 | __pycache__/ 49 | *.py[cod] 50 | *$py.class 51 | 52 | # C extensions 53 | *.so 54 | 55 | # Distribution / packaging 56 | .Python 57 | env/ 58 | build/ 59 | develop-eggs/ 60 | dist/ 61 | downloads/ 62 | eggs/ 63 | .eggs/ 64 | lib/ 65 | lib64/ 66 | parts/ 67 | sdist/ 68 | var/ 69 | wheels/ 70 | *.egg-info/ 71 | .installed.cfg 72 | *.egg 73 | 74 | # PyInstaller 75 | # Usually these files are written by a python script from a template 76 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 77 | *.manifest 78 | *.spec 79 | 80 | # Installer logs 81 | pip-log.txt 82 | pip-delete-this-directory.txt 83 | 84 | # Unit test / coverage reports 85 | htmlcov/ 86 | .tox/ 87 | .coverage 88 | .coverage.* 89 | .cache 90 | nosetests.xml 91 | coverage.xml 92 | *,cover 93 | .hypothesis/ 94 | 95 | # Translations 96 | *.mo 97 | *.pot 98 | 99 | # Django stuff: 100 | *.log 101 | local_settings.py 102 | 103 | # Flask stuff: 104 | instance/ 105 | .webassets-cache 106 | 107 | # Scrapy stuff: 108 | .scrapy 109 | 110 | # Sphinx documentation 111 | docs/_build/ 112 | 113 | # PyBuilder 114 | target/ 115 | 116 | # Jupyter Notebook 117 | .ipynb_checkpoints 118 | 119 | # pyenv 120 | .python-version 121 | 122 | # celery beat schedule file 123 | celerybeat-schedule 124 | 125 | # SageMath parsed files 126 | *.sage.py 127 | 128 | # dotenv 129 | .env 130 | 131 | # virtualenv 132 | .venv 133 | venv/ 134 | ENV/ 135 | 136 | # Spyder project settings 137 | .spyderproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | ### VirtualEnv template 142 | # Virtualenv 143 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ 144 | [Bb]in 145 | [Ii]nclude 146 | [Ll]ib 147 | [Ll]ib64 148 | [Ll]ocal 149 | [Ss]cripts 150 | pyvenv.cfg 151 | pip-selfcheck.json 152 | 
TOP_POSTS_XPATH = '//*[@id="react-root"]/section/main/article/div[1]/div'
LATEST_POSTS_XPATH = '//*[@id="react-root"]/section/main/article/div[2]'
POST_CSS_SELECTOR = '#fb-root + div article'


class NotFeedProcessor(BaseProcessor):
    """Like posts opened from a profile, location or tag page (anything but the feed)."""

    def like_user_profile(self, count):
        """
        Open the first post of the current profile page and like posts from there

        :param count: number of posts to go through; when falsy it is read from
                      the page and capped by ``like_limit``
        :return:
        """
        if count:
            posts_count = count
        else:
            posts_from_page = self.browser.find_element_by_css_selector("article header ul li span").text
            # Drop thousands separators first: "1,234" used to be parsed as 1.
            # NOTE(review): assumes the counter is rendered as plain digits with
            # optional commas -- confirm against the live markup.
            found = re.match(r'\d+', posts_from_page.replace(',', ''))
            tmp_count = int(found.group(0)) if found else 0
            posts_count = min(tmp_count, self.like_limit)
        self.browser.find_element_by_css_selector("article > div a").click()
        self.go_through_posts(posts_count)

    def like_top(self):
        """
        Like the posts of the top block (limit of 9 requested from get_like_limits)

        :return:
        """
        self.get_like_limits(9)
        self.logger.log('Start processing top posts.')
        self.__get_posts_block(TOP_POSTS_XPATH)
        self.go_through_posts(self.count)

    def like_latest(self, count):
        """
        Like the posts of the latest block

        :param count: requested amount, limited by get_like_limits
        :return:
        """
        self.get_like_limits(count)
        self.logger.log('Start processing latest posts.')
        self.__get_posts_block(LATEST_POSTS_XPATH)
        # 9 is subtracted from the limit (like_top requests 9); never pass a negative amount on.
        self.go_through_posts(max(self.count - 9, 0))

    def go_through_posts(self, count):
        """
        Like (and optionally follow) up to ``count`` posts, moving via the next link

        :param count: maximum number of posts to visit
        :return:
        """
        self.count = count
        time.sleep(.5)
        self.post_already_liked = 0
        progress = tqdm.tqdm(range(self.count))
        try:
            # Iterating the tqdm object already advances the bar; an extra
            # progress.update() per step counted every post twice.
            for _ in progress:
                time.sleep(1)
                self.__like_post()
                if self.auto_follow and self.follow_user():
                    time.sleep(.5)
                if not self.__go_to_next_post():
                    break
        finally:
            progress.close()

    def __like_post(self):
        """
        Like posts or skip

        :return:
        """
        if self.__is_not_liked_acc_post():
            self.heart.click()
            self.logger.log_to_file('--> like post {}'.format(self.browser.current_url))
            self.db.likes_increment()
            self.post_liked += 1
            self.post_already_liked = 0
            time.sleep(0.5)
        elif not self.heart and self.count > 9:
            # Consecutive already-liked posts are counted so the walk can stop early.
            self.post_already_liked += 1
            self.post_skipped += 1
        else:
            self.post_skipped += 1

    def __go_to_next_post(self):
        """
        Go to next post on non-feed page

        :return: False when there is no next link or more than 4 posts in a row
                 were already liked, True after clicking the next link
        """
        link = self.__has_next()
        if not link or self.post_already_liked > 4:
            return False
        link.click()
        return True

    def __get_posts_block(self, block_xpath):
        """
        Click the first link found inside the block located by ``block_xpath``

        :param block_xpath: XPath of the posts block
        :return:
        """
        top_block = self.browser.find_element_by_xpath(block_xpath)
        post_link = top_block.find_element_by_css_selector("a")
        ActionChains(self.browser).move_to_element(post_link).click().perform()

    def __is_not_liked_acc_post(self):
        """
        Check if not feed post is liked

        Stores the parent of the ``.ptsdu`` element in ``self.heart`` (None when absent).

        :return: True if the element was found, False if not
        """
        self.heart = None
        try:
            is_not_liked_span = self.browser.find_element_by_css_selector(".ptsdu")
        except excp.NoSuchElementException:
            return False
        self.heart = is_not_liked_span.find_element_by_xpath('..')
        return True

    def __has_next(self):
        """
        Check if page has next link

        :return: next link WebElement if exist and False if not
        """
        try:
            return self.browser.find_element_by_css_selector(".HBoOv._1bdSS")
        except excp.NoSuchElementException:
            return False
class Browser:
    """
    Headless Chrome session that authenticates on instagram.com and runs like processors.

    :param chrome: is deprecated and will be removed in future versions
    """
    login = ''
    summary = {}

    def __init__(self, debug=False, chrome=False, cookie_path=None, log_path=None, db_path=None,
                 exclude=None, auto_follow=False):
        """
        :param debug: print log messages in addition to writing them to the log file
        :param chrome: deprecated, only stored
        :param cookie_path: directory for pickled cookies (temp directory when None)
        :param log_path: directory for logs and screenshots (temp directory when None)
        :param db_path: directory of the sqlite database (temp directory when None)
        :param exclude: logins to skip in the feed: a list, or a path to a text file
                        with one login per line
        :param auto_follow: follow authors while liking
        """
        import tempfile

        options = webdriver.ChromeOptions()
        options.add_argument('headless')
        options.add_argument('--window-size=900,768')
        # NOTE(review): `chrome_options` is the keyword this code was written against;
        # confirm it is still accepted by the installed selenium version.
        self.browser = webdriver.Chrome(chrome_options=options)
        self.browser.implicitly_wait(10)

        # Logger and the auth helpers join these paths with os.path.join, which
        # raises TypeError on None, so the None defaults are resolved here.
        default_dir = tempfile.gettempdir()
        self.cookie_path = cookie_path or default_dir
        self.exclude = self._load_exclude(exclude)
        self.chrome = chrome
        self.logger = Logger(log_path or default_dir, debug)
        self.db = BrowserDB(self.logger, db_path or default_dir)
        self.auto_follow = auto_follow
        # Per-instance dict so instances never share the class-level default.
        self.summary = {}

    @staticmethod
    def _load_exclude(exclude):
        """
        Normalise the ``exclude`` argument

        A string naming an existing file is read as one login per line (blank lines
        ignored); without this, the feed processor's ``author in exclude`` check
        would do substring matching against the path itself. Any other value is
        returned unchanged, and a falsy value becomes an empty list.
        """
        import os

        if not exclude:
            return []
        if isinstance(exclude, str) and os.path.isfile(exclude):
            with open(exclude) as handle:
                return [line.strip() for line in handle if line.strip()]
        return exclude

    def _make_processor(self, processor_class):
        """Build a processor bound to this session with the auto-follow flag applied."""
        processor = processor_class(db=self.db, br=self.browser, lg=self.logger)
        processor.set_auto_follow(self.auto_follow)
        return processor

    def auth(self, login, password):
        """
        Authenticate with stored cookies, falling back to login and password

        :raises ValueError: when login is empty
        """
        if not login:
            raise ValueError('Please provide login and password for Browser.auth method')
        self.db.detect_account(login)
        br = self.browser
        self.get("https://www.instagram.com/accounts/login/")
        time.sleep(1)
        if not auth_with_cookies(br, self.logger, login, self.cookie_path):
            auth_with_credentials(br, self.logger, login, password, self.cookie_path)
        self.login = login

    def get(self, url):
        """
        Open url, pause briefly and log the resulting address

        :return: self
        """
        self.browser.get(url)
        time.sleep(.5)
        self.logger.log(u'Open ' + self.browser.current_url)
        return self

    def close_all(self):
        """Take a final screenshot, then close the window and quit the driver."""
        self.logger.save_screen_shot(self.browser, 'exit.png')
        try:
            self.browser.close()
        finally:
            # quit() must run even if closing the window fails, otherwise the
            # driver process is left behind.
            self.browser.quit()
        self.logger.log(u'Browser process was ended')
        self.logger.log(u'')

    def get_summary(self):
        """
        Format the counters of the last processing run and write them to the log file

        :return: the summary line; counters are 0 when nothing was processed yet
        """
        # Defaults prevent a KeyError when called before any process_* method.
        counters = {'scrolled': 0, 'liked': 0, 'skipped': 0, 'excluded': 0}
        counters.update(self.summary)
        log = 'Feed scrolled down {scrolled} times, liked {liked} posts, skipped {skipped} posts,' \
              ' skipped excluded {excluded} posts'. \
            format(**counters)
        self.logger.log_to_file(log)
        return log

    def process_user(self, username, count=None):
        """Like posts of the given user's profile."""
        self.get("https://www.instagram.com/{}".format(username))
        self.logger.log("Start liking @{} profile {} posts".format(username, count))
        processor = self._make_processor(NotFeedProcessor)
        processor.like_user_profile(count)
        self.summary = processor.get_summary()

    def process_location(self, location, count=None):
        """Like top and latest posts of a location; URL-style prefixes are stripped."""
        processed_location = re.sub('^(/?explore/locations/|/|/?locations/)', '', location)
        self.get("https://www.instagram.com/explore/locations/{}".format(processed_location))
        self.logger.log("Start liking top posts from {} location".format(processed_location))
        processor = self._make_processor(NotFeedProcessor)
        processor.like_top()
        processor.like_latest(count)
        self.summary = processor.get_summary()

    def process_tag(self, tag, count=None):
        """Like top and latest posts of a tag."""
        self.get("https://www.instagram.com/explore/tags/{}".format(tag))
        self.logger.log("Start liking top posts from #{} tag".format(tag))
        processor = self._make_processor(NotFeedProcessor)
        processor.like_top()
        processor.like_latest(count)
        self.summary = processor.get_summary()

    def process_feed(self, count=None):
        """Scroll the feed, then like the collected posts, honouring the exclude list."""
        self.get("https://instagram.com/")
        time.sleep(.5)
        processor = self._make_processor(FeedProcessor)
        processor.scroll_feed_to_last_not_liked_posts(count)
        processor.process(self.exclude, self.login)
        self.summary = processor.get_summary()
NOT_LIKED_CSS_CLASS = '.coreSpriteHeartOpen'


class FeedProcessor(BaseProcessor):
    """Collect posts while scrolling the feed, then like them one by one."""

    # Kept as class-level defaults for compatibility; __init__ shadows them with
    # per-instance lists so separate processors never share collected posts.
    posts_list = []
    posts_hash_list = []

    def __init__(self, db, br, lg):
        """
        :param db: database wrapper, passed through to BaseProcessor
        :param br: web driver, passed through to BaseProcessor
        :param lg: logger, passed through to BaseProcessor
        """
        super().__init__(db, br, lg)
        self.posts_list = []
        self.posts_hash_list = []

    def scroll_feed_to_last_not_liked_posts(self, count):
        """
        Scroll down feed to last not liked post

        :param count: requested amount of posts, limited by get_like_limits
        :return:
        """
        self.get_like_limits(count)
        self.logger.log('Start scrolling page.')
        while self.__is_last_post_in_feed_not_liked():
            self.__scroll_down()

    def __scroll_down(self):
        """
        Scroll the last article into view and wait for new posts to load

        :return:
        """
        last_post = WebDriverWait(self.browser, 10). \
            until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'article:last-child')))
        self.browser.execute_script("return arguments[0].scrollIntoView();", last_post)
        self.logger.log_to_file('---> scrolled down.')
        self.feed_scrolled_down += 1
        time.sleep(1)

    def __is_last_post_in_feed_not_liked(self):
        """
        Remember the posts currently in the feed and decide whether to keep scrolling

        :return: False once at least ``self.count`` posts are collected or one of
                 the last five posts has no not-liked heart; True while the last
                 (up to five) posts are all still not liked
        """
        posts = self.browser.find_elements_by_tag_name('article')
        for post in posts:
            post_link = self._get_feed_post_link(post)
            if post_link not in self.posts_hash_list:
                self.posts_hash_list.append(post_link)
                self.posts_list.append({'pl': post_link, 'p': post})

        if 0 < len(self.posts_list) >= self.count:
            return False

        try:
            # Newest first, at most five posts; fewer posts simply means fewer checks.
            for post in reversed(posts[-5:]):
                post.find_element_by_css_selector(NOT_LIKED_CSS_CLASS)
        except excp.NoSuchElementException:
            return False
        return True

    def process(self, exclude, login):
        """
        Like the pre-processed posts, scrolling each one into view first

        :param exclude: container of author logins whose posts must not be liked
        :param login: own login; own posts are skipped
        :return:
        """
        br = self.browser

        self.posts_list.reverse()

        # Iterating the tqdm object advances the bar by itself; the former extra
        # progress.update() per step counted every post twice.
        progress = tqdm.tqdm(self.posts_list)
        for post in progress:
            real_time_posts = br.find_elements_by_tag_name('article')
            post_link = post.get('pl')
            filtered_posts = [p for p in real_time_posts if self._get_feed_post_link(p) == post_link]
            if not filtered_posts:
                continue
            real_post = filtered_posts.pop()
            # scroll to real post in markup
            heart = real_post.find_element_by_css_selector('div:nth-child(3) section a:first-child')
            self.browser.execute_script("return arguments[0].scrollIntoView(false);", heart)
            # getting need to process elements
            author = real_post.find_element_by_css_selector('div:first-child .notranslate').text
            heart_classes = heart.find_element_by_css_selector('span').get_attribute('class')
            # check restrictions
            is_not_liked = 'coreSpriteHeartOpen' in heart_classes
            is_mine = author == login
            need_to_exclude = author in exclude

            if is_mine or not is_not_liked:
                self.post_skipped += 1
            elif need_to_exclude:
                self.post_skipped_excluded += 1
            else:
                # like this post
                time.sleep(.3)
                heart.click()
                time.sleep(.7)
                self.db.likes_increment()
                self.post_liked += 1
                log = '---> liked @{} post {}'.format(author, post_link)
                self.logger.log_to_file(log)
class BaseProcessor:
    """Counters, limits and follow logic shared by the like processors."""

    post_skipped_excluded = 0
    posts_count_to_like = 0
    feed_scrolled_down = 0
    post_already_liked = 0
    post_excluded = 0
    post_skipped = 0
    auto_follow = False
    like_limit = 416
    progress = None
    post_liked = 0
    heart = None
    count = 0
    hour_like_limit = 150

    def __init__(self, db, br, lg):
        """
        :param db: database wrapper used for limits and counters
        :param br: web driver
        :param lg: logger
        """
        self.db: BrowserDB = db
        self.browser: WebDriver = br
        self.logger: Logger = lg

    def get_summary(self):
        """
        :return: dict with the liked / skipped / excluded / already_liked / scrolled counters
        """
        return {'liked': self.post_liked,
                'skipped': self.post_skipped,
                'excluded': self.post_skipped_excluded,
                'already_liked': self.post_already_liked,
                'scrolled': self.feed_scrolled_down}

    @staticmethod
    def _get_feed_post_link(post: 'WebElement'):
        """
        Get link to post from post web-element from feed
        :param post: WebElement
        :return: value of the link's href attribute
        """
        try:
            post_link = post.find_element_by_css_selector('div:nth-child(3) div:nth-child(4) a')
        except excp.NoSuchElementException:
            # Second known position of the link inside the post markup.
            post_link = post.find_element_by_css_selector('div:nth-child(3) div:nth-child(3) a')
        return post_link.get_attribute('href')

    @staticmethod
    def _get_feed_post_media(post: 'WebElement'):
        """
        Get media source from post web-element from feed
        :param post: WebElement
        :return: src of the image, else src of the video, else False
        """
        try:
            image = post.find_element_by_css_selector('div:nth-child(2) img')
            return image.get_attribute('src')
        except excp.NoSuchElementException:
            pass

        try:
            video = post.find_element_by_tag_name('video')
            return video.get_attribute('src')
        except excp.NoSuchElementException:
            pass

        return False

    def follow_user(self) -> bool:
        """
        Follow user if need and could
        :return: True when the follow button was clicked, False otherwise
        """
        # The limit check comes first so no http request is made once limits are reached.
        if not self.__could_i_follow():
            return False
        if not self.__do_i_need_to_follow_this_user():
            return False
        try:
            follow_button = self.browser.find_element_by_css_selector('._5f5mN')
        except excp.NoSuchElementException:
            self.logger.log('Cant find follow button.')
            return False
        follow_button.click()
        self.db.follows_increment()
        return True

    def __could_i_follow(self) -> bool:
        """
        Check if i could to follow more
        :return: True while the daily counter is below 1001 and the hourly one below 76
        """
        counters = self.db.get_follow_limits_by_account()
        return counters['daily'] < 1001 and counters['hourly'] < 76

    def __do_i_need_to_follow_this_user(self) -> bool:
        """
        Check if i need to follow current user
        :return: True when the author has more than 10 posts, follows fewer than
                 500 accounts and has fewer than 1000 followers
        """
        # NOTE(review): the implicit wait stays at 1 second after this call and is
        # never restored here -- confirm that is intended.
        self.browser.implicitly_wait(1)

        try:
            # NOTE(review): '.qPANj' presumably marks an author who is already
            # followed -- confirm against the page markup.
            self.browser.find_element_by_css_selector('.qPANj')
        except excp.NoSuchElementException:
            pass
        else:
            return False

        username = self.browser.find_element_by_css_selector('.notranslate').text
        counters = self.__get_counters(username)
        if not counters:
            try:
                counters = self.__fetch_counters(username)
            except (OSError, ValueError, KeyError) as err:
                # URLError is an OSError, broken JSON a ValueError, a changed
                # schema a KeyError; none of them should abort the whole run.
                self.logger.log('Cant get counters for @{}: {}'.format(username, err))
                return False
        need_to_be_followed = (counters['posts'] > 10
                               and counters['following'] < 500
                               and counters['followers'] < 1000)
        if not need_to_be_followed:
            # Only rejected users are cached, so they are not requested again soon.
            self.db.store_user_counters(username, counters)
        return need_to_be_followed

    @staticmethod
    def __fetch_counters(username):
        """Request the public profile JSON and return followers/following/posts counts."""
        user_link = 'https://www.instagram.com/{}/?__a=1'.format(username)
        # The context manager closes the http response, which used to leak.
        with urlopen(user_link) as response:
            data = json.loads(response.read().decode('utf-8'))
        user = data['user']
        return {'followers': user['followed_by']['count'],
                'following': user['follows']['count'],
                'posts': user['media']['count']}

    def __get_counters(self, login):
        """
        Return the stored counters for login, or {} when they are older than 31 days
        """
        counters = self.db.get_user_counters(login)
        today = datetime.date.today()
        updated_at_date = datetime.datetime.strptime(counters['updated_at'], '%Y-%m-%d').date()
        if (today - updated_at_date).days > 31:
            return {}
        return counters['counters']

    def get_like_limits(self, count=None):
        """
        Compute how many posts may be liked now and store the result in ``self.count``

        The remaining daily budget (``hour_like_limit * 24`` minus today's likes) is
        spread over the hours left and capped at twice the hourly limit.

        :param count: requested amount; used only when it is a positive number
                      below the computed limit, ``None`` means "no preference"
        :return: the resulting amount
        """
        limits = self.db.get_like_limits_by_account()
        today_likes = limits[0]
        hours_left = limits[1]
        # max(..., 1) guards the division against a zero value for hours_left.
        hour_likes_by_activity = (self.hour_like_limit * 24 - today_likes) // max(hours_left, 1)
        ll = max(0, min(hour_likes_by_activity, self.hour_like_limit * 2))
        # `0 < None` raises TypeError, and None is the default callers pass through.
        self.count = count if count is not None and 0 < count < ll else ll
        return self.count

    def set_auto_follow(self, flag: bool):
        """
        Enable or disable auto follow mode
        :param flag:
        :return: self
        """
        self.auto_follow = flag
        return self
sqlite3.connect(connect_path) 25 | except: 26 | connect_path = os.path.join(tempfile.gettempdir(), db_name) 27 | db = sqlite3.connect(connect_path) 28 | self.db_log('connected to {} database') 29 | return db 30 | 31 | def __init_db(self): 32 | db = self.db 33 | cursor = db.cursor() 34 | self.__create_update_db(self.__check_db_version()) 35 | 36 | def __check_db_version(self): 37 | cur = self.db.cursor() 38 | q = 'SELECT version FROM db_version ORDER BY version DESC' 39 | self.db_log('query: {}'.format(q)) 40 | try: 41 | cur.execute(q) 42 | ver = cur.fetchone() 43 | except sqlite3.OperationalError: 44 | ver = False 45 | self.db_log('result: {}'.format(ver)) 46 | return ver 47 | 48 | def __create_update_db(self, version): 49 | db = self.db 50 | if not version: 51 | self.logger.log_to_file('') 52 | create_sql = open(os.path.join(self.sql_path, 'sql', 'init.sql'), 'r').read() 53 | db.cursor().executescript(create_sql) 54 | self.db_log('creating new db') 55 | version = (0,) 56 | 57 | migration_path = os.path.join(self.sql_path, 'sql', 'migrations') 58 | files = [f for f in os.listdir(migration_path) if 59 | os.path.isfile(os.path.join(migration_path, f)) and int(f.replace('.sql', '')) > version[0]] 60 | files.sort(key=str.lower) 61 | for file in files: 62 | migration_sql = open(os.path.join(migration_path, file), 'r').read() 63 | db.cursor().executescript(migration_sql) 64 | self.db_log('migrate to {}'.format(file)) 65 | db.cursor().execute("UPDATE db_version SET version={};".format(file.replace('.sql', ''))) 66 | 67 | def get_user_counters(self, login): 68 | result = {'updated_at': (datetime.date.today() + datetime.timedelta(days=-40)).strftime("%Y-%m-%d")} 69 | query = 'SELECT * FROM {} WHERE login = ?'.format(self.user_counters_table) 70 | row = self.db.cursor().execute(query, [login]).fetchone() 71 | if row: 72 | result = {'updated_at': row[1], 'counters': json.loads(row[2])} 73 | return result 74 | 75 | def store_user_counters(self, login, counters): 76 | query = 
"REPLACE INTO {} (login, updated_at, counters) VALUES (?, strftime('%Y-%m-%d', 'now'), ?)".format( 77 | self.user_counters_table) 78 | self.db.cursor().execute(query, [login, json.dumps(counters)]) 79 | 80 | def detect_account(self, login): 81 | cur = self.db.cursor() 82 | if not self.get_account_id(login): 83 | q = 'INSERT INTO accounts (username) VALUES (?)' 84 | p = [login] 85 | cur.execute(q, p) 86 | self.db.commit() 87 | self.db_log('query: {}, params: {}'.format(q, p)) 88 | self.detect_account(login) 89 | 90 | def get_account_id(self, login): 91 | q = 'SELECT id FROM accounts WHERE username = :login;' 92 | params = {'login': login} 93 | row = self.db.cursor().execute(q, params).fetchone() 94 | self.db_log('query: {}, params: {}, result: {}'.format(q, params, row)) 95 | if row: 96 | self.account_id = row[0] 97 | return self.account_id 98 | 99 | def get_like_limits_by_account(self): 100 | cur = self.db.cursor() 101 | row = cur.execute(SELECT_LIKE_LIMITS_QUERY, [self.account_id]).fetchone() 102 | return row 103 | 104 | def get_follow_limits_by_account(self): 105 | cur = self.db.cursor() 106 | row1 = cur.execute(SELECT_FOLLOW_TODAYS_LIMITS_QUERY, [self.account_id]).fetchone() 107 | row2 = cur.execute(SELECT_FOLLOW_HOURS_LIMITS_QUERY, [self.account_id]).fetchone() 108 | return {'daily': row1[0], 'hourly': row2[0], 'hours_left': row1[1]} 109 | 110 | def likes_increment(self): 111 | params = [self.account_id] 112 | self.db.execute(INSERT_UPDATE_LIKES_QUERY, params) 113 | self.db_log('query: {}, params: {}'.format(''.join(INSERT_UPDATE_LIKES_QUERY.splitlines()), params)) 114 | self.db.commit() 115 | 116 | def follows_increment(self): 117 | params = [self.account_id] 118 | self.db.execute(INSERT_UPDATE_FOLLOWS_QUERY, params) 119 | self.db_log('query: {}, params: {}'.format(''.join(INSERT_UPDATE_FOLLOWS_QUERY.splitlines()), params)) 120 | self.db.commit() 121 | 122 | def db_log(self, text): 123 | self.logger.log_to_file('[SQLITE] {}'.format(text)) 124 | 125 | 126 | 
# Sum of likes stored for one account (bound as the single `?` parameter) in
# rows dated within the current day, plus `hours_left` = 24 minus the current
# hour. NOTE(review): SQLite evaluates 'now' in UTC unless a 'localtime'
# modifier is used, so "today" here is presumably the UTC day - confirm.
SELECT_LIKE_LIMITS_QUERY = '''
SELECT
  ifnull(sum(likes), 0) AS `limit`,
  24 - strftime('%H', 'now') AS hours_left
FROM activity
WHERE account_id = ? AND
      datetime(date) BETWEEN datetime('now', 'start of day') AND datetime('now', 'start of day', '+1 day', '-1 second');
'''

# Same shape as the like-limits query, but sums the `follows` column.
SELECT_FOLLOW_TODAYS_LIMITS_QUERY = '''
SELECT
  ifnull(sum(follows), 0) AS `limit`,
  24 - strftime('%H', 'now') AS hours_left
FROM activity
WHERE account_id = ? AND
      datetime(date) BETWEEN datetime('now', 'start of day') AND datetime('now', 'start of day', '+1 day', '-1 second');
'''

# Sum of follows stored for one account in rows dated within the current
# clock hour (HH:00:00 through HH:59:59).
SELECT_FOLLOW_HOURS_LIMITS_QUERY = '''
SELECT
  ifnull(sum(follows), 0) AS `follows_in_hour`
FROM activity
WHERE account_id = ? AND
      datetime(date) BETWEEN (strftime('%Y-%m-%d %H', 'now') || ':00:00') AND (strftime('%Y-%m-%d %H', 'now') || ':59:59');
'''

# Upsert of the activity row for (account, current hour). The CTE `new` holds
# the account id and the hour bucket; the LEFT JOIN picks up the existing row
# for that bucket, if any. With no existing row `a.id` is NULL and a new row is
# written with likes = 1; otherwise the row with that id is replaced with
# likes + 1 and the other counters carried over unchanged.
INSERT_UPDATE_LIKES_QUERY = '''
WITH new (account_id, date) AS (VALUES (?, strftime('%Y-%m-%d %H', 'now') || ':00:00'))
INSERT OR REPLACE INTO activity (id, account_id, likes, comments, follows, unfollows, date)
  SELECT
    a.id,
    n.account_id,
    ifnull(a.likes, 0) + 1,
    ifnull(a.comments, 0),
    ifnull(a.follows, 0),
    ifnull(a.unfollows, 0),
    n.date
  FROM new n
    LEFT JOIN activity a ON a.account_id = n.account_id AND a.date = n.date;
'''

# Same upsert as the likes query, but increments `follows` instead of `likes`.
INSERT_UPDATE_FOLLOWS_QUERY = '''
WITH new (account_id, date) AS (VALUES (?, strftime('%Y-%m-%d %H', 'now') || ':00:00'))
INSERT OR REPLACE INTO activity (id, account_id, likes, comments, follows, unfollows, date)
  SELECT
    a.id,
    n.account_id,
    ifnull(a.likes, 0),
    ifnull(a.comments, 0),
    ifnull(a.follows, 0) + 1,
    ifnull(a.unfollows, 0),
    n.date
  FROM new n
    LEFT JOIN activity a ON a.account_id = n.account_id AND a.date = n.date;
'''
--------------------------------------------------------------------------------