├── Project ├── __init__.py ├── Jobs │ ├── ExampleJob.py │ └── __init__.py └── Models.py ├── SiteAutomations ├── __init__.py └── Examples │ ├── __init__.py │ ├── BingExample.py │ └── GoogleExample.py ├── Helpers ├── Requests │ ├── Exceptions.py │ ├── Wait.py │ └── __init__.py ├── Contexts.py ├── Data.py ├── Mailers.py ├── Validation.py ├── Commands.py ├── Controllers.py └── __init__.py ├── __init__.py ├── .gitattributes ├── .env ├── .gitignore ├── UnitTests ├── GMailerTests.py ├── page_tests.json ├── EnvironmentTests.py ├── PageTests.py └── CommandsTests.py ├── Database.py ├── README.md ├── ThreadedExample.py ├── Environment.py ├── Example.py ├── common.py ├── genesis.py └── LICENCE.md /Project/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SiteAutomations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SiteAutomations/Examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Helpers/Requests/Exceptions.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class NoSuchElementException(Exception): 4 | pass 5 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | from genesis import make_project, make_project_scaffold 3 | except ImportError: 4 | from selenext.genesis import make_project, make_project_scaffold 5 | -------------------------------------------------------------------------------- /Helpers/Contexts.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | 3 | 4 | @contextmanager 5 | def quitting(thing): 6 | """ 7 | Calls close() and quit() on thing. 8 | 9 | :param thing: 10 | :return: 11 | """ 12 | 13 | yield thing 14 | 15 | thing.close() 16 | try: 17 | thing.quit() 18 | except Exception: 19 | pass 20 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | # Browsers: chrome, firefox, safari, phantomjs, opera 2 | BROWSER=chrome 3 | 4 | # Database settings 5 | # DB_TYPE values: sql, mysql, postgresql, berkeley 6 | DB_TYPE=sql 7 | DB=default.db 8 | DB_HOST=localhost 9 | DB_PORT=3306 10 | DB_USERNAME=None 11 | DB_PASSWORD=None 12 | 13 | # Admin Settings 14 | ADMIN_EMAIL=None 15 | 16 | # Mailer settings. 17 | GMAIL_USERNAME=none 18 | GMAIL_PASSWORD=none 19 | GMAIL_HOST=smtp.gmail.com 20 | GMAIL_PORT=587 21 | 22 | # Dummy Logger Settings 23 | # Level: INFO, DEBUG, WARN, ERROR, FATAL 24 | DUMMY_LOGGER_LEVEL=DEBUG 25 | -------------------------------------------------------------------------------- /Helpers/Requests/Wait.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class RequestsWait(object): 4 | def __init__(self, driver, wait_time, poll_frequency=1, ignored_exceptions=None): 5 | self.driver = driver 6 | self.wait_time = wait_time 7 | self.poll_frequency = poll_frequency 8 | self.ignored_exceptions = ignored_exceptions 9 | 10 | def until(self, function, *args, **kwargs): 11 | try: 12 | return function(self.driver, *args, **kwargs) 13 | except AttributeError: 14 | return False 15 | 16 | def until_not(self, function, *args, **kwargs): 17 | try: 18 | return function(self.driver, *args, **kwargs) 19 | except AttributeError: 20 | return False 21 | -------------------------------------------------------------------------------- /Project/Jobs/ExampleJob.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | 4 | class SomethingToDo(object): 5 | def __init__(self, some_object): 6 | self.some_object = some_object 7 | 8 | def start(self): 9 | print('Starting job.') 10 | self.some_object.do_something('cool') 11 | 12 | def stop(self): 13 | print('Job done.') 14 | 15 | 16 | class SomeObject(object): 17 | def do_something(self, something): 18 | print('SomeObject doing something {}.'.format(something)) 19 | 20 | 21 | # The start_job function is the only thing required 22 | # for a job to be executed. 23 | def start_job(): 24 | # Set up objects in the start_job function. 25 | something_to_do = SomethingToDo(SomeObject()) 26 | something_to_do.start() 27 | something_to_do.stop() 28 | -------------------------------------------------------------------------------- /Project/Jobs/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def run_job(module_name, *args, **kwargs): 4 | """ 5 | Run a job by name. Any *args and **kwargs will be passed to the 6 | given module's `start_job` function. 7 | 8 | :param module_name: 9 | :return: 10 | """ 11 | 12 | module_obj = __import__('Jobs.{}'.format(module_name), fromlist=['']) 13 | module_attrs = dir(module_obj) 14 | 15 | if 'start_job' in module_attrs: 16 | start_job = getattr(module_obj, 'start_job') 17 | if args and kwargs: 18 | start_job(*args, **kwargs) 19 | elif args: 20 | start_job(*args) 21 | elif kwargs: 22 | start_job(**kwargs) 23 | else: 24 | start_job() 25 | else: 26 | raise AttributeError('Jobs must contain a `start_job` method.') 27 | -------------------------------------------------------------------------------- /Project/Models.py: -------------------------------------------------------------------------------- 1 | """ 2 | The `Models.py` file is where all models are held. 3 | 4 | This may be changed in the future so models are 5 | contained in their own file and or directory, 6 | but as of now, they must be in Models.py. 7 | """ 8 | 9 | import datetime 10 | from peewee import * 11 | from ..Environment import env, get_database 12 | 13 | db = get_database(env("DB_TYPE")) 14 | 15 | 16 | class BaseModel(Model): 17 | class Meta: 18 | database = db 19 | 20 | 21 | # The User model stores passwords for retrieval in case you need 22 | # to log in to a website. It is not meant to store passwords 23 | # securely, but this can be accomplished by modifying the 24 | # `Middleware/UserMiddleware.py` file if it is needed. 25 | class User(BaseModel): 26 | """ The basic User model """ 27 | 28 | username = CharField() 29 | email = CharField() 30 | password = CharField() 31 | created_at = DateTimeField(default=datetime.datetime.now) 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python compiled files 2 | *.pyc 3 | 4 | *.log 5 | test.py 6 | *_ignore.md 7 | 8 | # Databases 9 | *.db 10 | 11 | # PyCharm Project files 12 | .idea 13 | 14 | # Environment file 15 | .env 16 | 17 | # Windows image file caches 18 | Thumbs.db 19 | ehthumbs.db 20 | 21 | # Folder config file 22 | Desktop.ini 23 | 24 | # Recycle Bin used on file shares 25 | $RECYCLE.BIN/ 26 | 27 | # Windows Installer files 28 | *.cab 29 | *.msi 30 | *.msm 31 | *.msp 32 | 33 | # Windows shortcuts 34 | *.lnk 35 | 36 | # ========================= 37 | # Operating System Files 38 | # ========================= 39 | 40 | # OSX 41 | # ========================= 42 | 43 | .DS_Store 44 | .AppleDouble 45 | .LSOverride 46 | 47 | # Thumbnails 48 | ._* 49 | 50 | # Files that might appear in the root of a volume 51 | .DocumentRevisions-V100 52 | .fseventsd 53 | .Spotlight-V100 54 | .TemporaryItems 55 | .Trashes 56 | .VolumeIcon.icns 57 | 58 | # Directories potentially created on remote AFP share 59 | .AppleDB 60 | .AppleDesktop 61 | Network Trash Folder 62 | Temporary Items 63 | .apdisk 64 | -------------------------------------------------------------------------------- /UnitTests/GMailerTests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from Helpers.Mailers import GMailer 3 | 4 | 5 | class GmailerTest(unittest.TestCase): 6 | def setUp(self): 7 | pass 8 | 9 | def test_mailer_set_up_with_invalid_inputs(self): 10 | self.assertRaises(TypeError, GMailer, 123, '123', 'smtp.gmail.com', 123) 11 | self.assertRaises(TypeError, GMailer, 'some_email', 11212, 'smtp.gmail.com', 123) 12 | self.assertRaises(TypeError, GMailer, 'some_email', 'some-password', 1231, 123) 13 | self.assertRaises(TypeError, GMailer, 'some_email', 'some-password', 'smtp.gmail.com', '123') 14 | 15 | def test_mailer_set_up_with_valid_inputs(self): 16 | m = GMailer('some_email', 'some-password', 'some.host.com', 123) 17 | self.assertEqual(type(m.username), str) 18 | self.assertEqual(type(m.password), str) 19 | self.assertEqual(type(m.host), str) 20 | self.assertEqual(type(m.port), int) 21 | 22 | def tearDown(self): 23 | pass 24 | 25 | 26 | def main(): 27 | unittest.main() 28 | 29 | if __name__ == '__main__': 30 | main() 31 | -------------------------------------------------------------------------------- /UnitTests/page_tests.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": "https://google.com", 3 | "elements": { 4 | "search_input": { 5 | "selector": "q", 6 | "lookup_method": "name" 7 | }, 8 | "search_button": { 9 | "selector": "btnG", 10 | "lookup_method": "name" 11 | }, 12 | "google_search_button": { 13 | "selector": "btnK", 14 | "lookup_method": "name" 15 | }, 16 | "results": { 17 | "multiple": true, 18 | "selector": "//h3[@class='r']", 19 | "lookup_method": "xpath" 20 | }, 21 | "bound_results": { 22 | "bind": "str", 23 | "multiple": true, 24 | "selector": "//h3[@class='r']", 25 | "lookup_method": "xpath" 26 | }, 27 | "search_form": { 28 | "selector": "searchform", 29 | "lookup_method": "id", 30 | "parent": { 31 | "selector": "viewport", 32 | "lookup_method": "id", 33 | "parent": { 34 | "selector": "body", 35 | "lookup_method": "tag_name" 36 | } 37 | } 38 | }, 39 | "missing_element": { 40 | "selector": "no_element_yo", 41 | "lookup_method": "id" 42 | } 43 | } 44 | } -------------------------------------------------------------------------------- /SiteAutomations/Examples/BingExample.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | from random import randint 3 | from ...Helpers.Controllers import IndependentController 4 | 5 | 6 | def _do_search(driver, wait, search_term): 7 | """ 8 | Perform a Bing Search. 9 | 10 | :param driver: 11 | :param wait: 12 | :param search_term: 13 | :return: 14 | """ 15 | driver.get('http://www.bing.com/') 16 | 17 | search_input = driver.find_element_by_name('q') 18 | search_button = driver.find_element_by_id('sb_form_go') 19 | 20 | search_input.send_keys(search_term) 21 | sleep(randint(1, 2)) 22 | search_button.click() 23 | 24 | wait.until(lambda driver: driver.find_element_by_xpath('//span[@class="sb_count"]').is_displayed()) 25 | return True 26 | 27 | 28 | class BingSearch(object): 29 | """ 30 | This Controller needs a WebDriver instance, WebDriverWait instance, 31 | and the collection of Models in order to work. 32 | """ 33 | def __init__(self, driver, wait, models): 34 | self.driver = driver 35 | self.wait = wait 36 | self.models = models 37 | 38 | def do_search(self, search_term): 39 | return _do_search(self.driver, self.wait, search_term) 40 | 41 | 42 | class ThreadedBingSearch(IndependentController): 43 | """ 44 | Note that if you inherit from IndependentController, you can use 45 | the CommandManager or ThreadedCommandManager to perform requests 46 | with their own WebDriver instances. 47 | """ 48 | def __init__(self, models): 49 | self.models = models 50 | 51 | def do_search(self, search_term): 52 | return _do_search(self.driver, self.wait, search_term) 53 | -------------------------------------------------------------------------------- /UnitTests/EnvironmentTests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from peewee import SqliteDatabase 4 | from selenium import webdriver 5 | 6 | from Environment import env, env_driver, get_database 7 | 8 | 9 | class ConfigLoaderEnvironmentTest(unittest.TestCase): 10 | def setUp(self): 11 | pass 12 | 13 | def test_env_with_undefined_key(self): 14 | self.assertRaises(KeyError, env, 'DUR') 15 | 16 | def test_env_with_defined_key(self): 17 | self.assertEqual(env('BROWSER'), 'chrome') 18 | 19 | def test_env_driver_with_defined_browser(self): 20 | self.assertEqual(env_driver(env('BROWSER')), webdriver.Chrome) 21 | 22 | def test_env_driver_with_undefined_browser(self): 23 | self.assertEqual(env_driver(env('DB')), False) 24 | 25 | def test_get_database_with_defined_database_type(self): 26 | self.assertIsInstance(get_database(env('DB_TYPE')), SqliteDatabase) 27 | 28 | def test_get_database_with_undefined_database_type(self): 29 | self.assertEqual(get_database(env('BROWSER')), False) 30 | 31 | def test_get_list(self): 32 | self.assertIsInstance(env('LIST'), list) 33 | 34 | def test_get_dict(self): 35 | self.assertIsInstance(env('DICT'), dict) 36 | 37 | def test_list_values(self): 38 | self.assertEqual(env('LIST')[0], 'Item1') 39 | self.assertEqual(env('LIST')[1], 'Item2') 40 | 41 | def test_dict_values(self): 42 | self.assertEqual(env('DICT')['key1'], 'value1') 43 | self.assertEqual(env('DICT')['key2'], 'value2') 44 | 45 | def test_empty_list(self): 46 | self.assertEqual(env('EMPTY_LIST'), []) 47 | 48 | def test_empty_dict(self): 49 | self.assertEqual(env('EMPTY_DICT'), {}) 50 | 51 | def tearDown(self): 52 | pass 53 | 54 | 55 | def main(): 56 | unittest.main() 57 | 58 | if __name__ == '__main__': 59 | main() 60 | -------------------------------------------------------------------------------- /Database.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is used to migrate database tables to the database. 3 | """ 4 | 5 | 6 | def migrate(models): 7 | """ 8 | Run database migrations for the defined models. 9 | 10 | :return: None 11 | """ 12 | models_pieces = [ 13 | 'BareField', 'BigIntegerField', 'BinaryField', 'BlobField', 14 | 'BooleanField', 'CharField', 'Check', 'Clause', 'CompositeKey', 15 | 'DQ', 'DataError', 'DatabaseError', 'DateField', 'DateTimeField', 16 | 'DecimalField', 'DeferredRelation', 'DoesNotExist', 'DoubleField', 17 | 'Field', 'FixedCharField', 'FloatField', 'ForeignKeyField', 18 | 'ImproperlyConfigured', 'IntegerField', 'IntegrityError', 19 | 'InterfaceError', 'InternalError', 'JOIN', 'JOIN_FULL', 20 | 'JOIN_INNER', 'JOIN_LEFT_OUTER', 'Model', 'MySQLDatabase', 21 | 'NotSupportedError', 'OperationalError', 'Param', 22 | 'PostgresqlDatabase', 'PrimaryKeyField', 'ProgrammingError', 23 | 'Proxy', 'R', 'SmallIntegerField', 'SQL', 'SqliteDatabase', 'TextField', 24 | 'TimeField', 25 | 'TimestampField', 'UUIDField', 'Using', 'Window', '__builtins__', 26 | '__doc__', '__file__', '__name__', '__package__', 'datetime', 'db', 27 | 'fn', 'prefetch', 'AutoField', 'Column', 'IPField', 'Index', 'Database', 28 | 'DeferredThroughModel', 'ModelIndex', 'BitField', 'Select', 'SchemaManager', 29 | 'ManyToManyField', 'BigBitField', 'Context', 'Value', 'Table', 'DeferredForeignKey', 30 | 'Cast' 31 | ] 32 | 33 | migrations = list(set(dir(models)) - set(models_pieces)) 34 | migrations.remove('BaseModel') 35 | 36 | # Grab the actual class object for the model. 37 | migrations = [getattr(models, klass) for klass in migrations] 38 | # Make sure we didn't pick up stragglers. 39 | migrations = [c for c in migrations if 'class' in str(c)] 40 | 41 | db = models.db 42 | db.connect() 43 | try: 44 | db.drop_tables(migrations) 45 | except: 46 | pass 47 | db.create_tables(migrations) 48 | db.close() 49 | return 50 | 51 | if __name__ == '__main__': 52 | pass 53 | # from Project import Models 54 | # migrate() 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | selenext is a micro-framework for web automation/scraping using selenium in Python 2 or Python 3. This is currently still in development, however it's possible to write and run your automations within the framework as it is. 2 | 3 | ## Discord 4 | 5 | https://discord.gg/x8KC5Xh 6 | 7 | ## Install 8 | 9 | Just download this as a .zip and then unzip the `selenext` directory into 10 | your `site-packages` folder. Once there is a stable release, there will 11 | be a pip install available. You can also grab the repository and pull 12 | updates from github, just create a symbolic link to the selenext directory 13 | into your python path. 14 | 15 | ## Dependencies 16 | 17 | selenext is pretty modular, so you should only need the dependencies for 18 | the pieces you are using. For database interaction, install: 19 | 20 | * [peewee](http://docs.peewee-orm.com/en/latest/) 21 | `pip install peewee` + installing peewee's dependencies. 22 | 23 | You'll need Selenium for doing any browser automation or web scraping. 24 | 25 | * [selenium python bindings](http://selenium-python.readthedocs.org/) 26 | `pip install selenium` 27 | 28 | You also need the a Web Driver executable like [ChromeDriver](https://sites.google.com/a/chromium.org/chromedriver/downloads) 29 | 30 | ## Optional Dependencies 31 | 32 | If you want to use the `requests` module along with `BeautifulSoup/lxml` 33 | to read the content off a web page using the same API as selenium, you 34 | can use the `WebReader` class to do so. It only supports reading 35 | information off of a page, so methods like `click` are not implemented. 36 | To use the `WebReader`, you will need: 37 | 38 | * [requests](http://docs.python-requests.org/en/master/) 39 | `pip install requests` 40 | * [BeautifulSoup4](https://www.crummy.com/software/BeautifulSoup/) 41 | * [lxml](http://lxml.de/) 42 | 43 | If you need to spin some text, check out [spintax](https://github.com/AceLewis/spintax) for python! 44 | 45 | Once you have the dependencies, you can download this repository and 46 | start using it, however you may want to read over the documentation 47 | below. 48 | 49 | ## Examples 50 | 51 | Check out [the documentation](https://github.com/Wykleph/selenext/wiki/Documentation) for the various parts and pieces in the wiki. API documentation will be coming soon. 52 | -------------------------------------------------------------------------------- /SiteAutomations/Examples/GoogleExample.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is an example of what the a `SiteAutomation` might look like 3 | using selenext. This controller is pretty bare-bones. 4 | """ 5 | 6 | from __future__ import print_function 7 | from time import sleep 8 | from random import randint 9 | from ...Helpers.Controllers import IndependentController, has_kwargs 10 | 11 | 12 | def _do_search(driver, wait, search_term): 13 | """ 14 | Perform a Google search. 15 | 16 | :param driver: 17 | :param wait: 18 | :param search_term: 19 | :return: 20 | """ 21 | driver.get('https://google.com') 22 | 23 | sleep(randint(1, 2)) 24 | 25 | # Type search 26 | search_input = driver.find_element_by_name('q') 27 | search_input.send_keys(search_term) 28 | 29 | sleep(randint(1, 2)) 30 | 31 | # Click search button. 32 | search_button = driver.find_element_by_name('btnG') 33 | search_button.click() 34 | wait.until(lambda the_driver: the_driver.find_element_by_id('resultStats').is_displayed()) 35 | return True 36 | 37 | 38 | class GoogleSearch(object): 39 | """ 40 | This Controller needs a WebDriver instance, WebDriverWait instance, 41 | and the collection of Models in order to work. 42 | """ 43 | def __init__(self, driver, wait, models): 44 | # Set up driver. 45 | if driver.__class__.__name__ != 'WebDriver': 46 | err = "{} is not an instance of WebDriver.".format(str(driver)) 47 | raise TypeError(err) 48 | self.driver = driver 49 | self.wait = wait 50 | self.models = models 51 | 52 | def do_search(self, search_term): 53 | return _do_search(self.driver, self.wait, search_term) 54 | 55 | 56 | class ThreadedGoogleSearch(IndependentController): 57 | """ 58 | Note that if you inherit from IndependentController, you can use 59 | the CommandManager or ThreadedCommandManager to perform requests 60 | with their own WebDriver instances. 61 | """ 62 | def __init__(self, models): 63 | self.models = models 64 | 65 | # Using the @has_kwargs decorator allows keyword arguments to be 66 | # passed to the method. When you assemble a command pack for the 67 | # CommandManager, just include an instance of the Kwargs object. 68 | @has_kwargs 69 | def do_search(self, search_term, some_kwarg='some value'): 70 | print(some_kwarg) 71 | return _do_search(self.driver, self.wait, search_term) 72 | -------------------------------------------------------------------------------- /Helpers/Data.py: -------------------------------------------------------------------------------- 1 | from difflib import SequenceMatcher 2 | from math import radians, sin, cos, sqrt, asin 3 | 4 | 5 | def levenshtein_distance(s1, s2): 6 | """ 7 | The minimum amount of edits needed to make s2 into s1. 8 | 9 | Args: 10 | s1: string 11 | s2: string 12 | 13 | Returns: 14 | int 15 | 16 | """ 17 | 18 | if len(s1) > len(s2): 19 | s1, s2 = s2, s1 20 | 21 | distances = range(len(s1) + 1) 22 | for index2, char2 in enumerate(s2): 23 | new_distances = [index2+1] 24 | for index1, char1 in enumerate(s1): 25 | if char1 == char2: 26 | new_distances.append(distances[index1]) 27 | else: 28 | new_distances.append( 29 | 1 + min( 30 | ( 31 | distances[index1], 32 | distances[index1+1], 33 | new_distances[-1] 34 | ) 35 | ) 36 | ) 37 | distances = new_distances 38 | distance = distances[-1] 39 | return distance 40 | 41 | 42 | def string_similarity(s1, s2): 43 | """ 44 | Get a float representation of the difference between 2 strings. 45 | 46 | Args: 47 | s1: string 48 | s2: string 49 | 50 | Returns: float 51 | """ 52 | 53 | return SequenceMatcher(None, s1, s2).ratio() 54 | 55 | 56 | def haversine(lat1, lon1, lat2, lon2, units='metric'): 57 | """ 58 | 59 | Args: 60 | lat1: float 61 | lon1: float 62 | lat2: float 63 | lon2: float 64 | 65 | Returns: float 66 | 67 | """ 68 | earth_radius = 6372.8 # Earth radius in kilometers 69 | 70 | d_lat = radians(lat2 - lat1) 71 | d_lon = radians(lon2 - lon1) 72 | lat1 = radians(lat1) 73 | lat2 = radians(lat2) 74 | 75 | a = sin(d_lat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(d_lon / 2) ** 2 76 | c = 2 * asin(sqrt(a)) 77 | 78 | km = earth_radius * c 79 | 80 | if units == 'metric': 81 | return km 82 | elif units == 'imperial': 83 | # Return miles. 84 | return km / 1.609344 85 | else: 86 | return km 87 | 88 | 89 | # if __name__ == '__main__': 90 | # print haversine(45.5231, 122.6765, 39.7392, 104.9903, units='imperial') 91 | # print string_similarity('Hello World', 'Hello WOrld!') 92 | # print levenshtein_distance('Hello World', 'Hello WOrld!') 93 | -------------------------------------------------------------------------------- /ThreadedExample.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from time import sleep 3 | # Database models used to interact with databases. 4 | from Project import Models 5 | # Controllers are kept in the SiteAutomations folder. 6 | from SiteAutomations.Examples import GoogleExample, BingExample 7 | # Pull in the command factory, which is the backbone to threaded automations. 8 | # also pull in the Kwargs object for passing kwargs into commands. 9 | from Helpers.Commands import ThreadedCommandFactory, Kwargs 10 | 11 | # Define some controllers to pass to ThreadedCommandFactory. 12 | # Note that the controllers being used are subclasses of IndependentController. 13 | # These classes don't handle any threading, but they are set up so that the threaded 14 | # command factory will be able to create threaded commands. 15 | controllers = { 16 | 'google': GoogleExample.ThreadedGoogleSearch(Models), 17 | 'bing': BingExample.ThreadedBingSearch(Models) 18 | } 19 | 20 | cmd_factory = ThreadedCommandFactory(controllers, logging=False) 21 | # In addition to creating Command objects, instances of any CommandFactory object 22 | # will act similar to how a dictionary works. The only difference is you don't need 23 | # to call anything like iteritems() when iterating over it: 24 | # 25 | # for key, controller in cmd_factory: 26 | # print key, controller 27 | # 28 | # b = cmd_factory['bing'] 29 | # b.do_search('hello world') 30 | # 31 | # del cmd_factory['bing'] 32 | # del cmd_factory['google'] 33 | 34 | 35 | # Register arguments to pass to each controller. They are 36 | # matched by the key in the controllers dictionary. The 37 | # next search_command shows how to add kwargs to the command 38 | # pack. 39 | search_command_1 = { 40 | 'google': ('google wiki',), 41 | 'bing': ('bing wiki',) 42 | } 43 | 44 | # Each argument is passed as *args. If you need any 45 | # **kwargs, just instantiate a Kwargs object with the 46 | # dictionary containing the **kwargs and make sure 47 | # the method you are calling with the command pack 48 | # is decorated with @has_kwargs. 49 | search_command_2 = { 50 | 'google': ('star wars', Kwargs({'some_kwarg': 'Overridden value!!!'})), 51 | 'bing': ('star wars',) 52 | } 53 | 54 | # Create the threads. A Command instance is returned when 55 | # the threads are created. These Command objects are used 56 | # to start the threads. Pass a function(controller as first 57 | # argument and *args as the second) as the first parameter 58 | # to the create_command method, and the command pack as the 59 | # second parameter to the create_command method. 60 | cmd1 = cmd_factory.create_command(lambda controller, *search_term: controller.do_search(*search_term), search_command_1) 61 | cmd2 = cmd_factory.create_command(lambda controller, *search_term: controller.do_search(*search_term), search_command_2) 62 | 63 | # Start the threads. Each search will be executed, and it will 64 | # only take as long as the longest automation time. 65 | cmd1.start() 66 | print('finished first search') 67 | sleep(5) 68 | cmd2.start() 69 | print('finished second search') 70 | sleep(5) 71 | 72 | # Close the WebDrivers down. 73 | cmd_factory.shutdown() 74 | -------------------------------------------------------------------------------- /Environment.py: -------------------------------------------------------------------------------- 1 | from os import getcwd 2 | from .common import EnvironmentContainer 3 | 4 | 5 | def load_env(): 6 | """ 7 | Load the project's local .env file into the `EnvironmentContainer`. 8 | Since the `EnvironmentContainer`'s `container` attribute is a 9 | static attribute, any new instances should have the same 10 | attributes set. 11 | 12 | Returns: 13 | None 14 | """ 15 | 16 | from .common import ConfigParser 17 | 18 | filepath = getcwd().replace('\\', '/').replace('SiteAutomations', '').replace('Jobs', '') 19 | filepath += '/.env' if filepath[-1] != '/' else '.env' 20 | 21 | ConfigParser(EnvironmentContainer, filepath=filepath).load() 22 | return 23 | 24 | 25 | # If the .env file has not been loaded, then load it! 26 | if EnvironmentContainer.container == {}: 27 | load_env() 28 | 29 | 30 | def env(variable_name, func=lambda x: x): 31 | """ 32 | Get the corresponding environment variable. Pass a function 33 | like `int` or `bool` as the `type_hint` keyword argument to 34 | automatically run that function on the output. 35 | 36 | Args: 37 | variable_name: string 38 | func: function 39 | 40 | Returns: 41 | string 42 | """ 43 | 44 | return func(EnvironmentContainer.container[variable_name]) 45 | 46 | 47 | def env_driver(browser): 48 | """ 49 | Return the web driver. 50 | 51 | Args: 52 | browser: string 53 | 54 | Returns: 55 | selenium WebDriver 56 | """ 57 | 58 | from selenium import webdriver 59 | 60 | the_driver = False 61 | if browser == 'chrome': 62 | the_driver = webdriver.Chrome 63 | 64 | elif browser == 'firefox': 65 | the_driver = webdriver.Firefox 66 | 67 | elif browser == 'safari': 68 | the_driver = webdriver.Safari 69 | 70 | elif browser == 'phantomjs': 71 | the_driver = webdriver.PhantomJS 72 | 73 | elif browser == 'opera': 74 | the_driver = webdriver.Opera 75 | 76 | return the_driver 77 | 78 | 79 | def get_database(db_type): 80 | """ 81 | Get the database object that should be used. 82 | 83 | Args: 84 | db_type: string 85 | 86 | Returns: 87 | peewee database driver. 88 | """ 89 | 90 | db = False 91 | if db_type == 'sql': 92 | from peewee import SqliteDatabase 93 | 94 | db = SqliteDatabase(env("DB")) 95 | 96 | elif db_type == 'mysql': 97 | from peewee import MySQLDatabase 98 | 99 | db = MySQLDatabase( 100 | env("DB"), 101 | host=env("DB_HOST"), 102 | port=int(env("DB_PORT")), 103 | user=env("DB_USERNAME"), 104 | passwd=env("DB_PASSWORD") 105 | ) 106 | 107 | elif db_type == 'postgresql': 108 | from peewee import PostgresqlDatabase 109 | 110 | db = PostgresqlDatabase( 111 | env('DB'), 112 | host=env("DB_HOST"), 113 | port=int(env("DB_PORT")), 114 | user=env("DB_USERNAME"), 115 | passwd=env("DB_PASSWORD") 116 | ) 117 | 118 | elif db_type == 'berkeley': 119 | from playhouse.berkeleydb import BerkeleyDatabase 120 | 121 | db = BerkeleyDatabase( 122 | env('DB'), 123 | host=env("DB_HOST"), 124 | port=int(env("DB_PORT")), 125 | user=env("DB_USERNAME"), 126 | passwd=env("DB_PASSWORD") 127 | ) 128 | 129 | return db 130 | -------------------------------------------------------------------------------- /UnitTests/PageTests.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import unittest 3 | 4 | from time import sleep 5 | from selenium.webdriver import Chrome 6 | from selenium.webdriver.remote.webelement import WebElement 7 | from selenium.webdriver.chrome.webdriver import WebDriver 8 | from selenium.common.exceptions import NoSuchElementException 9 | from selenext.Helpers import Page, View, PageElement, ParentElement 10 | from json.decoder import JSONDecodeError 11 | 12 | 13 | class PageTest(unittest.TestCase): 14 | def setUp(self): 15 | self.driver = Chrome() 16 | self.assertIsInstance(self.driver, WebDriver) 17 | 18 | self.page = Page(self.driver, 'page_tests.json', file=True) 19 | 20 | def test_page_instance_is_page(self): 21 | self.assertIsInstance(self.page, Page) 22 | 23 | def test_page_instantiation(self): 24 | self.assertRaises(TypeError, Page) 25 | self.assertRaises(TypeError, Page, self.driver) 26 | self.assertRaises(JSONDecodeError, Page, self.driver, '') 27 | self.assertRaises(JSONDecodeError, Page, self.driver, 'page_tests.json') 28 | 29 | def test_page_attributes(self): 30 | self.assertIsInstance(self.page.driver, WebDriver) 31 | self.assertIsInstance(self.page.root, str) 32 | 33 | # The next line should work, but doesn't? Leaving it commented out for now. 34 | # self.assertRaises(NoSuchElementException, self.page.search_input) 35 | self.page.get(self.page.root) 36 | # In the debugger, the self.page.search_input attribute equals a PageElement, 37 | # but as soon as the attribute is accessed, it will change to a WebElement. 38 | self.assertIsInstance(self.page.search_input, WebElement) 39 | self.assertIsInstance(self.page.view.search_input, PageElement) 40 | 41 | def test_page_view_attributes(self): 42 | self.assertIsInstance(self.page.view, View) 43 | self.assertIsInstance(self.page.view.elements, dict) 44 | self.assertIsInstance(self.page.view.json_dict, dict) 45 | self.assertIsInstance(self.page.view.driver, WebDriver) 46 | 47 | def test_page_element_attributes(self): 48 | self.page.get(self.page.root) 49 | self.assertTrue(self.page.view.search_input.exists()) 50 | self.assertFalse(self.page.view.missing_element.exists()) 51 | self.assertIsInstance(self.page.view.search_input.driver, WebDriver) 52 | self.assertIsInstance(self.page.view.search_input.element_dict, dict) 53 | self.assertIsInstance(self.page.view.search_input.lookup_method, str) 54 | self.assertIsInstance(self.page.view.search_input.selector, str) 55 | self.assertIsInstance(self.page.view.search_input, PageElement) 56 | self.assertIsInstance(self.page.view.search_form.parent, ParentElement) 57 | self.assertIsInstance(self.page.view.search_form.parent.parent, ParentElement) 58 | self.assertIsNone(self.page.view.search_input.parent) 59 | 60 | def test_multiple_page_element(self): 61 | self.page.get(self.page.root) 62 | self.page.search_input.send_keys('cookies') 63 | sleep(2) 64 | self.page.search_button.click() 65 | 66 | while not self.page.view.results.exists(): 67 | sleep(1) 68 | 69 | self.assertIsInstance(self.page.results, list) 70 | self.assertIsInstance(self.page.results[3], WebElement) 71 | self.assertIsInstance(self.page.bound_results[3], str) 72 | 73 | 74 | def main(): 75 | unittest.main() 76 | 77 | if __name__ == '__main__': 78 | main() 79 | -------------------------------------------------------------------------------- /Helpers/Mailers.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import smtplib 3 | from email.mime.text import MIMEText 4 | 5 | 6 | class GMailer: 7 | """ 8 | Send emails using Gmail. 9 | """ 10 | def __init__(self, username, password, host, port, tls=True, ehlo=True, anon=False): 11 | username_type = type(username) 12 | if username_type != str and not isinstance(username, type(u'')): 13 | raise TypeError('A string was expected for the username variable.') 14 | if type(password) != str and not isinstance(password, type(u'')): 15 | raise TypeError('A string was expected for the password variable.') 16 | if type(host) != str and not isinstance(host, type(u'')): 17 | raise TypeError('A string was expected for the host variable.') 18 | if type(port) != int: 19 | try: 20 | port = int(port) 21 | except ValueError: 22 | raise TypeError('An integer was expected for the port variable.') 23 | 24 | self.username = username 25 | self.password = password 26 | self.host = host 27 | self.port = port 28 | self.tls = tls 29 | self.ehlo = ehlo 30 | self.anon = anon 31 | 32 | def send_email(self, emails, subject, the_msg): 33 | """ 34 | Send an email. 35 | 36 | :param emails: 37 | :param subject: 38 | :param the_msg: 39 | :return: 40 | """ 41 | 42 | email_list = emails.split(',') 43 | email_list = [email.strip() for email in email_list] 44 | 45 | msg = "\r\n".join([ 46 | "From: {}".format(self.username), 47 | "To: {}".format(emails), 48 | "Subject: {}".format(subject) 49 | ]) 50 | 51 | msg += "\r\n{}".format(the_msg) 52 | smtp = smtplib.SMTP("{}:{}".format(self.host, self.port)) 53 | print("Sending report...") 54 | 55 | if self.ehlo: 56 | print(smtp.ehlo()) 57 | if self.tls: 58 | print(smtp.starttls()) 59 | if not self.anon: 60 | print(smtp.login(self.username, self.password)) 61 | print() 62 | print(smtp.sendmail(self.username, email_list, msg)) 63 | smtp.close() 64 | return self 65 | 66 | def send_email_with_attachment(self, emails, filepath, subject): 67 | """ 68 | Send an email with an attachment. 69 | 70 | :param emails: 71 | :param filepath: 72 | :param subject: 73 | :return: 74 | """ 75 | 76 | email_list = emails.split(',') 77 | email_list = [email.strip() for email in email_list] 78 | 79 | msg = "\r\n".join([ 80 | "From: {}".format(self.username), 81 | "To: {}".format(emails), 82 | "Subject: {}".format(subject) 83 | ]) 84 | 85 | with open(filepath, 'rb') as f: 86 | attachment = MIMEText(f.read()) 87 | 88 | attachment.add_header('Content-Disposition', 'attachment', filename=filepath) 89 | 90 | msg += "\r\n" + attachment.as_string() 91 | 92 | smtp = smtplib.SMTP("{}:{}".format(self.host, self.port)) 93 | 94 | if self.ehlo: 95 | print(smtp.ehlo()) 96 | if self.tls: 97 | print(smtp.starttls()) 98 | if not self.anon: 99 | print(smtp.login(self.username, self.password)) 100 | print() 101 | print(smtp.sendmail(self.username, email_list, msg)) 102 | smtp.close() 103 | return self 104 | -------------------------------------------------------------------------------- /UnitTests/CommandsTests.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import unittest 3 | 4 | from Helpers.Commands import ThreadedCommandFactory, Command, Kwargs 5 | from SiteAutomations.Examples import GoogleExample 6 | from Project import Models 7 | 8 | 9 | class KwargsTest(unittest.TestCase): 10 | def setUp(self): 11 | self.test_dict = { 12 | 'test1': 'hello world', 13 | 'test2': 'zim zam kabam!' 14 | } 15 | self.kwarg_obj = Kwargs(self.test_dict) 16 | 17 | def test_iterate_over_object(self): 18 | self.assertEqual(type(self.kwarg_obj), Kwargs) 19 | for k, v in self.kwarg_obj: 20 | self.assertEqual(type(k), str) 21 | 22 | def test_instantiate_kwargs_object_with_invalid_input(self): 23 | self.assertRaises(TypeError, Kwargs, 'hello world') 24 | self.assertRaises(TypeError, Kwargs, u'hello world') 25 | self.assertRaises(TypeError, Kwargs, 123) 26 | self.assertRaises(TypeError, Kwargs, 1.00) 27 | 28 | def test_get_item_from_kwargs_object(self): 29 | self.assertEqual(self.kwarg_obj['test1'], 'hello world') 30 | self.assertEqual(self.kwarg_obj['test2'], 'zim zam kabam!') 31 | 32 | def test_set_item_from_kwargs_object(self): 33 | def set_kwargs_val(key, value): 34 | self.kwarg_obj[key] = value 35 | return self.kwarg_obj 36 | 37 | valid_output = { 38 | 'test1': 'hello world', 39 | 'test2': 'zim zam kabam!', 40 | 'test3': 123 41 | } 42 | self.assertEqual(set_kwargs_val('test3', 123).dictionary, valid_output) 43 | 44 | def test_delete_item_from_kwargs_object(self): 45 | test_kwargs_object = Kwargs(self.kwarg_obj.dictionary) 46 | self.assertEqual(test_kwargs_object.__delitem__('test1').dictionary, {'test2': 'zim zam kabam!'}) 47 | 48 | 49 | class ThreadedCommandFactoryTest(unittest.TestCase): 50 | def setUp(self): 51 | self.controllers = { 52 | 'goog1': GoogleExample.ThreadedGoogleSearch(Models), 53 | 'goog2': GoogleExample.ThreadedGoogleSearch(Models) 54 | } 55 | 56 | self.cmd = ThreadedCommandFactory(self.controllers, False) 57 | 58 | def test_command_manager_attributes(self): 59 | self.assertEqual(type(self.cmd.controllers), dict) 60 | self.assertEqual(type(self.cmd.pool), list) 61 | 62 | def test_create_threads_with_invalid_inputs(self): 63 | def _(*args): 64 | print(args) 65 | self.assertRaises(TypeError, self.cmd.create_command, _, [1, 2, 3]) 66 | self.assertRaises(TypeError, self.cmd.create_command, _, (1, 2, 3)) 67 | self.assertRaises(TypeError, self.cmd.create_command, _, 234231) 68 | self.assertRaises(TypeError, self.cmd.create_command, _, 'jalsdkfoij') 69 | self.assertRaises(TypeError, self.cmd.create_command, _, u'asdfhosidjfn') 70 | self.assertRaises(TypeError, self.cmd.create_command, _, 0b1010110) 71 | 72 | def test_create_threads_with_valid_input(self): 73 | def _(*args, **kwargs): 74 | print(args) 75 | print(kwargs) 76 | command_pack = { 77 | 'goog1': ('hello', Kwargs({'a': 1, 'b': 2, 'c': 3})), 78 | 'goog2': ('hello world!',) 79 | } 80 | self.assertIsInstance(self.cmd.create_command(_, command_pack), Command) 81 | 82 | def tearDown(self): 83 | try: 84 | items = self.cmd.controllers.iteritems() 85 | except AttributeError: 86 | items = self.cmd.controllers.items() 87 | 88 | for k, controller in items: 89 | controller.driver.close() 90 | try: 91 | controller.driver.quit() 92 | except AttributeError: 93 | pass 94 | 95 | 96 | def main(): 97 | unittest.main() 98 | 99 | if __name__ == '__main__': 100 | main() 101 | -------------------------------------------------------------------------------- /Example.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from time import sleep 3 | # Database models used to interact with databases. 4 | from Project import Models 5 | # The environment variable loader. These variables can be set in the .env file. 6 | from Environment import env, env_driver 7 | # Controllers are kept in the SiteAutomations folder. 8 | from SiteAutomations.Examples import GoogleExample, BingExample 9 | from Helpers.Contexts import quitting 10 | # Pull in the command factory for the second example. 11 | from Helpers.Commands import CommandFactory, Kwargs 12 | from selenium.webdriver.support.wait import WebDriverWait 13 | 14 | # This is where the WebDriver is instantiated. Instead 15 | # of instantiating it directly, use the `env` and 16 | # `env_driver` functions to grab it based on the 17 | # `.env` configuration file. 18 | # This could be written as: 19 | # 20 | # browser = env("BROWSER") 21 | # web_driver = env_driver(browser) 22 | # with quitting(web_driver()) as driver: 23 | # pass 24 | # 25 | 26 | 27 | with quitting(env_driver(env("BROWSER"))()) as driver: 28 | wait = WebDriverWait(driver, 30) 29 | 30 | # Pass the web driver to the site automation along with anything 31 | # else it might need to do its job. This could include an 32 | # instance of WebDriverWait, and even the collection of 33 | # Models. 34 | google_search = GoogleExample.GoogleSearch(driver, wait, Models) 35 | bing_search = BingExample.BingSearch(driver, wait, Models) 36 | 37 | # Do stuff with your controllers. 38 | google_search.do_search('google wiki') 39 | sleep(5) 40 | bing_search.do_search('bing wiki') 41 | sleep(5) 42 | 43 | # Optionally, you can use the command manager to do the searches. 44 | # This will make each controller use it's own personal WebDriver. 45 | # Define some controllers to pass to CommandFactory. 46 | # Note that the controllers being used are subclasses of IndependentController. 47 | # These classes don't handle any threading, but they are set up so that the 48 | # command factory will be able to create commands with them. 49 | controllers = { 50 | 'google': GoogleExample.ThreadedGoogleSearch(Models), 51 | 'bing': BingExample.ThreadedBingSearch(Models) # Check out the example files for more info on threading. 52 | } 53 | 54 | # We use the CommandFactory instead of the ThreadedCommandFactory 55 | # so that the each controller has it's own WebDriver instance 56 | # and each request is made in the main thread. 57 | cmd_factory = CommandFactory(controllers, logging=False) 58 | # In addition to creating Command objects, instances of any CommandFactory object will 59 | # act similar to how a dictionary works. The only difference is you don't need to call 60 | # iteritems() when iterating over it: 61 | # 62 | # for key, controller in cmd_factory: 63 | # print key, controller 64 | # 65 | # b = cmd_factory['bing'] 66 | # b.do_search('hello world') 67 | # 68 | # del cmd_factory['bing'] 69 | # del cmd_factory['google'] 70 | 71 | 72 | # Register arguments to pass to each controller. They are 73 | # matched by the key in the controllers dictionary. 74 | # Command packs are passed as *args. If you need any 75 | # **kwargs, just instantiate a Kwargs object with the 76 | # dictionary containing the **kwargs and make sure 77 | # the method you are calling with the command pack 78 | # is decorated with @has_kwargs. 79 | search_command = { 80 | 'google': ('star wars', Kwargs({'some_kwarg': 'NEW KWARG VALUE!'})), # You can override keyword arguments as well! 81 | 'bing': ('star wars',) 82 | } 83 | 84 | # Create the command. Pass a function as the first parameter and 85 | # the command pack as the second parameter. A Command instance 86 | # is returned when the command is created. These Command 87 | # objects are used to start the work! 88 | cmd = cmd_factory.create_command(lambda controller, *search_term: controller.do_search(*search_term), search_command) 89 | 90 | # Start the command. Each search will be executed one after the 91 | # other. 92 | cmd.start() 93 | print('finished first search') 94 | sleep(5) 95 | 96 | # Close the WebDrivers down. 97 | cmd_factory.shutdown() 98 | -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class EnvironmentContainer(object): 4 | """ 5 | Static container object used to facilitate holding environment variables across 6 | instances. 7 | """ 8 | 9 | container = {} 10 | 11 | 12 | class ConfigParser: 13 | def __init__(self, container, filepath='.env'): 14 | self.filepath = filepath 15 | self.lines = {} 16 | self.list_mode = False 17 | self.list_name = None 18 | self.dict_mode = False 19 | self.dict_name = None 20 | self.sline = None 21 | self.container = container 22 | 23 | def load(self): 24 | filepath = self.filepath 25 | with open(filepath, 'r') as f: 26 | 27 | for line in f: 28 | self.sline = line.strip() 29 | 30 | # Handle comments. 31 | if line[0] == '#': 32 | continue 33 | 34 | # Handle dict mode. 35 | if self.dict_mode: 36 | if self.sline == self.dict_name + '{END}': 37 | if self.dict_name not in self.lines.keys(): 38 | self.lines[self.dict_name] = {} 39 | self.dict_mode = False 40 | self.dict_name = None 41 | continue 42 | 43 | self.process_dict_line(line, self.dict_name) 44 | continue 45 | 46 | # Handle list mode. 47 | if self.list_mode: 48 | if self.sline == '{}[END]'.format(self.list_name): 49 | if self.list_name not in self.lines.keys(): 50 | self.lines[self.list_name] = [] 51 | self.list_name = None 52 | self.list_mode = False 53 | continue 54 | try: 55 | self.lines[self.list_name].append(self.sline) 56 | except KeyError: 57 | self.lines[self.list_name] = [self.sline] 58 | continue 59 | 60 | # Handle key=value lines. 61 | if '=' in line: 62 | self.process_key_value(line) 63 | continue 64 | 65 | # Handle list definitions. 66 | if self.sline[-3:] == '[]:': 67 | self.list_mode = True 68 | self.list_name = self.sline[:-3] 69 | continue 70 | 71 | # Handle dict definitions. 72 | if self.sline[-3:] == '{}:': 73 | self.dict_mode = True 74 | self.dict_name = self.sline[:-3] 75 | continue 76 | for k, v in self.lines.items(): 77 | EnvironmentContainer.container[k] = v 78 | 79 | def check_for_list_mode(self, line): 80 | """ 81 | Check to see if the line is defining a list or not. If it does, it will return 82 | the lists name and the bool representing whether it is in list mode or not. 83 | 84 | Args: 85 | line: 86 | 87 | Returns: 88 | tuple 89 | """ 90 | 91 | self.list_name = self.sline[:-3] 92 | self.list_mode = True if self.sline[-3:] == '[]:' else False 93 | return self.list_name, self.list_mode 94 | 95 | def check_for_dict_mode(self, line): 96 | """ 97 | Check to see if the line is defining a dictionary or not. If it does, it will return 98 | the dictionaries name and the bool representing whether it is in dict mode or not. 99 | 100 | Args: 101 | line: 102 | 103 | Returns: 104 | tuple 105 | """ 106 | 107 | self.dict_name = self.sline[:-3] 108 | self.dict_mode = True if self.sline[-3:] == '{}:' else False 109 | return self.dict_name, self.dict_mode 110 | 111 | def get_key_value(self, line): 112 | """ 113 | Get the key and value from the given line, assuming it's separated by the first = sign. 114 | 115 | Args: 116 | line: 117 | 118 | Returns: 119 | 120 | """ 121 | 122 | line_pieces = self.sline.split('=') 123 | key = line_pieces[0] 124 | # Safely grab the value. If the value contains an = symbol, it should 125 | # not get mangled, and data should not be missing 126 | value = '='.join(line_pieces[1:]).strip() 127 | 128 | return key, value 129 | 130 | def process_dict_line(self, line, dict_name): 131 | """ 132 | Process a line that occurs within a dict that is being defined in the .env file. 133 | 134 | Args: 135 | line: 136 | dict_name: 137 | 138 | Returns: 139 | self 140 | """ 141 | 142 | key, value = self.get_key_value(line) 143 | try: 144 | self.lines[dict_name][key] = value 145 | except KeyError: 146 | self.lines[dict_name] = {key: value} 147 | return self 148 | 149 | def process_key_value(self, line): 150 | """ 151 | Process a key=value from the given line. 152 | 153 | Args: 154 | line: 155 | 156 | Returns: 157 | self 158 | """ 159 | 160 | key, value = self.get_key_value(line) 161 | self.add_root_key(key, value) 162 | return self 163 | 164 | def add_root_key(self, key, value): 165 | """ 166 | Add a key to self.lines with the given value. 167 | 168 | Args: 169 | key: 170 | value: 171 | 172 | Returns: 173 | self 174 | """ 175 | 176 | self.lines[key] = value 177 | return self 178 | 179 | def get(self, variable_name): 180 | """ 181 | 182 | Args: 183 | variable_name: string 184 | 185 | Returns: 186 | string 187 | """ 188 | 189 | return self.lines[variable_name] 190 | 191 | 192 | -------------------------------------------------------------------------------- /Helpers/Validation.py: -------------------------------------------------------------------------------- 1 | """ 2 | The validation module provides an api for filtering selenium elements 3 | down using wildcard expressions, or regular expressions. 4 | 5 | The functionality here will be expanded in the future to include 6 | other filtering techniques. 7 | """ 8 | from __future__ import print_function 9 | from re import match as rematch 10 | 11 | 12 | # todo: Should this be kept or removed? 13 | class WebElementFilter(object): 14 | def __init__(self): 15 | """ 16 | The regex_special_wildcard_chars is a list of special 17 | regex characters, aside from `.`, and `*`. It is 18 | here to help generate regex statements for any 19 | wildcard matching. This is done by escaping 20 | any character in the pattern that exists 21 | inside regex_special_wildcard_chars.. 22 | """ 23 | self._regex_special_wildcard_chars = [ 24 | '^', '$', '+', '?', '{', '}', '[', ']', '(', ')', 25 | '|', ':', '<', '>', '=', '-', '!' 26 | ] 27 | 28 | self._inner_text_search = False 29 | self._attribute_search = False 30 | 31 | def _reset(self): 32 | """ 33 | Reset the variables used to perform matches. 34 | 35 | :return: 36 | """ 37 | 38 | self._inner_text_search = False 39 | self._attribute_search = False 40 | 41 | def attribute(self, attr): 42 | """ 43 | Set up an attribute to match. 44 | 45 | :param attr: 46 | :return: 47 | """ 48 | 49 | self._attribute_search = attr 50 | return self 51 | 52 | def inner_text(self): 53 | """ 54 | Test against the inner text of an element. 55 | 56 | :return: 57 | """ 58 | 59 | self._inner_text_search = True 60 | return self 61 | 62 | def wildcard_match(self, element, pattern, attr_name=False): 63 | """ 64 | Check to see if an attribute matches a wildcard expression. 65 | 66 | :param element: 67 | :param pattern: 68 | :param attr_name: 69 | :return: 70 | """ 71 | 72 | attr_value = '' 73 | 74 | # Handle `attribute` and `inner_text` if both are set. 75 | attr_search = self._attribute_search 76 | if attr_search and self._inner_text_search: 77 | self._reset() 78 | 79 | # Check if attribute matches. 80 | self.attribute(attr_search) 81 | if self.wildcard_match(element, pattern): 82 | # Don't return because we still need to check to see 83 | # if the inner_text matches. 84 | self._reset() 85 | else: 86 | self._reset() 87 | return False 88 | 89 | # Check to see if the inner text matches. 90 | self.inner_text() 91 | if self.wildcard_match(element, pattern): 92 | self._reset() 93 | # Return since both checks passed. 94 | return True 95 | else: 96 | self._reset() 97 | return False 98 | 99 | # Get the element attribute. 100 | if self._attribute_search: 101 | attr_value = element.get_attribute(self._attribute_search) 102 | 103 | # Get the elements inner text. 104 | if self._inner_text_search: 105 | attr_value = element.text 106 | 107 | # Get the element based on attr_name. 108 | if attr_name: 109 | attr_value = element.get_attribute(attr_name) 110 | # If the attribute doesn't exist, then obviously it doesn't match. 111 | if not attr_value: 112 | self._reset() 113 | return False 114 | 115 | pattern = self._prepare_wildcard_pattern(pattern) 116 | 117 | # Reset for reuse. 118 | self._reset() 119 | 120 | # Perform the regex match. 121 | if not rematch(pattern, attr_value): 122 | return False 123 | return True 124 | 125 | def regex_match(self, element, pattern, attr_name=False): 126 | """ 127 | Check to see if an attribute matches a regular expression. 128 | 129 | :param element: 130 | :param pattern: 131 | :param attr_name: 132 | :return: 133 | """ 134 | 135 | attr_value = '' 136 | 137 | # Handle `attribute` and `inner_text` if both are set. 138 | attr_search = self._attribute_search 139 | if attr_search and self._inner_text_search: 140 | self._reset() 141 | 142 | # Check if attribute matches. 143 | self.attribute(attr_search) 144 | if self.regex_match(element, pattern): 145 | # Don't return because we still need to check to see 146 | # if the inner_text matches. 147 | self._reset() 148 | else: 149 | self._reset() 150 | return False 151 | 152 | # Check to see if the inner text matches. 153 | self.inner_text() 154 | if self.regex_match(element, pattern): 155 | self._reset() 156 | # Return since both checks passed. 157 | return True 158 | else: 159 | self._reset() 160 | return False 161 | 162 | # Get the element attribute based on the input to the `attribute` method. 163 | if self._attribute_search: 164 | attr_value = element.get_attribute(self._attribute_search) 165 | print('attribute_search:', attr_value) 166 | 167 | # Get the elements inner text. 168 | if self._inner_text_search: 169 | attr_value = element.text 170 | 171 | # Get the element by attr_name kwarg. 172 | if attr_name: 173 | attr_value = element.get_attribute(attr_name) 174 | if not attr_value: 175 | return False 176 | 177 | # Reset for reuse. 178 | self._reset() 179 | 180 | # Perform the regex match. 181 | if not rematch(pattern, attr_value): 182 | return False 183 | return True 184 | 185 | def _prepare_wildcard_pattern(self, pat): 186 | """ 187 | Prepare a regex patter for a wildcard expression. The * operator is supported. 188 | 189 | :param pat: 190 | :return: 191 | """ 192 | 193 | pat = pat.replace('*', '.*') 194 | special_chars = self._regex_special_wildcard_chars 195 | return ''.join(['\\' + c if c in special_chars else c for c in pat]) 196 | -------------------------------------------------------------------------------- /genesis.py: -------------------------------------------------------------------------------- 1 | """ 2 | genesis.py is used for creating new selenext projects and writing program stubs. 3 | """ 4 | 5 | from __future__ import print_function 6 | 7 | import sys 8 | from os import mkdir 9 | from os.path import isfile, isdir, expanduser 10 | 11 | 12 | def write_stub(filepath, stub, append_py=True): 13 | """ 14 | Write a stub to the given filepath. 15 | 16 | Args: 17 | filepath: string 18 | stub: string 19 | append_py: bool 20 | 21 | Returns: 22 | None 23 | """ 24 | 25 | if append_py: 26 | if filepath[-3:] != '.py': 27 | filepath += '.py' 28 | with open(filepath, 'w') as f: 29 | f.write(stub) 30 | f.close() 31 | return 32 | 33 | 34 | def make_threaded_stub(filepath): 35 | """ 36 | Create a stub with a ThreadedCommandFactory instance for multi-threaded automations. 37 | 38 | Args: 39 | filepath: string 40 | 41 | Returns: 42 | None 43 | """ 44 | 45 | stub = """from time import sleep 46 | from Project import Models 47 | from Config.Environment import env 48 | from Helpers.Commands import ThreadedCommandFactory 49 | from Helpers.Validation import WebElementFilter 50 | from SiteAutomations import # Pull in Controllers 51 | 52 | 53 | # Replace with Controller from SiteAutomations. 54 | controllers = { 55 | 'controller_name': None 56 | } 57 | command_factory = ThreadedCommandFactory(controllers) 58 | 59 | # Replace None with a tuple of inputs for the target method. 60 | some_command = { 61 | 'controller_name': None 62 | } 63 | # Add the target method to the call to create_command. 64 | cmd = command_factory.create_command(, some_command) 65 | """ 66 | write_stub(filepath, stub) 67 | return 68 | 69 | 70 | def make_stub(filepath): 71 | """ 72 | Create a stub for a simple automation. 73 | 74 | Args: 75 | filepath: string 76 | 77 | Returns: 78 | None 79 | """ 80 | 81 | stub = """from time import sleep 82 | from Project import Models 83 | from Config.Environment import env, env_driver 84 | from Helpers.Validation import WebElementFilter 85 | from Helpers.Contexts import quitting 86 | from SiteAutomations import # Pull in Controllers 87 | 88 | 89 | with quitting(env_driver(env("BROWSER"))()) as driver: 90 | pass 91 | """ 92 | write_stub(filepath, stub) 93 | return 94 | 95 | 96 | def make_managed_stub(filepath): 97 | """ 98 | Creates a stub with a command factory instance for managing automations. 99 | 100 | Args: 101 | filepath: string 102 | 103 | Returns: 104 | None 105 | """ 106 | 107 | stub = """from time import sleep 108 | from Project import Models 109 | from Config.Environment import env 110 | from Helpers.Commands import CommandFactory 111 | from Helpers.Validation import WebElementFilter 112 | from SiteAutomations import # Pull in Controllers 113 | 114 | 115 | # Replace with Controller from SiteAutomations. 116 | controllers = { 117 | 'controller_name': None 118 | } 119 | command_factory = CommandFactory(controllers) 120 | 121 | # Replace None with a tuple of inputs for the target method. 122 | some_command = { 123 | 'controller_name': None 124 | } 125 | # Add the target method to the call to create_command. 126 | cmd = command_factory.create_command(, some_command) 127 | """ 128 | write_stub(filepath, stub) 129 | return 130 | 131 | 132 | def _get_folder(filepath): 133 | """ 134 | Handles adding a / or \\ to the end of a directory path. 135 | 136 | Args: 137 | filepath: string 138 | 139 | Returns: 140 | string 141 | """ 142 | 143 | if '/' in filepath: 144 | folder = filepath if filepath[-1] == '/' else filepath + '/' 145 | elif '\\' in filepath: 146 | folder = filepath if filepath[-1] == '\\' else filepath + '\\' 147 | else: 148 | folder = filepath + '/' 149 | return folder 150 | 151 | 152 | def make_project_scaffold(directory): 153 | """ 154 | Create the project scaffold in the given directory. 155 | 156 | Creates the .env, migrations.py, models.py and main.py files. 157 | 158 | Args: 159 | directory: string 160 | 161 | Returns: 162 | None 163 | """ 164 | 165 | print() 166 | env_stub = """# Browsers: chrome, firefox, safari, phantomjs, opera 167 | BROWSER=chrome 168 | 169 | # Database settings 170 | # DB_TYPE values: sql, mysql, postgresql, berkeley 171 | DB_TYPE=sql 172 | DB=default.db 173 | DB_HOST=localhost 174 | DB_PORT=3306 175 | DB_USERNAME=None 176 | DB_PASSWORD=None 177 | """ 178 | 179 | migrations_stub = """from selenext.Database import migrate 180 | import models 181 | 182 | migrate(models) 183 | """ 184 | models_stub = """from peewee import * 185 | from selenext.Environment import get_database, env 186 | 187 | 188 | db = get_database(env("DB_TYPE")) 189 | 190 | 191 | class BaseModel(Model): 192 | class Meta: 193 | database = db 194 | """ 195 | 196 | folder = _get_folder(directory) 197 | 198 | env_filename = folder + '.env' 199 | migrations_filename = folder + 'migrations.py' 200 | models_filename = folder + 'models.py' 201 | main_filename = folder + 'main.py' 202 | 203 | if not isfile(env_filename): 204 | print('Writing .env file...') 205 | write_stub(env_filename, env_stub, append_py=False) 206 | print('.env file written...') 207 | if not isfile(migrations_filename): 208 | print('Writing migrations.py file...') 209 | write_stub(migrations_filename, migrations_stub) 210 | print('migrations.py file written...') 211 | if not isfile(models_filename): 212 | print('Writing models.py file...') 213 | write_stub(models_filename, models_stub) 214 | print('models.py file written...') 215 | if not isfile(main_filename): 216 | print('Writing main.py file...') 217 | write_stub(main_filename, '') 218 | print('main.py file writte...') 219 | print() 220 | return 221 | 222 | 223 | def create_module(directory): 224 | """ 225 | Create a python module with the given directory path. 226 | 227 | Args: 228 | directory: string 229 | 230 | Returns: 231 | None 232 | """ 233 | if not isdir(directory): 234 | print('Creating module folder...') 235 | mkdir(directory) 236 | print('Module folder created...') 237 | make_init(directory) 238 | return 239 | 240 | 241 | def make_init(directory): 242 | """ 243 | Create the __init__.py file. 244 | 245 | Args: 246 | directory: string 247 | 248 | Returns: 249 | None 250 | """ 251 | init_filepath = directory + '__init__.py' 252 | if not isfile(init_filepath): 253 | print('Creating __init__.py...') 254 | write_stub(init_filepath, '') 255 | print('__init__.py created...') 256 | return None 257 | 258 | 259 | def make_gitignore(directory): 260 | """ 261 | Create the .gitignore file. 262 | Args: 263 | directory: 264 | 265 | Returns: 266 | None 267 | """ 268 | gitignore_path = directory + '.gitignore' 269 | if not isfile(gitignore_path): 270 | print('Creating .gitignore...') 271 | write_stub(gitignore_path, '.env', append_py=False) 272 | print('.gitignore created...') 273 | return None 274 | 275 | 276 | def make_project(directory): 277 | """ 278 | Create a new selenext project in the given directory. 279 | 280 | Args: 281 | directory: string 282 | 283 | Returns: 284 | None 285 | """ 286 | directory = expanduser(directory) 287 | print() 288 | folder = _get_folder(directory) 289 | site_automations_folder = _get_folder(folder + 'SiteAutomations') 290 | jobs_folder = _get_folder(folder + 'Jobs') 291 | 292 | if not isdir(folder): 293 | print('Creating Project: {}...'.format(folder)) 294 | mkdir(folder) 295 | print('Project folder created!!!') 296 | else: 297 | print('Project folder already exists...') 298 | 299 | make_init(folder) 300 | make_gitignore(folder) 301 | 302 | print('Creating SiteAutomations...') 303 | create_module(site_automations_folder) 304 | print('Creating Jobs...') 305 | create_module(jobs_folder) 306 | print('Generating scaffold...') 307 | make_project_scaffold(directory) 308 | print() 309 | return 310 | 311 | # Start main program 312 | if __name__ == '__main__': 313 | def main(): 314 | args = sys.argv[1:] 315 | 316 | arg_len = len(args) 317 | 318 | if arg_len >= 4: 319 | exit() 320 | 321 | command = None 322 | value = None 323 | 324 | if arg_len == 1: 325 | command = args[0] 326 | elif arg_len == 2: 327 | command, value = args 328 | elif arg_len == 3: 329 | command, value, the_flag = args 330 | 331 | if command == 'make:threaded-stub': 332 | make_threaded_stub(value) 333 | elif command == 'make:stub': 334 | make_stub(value) 335 | elif command == 'make:managed-stub': 336 | make_managed_stub(value) 337 | elif command == 'make:project': 338 | make_project(value) 339 | elif command == 'make:project_scaffold': 340 | make_project_scaffold(value) 341 | elif 'run:' in command: 342 | # Get the Module name 343 | module_name = command.split(':')[-1] 344 | module_obj = __import__('Project.Jobs.{}'.format(module_name), fromlist=['']) 345 | module_attrs = dir(module_obj) 346 | if 'start_job' in module_attrs: 347 | start_job = getattr(module_obj, 'start_job') 348 | start_job() 349 | else: 350 | raise AttributeError('Jobs must contain a `start_job` method.') 351 | 352 | main() 353 | -------------------------------------------------------------------------------- /Helpers/Commands.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | from ..Environment import env, env_driver 4 | from ..Helpers import DummyLogger, DummyThread 5 | 6 | 7 | class Kwargs(object): 8 | """ 9 | An object used for passing **kwargs with your *args. 10 | 11 | Example: 12 | 13 | @has_kwargs 14 | def some_controller_func(first_arg, second_arg, some_arg=True, other_arg='NA'): 15 | print first_arg, second_arg 16 | print some_arg, other_arg 17 | 18 | a = ('hello', 'world', Kwargs({'some_arg': 'HELLO', 'other_arg': 'WORLD!!!'})) 19 | some_controller_func(*a) 20 | 21 | """ 22 | 23 | def __init__(self, dictionary): 24 | try: 25 | dictionary.keys() 26 | except: 27 | raise TypeError('Kwargs requires a dictionary, got a {}'.format(type(dictionary))) 28 | self.dictionary = dictionary 29 | 30 | def __len__(self): 31 | return len(self.dictionary) 32 | 33 | def __getitem__(self, key): 34 | return self.dictionary[key] 35 | 36 | def __setitem__(self, key, value): 37 | self.dictionary[key] = value 38 | 39 | def __delitem__(self, key): 40 | del self.dictionary[key] 41 | return self 42 | 43 | def __iter__(self): 44 | try: 45 | for k, v in self.dictionary.iteritems(): 46 | yield k, v 47 | except AttributeError: 48 | for k, v in self.dictionary.items(): 49 | yield k, v 50 | 51 | 52 | class BaseCommandFactory(object): 53 | def __init__(self, controllers, logging=False, attach_drivers=True, wait_timeout=30, dummy_logger_prints=False, 54 | log_file='main_log.txt'): 55 | if type(controllers) != dict: 56 | raise TypeError('controllers must be a dictionary of controllers.') 57 | self.attach_drivers = attach_drivers 58 | self.log_file = log_file 59 | self.logging_val = logging 60 | if logging: 61 | # Set up the logger # 62 | logging.basicConfig(level=logging.DEBUG) 63 | logger = logging.getLogger(ThreadedCommandFactory.__name__) 64 | 65 | # Create log handler for writing to file. # 66 | handler = logging.FileHandler(self.log_file) 67 | handler.setLevel(logging.DEBUG) 68 | 69 | # Create a logging format. # 70 | formatter = logging.Formatter( 71 | '%(asctime)s - %(name)s.%(levelname)s: %(message)s', 72 | datefmt='%m-%d-%Y %H:%M:%S' 73 | ) 74 | handler.setFormatter(formatter) 75 | 76 | # Add the handlers to the logger. 77 | logger.addHandler(handler) 78 | self.logger = logger 79 | else: 80 | self.logger = DummyLogger(prints=dummy_logger_prints, level='DEBUG') 81 | 82 | self.controllers = controllers 83 | self.wait_timeout = wait_timeout 84 | self.pool = [] 85 | if attach_drivers: 86 | self.logger.info('Attaching drivers.') 87 | self._attach_drivers() 88 | self.logger.info('Drivers attached.') 89 | 90 | def __len__(self): 91 | return len(self.controllers) 92 | 93 | def __setitem__(self, key, value): 94 | self.controllers[key] = value 95 | 96 | def __getitem__(self, item): 97 | return self.controllers[item] 98 | 99 | def __delitem__(self, key): 100 | self._shutdown_driver(key) 101 | del self.controllers[key] 102 | 103 | def __iter__(self): 104 | try: 105 | for k, v in self.controllers.iteritems(): 106 | yield k, v 107 | except AttributeError: 108 | for k, v in self.controllers.items(): 109 | yield k, v 110 | 111 | def _attach_drivers(self): 112 | """ 113 | Attach separate drivers to each controller. 114 | 115 | :return: 116 | """ 117 | 118 | try: 119 | items = self.controllers.iteritems() 120 | except AttributeError: 121 | items = self.controllers.items() 122 | 123 | for key, args in items: 124 | if 'attach_driver' in dir(args): 125 | args.attach_driver(env_driver(env('BROWSER'))(), timeout=self.wait_timeout) 126 | 127 | def _shutdown_driver(self, key, retry=True): 128 | try: 129 | self.controllers[key].driver.close() 130 | except: 131 | pass 132 | try: 133 | self.controllers[key].driver.quit() 134 | except: 135 | pass 136 | if retry: 137 | self._shutdown_driver(key, retry=False) 138 | return self 139 | 140 | def shutdown(self): 141 | """ 142 | Shut down the WebDriver instances. 143 | 144 | :return: None 145 | """ 146 | 147 | try: 148 | items = self.controllers.iteritems() 149 | except AttributeError: 150 | items = self.controllers.items() 151 | 152 | for key, controller in items: 153 | self._shutdown_driver(key) 154 | return None 155 | 156 | 157 | class ThreadedCommandFactory(BaseCommandFactory): 158 | """ 159 | Used for creating threaded commands. Each controller must use a separate instance of WebDriver. 160 | 161 | Example: 162 | controllers = { 163 | 'google': google_controller, 164 | 'yahoo': yahoo_controller 165 | } 166 | thread_manager = ThreadedControllerManager(controllers, attach_drivers=True) 167 | """ 168 | 169 | def create_command(self, target, command_pack): 170 | """ 171 | Create threads for the given target function. The command pack is used to provide args 172 | to the target function. 173 | 174 | Example of basic setup: 175 | 176 | def do_login(controller, username, password): 177 | return controller.do_login(username, password) 178 | 179 | m = ThreadedCommandFactory({ 180 | 'google': google_controller, 181 | 'bing': bing_controller 182 | } 183 | ) 184 | cmd = do_login_command = { 185 | 'google': ('google_username', 'google_password'), 186 | 'bing': ('bing_username', 'bing_password') 187 | } 188 | cmd = m.create_command(do_login, do_login_command) 189 | cmd.start() 190 | 191 | :param target: function 192 | :param command_pack: dict 193 | :return: Command 194 | """ 195 | 196 | if type(command_pack) != dict: 197 | raise TypeError('Expected a dictionary for the command_pack variable.') 198 | 199 | self.logger.info('Creating threads.') 200 | 201 | try: 202 | items = command_pack.iteritems() 203 | except AttributeError: 204 | items = command_pack.items() 205 | 206 | for key, args in items: 207 | args = (self.controllers[key],) + args 208 | thread = threading.Thread(target=target, args=args) 209 | self.pool.append(thread) 210 | 211 | # Swap variables. 212 | thread_pool, self.pool = self.pool, [] 213 | 214 | return Command(self.logging_val, thread_pool, log_file=self.log_file) 215 | 216 | 217 | class CommandFactory(BaseCommandFactory): 218 | def create_command(self, target, command_pack, dummy_logger_prints=False): 219 | """ 220 | Create a command that will execute jobs one by one. 221 | 222 | :param target: function 223 | :param command_pack: dict 224 | :param dummy_logger_prints: bool 225 | :return: Command 226 | """ 227 | 228 | if type(command_pack) != dict: 229 | raise TypeError('Expected a dictionary for the command_pack variable.') 230 | 231 | self.logger.info('Creating command.') 232 | 233 | try: 234 | items = command_pack.iteritems() 235 | except AttributeError: 236 | items = command_pack.items() 237 | 238 | for key, args in items: 239 | args = (self.controllers[key],) + args 240 | thread = DummyThread(target=target, args=args) 241 | self.pool.append(thread) 242 | 243 | pool, self.pool = self.pool, [] 244 | 245 | return Command(self.logging_val, pool, log_file=self.log_file, dummy_logger_prints=dummy_logger_prints) 246 | 247 | 248 | class Command(object): 249 | def __init__(self, logging, pool, dummy_logger_prints=False, log_file='command_log.txt'): 250 | self.log_file = log_file 251 | if logging: 252 | # Set up the logger # 253 | logging.basicConfig(level=logging.DEBUG) 254 | logger = logging.getLogger(Command.__name__) 255 | 256 | # Create log handler for writing to file. # 257 | handler = logging.FileHandler(self.log_file) 258 | handler.setLevel(logging.DEBUG) 259 | 260 | # Create a logging format. # 261 | formatter = logging.Formatter( 262 | '%(asctime)s - %(name)s.%(levelname)s: %(message)s', 263 | datefmt='%m-%d-%Y %H:%M:%S' 264 | ) 265 | handler.setFormatter(formatter) 266 | 267 | # Add the handlers to the logger. 268 | logger.addHandler(handler) 269 | self.logger = logger 270 | else: 271 | self.logger = DummyLogger(prints=dummy_logger_prints, level='DEBUG') 272 | 273 | self.pool = pool 274 | 275 | def start(self, dump_pool=True, join_threads=True): 276 | """ 277 | Start the threads in the thread pool. 278 | 279 | :param dump_pool: bool 280 | :param join_threads: bool 281 | :return: self 282 | """ 283 | 284 | self.logger.info('Starting command.') 285 | for thread in self.pool: 286 | thread.start() 287 | if join_threads: 288 | for i, thread in enumerate(self.pool): 289 | thread.join() 290 | self.logger.debug('Thread #{} joined: {}'.format(i, thread)) 291 | if dump_pool: 292 | self.logger.debug('Dumping pool.') 293 | self.dump_pool() 294 | return self 295 | 296 | def dump_pool(self): 297 | """ 298 | Remove the threads from the thread pool. 299 | 300 | :return: self 301 | """ 302 | 303 | self.pool = [] 304 | self.logger.info('Threads dumped, 0 threads in pool.') 305 | return self 306 | -------------------------------------------------------------------------------- /Helpers/Controllers.py: -------------------------------------------------------------------------------- 1 | from random import uniform 2 | from time import sleep 3 | from .Commands import Kwargs 4 | from selenium.webdriver.support.wait import WebDriverWait 5 | from selenext.Environment import env 6 | 7 | 8 | # Generate a bunch of decorators for waiting up to 60 seconds. 9 | for __ in range(1, 61): 10 | exec('''def waits{}(function): 11 | """ 12 | A decorator for waiting {} second after function execution. Great for waiting between actions. 13 | 14 | Args: 15 | function: function 16 | 17 | Returns: 18 | wait_decorator: function 19 | """ 20 | 21 | def wait_decorator(*args, **kwargs): 22 | function_result = function(*args, **kwargs) 23 | sleep({}) 24 | return function_result 25 | return wait_decorator'''.format(__, __, __)) 26 | 27 | 28 | def randomly_waits(function): 29 | """ 30 | A decorator for waiting a random amount of time(0.1-3.01 seconds) after function execution. 31 | 32 | Args: 33 | function: function 34 | 35 | Returns: 36 | random_wait_decorator 37 | """ 38 | 39 | def random_wait_decorator(*args, **kwargs): 40 | # Execute function and grab result 41 | function_result = function(*args, **kwargs) 42 | # Sleep 43 | sleep(uniform(0.99, 3.01)) 44 | return function_result 45 | return random_wait_decorator 46 | 47 | 48 | def randomly_waits_env_range(function): 49 | """ 50 | A decorator for waiting a random amount of time after function execution. The range is defined 51 | in the project .env file using the `WAIT_LOW` & `WAIT_HIGH` keys. 52 | 53 | Args: 54 | function: function 55 | 56 | Returns: 57 | random_wait_decorator 58 | """ 59 | 60 | def random_wait_decorator(*args, **kwargs): 61 | # Execute function and grab result 62 | function_result = function(*args, **kwargs) 63 | # Sleep 64 | sleep(uniform(env('WAIT_LOW', func=float), env('WAIT_HIGH', func=float))) 65 | return function_result 66 | return random_wait_decorator 67 | 68 | 69 | def randomly_waits_glance(function): 70 | """ 71 | A decorator for waiting a random amount of time(4.99-15.01 seconds) after function execution. 72 | 73 | Args: 74 | function: function 75 | 76 | Returns: 77 | random_wait_decorator 78 | """ 79 | def random_wait_decorator(*args, **kwargs): 80 | function_result = function(*args, **kwargs) 81 | sleep(uniform(4.99, 15.01)) 82 | return function_result 83 | 84 | return random_wait_decorator 85 | 86 | 87 | def randomly_waits_browse(function): 88 | """ 89 | A decorator for waiting a random amount of time(14.99-30.01 seconds) after function execution. 90 | 91 | Args: 92 | function: function 93 | 94 | Returns: 95 | random_wait_decorator 96 | """ 97 | def random_wait_decorator(*args, **kwargs): 98 | function_result = function(*args, **kwargs) 99 | sleep(uniform(14.99, 30.01)) 100 | return function_result 101 | 102 | return random_wait_decorator 103 | 104 | 105 | @randomly_waits 106 | def human_fill(element, text): 107 | """ 108 | Send keys to an element and wait a random amount of time afterwards. 109 | 110 | Args: 111 | element: 112 | text: 113 | 114 | Returns: 115 | 116 | """ 117 | 118 | return element.send_keys(text) 119 | 120 | 121 | @randomly_waits 122 | def human_click(element): 123 | """ 124 | Click on an element and wait a random amount of time afterwards. 125 | 126 | Args: 127 | element: 128 | 129 | Returns: 130 | 131 | """ 132 | 133 | return element.click() 134 | 135 | 136 | def has_kwargs(function): 137 | """ 138 | Decorator for passing **kwargs with your *args through the use of 139 | the Kwargs object. Note that only 1 Kwargs object can be passed 140 | with the *args to the decorated function. Any additional Kwargs 141 | objects will be ignored. 142 | 143 | Args: 144 | function: function 145 | 146 | Returns: 147 | kwargsable: function 148 | """ 149 | 150 | # Define a local function within has_kwargs 151 | def kwargsable(*args, **kwargs): 152 | args = list(args) 153 | # Extract kwargs from args. 154 | d_args = [thing for thing in args if type(thing) == Kwargs] 155 | 156 | # Check if default kwargs should be used. 157 | kwargs_len = len(kwargs) 158 | if kwargs_len == 0: 159 | # If there is a Kwargs instance in the list then process the kwargs 160 | try: 161 | d_args = d_args[0] # Pop the Kwargs instance off the list 162 | except IndexError: 163 | # func is equal to some_controller_func 164 | return function(*args, **{}) 165 | 166 | # Remove kwargs from args 167 | args = [item for item in args if type(item) != Kwargs] 168 | 169 | # Get kwargs from the dictionary args if default kwargs aren't used 170 | if kwargs_len == 0: 171 | kwargs = {k: v for (k, v) in d_args} 172 | # Execute the func, which is some_controller_func 173 | return function(*args, **kwargs) 174 | 175 | # Return the local callable function `_process` 176 | return kwargsable 177 | 178 | 179 | class PageController(object): 180 | """ 181 | Standard controller for controlling a page. 182 | """ 183 | def __init__(self, page): 184 | self.page = page 185 | 186 | @randomly_waits 187 | def fill(self, element, text): 188 | """ 189 | Send keys to an element, then randomly wait. 190 | 191 | Args: 192 | element: 193 | text: 194 | 195 | Returns: 196 | self 197 | """ 198 | 199 | element.send_keys(text) 200 | return self 201 | 202 | @randomly_waits 203 | def click(self, element): 204 | """ 205 | Click on an element, then randomly wait. 206 | 207 | Args: 208 | element: 209 | 210 | Returns: 211 | self 212 | """ 213 | 214 | element.click() 215 | return self 216 | 217 | 218 | class LoginPageController(PageController): 219 | """ 220 | A generic controller for logging in to a webpage. It uses a `Page` object and a 221 | `WebDriverWait` object to do the job. 222 | """ 223 | def __init__(self, page): 224 | super(LoginPageController, self).__init__(page) 225 | 226 | @randomly_waits 227 | def do_login(self, username, password, remember_me=False, stay_logged_in=False, wait_func=False, navigate=False): 228 | """ 229 | Log in to a web page using the given `Page` object as a template. The `username` and `password` 230 | attributes must be set on the `Page` object along with an attribute that defines a `logged_in` 231 | `PageState`. If you want to use the `remember_me` keyword arg, you must also have that attribute 232 | set on the `Page` object. If you want the function to navigate to the login page and wait for the 233 | login form to be presented you need to set the `login_form_displayed` `PageState` and 234 | 235 | Args: 236 | username: str 237 | password: str 238 | remember_me: bool 239 | stay_logged_in: bool 240 | wait_func: func, bool 241 | navigate: bool 242 | 243 | Returns: 244 | self 245 | """ 246 | 247 | if navigate: 248 | self.page.get(self.page.login_page) 249 | self.page.state.login_form_displayed.wait() 250 | 251 | self.fill(self.page.username, username) 252 | self.fill(self.page.password, password) 253 | 254 | # Click remember me checkbox 255 | if remember_me: 256 | self.click(self.page.remember_me) 257 | 258 | # Stay logged in checkbox 259 | if stay_logged_in: 260 | self.click(self.page.stay_logged_in) 261 | 262 | # Click login button 263 | self.page.login_button.click() 264 | 265 | # Call the wait function. 266 | if wait_func: 267 | self.wait.until(wait_func) 268 | 269 | self.page.state.logged_in.wait() 270 | 271 | return self 272 | 273 | 274 | class SearchPageController(PageController): 275 | """ 276 | A generic controller for a simple search page. Just pass in your `Page` object 277 | with a `search_input` and a `search_button` defined, along with a `WebDriverWait` 278 | object. You can call the `perform_search` method and it will fill in the input 279 | using the `Page` object passed in, then click the search button using the `Page` 280 | object. You can also pass in the `wait_func` keyword to set a wait function that 281 | should be called before returning control. 282 | """ 283 | def __init__(self, page, wait): 284 | super(SearchPageController, self).__init__(page, wait=wait) 285 | 286 | def perform_search(self, term, wait_func=False): 287 | """ 288 | Search for the given term using the page object and the wait object if it is 289 | set. 290 | 291 | Args: 292 | term: 293 | wait_func: 294 | 295 | Returns: 296 | self 297 | """ 298 | self.fill(self.page.search_input, term) 299 | self.page.search_button.click() 300 | 301 | if wait_func: 302 | self.wait.until(wait_func) 303 | 304 | return self 305 | 306 | 307 | class IndependentController(object): 308 | """ 309 | The base class for a threaded controller setup. 310 | """ 311 | def attach_driver(self, driver, timeout=30): 312 | """ 313 | Drivers must be attached after the controller has been instantiated so each controller has 314 | its own driver. This will also attach a WebDriverWait to the class instance. 315 | 316 | Args: 317 | driver: Selenium WebDriver 318 | timeout: int 319 | 320 | Returns: 321 | self 322 | """ 323 | 324 | self.driver = driver 325 | self.wait = WebDriverWait(self.driver, timeout) 326 | return self 327 | 328 | def call(self, method_name, *args, **kwargs): 329 | """ 330 | Call one of the controller's methods with the given *args or **kwargs 331 | 332 | Args: 333 | method_name: 334 | *args: 335 | **kwargs: 336 | 337 | Returns: 338 | method results 339 | """ 340 | 341 | # Grab the method from self. 342 | method = getattr(self, method_name) 343 | 344 | # Determine how to call the method and return the results. 345 | if args and kwargs: 346 | return method(*args, **kwargs) 347 | elif args and not kwargs: 348 | return method(*args) 349 | elif kwargs and not args: 350 | return method(**kwargs) 351 | else: 352 | return method() 353 | -------------------------------------------------------------------------------- /LICENCE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2016 py-am-i 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Helpers/Requests/__init__.py: -------------------------------------------------------------------------------- 1 | from uuid import uuid4 2 | import requests 3 | from bs4 import BeautifulSoup 4 | from lxml import etree 5 | from json import loads 6 | from .Exceptions import NoSuchElementException 7 | 8 | 9 | class WebElement(object): 10 | """ 11 | The Requests WebElement behaves very much like a selenium WebElement 12 | except that it does not provide any methods for interaction like 13 | clicking and sending keys. DOM Traversal and data extraction 14 | methods are provided, and attributes like WebElement.text 15 | are available. 16 | """ 17 | def __init__(self, soup, response, url, parent=None): 18 | 19 | self.soup = None 20 | self.make_soup(soup, response) 21 | 22 | self.tag_name = self.soup.name if self.soup is not None else None 23 | self.text = self.soup.text if self.soup is not None else None 24 | try: 25 | self.content = self.soup.content if self.soup is not None else None 26 | except AttributeError: 27 | self.content = None 28 | self.current_response = response 29 | self.current_url = url 30 | self.size = 0, 0 31 | self.location = 0, 0 32 | self.rect = 0, 0 33 | self.screenshot_as_png = None 34 | self.screenshot_as_base64 = None 35 | self.parent = parent 36 | self.location_once_scrolled_into_view = 0, 0 37 | self.id = uuid4() 38 | 39 | def __getitem__(self, item): 40 | return self.soup[item] 41 | 42 | def make_soup(self, soup, response): 43 | if soup is None: 44 | if response is not None: 45 | soup = BeautifulSoup(response, 'html.parser') 46 | tag = None 47 | for thing in soup.children: 48 | tag = getattr(soup, thing.name) 49 | # If we don't have a valid tag object, then continue looking. 50 | if not tag or tag is None: 51 | continue 52 | break 53 | 54 | self.soup = tag 55 | else: 56 | self.soup = soup 57 | 58 | return self 59 | 60 | def get_attribute(self, attribute, **kwargs): 61 | return self.soup[attribute] 62 | 63 | def value_of_css_property(self, name): 64 | return None 65 | 66 | def find_element_by_id(self, element_id): 67 | """ 68 | Find an element in the DOM by id. 69 | 70 | Args: 71 | element_id: 72 | 73 | Returns: 74 | WebElement 75 | """ 76 | 77 | ele = self.soup.find(id=element_id) 78 | if ele is None: 79 | raise NoSuchElementException('The element could not be located by id: {}'.format(element_id)) 80 | 81 | return WebElement(ele, self.current_response, self.current_url, parent=self.id) 82 | 83 | def find_element_by_name(self, name): 84 | """ 85 | Find an element in the DOM by name. 86 | 87 | Args: 88 | name: 89 | 90 | Returns: 91 | WebElement 92 | """ 93 | ele = self.soup.find(attrs={'name': name}) 94 | if ele is None: 95 | raise NoSuchElementException('The element could not be located by name: {}'.format(name)) 96 | 97 | return WebElement(ele, self.current_response, self.current_url, parent=self.id) 98 | 99 | def find_element_by_class_name(self, class_name): 100 | """ 101 | Find an element in the DOM by class name. 102 | 103 | Args: 104 | class_name: 105 | 106 | Returns: 107 | WebElement 108 | """ 109 | ele = self.soup.find(attrs={'class': class_name}) 110 | if ele is None: 111 | raise NoSuchElementException('The element could not be located by class name: {}'.format(class_name)) 112 | 113 | return WebElement(ele, self.current_response, self.current_url, parent=self.id) 114 | 115 | def find_element_by_tag_name(self, tag_name): 116 | """ 117 | Find an element by tag name. 118 | 119 | Args: 120 | tag_name: 121 | 122 | Returns: 123 | WebElement 124 | """ 125 | 126 | ele = self.soup.find(tag_name) 127 | if ele is None: 128 | raise NoSuchElementException('The element could not be located by tag name: {}'.format(tag_name)) 129 | 130 | return WebElement(ele, self.current_response, self.current_url, parent=self.id) 131 | 132 | def find_element_by_css_selector(self, selector): 133 | """ 134 | Find an element in the DOM by css selector. 135 | 136 | Args: 137 | selector: 138 | 139 | Returns: 140 | WebElement 141 | """ 142 | ele = self.soup.select(selector)[0] 143 | if ele is None: 144 | raise NoSuchElementException('The element could not be located by css selector: {}'.format(selector)) 145 | 146 | return WebElement(ele, self.current_response, self.current_url, parent=self.id) 147 | 148 | def find_element_by_xpath(self, xpath): 149 | """ 150 | Find an element in the DOM by xpath. 151 | 152 | Args: 153 | xpath: 154 | 155 | Returns: 156 | WebElement 157 | """ 158 | 159 | tree = etree.fromstring(self.current_response, etree.HTMLParser()) 160 | element = tree.xpath(xpath)[0] 161 | self.current_response = etree.tostring(element) 162 | # print(self.current_response) 163 | return WebElement(None, self.current_response, self.current_url, parent=self.id) 164 | 165 | # %%%%%%%%%%%%%%%%%%% Find elements %%%%%%%%%%%%%%%%%%% # 166 | def find_elements_by_id(self, element_id): 167 | """ 168 | Find all elements in the DOM matching the given id. 169 | 170 | Args: 171 | element_id: 172 | 173 | Returns: 174 | list 175 | """ 176 | 177 | resp = self.current_response 178 | url = self.current_url 179 | return [WebElement(element, resp, url, parent=self.id) for element in self.soup.find_all(id=element_id)] 180 | 181 | def find_elements_by_name(self, name): 182 | """ 183 | Find all elements in the DOM matching the given name. 184 | 185 | Args: 186 | name: 187 | 188 | Returns: 189 | list 190 | """ 191 | 192 | resp = self.current_response 193 | url = self.current_url 194 | return [WebElement(element, resp, url, parent=self.id) for element in self.soup.find_all(attrs={'name': name})] 195 | 196 | def find_elements_by_class_name(self, name): 197 | """ 198 | Find all elements in the DOM matching the given class name. 199 | 200 | Args: 201 | name: 202 | 203 | Returns: 204 | list 205 | """ 206 | resp = self.current_response 207 | url = self.current_url 208 | return [WebElement(element, resp, url, parent=self.id) for element in self.soup.find_all(attrs={'class': name})] 209 | 210 | def find_elements_by_tag_name(self, name): 211 | """ 212 | Find all elements in the DOM matching the given tag name. 213 | 214 | Args: 215 | name: 216 | 217 | Returns: 218 | list 219 | """ 220 | 221 | resp = self.current_response 222 | url = self.current_url 223 | return [WebElement(element, resp, url, parent=self.id) for element in self.soup.find_all(name)] 224 | 225 | def find_elements_by_css_selector(self, selector): 226 | """ 227 | Find all elements in the DOM matching the given css selector. 228 | 229 | Args: 230 | selector: 231 | 232 | Returns: 233 | list 234 | """ 235 | 236 | resp = self.current_response 237 | url = self.current_url 238 | return [WebElement(element, resp, url, parent=self.id) for element in self.soup.select(selector)] 239 | 240 | def find_elements_by_xpath(self, xpath): 241 | """ 242 | Find all elements in the DOM matching the given xpath. 243 | 244 | Args: 245 | xpath: 246 | 247 | Returns: 248 | list 249 | """ 250 | 251 | tree = etree.fromstring(self.current_response, etree.HTMLParser()) 252 | elements = tree.xpath(xpath) 253 | 254 | output_elements = [] 255 | for element in elements: 256 | resp = etree.tostring(element) 257 | output_elements.append(WebElement(None, resp, self.current_url, parent=self.id)) 258 | 259 | return output_elements 260 | 261 | 262 | class WebHistory(object): 263 | def __init__(self): 264 | self.index = -1 265 | self.history = [] 266 | 267 | def __getitem__(self, item): 268 | return self.history[item] 269 | 270 | def current_url(self): 271 | """ 272 | Get the current index's url. 273 | 274 | Returns: 275 | str 276 | """ 277 | 278 | return self.history[self.index] 279 | 280 | def register(self, url): 281 | """ 282 | Register a url with the history list. 283 | 284 | Args: 285 | url: 286 | str 287 | Returns: 288 | self 289 | """ 290 | 291 | self.history.append(url) 292 | self.index += 1 293 | return self 294 | 295 | def back(self): 296 | """ 297 | Move the pointer back in the history and return the url for that new index. 298 | 299 | Returns: 300 | str 301 | """ 302 | 303 | self.index -= 1 304 | return self.history[self.index] 305 | 306 | def forward(self): 307 | """ 308 | Move the pointer forward in the history and return the url for that new index. 309 | 310 | Returns: 311 | str 312 | """ 313 | 314 | self.index += 1 315 | return self.history[self.index] 316 | 317 | 318 | class WebReader(WebElement): 319 | def __init__(self): 320 | self.soup = None 321 | self.current_response = None 322 | self.current_url = None 323 | 324 | self.requests = requests 325 | self.web_history = WebHistory() 326 | 327 | self.size = 0, 0 328 | self.location = 0, 0 329 | self.rect = 0, 0 330 | self.screenshot_as_png = None 331 | self.screenshot_as_base64 = None 332 | self.location_once_scrolled_into_view = 0, 0 333 | self.id = uuid4() 334 | 335 | super(WebReader, self).__init__(None, None, None) 336 | 337 | def back(self): 338 | """ 339 | Navigate to the last place in the web history. 340 | 341 | Returns: 342 | self 343 | """ 344 | 345 | self.get(self.web_history.back()) 346 | return self 347 | 348 | def close(self): 349 | """ 350 | Does nothing. Is just a placeholder method. 351 | 352 | Returns: 353 | self 354 | """ 355 | 356 | return self 357 | 358 | def forward(self): 359 | """ 360 | Navigate to the next place in the web history. 361 | 362 | Returns: 363 | self 364 | """ 365 | 366 | self.get(self.web_history.forward()) 367 | return self 368 | 369 | def get(self, url, headers=None, add_to_history=True, cookies=None): 370 | """ 371 | Get a response for the given url. 372 | 373 | Args: 374 | url: 375 | headers: 376 | add_to_history: 377 | cookies: 378 | 379 | Returns: 380 | str 381 | """ 382 | 383 | if add_to_history: 384 | self.web_history.register(url) 385 | self.current_url = url 386 | 387 | if headers is None: 388 | headers = {} 389 | 390 | self.current_response = self.requests.get( 391 | url, 392 | headers=headers, 393 | cookies=cookies if cookies else {} 394 | ).text.strip() 395 | 396 | # Check for json response and if so, then return a dictionary 397 | # and set the current response to the dictionary. 398 | if self.current_response[0] == '{': 399 | if hasattr(self.current_response, 'json'): 400 | self.current_response = self.current_response.json() 401 | self.current_response = loads(self.current_response) 402 | self.soup = None 403 | return self.current_response 404 | 405 | # Make soup 406 | self.soup = BeautifulSoup(self.current_response, 'html.parser') 407 | 408 | return self.current_response 409 | 410 | def quit(self): 411 | """ 412 | Does nothing. Is just a placeholder method. 413 | 414 | Returns: 415 | self 416 | """ 417 | 418 | return self 419 | 420 | def refresh(self): 421 | """ 422 | Grab the current_url again. 423 | 424 | Returns: 425 | self 426 | """ 427 | 428 | self.get(self.web_history.current_url(), add_to_history=False) 429 | return self 430 | -------------------------------------------------------------------------------- /Helpers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from time import sleep 3 | from selenium.common.exceptions import NoSuchElementException, TimeoutException 4 | from .Requests import WebReader 5 | from json import loads 6 | import re 7 | 8 | 9 | class PageState(object): 10 | """ 11 | Object for holding the definition of a web page's state, waiting for the state, 12 | and checking to see if the web page is in the defined state. 13 | """ 14 | def __init__(self, driver, elements, state_dict): 15 | self._driver = driver 16 | self._elements = elements 17 | self._state_dict = state_dict 18 | 19 | def __call__(self, *args, **kwargs): 20 | """ 21 | Return True if the page matches the given state and False if not. 22 | 23 | :param args: 24 | :param kwargs: 25 | :return: bool 26 | """ 27 | 28 | for aen in self._pull_from_dict(self._state_dict, 'exists'): 29 | if not self._get_page_element(aen).exists(): 30 | return False 31 | 32 | for den in self._pull_from_dict(self._state_dict, 'absent'): 33 | if self._get_page_element(den).exists(): 34 | return False 35 | 36 | for nden in self._pull_from_dict(self._state_dict, 'not_displayed'): 37 | nden_page_element = self._get_page_element(nden) 38 | if not nden_page_element.exists(): 39 | return False 40 | if nden_page_element.exists() and nden_page_element.is_displayed(): 41 | return False 42 | 43 | for disen in self._pull_from_dict(self._state_dict, 'displayed'): 44 | disen_page_element = self._get_page_element(disen) 45 | disen_exists = disen_page_element.exists() 46 | if not disen_exists: 47 | return False 48 | elif disen_exists and not disen_page_element().is_displayed(): 49 | return False 50 | 51 | for enabled_en in self._pull_from_dict(self._state_dict, 'enabled'): 52 | enabled_page_element = self._get_page_element(enabled_en) 53 | enabled_exists = enabled_page_element.exists() 54 | if not enabled_exists: 55 | return False 56 | elif enabled_exists and not enabled_page_element().is_enabled(): 57 | return False 58 | 59 | for enabled_en in self._pull_from_dict(self._state_dict, 'disabled'): 60 | enabled_page_element = self._get_page_element(enabled_en) 61 | enabled_exists = enabled_page_element.exists() 62 | if not enabled_exists: 63 | return False 64 | elif enabled_exists and enabled_page_element().is_enabled(): 65 | return False 66 | 67 | return True 68 | 69 | def wait(self, timeout=30): 70 | """ 71 | Wait for the conditions set in the state dict to be met. 72 | 73 | Args: 74 | timeout: 75 | 76 | Returns: 77 | self 78 | """ 79 | 80 | page_settings = self._state_dict 81 | 82 | exists = self._pull_from_dict(page_settings, 'exists') 83 | absent = self._pull_from_dict(page_settings, 'absent') 84 | not_displayed = self._pull_from_dict(page_settings, 'not_displayed') 85 | displayed = self._pull_from_dict(page_settings, 'displayed') 86 | enabled = self._pull_from_dict(page_settings, 'enabled') 87 | disabled = self._pull_from_dict(page_settings, 'disabled') 88 | 89 | for appears_element_name in exists: 90 | self._get_page_element(appears_element_name).wait_exists(timeout=timeout) 91 | 92 | for displayed_element_name in displayed: 93 | self._get_page_element(displayed_element_name).wait_displayed(timeout=timeout) 94 | 95 | for enabled_element_name in enabled: 96 | self._get_page_element(enabled_element_name).wait_enabled(timeout=timeout) 97 | 98 | for disabled_element_name in disabled: 99 | self._get_page_element(disabled_element_name).wait_disabled(timeout=timeout) 100 | 101 | for not_displayed_element_name in not_displayed: 102 | self._get_page_element(not_displayed_element_name).wait_not_displayed(timeout=timeout) 103 | 104 | for disappear_element_name in absent: 105 | self._get_page_element(disappear_element_name).wait_absent(timeout=timeout) 106 | 107 | return self 108 | 109 | def _pull_from_dict(self, settings, name): 110 | try: 111 | return settings[name] 112 | except KeyError: 113 | return [] 114 | 115 | def _get_page_element(self, name): 116 | """ 117 | Get a PageElement from the datastore using the given name. 118 | Args: 119 | name: 120 | 121 | Returns: 122 | 123 | """ 124 | 125 | ele = self._elements[name] 126 | if isinstance(ele, PageElement): 127 | output_element = self._elements[name] 128 | else: 129 | output_element = PageElement(self._driver, ele) 130 | 131 | return output_element 132 | 133 | 134 | class PageStateContainer(object): 135 | """ 136 | Container for holding PageState objects and allowing dynamic access to them as attributes. 137 | """ 138 | def __init__(self, driver, elements, state_dict): 139 | self._driver = driver 140 | self._elements = elements 141 | self._state_dict = state_dict 142 | self._handle_state_dict(state_dict) 143 | 144 | def _handle_state_dict(self, state_dict): 145 | """ 146 | Handle the definition of the web page states by setting up the _states attributes and 147 | dynamically setting up the rest of the attributes in the dictionary. 148 | 149 | Args: 150 | state_dict: 151 | dict 152 | Returns: 153 | self 154 | """ 155 | try: 156 | state_dict_iterator = state_dict.iteritems() 157 | except AttributeError: 158 | state_dict_iterator = state_dict.items() 159 | 160 | self._states = {} 161 | 162 | for page_name, settings in state_dict_iterator: 163 | self._states[page_name] = PageState(self._driver, self._elements, settings) 164 | setattr(self, page_name, self._states[page_name]) 165 | 166 | return self 167 | 168 | def wait(self, state_name, timeout=30): 169 | """ 170 | Wait for the web page to be in the defined state(by state_name). Only invoke this 171 | method when you expect the web page to be in that current state. 172 | 173 | Args: 174 | state_name: 175 | str 176 | timeout: 177 | None or int 178 | 179 | Returns: 180 | self 181 | """ 182 | 183 | state = getattr(self, state_name) 184 | state.wait(timeout=timeout) 185 | 186 | return self 187 | 188 | 189 | class PageElement(object): 190 | """ 191 | The PageElement object holds a selenium WebDriver instance and directions on how to 192 | find a given element on the page. When an instance of the PageElement object has 193 | it's __call__ method invoked it will use the WebDriver instance to look up and 194 | return the WebElement it finds. If the element cannot be found, it will raise 195 | the same selenium error. 196 | """ 197 | def __init__(self, driver, element_dict): # , parent=None): 198 | self.driver = driver 199 | self.element_dict = element_dict 200 | self.parent = None 201 | # self.parent = parent 202 | 203 | try: 204 | bind_path = element_dict['bind'] 205 | except KeyError: 206 | bind_path = False 207 | 208 | # Handle any binds. 209 | if bind_path: 210 | new_func = self._get_bind(bind_path) 211 | # Set the bind to the object we pulled in, instead of the string that locates the object. 212 | element_dict['bind'] = new_func 213 | 214 | try: 215 | frame_location = element_dict['frame'] 216 | except KeyError: 217 | frame_location = False 218 | 219 | if frame_location: 220 | frame_location = self._handle_frame(frame_location) 221 | element_dict['frame'] = frame_location 222 | 223 | # Set up the parent element 224 | try: 225 | parent_location = element_dict['parent'] 226 | except KeyError: 227 | parent_location = False 228 | 229 | if parent_location: 230 | parent_location = self._handle_parent(parent_location) 231 | element_dict['parent'] = parent_location 232 | 233 | self._handle_element_dict(element_dict) 234 | 235 | def _handle_parent(self, parent_location): 236 | """ 237 | Return a parent element based on teh parent location dictionary. 238 | 239 | Args: 240 | parent_location: 241 | dict 242 | 243 | Returns: 244 | ParentElement 245 | """ 246 | 247 | if not isinstance(parent_location, ParentElement): 248 | element = ParentElement(self.driver, parent_location) 249 | else: 250 | element = parent_location 251 | return element 252 | 253 | def __call__(self, *args, **kwargs): 254 | """ 255 | Lookup the WebElement using the WebDriver. It will navigate parent elements 256 | and even frames. Frames can even be selected using parent elements(in the 257 | JSON). 258 | 259 | Args: 260 | *args: 261 | **kwargs: 262 | 263 | Returns: 264 | WebElement 265 | """ 266 | 267 | if not isinstance(self, Frame) and not isinstance(self.driver, WebReader): 268 | self.driver.switch_to_default_content() 269 | 270 | # Navigate to the given frame. 271 | if hasattr(self, 'frame'): 272 | frame = getattr(self, 'frame') 273 | self.driver.switch_to.frame(frame()) 274 | 275 | # Get the selenium driver method that is used to look up the element. 276 | # find_element_by_* 277 | lookup_method = self._get_lookup_method() 278 | 279 | # Get the web element using the given selector and lookup method. 280 | output = lookup_method(getattr(self, 'selector')) 281 | if hasattr(self, 'index') and type(output) == list: 282 | output = output[int(self.index)] 283 | 284 | # Since parents are used to locate elements, we 285 | # don't need to worry about any bindings. 286 | if isinstance(self, ParentElement): 287 | return output 288 | 289 | # Run any regex search statements on the text output. 290 | if hasattr(self, 'regex'): 291 | regex_flags = 0 if not hasattr(self, 'regex_flags') else getattr(self, 'regex_flags') 292 | # Handle case where the `multiple` flag is set in the JSON and the `regex` flag is set. 293 | if type(output) == list: 294 | output = [re.search(getattr(self, 'regex'), t.text, flags=regex_flags) for t in output] 295 | output = [match.group() for match in output if match is not None] 296 | else: 297 | # Since the `multiple` flag is not set we can go ahead and do the regex search on 298 | # the output text. 299 | output = re.search(getattr(self, 'regex'), output.text, flags=regex_flags) 300 | if output is not None: 301 | output = output.group() 302 | 303 | # Run any regex findall statements on the text output. 304 | if hasattr(self, 'regex_all'): 305 | # Handle the case where the `multiple` flag is set in the JSON and the `regex_all` flag is set. 306 | regex_flags = 0 if not hasattr(self, 'regex_flags') else getattr(self, 'regex_flags') 307 | if type(output) == list: 308 | output = [re.findall(getattr(self, 'regex_all'), t.text, flags=regex_flags) for t in output] 309 | else: 310 | output = re.findall(getattr(self, 'regex_all'), output.text, flags=regex_flags) 311 | 312 | # Bind the text if needed. 313 | if hasattr(self, 'bind'): 314 | binding = getattr(self, 'bind') 315 | if type(output) == list: 316 | output = [binding(t.text) for t in output] 317 | else: 318 | output = binding(output.text) 319 | return output 320 | 321 | def exists(self): 322 | """ 323 | Return true if the element exists and/or the given element contains text that matches 324 | the given regex pattern(if set). 325 | 326 | Returns: 327 | bool 328 | """ 329 | 330 | try: 331 | lookup_method = self._get_lookup_method() 332 | ele = lookup_method(getattr(self, 'selector')) 333 | 334 | if ele: 335 | # Check if we have a list of elements instead of a single element. 336 | # if type(ele) != list: 337 | # # Check the regex attribute to see if we need to extract anything. 338 | # if hasattr(self, 'regex'): 339 | # # Run a regex search to see if we get any results. 340 | # if re.search(getattr(self, 'regex'), ele.text) is None: 341 | # return False 342 | # # If we have a regex_all flag then we need to run the re.findall method on the pattern 343 | # # and element text to see if we get a match. 344 | # elif hasattr(self, 'regex_all'): 345 | # if not re.findall(getattr(self, 'regex_all'), ele.text): 346 | # return False 347 | return True 348 | return False 349 | except NoSuchElementException: 350 | return False 351 | 352 | def wait_disabled(self, timeout=30): 353 | """ 354 | Wait for the element to be disabled. 355 | 356 | Args: 357 | timeout: 358 | None or int 359 | 360 | Returns: 361 | self 362 | """ 363 | 364 | wait_time = 0 365 | 366 | web_element = self() 367 | 368 | if type(web_element) == list: 369 | for thing in web_element: 370 | while thing.is_enabled(): 371 | sleep(1) 372 | wait_time += 1 373 | if timeout is not None: 374 | if wait_time >= timeout: 375 | raise TimeoutException() 376 | return self 377 | 378 | while web_element.is_enabled(): 379 | sleep(1) 380 | wait_time += 1 381 | if timeout is not None: 382 | if wait_time >= timeout: 383 | raise TimeoutException() 384 | return self 385 | 386 | def wait_enabled(self, timeout=30): 387 | """ 388 | Wait for the element to be enabled. 389 | 390 | Args: 391 | timeout: 392 | None or int 393 | 394 | Returns: 395 | self 396 | """ 397 | 398 | wait_time = 0 399 | 400 | web_element = self() 401 | 402 | if type(web_element) == list: 403 | for thing in web_element: 404 | while not thing.is_enabled(): 405 | sleep(1) 406 | wait_time += 1 407 | if timeout is not None: 408 | if wait_time >= timeout: 409 | raise TimeoutException() 410 | return self 411 | 412 | while not web_element.is_enabled(): 413 | sleep(1) 414 | wait_time += 1 415 | if timeout is not None: 416 | if wait_time >= timeout: 417 | raise TimeoutException() 418 | return self 419 | 420 | def wait_not_displayed(self, timeout=30): 421 | """ 422 | Wait for the element to not be displayed any longer. 423 | 424 | Args: 425 | timeout: 426 | None or int 427 | 428 | Returns: 429 | self 430 | """ 431 | 432 | wait_time = 0 433 | 434 | ele = self() 435 | 436 | if type(ele) == list: 437 | for thing in ele: 438 | while thing.is_displayed(): 439 | sleep(1) 440 | wait_time += 1 441 | if timeout is not None: 442 | if wait_time >= timeout: 443 | raise TimeoutException() 444 | return self 445 | 446 | while ele.is_displayed(): 447 | sleep(1) 448 | wait_time += 1 449 | if timeout is not None: 450 | if wait_time >= timeout: 451 | raise TimeoutException() 452 | return self 453 | 454 | def wait_displayed(self, timeout=None): 455 | """ 456 | Wait for the element to be displayed. 457 | 458 | Args: 459 | timeout: 460 | None or int 461 | 462 | Returns: 463 | self 464 | """ 465 | 466 | wait_time = 0 467 | 468 | ele = self() 469 | 470 | if type(ele) == list: 471 | for thing in ele: 472 | while not thing.is_displayed(): 473 | sleep(1) 474 | wait_time += 1 475 | if timeout is not None: 476 | if wait_time >= timeout: 477 | raise TimeoutException() 478 | return self 479 | 480 | while not ele.is_displayed(): 481 | sleep(1) 482 | wait_time += 1 483 | if timeout is not None: 484 | if wait_time >= timeout: 485 | raise TimeoutException() 486 | return self 487 | 488 | def wait_exists(self, timeout=None): 489 | """ 490 | Wait for the element to appear in the DOM. 491 | 492 | Args: 493 | timeout: 494 | None or int 495 | 496 | Returns: 497 | self 498 | """ 499 | 500 | wait_time = 0 501 | 502 | while not self.exists(): 503 | sleep(1) 504 | wait_time += 1 505 | if timeout is not None: 506 | if wait_time >= timeout: 507 | raise TimeoutException() 508 | return self 509 | 510 | def wait_absent(self, timeout=None): 511 | """ 512 | Wait for the element to no longer appear in the DOM. 513 | 514 | Args: 515 | timeout: 516 | None or int 517 | 518 | Returns: 519 | self 520 | """ 521 | 522 | wait_time = 0 523 | while self.exists(): 524 | sleep(1) 525 | wait_time += 1 526 | if timeout is not None: 527 | if wait_time >= timeout: 528 | raise TimeoutException() 529 | return self 530 | 531 | def _handle_element_dict(self, element_dict): 532 | """ 533 | Set attributes based on the given dict. 534 | 535 | Args: 536 | element_dict: 537 | dict 538 | Returns: 539 | self 540 | """ 541 | 542 | try: 543 | iterable = element_dict.iteritems() 544 | except AttributeError: 545 | iterable = element_dict.items() 546 | 547 | for k, v in iterable: 548 | setattr(self, k, v) 549 | 550 | return self 551 | 552 | def _get_lookup_method(self): 553 | """ 554 | Return the method used for looking up the element. 555 | 556 | Returns: 557 | WebDriver method 558 | """ 559 | 560 | if self.parent is not None: 561 | parent = getattr(self, 'parent')() 562 | else: 563 | parent = self.driver 564 | # Handle finding multiple elements 565 | if hasattr(self, 'multiple'): 566 | lookup_method = getattr(parent, 'find_elements_by_{}'.format(getattr(self, 'lookup_method'))) 567 | else: 568 | lookup_method = getattr(parent, 'find_element_by_{}'.format(getattr(self, 'lookup_method'))) 569 | 570 | return lookup_method 571 | 572 | def _get_bind(self, bind_path): 573 | """ 574 | Get the callable object based on the bind path given. 575 | 576 | Args: 577 | bind_path: 578 | str or int 579 | Returns: 580 | callable 581 | """ 582 | 583 | # Handle direct imports 584 | if type(bind_path) == list: 585 | imp, obj = bind_path 586 | p = __import__(imp, fromlist=['']) 587 | new_func = getattr(p, obj) 588 | # Check if the bind_path exists in the builtins and use that if it does. 589 | elif bind_path in __builtins__: 590 | new_func = __builtins__[bind_path] 591 | # Check to see if it's drilling into a module function or class. 592 | elif '.' in bind_path: 593 | steps = bind_path.split('.') 594 | new_func = __import__(steps[0], fromlist=['']) 595 | for step in steps[1:]: 596 | new_func = getattr(new_func, step) 597 | # If none of those things apply then try a regular import 598 | else: 599 | try: 600 | new_func = __import__(bind_path, fromlist=['']) 601 | except ImportError: 602 | raise ImportError('Could not find the object to bind to: {}'.format(bind_path)) 603 | 604 | return new_func 605 | 606 | def _handle_frame(self, frame_location): 607 | """ 608 | Return a Frame instance based on the given frame location. 609 | 610 | Args: 611 | frame_location: 612 | dict 613 | Returns: 614 | Frame 615 | """ 616 | 617 | return Frame(self.driver, frame_location) 618 | 619 | 620 | class ParentElement(PageElement): 621 | """ 622 | Needed so the PageElement object knows to handle these instances differently. 623 | """ 624 | pass 625 | 626 | 627 | class Frame(PageElement): 628 | """ 629 | Needed so the PageElement object knows to handle these instances differently. 630 | """ 631 | pass 632 | 633 | 634 | class View(object): 635 | """ 636 | The View object is a container for view/page's element dict. It sets up 637 | everything so that the page object can access what it needs. 638 | """ 639 | def __init__(self, driver, view_dict): 640 | self.driver = driver 641 | self.view_dict = view_dict 642 | self._handle_view_dict(view_dict) 643 | 644 | def get(self, item): 645 | """ 646 | Wrapper around WebDriver().get() 647 | 648 | Args: 649 | item: 650 | 651 | Returns: 652 | 653 | """ 654 | 655 | return self.driver.get(item) 656 | 657 | def __getattribute__(self, item): 658 | thing = object.__getattribute__(self, item) 659 | # if isinstance(thing, PageElement): 660 | # return thing() 661 | return thing 662 | 663 | def _handle_view_dict(self, view_dict): 664 | """ 665 | Handle the elements in the view dict. Set attributes and handle all the 666 | special attributes in the dict. 667 | 668 | Args: 669 | view_dict: 670 | 671 | Returns: 672 | self 673 | """ 674 | try: 675 | iterable = view_dict.iteritems() 676 | except AttributeError: 677 | iterable = view_dict.items() 678 | 679 | for k, v in iterable: 680 | if k == 'elements': 681 | self._handle_elements(v) 682 | if k == 'states': 683 | k = '_states' 684 | setattr(self, k, v) 685 | 686 | if hasattr(self, 'elements') and hasattr(self, '_states'): 687 | self.state = PageStateContainer(self.driver, self.elements, self._states) 688 | 689 | return self 690 | 691 | def _handle_elements(self, element_dict): 692 | """ 693 | Set up the elements dictionary along with the element attributes. 694 | 695 | Args: 696 | element_dict: 697 | 698 | Returns: 699 | self 700 | """ 701 | 702 | try: 703 | element_dict_iterator = element_dict.iteritems() 704 | except AttributeError: 705 | element_dict_iterator = element_dict.items() 706 | 707 | self.elements = {} 708 | 709 | for element_name, the_dict in element_dict_iterator: 710 | # Set the element up in the dict. 711 | 712 | self.elements[element_name] = PageElement(self.driver, the_dict) 713 | 714 | # has_parent = False 715 | # try: 716 | # _ = the_dict['parent'] 717 | # has_parent = True 718 | # except KeyError: 719 | # pass 720 | 721 | # Check for parent element. 722 | # if has_parent: 723 | # parent = ParentElement(self.driver, the_dict['parent']) 724 | # # Remove parent element 725 | # del the_dict['parent'] 726 | # else: 727 | # parent = None 728 | 729 | # self.elements[element_name] = PageElement(self.driver, the_dict, parent=parent) 730 | setattr(self, element_name, self.elements[element_name]) 731 | 732 | return self 733 | 734 | 735 | class Page(object): 736 | """ 737 | The Page object is a light wrapper around the View object. They are almost the same 738 | but the Page object makes accessing elements on the page a bit simpler. 739 | """ 740 | def __init__(self, driver, view_dict): 741 | self.driver = driver 742 | view_dict = dict(view_dict) 743 | self.view = View(driver, view_dict) 744 | 745 | def wait_ready(self, wait=0.1): 746 | while self.driver.execute_script('return document.readyState') != 'complete': 747 | sleep(wait) 748 | return self 749 | 750 | def __bool__(self): 751 | """ 752 | Use the page object as a bool to check to see if everything on the page exists. 753 | 754 | Returns: 755 | bool 756 | """ 757 | 758 | try: 759 | iterable = self.view.elements.iteritems() 760 | except AttributeError: 761 | iterable = self.view.elements.items() 762 | 763 | for k, v in iterable: 764 | test_element = getattr(self.view, k) 765 | if not test_element.exists(): 766 | return False 767 | 768 | return True 769 | 770 | def __getattr__(self, item): 771 | """ 772 | Check to see if the attribute exists on the `View` object. 773 | 774 | Args: 775 | item: 776 | str 777 | Returns: 778 | mixed 779 | """ 780 | 781 | # Check if the page_view has the item. 782 | if hasattr(self.view, item): 783 | # Handle PageElements by calling the instance and getting the 784 | # actual selenium web element. 785 | if isinstance(getattr(self.view, item), PageElement): 786 | return getattr(self.view, item)() 787 | # Handle any other items. 788 | return getattr(self.view, item) 789 | 790 | raise AttributeError("{} is not set as an attribute.".format(item)) 791 | 792 | 793 | def load_page(filepath, driver): 794 | """ 795 | Return a `Page` object for the given `filepath` and `WebDriver`. 796 | 797 | Args: 798 | filepath: 799 | str 800 | driver: 801 | WebDriver 802 | Returns: 803 | Page 804 | """ 805 | 806 | fstring = '' 807 | with open(filepath, 'r') as f: 808 | fstring = f.read() if fstring == '' else fstring 809 | 810 | # Early return 811 | if not fstring: 812 | return False 813 | return Page(driver, loads(fstring)) 814 | 815 | 816 | class MetaObject(object): 817 | """ 818 | The purpose of this class is to create a container out of a list of objects. 819 | the container can run middleware on the objects 820 | 821 | The MetaObject takes a list of objects. It will iterate through the list and 822 | assign it's own attributes key to the name of the current object in the list, 823 | and the value is set to the current object in the list. 824 | 825 | Example: 826 | from decimal import Decimal 827 | 828 | class Price(Decimal): 829 | name = 'price' 830 | 831 | class SKU(str): 832 | name = 'sku' 833 | 834 | class SubmitButton(SeleniumElement): 835 | name = 'submit_button' 836 | 837 | class Page(MetaObject): 838 | pass 839 | 840 | objects = [Price('34.99'), SKU('Some-sku-aosfij'), SubmitButton(driver.find_element_by_tag_name('button'))] 841 | page = Page(objects) 842 | 843 | print(page.sku) 844 | print(page.price) 845 | page.submit_button.click() 846 | """ 847 | def __init__(self, objects): 848 | self._dict = {} 849 | self._pos = 0 850 | self._size = 0 851 | 852 | # Add objects to the MetaObject as class attributes. 853 | for obj in objects: 854 | self.add_object(obj) 855 | 856 | def __getitem__(self, item): 857 | return self._dict[item] 858 | 859 | def __setitem__(self, key, value): 860 | self._dict[key] = value 861 | 862 | def __len__(self): 863 | return len(self._dict) 864 | 865 | def __delitem__(self, key): 866 | del self._dict[key] 867 | 868 | def __iter__(self): 869 | return self 870 | 871 | def __add__(self, other): 872 | """ 873 | Add MetaObjects together. 874 | 875 | Args: 876 | other: 877 | MetaObject 878 | Returns: 879 | MetaObject 880 | """ 881 | 882 | if isinstance(other, MetaObject): 883 | i1, i2 = list(self._dict.values()), list(other._dict.values()) 884 | return MetaObject(i1 + i2) 885 | else: 886 | raise ValueError('You cannot add other object types to a MetaObject') 887 | 888 | @staticmethod 889 | def _run_middleware(obj): 890 | """ 891 | Run middleware on an object. 892 | 893 | Args: 894 | obj: 895 | 896 | Returns: 897 | obj 898 | """ 899 | 900 | # Check if the middleware has not been run. 901 | if obj.meta_run_middleware: 902 | # Run the middleware 903 | item = obj.meta_run_middleware() 904 | # Disable the middleware method on the object so it cannot be run again. 905 | setattr(item, 'meta_run_middleware', False) 906 | obj = item 907 | 908 | return obj 909 | 910 | @staticmethod 911 | def _item_name(item): 912 | """ 913 | Return the item's name. Checks for the meta_name attribute and if it can't find 914 | that, it uses item.__class__.__name__ 915 | 916 | Args: 917 | item: 918 | 919 | Returns: 920 | str 921 | """ 922 | # Figure out the objects name. 923 | if hasattr(item, 'meta_name'): 924 | return item.meta_name 925 | 926 | return item.__class__.__name__ 927 | 928 | def add_object(self, obj): 929 | """ 930 | Add a single object to the MetaObject 931 | 932 | Args: 933 | obj: 934 | 935 | Returns: 936 | None 937 | """ 938 | 939 | # Run any middleware. 940 | if hasattr(obj, 'meta_run_middleware'): 941 | obj = MetaObject._run_middleware(obj) 942 | item_name = MetaObject._item_name(obj) 943 | else: 944 | item_name = MetaObject._item_name(obj) 945 | 946 | # Set the object as a class attribute! 947 | setattr(self, item_name, obj) 948 | # Set the internal dictionary up 949 | self._dict[item_name] = obj 950 | 951 | # Set the size so iteration works. 952 | self._size = len(self._dict) 953 | 954 | def next(self): 955 | """ 956 | Allow iterating through the object. 957 | 958 | Returns: 959 | tuple 960 | """ 961 | 962 | if self._pos < len(self._dict): 963 | keys = list(self._dict.keys()) 964 | values = list(self._dict.values()) 965 | k, v = keys[self._pos], values[self._pos] 966 | self._pos += 1 967 | return k, v 968 | raise StopIteration 969 | 970 | __next__ = next 971 | 972 | 973 | class SeleniumElement(object): 974 | """ 975 | The SeleniumElement object is used as a wrapper around any selenium element object. 976 | You can inherit from SeleniumElement to come up with new "types". This can make 977 | coding go a little bit quicker because things can end up a little bit more organized. 978 | 979 | Example: 980 | Usually when you are scraping text from rows in a table there may be web elements 981 | associated with those rows that you need to interact with. Creating new data types for 982 | the various pieces of text and the web elements allows you to just throw these things 983 | into a list with little regard for what order things are in later. We can just check 984 | each item in the list(the row) using the `isinstance` function and deal with it how we 985 | want to. 986 | 987 | Code Example: 988 | 989 | # Inherit from SeleniumElement for any element you need to interact with. 990 | # You can also give the class a name that will show up when combined with the MetaObject 991 | # class. 992 | class RowSelector(SeleniumElement): 993 | name = 'selector' 994 | 995 | # Inherit from Decimal for the price because in this scenario it would be a pure 996 | # decimal number with no string characters. 997 | from decimal import Decimal 998 | class Price(Decimal): 999 | name = 'price' 1000 | 1001 | # Define a whole new data type for any other text data we want to scrape. 1002 | class SKU(str): 1003 | name = 'sku' 1004 | 1005 | # Select each column in the table and assign some new data types to the results. 1006 | table_rows = [RowSelector(element) for element in driver.find_elements_by_id('row_selector_radio_button')] 1007 | prices = [Price(element.text.strip()) for element in driver.find_elements_by_id('row_price')] 1008 | skus = [SKU(element.text.strip()) for element in driver.find_elements_by_id('row_sku')] 1009 | 1010 | # Now that we have the columns in the table, we can create a list of MetaObject objects! 1011 | # Since we have each "column" in its own list, we can use `zip` to create a list of rows! 1012 | # Then we just use a list comprehension to change that list of rows(which are just lists 1013 | # themselves) into a list of MetaObject objects. 1014 | rows = [MetaObject(row) for row in zip(table_rows, prices, skus)] 1015 | 1016 | # If we iterate through the rows, you can access the various pieces we scraped 1017 | # and even interact with any of the object if they inherited from SeleniumElement. 1018 | for row in rows: 1019 | if row.sku != '': 1020 | # Click the radio button to select the row or whatever. 1021 | print(row.price) 1022 | print(row.sku) 1023 | row.selector.click() 1024 | """ 1025 | def __init__(self, element): 1026 | self.element = element 1027 | 1028 | def __getattr__(self, item): 1029 | if hasattr(self.element, item): 1030 | return getattr(self.element, item) 1031 | 1032 | 1033 | class DummyLogger(object): 1034 | """ 1035 | A logger that does absolutely nothing. Meant as a drop in replacement for a 1036 | logger that you would normally get from the logging module. 1037 | """ 1038 | def __init__(self, prints=True, level='DEBUG'): 1039 | self.prints = prints 1040 | self.levels = { 1041 | 'INFO': 0, 1042 | 'DEBUG': 1, 1043 | 'WARN': 2, 1044 | 'ERROR': 3, 1045 | 'FATAL': 4 1046 | } 1047 | try: 1048 | self.level = self.levels[level] 1049 | except KeyError: 1050 | self.level = 0 1051 | 1052 | def info(self, *args): 1053 | if self.prints and self.level >= 0: 1054 | print("INFO: {}".format(args)) 1055 | return self 1056 | 1057 | def debug(self, *args): 1058 | if self.prints and self.level >= 1: 1059 | print("DEBUG: {}".format(args)) 1060 | return self 1061 | 1062 | def warn(self, *args): 1063 | if self.prints and self.level >= 2: 1064 | print("WARN: {}".format(args)) 1065 | return self 1066 | 1067 | def error(self, *args): 1068 | if self.prints and self.level >= 3: 1069 | print("ERROR: {}".format(args)) 1070 | return self 1071 | 1072 | def fatal(self, *args): 1073 | if self.prints and self.level >= 4: 1074 | print("FATAL: {}".format(args)) 1075 | return self 1076 | 1077 | 1078 | class DummyThread(object): 1079 | """ 1080 | A drop in for threading.Thread. It only has the join and start methods at the moment. 1081 | """ 1082 | def __init__(self, target=False, args=()): 1083 | if not target: 1084 | raise ValueError('target must be callable.') 1085 | if len(args) == 0 or type(args) != tuple: 1086 | raise ValueError('args must be a tuple with more than 0 values') 1087 | self.target = target 1088 | self.args = args 1089 | 1090 | def join(self): 1091 | """ 1092 | Does nothing. 1093 | 1094 | :return: 1095 | """ 1096 | 1097 | pass 1098 | 1099 | def start(self): 1100 | """ 1101 | Execute the target function with the given args. 1102 | 1103 | :return: 1104 | """ 1105 | 1106 | return self.target(*self.args) 1107 | --------------------------------------------------------------------------------