├── .gitignore ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── pysmartprice ├── __init__.py ├── abstract.py ├── base.py ├── constants.py ├── helpers.py ├── results.py ├── smartparser.py └── tests │ ├── __init__.py │ └── test_core.py ├── requirements.txt ├── runtests.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | build 3 | dist 4 | *.egg-info 5 | .DS_Store 6 | Pipfile 7 | Pipfile.lock 8 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include README.rst 3 | recursive-include docs * -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | |Build|_ |CodeRate|_ |pypi|_ 3 | 4 | .. |Build| image:: https://scrutinizer-ci.com/g/asifpy/python-smartprice/badges/build.png?b=master 5 | .. _Build: https://scrutinizer-ci.com/g/asifpy/python-smartprice/ 6 | 7 | .. |CodeRate| image:: https://scrutinizer-ci.com/g/asifpy/python-smartprice/badges/quality-score.png?b=master 8 | .. _CodeRate: https://scrutinizer-ci.com/g/asifpy/python-smartprice/ 9 | 10 | .. |pypi| image:: https://badge.fury.io/py/python-smartprice.svg 11 | .. _pypi: https://badge.fury.io/py/python-smartprice 12 | 13 | 14 | 15 | ================= 16 | python-smartprice 17 | ================= 18 | 19 | A simple scraping-based python library for MySmartPrice - price comparison site. 20 | 21 | Introduction 22 | ------------ 23 | 24 | MySmartPrice_ is a platform which provided best seller price across a wide array of categories that include Mobiles, Electronics, Computers, Fashion and Lifestyle, Cameras, Books, Appliances and Personal Care. 25 | 26 | 27 | Installation 28 | ------------ 29 | 30 | ``pip install python-smartprice`` 31 | 32 | 33 | Prerequisites 34 | ------------- 35 | - Requests 36 | - BeautifulSoup 4 37 | - Python 2.7+ 38 | 39 | Quickstart 40 | ---------- 41 | 42 | GET PRICE LIST 43 | -------------- 44 | 45 | Check all the below `Supported attributes`_. 46 | 47 | .. code-block:: python 48 | 49 | >>> from pysmartprice.base import SmartPrice 50 | >>> smartprice = SmartPrice() 51 | 52 | >>> len(smartprice.samsung_mobiles) 53 | Out[5]: 142 54 | 55 | >>> smartprice.samsung_mobiles 56 | Out[6]: [, 57 | , 58 | , 59 | , 60 | , 61 | , 62 | , 63 | ] 64 | 65 | >>> samsung_mobile = smartprice.samsung_mobiles[0] 66 | 67 | >>> samsung_mobile.title 68 | Out[7]: u'Samsung Galaxy J7' 69 | 70 | >>> samsung_mobile.best_price 71 | Out[8]: '14,299' 72 | 73 | >>> samsung_mobile.dumptojson 74 | Out[9]: 75 | {u'best_price': u'14,664', 76 | u'img': u'http://c0028545.cdn1.cloudfiles.rackspacecloud.com/7178-7-thumb.jpg', 77 | u'product_id': u'7178', 78 | u'title': u'\nSamsung Galaxy J7\n', 79 | u'url': u'http://www.mysmartprice.com/mobile/samsung-galaxy-j7-msp7178'} 80 | 81 | SEARCH PRICE LIST 82 | ----------------- 83 | 84 | .. code-block:: python 85 | 86 | >>> from pysmartprice.base import SmartPrice 87 | >>> smartprice = SmartPrice() 88 | 89 | >>> smartprice.search('SAMSUNG') 90 | Out[4]: [, 91 | , 92 | , 93 | , 94 | , 95 | , 96 | , 97 | , 98 | , 99 | ...] 100 | 101 | In [3]: len(smartprice.search('SAMSUNG')) 102 | Out[3]: 1012 103 | 104 | In [3]: results = smartprice.search('SAMSUNG') 105 | 106 | In [4]: results[0].dumptojson 107 | Out[4]: 108 | {u'best_price': u'14,664', 109 | u'img': u'http://c0028545.cdn1.cloudfiles.rackspacecloud.com/7178-7-thumb.jpg', 110 | u'product_id': u'7178', 111 | u'title': u'\nSamsung Galaxy J7\n', 112 | u'url': u'http://www.mysmartprice.com/mobile/samsung-galaxy-j7-msp7178'} 113 | 114 | GET SELLERS DETAILS 115 | ------------------- 116 | 117 | .. code-block:: python 118 | 119 | >> from pysmartprice.base import SmartPrice 120 | >> smartprice = SmartPrice() 121 | 122 | >> results = smartprice.sellers('Samsung Galaxy J2') 123 | >> results 124 | Out[4]: [] 125 | 126 | In [5]: results[0].dumptojson 127 | Out[5]: 128 | {'best_price': '8,199', 129 | 'img': 'http://c0028545.cdn1.cloudfiles.rackspacecloud.com/7448-6-thumb.jpg', 130 | 'product_id': '7448', 131 | 'title': 'Samsung Galaxy J2', 132 | 'sellers': [ 133 | {'logo': u'http://c223968.r68.cf1.rackcdn.com/syberplace_store.png', 134 | 'name': u'Seller:\nSyberplace', 135 | 'price': u'8,199', 136 | 'rating': u'5/5'}, 137 | {'logo': u'http://c223968.r68.cf1.rackcdn.com/amazon_store.png', 138 | 'name': u'Seller:Amazon Seller', 139 | 'price': u'8,330', 140 | 'rating': u'4.5/5'}, 141 | {'logo': u'http://c223968.r68.cf1.rackcdn.com/ebay_store.png', 142 | 'name': u'Seller:Bberry.stores', 143 | 'price': u'8,215', 144 | 'rating': u'5/5'}, 145 | {'logo': u'http://c223968.r68.cf1.rackcdn.com/paytm_store.png', 146 | 'name': u'Seller:RYAISHA RETAILS PVT LTD', 147 | 'price': u'8,496', 148 | 'rating': u'3.2/5'}, 149 | {'logo': u'http://c223968.r68.cf1.rackcdn.com/indiatimes_store.png', 150 | 'name': u'Seller:Indiatimes', 151 | 'price': u'8,314', 152 | 'rating': u'2.5/5'}, 153 | {'logo': u'http://c223968.r68.cf1.rackcdn.com/flipkart_store.png', 154 | 'name': u'Seller:WS Retail', 155 | 'price': u'8,499', 156 | 'rating': u'4.2/5'}], 157 | 'url': 'http://www.mysmartprice.com/mobile/samsung-galaxy-j2-msp7448'} 158 | 159 | 160 | 161 | Supported attributes 162 | -------------------- 163 | Get the best prices for the below attributes 164 | 165 | - mobiles 166 | - samsung_mobiles 167 | - nokia_mobiles 168 | - micromax_mobiles 169 | - iphones 170 | - htc_mobiles 171 | - tablets 172 | - samsung_tablets 173 | - lenovo_tablets 174 | - micromax_tablets 175 | - apple_ipads 176 | - laptops 177 | - hp_laptops 178 | - dell_laptops 179 | - lenovo_laptops 180 | - sony_laptops 181 | - toshiba_laptops 182 | - macbooks 183 | - samsung_laptops 184 | - tvs 185 | - samsung_tvs 186 | - sony_tvs 187 | - lg_tvs 188 | - panasonic_tvs 189 | - sharp_tvs 190 | 191 | 192 | .. _MySmartPrice: http://www.mysmartprice.com/ 193 | -------------------------------------------------------------------------------- /pysmartprice/__init__.py: -------------------------------------------------------------------------------- 1 | VERSION = '0.0.3' -------------------------------------------------------------------------------- /pysmartprice/abstract.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | from bs4 import BeautifulSoup 3 | 4 | from pysmartprice.results import SmartPriceResult 5 | from pysmartprice.helpers import scrape, scrape_helper 6 | from pysmartprice import constants 7 | 8 | 9 | class BaseParser(object): 10 | def __init__(self, mapper, **kwargs): 11 | self.mapper = mapper 12 | self.params = kwargs 13 | self.url = constants.URL_MAPPER[self.mapper] 14 | self.response = scrape(self._make_url(self.url), **kwargs) 15 | self.soup = BeautifulSoup(self.response, 'lxml') 16 | self.result = [ 17 | SmartPriceResult(self.get_product_attrs(item)) 18 | for item in self.products_html 19 | ] 20 | 21 | def _make_url(self, target): 22 | return '{}{}'.format(constants.SMARTPRICE_WEB_URL, target) 23 | 24 | @property 25 | def price_results(self): 26 | if self.get_page_range: 27 | return self.process_multiple_pages() 28 | 29 | return self.result 30 | 31 | 32 | class ParserMixin(object): 33 | def get_product_attrs(self, item): 34 | return dict( 35 | img=item.find('img').get('src'), 36 | title=item.find('a', attrs={'class': 'prdct-item__name'}).text, 37 | url=item.find( 38 | 'a', attrs={'class': 'prdct-item__name'}).get('href'), 39 | best_price=item.find( 40 | 'span', attrs={'class': 'prdct-item__prc-val'}).text, 41 | product_id=item.get('data-mspid') 42 | ) 43 | 44 | @property 45 | def products_html(self): 46 | html = self.soup.findAll('div', attrs={'class': 'prdct-item'}) 47 | return html 48 | 49 | def process_multiple_pages(self): 50 | results = self.result 51 | first_page, last_page = self.get_page_range 52 | paged_url = self.get_paged_url 53 | page_urls = [] 54 | 55 | for page in range(first_page+1, last_page+1): 56 | url = paged_url.replace('.html', '-{}.html'.format(page)) 57 | params = self.params.copy() 58 | if self.params.get('page', None): 59 | params.update({'page': page}) 60 | page_urls.append((self._make_url(url), params)) 61 | 62 | # Scrape pages in parallel 63 | pool = multiprocessing.Pool(processes=multiprocessing.cpu_count()*2) 64 | 65 | for page in pool.map(scrape_helper, page_urls): 66 | self.soup = BeautifulSoup(page, 'lxml') 67 | 68 | results += [ 69 | SmartPriceResult(self.get_product_attrs(item)) 70 | for item in self.products_html 71 | ] 72 | return results 73 | 74 | @property 75 | def get_page_range(self): 76 | page_range = self.soup.findAll( 77 | 'span', attrs={'class': 'pgntn__rslt-page'}) 78 | 79 | if not page_range: 80 | return None 81 | 82 | first_page = int(page_range[0].text) 83 | last_page = int(page_range[1].text) 84 | return first_page, last_page 85 | 86 | -------------------------------------------------------------------------------- /pysmartprice/base.py: -------------------------------------------------------------------------------- 1 | from pysmartprice.smartparser import ( 2 | PriceListParser, 3 | SearchParser, 4 | SellerParser 5 | ) 6 | from pysmartprice.constants import SMARTPRICE_ATTRS 7 | 8 | 9 | class SmartPrice(object): 10 | 11 | def parser_results(self, product, **kwargs): 12 | parser = PriceListParser(product, **kwargs) 13 | return parser.price_results 14 | 15 | def __getattr__(self, attr): 16 | if attr not in SMARTPRICE_ATTRS: 17 | msg = '{} object has no attribute {}' 18 | raise AttributeError(msg.format(self.__class__.__name__, attr)) 19 | 20 | setattr(self, attr, self.parser_results(SMARTPRICE_ATTRS[attr])) 21 | return getattr(self, attr) 22 | 23 | def search(self, search_key): 24 | params = dict(s=search_key, page=1) 25 | parser = SearchParser('search', **params) 26 | return parser.price_results 27 | 28 | def sellers(self, product): 29 | search_res = self.search(product) 30 | products = [ 31 | res for res in search_res if product.lower() in res.title.lower()] 32 | 33 | for product in products: 34 | seller_parser = SellerParser(product.url) 35 | setattr(product, 'sellers', seller_parser.result) 36 | 37 | return products 38 | -------------------------------------------------------------------------------- /pysmartprice/constants.py: -------------------------------------------------------------------------------- 1 | SMARTPRICE_WEB_URL = 'http://www.mysmartprice.com/' 2 | 3 | URL_MAPPER = { 4 | # mobiles 5 | 'mobile-price': 'mobile/pricelist/mobile-price-list-in-india.html', 6 | 'samsung-mobile': 'mobile/pricelist/samsung-mobile-price-list-in-india.html', 7 | 'nokia-mobile': 'mobile/pricelist/nokia-mobile-price-list-in-india.html', 8 | 'micromax-mobile': 'mobile/pricelist/micromax-mobile-price-list-in-india.html', 9 | 'apple-mobile': 'mobile/pricelist/apple-mobile-price-list-in-india.html', 10 | 'htc-mobile': 'mobile/pricelist/htc-mobile-price-list-in-india.html', 11 | 12 | # tablets 13 | 'tablet': 'mobile/pricelist/tablet-price-list-in-india.html', 14 | 'samsung-tablet': 'mobile/pricelist/samsung-tablet-price-list-in-india.html', 15 | 'lenovo-tablet': 'mobile/pricelist/lenovo-tablet-price-list-in-india.html', 16 | 'micromax-tablet': 'mobile/pricelist/micromax-tablet-price-list-in-india.html', 17 | 'apple-tablet': 'mobile/pricelist/apple-tablet-price-list-in-india.html', 18 | 19 | # computers 20 | 'laptops': 'computer/pricelist/laptops-price-list-in-india.html', 21 | 'hp-laptop': 'computer/pricelist/hp-laptop-price-list-in-india.html', 22 | 'dell-laptop': 'computer/pricelist/dell-laptop-price-list-in-india.html', 23 | 'lenovo-laptop': 'computer/pricelist/lenovo-laptop-price-list-in-india.html', 24 | 'sony-laptop': 'computer/pricelist/sony-laptop-price-list-in-india.html', 25 | 'toshiba-laptop': 'computer/pricelist/toshiba-laptop-price-list-in-india.html', 26 | 'apple-laptop': 'computer/pricelist/apple-laptop-price-list-in-india.html', 27 | 'samsung-laptop': 'computer/pricelist/samsung-laptop-price-list-in-india.html', 28 | 29 | # electronics 30 | 'tv-price': 'electronics/pricelist/tv-price-list-in-india.html', 31 | 'samsung-tv': 'electronics/pricelist/samsung-tv-price-list-in-india.html', 32 | 'sony-tv': 'electronics/pricelist/sony-tv-price-list-in-india.html', 33 | 'lg-tv': 'electronics/pricelist/lg-tv-price-list-in-india.html', 34 | 'panasonic-tv': 'electronics/pricelist/panasonic-tv-price-list-in-india.html', 35 | 'sharp-tv': 'electronics/pricelist/sharp-tv-price-list-in-india.html', 36 | 37 | 'search': 'msp/search/msp_search_new.php', 38 | } 39 | 40 | 41 | SMARTPRICE_ATTRS = { 42 | # mobiles 43 | 'mobiles': 'mobile-price', 44 | 'samsung_mobiles': 'samsung-mobile', 45 | 'nokia_mobiles': 'nokia-mobile', 46 | 'micromax_mobiles': 'micromax-mobile', 47 | 'apple_mobiles': 'apple-mobile', 48 | 'htc_mobiles': 'htc-mobile', 49 | 50 | # tablets 51 | 'tablets': 'tablet', 52 | 'samsung_tablets': 'samsung-tablet', 53 | 'lenovo_tablets': 'lenovo-tablet', 54 | 'micromax_tablets': 'micromax-tablet', 55 | 'apple_ipads': 'apple-tablet', 56 | 57 | # computers 58 | 'laptops': 'laptops', 59 | 'hp_laptops': 'hp-laptop', 60 | 'dell_laptops': 'dell-laptop', 61 | 'lenovo_laptops': 'lenovo-laptop', 62 | 'sony_laptops': 'sony-laptop', 63 | 'toshiba_laptops': 'toshiba-laptop', 64 | 'macbooks': 'apple-laptop', 65 | 'samsung_laptops': 'samsung-laptop', 66 | 67 | 'tvs': 'tv-price', 68 | 'samsung_tvs': 'samsung-tv', 69 | 'sony_tvs': 'sony-tv', 70 | 'lg_tvs': 'lg_tv', 71 | 'panasonic_tvs': 'panasonic-tv', 72 | 'sharp_tvs': 'sharp-tv' 73 | 74 | } 75 | -------------------------------------------------------------------------------- /pysmartprice/helpers.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | def scrape(url, **kwargs): 5 | resp = requests.get(url, params=kwargs) 6 | return resp.text 7 | 8 | 9 | def scrape_helper(args): 10 | return scrape(args[0], **args[1]) 11 | -------------------------------------------------------------------------------- /pysmartprice/results.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | class ComplexEncoder(json.JSONEncoder): 5 | def default(self, obj): 6 | if hasattr(obj, 'jsonable'): 7 | return obj.dumptojson 8 | else: 9 | return json.JSONEncoder.default(self, obj) 10 | 11 | 12 | class SmartPriceBase(object): 13 | def __init__(self, params): 14 | self.__dict__.update(params) 15 | 16 | def jsonable(self): 17 | return self.__dict__ 18 | 19 | @property 20 | def dumptojson(self): 21 | return json.loads( 22 | json.dumps( 23 | self.jsonable(), 24 | cls=ComplexEncoder, 25 | # sort_keys=True, 26 | indent=4, 27 | separators=(',', ': ')) 28 | ) 29 | 30 | 31 | class SmartPriceResult(SmartPriceBase): 32 | pass 33 | 34 | 35 | class SmartPriceSeller(SmartPriceBase): 36 | pass 37 | -------------------------------------------------------------------------------- /pysmartprice/smartparser.py: -------------------------------------------------------------------------------- 1 | from pysmartprice.abstract import ( 2 | BaseParser, 3 | ParserMixin 4 | ) 5 | from pysmartprice.helpers import scrape 6 | from bs4 import BeautifulSoup 7 | from pysmartprice.results import SmartPriceSeller 8 | 9 | 10 | class PriceListParser(BaseParser, ParserMixin): 11 | @property 12 | def get_paged_url(self): 13 | i = self.url.find(self.mapper) 14 | paged_url = '{}pages/{}'.format(self.url[:i], self.url[i:]) 15 | return paged_url 16 | 17 | 18 | class SearchParser(BaseParser, ParserMixin): 19 | @property 20 | def get_paged_url(self): 21 | return self.url 22 | 23 | 24 | # SCRAPE SELLERS 25 | class SellerParser(object): 26 | def __init__(self, url, *args, **kwargs): 27 | self.url = url 28 | self.response = scrape(self.url, **kwargs) 29 | self.soup = BeautifulSoup(self.response, 'lxml') 30 | self.result = [ 31 | SmartPriceSeller(self.get_product_attrs(item)) 32 | for item in self.products_html 33 | ] 34 | 35 | def get_product_attrs(self, item): 36 | return dict( 37 | logo=item.find('img', {'class': 'prc-tbl__str-logo'}).get('src'), 38 | rating=item.find( 39 | 'span', attrs={'class': 'rtng-bdg rtng-bdg--dark-grn'}).text, 40 | price=item.find( 41 | 'span', attrs={'class': 'prc-tbl__cost-val'}).text, 42 | name=item.find('div', attrs={'class': 'prc-tbl__slr-name'}).text 43 | ) 44 | 45 | @property 46 | def products_html(self): 47 | html = self.soup.findAll( 48 | 'div', attrs={'class': 'prc-tbl-row__inr clearfix'}) 49 | return html 50 | -------------------------------------------------------------------------------- /pysmartprice/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asifpy/python-smartprice/a6247290604a0fcf789ea022dd087b307d0f42f6/pysmartprice/tests/__init__.py -------------------------------------------------------------------------------- /pysmartprice/tests/test_core.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pytest 3 | import requests 4 | 5 | from pysmartprice.base import SmartPrice 6 | from pysmartprice import constants 7 | 8 | 9 | class TestSmartPrice(unittest.TestCase): 10 | 11 | def setUp(self): 12 | self.smartprice = SmartPrice() 13 | # self.generate_soupelement_tests() 14 | 15 | def generate_soupelement_tests(self): 16 | """Generate test methods to check valid URLS""" 17 | for key, url in constants.URL_MAPPER.iteritems(): 18 | testmethodname = 'test_fn_{0}'.format(key) 19 | testmethod = lambda self: self.assertEqual(key, key) 20 | setattr(TestSmartPrice, testmethodname, testmethod) 21 | 22 | def test_webexists(self): 23 | self.assertEqual( 24 | 'http://www.mysmartprice.com/', 25 | constants.SMARTPRICE_WEB_URL 26 | ) 27 | 28 | def test_validurls(self): 29 | for key, url in constants.URL_MAPPER.iteritems(): 30 | complete_url = '{}{}'.format(constants.SMARTPRICE_WEB_URL, url) 31 | requests.get(complete_url) 32 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.5.3 2 | beautifulsoup4==4.4.0 3 | pytest==2.8.5 4 | py==1.4.31 5 | -------------------------------------------------------------------------------- /runtests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | if __name__ == '__main__': 4 | # use the default shared TestLoader instance 5 | test_loader = unittest.defaultTestLoader 6 | 7 | # use the basic test runner that outputs to sys.stderr 8 | test_runner = unittest.TextTestRunner() 9 | 10 | # automatically discover all tests in the current dir of the form test*.py 11 | # NOTE: only works for python 2.7 and later 12 | test_suite = test_loader.discover('pysmartprice') 13 | 14 | # run the test suite 15 | test_runner.run(test_suite) 16 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | import pysmartprice 4 | 5 | with open(os.path.join(os.path.dirname(__file__), 'README.rst')) as readme: 6 | README = readme.read() 7 | 8 | # allow setup.py to be run from any path 9 | os.chdir(os.path.normpath(os.path.join(os.path.abspath(__file__), os.pardir))) 10 | 11 | setup( 12 | name='python-smartprice', 13 | version=pysmartprice.VERSION, 14 | packages=['pysmartprice'], 15 | include_package_data=True, 16 | license='BSD License', 17 | description='A simple scraping-based python library for MySmartPrice', 18 | long_description=README, 19 | url='https://github.com/asifpy/python-smartprice', 20 | author='Asif Jamadar', 21 | author_email='saluasif@gmail.com', 22 | keywords=['smartprice', 'price comparision', 'scrapping'], 23 | install_requires=[ 24 | 'requests>=2.5.3', 25 | 'beautifulsoup4>4.4.0', 26 | 'pytest>=2.8.5', 27 | 'lxml>=4.1.0' 28 | ], 29 | ) 30 | --------------------------------------------------------------------------------