├── extension ├── devtools │ ├── views │ │ ├── SitemapSelectorGraph.html │ │ ├── SitemapExport.html │ │ ├── SitemapBrowseData.html │ │ ├── SitemapList.html │ │ ├── SelectorEditTableColumn.html │ │ ├── SitemapListItem.html │ │ ├── SelectorListItem.html │ │ ├── DataPreview.html │ │ ├── SelectorList.html │ │ ├── SitemapExportDataCSV.html │ │ ├── SitemapImport.html │ │ ├── SitemapCreate.html │ │ ├── Viewport.html │ │ ├── SitemapScrapeConfig.html │ │ └── SitemapEditMetadata.html │ ├── devtools_init_page.html │ ├── devtools_init_page.js │ ├── devtools_scraper_panel.css │ └── devtools_scraper_panel.html ├── assets │ ├── images │ │ ├── icon128.png │ │ ├── icon16.png │ │ ├── icon19.png │ │ ├── icon38.png │ │ ├── icon48.png │ │ └── LICENSE │ ├── bootstrap-3.0.0 │ │ └── fonts │ │ │ ├── glyphicons-halflings-regular.eot │ │ │ ├── glyphicons-halflings-regular.ttf │ │ │ └── glyphicons-halflings-regular.woff │ ├── jquery.bootstrapvalidator │ │ └── bootstrapValidator.css │ ├── base64.js │ ├── LICENSE-sugar-js │ ├── LICENSE-jquery-js │ ├── jquery.whencallsequentially.js │ ├── LICENSE-d3-js │ └── LICENSE-icanhaz-js ├── scripts │ ├── App.js │ ├── Selector │ │ ├── SelectorElement.js │ │ ├── SelectorValue.js │ │ ├── SelectorElementStyle.js │ │ ├── SelectorElementAttribute.js │ │ ├── SelectorHTML.js │ │ ├── SelectorText.js │ │ ├── SelectorGroup.js │ │ ├── SelectorElementScroll.js │ │ ├── SelectorLink.js │ │ ├── SelectorImage.js │ │ └── SelectorPopupLink.js │ ├── DateUtils │ │ ├── DateRoller.js │ │ ├── DatePatternSupport.js │ │ └── SimpleDateFormatter.js │ ├── Queue.js │ ├── Config.js │ ├── ElementQuery.js │ ├── StoreDevtools.js │ ├── ChromePopupBrowser.js │ ├── Job.js │ ├── BackgroundScript.js │ ├── UniqueElementList.js │ ├── ContentScript.js │ ├── SelectorGraph.js │ └── Store.js ├── popup.html ├── content_script │ └── content_script.js ├── options_page │ ├── options_page.js │ └── options.html ├── background_page │ └── background_script.js └── manifest.json ├── docs ├── images │ ├── 
store-logo-sources.txt │ ├── sitemap-tree.png │ ├── chrome-store-logo.png │ ├── chrome-store-logo.xcf │ ├── selectors │ │ ├── table │ │ │ ├── table.png │ │ │ └── selectors.png │ │ ├── element-click │ │ │ ├── click-more.png │ │ │ └── click-once.png │ │ ├── link │ │ │ ├── pagination-link-selectors.png │ │ │ ├── pagination-selector-graph.png │ │ │ └── multiple-level-link-selectors.png │ │ └── text │ │ │ ├── text-selector-multiple-per-page.png │ │ │ ├── text-selector-multiple-elements-with-text-selectors.png │ │ │ └── text-selector-multiple-single-text-selectors-in-one-page.png │ ├── chrome-store-logo-920x680.png │ ├── chrome-store-logo-920x680.xcf │ ├── scraping-a-site │ │ ├── news-site.png │ │ ├── news-site-sitemap.png │ │ └── news-site-selector-graph.png │ └── open-web-scraper │ │ └── open-web-scraper.png ├── Installation.md ├── Open Web Scraper.md ├── Selectors │ ├── HTML selector.md │ ├── Element style selector.md │ ├── Element attribute selector.md │ ├── Link popup selector.md │ ├── Image selector.md │ ├── Grouped selector.md │ ├── Element scroll down selector.md │ ├── Table selector.md │ ├── Element selector.md │ ├── Link selector.md │ ├── Text selector.md │ └── Element click selector.md ├── Storage backends.md ├── CSS selector.md ├── Development.md ├── Selectors.md └── Scraping a site.md ├── .gitmodules ├── .gitignore ├── playgrounds ├── sitemap-tree │ ├── style.css │ ├── sitemap.json │ └── index.html └── extension │ └── webpage.css ├── tests ├── FakeStore.js ├── spec │ ├── QueueSpec.js │ ├── Selector │ │ ├── SelectorValueSpec.js │ │ ├── SelectorElementSpec.js │ │ ├── SelectorGroupSpec.js │ │ ├── SelectorLinkSpec.js │ │ ├── SelectorElementAttributeSpec.js │ │ ├── SelectorElementScrollSpec.js │ │ ├── SelectorElementStyleSpec.js │ │ ├── SelectorHTMLSpec.js │ │ ├── SelectorImageSpec.js │ │ └── SelectorPopupLinkSpec.js │ ├── BackgroundScriptSpec.js │ ├── SelectorSpec.js │ ├── ChromePopupBrowserSpec.js │ ├── ElementQuerySpec.js │ ├── UniqueElementListSpec.js │ ├── 
JobSpec.js │ ├── ContentScriptSpec.js │ ├── ContentSelectorSpec.js │ └── jquery.whencallsequentiallySpec.js ├── Matchers.js └── ChromeAPI.js ├── jasmine-standalone └── lib │ └── jasmine-1.3.1 │ └── MIT.LICENSE └── README.md /extension/devtools/views/SitemapSelectorGraph.html: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /docs/images/store-logo-sources.txt: -------------------------------------------------------------------------------- 1 | http://jsfiddle.net/t8Sgq/ 2 | http://jsfiddle.net/qpVkY/ 3 | -------------------------------------------------------------------------------- /docs/images/sitemap-tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/sitemap-tree.png -------------------------------------------------------------------------------- /docs/images/chrome-store-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/chrome-store-logo.png -------------------------------------------------------------------------------- /docs/images/chrome-store-logo.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/chrome-store-logo.xcf -------------------------------------------------------------------------------- /extension/assets/images/icon128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/extension/assets/images/icon128.png -------------------------------------------------------------------------------- /extension/assets/images/icon16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/extension/assets/images/icon16.png -------------------------------------------------------------------------------- /extension/assets/images/icon19.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/extension/assets/images/icon19.png -------------------------------------------------------------------------------- /extension/assets/images/icon38.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/extension/assets/images/icon38.png -------------------------------------------------------------------------------- /extension/assets/images/icon48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/extension/assets/images/icon48.png -------------------------------------------------------------------------------- /docs/images/selectors/table/table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/selectors/table/table.png -------------------------------------------------------------------------------- /docs/images/chrome-store-logo-920x680.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/chrome-store-logo-920x680.png -------------------------------------------------------------------------------- /docs/images/chrome-store-logo-920x680.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/chrome-store-logo-920x680.xcf -------------------------------------------------------------------------------- /docs/images/scraping-a-site/news-site.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/scraping-a-site/news-site.png -------------------------------------------------------------------------------- /docs/images/selectors/table/selectors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/selectors/table/selectors.png -------------------------------------------------------------------------------- /extension/devtools/devtools_init_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /extension/devtools/devtools_init_page.js: -------------------------------------------------------------------------------- 1 | chrome.devtools.panels.create("Web Scraper", "../assets/images/icon48.png", "devtools/devtools_scraper_panel.html"); -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "extension/assets/css-selector"] 2 | path = extension/assets/css-selector 3 | url = https://github.com/martinsbalodis/css-selector.git 4 | -------------------------------------------------------------------------------- /docs/images/open-web-scraper/open-web-scraper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/open-web-scraper/open-web-scraper.png -------------------------------------------------------------------------------- /docs/images/scraping-a-site/news-site-sitemap.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/scraping-a-site/news-site-sitemap.png -------------------------------------------------------------------------------- /docs/images/selectors/element-click/click-more.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/selectors/element-click/click-more.png -------------------------------------------------------------------------------- /docs/images/selectors/element-click/click-once.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/selectors/element-click/click-once.png -------------------------------------------------------------------------------- /docs/images/scraping-a-site/news-site-selector-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/scraping-a-site/news-site-selector-graph.png -------------------------------------------------------------------------------- /docs/images/selectors/link/pagination-link-selectors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/selectors/link/pagination-link-selectors.png -------------------------------------------------------------------------------- /docs/images/selectors/link/pagination-selector-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/selectors/link/pagination-selector-graph.png -------------------------------------------------------------------------------- 
/docs/images/selectors/link/multiple-level-link-selectors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/selectors/link/multiple-level-link-selectors.png -------------------------------------------------------------------------------- /docs/images/selectors/text/text-selector-multiple-per-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/selectors/text/text-selector-multiple-per-page.png -------------------------------------------------------------------------------- /extension/assets/bootstrap-3.0.0/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/extension/assets/bootstrap-3.0.0/fonts/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /extension/assets/bootstrap-3.0.0/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/extension/assets/bootstrap-3.0.0/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /extension/assets/bootstrap-3.0.0/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/extension/assets/bootstrap-3.0.0/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /docs/images/selectors/text/text-selector-multiple-elements-with-text-selectors.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/selectors/text/text-selector-multiple-elements-with-text-selectors.png -------------------------------------------------------------------------------- /docs/images/selectors/text/text-selector-multiple-single-text-selectors-in-one-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwillmer/web-scraper-chrome-extension/HEAD/docs/images/selectors/text/text-selector-multiple-single-text-selectors-in-one-page.png -------------------------------------------------------------------------------- /extension/scripts/App.js: -------------------------------------------------------------------------------- 1 | $(function () { 2 | 3 | // init bootstrap alerts 4 | $(".alert").alert(); 5 | 6 | var store = new StoreDevtools(); 7 | new SitemapController({ 8 | store: store, 9 | templateDir: 'views/' 10 | }); 11 | }); -------------------------------------------------------------------------------- /extension/assets/images/LICENSE: -------------------------------------------------------------------------------- 1 | icons source: 2 | https://www.iconfinder.com/iconsets/free-grey-cloud-icons#readme 3 | https://www.iconfinder.com/icons/129397/spider_web_icon#size=96 4 | 5 | license: 6 | http://creativecommons.org/licenses/by/3.0/legalcode 7 | -------------------------------------------------------------------------------- /extension/devtools/views/SitemapExport.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | projectFilesBackup 3 | extension.zip 4 | 5 | /.vs/web-scraper-chrome-extension/v15/.suo 6 | 
/.vs/web-scraper-chrome-extension/v15 7 | /.vs/VSWorkspaceState.json 8 | /.vs/slnx.sqlite 9 | /.vs/ProjectSettings.json 10 | /.vs/config/applicationhost.config 11 | -------------------------------------------------------------------------------- /playgrounds/sitemap-tree/style.css: -------------------------------------------------------------------------------- 1 | .node circle { 2 | cursor: pointer; 3 | fill: #fff; 4 | stroke: steelblue; 5 | stroke-width: 1.5px; 6 | } 7 | 8 | .node text { 9 | font-size: 11px; 10 | } 11 | 12 | path.link { 13 | fill: none; 14 | stroke: #ccc; 15 | stroke-width: 1.5px; 16 | } -------------------------------------------------------------------------------- /extension/devtools/views/SitemapBrowseData.html: -------------------------------------------------------------------------------- 1 || {{.}} | 7 | {{/columns}} 8 |
|---|
| ID | 6 |Start URL | 7 |actions | 8 |
|---|
13 | Open Developer tools, where you will find the Web Scraper tab: 14 |
27 | Documentation is available on webscraper.io 28 |
29 | 30 | -------------------------------------------------------------------------------- /extension/assets/jquery.bootstrapvalidator/bootstrapValidator.css: -------------------------------------------------------------------------------- 1 | /** 2 | * BootstrapValidator (http://bootstrapvalidator.com) 3 | * The best jQuery plugin to validate form fields. Designed to use with Bootstrap 3 4 | * 5 | * @author http://twitter.com/nghuuphuoc 6 | * @copyright (c) 2013 - 2014 Nguyen Huu Phuoc 7 | * @license MIT 8 | */ 9 | 10 | .bv-form .help-block { 11 | margin-bottom: 0; 12 | } 13 | .bv-form .tooltip-inner { 14 | text-align: left; 15 | } 16 | .nav-tabs li.bv-tab-success > a { 17 | color: #3c763d; 18 | } 19 | .nav-tabs li.bv-tab-error > a { 20 | color: #a94442; 21 | } 22 | -------------------------------------------------------------------------------- /extension/devtools/views/SelectorListItem.html: -------------------------------------------------------------------------------- 1 || ID | 11 |Selector | 12 |type | 13 |Multiple | 14 |Parent selectors | 15 |Actions | 16 |
|---|
23 | Waiting for process to finish. > 24 |
25 | -------------------------------------------------------------------------------- /extension/scripts/Selector/SelectorElement.js: -------------------------------------------------------------------------------- 1 | var SelectorElement = { 2 | 3 | canReturnMultipleRecords: function () { 4 | return true; 5 | }, 6 | 7 | canHaveChildSelectors: function () { 8 | return true; 9 | }, 10 | 11 | canHaveLocalChildSelectors: function () { 12 | return true; 13 | }, 14 | 15 | canCreateNewJobs: function () { 16 | return false; 17 | }, 18 | willReturnElements: function () { 19 | return true; 20 | }, 21 | 22 | _getData: function (parentElement) { 23 | 24 | var dfd = $.Deferred(); 25 | 26 | var elements = this.getDataElements(parentElement); 27 | dfd.resolve(jQuery.makeArray(elements)); 28 | 29 | return dfd.promise(); 30 | }, 31 | 32 | getDataColumns: function () { 33 | return []; 34 | }, 35 | 36 | getFeatures: function () { 37 | return ['multiple', 'delay'] 38 | } 39 | }; 40 | -------------------------------------------------------------------------------- /docs/Selectors/Element style selector.md: -------------------------------------------------------------------------------- 1 | # Element style selector 2 | Element style selector can extract a style value of an HTML element. 3 | For example you could use this selector to extract the with attribute from 4 | this div: `