├── CBAlex ├── course.html └── mylib.py ├── Chris ├── README └── crawler │ └── example │ ├── .crawler.py.swp │ ├── CaseParser.pyc │ ├── Lien_Data_20150812.txt │ ├── crawler.py │ ├── crawler.pyc │ ├── index │ ├── myLib.pyc │ ├── result │ ├── result.bak │ └── test.py ├── README.md ├── aswbe ├── 01_search.py ├── 02_position.py ├── 03_summary.py └── selenium_aswbe.py ├── case ├── theater.tar └── theater │ ├── crawler.py │ ├── crawler.pyc │ ├── examples │ ├── indievox.page │ ├── indievox_in.page │ ├── legacy.page │ ├── legacy_content.page │ └── thewall.page │ ├── image │ ├── 1421 │ ├── 1428 │ ├── 1450 │ ├── 1451 │ ├── 1452 │ ├── 1454 │ ├── 1455 │ ├── 1456 │ ├── 1461 │ ├── 1462 │ ├── 1463 │ ├── 1464 │ ├── 1467 │ ├── 1468 │ ├── 1471 │ ├── 1472 │ ├── 1473 │ ├── 1481 │ ├── 1482 │ ├── 1483 │ ├── 1485 │ ├── 1487 │ ├── 1489 │ ├── 1493 │ ├── 1495 │ ├── 1496 │ ├── 1499 │ └── 1500 │ ├── main.py │ ├── mylib.py │ ├── mylib.pyc │ ├── myparser │ ├── __init__.py │ ├── __init__.pyc │ ├── indievox.py │ ├── indievox.pyc │ ├── legacy.py │ ├── legacy.pyc │ ├── theater_thewall.py │ └── theater_thewall.pyc │ ├── parseplatform │ ├── __init__.py │ ├── __init__.pyc │ ├── associate.py │ ├── create_object.py │ ├── upload_data.py │ └── upload_data.pyc │ ├── result │ └── legacy.result │ ├── test.html │ ├── tmp.data │ ├── urlcreate │ ├── __init__.py │ ├── __init__.pyc │ ├── theater_thewall.py │ └── theater_thewall.pyc │ └── webpage.cfg ├── crawl_framework └── mylib.py └── windows_test └── 1234.txt /CBAlex/course.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/CBAlex/course.html -------------------------------------------------------------------------------- /CBAlex/mylib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/CBAlex/mylib.py -------------------------------------------------------------------------------- /Chris/README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/Chris/README -------------------------------------------------------------------------------- /Chris/crawler/example/.crawler.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/Chris/crawler/example/.crawler.py.swp -------------------------------------------------------------------------------- /Chris/crawler/example/CaseParser.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/Chris/crawler/example/CaseParser.pyc -------------------------------------------------------------------------------- /Chris/crawler/example/Lien_Data_20150812.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/Chris/crawler/example/Lien_Data_20150812.txt -------------------------------------------------------------------------------- /Chris/crawler/example/crawler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/Chris/crawler/example/crawler.py -------------------------------------------------------------------------------- /Chris/crawler/example/crawler.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/Chris/crawler/example/crawler.pyc -------------------------------------------------------------------------------- /Chris/crawler/example/index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/Chris/crawler/example/index -------------------------------------------------------------------------------- /Chris/crawler/example/myLib.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/Chris/crawler/example/myLib.pyc -------------------------------------------------------------------------------- /Chris/crawler/example/result: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/Chris/crawler/example/result -------------------------------------------------------------------------------- /Chris/crawler/example/result.bak: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/Chris/crawler/example/result.bak -------------------------------------------------------------------------------- /Chris/crawler/example/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/Chris/crawler/example/test.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Crawler 2 | -------------------------------------------------------------------------------- /aswbe/01_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/aswbe/01_search.py -------------------------------------------------------------------------------- /aswbe/02_position.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/aswbe/02_position.py -------------------------------------------------------------------------------- /aswbe/03_summary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/aswbe/03_summary.py -------------------------------------------------------------------------------- /aswbe/selenium_aswbe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/aswbe/selenium_aswbe.py -------------------------------------------------------------------------------- /case/theater.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater.tar -------------------------------------------------------------------------------- /case/theater/crawler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/crawler.py -------------------------------------------------------------------------------- /case/theater/crawler.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/crawler.pyc -------------------------------------------------------------------------------- /case/theater/examples/indievox.page: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/examples/indievox.page -------------------------------------------------------------------------------- /case/theater/examples/indievox_in.page: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/examples/indievox_in.page -------------------------------------------------------------------------------- /case/theater/examples/legacy.page: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/examples/legacy.page -------------------------------------------------------------------------------- /case/theater/examples/legacy_content.page: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/examples/legacy_content.page -------------------------------------------------------------------------------- /case/theater/examples/thewall.page: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/examples/thewall.page -------------------------------------------------------------------------------- /case/theater/image/1421: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1421 -------------------------------------------------------------------------------- /case/theater/image/1428: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1428 -------------------------------------------------------------------------------- /case/theater/image/1450: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1450 -------------------------------------------------------------------------------- /case/theater/image/1451: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1451 -------------------------------------------------------------------------------- /case/theater/image/1452: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1452 -------------------------------------------------------------------------------- /case/theater/image/1454: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1454 -------------------------------------------------------------------------------- /case/theater/image/1455: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1455 -------------------------------------------------------------------------------- /case/theater/image/1456: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1456 -------------------------------------------------------------------------------- /case/theater/image/1461: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1461 -------------------------------------------------------------------------------- /case/theater/image/1462: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1462 -------------------------------------------------------------------------------- /case/theater/image/1463: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1463 -------------------------------------------------------------------------------- /case/theater/image/1464: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1464 -------------------------------------------------------------------------------- /case/theater/image/1467: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1467 -------------------------------------------------------------------------------- /case/theater/image/1468: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1468 -------------------------------------------------------------------------------- /case/theater/image/1471: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1471 -------------------------------------------------------------------------------- /case/theater/image/1472: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1472 -------------------------------------------------------------------------------- /case/theater/image/1473: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1473 -------------------------------------------------------------------------------- /case/theater/image/1481: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1481 -------------------------------------------------------------------------------- /case/theater/image/1482: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1482 -------------------------------------------------------------------------------- /case/theater/image/1483: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1483 -------------------------------------------------------------------------------- /case/theater/image/1485: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1485 -------------------------------------------------------------------------------- /case/theater/image/1487: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1487 -------------------------------------------------------------------------------- /case/theater/image/1489: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1489 -------------------------------------------------------------------------------- /case/theater/image/1493: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1493 -------------------------------------------------------------------------------- /case/theater/image/1495: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1495 -------------------------------------------------------------------------------- /case/theater/image/1496: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1496 -------------------------------------------------------------------------------- /case/theater/image/1499: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/image/1499 -------------------------------------------------------------------------------- /case/theater/image/1500: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /case/theater/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/main.py -------------------------------------------------------------------------------- /case/theater/mylib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/mylib.py -------------------------------------------------------------------------------- /case/theater/mylib.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/mylib.pyc -------------------------------------------------------------------------------- /case/theater/myparser/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /case/theater/myparser/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/myparser/__init__.pyc -------------------------------------------------------------------------------- /case/theater/myparser/indievox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/myparser/indievox.py -------------------------------------------------------------------------------- /case/theater/myparser/indievox.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/myparser/indievox.pyc -------------------------------------------------------------------------------- /case/theater/myparser/legacy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/myparser/legacy.py -------------------------------------------------------------------------------- /case/theater/myparser/legacy.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/myparser/legacy.pyc -------------------------------------------------------------------------------- /case/theater/myparser/theater_thewall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/myparser/theater_thewall.py -------------------------------------------------------------------------------- /case/theater/myparser/theater_thewall.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/myparser/theater_thewall.pyc -------------------------------------------------------------------------------- /case/theater/parseplatform/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /case/theater/parseplatform/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/parseplatform/__init__.pyc -------------------------------------------------------------------------------- /case/theater/parseplatform/associate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/parseplatform/associate.py -------------------------------------------------------------------------------- /case/theater/parseplatform/create_object.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/parseplatform/create_object.py -------------------------------------------------------------------------------- /case/theater/parseplatform/upload_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/parseplatform/upload_data.py -------------------------------------------------------------------------------- /case/theater/parseplatform/upload_data.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/parseplatform/upload_data.pyc -------------------------------------------------------------------------------- /case/theater/result/legacy.result: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/result/legacy.result -------------------------------------------------------------------------------- /case/theater/test.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/test.html -------------------------------------------------------------------------------- /case/theater/tmp.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/tmp.data -------------------------------------------------------------------------------- /case/theater/urlcreate/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /case/theater/urlcreate/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/urlcreate/__init__.pyc -------------------------------------------------------------------------------- /case/theater/urlcreate/theater_thewall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/urlcreate/theater_thewall.py -------------------------------------------------------------------------------- /case/theater/urlcreate/theater_thewall.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/urlcreate/theater_thewall.pyc -------------------------------------------------------------------------------- /case/theater/webpage.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/case/theater/webpage.cfg -------------------------------------------------------------------------------- /crawl_framework/mylib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsrr/Crawler/HEAD/crawl_framework/mylib.py -------------------------------------------------------------------------------- /windows_test/1234.txt: -------------------------------------------------------------------------------- 1 | 1234 2 | 2345 3 | 5678 4 | --------------------------------------------------------------------------------