├── .gitignore ├── LICENSE ├── README.md ├── assets ├── spidercreator_banner.png └── spidercreator_hosted.png ├── ctxexec ├── __init__.py ├── cand_sp_exec.py ├── exec_sp.py ├── local_srv.py └── pipeline.py ├── examples ├── jobposting_spider.py ├── product_listings_spider.py └── real_state_listings.py ├── exec_funcs.py ├── main.py ├── pipeline ├── __init__.py ├── get_url_from_sp.py ├── make_candsp_runnable.py ├── make_dom_repr.py ├── mindmap.py ├── roiclf_spcandmkr.py ├── sp_addr_remapping.py ├── sp_combination.py ├── spider_draft.py ├── verification_pipeline.py ├── verify_sp_execution.py └── xpath_builder_planning.py ├── planning ├── __init__.py ├── plan_tokenizer.py ├── planner_to_rec.py └── rec_filtering.py ├── pyproject.toml ├── receive_bu_data.py ├── record_activity.py ├── requirements.txt ├── shared.py ├── spidercreator.py └── utils ├── __init__.py ├── recordings.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/README.md -------------------------------------------------------------------------------- /assets/spidercreator_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/assets/spidercreator_banner.png -------------------------------------------------------------------------------- /assets/spidercreator_hosted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/assets/spidercreator_hosted.png -------------------------------------------------------------------------------- /ctxexec/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | -------------------------------------------------------------------------------- /ctxexec/cand_sp_exec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/ctxexec/cand_sp_exec.py -------------------------------------------------------------------------------- /ctxexec/exec_sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/ctxexec/exec_sp.py -------------------------------------------------------------------------------- /ctxexec/local_srv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/ctxexec/local_srv.py -------------------------------------------------------------------------------- /ctxexec/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/ctxexec/pipeline.py -------------------------------------------------------------------------------- /examples/jobposting_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/examples/jobposting_spider.py -------------------------------------------------------------------------------- /examples/product_listings_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/examples/product_listings_spider.py -------------------------------------------------------------------------------- /examples/real_state_listings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/examples/real_state_listings.py -------------------------------------------------------------------------------- /exec_funcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/exec_funcs.py -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/main.py -------------------------------------------------------------------------------- /pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | -------------------------------------------------------------------------------- /pipeline/get_url_from_sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/pipeline/get_url_from_sp.py -------------------------------------------------------------------------------- /pipeline/make_candsp_runnable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/pipeline/make_candsp_runnable.py -------------------------------------------------------------------------------- /pipeline/make_dom_repr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/pipeline/make_dom_repr.py -------------------------------------------------------------------------------- /pipeline/mindmap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/pipeline/mindmap.py -------------------------------------------------------------------------------- /pipeline/roiclf_spcandmkr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/pipeline/roiclf_spcandmkr.py -------------------------------------------------------------------------------- /pipeline/sp_addr_remapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/pipeline/sp_addr_remapping.py -------------------------------------------------------------------------------- /pipeline/sp_combination.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/pipeline/sp_combination.py -------------------------------------------------------------------------------- /pipeline/spider_draft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/pipeline/spider_draft.py -------------------------------------------------------------------------------- /pipeline/verification_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/pipeline/verification_pipeline.py -------------------------------------------------------------------------------- /pipeline/verify_sp_execution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/pipeline/verify_sp_execution.py -------------------------------------------------------------------------------- /pipeline/xpath_builder_planning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/pipeline/xpath_builder_planning.py -------------------------------------------------------------------------------- /planning/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | -------------------------------------------------------------------------------- /planning/plan_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/planning/plan_tokenizer.py -------------------------------------------------------------------------------- /planning/planner_to_rec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/planning/planner_to_rec.py -------------------------------------------------------------------------------- /planning/rec_filtering.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/planning/rec_filtering.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/pyproject.toml -------------------------------------------------------------------------------- /receive_bu_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/receive_bu_data.py -------------------------------------------------------------------------------- /record_activity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/record_activity.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/requirements.txt -------------------------------------------------------------------------------- /shared.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/shared.py -------------------------------------------------------------------------------- /spidercreator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/spidercreator.py -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | -------------------------------------------------------------------------------- /utils/recordings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/utils/recordings.py -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlosplanchon/spidercreator/HEAD/utils/utils.py --------------------------------------------------------------------------------