├── .gitignore ├── README.md ├── code ├── acl_crawl │ ├── acl_crawl │ │ ├── README.md │ │ ├── __init__.py │ │ ├── items.py │ │ ├── links_delta.pdf.2023.txt │ │ ├── links_pdf.2022.txt │ │ ├── links_pdf.2023.txt │ │ ├── links_pdf.txt │ │ ├── middlewares.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ └── get_bib.py │ └── scrapy.cfg ├── notebooks │ ├── enhance_metadata_s2_n_bib.ipynb │ └── get_ACL_citations.ipynb └── parse_grobid_extraction │ ├── main.py │ └── sequence_truncate.py └── lit_review.md /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/README.md -------------------------------------------------------------------------------- /code/acl_crawl/acl_crawl/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/acl_crawl/acl_crawl/README.md -------------------------------------------------------------------------------- /code/acl_crawl/acl_crawl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/acl_crawl/acl_crawl/items.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/acl_crawl/acl_crawl/items.py -------------------------------------------------------------------------------- /code/acl_crawl/acl_crawl/links_delta.pdf.2023.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/acl_crawl/acl_crawl/links_delta.pdf.2023.txt -------------------------------------------------------------------------------- /code/acl_crawl/acl_crawl/links_pdf.2022.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/acl_crawl/acl_crawl/links_pdf.2022.txt -------------------------------------------------------------------------------- /code/acl_crawl/acl_crawl/links_pdf.2023.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/acl_crawl/acl_crawl/links_pdf.2023.txt -------------------------------------------------------------------------------- /code/acl_crawl/acl_crawl/links_pdf.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/acl_crawl/acl_crawl/links_pdf.txt -------------------------------------------------------------------------------- /code/acl_crawl/acl_crawl/middlewares.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/acl_crawl/acl_crawl/middlewares.py -------------------------------------------------------------------------------- /code/acl_crawl/acl_crawl/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/acl_crawl/acl_crawl/pipelines.py -------------------------------------------------------------------------------- /code/acl_crawl/acl_crawl/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/acl_crawl/acl_crawl/settings.py -------------------------------------------------------------------------------- /code/acl_crawl/acl_crawl/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/acl_crawl/acl_crawl/spiders/__init__.py -------------------------------------------------------------------------------- /code/acl_crawl/acl_crawl/spiders/get_bib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/acl_crawl/acl_crawl/spiders/get_bib.py -------------------------------------------------------------------------------- /code/acl_crawl/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/acl_crawl/scrapy.cfg -------------------------------------------------------------------------------- /code/notebooks/enhance_metadata_s2_n_bib.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/notebooks/enhance_metadata_s2_n_bib.ipynb -------------------------------------------------------------------------------- /code/notebooks/get_ACL_citations.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/notebooks/get_ACL_citations.ipynb -------------------------------------------------------------------------------- /code/parse_grobid_extraction/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/parse_grobid_extraction/main.py -------------------------------------------------------------------------------- /code/parse_grobid_extraction/sequence_truncate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/code/parse_grobid_extraction/sequence_truncate.py -------------------------------------------------------------------------------- /lit_review.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shauryr/ACL-anthology-corpus/HEAD/lit_review.md --------------------------------------------------------------------------------