├── .gitattributes ├── .gitignore ├── README.md ├── datasets ├── arxiv2M.parquet ├── arxivExceed512Tokens.parquet └── missing_papers.parquet ├── graveyard └── verifyresult.py ├── join.py ├── process.py ├── requirements.txt ├── run.py ├── run_extra.py ├── script512.py └── setup.sh /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrocosmcorp/alexandria-pipeline/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrocosmcorp/alexandria-pipeline/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrocosmcorp/alexandria-pipeline/HEAD/README.md -------------------------------------------------------------------------------- /datasets/arxiv2M.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrocosmcorp/alexandria-pipeline/HEAD/datasets/arxiv2M.parquet -------------------------------------------------------------------------------- /datasets/arxivExceed512Tokens.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrocosmcorp/alexandria-pipeline/HEAD/datasets/arxivExceed512Tokens.parquet -------------------------------------------------------------------------------- /datasets/missing_papers.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrocosmcorp/alexandria-pipeline/HEAD/datasets/missing_papers.parquet -------------------------------------------------------------------------------- /graveyard/verifyresult.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrocosmcorp/alexandria-pipeline/HEAD/graveyard/verifyresult.py -------------------------------------------------------------------------------- /join.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrocosmcorp/alexandria-pipeline/HEAD/join.py -------------------------------------------------------------------------------- /process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrocosmcorp/alexandria-pipeline/HEAD/process.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrocosmcorp/alexandria-pipeline/HEAD/requirements.txt -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrocosmcorp/alexandria-pipeline/HEAD/run.py -------------------------------------------------------------------------------- /run_extra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrocosmcorp/alexandria-pipeline/HEAD/run_extra.py -------------------------------------------------------------------------------- /script512.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrocosmcorp/alexandria-pipeline/HEAD/script512.py -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrocosmcorp/alexandria-pipeline/HEAD/setup.sh --------------------------------------------------------------------------------