├── .dockerignore ├── .gitattributes ├── .gitignore ├── Dockerfile.cron ├── Dockerfile.db_worker ├── Dockerfile.server ├── README.md ├── ai2-ca.crt ├── cron.k8s-env.yaml ├── cron.sh ├── dataprep ├── .gitignore ├── Dockerfile ├── build.sbt ├── cli │ ├── build.sbt │ └── src │ │ └── main │ │ ├── resources │ │ └── logback.xml │ │ └── scala │ │ └── org │ │ └── allenai │ │ └── spv2 │ │ └── DataprepCli.scala ├── core │ ├── build.sbt │ └── src │ │ └── main │ │ ├── java │ │ └── org │ │ │ └── allenai │ │ │ └── spv2 │ │ │ ├── PaperSource.java │ │ │ ├── RetryPaperSource.java │ │ │ └── ScholarBucketPaperSource.java │ │ ├── protobuf │ │ └── document.proto │ │ └── scala │ │ └── org │ │ └── allenai │ │ └── spv2 │ │ ├── PreprocessPdf.scala │ │ └── Utilities.scala ├── dataprep-service.k8s-env.yaml ├── makeDocker.sh ├── project │ ├── build.properties │ └── plugins.sbt ├── server │ ├── build.sbt │ └── src │ │ └── main │ │ ├── resources │ │ └── logback.xml │ │ └── scala │ │ └── org │ │ └── allenai │ │ └── spv2 │ │ └── DataprepServer.scala └── version.sbt ├── dataprep2.py ├── db_worker.k8s-env.yaml ├── db_worker.py ├── makeDocker.sh ├── model ├── C49.h5 ├── all.tokenstats3.gz └── glove.6B.100d.txt.gz ├── requirements.in ├── server.k8s-env.yaml ├── server.py ├── settings.py ├── stringmatch ├── .gitignore ├── __init__.py ├── stringmatch.cpp ├── stringmatch_builder.py └── test_stringmatch.py ├── supervisord.conf ├── tensorflow-cpu └── tensorflow-1.3.1-cp35-cp35m-linux_x86_64.whl ├── token_statistics.py ├── unicode.py ├── varint.py ├── version.txt └── with_labels.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | __pycache__ 3 | dataprep/target 4 | .idea -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/.gitignore -------------------------------------------------------------------------------- /Dockerfile.cron: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/Dockerfile.cron -------------------------------------------------------------------------------- /Dockerfile.db_worker: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/Dockerfile.db_worker -------------------------------------------------------------------------------- /Dockerfile.server: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/Dockerfile.server -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/README.md -------------------------------------------------------------------------------- /ai2-ca.crt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/ai2-ca.crt -------------------------------------------------------------------------------- /cron.k8s-env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/cron.k8s-env.yaml -------------------------------------------------------------------------------- /cron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/cron.sh -------------------------------------------------------------------------------- /dataprep/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /dataprep/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/Dockerfile -------------------------------------------------------------------------------- /dataprep/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/build.sbt -------------------------------------------------------------------------------- /dataprep/cli/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/cli/build.sbt -------------------------------------------------------------------------------- /dataprep/cli/src/main/resources/logback.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/cli/src/main/resources/logback.xml -------------------------------------------------------------------------------- /dataprep/cli/src/main/scala/org/allenai/spv2/DataprepCli.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/cli/src/main/scala/org/allenai/spv2/DataprepCli.scala -------------------------------------------------------------------------------- /dataprep/core/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/core/build.sbt -------------------------------------------------------------------------------- /dataprep/core/src/main/java/org/allenai/spv2/PaperSource.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/core/src/main/java/org/allenai/spv2/PaperSource.java -------------------------------------------------------------------------------- /dataprep/core/src/main/java/org/allenai/spv2/RetryPaperSource.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/core/src/main/java/org/allenai/spv2/RetryPaperSource.java -------------------------------------------------------------------------------- /dataprep/core/src/main/java/org/allenai/spv2/ScholarBucketPaperSource.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/core/src/main/java/org/allenai/spv2/ScholarBucketPaperSource.java -------------------------------------------------------------------------------- /dataprep/core/src/main/protobuf/document.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/core/src/main/protobuf/document.proto -------------------------------------------------------------------------------- /dataprep/core/src/main/scala/org/allenai/spv2/PreprocessPdf.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/core/src/main/scala/org/allenai/spv2/PreprocessPdf.scala -------------------------------------------------------------------------------- /dataprep/core/src/main/scala/org/allenai/spv2/Utilities.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/core/src/main/scala/org/allenai/spv2/Utilities.scala -------------------------------------------------------------------------------- /dataprep/dataprep-service.k8s-env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/dataprep-service.k8s-env.yaml -------------------------------------------------------------------------------- /dataprep/makeDocker.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/makeDocker.sh -------------------------------------------------------------------------------- /dataprep/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.13 2 | -------------------------------------------------------------------------------- /dataprep/project/plugins.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/project/plugins.sbt -------------------------------------------------------------------------------- /dataprep/server/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/server/build.sbt -------------------------------------------------------------------------------- /dataprep/server/src/main/resources/logback.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/server/src/main/resources/logback.xml -------------------------------------------------------------------------------- /dataprep/server/src/main/scala/org/allenai/spv2/DataprepServer.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep/server/src/main/scala/org/allenai/spv2/DataprepServer.scala -------------------------------------------------------------------------------- /dataprep/version.sbt: -------------------------------------------------------------------------------- 1 | version in ThisBuild := "2.6" 2 | -------------------------------------------------------------------------------- /dataprep2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/dataprep2.py -------------------------------------------------------------------------------- /db_worker.k8s-env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/db_worker.k8s-env.yaml -------------------------------------------------------------------------------- /db_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/db_worker.py -------------------------------------------------------------------------------- /makeDocker.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/makeDocker.sh -------------------------------------------------------------------------------- /model/C49.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/model/C49.h5 -------------------------------------------------------------------------------- /model/all.tokenstats3.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/model/all.tokenstats3.gz -------------------------------------------------------------------------------- /model/glove.6B.100d.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/model/glove.6B.100d.txt.gz -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/requirements.in -------------------------------------------------------------------------------- /server.k8s-env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/server.k8s-env.yaml -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/server.py -------------------------------------------------------------------------------- /settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/settings.py -------------------------------------------------------------------------------- /stringmatch/.gitignore: -------------------------------------------------------------------------------- 1 | _stringmatch* 2 | -------------------------------------------------------------------------------- /stringmatch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/stringmatch/__init__.py -------------------------------------------------------------------------------- /stringmatch/stringmatch.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/stringmatch/stringmatch.cpp -------------------------------------------------------------------------------- /stringmatch/stringmatch_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/stringmatch/stringmatch_builder.py -------------------------------------------------------------------------------- /stringmatch/test_stringmatch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/stringmatch/test_stringmatch.py -------------------------------------------------------------------------------- /supervisord.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/supervisord.conf -------------------------------------------------------------------------------- /tensorflow-cpu/tensorflow-1.3.1-cp35-cp35m-linux_x86_64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/tensorflow-cpu/tensorflow-1.3.1-cp35-cp35m-linux_x86_64.whl -------------------------------------------------------------------------------- /token_statistics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/token_statistics.py -------------------------------------------------------------------------------- /unicode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/unicode.py -------------------------------------------------------------------------------- /varint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/varint.py -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | 3.2.4 2 | -------------------------------------------------------------------------------- /with_labels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/spv2/HEAD/with_labels.py --------------------------------------------------------------------------------