├── .gitattributes ├── .gitignore ├── Dockerfile ├── LICENSE.txt ├── README.md ├── __init__.py ├── maluuba ├── __init__.py └── newsqa │ ├── TokenizerSplitter.java │ ├── __init__.py │ ├── data_generator.py │ ├── data_processing.py │ ├── dev_story_ids.csv │ ├── simplify.py │ ├── span_utils.py │ ├── split_dataset.py │ ├── stories_requiring_extra_newline.csv │ ├── stories_requiring_two_extra_newlines.csv │ ├── stories_to_decode_specially.csv │ ├── test_story_ids.csv │ ├── tests │ ├── __init__.py │ ├── test_newsqa.py │ └── test_tokenize.py │ ├── tokenize_dataset.py │ └── train_story_ids.csv └── requirements.txt /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/.gitignore -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/LICENSE.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/README.md -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /maluuba/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /maluuba/newsqa/TokenizerSplitter.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/TokenizerSplitter.java -------------------------------------------------------------------------------- /maluuba/newsqa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /maluuba/newsqa/data_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/data_generator.py -------------------------------------------------------------------------------- /maluuba/newsqa/data_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/data_processing.py -------------------------------------------------------------------------------- /maluuba/newsqa/dev_story_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/dev_story_ids.csv -------------------------------------------------------------------------------- /maluuba/newsqa/simplify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/simplify.py -------------------------------------------------------------------------------- /maluuba/newsqa/span_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/span_utils.py -------------------------------------------------------------------------------- /maluuba/newsqa/split_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/split_dataset.py -------------------------------------------------------------------------------- /maluuba/newsqa/stories_requiring_extra_newline.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/stories_requiring_extra_newline.csv -------------------------------------------------------------------------------- /maluuba/newsqa/stories_requiring_two_extra_newlines.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/stories_requiring_two_extra_newlines.csv -------------------------------------------------------------------------------- /maluuba/newsqa/stories_to_decode_specially.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/stories_to_decode_specially.csv -------------------------------------------------------------------------------- /maluuba/newsqa/test_story_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/test_story_ids.csv -------------------------------------------------------------------------------- /maluuba/newsqa/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /maluuba/newsqa/tests/test_newsqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/tests/test_newsqa.py -------------------------------------------------------------------------------- /maluuba/newsqa/tests/test_tokenize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/tests/test_tokenize.py -------------------------------------------------------------------------------- /maluuba/newsqa/tokenize_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/tokenize_dataset.py -------------------------------------------------------------------------------- /maluuba/newsqa/train_story_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maluuba/newsqa/HEAD/maluuba/newsqa/train_story_ids.csv -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | six>=1.10.0 2 | tqdm>=4.19 3 | --------------------------------------------------------------------------------