├── .gitignore ├── Classifiers ├── MDW_by_subject.csv ├── Naive-Bayes-Classifier.py └── PracticePDFExtractor.py ├── LICENSE ├── README.md ├── UI └── SimpleQuerier.html ├── cc-by-nc-sa-3.0.md ├── extractor_research ├── extractors │ ├── __init__.py │ ├── miner.py │ ├── pdf2.py │ ├── pdfbox-app-1.8.5.jar │ ├── pdfbox.py │ ├── textstream.py │ ├── textstream │ │ ├── LICENSE │ │ ├── PDFTextStream.jar │ │ ├── TextStream.class │ │ └── TextStream.java │ └── xpdf.py ├── input │ ├── American_Opera_Rev_Syllabus.pdf │ ├── Leonard_Intro_Musicology_syllabus(2).pdf │ ├── Leonard_Intro_Musicology_syllabus.pdf │ ├── Leonard_Victorian_Music_syllabus_docx.pdf │ ├── Leonard_Women_Music_syllabus.pdf │ ├── Music_since_1900_syllabus.pdf │ └── pride_and_prej │ │ ├── 1.pdf │ │ ├── 2.pdf │ │ ├── 3.pdf │ │ └── plain.txt ├── main.py ├── output │ ├── American_Opera_Rev_Syllabus.html │ ├── American_Opera_Rev_Syllabus.xml │ ├── Leonard_Intro_Musicology_syllabus(2).html │ ├── Leonard_Intro_Musicology_syllabus(2).xml │ ├── Leonard_Intro_Musicology_syllabus.html │ ├── Leonard_Intro_Musicology_syllabus.xml │ ├── Leonard_Victorian_Music_syllabus_docx.html │ ├── Leonard_Victorian_Music_syllabus_docx.xml │ ├── Leonard_Women_Music_syllabus.html │ ├── Leonard_Women_Music_syllabus.xml │ ├── Music_since_1900_syllabus.html │ ├── Music_since_1900_syllabus.xml │ └── pride_and_prej │ │ ├── miner_with_layout │ │ └── 1.txt │ │ ├── miner_without_layout │ │ └── 1.txt │ │ ├── pdf2_default │ │ └── 1.txt │ │ ├── pdfbox_default │ │ └── 1.txt │ │ ├── textstream_default │ │ └── 1.txt │ │ ├── xpdf_with_layout │ │ └── 1.txt │ │ └── xpdf_without_layout │ │ └── 1.txt ├── stats │ └── pride_and_prej │ │ └── 1_speed_log.txt └── visualize │ ├── __init__.py │ └── html_parser.py ├── gpl-3.0.md ├── opensyllabus ├── __init__.py ├── config.py ├── core │ ├── __init__.py │ ├── extractor.py │ ├── ingestion.py │ ├── mongo.py │ ├── ocr.py │ └── utils.py ├── run_getemptydocs.py ├── run_getstats.py └── run_ingestion.py ├── sanitize.py └── twitter ├── .gitignore └── twitter.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/.gitignore -------------------------------------------------------------------------------- /Classifiers/MDW_by_subject.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/Classifiers/MDW_by_subject.csv -------------------------------------------------------------------------------- /Classifiers/Naive-Bayes-Classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/Classifiers/Naive-Bayes-Classifier.py -------------------------------------------------------------------------------- /Classifiers/PracticePDFExtractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/Classifiers/PracticePDFExtractor.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/README.md -------------------------------------------------------------------------------- /UI/SimpleQuerier.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/UI/SimpleQuerier.html -------------------------------------------------------------------------------- /cc-by-nc-sa-3.0.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/cc-by-nc-sa-3.0.md -------------------------------------------------------------------------------- /extractor_research/extractors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /extractor_research/extractors/miner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/extractors/miner.py -------------------------------------------------------------------------------- /extractor_research/extractors/pdf2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/extractors/pdf2.py -------------------------------------------------------------------------------- /extractor_research/extractors/pdfbox-app-1.8.5.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/extractors/pdfbox-app-1.8.5.jar -------------------------------------------------------------------------------- /extractor_research/extractors/pdfbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/extractors/pdfbox.py -------------------------------------------------------------------------------- /extractor_research/extractors/textstream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/extractors/textstream.py -------------------------------------------------------------------------------- /extractor_research/extractors/textstream/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/extractors/textstream/LICENSE -------------------------------------------------------------------------------- /extractor_research/extractors/textstream/PDFTextStream.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/extractors/textstream/PDFTextStream.jar -------------------------------------------------------------------------------- /extractor_research/extractors/textstream/TextStream.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/extractors/textstream/TextStream.class -------------------------------------------------------------------------------- /extractor_research/extractors/textstream/TextStream.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/extractors/textstream/TextStream.java -------------------------------------------------------------------------------- /extractor_research/extractors/xpdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/extractors/xpdf.py -------------------------------------------------------------------------------- /extractor_research/input/American_Opera_Rev_Syllabus.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/input/American_Opera_Rev_Syllabus.pdf -------------------------------------------------------------------------------- /extractor_research/input/Leonard_Intro_Musicology_syllabus(2).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/input/Leonard_Intro_Musicology_syllabus(2).pdf -------------------------------------------------------------------------------- /extractor_research/input/Leonard_Intro_Musicology_syllabus.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/input/Leonard_Intro_Musicology_syllabus.pdf -------------------------------------------------------------------------------- /extractor_research/input/Leonard_Victorian_Music_syllabus_docx.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/input/Leonard_Victorian_Music_syllabus_docx.pdf -------------------------------------------------------------------------------- /extractor_research/input/Leonard_Women_Music_syllabus.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/input/Leonard_Women_Music_syllabus.pdf -------------------------------------------------------------------------------- /extractor_research/input/Music_since_1900_syllabus.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/input/Music_since_1900_syllabus.pdf -------------------------------------------------------------------------------- /extractor_research/input/pride_and_prej/1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/input/pride_and_prej/1.pdf -------------------------------------------------------------------------------- /extractor_research/input/pride_and_prej/2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/input/pride_and_prej/2.pdf -------------------------------------------------------------------------------- /extractor_research/input/pride_and_prej/3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/input/pride_and_prej/3.pdf -------------------------------------------------------------------------------- /extractor_research/input/pride_and_prej/plain.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/input/pride_and_prej/plain.txt -------------------------------------------------------------------------------- /extractor_research/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/main.py -------------------------------------------------------------------------------- /extractor_research/output/American_Opera_Rev_Syllabus.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/American_Opera_Rev_Syllabus.html -------------------------------------------------------------------------------- /extractor_research/output/American_Opera_Rev_Syllabus.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/American_Opera_Rev_Syllabus.xml -------------------------------------------------------------------------------- /extractor_research/output/Leonard_Intro_Musicology_syllabus(2).html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/Leonard_Intro_Musicology_syllabus(2).html -------------------------------------------------------------------------------- /extractor_research/output/Leonard_Intro_Musicology_syllabus(2).xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/Leonard_Intro_Musicology_syllabus(2).xml -------------------------------------------------------------------------------- /extractor_research/output/Leonard_Intro_Musicology_syllabus.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/Leonard_Intro_Musicology_syllabus.html -------------------------------------------------------------------------------- /extractor_research/output/Leonard_Intro_Musicology_syllabus.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/Leonard_Intro_Musicology_syllabus.xml -------------------------------------------------------------------------------- /extractor_research/output/Leonard_Victorian_Music_syllabus_docx.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/Leonard_Victorian_Music_syllabus_docx.html -------------------------------------------------------------------------------- /extractor_research/output/Leonard_Victorian_Music_syllabus_docx.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/Leonard_Victorian_Music_syllabus_docx.xml -------------------------------------------------------------------------------- /extractor_research/output/Leonard_Women_Music_syllabus.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/Leonard_Women_Music_syllabus.html -------------------------------------------------------------------------------- /extractor_research/output/Leonard_Women_Music_syllabus.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/Leonard_Women_Music_syllabus.xml -------------------------------------------------------------------------------- /extractor_research/output/Music_since_1900_syllabus.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/Music_since_1900_syllabus.html -------------------------------------------------------------------------------- /extractor_research/output/Music_since_1900_syllabus.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/Music_since_1900_syllabus.xml -------------------------------------------------------------------------------- /extractor_research/output/pride_and_prej/miner_with_layout/1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/pride_and_prej/miner_with_layout/1.txt -------------------------------------------------------------------------------- /extractor_research/output/pride_and_prej/miner_without_layout/1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/pride_and_prej/miner_without_layout/1.txt -------------------------------------------------------------------------------- /extractor_research/output/pride_and_prej/pdf2_default/1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/pride_and_prej/pdf2_default/1.txt -------------------------------------------------------------------------------- /extractor_research/output/pride_and_prej/pdfbox_default/1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/pride_and_prej/pdfbox_default/1.txt -------------------------------------------------------------------------------- /extractor_research/output/pride_and_prej/textstream_default/1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/pride_and_prej/textstream_default/1.txt -------------------------------------------------------------------------------- /extractor_research/output/pride_and_prej/xpdf_with_layout/1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/pride_and_prej/xpdf_with_layout/1.txt -------------------------------------------------------------------------------- /extractor_research/output/pride_and_prej/xpdf_without_layout/1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/output/pride_and_prej/xpdf_without_layout/1.txt -------------------------------------------------------------------------------- /extractor_research/stats/pride_and_prej/1_speed_log.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/stats/pride_and_prej/1_speed_log.txt -------------------------------------------------------------------------------- /extractor_research/visualize/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /extractor_research/visualize/html_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/extractor_research/visualize/html_parser.py -------------------------------------------------------------------------------- /gpl-3.0.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/gpl-3.0.md -------------------------------------------------------------------------------- /opensyllabus/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /opensyllabus/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/opensyllabus/config.py -------------------------------------------------------------------------------- /opensyllabus/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /opensyllabus/core/extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/opensyllabus/core/extractor.py -------------------------------------------------------------------------------- /opensyllabus/core/ingestion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/opensyllabus/core/ingestion.py -------------------------------------------------------------------------------- /opensyllabus/core/mongo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/opensyllabus/core/mongo.py -------------------------------------------------------------------------------- /opensyllabus/core/ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/opensyllabus/core/ocr.py -------------------------------------------------------------------------------- /opensyllabus/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/opensyllabus/core/utils.py -------------------------------------------------------------------------------- /opensyllabus/run_getemptydocs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/opensyllabus/run_getemptydocs.py -------------------------------------------------------------------------------- /opensyllabus/run_getstats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/opensyllabus/run_getstats.py -------------------------------------------------------------------------------- /opensyllabus/run_ingestion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/opensyllabus/run_ingestion.py -------------------------------------------------------------------------------- /sanitize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/sanitize.py -------------------------------------------------------------------------------- /twitter/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | twitter-log.csv 3 | -------------------------------------------------------------------------------- /twitter/twitter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xpmethod/opensyllabus/HEAD/twitter/twitter.py --------------------------------------------------------------------------------