├── README.md
└── data_cleaning
    ├── CBOW.ipynb
    ├── LDA.ipynb
    ├── assign_SOC.ipynb
    ├── auxiliary files
        ├── OCRcorrect_enchant.py
        ├── OCRcorrect_hyphen.py
        ├── PWL.txt
        ├── TitleBase.txt
        ├── __pycache__
        │   ├── ExtractLDAresult.cpython-36.pyc
        │   ├── OCRcorrect_enchant.cpython-36.pyc
        │   ├── OCRcorrect_hyphen.cpython-36.pyc
        │   ├── compute_spelling.cpython-36.pyc
        │   ├── detect_ending.cpython-36.pyc
        │   ├── edit_distance.cpython-36.pyc
        │   ├── extract_LDA_result.cpython-36.pyc
        │   ├── extract_information.cpython-36.pyc
        │   ├── title_detection.cpython-36.pyc
        │   └── title_substitute.cpython-36.pyc
        ├── apst_mapping.xlsx
        ├── compute_spelling.py
        ├── detect_ending.py
        ├── edit_distance.py
        ├── example_ONET_api.png
        ├── extract_LDA_result.py
        ├── extract_information.py
        ├── phrase_substitutes.csv
        ├── state_name.txt
        ├── title2soc.txt
        ├── title_detection.py
        ├── title_substitute.py
        └── word_substitutes.csv
    ├── initial_cleaning.ipynb
    └── structured_data.ipynb


/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/README.md


--------------------------------------------------------------------------------
/data_cleaning/CBOW.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/CBOW.ipynb


--------------------------------------------------------------------------------
/data_cleaning/LDA.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/LDA.ipynb


--------------------------------------------------------------------------------
/data_cleaning/assign_SOC.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/assign_SOC.ipynb


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/OCRcorrect_enchant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/OCRcorrect_enchant.py


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/OCRcorrect_hyphen.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/OCRcorrect_hyphen.py


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/PWL.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/PWL.txt


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/TitleBase.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/TitleBase.txt


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/__pycache__/ExtractLDAresult.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/__pycache__/ExtractLDAresult.cpython-36.pyc


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/__pycache__/OCRcorrect_enchant.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/__pycache__/OCRcorrect_enchant.cpython-36.pyc


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/__pycache__/OCRcorrect_hyphen.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/__pycache__/OCRcorrect_hyphen.cpython-36.pyc


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/__pycache__/compute_spelling.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/__pycache__/compute_spelling.cpython-36.pyc


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/__pycache__/detect_ending.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/__pycache__/detect_ending.cpython-36.pyc


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/__pycache__/edit_distance.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/__pycache__/edit_distance.cpython-36.pyc


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/__pycache__/extract_LDA_result.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/__pycache__/extract_LDA_result.cpython-36.pyc


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/__pycache__/extract_information.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/__pycache__/extract_information.cpython-36.pyc


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/__pycache__/title_detection.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/__pycache__/title_detection.cpython-36.pyc


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/__pycache__/title_substitute.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/__pycache__/title_substitute.cpython-36.pyc


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/apst_mapping.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/apst_mapping.xlsx


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/compute_spelling.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/compute_spelling.py


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/detect_ending.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/detect_ending.py


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/edit_distance.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/edit_distance.py


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/example_ONET_api.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/example_ONET_api.png


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/extract_LDA_result.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/extract_LDA_result.py


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/extract_information.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/extract_information.py


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/phrase_substitutes.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/phrase_substitutes.csv


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/state_name.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/state_name.txt


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/title2soc.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/title2soc.txt


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/title_detection.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/title_detection.py


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/title_substitute.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/title_substitute.py


--------------------------------------------------------------------------------
/data_cleaning/auxiliary files/word_substitutes.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/auxiliary files/word_substitutes.csv


--------------------------------------------------------------------------------
/data_cleaning/initial_cleaning.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/initial_cleaning.ipynb


--------------------------------------------------------------------------------
/data_cleaning/structured_data.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaiptt125/newspaper_project/HEAD/data_cleaning/structured_data.ipynb


--------------------------------------------------------------------------------